Chris@26
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@26
|
2
|
Chris@26
|
3 /*
|
Chris@26
|
4 Vamp feature extraction plugin using the MATCH audio alignment
|
Chris@26
|
5 algorithm.
|
Chris@26
|
6
|
Chris@26
|
7 Centre for Digital Music, Queen Mary, University of London.
|
Chris@236
|
8 Copyright (c) 2007-2020 Simon Dixon, Chris Cannam, and Queen Mary
|
Chris@230
|
9 University of London, Copyright (c) 2014-2015 Tido GmbH.
|
Chris@26
|
10
|
Chris@26
|
11 This program is free software; you can redistribute it and/or
|
Chris@26
|
12 modify it under the terms of the GNU General Public License as
|
Chris@26
|
13 published by the Free Software Foundation; either version 2 of the
|
Chris@26
|
14 License, or (at your option) any later version. See the file
|
Chris@26
|
15 COPYING included with this distribution for more information.
|
Chris@26
|
16 */
|
Chris@26
|
17
|
Chris@26
|
18 #include "DistanceMetric.h"
|
Chris@26
|
19
|
Chris@26
|
20 #include <cassert>
|
Chris@26
|
21 #include <cmath>
|
Chris@133
|
22 #include <iostream>
|
Chris@26
|
23
|
Chris@133
|
24 using namespace std;
|
Chris@26
|
25
|
Chris@196
|
26 //#define DEBUG_DISTANCE_METRIC 1
|
Chris@140
|
27
|
Chris@185
|
28 template <> uint8_t
|
Chris@185
|
29 DistanceMetric::scaleIntoRange(double distance)
|
Chris@185
|
30 {
|
Chris@186
|
31 double scaled = m_params.scale * distance;
|
Chris@190
|
32 if (scaled < 0) {
|
Chris@190
|
33 scaled = 0;
|
Chris@190
|
34 }
|
Chris@212
|
35 if (scaled > DISTANCE_MAX) {
|
Chris@212
|
36 scaled = DISTANCE_MAX;
|
Chris@190
|
37 ++m_overcount;
|
Chris@190
|
38 }
|
Chris@186
|
39 return uint8_t(scaled);
|
Chris@185
|
40 }
|
Chris@185
|
41
|
Chris@185
|
42 template <> float
|
Chris@185
|
43 DistanceMetric::scaleIntoRange(double distance)
|
Chris@185
|
44 {
|
Chris@185
|
45 return float(distance);
|
Chris@185
|
46 }
|
Chris@185
|
47
|
Chris@185
|
48 template <> double
|
Chris@185
|
49 DistanceMetric::scaleIntoRange(double distance)
|
Chris@185
|
50 {
|
Chris@185
|
51 return distance;
|
Chris@185
|
52 }
|
Chris@185
|
53
|
Chris@143
|
54 DistanceMetric::DistanceMetric(Parameters params) :
|
Chris@190
|
55 m_params(params),
|
Chris@190
|
56 m_max(0),
|
Chris@190
|
57 m_overcount(0)
|
Chris@140
|
58 {
|
Chris@140
|
59 #ifdef DEBUG_DISTANCE_METRIC
|
Chris@190
|
60 cerr << "*** DistanceMetric: metric = " << m_params.metric
|
Chris@190
|
61 << ", norm = " << m_params.norm
|
Chris@190
|
62 << ", noise = " << m_params.noise
|
Chris@190
|
63 << ", scale = " << m_params.scale
|
Chris@143
|
64 << endl;
|
Chris@140
|
65 #endif
|
Chris@140
|
66 }
|
Chris@140
|
67
|
Chris@190
|
68 DistanceMetric::~DistanceMetric()
|
Chris@190
|
69 {
|
Chris@190
|
70 #ifdef DEBUG_DISTANCE_METRIC
|
Chris@190
|
71 cerr << "*** DistanceMetric::~DistanceMetric: metric = " << m_params.metric
|
Chris@190
|
72 << ", norm = " << m_params.norm
|
Chris@190
|
73 << ", noise = " << m_params.noise;
|
Chris@190
|
74 #ifdef USE_COMPACT_TYPES
|
Chris@190
|
75 cerr << ", scale = " << m_params.scale;
|
Chris@190
|
76 cerr << "\n*** DistanceMetric::~DistanceMetric: max scaled value = "
|
Chris@190
|
77 << distance_print_t(m_max)
|
Chris@190
|
78 << ", " << m_overcount << " clipped" << endl;
|
Chris@190
|
79 #else
|
Chris@190
|
80 cerr << ", no scaling";
|
Chris@190
|
81 cerr << "\n*** DistanceMetric::~DistanceMetric: max value = "
|
Chris@190
|
82 << distance_print_t(m_max)
|
Chris@190
|
83 << endl;
|
Chris@190
|
84 #endif
|
Chris@190
|
85 #endif
|
Chris@190
|
86 }
|
Chris@190
|
87
|
Chris@183
|
88 distance_t
|
Chris@186
|
89 DistanceMetric::scaleValueIntoDistanceRange(double value)
|
Chris@186
|
90 {
|
Chris@186
|
91 return scaleIntoRange<distance_t>(value);
|
Chris@186
|
92 }
|
Chris@190
|
93
|
Chris@190
|
94 distance_t
|
Chris@190
|
95 DistanceMetric::scaleAndTally(double value)
|
Chris@190
|
96 {
|
Chris@190
|
97 distance_t dist = scaleIntoRange<distance_t>(value);
|
Chris@190
|
98 if (dist > m_max) m_max = dist;
|
Chris@190
|
99 return dist;
|
Chris@190
|
100 }
|
Chris@190
|
101
|
Chris@186
|
102 distance_t
|
Chris@183
|
103 DistanceMetric::calcDistance(const feature_t &f1,
|
Chris@183
|
104 const feature_t &f2)
|
Chris@26
|
105 {
|
Chris@26
|
106 double d = 0;
|
Chris@26
|
107 double sum = 0;
|
Chris@156
|
108 double eps = 1e-16;
|
Chris@26
|
109
|
Chris@180
|
110 assert(f2.size() == f1.size());
|
Chris@180
|
111 int featureSize = static_cast<int>(f1.size());
|
Chris@145
|
112
|
Chris@197
|
113 double minNoise = 0.0;
|
Chris@197
|
114 #ifdef USE_COMPACT_TYPES
|
Chris@197
|
115 minNoise = 1.0 / m_params.scale;
|
Chris@197
|
116 #endif
|
Chris@197
|
117
|
Chris@156
|
118 if (m_params.metric == Cosine) {
|
Chris@156
|
119
|
Chris@156
|
120 double num = 0, denom1 = 0, denom2 = 0;
|
Chris@156
|
121
|
Chris@156
|
122 for (int i = 0; i < featureSize; ++i) {
|
Chris@156
|
123 num += f1[i] * f2[i];
|
Chris@156
|
124 denom1 += f1[i] * f1[i];
|
Chris@156
|
125 denom2 += f2[i] * f2[i];
|
Chris@156
|
126 }
|
Chris@156
|
127
|
Chris@156
|
128 d = 1.0 - (num / (eps + sqrt(denom1 * denom2)));
|
Chris@156
|
129
|
Chris@156
|
130 if (m_params.noise == AddNoise) {
|
Chris@197
|
131 double noise = 1e-2;
|
Chris@197
|
132 if (noise < minNoise) noise = minNoise;
|
Chris@197
|
133 d += noise;
|
Chris@156
|
134 }
|
Chris@156
|
135 if (d > 1.0) d = 1.0;
|
Chris@156
|
136
|
Chris@190
|
137 return scaleAndTally(d); // normalisation param ignored
|
Chris@156
|
138 }
|
Chris@156
|
139
|
Chris@157
|
140 if (m_params.metric == Manhattan) {
|
Chris@157
|
141 for (int i = 0; i < featureSize; i++) {
|
Chris@157
|
142 d += fabs(f1[i] - f2[i]);
|
Chris@157
|
143 sum += fabs(f1[i]) + fabs(f2[i]);
|
Chris@157
|
144 }
|
Chris@157
|
145 } else {
|
Chris@157
|
146 // Euclidean
|
Chris@157
|
147 for (int i = 0; i < featureSize; i++) {
|
Chris@157
|
148 d += (f1[i] - f2[i]) * (f1[i] - f2[i]);
|
Chris@157
|
149 sum += fabs(f1[i]) + fabs(f2[i]);
|
Chris@157
|
150 }
|
Chris@157
|
151 d = sqrt(d);
|
Chris@26
|
152 }
|
Chris@26
|
153
|
Chris@150
|
154 if (m_params.noise == AddNoise) {
|
Chris@197
|
155 double noise = 1e-3 * featureSize;
|
Chris@197
|
156 if (noise < minNoise) noise = minNoise;
|
Chris@150
|
157 d += noise;
|
Chris@150
|
158 sum += noise;
|
Chris@150
|
159 }
|
Chris@145
|
160
|
Chris@143
|
161 if (sum == 0) {
|
Chris@190
|
162 return scaleAndTally(0);
|
Chris@143
|
163 }
|
Chris@26
|
164
|
Chris@143
|
165 double distance = 0;
|
Chris@26
|
166
|
Chris@143
|
167 if (m_params.norm == NormaliseDistanceToSum) {
|
Chris@143
|
168
|
Chris@143
|
169 distance = d / sum; // 0 <= d/sum <= 2
|
Chris@143
|
170
|
Chris@143
|
171 } else if (m_params.norm == NormaliseDistanceToLogSum) {
|
Chris@143
|
172
|
Chris@143
|
173 // note if this were to be restored, it would have to use
|
Chris@143
|
174 // totalEnergies vector instead of f1[freqMapSize] which used to
|
Chris@143
|
175 // store the total energy:
|
Chris@143
|
176 // double weight = (5 + Math.log(f1[freqMapSize] + f2[freqMapSize]))/10.0;
|
Chris@143
|
177
|
Chris@143
|
178 double weight = (8 + log(sum)) / 10.0;
|
Chris@133
|
179
|
Chris@143
|
180 if (weight < 0) weight = 0;
|
Chris@143
|
181 else if (weight > 1) weight = 1;
|
Chris@26
|
182
|
Chris@143
|
183 distance = d / sum * weight;
|
Chris@143
|
184
|
Chris@143
|
185 } else {
|
Chris@143
|
186
|
Chris@143
|
187 distance = d;
|
Chris@143
|
188 }
|
Chris@143
|
189
|
Chris@190
|
190 return scaleAndTally(distance);
|
Chris@26
|
191 }
|