Chris@26
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@26
|
2
|
Chris@26
|
3 /*
|
Chris@26
|
4 Vamp feature extraction plugin using the MATCH audio alignment
|
Chris@26
|
5 algorithm.
|
Chris@26
|
6
|
Chris@26
|
7 Centre for Digital Music, Queen Mary, University of London.
|
Chris@26
|
8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
|
Chris@26
|
9
|
Chris@26
|
10 This program is free software; you can redistribute it and/or
|
Chris@26
|
11 modify it under the terms of the GNU General Public License as
|
Chris@26
|
12 published by the Free Software Foundation; either version 2 of the
|
Chris@26
|
13 License, or (at your option) any later version. See the file
|
Chris@26
|
14 COPYING included with this distribution for more information.
|
Chris@26
|
15 */
|
Chris@26
|
16
|
Chris@26
|
17 #include "DistanceMetric.h"
|
Chris@26
|
18
|
Chris@26
|
19 #include <cassert>
|
Chris@26
|
20 #include <cmath>
|
Chris@133
|
21 #include <iostream>
|
Chris@26
|
22
|
Chris@133
|
23 using namespace std;
|
Chris@26
|
24
|
Chris@190
|
25 #define DEBUG_DISTANCE_METRIC 1
|
Chris@140
|
26
|
Chris@185
|
27 template <> uint8_t
|
Chris@185
|
28 DistanceMetric::scaleIntoRange(double distance)
|
Chris@185
|
29 {
|
Chris@186
|
30 double scaled = m_params.scale * distance;
|
Chris@190
|
31 if (scaled < 0) {
|
Chris@190
|
32 scaled = 0;
|
Chris@190
|
33 }
|
Chris@190
|
34 if (scaled > MaxDistance) {
|
Chris@190
|
35 scaled = MaxDistance;
|
Chris@190
|
36 ++m_overcount;
|
Chris@190
|
37 }
|
Chris@186
|
38 return uint8_t(scaled);
|
Chris@185
|
39 }
|
Chris@185
|
40
|
Chris@185
|
41 template <> float
|
Chris@185
|
42 DistanceMetric::scaleIntoRange(double distance)
|
Chris@185
|
43 {
|
Chris@185
|
44 return float(distance);
|
Chris@185
|
45 }
|
Chris@185
|
46
|
Chris@185
|
47 template <> double
|
Chris@185
|
48 DistanceMetric::scaleIntoRange(double distance)
|
Chris@185
|
49 {
|
Chris@185
|
50 return distance;
|
Chris@185
|
51 }
|
Chris@185
|
52
|
Chris@143
|
53 DistanceMetric::DistanceMetric(Parameters params) :
|
Chris@190
|
54 m_params(params),
|
Chris@190
|
55 m_max(0),
|
Chris@190
|
56 m_overcount(0)
|
Chris@140
|
57 {
|
Chris@140
|
58 #ifdef DEBUG_DISTANCE_METRIC
|
Chris@190
|
59 cerr << "*** DistanceMetric: metric = " << m_params.metric
|
Chris@190
|
60 << ", norm = " << m_params.norm
|
Chris@190
|
61 << ", noise = " << m_params.noise
|
Chris@190
|
62 << ", scale = " << m_params.scale
|
Chris@143
|
63 << endl;
|
Chris@140
|
64 #endif
|
Chris@140
|
65 }
|
Chris@140
|
66
|
Chris@190
|
67 DistanceMetric::~DistanceMetric()
|
Chris@190
|
68 {
|
Chris@190
|
69 #ifdef DEBUG_DISTANCE_METRIC
|
Chris@190
|
70 cerr << "*** DistanceMetric::~DistanceMetric: metric = " << m_params.metric
|
Chris@190
|
71 << ", norm = " << m_params.norm
|
Chris@190
|
72 << ", noise = " << m_params.noise;
|
Chris@190
|
73 #ifdef USE_COMPACT_TYPES
|
Chris@190
|
74 cerr << ", scale = " << m_params.scale;
|
Chris@190
|
75 cerr << "\n*** DistanceMetric::~DistanceMetric: max scaled value = "
|
Chris@190
|
76 << distance_print_t(m_max)
|
Chris@190
|
77 << ", " << m_overcount << " clipped" << endl;
|
Chris@190
|
78 #else
|
Chris@190
|
79 cerr << ", no scaling";
|
Chris@190
|
80 cerr << "\n*** DistanceMetric::~DistanceMetric: max value = "
|
Chris@190
|
81 << distance_print_t(m_max)
|
Chris@190
|
82 << endl;
|
Chris@190
|
83 #endif
|
Chris@190
|
84 #endif
|
Chris@190
|
85 }
|
Chris@190
|
86
|
Chris@183
|
87 distance_t
|
Chris@186
|
88 DistanceMetric::scaleValueIntoDistanceRange(double value)
|
Chris@186
|
89 {
|
Chris@186
|
90 return scaleIntoRange<distance_t>(value);
|
Chris@186
|
91 }
|
Chris@190
|
92
|
Chris@190
|
93 distance_t
|
Chris@190
|
94 DistanceMetric::scaleAndTally(double value)
|
Chris@190
|
95 {
|
Chris@190
|
96 distance_t dist = scaleIntoRange<distance_t>(value);
|
Chris@190
|
97 if (dist > m_max) m_max = dist;
|
Chris@190
|
98 return dist;
|
Chris@190
|
99 }
|
Chris@190
|
100
|
Chris@186
|
101 distance_t
|
Chris@183
|
102 DistanceMetric::calcDistance(const feature_t &f1,
|
Chris@183
|
103 const feature_t &f2)
|
Chris@26
|
104 {
|
Chris@26
|
105 double d = 0;
|
Chris@26
|
106 double sum = 0;
|
Chris@156
|
107 double eps = 1e-16;
|
Chris@26
|
108
|
Chris@180
|
109 assert(f2.size() == f1.size());
|
Chris@180
|
110 int featureSize = static_cast<int>(f1.size());
|
Chris@145
|
111
|
Chris@156
|
112 if (m_params.metric == Cosine) {
|
Chris@156
|
113
|
Chris@156
|
114 double num = 0, denom1 = 0, denom2 = 0;
|
Chris@156
|
115
|
Chris@156
|
116 for (int i = 0; i < featureSize; ++i) {
|
Chris@156
|
117 num += f1[i] * f2[i];
|
Chris@156
|
118 denom1 += f1[i] * f1[i];
|
Chris@156
|
119 denom2 += f2[i] * f2[i];
|
Chris@156
|
120 }
|
Chris@156
|
121
|
Chris@156
|
122 d = 1.0 - (num / (eps + sqrt(denom1 * denom2)));
|
Chris@156
|
123
|
Chris@156
|
124 if (m_params.noise == AddNoise) {
|
Chris@156
|
125 d += 1e-2;
|
Chris@156
|
126 }
|
Chris@156
|
127 if (d > 1.0) d = 1.0;
|
Chris@156
|
128
|
Chris@190
|
129 return scaleAndTally(d); // normalisation param ignored
|
Chris@156
|
130 }
|
Chris@156
|
131
|
Chris@157
|
132 if (m_params.metric == Manhattan) {
|
Chris@157
|
133 for (int i = 0; i < featureSize; i++) {
|
Chris@157
|
134 d += fabs(f1[i] - f2[i]);
|
Chris@157
|
135 sum += fabs(f1[i]) + fabs(f2[i]);
|
Chris@157
|
136 }
|
Chris@157
|
137 } else {
|
Chris@157
|
138 // Euclidean
|
Chris@157
|
139 for (int i = 0; i < featureSize; i++) {
|
Chris@157
|
140 d += (f1[i] - f2[i]) * (f1[i] - f2[i]);
|
Chris@157
|
141 sum += fabs(f1[i]) + fabs(f2[i]);
|
Chris@157
|
142 }
|
Chris@157
|
143 d = sqrt(d);
|
Chris@26
|
144 }
|
Chris@26
|
145
|
Chris@145
|
146 double noise = 1e-3 * featureSize;
|
Chris@150
|
147 if (m_params.noise == AddNoise) {
|
Chris@150
|
148 d += noise;
|
Chris@150
|
149 sum += noise;
|
Chris@150
|
150 }
|
Chris@145
|
151
|
Chris@143
|
152 if (sum == 0) {
|
Chris@190
|
153 return scaleAndTally(0);
|
Chris@143
|
154 }
|
Chris@26
|
155
|
Chris@143
|
156 double distance = 0;
|
Chris@26
|
157
|
Chris@143
|
158 if (m_params.norm == NormaliseDistanceToSum) {
|
Chris@143
|
159
|
Chris@143
|
160 distance = d / sum; // 0 <= d/sum <= 2
|
Chris@143
|
161
|
Chris@143
|
162 } else if (m_params.norm == NormaliseDistanceToLogSum) {
|
Chris@143
|
163
|
Chris@143
|
164 // note if this were to be restored, it would have to use
|
Chris@143
|
165 // totalEnergies vector instead of f1[freqMapSize] which used to
|
Chris@143
|
166 // store the total energy:
|
Chris@143
|
167 // double weight = (5 + Math.log(f1[freqMapSize] + f2[freqMapSize]))/10.0;
|
Chris@143
|
168
|
Chris@143
|
169 double weight = (8 + log(sum)) / 10.0;
|
Chris@133
|
170
|
Chris@143
|
171 if (weight < 0) weight = 0;
|
Chris@143
|
172 else if (weight > 1) weight = 1;
|
Chris@26
|
173
|
Chris@143
|
174 distance = d / sum * weight;
|
Chris@143
|
175
|
Chris@143
|
176 } else {
|
Chris@143
|
177
|
Chris@143
|
178 distance = d;
|
Chris@143
|
179 }
|
Chris@143
|
180
|
Chris@190
|
181 return scaleAndTally(distance);
|
Chris@26
|
182 }
|