Chris@26
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@26
|
2
|
Chris@26
|
3 /*
|
Chris@26
|
4 Vamp feature extraction plugin using the MATCH audio alignment
|
Chris@26
|
5 algorithm.
|
Chris@26
|
6
|
Chris@26
|
7 Centre for Digital Music, Queen Mary, University of London.
|
Chris@26
|
8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
|
Chris@26
|
9
|
Chris@26
|
10 This program is free software; you can redistribute it and/or
|
Chris@26
|
11 modify it under the terms of the GNU General Public License as
|
Chris@26
|
12 published by the Free Software Foundation; either version 2 of the
|
Chris@26
|
13 License, or (at your option) any later version. See the file
|
Chris@26
|
14 COPYING included with this distribution for more information.
|
Chris@26
|
15 */
|
Chris@26
|
16
|
Chris@26
|
17 #include "DistanceMetric.h"
|
Chris@26
|
18
|
Chris@26
|
19 #include <cassert>
|
Chris@26
|
20 #include <cmath>
|
Chris@133
|
21 #include <iostream>
|
Chris@26
|
22
|
Chris@133
|
23 using namespace std;
|
Chris@26
|
24
|
Chris@140
|
25 //#define DEBUG_DISTANCE_METRIC 1
|
Chris@140
|
26
|
Chris@185
|
27 template <> uint8_t
|
Chris@185
|
28 DistanceMetric::scaleIntoRange(double distance)
|
Chris@185
|
29 {
|
Chris@186
|
30 double scaled = m_params.scale * distance;
|
Chris@186
|
31 if (scaled < 0) scaled = 0;
|
Chris@186
|
32 if (scaled > 255) scaled = 255;
|
Chris@186
|
33 return uint8_t(scaled);
|
Chris@185
|
34 }
|
Chris@185
|
35
|
Chris@185
|
36 template <> float
|
Chris@185
|
37 DistanceMetric::scaleIntoRange(double distance)
|
Chris@185
|
38 {
|
Chris@185
|
39 return float(distance);
|
Chris@185
|
40 }
|
Chris@185
|
41
|
Chris@185
|
42 template <> double
|
Chris@185
|
43 DistanceMetric::scaleIntoRange(double distance)
|
Chris@185
|
44 {
|
Chris@185
|
45 return distance;
|
Chris@185
|
46 }
|
Chris@185
|
47
|
Chris@143
|
48 DistanceMetric::DistanceMetric(Parameters params) :
|
Chris@143
|
49 m_params(params)
|
Chris@140
|
50 {
|
Chris@140
|
51 #ifdef DEBUG_DISTANCE_METRIC
|
Chris@143
|
52 cerr << "*** DistanceMetric: norm = " << m_params.norm
|
Chris@143
|
53 << endl;
|
Chris@140
|
54 #endif
|
Chris@140
|
55 }
|
Chris@140
|
56
|
Chris@183
|
57 distance_t
|
Chris@186
|
58 DistanceMetric::scaleValueIntoDistanceRange(double value)
|
Chris@186
|
59 {
|
Chris@186
|
60 return scaleIntoRange<distance_t>(value);
|
Chris@186
|
61 }
|
Chris@186
|
62
|
Chris@186
|
63 distance_t
|
Chris@183
|
64 DistanceMetric::calcDistance(const feature_t &f1,
|
Chris@183
|
65 const feature_t &f2)
|
Chris@26
|
66 {
|
Chris@26
|
67 double d = 0;
|
Chris@26
|
68 double sum = 0;
|
Chris@156
|
69 double eps = 1e-16;
|
Chris@26
|
70
|
Chris@180
|
71 assert(f2.size() == f1.size());
|
Chris@180
|
72 int featureSize = static_cast<int>(f1.size());
|
Chris@145
|
73
|
Chris@156
|
74 if (m_params.metric == Cosine) {
|
Chris@156
|
75
|
Chris@156
|
76 double num = 0, denom1 = 0, denom2 = 0;
|
Chris@156
|
77
|
Chris@156
|
78 for (int i = 0; i < featureSize; ++i) {
|
Chris@156
|
79 num += f1[i] * f2[i];
|
Chris@156
|
80 denom1 += f1[i] * f1[i];
|
Chris@156
|
81 denom2 += f2[i] * f2[i];
|
Chris@156
|
82 }
|
Chris@156
|
83
|
Chris@156
|
84 d = 1.0 - (num / (eps + sqrt(denom1 * denom2)));
|
Chris@156
|
85
|
Chris@156
|
86 if (m_params.noise == AddNoise) {
|
Chris@156
|
87 d += 1e-2;
|
Chris@156
|
88 }
|
Chris@156
|
89 if (d > 1.0) d = 1.0;
|
Chris@156
|
90
|
Chris@185
|
91 return scaleIntoRange<distance_t>(d); // normalisation param ignored
|
Chris@156
|
92 }
|
Chris@156
|
93
|
Chris@157
|
94 if (m_params.metric == Manhattan) {
|
Chris@157
|
95 for (int i = 0; i < featureSize; i++) {
|
Chris@157
|
96 d += fabs(f1[i] - f2[i]);
|
Chris@157
|
97 sum += fabs(f1[i]) + fabs(f2[i]);
|
Chris@157
|
98 }
|
Chris@157
|
99 } else {
|
Chris@157
|
100 // Euclidean
|
Chris@157
|
101 for (int i = 0; i < featureSize; i++) {
|
Chris@157
|
102 d += (f1[i] - f2[i]) * (f1[i] - f2[i]);
|
Chris@157
|
103 sum += fabs(f1[i]) + fabs(f2[i]);
|
Chris@157
|
104 }
|
Chris@157
|
105 d = sqrt(d);
|
Chris@26
|
106 }
|
Chris@26
|
107
|
Chris@145
|
108 double noise = 1e-3 * featureSize;
|
Chris@150
|
109 if (m_params.noise == AddNoise) {
|
Chris@150
|
110 d += noise;
|
Chris@150
|
111 sum += noise;
|
Chris@150
|
112 }
|
Chris@145
|
113
|
Chris@143
|
114 if (sum == 0) {
|
Chris@185
|
115 return scaleIntoRange<distance_t>(0);
|
Chris@143
|
116 }
|
Chris@26
|
117
|
Chris@143
|
118 double distance = 0;
|
Chris@26
|
119
|
Chris@143
|
120 if (m_params.norm == NormaliseDistanceToSum) {
|
Chris@143
|
121
|
Chris@143
|
122 distance = d / sum; // 0 <= d/sum <= 2
|
Chris@143
|
123
|
Chris@143
|
124 } else if (m_params.norm == NormaliseDistanceToLogSum) {
|
Chris@143
|
125
|
Chris@143
|
126 // note if this were to be restored, it would have to use
|
Chris@143
|
127 // totalEnergies vector instead of f1[freqMapSize] which used to
|
Chris@143
|
128 // store the total energy:
|
Chris@143
|
129 // double weight = (5 + Math.log(f1[freqMapSize] + f2[freqMapSize]))/10.0;
|
Chris@143
|
130
|
Chris@143
|
131 double weight = (8 + log(sum)) / 10.0;
|
Chris@133
|
132
|
Chris@143
|
133 if (weight < 0) weight = 0;
|
Chris@143
|
134 else if (weight > 1) weight = 1;
|
Chris@26
|
135
|
Chris@143
|
136 distance = d / sum * weight;
|
Chris@143
|
137
|
Chris@143
|
138 } else {
|
Chris@143
|
139
|
Chris@143
|
140 distance = d;
|
Chris@143
|
141 }
|
Chris@143
|
142
|
Chris@185
|
143 return scaleIntoRange<distance_t>(distance);
|
Chris@26
|
144 }
|