Chris@37
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@37
|
2
|
Chris@37
|
3 /*
|
Chris@37
|
4 Vamp feature extraction plugin using the MATCH audio alignment
|
Chris@37
|
5 algorithm.
|
Chris@37
|
6
|
Chris@37
|
7 Centre for Digital Music, Queen Mary, University of London.
|
Chris@236
|
8 Copyright (c) 2007-2020 Simon Dixon, Chris Cannam, and Queen Mary
|
Chris@230
|
9 University of London, Copyright (c) 2014-2015 Tido GmbH.
|
Chris@37
|
10
|
Chris@37
|
11 This program is free software; you can redistribute it and/or
|
Chris@37
|
12 modify it under the terms of the GNU General Public License as
|
Chris@37
|
13 published by the Free Software Foundation; either version 2 of the
|
Chris@37
|
14 License, or (at your option) any later version. See the file
|
Chris@37
|
15 COPYING included with this distribution for more information.
|
Chris@37
|
16 */
|
Chris@37
|
17
|
Chris@37
|
18 #include "FeatureExtractor.h"
|
Chris@37
|
19
|
Chris@37
|
20 #include <iostream>
|
Chris@37
|
21
|
Chris@37
|
22 #include <cstdlib>
|
Chris@37
|
23 #include <cassert>
|
Chris@37
|
24 #include <cmath>
|
Chris@37
|
25
|
Chris@37
|
26 using namespace std;
|
Chris@37
|
27
|
Chris@174
|
28 //#define DEBUG_FEATURE_EXTRACTOR 1
|
Chris@140
|
29
|
Chris@37
|
30 FeatureExtractor::FeatureExtractor(Parameters parameters) :
|
Chris@103
|
31 m_params(parameters)
|
Chris@37
|
32 {
|
Chris@74
|
33 m_featureSize = getFeatureSizeFor(parameters);
|
Chris@37
|
34 makeFreqMap();
|
Chris@140
|
35
|
Chris@140
|
36 #ifdef DEBUG_FEATURE_EXTRACTOR
|
Chris@140
|
37 cerr << "*** FeatureExtractor: sampleRate = " << parameters.sampleRate
|
Chris@140
|
38 << ", useChromaFrequencyMap = " << parameters.useChromaFrequencyMap
|
Chris@140
|
39 << ", fftSize = " << parameters.fftSize << endl;
|
Chris@140
|
40 #endif
|
Chris@37
|
41 }
|
Chris@37
|
42
|
Chris@74
|
43 int
|
Chris@74
|
44 FeatureExtractor::getFeatureSizeFor(Parameters parameters)
|
Chris@74
|
45 {
|
Chris@74
|
46 if (parameters.useChromaFrequencyMap) {
|
Chris@74
|
47 return 13;
|
Chris@74
|
48 } else {
|
Chris@74
|
49 return 84;
|
Chris@74
|
50 }
|
Chris@74
|
51 }
|
Chris@74
|
52
|
Chris@37
|
53 void
|
Chris@37
|
54 FeatureExtractor::makeFreqMap()
|
Chris@37
|
55 {
|
Chris@37
|
56 m_freqMap = vector<int>(m_params.fftSize / 2 + 1, 0);
|
Chris@37
|
57
|
Chris@37
|
58 if (m_params.useChromaFrequencyMap) {
|
Chris@140
|
59 #ifdef DEBUG_FEATURE_EXTRACTOR
|
Chris@37
|
60 cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl;
|
Chris@37
|
61 #endif
|
Chris@37
|
62 makeChromaFrequencyMap();
|
Chris@37
|
63 } else {
|
Chris@140
|
64 #ifdef DEBUG_FEATURE_EXTRACTOR
|
Chris@37
|
65 cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl;
|
Chris@37
|
66 #endif
|
Chris@37
|
67 makeStandardFrequencyMap();
|
Chris@37
|
68 }
|
Chris@37
|
69 }
|
Chris@37
|
70
|
Chris@37
|
71 void
|
Chris@37
|
72 FeatureExtractor::makeStandardFrequencyMap()
|
Chris@37
|
73 {
|
Chris@169
|
74 // Our handling of the referenceFrequency parameter depends on the
|
Chris@169
|
75 // frequency map in use.
|
Chris@169
|
76
|
Chris@169
|
77 // With the chroma frequency map, we use referenceFrequency to set
|
Chris@169
|
78 // up the chroma bin frequencies when constructing the map, and
|
Chris@169
|
79 // then just follow the map (without having to refer to
|
Chris@169
|
80 // referenceFrequency again) when we get the frequency-domain
|
Chris@169
|
81 // audio.
|
Chris@169
|
82
|
Chris@169
|
83 // With the standard frequency map, using referenceFrequency to
|
Chris@169
|
84 // set up the map doesn't work so well -- it only really affects
|
Chris@169
|
85 // the crossover frequency, and much of the useful information is
|
Chris@169
|
86 // below that frequency. What we do instead is to ignore the
|
Chris@169
|
87 // referenceFrequency when creating the map -- setting it up for
|
Chris@169
|
88 // 440Hz -- and then use it to scale the individual
|
Chris@169
|
89 // frequency-domain audio frames before applying the map to them.
|
Chris@169
|
90
|
Chris@169
|
91 double refFreq = 440.; // See above -- *not* the parameter!
|
Chris@180
|
92 double binWidth = double(m_params.sampleRate) / m_params.fftSize;
|
Chris@188
|
93 int crossoverBin = int(2 / (pow(2, 1/12.0) - 1));
|
Chris@180
|
94 int crossoverMidi = int(log(crossoverBin * binWidth / refFreq)/
|
Chris@180
|
95 log(2.0) * 12 + 69 + 0.5);
|
Chris@163
|
96
|
Chris@37
|
97 int i = 0;
|
Chris@37
|
98 while (i <= crossoverBin) {
|
Chris@176
|
99 double freq = i * binWidth;
|
Chris@176
|
100 if (freq < m_params.minFrequency || freq > m_params.maxFrequency) {
|
Chris@176
|
101 m_freqMap[i++] = -1;
|
Chris@176
|
102 } else {
|
Chris@176
|
103 m_freqMap[i] = i;
|
Chris@176
|
104 i++;
|
Chris@176
|
105 }
|
Chris@37
|
106 }
|
Chris@37
|
107
|
Chris@37
|
108 while (i <= m_params.fftSize/2) {
|
Chris@176
|
109 double freq = i * binWidth;
|
Chris@176
|
110 if (freq < m_params.minFrequency || freq > m_params.maxFrequency) {
|
Chris@176
|
111 m_freqMap[i++] = -1;
|
Chris@176
|
112 } else {
|
Chris@176
|
113 double midi = log(freq / refFreq) / log(2.0) * 12 + 69;
|
Chris@176
|
114 if (midi > 127) midi = 127;
|
Chris@180
|
115 int target = crossoverBin + int(midi + 0.5) - crossoverMidi;
|
Chris@176
|
116 if (target >= m_featureSize) target = m_featureSize - 1;
|
Chris@176
|
117 m_freqMap[i++] = target;
|
Chris@176
|
118 }
|
Chris@37
|
119 }
|
Chris@166
|
120
|
Chris@166
|
121 #ifdef DEBUG_FEATURE_EXTRACTOR
|
Chris@166
|
122 cerr << "FeatureExtractor: crossover bin is " << crossoverBin << " for midi "
|
Chris@166
|
123 << crossoverMidi << endl;
|
Chris@176
|
124 cerr << "FeatureExtractor: map is:" << endl;
|
Chris@176
|
125 for (i = 0; i <= m_params.fftSize/2; ++i) {
|
Chris@176
|
126 cerr << i << ": " << m_freqMap[i] << ", ";
|
Chris@176
|
127 }
|
Chris@176
|
128 cerr << endl;
|
Chris@166
|
129 #endif
|
Chris@37
|
130 }
|
Chris@37
|
131
|
Chris@37
|
132 void
|
Chris@37
|
133 FeatureExtractor::makeChromaFrequencyMap()
|
Chris@37
|
134 {
|
Chris@159
|
135 double refFreq = m_params.referenceFrequency;
|
Chris@180
|
136 double binWidth = double(m_params.sampleRate) / m_params.fftSize;
|
Chris@188
|
137 int crossoverBin = int(1 / (pow(2, 1/12.0) - 1));
|
Chris@37
|
138 int i = 0;
|
Chris@37
|
139 while (i <= crossoverBin) {
|
Chris@176
|
140 double freq = i * binWidth;
|
Chris@176
|
141 if (freq < m_params.minFrequency || freq > m_params.maxFrequency) {
|
Chris@176
|
142 m_freqMap[i++] = -1;
|
Chris@176
|
143 } else {
|
Chris@176
|
144 m_freqMap[i++] = 0;
|
Chris@176
|
145 }
|
Chris@37
|
146 }
|
Chris@37
|
147 while (i <= m_params.fftSize/2) {
|
Chris@176
|
148 double freq = i * binWidth;
|
Chris@176
|
149 if (freq < m_params.minFrequency || freq > m_params.maxFrequency) {
|
Chris@176
|
150 m_freqMap[i++] = -1;
|
Chris@176
|
151 } else {
|
Chris@176
|
152 double midi = log(freq / refFreq) / log(2.0) * 12 + 69;
|
Chris@180
|
153 m_freqMap[i++] = (int(midi + 0.5)) % 12 + 1;
|
Chris@176
|
154 }
|
Chris@37
|
155 }
|
Chris@37
|
156 }
|
Chris@37
|
157
|
Chris@183
|
158 feature_t
|
Chris@37
|
159 FeatureExtractor::process(const vector<double> &real, const vector<double> &imag)
|
Chris@37
|
160 {
|
Chris@184
|
161 vector<float> mags(m_params.fftSize/2 + 1, 0.0);
|
Chris@184
|
162
|
Chris@184
|
163 for (int i = 0; i <= m_params.fftSize/2; i++) {
|
Chris@184
|
164 mags[i] = float(real[i] * real[i] + imag[i] * imag[i]);
|
Chris@184
|
165 }
|
Chris@184
|
166
|
Chris@184
|
167 return processMags(mags);
|
Chris@184
|
168 }
|
Chris@184
|
169
|
Chris@184
|
170 feature_t
|
Chris@184
|
171 FeatureExtractor::process(const vector<float> &real, const vector<float> &imag)
|
Chris@184
|
172 {
|
Chris@184
|
173 vector<float> mags(m_params.fftSize/2 + 1, 0.0);
|
Chris@169
|
174
|
Chris@169
|
175 for (int i = 0; i <= m_params.fftSize/2; i++) {
|
Chris@169
|
176 mags[i] = real[i] * real[i] + imag[i] * imag[i];
|
Chris@169
|
177 }
|
Chris@169
|
178
|
Chris@169
|
179 return processMags(mags);
|
Chris@169
|
180 }
|
Chris@169
|
181
|
Chris@183
|
182 feature_t
|
Chris@201
|
183 FeatureExtractor::process(const float *real, const float *imag)
|
Chris@201
|
184 {
|
Chris@201
|
185 vector<float> mags(m_params.fftSize/2 + 1, 0.0);
|
Chris@201
|
186
|
Chris@201
|
187 for (int i = 0; i <= m_params.fftSize/2; i++) {
|
Chris@201
|
188 mags[i] = real[i] * real[i] + imag[i] * imag[i];
|
Chris@201
|
189 }
|
Chris@201
|
190
|
Chris@201
|
191 return processMags(mags);
|
Chris@201
|
192 }
|
Chris@201
|
193
|
Chris@201
|
194 feature_t
|
Chris@169
|
195 FeatureExtractor::process(const float *cframe)
|
Chris@169
|
196 {
|
Chris@184
|
197 vector<float> mags(m_params.fftSize/2 + 1, 0.0);
|
Chris@169
|
198
|
Chris@169
|
199 for (int i = 0; i <= m_params.fftSize/2; i++) {
|
Chris@169
|
200 mags[i] = cframe[i*2] * cframe[i*2] + cframe[i*2+1] * cframe[i*2+1];
|
Chris@169
|
201 }
|
Chris@169
|
202
|
Chris@169
|
203 return processMags(mags);
|
Chris@169
|
204 }
|
Chris@169
|
205
|
Chris@183
|
206 feature_t
|
Chris@184
|
207 FeatureExtractor::processMags(const vector<float> &mags)
|
Chris@169
|
208 {
|
Chris@183
|
209 feature_t frame(m_featureSize, 0.0);
|
Chris@169
|
210
|
Chris@169
|
211 if (!m_params.useChromaFrequencyMap &&
|
Chris@169
|
212 (m_params.referenceFrequency != 440.)) {
|
Chris@169
|
213
|
Chris@169
|
214 // See comment in makeStandardFrequencyMap above
|
Chris@184
|
215 vector<float> scaled = scaleMags(mags);
|
Chris@169
|
216
|
Chris@169
|
217 for (int i = 0; i <= m_params.fftSize/2; i++) {
|
Chris@178
|
218 int index = m_freqMap[i];
|
Chris@178
|
219 if (index >= 0) {
|
Chris@178
|
220 frame[index] += scaled[i];
|
Chris@178
|
221 }
|
Chris@169
|
222 }
|
Chris@169
|
223
|
Chris@169
|
224 } else {
|
Chris@169
|
225 for (int i = 0; i <= m_params.fftSize/2; i++) {
|
Chris@178
|
226 int index = m_freqMap[i];
|
Chris@178
|
227 if (index >= 0) {
|
Chris@178
|
228 frame[index] += mags[i];
|
Chris@178
|
229 }
|
Chris@176
|
230 }
|
Chris@37
|
231 }
|
Chris@37
|
232
|
Chris@103
|
233 return frame;
|
Chris@74
|
234 }
|
Chris@74
|
235
|
Chris@184
|
236 vector<float>
|
Chris@184
|
237 FeatureExtractor::scaleMags(const vector<float> &mags)
|
Chris@74
|
238 {
|
Chris@169
|
239 // Scale the pitch content in the given magnitude spectrum to
|
Chris@169
|
240 // accommodate a difference in tuning frequency (between the 440Hz
|
Chris@169
|
241 // reference and the actual tuning frequency of the input audio).
|
Chris@169
|
242 // We only do this when not using chroma features -- see the
|
Chris@169
|
243 // comment in makeStandardFrequencyMap() above.
|
Chris@169
|
244
|
Chris@169
|
245 if (m_params.useChromaFrequencyMap) return mags;
|
Chris@169
|
246
|
Chris@184
|
247 double ratio = 440.f / m_params.referenceFrequency;
|
Chris@169
|
248
|
Chris@180
|
249 int n = static_cast<int>(mags.size());
|
Chris@169
|
250
|
Chris@184
|
251 vector<float> scaled(n, 0.0);
|
Chris@169
|
252
|
Chris@169
|
253 for (int target = 0; target < n; ++target) {
|
Chris@169
|
254
|
Chris@169
|
255 double source = target / ratio;
|
Chris@169
|
256
|
Chris@169
|
257 int lower = int(source);
|
Chris@169
|
258 int higher = lower + 1;
|
Chris@169
|
259
|
Chris@169
|
260 double lowerProp = higher - source;
|
Chris@169
|
261 double higherProp = source - lower;
|
Chris@169
|
262
|
Chris@169
|
263 double value = 0.0;
|
Chris@169
|
264 if (lower >= 0 && lower < n) {
|
Chris@169
|
265 value += lowerProp * mags[lower];
|
Chris@176
|
266 }
|
Chris@169
|
267 if (higher >= 0 && higher < n) {
|
Chris@169
|
268 value += higherProp * mags[higher];
|
Chris@169
|
269 }
|
Chris@169
|
270
|
Chris@184
|
271 scaled[target] = float(value);
|
Chris@74
|
272 }
|
Chris@74
|
273
|
Chris@169
|
274 return scaled;
|
Chris@74
|
275 }
|
Chris@74
|
276
|