Chris@37
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@37
|
2
|
Chris@37
|
3 /*
|
Chris@37
|
4 Vamp feature extraction plugin using the MATCH audio alignment
|
Chris@37
|
5 algorithm.
|
Chris@37
|
6
|
Chris@37
|
7 Centre for Digital Music, Queen Mary, University of London.
|
Chris@37
|
8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
|
Chris@37
|
9
|
Chris@37
|
10 This program is free software; you can redistribute it and/or
|
Chris@37
|
11 modify it under the terms of the GNU General Public License as
|
Chris@37
|
12 published by the Free Software Foundation; either version 2 of the
|
Chris@37
|
13 License, or (at your option) any later version. See the file
|
Chris@37
|
14 COPYING included with this distribution for more information.
|
Chris@37
|
15 */
|
Chris@37
|
16
|
Chris@37
|
17 #include "FeatureExtractor.h"
|
Chris@37
|
18
|
Chris@37
|
19 #include <iostream>
|
Chris@37
|
20
|
Chris@37
|
21 #include <cstdlib>
|
Chris@37
|
22 #include <cassert>
|
Chris@37
|
23 #include <cmath>
|
Chris@37
|
24
|
Chris@37
|
25 using namespace std;
|
Chris@37
|
26
|
Chris@166
|
27 #define DEBUG_FEATURE_EXTRACTOR 1
|
Chris@140
|
28
|
Chris@37
|
29 FeatureExtractor::FeatureExtractor(Parameters parameters) :
|
Chris@103
|
30 m_params(parameters)
|
Chris@37
|
31 {
|
Chris@74
|
32 m_featureSize = getFeatureSizeFor(parameters);
|
Chris@37
|
33 makeFreqMap();
|
Chris@140
|
34
|
Chris@140
|
35 #ifdef DEBUG_FEATURE_EXTRACTOR
|
Chris@140
|
36 cerr << "*** FeatureExtractor: sampleRate = " << parameters.sampleRate
|
Chris@140
|
37 << ", useChromaFrequencyMap = " << parameters.useChromaFrequencyMap
|
Chris@140
|
38 << ", fftSize = " << parameters.fftSize << endl;
|
Chris@140
|
39 #endif
|
Chris@37
|
40 }
|
Chris@37
|
41
|
Chris@74
|
42 int
|
Chris@74
|
43 FeatureExtractor::getFeatureSizeFor(Parameters parameters)
|
Chris@74
|
44 {
|
Chris@74
|
45 if (parameters.useChromaFrequencyMap) {
|
Chris@74
|
46 return 13;
|
Chris@74
|
47 } else {
|
Chris@74
|
48 return 84;
|
Chris@74
|
49 }
|
Chris@74
|
50 }
|
Chris@74
|
51
|
Chris@37
|
52 void
|
Chris@37
|
53 FeatureExtractor::makeFreqMap()
|
Chris@37
|
54 {
|
Chris@37
|
55 m_freqMap = vector<int>(m_params.fftSize / 2 + 1, 0);
|
Chris@37
|
56
|
Chris@37
|
57 if (m_params.useChromaFrequencyMap) {
|
Chris@140
|
58 #ifdef DEBUG_FEATURE_EXTRACTOR
|
Chris@37
|
59 cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl;
|
Chris@37
|
60 #endif
|
Chris@37
|
61 makeChromaFrequencyMap();
|
Chris@37
|
62 } else {
|
Chris@140
|
63 #ifdef DEBUG_FEATURE_EXTRACTOR
|
Chris@37
|
64 cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl;
|
Chris@37
|
65 #endif
|
Chris@37
|
66 makeStandardFrequencyMap();
|
Chris@37
|
67 }
|
Chris@37
|
68 }
|
Chris@37
|
69
|
Chris@37
|
70 void
|
Chris@37
|
71 FeatureExtractor::makeStandardFrequencyMap()
|
Chris@37
|
72 {
|
Chris@169
|
73 // Our handling of the referenceFrequency parameter depends on the
|
Chris@169
|
74 // frequency map in use.
|
Chris@169
|
75
|
Chris@169
|
76 // With the chroma frequency map, we use referenceFrequency to set
|
Chris@169
|
77 // up the chroma bin frequencies when constructing the map, and
|
Chris@169
|
78 // then just follow the map (without having to refer to
|
Chris@169
|
79 // referenceFrequency again) when we get the frequency-domain
|
Chris@169
|
80 // audio.
|
Chris@169
|
81
|
Chris@169
|
82 // With the standard frequency map, using referenceFrequency to
|
Chris@169
|
83 // set up the map doesn't work so well -- it only really affects
|
Chris@169
|
84 // the crossover frequency, and much of the useful information is
|
Chris@169
|
85 // below that frequency. What we do instead is to ignore the
|
Chris@169
|
86 // referenceFrequency when creating the map -- setting it up for
|
Chris@169
|
87 // 440Hz -- and then use it to scale the individual
|
Chris@169
|
88 // frequency-domain audio frames before applying the map to them.
|
Chris@169
|
89
|
Chris@169
|
90 double refFreq = 440.; // See above -- *not* the parameter!
|
Chris@37
|
91 double binWidth = m_params.sampleRate / m_params.fftSize;
|
Chris@37
|
92 int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1));
|
Chris@159
|
93 int crossoverMidi = lrint(log(crossoverBin * binWidth / refFreq)/
|
Chris@37
|
94 log(2.0) * 12 + 69);
|
Chris@163
|
95
|
Chris@37
|
96 int i = 0;
|
Chris@37
|
97 while (i <= crossoverBin) {
|
Chris@37
|
98 m_freqMap[i] = i;
|
Chris@37
|
99 ++i;
|
Chris@37
|
100 }
|
Chris@37
|
101
|
Chris@37
|
102 while (i <= m_params.fftSize/2) {
|
Chris@159
|
103 double midi = log(i * binWidth / refFreq) / log(2.0) * 12 + 69;
|
Chris@37
|
104 if (midi > 127) midi = 127;
|
Chris@40
|
105 int target = crossoverBin + lrint(midi) - crossoverMidi;
|
Chris@40
|
106 if (target >= m_featureSize) target = m_featureSize - 1;
|
Chris@40
|
107 m_freqMap[i++] = target;
|
Chris@37
|
108 }
|
Chris@166
|
109
|
Chris@166
|
110 #ifdef DEBUG_FEATURE_EXTRACTOR
|
Chris@166
|
111 cerr << "FeatureExtractor: crossover bin is " << crossoverBin << " for midi "
|
Chris@166
|
112 << crossoverMidi << endl;
|
Chris@166
|
113 #endif
|
Chris@37
|
114 }
|
Chris@37
|
115
|
Chris@37
|
116 void
|
Chris@37
|
117 FeatureExtractor::makeChromaFrequencyMap()
|
Chris@37
|
118 {
|
Chris@159
|
119 double refFreq = m_params.referenceFrequency;
|
Chris@37
|
120 double binWidth = m_params.sampleRate / m_params.fftSize;
|
Chris@37
|
121 int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1));
|
Chris@37
|
122 int i = 0;
|
Chris@37
|
123 while (i <= crossoverBin) {
|
Chris@37
|
124 m_freqMap[i++] = 0;
|
Chris@37
|
125 }
|
Chris@37
|
126 while (i <= m_params.fftSize/2) {
|
Chris@159
|
127 double midi = log(i * binWidth / refFreq) / log(2.0) * 12 + 69;
|
Chris@37
|
128 m_freqMap[i++] = (lrint(midi)) % 12 + 1;
|
Chris@37
|
129 }
|
Chris@37
|
130 }
|
Chris@37
|
131
|
Chris@37
|
132 vector<double>
|
Chris@37
|
133 FeatureExtractor::process(const vector<double> &real, const vector<double> &imag)
|
Chris@37
|
134 {
|
Chris@169
|
135 vector<double> mags(m_params.fftSize/2 + 1, 0.0);
|
Chris@169
|
136
|
Chris@169
|
137 for (int i = 0; i <= m_params.fftSize/2; i++) {
|
Chris@169
|
138 mags[i] = real[i] * real[i] + imag[i] * imag[i];
|
Chris@169
|
139 }
|
Chris@169
|
140
|
Chris@169
|
141 return processMags(mags);
|
Chris@169
|
142 }
|
Chris@169
|
143
|
Chris@169
|
144 vector<double>
|
Chris@169
|
145 FeatureExtractor::process(const float *cframe)
|
Chris@169
|
146 {
|
Chris@169
|
147 vector<double> mags(m_params.fftSize/2 + 1, 0.0);
|
Chris@169
|
148
|
Chris@169
|
149 for (int i = 0; i <= m_params.fftSize/2; i++) {
|
Chris@169
|
150 mags[i] = cframe[i*2] * cframe[i*2] + cframe[i*2+1] * cframe[i*2+1];
|
Chris@169
|
151 }
|
Chris@169
|
152
|
Chris@169
|
153 return processMags(mags);
|
Chris@169
|
154 }
|
Chris@169
|
155
|
Chris@169
|
156 vector<double>
|
Chris@169
|
157 FeatureExtractor::processMags(const vector<double> &mags)
|
Chris@169
|
158 {
|
Chris@37
|
159 vector<double> frame(m_featureSize, 0.0);
|
Chris@169
|
160
|
Chris@169
|
161 if (!m_params.useChromaFrequencyMap &&
|
Chris@169
|
162 (m_params.referenceFrequency != 440.)) {
|
Chris@169
|
163
|
Chris@169
|
164 // See comment in makeStandardFrequencyMap above
|
Chris@169
|
165 vector<double> scaled = scaleMags(mags);
|
Chris@169
|
166
|
Chris@169
|
167 for (int i = 0; i <= m_params.fftSize/2; i++) {
|
Chris@169
|
168 frame[m_freqMap[i]] += scaled[i];
|
Chris@169
|
169 }
|
Chris@169
|
170
|
Chris@169
|
171 } else {
|
Chris@169
|
172 for (int i = 0; i <= m_params.fftSize/2; i++) {
|
Chris@169
|
173 frame[m_freqMap[i]] += mags[i];
|
Chris@169
|
174 }
|
Chris@37
|
175 }
|
Chris@37
|
176
|
Chris@103
|
177 return frame;
|
Chris@74
|
178 }
|
Chris@74
|
179
|
Chris@74
|
180 vector<double>
|
Chris@169
|
181 FeatureExtractor::scaleMags(const vector<double> &mags)
|
Chris@74
|
182 {
|
Chris@169
|
183 // Scale the pitch content in the given magnitude spectrum to
|
Chris@169
|
184 // accommodate a difference in tuning frequency (between the 440Hz
|
Chris@169
|
185 // reference and the actual tuning frequency of the input audio).
|
Chris@169
|
186 // We only do this when not using chroma features -- see the
|
Chris@169
|
187 // comment in makeStandardFrequencyMap() above.
|
Chris@169
|
188
|
Chris@169
|
189 if (m_params.useChromaFrequencyMap) return mags;
|
Chris@169
|
190
|
Chris@169
|
191 double ratio = 440. / m_params.referenceFrequency;
|
Chris@169
|
192
|
Chris@169
|
193 int n = mags.size();
|
Chris@169
|
194
|
Chris@169
|
195 vector<double> scaled(n, 0.0);
|
Chris@169
|
196
|
Chris@169
|
197 for (int target = 0; target < n; ++target) {
|
Chris@169
|
198
|
Chris@169
|
199 double source = target / ratio;
|
Chris@169
|
200
|
Chris@169
|
201 int lower = int(source);
|
Chris@169
|
202 int higher = lower + 1;
|
Chris@169
|
203
|
Chris@169
|
204 double lowerProp = higher - source;
|
Chris@169
|
205 double higherProp = source - lower;
|
Chris@169
|
206
|
Chris@169
|
207 double value = 0.0;
|
Chris@169
|
208 if (lower >= 0 && lower < n) {
|
Chris@169
|
209 value += lowerProp * mags[lower];
|
Chris@169
|
210 }
|
Chris@169
|
211 if (higher >= 0 && higher < n) {
|
Chris@169
|
212 value += higherProp * mags[higher];
|
Chris@169
|
213 }
|
Chris@169
|
214
|
Chris@169
|
215 scaled[target] = value;
|
Chris@74
|
216 }
|
Chris@74
|
217
|
Chris@169
|
218 return scaled;
|
Chris@74
|
219 }
|
Chris@74
|
220
|