Mercurial > hg > match-vamp
comparison src/FeatureExtractor.cpp @ 169:001db4c32eb0 tuning-rescale
Alternative handling of reference frequency parameter -- scale the whole spectrum, don't just adjust the semitone bins above the crossover freq
author | Chris Cannam |
---|---|
date | Thu, 05 Feb 2015 16:26:41 +0000 |
parents | d23dad16d6f9 |
children | 1440773da492 |
comparison
equal
deleted
inserted
replaced
166:d23dad16d6f9 | 169:001db4c32eb0 |
---|---|
68 } | 68 } |
69 | 69 |
70 void | 70 void |
71 FeatureExtractor::makeStandardFrequencyMap() | 71 FeatureExtractor::makeStandardFrequencyMap() |
72 { | 72 { |
73 double refFreq = m_params.referenceFrequency; | 73 // Our handling of the referenceFrequency parameter depends on the |
74 // frequency map in use. | |
75 | |
76 // With the chroma frequency map, we use referenceFrequency to set | |
77 // up the chroma bin frequencies when constructing the map, and | |
78 // then just follow the map (without having to refer to | |
79 // referenceFrequency again) when we get the frequency-domain | |
80 // audio. | |
81 | |
82 // With the standard frequency map, using referenceFrequency to | |
83 // set up the map doesn't work so well -- it only really affects | |
84 // the crossover frequency, and much of the useful information is | |
85 // below that frequency. What we do instead is to ignore the | |
86 // referenceFrequency when creating the map -- setting it up for | |
87 // 440Hz -- and then use it to scale the individual | |
88 // frequency-domain audio frames before applying the map to them. | |
89 | |
90 double refFreq = 440.; // See above -- *not* the parameter! | |
74 double binWidth = m_params.sampleRate / m_params.fftSize; | 91 double binWidth = m_params.sampleRate / m_params.fftSize; |
75 int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1)); | 92 int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1)); |
76 int crossoverMidi = lrint(log(crossoverBin * binWidth / refFreq)/ | 93 int crossoverMidi = lrint(log(crossoverBin * binWidth / refFreq)/ |
77 log(2.0) * 12 + 69); | 94 log(2.0) * 12 + 69); |
78 | 95 |
79 #ifdef DEBUG_FEATURE_EXTRACTOR | |
80 cerr << "FeatureExtractor::makeStandardFrequencyMap: refFreq = " << refFreq << endl; | |
81 #endif | |
82 | |
83 int i = 0; | 96 int i = 0; |
84 while (i <= crossoverBin) { | 97 while (i <= crossoverBin) { |
85 m_freqMap[i] = i; | 98 m_freqMap[i] = i; |
86 ++i; | 99 ++i; |
87 } | 100 } |
117 } | 130 } |
118 | 131 |
119 vector<double> | 132 vector<double> |
120 FeatureExtractor::process(const vector<double> &real, const vector<double> &imag) | 133 FeatureExtractor::process(const vector<double> &real, const vector<double> &imag) |
121 { | 134 { |
135 vector<double> mags(m_params.fftSize/2 + 1, 0.0); | |
136 | |
137 for (int i = 0; i <= m_params.fftSize/2; i++) { | |
138 mags[i] = real[i] * real[i] + imag[i] * imag[i]; | |
139 } | |
140 | |
141 return processMags(mags); | |
142 } | |
143 | |
144 vector<double> | |
145 FeatureExtractor::process(const float *cframe) | |
146 { | |
147 vector<double> mags(m_params.fftSize/2 + 1, 0.0); | |
148 | |
149 for (int i = 0; i <= m_params.fftSize/2; i++) { | |
150 mags[i] = cframe[i*2] * cframe[i*2] + cframe[i*2+1] * cframe[i*2+1]; | |
151 } | |
152 | |
153 return processMags(mags); | |
154 } | |
155 | |
156 vector<double> | |
157 FeatureExtractor::processMags(const vector<double> &mags) | |
158 { | |
122 vector<double> frame(m_featureSize, 0.0); | 159 vector<double> frame(m_featureSize, 0.0); |
123 | 160 |
124 for (int i = 0; i <= m_params.fftSize/2; i++) { | 161 if (!m_params.useChromaFrequencyMap && |
125 double mag = real[i] * real[i] + imag[i] * imag[i]; | 162 (m_params.referenceFrequency != 440.)) { |
126 frame[m_freqMap[i]] += mag; | 163 |
164 // See comment in makeStandardFrequencyMap above | |
165 vector<double> scaled = scaleMags(mags); | |
166 | |
167 for (int i = 0; i <= m_params.fftSize/2; i++) { | |
168 frame[m_freqMap[i]] += scaled[i]; | |
169 } | |
170 | |
171 } else { | |
172 for (int i = 0; i <= m_params.fftSize/2; i++) { | |
173 frame[m_freqMap[i]] += mags[i]; | |
174 } | |
127 } | 175 } |
128 | 176 |
129 return frame; | 177 return frame; |
130 } | 178 } |
131 | 179 |
132 vector<double> | 180 vector<double> |
133 FeatureExtractor::process(const float *cframe) | 181 FeatureExtractor::scaleMags(const vector<double> &mags) |
134 { | 182 { |
135 vector<double> frame(m_featureSize, 0.0); | 183 // Scale the pitch content in the given magnitude spectrum to |
136 | 184 // accommodate a difference in tuning frequency (between the 440Hz |
137 for (int i = 0; i <= m_params.fftSize/2; i++) { | 185 // reference and the actual tuning frequency of the input audio). |
138 double mag = cframe[i*2] * cframe[i*2] + cframe[i*2+1] * cframe[i*2+1]; | 186 // We only do this when not using chroma features -- see the |
139 frame[m_freqMap[i]] += mag; | 187 // comment in makeStandardFrequencyMap() above. |
140 } | 188 |
141 | 189 if (m_params.useChromaFrequencyMap) return mags; |
142 return frame; | 190 |
143 } | 191 double ratio = 440. / m_params.referenceFrequency; |
144 | 192 |
193 int n = mags.size(); | |
194 | |
195 vector<double> scaled(n, 0.0); | |
196 | |
197 for (int target = 0; target < n; ++target) { | |
198 | |
199 double source = target / ratio; | |
200 | |
201 int lower = int(source); | |
202 int higher = lower + 1; | |
203 | |
204 double lowerProp = higher - source; | |
205 double higherProp = source - lower; | |
206 | |
207 double value = 0.0; | |
208 if (lower >= 0 && lower < n) { | |
209 value += lowerProp * mags[lower]; | |
210 } | |
211 if (higher >= 0 && higher < n) { | |
212 value += higherProp * mags[higher]; | |
213 } | |
214 | |
215 scaled[target] = value; | |
216 } | |
217 | |
218 return scaled; | |
219 } | |
220 |