Mercurial > hg > qm-vamp-plugins
comparison plugins/SimilarityPlugin.cpp @ 66:12516e68c81e
* Start work on fixes to the similarity plugin: avoid a crash when running in
chroma mode, and set up chroma to have the same block size as MFCC.
Unfortunately, we now have meaningless beat spectra for chroma+rhythm mode --
probably something very trivial, but I don't see what right now.
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Mon, 03 Mar 2008 18:07:27 +0000 |
parents | 90fa946fda40 |
children | e8e103090d97 |
comparison
equal
deleted
inserted
replaced
65:851560f7fca2 | 66:12516e68c81e |
---|---|
176 | 176 |
177 } else if (m_type == TypeChroma) { | 177 } else if (m_type == TypeChroma) { |
178 | 178 |
179 m_featureColumnSize = 12; | 179 m_featureColumnSize = 12; |
180 | 180 |
181 // For simplicity, aim to have the chroma fft size equal to | |
182 // 2048, the same as the mfcc fft size (so the input block | |
183 // size does not depend on the feature type and we can use the | |
184 // same processing parameters for rhythm etc). This is also | |
185 // why getPreferredBlockSize can confidently return 2048 * the | |
186 // decimation factor. | |
187 | |
188 // The fft size for a chromagram is the filterbank Q value | |
189 // times the sample rate, divided by the minimum frequency, | |
190 // rounded up to the nearest power of two. | |
191 | |
192 double q = 1.0 / (pow(2.0, (1.0 / 12.0)) - 1.0); | |
193 double fmin = (q * m_processRate) / 2048.0; | |
194 // std::cerr << "chroma fmin = " << fmin; | |
195 | |
196 // Round fmin up to the nearest MIDI pitch multiple of 12. | |
197 // So long as fmin is greater than 12 to start with, this | |
198 // should not change the resulting fft size. | |
199 | |
200 int pmin = Pitch::getPitchForFrequency(float(fmin)); | |
201 pmin = ((pmin / 12) + 1) * 12; | |
202 fmin = Pitch::getFrequencyForPitch(pmin); | |
203 // std::cerr << " -> " << fmin << " for pitch " << pmin << std::endl; | |
204 | |
205 float fmax = Pitch::getFrequencyForPitch(pmin + 36); | |
206 // std::cerr << "fmax = " << fmax << " for pitch " << (pmin+36) << std::endl; | |
207 | |
208 | |
181 ChromaConfig config; | 209 ChromaConfig config; |
182 config.FS = m_processRate; | 210 config.FS = m_processRate; |
183 config.min = Pitch::getFrequencyForPitch(24, 0, 440); | 211 config.min = fmin; |
184 config.max = Pitch::getFrequencyForPitch(96, 0, 440); | 212 config.max = fmax; |
213 // config.min = Pitch::getFrequencyForPitch(24, 0, 440); | |
214 // config.max = Pitch::getFrequencyForPitch(96, 0, 440); | |
185 config.BPO = 12; | 215 config.BPO = 12; |
186 config.CQThresh = 0.0054; | 216 config.CQThresh = 0.0054; |
187 // We don't normalise the chromagram's columns individually; | 217 // We don't normalise the chromagram's columns individually; |
188 // we normalise the mean at the end instead | 218 // we normalise the mean at the end instead |
189 config.normalise = MathUtilities::NormaliseNone; | 219 config.normalise = MathUtilities::NormaliseNone; |
190 m_chromagram = new Chromagram(config); | 220 m_chromagram = new Chromagram(config); |
191 m_fftSize = m_chromagram->getFrameSize(); | 221 m_fftSize = m_chromagram->getFrameSize(); |
222 | |
223 if (m_fftSize != 2048) { | |
224 std::cerr << "WARNING: SimilarityPlugin::initialise: Internal processing FFT size " << m_fftSize << " != expected size 2048 in chroma mode" << std::endl; | |
225 } | |
192 | 226 |
193 // std::cerr << "fftsize = " << m_fftSize << std::endl; | 227 // std::cerr << "fftsize = " << m_fftSize << std::endl; |
194 | 228 |
195 m_rhythmClipFrameSize = m_fftSize / 16; | 229 m_rhythmClipFrameSize = m_fftSize / 4; |
196 while (m_rhythmClipFrameSize < 512) m_rhythmClipFrameSize *= 2; | 230 |
231 // m_rhythmClipFrameSize = m_fftSize / 16; | |
232 // while (m_rhythmClipFrameSize < 512) m_rhythmClipFrameSize *= 2; | |
233 | |
197 // std::cerr << "m_rhythmClipFrameSize = " << m_rhythmClipFrameSize << std::endl; | 234 // std::cerr << "m_rhythmClipFrameSize = " << m_rhythmClipFrameSize << std::endl; |
198 | 235 |
199 // std::cerr << "min = "<< config.min << ", max = " << config.max << std::endl; | 236 // std::cerr << "min = "<< config.min << ", max = " << config.max << std::endl; |
200 | 237 |
201 } else { | 238 } else { |
288 void | 325 void |
289 SimilarityPlugin::calculateBlockSize() const | 326 SimilarityPlugin::calculateBlockSize() const |
290 { | 327 { |
291 if (m_blockSize != 0) return; | 328 if (m_blockSize != 0) return; |
292 int decimationFactor = getDecimationFactor(); | 329 int decimationFactor = getDecimationFactor(); |
293 if (m_type == TypeChroma) { | 330 m_blockSize = 2048 * decimationFactor; |
294 ChromaConfig config; | |
295 config.FS = m_processRate; | |
296 config.min = Pitch::getFrequencyForPitch(24, 0, 440); | |
297 config.max = Pitch::getFrequencyForPitch(96, 0, 440); | |
298 config.BPO = 12; | |
299 config.CQThresh = 0.0054; | |
300 config.normalise = MathUtilities::NormaliseNone; | |
301 Chromagram *c = new Chromagram(config); | |
302 size_t sz = c->getFrameSize(); | |
303 delete c; | |
304 m_blockSize = sz * decimationFactor; | |
305 } else { | |
306 m_blockSize = 2048 * decimationFactor; | |
307 } | |
308 } | 331 } |
309 | 332 |
310 SimilarityPlugin::ParameterList SimilarityPlugin::getParameterDescriptors() const | 333 SimilarityPlugin::ParameterList SimilarityPlugin::getParameterDescriptors() const |
311 { | 334 { |
312 ParameterList list; | 335 ParameterList list; |
558 m_decimator->process(dblbuf, decbuf); | 581 m_decimator->process(dblbuf, decbuf); |
559 } | 582 } |
560 | 583 |
561 if (needTimbre()) { | 584 if (needTimbre()) { |
562 | 585 |
586 FeatureColumn mf(m_featureColumnSize); | |
587 | |
563 if (m_type == TypeMFCC) { | 588 if (m_type == TypeMFCC) { |
564 m_mfcc->process(decbuf, raw); | 589 m_mfcc->process(decbuf, raw); |
590 for (int i = 0; i < m_featureColumnSize; ++i) { | |
591 mf[i] = raw[i]; | |
592 } | |
565 } else if (m_type == TypeChroma) { | 593 } else if (m_type == TypeChroma) { |
566 raw = m_chromagram->process(decbuf); | 594 double *chroma = m_chromagram->process(decbuf); |
595 for (int i = 0; i < m_featureColumnSize; ++i) { | |
596 mf[i] = chroma[i]; | |
597 } | |
567 } | 598 } |
568 | 599 |
569 FeatureColumn mf(m_featureColumnSize); | |
570 for (int i = 0; i < m_featureColumnSize; ++i) { | |
571 mf[i] = raw[i]; | |
572 } | |
573 | |
574 m_values[c].push_back(mf); | 600 m_values[c].push_back(mf); |
575 } | 601 } |
576 | 602 |
577 // std::cerr << "needRhythm = " << needRhythm() << ", frame = " << m_frameNo << std::endl; | 603 // std::cerr << "needRhythm = " << needRhythm() << ", frame = " << m_frameNo << std::endl; |
578 | 604 |