comparison plugins/SimilarityPlugin.cpp @ 66:12516e68c81e

* Start work on fixes to the similarity plugin: avoid a crash when running in chroma mode, and set up chroma to have the same block size as MFCC. Unfortunately, we now have meaningless beat spectra for chroma+rhythm mode -- probably something very trivial, but I don't see what right now.
author Chris Cannam <c.cannam@qmul.ac.uk>
date Mon, 03 Mar 2008 18:07:27 +0000
parents 90fa946fda40
children e8e103090d97
comparison
equal deleted inserted replaced
65:851560f7fca2 66:12516e68c81e
176 176
177 } else if (m_type == TypeChroma) { 177 } else if (m_type == TypeChroma) {
178 178
179 m_featureColumnSize = 12; 179 m_featureColumnSize = 12;
180 180
181 // For simplicity, aim to have the chroma fft size equal to
182 // 2048, the same as the mfcc fft size (so the input block
183 // size does not depend on the feature type and we can use the
184 // same processing parameters for rhythm etc). This is also
185 // why getPreferredBlockSize can confidently return 2048 * the
186 // decimation factor.
187
188 // The fft size for a chromagram is the filterbank Q value
189 // times the sample rate, divided by the minimum frequency,
190 // rounded up to the nearest power of two.
191
192 double q = 1.0 / (pow(2.0, (1.0 / 12.0)) - 1.0);
193 double fmin = (q * m_processRate) / 2048.0;
194 // std::cerr << "chroma fmin = " << fmin;
195
196 // Round fmin up to the nearest MIDI pitch multiple of 12.
197 // So long as fmin is greater than 12 to start with, this
198 // should not change the resulting fft size.
199
200 int pmin = Pitch::getPitchForFrequency(float(fmin));
201 pmin = ((pmin / 12) + 1) * 12;
202 fmin = Pitch::getFrequencyForPitch(pmin);
203 // std::cerr << " -> " << fmin << " for pitch " << pmin << std::endl;
204
205 float fmax = Pitch::getFrequencyForPitch(pmin + 36);
206 // std::cerr << "fmax = " << fmax << " for pitch " << (pmin+36) << std::endl;
207
208
181 ChromaConfig config; 209 ChromaConfig config;
182 config.FS = m_processRate; 210 config.FS = m_processRate;
183 config.min = Pitch::getFrequencyForPitch(24, 0, 440); 211 config.min = fmin;
184 config.max = Pitch::getFrequencyForPitch(96, 0, 440); 212 config.max = fmax;
213 // config.min = Pitch::getFrequencyForPitch(24, 0, 440);
214 // config.max = Pitch::getFrequencyForPitch(96, 0, 440);
185 config.BPO = 12; 215 config.BPO = 12;
186 config.CQThresh = 0.0054; 216 config.CQThresh = 0.0054;
187 // We don't normalise the chromagram's columns individually; 217 // We don't normalise the chromagram's columns individually;
188 // we normalise the mean at the end instead 218 // we normalise the mean at the end instead
189 config.normalise = MathUtilities::NormaliseNone; 219 config.normalise = MathUtilities::NormaliseNone;
190 m_chromagram = new Chromagram(config); 220 m_chromagram = new Chromagram(config);
191 m_fftSize = m_chromagram->getFrameSize(); 221 m_fftSize = m_chromagram->getFrameSize();
222
223 if (m_fftSize != 2048) {
224 std::cerr << "WARNING: SimilarityPlugin::initialise: Internal processing FFT size " << m_fftSize << " != expected size 2048 in chroma mode" << std::endl;
225 }
192 226
193 // std::cerr << "fftsize = " << m_fftSize << std::endl; 227 // std::cerr << "fftsize = " << m_fftSize << std::endl;
194 228
195 m_rhythmClipFrameSize = m_fftSize / 16; 229 m_rhythmClipFrameSize = m_fftSize / 4;
196 while (m_rhythmClipFrameSize < 512) m_rhythmClipFrameSize *= 2; 230
231 // m_rhythmClipFrameSize = m_fftSize / 16;
232 // while (m_rhythmClipFrameSize < 512) m_rhythmClipFrameSize *= 2;
233
197 // std::cerr << "m_rhythmClipFrameSize = " << m_rhythmClipFrameSize << std::endl; 234 // std::cerr << "m_rhythmClipFrameSize = " << m_rhythmClipFrameSize << std::endl;
198 235
199 // std::cerr << "min = "<< config.min << ", max = " << config.max << std::endl; 236 // std::cerr << "min = "<< config.min << ", max = " << config.max << std::endl;
200 237
201 } else { 238 } else {
288 void 325 void
289 SimilarityPlugin::calculateBlockSize() const 326 SimilarityPlugin::calculateBlockSize() const
290 { 327 {
291 if (m_blockSize != 0) return; 328 if (m_blockSize != 0) return;
292 int decimationFactor = getDecimationFactor(); 329 int decimationFactor = getDecimationFactor();
293 if (m_type == TypeChroma) { 330 m_blockSize = 2048 * decimationFactor;
294 ChromaConfig config;
295 config.FS = m_processRate;
296 config.min = Pitch::getFrequencyForPitch(24, 0, 440);
297 config.max = Pitch::getFrequencyForPitch(96, 0, 440);
298 config.BPO = 12;
299 config.CQThresh = 0.0054;
300 config.normalise = MathUtilities::NormaliseNone;
301 Chromagram *c = new Chromagram(config);
302 size_t sz = c->getFrameSize();
303 delete c;
304 m_blockSize = sz * decimationFactor;
305 } else {
306 m_blockSize = 2048 * decimationFactor;
307 }
308 } 331 }
309 332
310 SimilarityPlugin::ParameterList SimilarityPlugin::getParameterDescriptors() const 333 SimilarityPlugin::ParameterList SimilarityPlugin::getParameterDescriptors() const
311 { 334 {
312 ParameterList list; 335 ParameterList list;
558 m_decimator->process(dblbuf, decbuf); 581 m_decimator->process(dblbuf, decbuf);
559 } 582 }
560 583
561 if (needTimbre()) { 584 if (needTimbre()) {
562 585
586 FeatureColumn mf(m_featureColumnSize);
587
563 if (m_type == TypeMFCC) { 588 if (m_type == TypeMFCC) {
564 m_mfcc->process(decbuf, raw); 589 m_mfcc->process(decbuf, raw);
590 for (int i = 0; i < m_featureColumnSize; ++i) {
591 mf[i] = raw[i];
592 }
565 } else if (m_type == TypeChroma) { 593 } else if (m_type == TypeChroma) {
566 raw = m_chromagram->process(decbuf); 594 double *chroma = m_chromagram->process(decbuf);
595 for (int i = 0; i < m_featureColumnSize; ++i) {
596 mf[i] = chroma[i];
597 }
567 } 598 }
568 599
569 FeatureColumn mf(m_featureColumnSize);
570 for (int i = 0; i < m_featureColumnSize; ++i) {
571 mf[i] = raw[i];
572 }
573
574 m_values[c].push_back(mf); 600 m_values[c].push_back(mf);
575 } 601 }
576 602
577 // std::cerr << "needRhythm = " << needRhythm() << ", frame = " << m_frameNo << std::endl; 603 // std::cerr << "needRhythm = " << needRhythm() << ", frame = " << m_frameNo << std::endl;
578 604