comparison plugins/SimilarityPlugin.cpp @ 68:68f181553123

* bit of tidying
author Chris Cannam <c.cannam@qmul.ac.uk>
date Wed, 05 Mar 2008 12:26:11 +0000
parents e8e103090d97
children 4a354c18e688
comparison
equal deleted inserted replaced
67:e8e103090d97 68:68f181553123
115 115
116 size_t 116 size_t
117 SimilarityPlugin::getMaxChannelCount() const 117 SimilarityPlugin::getMaxChannelCount() const
118 { 118 {
119 return 1024; 119 return 1024;
120 }
121
122 bool
123 SimilarityPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
124 {
125 if (channels < getMinChannelCount()) return false;
126
127 // Using more than getMaxChannelCount is not actually a problem
128 // for us. Using "incorrect" step and block sizes would be fine
129 // for timbral or chroma similarity, but will break rhythmic
130 // similarity, so we'd better enforce these.
131
132 if (stepSize != getPreferredStepSize()) {
133 std::cerr << "SimilarityPlugin::initialise: supplied step size "
134 << stepSize << " differs from required step size "
135 << getPreferredStepSize() << std::endl;
136 return false;
137 }
138
139 if (blockSize != getPreferredBlockSize()) {
140 std::cerr << "SimilarityPlugin::initialise: supplied block size "
141 << blockSize << " differs from required block size "
142 << getPreferredBlockSize() << std::endl;
143 return false;
144 }
145
146 m_blockSize = blockSize;
147 m_channels = channels;
148
149 m_lastNonEmptyFrame = std::vector<int>(m_channels);
150 for (int i = 0; i < m_channels; ++i) m_lastNonEmptyFrame[i] = -1;
151
152 m_emptyFrameCount = std::vector<int>(m_channels);
153 for (int i = 0; i < m_channels; ++i) m_emptyFrameCount[i] = 0;
154
155 m_frameNo = 0;
156
157 int decimationFactor = getDecimationFactor();
158 if (decimationFactor > 1) {
159 m_decimator = new Decimator(m_blockSize, decimationFactor);
160 }
161
162 if (m_type == TypeMFCC) {
163
164 m_featureColumnSize = 20;
165
166 MFCCConfig config(m_processRate);
167 config.fftsize = 2048;
168 config.nceps = m_featureColumnSize - 1;
169 config.want_c0 = true;
170 config.logpower = 1;
171 m_mfcc = new MFCC(config);
172 m_fftSize = m_mfcc->getfftlength();
173 m_rhythmClipFrameSize = m_fftSize / 4;
174
175 // std::cerr << "MFCC FS = " << config.FS << ", FFT size = " << m_fftSize<< std::endl;
176
177 } else if (m_type == TypeChroma) {
178
179 m_featureColumnSize = 12;
180
181 // For simplicity, aim to have the chroma fft size equal to
182 // 2048, the same as the mfcc fft size (so the input block
183 // size does not depend on the feature type and we can use the
184 // same processing parameters for rhythm etc). This is also
185 // why getPreferredBlockSize can confidently return 2048 * the
186 // decimation factor.
187
188 // The fft size for a chromagram is the filterbank Q value
189 // times the sample rate, divided by the minimum frequency,
190 // rounded up to the nearest power of two.
191
192 double q = 1.0 / (pow(2.0, (1.0 / 12.0)) - 1.0);
193 double fmin = (q * m_processRate) / 2048.0;
194 // std::cerr << "chroma fmin = " << fmin;
195
196 // Round fmin up to the nearest MIDI pitch multiple of 12.
197 // So long as fmin is greater than 12 to start with, this
198 // should not change the resulting fft size.
199
200 int pmin = Pitch::getPitchForFrequency(float(fmin));
201 pmin = ((pmin / 12) + 1) * 12;
202 fmin = Pitch::getFrequencyForPitch(pmin);
203 // std::cerr << " -> " << fmin << " for pitch " << pmin << std::endl;
204
205 float fmax = Pitch::getFrequencyForPitch(pmin + 36);
206 // std::cerr << "fmax = " << fmax << " for pitch " << (pmin+36) << std::endl;
207
208
209 ChromaConfig config;
210 config.FS = m_processRate;
211 config.min = fmin;
212 config.max = fmax;
213 // config.min = Pitch::getFrequencyForPitch(24, 0, 440);
214 // config.max = Pitch::getFrequencyForPitch(96, 0, 440);
215 config.BPO = 12;
216 config.CQThresh = 0.0054;
217 // We don't normalise the chromagram's columns individually;
218 // we normalise the mean at the end instead
219 config.normalise = MathUtilities::NormaliseNone;
220 m_chromagram = new Chromagram(config);
221 m_fftSize = m_chromagram->getFrameSize();
222
223 if (m_fftSize != 2048) {
224 std::cerr << "WARNING: SimilarityPlugin::initialise: Internal processing FFT size " << m_fftSize << " != expected size 2048 in chroma mode" << std::endl;
225 }
226
227 std::cerr << "fftsize = " << m_fftSize << std::endl;
228
229 m_rhythmClipFrameSize = m_fftSize / 4;
230
231 // m_rhythmClipFrameSize = m_fftSize / 16;
232 // while (m_rhythmClipFrameSize < 512) m_rhythmClipFrameSize *= 2;
233
234 std::cerr << "m_rhythmClipFrameSize = " << m_rhythmClipFrameSize << std::endl;
235
236 std::cerr << "min = "<< config.min << ", max = " << config.max << std::endl;
237
238 } else {
239
240 std::cerr << "SimilarityPlugin::initialise: internal error: unknown type " << m_type << std::endl;
241 return false;
242 }
243
244 if (needRhythm()) {
245 m_rhythmClipFrames =
246 int(ceil((m_rhythmClipDuration * m_processRate)
247 / m_rhythmClipFrameSize));
248 std::cerr << "SimilarityPlugin::initialise: rhythm clip requires "
249 << m_rhythmClipFrames << " frames of size "
250 << m_rhythmClipFrameSize << " at process rate "
251 << m_processRate << " ( = "
252 << (float(m_rhythmClipFrames * m_rhythmClipFrameSize) / m_processRate) << " sec )"
253 << std::endl;
254
255 MFCCConfig config(m_processRate);
256 config.fftsize = m_rhythmClipFrameSize;
257 config.nceps = m_rhythmColumnSize - 1;
258 config.want_c0 = true;
259 config.logpower = 1;
260 config.window = RectangularWindow; // because no overlap
261 m_rhythmfcc = new MFCC(config);
262 }
263
264 for (int i = 0; i < m_channels; ++i) {
265
266 m_values.push_back(FeatureMatrix());
267
268 if (needRhythm()) {
269 m_rhythmValues.push_back(FeatureColumnQueue());
270 }
271 }
272
273 m_done = false;
274
275 return true;
276 }
277
278 void
279 SimilarityPlugin::reset()
280 {
281 for (int i = 0; i < m_values.size(); ++i) {
282 m_values[i].clear();
283 }
284
285 for (int i = 0; i < m_rhythmValues.size(); ++i) {
286 m_rhythmValues[i].clear();
287 }
288
289 for (int i = 0; i < m_lastNonEmptyFrame.size(); ++i) {
290 m_lastNonEmptyFrame[i] = -1;
291 }
292
293 for (int i = 0; i < m_emptyFrameCount.size(); ++i) {
294 m_emptyFrameCount[i] = 0;
295 }
296
297 m_done = false;
298 } 120 }
299 121
300 int 122 int
301 SimilarityPlugin::getDecimationFactor() const 123 SimilarityPlugin::getDecimationFactor() const
302 { 124 {
529 list.push_back(beatspectrum); 351 list.push_back(beatspectrum);
530 352
531 return list; 353 return list;
532 } 354 }
533 355
356 bool
357 SimilarityPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
358 {
359 if (channels < getMinChannelCount()) return false;
360
361 // Using more than getMaxChannelCount is not actually a problem
362 // for us. Using "incorrect" step and block sizes would be fine
363 // for timbral or chroma similarity, but will break rhythmic
364 // similarity, so we'd better enforce these.
365
366 if (stepSize != getPreferredStepSize()) {
367 std::cerr << "SimilarityPlugin::initialise: supplied step size "
368 << stepSize << " differs from required step size "
369 << getPreferredStepSize() << std::endl;
370 return false;
371 }
372
373 if (blockSize != getPreferredBlockSize()) {
374 std::cerr << "SimilarityPlugin::initialise: supplied block size "
375 << blockSize << " differs from required block size "
376 << getPreferredBlockSize() << std::endl;
377 return false;
378 }
379
380 m_blockSize = blockSize;
381 m_channels = channels;
382
383 m_lastNonEmptyFrame = std::vector<int>(m_channels);
384 for (int i = 0; i < m_channels; ++i) m_lastNonEmptyFrame[i] = -1;
385
386 m_emptyFrameCount = std::vector<int>(m_channels);
387 for (int i = 0; i < m_channels; ++i) m_emptyFrameCount[i] = 0;
388
389 m_frameNo = 0;
390
391 int decimationFactor = getDecimationFactor();
392 if (decimationFactor > 1) {
393 m_decimator = new Decimator(m_blockSize, decimationFactor);
394 }
395
396 if (m_type == TypeMFCC) {
397
398 m_featureColumnSize = 20;
399
400 MFCCConfig config(m_processRate);
401 config.fftsize = 2048;
402 config.nceps = m_featureColumnSize - 1;
403 config.want_c0 = true;
404 config.logpower = 1;
405 m_mfcc = new MFCC(config);
406 m_fftSize = m_mfcc->getfftlength();
407 m_rhythmClipFrameSize = m_fftSize / 4;
408
409 // std::cerr << "MFCC FS = " << config.FS << ", FFT size = " << m_fftSize<< std::endl;
410
411 } else if (m_type == TypeChroma) {
412
413 m_featureColumnSize = 12;
414
415 // For simplicity, aim to have the chroma fft size equal to
416 // 2048, the same as the mfcc fft size (so the input block
417 // size does not depend on the feature type and we can use the
418 // same processing parameters for rhythm etc). This is also
419 // why getPreferredBlockSize can confidently return 2048 * the
420 // decimation factor.
421
422 // The fft size for a chromagram is the filterbank Q value
423 // times the sample rate, divided by the minimum frequency,
424 // rounded up to the nearest power of two.
425
426 double q = 1.0 / (pow(2.0, (1.0 / 12.0)) - 1.0);
427 double fmin = (q * m_processRate) / 2048.0;
428
429 // Round fmin up to the nearest MIDI pitch multiple of 12.
430 // So long as fmin is greater than 12 to start with, this
431 // should not change the resulting fft size.
432
433 int pmin = Pitch::getPitchForFrequency(float(fmin));
434 pmin = ((pmin / 12) + 1) * 12;
435 fmin = Pitch::getFrequencyForPitch(pmin);
436
437 float fmax = Pitch::getFrequencyForPitch(pmin + 36);
438
439 ChromaConfig config;
440 config.FS = m_processRate;
441 config.min = fmin;
442 config.max = fmax;
443 config.BPO = 12;
444 config.CQThresh = 0.0054;
445 // We don't normalise the chromagram's columns individually;
446 // we normalise the mean at the end instead
447 config.normalise = MathUtilities::NormaliseNone;
448 m_chromagram = new Chromagram(config);
449 m_fftSize = m_chromagram->getFrameSize();
450
451 if (m_fftSize != 2048) {
452 std::cerr << "WARNING: SimilarityPlugin::initialise: Internal processing FFT size " << m_fftSize << " != expected size 2048 in chroma mode" << std::endl;
453 }
454
455 // std::cerr << "fftsize = " << m_fftSize << std::endl;
456
457 m_rhythmClipFrameSize = m_fftSize / 4;
458
459 // std::cerr << "m_rhythmClipFrameSize = " << m_rhythmClipFrameSize << std::endl;
460 // std::cerr << "min = "<< config.min << ", max = " << config.max << std::endl;
461
462 } else {
463
464 std::cerr << "SimilarityPlugin::initialise: internal error: unknown type " << m_type << std::endl;
465 return false;
466 }
467
468 if (needRhythm()) {
469 m_rhythmClipFrames =
470 int(ceil((m_rhythmClipDuration * m_processRate)
471 / m_rhythmClipFrameSize));
472 // std::cerr << "SimilarityPlugin::initialise: rhythm clip requires "
473 // << m_rhythmClipFrames << " frames of size "
474 // << m_rhythmClipFrameSize << " at process rate "
475 // << m_processRate << " ( = "
476 // << (float(m_rhythmClipFrames * m_rhythmClipFrameSize) / m_processRate) << " sec )"
477 // << std::endl;
478
479 MFCCConfig config(m_processRate);
480 config.fftsize = m_rhythmClipFrameSize;
481 config.nceps = m_rhythmColumnSize - 1;
482 config.want_c0 = true;
483 config.logpower = 1;
484 config.window = RectangularWindow; // because no overlap
485 m_rhythmfcc = new MFCC(config);
486 }
487
488 for (int i = 0; i < m_channels; ++i) {
489
490 m_values.push_back(FeatureMatrix());
491
492 if (needRhythm()) {
493 m_rhythmValues.push_back(FeatureColumnQueue());
494 }
495 }
496
497 m_done = false;
498
499 return true;
500 }
501
502 void
503 SimilarityPlugin::reset()
504 {
505 for (int i = 0; i < m_values.size(); ++i) {
506 m_values[i].clear();
507 }
508
509 for (int i = 0; i < m_rhythmValues.size(); ++i) {
510 m_rhythmValues[i].clear();
511 }
512
513 for (int i = 0; i < m_lastNonEmptyFrame.size(); ++i) {
514 m_lastNonEmptyFrame[i] = -1;
515 }
516
517 for (int i = 0; i < m_emptyFrameCount.size(); ++i) {
518 m_emptyFrameCount[i] = 0;
519 }
520
521 m_done = false;
522 }
523
534 SimilarityPlugin::FeatureSet 524 SimilarityPlugin::FeatureSet
535 SimilarityPlugin::process(const float *const *inputBuffers, Vamp::RealTime /* timestamp */) 525 SimilarityPlugin::process(const float *const *inputBuffers, Vamp::RealTime /* timestamp */)
536 { 526 {
537 if (m_done) { 527 if (m_done) {
538 return FeatureSet(); 528 return FeatureSet();