Mercurial > hg > qm-vamp-plugins
comparison plugins/SimilarityPlugin.cpp @ 48:3b4572153ce3
* Similarity -> single user control rather than separate weighting
* Key detector -> correct reported min/max values for outputs
* Start some documentation
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Mon, 21 Jan 2008 18:05:28 +0000 |
parents | f8c5f11e60a6 |
children | fc88b465548a |
comparison
equal
deleted
inserted
replaced
47:f8c5f11e60a6 | 48:3b4572153ce3 |
---|---|
37 m_mfcc(0), | 37 m_mfcc(0), |
38 m_rhythmfcc(0), | 38 m_rhythmfcc(0), |
39 m_chromagram(0), | 39 m_chromagram(0), |
40 m_decimator(0), | 40 m_decimator(0), |
41 m_featureColumnSize(20), | 41 m_featureColumnSize(20), |
42 m_rhythmWeighting(0.f), | 42 m_rhythmWeighting(0.5f), |
43 m_rhythmClipDuration(4.f), // seconds | 43 m_rhythmClipDuration(4.f), // seconds |
44 m_rhythmClipOrigin(40.f), // seconds | 44 m_rhythmClipOrigin(40.f), // seconds |
45 m_rhythmClipFrameSize(0), | 45 m_rhythmClipFrameSize(0), |
46 m_rhythmClipFrames(0), | 46 m_rhythmClipFrames(0), |
47 m_rhythmColumnSize(20), | 47 m_rhythmColumnSize(20), |
288 ParameterList list; | 288 ParameterList list; |
289 | 289 |
290 ParameterDescriptor desc; | 290 ParameterDescriptor desc; |
291 desc.identifier = "featureType"; | 291 desc.identifier = "featureType"; |
292 desc.name = "Feature Type"; | 292 desc.name = "Feature Type"; |
293 desc.description = "Audio feature used for similarity measure. Timbral: use the first 20 MFCCs (19 plus C0). Chromatic: use 12 bin-per-octave chroma."; | 293 desc.description = "Audio feature used for similarity measure. Timbral: use the first 20 MFCCs (19 plus C0). Chromatic: use 12 bin-per-octave chroma. Rhythmic: compare beat spectra of short regions."; |
294 desc.unit = ""; | 294 desc.unit = ""; |
295 desc.minValue = 0; | 295 desc.minValue = 0; |
296 desc.maxValue = 1; | 296 desc.maxValue = 4; |
297 desc.defaultValue = 0; | 297 desc.defaultValue = 1; |
298 desc.isQuantized = true; | 298 desc.isQuantized = true; |
299 desc.quantizeStep = 1; | 299 desc.quantizeStep = 1; |
300 desc.valueNames.push_back("Timbral (MFCC)"); | 300 desc.valueNames.push_back("Timbre"); |
301 desc.valueNames.push_back("Chromatic (Chroma)"); | 301 desc.valueNames.push_back("Timbre and Rhythm"); |
302 desc.valueNames.push_back("Chroma"); | |
303 desc.valueNames.push_back("Chroma and Rhythm"); | |
304 desc.valueNames.push_back("Rhythm only"); | |
302 list.push_back(desc); | 305 list.push_back(desc); |
303 | 306 /* |
304 desc.identifier = "rhythmWeighting"; | 307 desc.identifier = "rhythmWeighting"; |
305 desc.name = "Influence of Rhythm"; | 308 desc.name = "Influence of Rhythm"; |
306 desc.description = "Proportion of similarity measure made up from rhythmic similarity component, from 0 (entirely timbral or chromatic) to 100 (entirely rhythmic)."; | 309 desc.description = "Proportion of similarity measure made up from rhythmic similarity component, from 0 (entirely timbral or chromatic) to 100 (entirely rhythmic)."; |
307 desc.unit = "%"; | 310 desc.unit = "%"; |
308 desc.minValue = 0; | 311 desc.minValue = 0; |
309 desc.maxValue = 100; | 312 desc.maxValue = 100; |
310 desc.defaultValue = 0; | 313 desc.defaultValue = 0; |
311 desc.isQuantized = true; | 314 desc.isQuantized = false; |
312 desc.quantizeStep = 1; | |
313 desc.valueNames.clear(); | 315 desc.valueNames.clear(); |
314 list.push_back(desc); | 316 list.push_back(desc); |
315 | 317 */ |
316 return list; | 318 return list; |
317 } | 319 } |
318 | 320 |
319 float | 321 float |
320 SimilarityPlugin::getParameter(std::string param) const | 322 SimilarityPlugin::getParameter(std::string param) const |
321 { | 323 { |
322 if (param == "featureType") { | 324 if (param == "featureType") { |
323 if (m_type == TypeMFCC) return 0; | 325 |
324 else if (m_type == TypeChroma) return 1; | 326 if (m_rhythmWeighting > m_allRhythm) { |
325 else return 0; | 327 return 4; |
326 } else if (param == "rhythmWeighting") { | 328 } |
327 return nearbyint(m_rhythmWeighting * 100.0); | 329 |
330 switch (m_type) { | |
331 | |
332 case TypeMFCC: | |
333 if (m_rhythmWeighting < m_noRhythm) return 0; | |
334 else return 1; | |
335 break; | |
336 | |
337 case TypeChroma: | |
338 if (m_rhythmWeighting < m_noRhythm) return 2; | |
339 else return 3; | |
340 break; | |
341 } | |
342 | |
343 return 1; | |
344 | |
345 // } else if (param == "rhythmWeighting") { | |
346 // return nearbyint(m_rhythmWeighting * 100.0); | |
328 } | 347 } |
329 | 348 |
330 std::cerr << "WARNING: SimilarityPlugin::getParameter: unknown parameter \"" | 349 std::cerr << "WARNING: SimilarityPlugin::getParameter: unknown parameter \"" |
331 << param << "\"" << std::endl; | 350 << param << "\"" << std::endl; |
332 return 0.0; | 351 return 0.0; |
334 | 353 |
335 void | 354 void |
336 SimilarityPlugin::setParameter(std::string param, float value) | 355 SimilarityPlugin::setParameter(std::string param, float value) |
337 { | 356 { |
338 if (param == "featureType") { | 357 if (param == "featureType") { |
358 | |
339 int v = int(value + 0.1); | 359 int v = int(value + 0.1); |
340 Type prevType = m_type; | 360 |
341 if (v == 0) m_type = TypeMFCC; | 361 Type newType = m_type; |
342 else if (v == 1) m_type = TypeChroma; | 362 |
343 if (m_type != prevType) m_blockSize = 0; | 363 switch (v) { |
364 case 0: newType = TypeMFCC; m_rhythmWeighting = 0.0f; break; | |
365 case 1: newType = TypeMFCC; m_rhythmWeighting = 0.5f; break; | |
366 case 2: newType = TypeChroma; m_rhythmWeighting = 0.0f; break; | |
367 case 3: newType = TypeChroma; m_rhythmWeighting = 0.5f; break; | |
368 case 4: newType = TypeMFCC; m_rhythmWeighting = 1.f; break; | |
369 } | |
370 | |
371 if (newType != m_type) m_blockSize = 0; | |
372 | |
373 m_type = newType; | |
344 return; | 374 return; |
345 } else if (param == "rhythmWeighting") { | 375 |
346 m_rhythmWeighting = value / 100; | 376 // } else if (param == "rhythmWeighting") { |
347 return; | 377 // m_rhythmWeighting = value / 100; |
378 // return; | |
348 } | 379 } |
349 | 380 |
350 std::cerr << "WARNING: SimilarityPlugin::setParameter: unknown parameter \"" | 381 std::cerr << "WARNING: SimilarityPlugin::setParameter: unknown parameter \"" |
351 << param << "\"" << std::endl; | 382 << param << "\"" << std::endl; |
352 } | 383 } |
627 | 658 |
628 m[i] = mean; | 659 m[i] = mean; |
629 v[i] = variance; | 660 v[i] = variance; |
630 } | 661 } |
631 | 662 |
632 // "Despite the fact that MFCCs extracted from music are clearly | |
633 // not Gaussian, [14] showed, somewhat surprisingly, that a | |
634 // similarity function comparing single Gaussians modelling MFCCs | |
635 // for each track can perform as well as mixture models. A great | |
636 // advantage of using single Gaussians is that a simple closed | |
637 // form exists for the KL divergence." -- Mark Levy, "Lightweight | |
638 // measures for timbral similarity of musical audio" | |
639 // (http://www.elec.qmul.ac.uk/easaier/papers/mlevytimbralsimilarity.pdf) | |
640 | |
641 KLDivergence kld; | |
642 FeatureMatrix distances(m_channels); | 663 FeatureMatrix distances(m_channels); |
643 | 664 |
644 for (int i = 0; i < m_channels; ++i) { | 665 if (m_type == TypeMFCC) { |
645 for (int j = 0; j < m_channels; ++j) { | 666 |
646 double d = kld.distance(m[i], v[i], m[j], v[j]); | 667 // "Despite the fact that MFCCs extracted from music are |
647 distances[i].push_back(d); | 668 // clearly not Gaussian, [14] showed, somewhat surprisingly, |
669 // that a similarity function comparing single Gaussians | |
670 // modelling MFCCs for each track can perform as well as | |
671 // mixture models. A great advantage of using single | |
672 // Gaussians is that a simple closed form exists for the KL | |
673 // divergence." -- Mark Levy, "Lightweight measures for | |
674 // timbral similarity of musical audio" | |
675 // (http://www.elec.qmul.ac.uk/easaier/papers/mlevytimbralsimilarity.pdf) | |
676 | |
677 KLDivergence kld; | |
678 | |
679 for (int i = 0; i < m_channels; ++i) { | |
680 for (int j = 0; j < m_channels; ++j) { | |
681 double d = kld.distanceGaussian(m[i], v[i], m[j], v[j]); | |
682 distances[i].push_back(d); | |
683 } | |
684 } | |
685 | |
686 } else { | |
687 | |
688 // Chroma are histograms already | |
689 | |
690 KLDivergence kld; | |
691 | |
692 for (int i = 0; i < m_channels; ++i) { | |
693 for (int j = 0; j < m_channels; ++j) { | |
694 double d = kld.distanceDistribution(m[i], m[j], true); | |
695 distances[i].push_back(d); | |
696 } | |
648 } | 697 } |
649 } | 698 } |
650 | 699 |
651 Feature feature; | 700 Feature feature; |
652 feature.hasTimestamp = true; | 701 feature.hasTimestamp = true; |