Mercurial > hg > qm-vamp-plugins
diff plugins/SimilarityPlugin.cpp @ 48:3b4572153ce3
* Similarity -> single user control rather than separate weighting
* Key detector -> correct reported min/max values for outputs
* Start some documentation
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Mon, 21 Jan 2008 18:05:28 +0000 |
parents | f8c5f11e60a6 |
children | fc88b465548a |
line wrap: on
line diff
--- a/plugins/SimilarityPlugin.cpp Fri Jan 18 18:11:01 2008 +0000
+++ b/plugins/SimilarityPlugin.cpp Mon Jan 21 18:05:28 2008 +0000
@@ -39,7 +39,7 @@
     m_chromagram(0),
     m_decimator(0),
     m_featureColumnSize(20),
-    m_rhythmWeighting(0.f),
+    m_rhythmWeighting(0.5f),
     m_rhythmClipDuration(4.f), // seconds
     m_rhythmClipOrigin(40.f), // seconds
     m_rhythmClipFrameSize(0),
@@ -290,17 +290,20 @@
     ParameterDescriptor desc;
     desc.identifier = "featureType";
     desc.name = "Feature Type";
-    desc.description = "Audio feature used for similarity measure. Timbral: use the first 20 MFCCs (19 plus C0). Chromatic: use 12 bin-per-octave chroma.";
+    desc.description = "Audio feature used for similarity measure. Timbral: use the first 20 MFCCs (19 plus C0). Chromatic: use 12 bin-per-octave chroma. Rhythmic: compare beat spectra of short regions.";
     desc.unit = "";
     desc.minValue = 0;
-    desc.maxValue = 1;
-    desc.defaultValue = 0;
+    desc.maxValue = 4;
+    desc.defaultValue = 1;
     desc.isQuantized = true;
     desc.quantizeStep = 1;
-    desc.valueNames.push_back("Timbral (MFCC)");
-    desc.valueNames.push_back("Chromatic (Chroma)");
+    desc.valueNames.push_back("Timbre");
+    desc.valueNames.push_back("Timbre and Rhythm");
+    desc.valueNames.push_back("Chroma");
+    desc.valueNames.push_back("Chroma and Rhythm");
+    desc.valueNames.push_back("Rhythm only");
     list.push_back(desc);
-
+/*
     desc.identifier = "rhythmWeighting";
     desc.name = "Influence of Rhythm";
     desc.description = "Proportion of similarity measure made up from rhythmic similarity component, from 0 (entirely timbral or chromatic) to 100 (entirely rhythmic).";
@@ -308,11 +311,10 @@
     desc.minValue = 0;
     desc.maxValue = 100;
     desc.defaultValue = 0;
-    desc.isQuantized = true;
-    desc.quantizeStep = 1;
+    desc.isQuantized = false;
     desc.valueNames.clear();
     list.push_back(desc);
-
+*/
     return list;
 }
 
@@ -320,11 +322,28 @@
 SimilarityPlugin::getParameter(std::string param) const
 {
     if (param == "featureType") {
-        if (m_type == TypeMFCC) return 0;
-        else if (m_type == TypeChroma) return 1;
-        else return 0;
-    } else if (param == "rhythmWeighting") {
-        return nearbyint(m_rhythmWeighting * 100.0);
+
+        if (m_rhythmWeighting > m_allRhythm) {
+            return 4;
+        }
+
+        switch (m_type) {
+
+        case TypeMFCC:
+            if (m_rhythmWeighting < m_noRhythm) return 0;
+            else return 1;
+            break;
+
+        case TypeChroma:
+            if (m_rhythmWeighting < m_noRhythm) return 2;
+            else return 3;
+            break;
+        }
+
+        return 1;
+
+//    } else if (param == "rhythmWeighting") {
+//        return nearbyint(m_rhythmWeighting * 100.0);
     }
 
     std::cerr << "WARNING: SimilarityPlugin::getParameter: unknown parameter \""
@@ -336,15 +355,27 @@
 SimilarityPlugin::setParameter(std::string param, float value)
 {
     if (param == "featureType") {
+
         int v = int(value + 0.1);
-        Type prevType = m_type;
-        if (v == 0) m_type = TypeMFCC;
-        else if (v == 1) m_type = TypeChroma;
-        if (m_type != prevType) m_blockSize = 0;
+
+        Type newType = m_type;
+
+        switch (v) {
+        case 0: newType = TypeMFCC; m_rhythmWeighting = 0.0f; break;
+        case 1: newType = TypeMFCC; m_rhythmWeighting = 0.5f; break;
+        case 2: newType = TypeChroma; m_rhythmWeighting = 0.0f; break;
+        case 3: newType = TypeChroma; m_rhythmWeighting = 0.5f; break;
+        case 4: newType = TypeMFCC; m_rhythmWeighting = 1.f; break;
+        }
+
+        if (newType != m_type) m_blockSize = 0;
+
+        m_type = newType;
         return;
-    } else if (param == "rhythmWeighting") {
-        m_rhythmWeighting = value / 100;
-        return;
+
+//    } else if (param == "rhythmWeighting") {
+//        m_rhythmWeighting = value / 100;
+//        return;
     }
 
     std::cerr << "WARNING: SimilarityPlugin::setParameter: unknown parameter \""
@@ -629,22 +660,40 @@
         v[i] = variance;
     }
 
-    // "Despite the fact that MFCCs extracted from music are clearly
-    // not Gaussian, [14] showed, somewhat surprisingly, that a
-    // similarity function comparing single Gaussians modelling MFCCs
-    // for each track can perform as well as mixture models. A great
-    // advantage of using single Gaussians is that a simple closed
-    // form exists for the KL divergence." -- Mark Levy, "Lightweight
-    // measures for timbral similarity of musical audio"
-    // (http://www.elec.qmul.ac.uk/easaier/papers/mlevytimbralsimilarity.pdf)
-
-    KLDivergence kld;
     FeatureMatrix distances(m_channels);
 
-    for (int i = 0; i < m_channels; ++i) {
-        for (int j = 0; j < m_channels; ++j) {
-            double d = kld.distance(m[i], v[i], m[j], v[j]);
-            distances[i].push_back(d);
+    if (m_type == TypeMFCC) {
+
+        // "Despite the fact that MFCCs extracted from music are
+        // clearly not Gaussian, [14] showed, somewhat surprisingly,
+        // that a similarity function comparing single Gaussians
+        // modelling MFCCs for each track can perform as well as
+        // mixture models. A great advantage of using single
+        // Gaussians is that a simple closed form exists for the KL
+        // divergence." -- Mark Levy, "Lightweight measures for
+        // timbral similarity of musical audio"
+        // (http://www.elec.qmul.ac.uk/easaier/papers/mlevytimbralsimilarity.pdf)
+
+        KLDivergence kld;
+
+        for (int i = 0; i < m_channels; ++i) {
+            for (int j = 0; j < m_channels; ++j) {
+                double d = kld.distanceGaussian(m[i], v[i], m[j], v[j]);
+                distances[i].push_back(d);
+            }
+        }
+
+    } else {
+
+        // Chroma are histograms already
+
+        KLDivergence kld;
+
+        for (int i = 0; i < m_channels; ++i) {
+            for (int j = 0; j < m_channels; ++j) {
+                double d = kld.distanceDistribution(m[i], m[j], true);
+                distances[i].push_back(d);
+            }
         }
     }
 