Mercurial > hg > qm-vamp-plugins
changeset 49:fc88b465548a
* Normalise type option for chromagram
* Minimum segment duration option for segmenter
* Bit more documentation
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Tue, 22 Jan 2008 17:27:48 +0000 |
parents | 3b4572153ce3 |
children | df7a0bc46592 |
files | README.txt plugins/ChromagramPlugin.cpp plugins/ChromagramPlugin.h plugins/SegmenterPlugin.cpp plugins/SegmenterPlugin.h plugins/SimilarityPlugin.cpp plugins/TonalChangeDetect.cpp |
diffstat | 7 files changed, 97 insertions(+), 64 deletions(-) [+] |
line wrap: on
line diff
--- a/README.txt Mon Jan 21 18:05:28 2008 +0000 +++ b/README.txt Tue Jan 22 17:27:48 2008 +0000 @@ -9,6 +9,22 @@ and http://www.sonicvisualiser.org/. +License +======= + +These plugins are provided in binary form only. You may install and +use the plugin binaries without fee for any purpose commercial or +non-commercial. You may redistribute the plugin binaries provided you +do so without fee and you retain this README file with your +distribution. You may not bundle these plugins with a commercial +product or distribute them on commercial terms. If you wish to +arrange commercial licensing terms, please contact the Centre for +Digital Music at Queen Mary, University of London. + +Copyright (c) 2006-2008 Queen Mary, University of London. All rights +reserved. + + New In This Release =================== @@ -80,8 +96,12 @@ Detection and Spectral Modulation. ISSC 2005 -This plugin analyses a single channel of audio and estimates the -locations of note onsets within the music. +The Note Onset Detector plugin analyses a single channel of audio and +estimates the locations of note onsets within the music. + +It calculates an onset likelihood function for each spectral frame, +and picks peaks in a smoothed version of this function. The plugin is +non-causal, returning all results at the end of processing. It has three outputs: the note onset positions, the onset detection function used in estimating onset positions, and a smoothed version of @@ -105,8 +125,8 @@ on Acoustics, Speech and Signal Processing (ICASSP 2005), Vol. 3, pp241-244 Philadelphia, USA, March 19-23, 2005. -This plugin analyses a single channel of audio and estimates the -locations of metrical beats and the resulting tempo of the music. +The Tempo and Beat Tracker plugin analyses a single channel of audio +and estimates the locations of metrical beats and the resulting tempo. It has three outputs: the beat positions, an ongoing estimate of tempo where available, and the onset detection function used in estimating @@ -126,8 +146,8 @@ In Proceedings of Audio Engineering Society 122nd Convention, Vienna, 2007. -This plugin analyses a single channel of audio and continuously -estimates the key of the music. +The Key Detector plugin analyses a single channel of audio and +continuously estimates the key of the music. It has three outputs: the tonic pitch of the key; a major or minor mode flag; and key (combining the tonic and major/minor into a single @@ -169,6 +189,8 @@ + + Similarity ---------- @@ -183,8 +205,8 @@ K. Jacobson. A Multifaceted Approach to Music Similarity. - In Proceedings of the Seventh International Conference on Music - Information Retrieval (ISMIR), 2006. + In Proceedings of the Seventh International Conference on + Music Information Retrieval (ISMIR), 2006. Constant-Q Spectrogram
--- a/plugins/ChromagramPlugin.cpp Mon Jan 21 18:05:28 2008 +0000 +++ b/plugins/ChromagramPlugin.cpp Tue Jan 22 17:27:48 2008 +0000 @@ -26,7 +26,7 @@ m_minMIDIPitch = 12; m_maxMIDIPitch = 96; m_tuningFrequency = 440; - m_normalized = true; + m_normalise = MathUtilities::NormaliseUnitMax; m_bpo = 12; setupConfig(); @@ -42,7 +42,7 @@ (m_maxMIDIPitch, 0, m_tuningFrequency); m_config.BPO = m_bpo; m_config.CQThresh = 0.0054; - m_config.isNormalised = m_normalized; + m_config.normalise = m_normalise; m_step = 0; m_block = 0; @@ -137,14 +137,17 @@ desc.quantizeStep = 1; list.push_back(desc); - desc.identifier = "normalized"; - desc.name = "Normalized"; + desc.identifier = "normalization"; + desc.name = "Normalization"; desc.unit = ""; desc.minValue = 0; - desc.maxValue = 1; - desc.defaultValue = 1; + desc.maxValue = 2; + desc.defaultValue = 2; desc.isQuantized = true; desc.quantizeStep = 1; + desc.valueNames.push_back("None"); + desc.valueNames.push_back("Unit Sum"); + desc.valueNames.push_back("Unit Maximum"); list.push_back(desc); return list; @@ -165,8 +168,8 @@ if (param == "bpo") { return m_bpo; } - if (param == "normalized") { - return m_normalized; + if (param == "normalization") { + return int(m_normalise); } std::cerr << "WARNING: ChromagramPlugin::getParameter: unknown parameter \"" << param << "\"" << std::endl; @@ -184,8 +187,8 @@ m_tuningFrequency = value; } else if (param == "bpo") { m_bpo = lrintf(value); - } else if (param == "normalized") { - m_normalized = (value > 0.0001); + } else if (param == "normalization") { + m_normalise = MathUtilities::NormaliseType(int(value + 0.0001)); } else { std::cerr << "WARNING: ChromagramPlugin::setParameter: unknown parameter \"" << param << "\"" << std::endl; @@ -292,9 +295,9 @@ d.binNames.push_back(names[m_minMIDIPitch % 12]); } - d.hasKnownExtents = m_normalized; + d.hasKnownExtents = (m_normalise != MathUtilities::NormaliseNone); d.minValue = 0.0; - d.maxValue = (m_normalized ? 1.0 : 0.0); + d.maxValue = (d.hasKnownExtents ? 1.0 : 0.0); d.isQuantized = false; d.sampleType = OutputDescriptor::OneSamplePerStep; list.push_back(d); @@ -309,28 +312,6 @@ return list; } -ChromagramPlugin::Feature -ChromagramPlugin::normalize(const Feature &feature) -{ - float min = 0.0, max = 0.0; - - for (size_t i = 0; i < feature.values.size(); ++i) { - if (i == 0 || feature.values[i] < min) min = feature.values[i]; - if (i == 0 || feature.values[i] > max) max = feature.values[i]; - } - - if (max == 0.0 || max == min) return feature; - - Feature normalized; - normalized.hasTimestamp = false; - - for (size_t i = 0; i < feature.values.size(); ++i) { - normalized.values.push_back((feature.values[i] - min) / (max - min)); - } - - return normalized; -} - ChromagramPlugin::FeatureSet ChromagramPlugin::process(const float *const *inputBuffers, Vamp::RealTime /* timestamp */)
--- a/plugins/ChromagramPlugin.h Mon Jan 21 18:05:28 2008 +0000 +++ b/plugins/ChromagramPlugin.h Tue Jan 22 17:27:48 2008 +0000 @@ -49,7 +49,7 @@ int m_minMIDIPitch; int m_maxMIDIPitch; float m_tuningFrequency; - bool m_normalized; + MathUtilities::NormaliseType m_normalise; int m_bpo; void setupConfig(); @@ -61,8 +61,6 @@ vector<double> m_binsums; size_t m_count; - - Feature normalize(const Feature &); };
--- a/plugins/SegmenterPlugin.cpp Mon Jan 21 18:05:28 2008 +0000 +++ b/plugins/SegmenterPlugin.cpp Tue Jan 22 17:27:48 2008 +0000 @@ -24,6 +24,7 @@ Plugin(inputSampleRate), segmenter(0), nSegmentTypes(10), + neighbourhoodLimit(4), featureType(feature_types(1)) { @@ -143,6 +144,18 @@ desc2.valueNames.push_back("Timbral (MFCC)"); list.push_back(desc2); + ParameterDescriptor desc3; + desc3.identifier = "neighbourhoodLimit"; + desc3.name = "Minimum segment duration"; + desc3.description = "Approximate expected minimum duration for each segment"; + desc3.unit = "s"; + desc3.minValue = 1; + desc3.maxValue = 15; + desc3.defaultValue = 4; + desc3.isQuantized = true; + desc3.quantizeStep = 0.2; + list.push_back(desc3); + return list; } @@ -156,6 +169,10 @@ if (param == "featureType") { return featureType; } + + if (param == "neighbourhoodLimit") { + return neighbourhoodLimit; + } std::cerr << "WARNING: SegmenterPlugin::getParameter: unknown parameter \"" << param << "\"" << std::endl; @@ -168,22 +185,28 @@ if (param == "nSegmentTypes") { nSegmentTypes = int(value + 0.0001); + return; + } - } else { + if (param == "featureType") { + if (featureType != feature_types(value)) // feature type changed, create a new segmenter + { + featureType = feature_types(value); + makeSegmenter(); + } + return; + } - if (param == "featureType") { - if (featureType != feature_types(value)) // feature type changed, create a new segmenter - { - featureType = feature_types(value); - makeSegmenter(); - } + if (param == "neighbourhoodLimit") { + if (neighbourhoodLimit != value) { + neighbourhoodLimit = value; + makeSegmenter(); } - else - { - std::cerr << "WARNING: SegmenterPlugin::setParameter: unknown parameter \"" - << param << "\"" << std::endl; - } + return; } + + std::cerr << "WARNING: SegmenterPlugin::setParameter: unknown parameter \"" + << param << "\"" << std::endl; } void @@ -195,7 +218,6 @@ if (params.featureType == FEATURE_TYPE_CONSTQ) { params.ncomponents = 20; - params.neighbourhoodLimit = 30; } if (params.featureType == FEATURE_TYPE_CHROMA) { @@ -203,15 +225,16 @@ params.windowSize = 0.372; params.nbins = 12; params.histogramLength = 20; - params.neighbourhoodLimit = 40; } if (params.featureType == FEATURE_TYPE_MFCC) { params.ncomponents = 20; - params.neighbourhoodLimit = 30; } delete segmenter; + params.neighbourhoodLimit = + int(neighbourhoodLimit / params.hopSize + 0.0001); + segmenter = new ClusterMeltSegmenter(params); segmenter->initialise(m_inputSampleRate); hopsize = segmenter->getHopsize();
--- a/plugins/SegmenterPlugin.h Mon Jan 21 18:05:28 2008 +0000 +++ b/plugins/SegmenterPlugin.h Tue Jan 22 17:27:48 2008 +0000 @@ -52,6 +52,7 @@ mutable Segmenter* segmenter; mutable int hopsize; mutable int windowsize; + mutable int neighbourhoodLimit; // in sec int nSegmentTypes; feature_types featureType; // 1 = constant-Q, 2 = chroma
--- a/plugins/SimilarityPlugin.cpp Mon Jan 21 18:05:28 2008 +0000 +++ b/plugins/SimilarityPlugin.cpp Tue Jan 22 17:27:48 2008 +0000 @@ -18,6 +18,7 @@ #include "dsp/rhythm/BeatSpectrum.h" #include "maths/KLDivergence.h" #include "maths/CosineDistance.h" +#include "maths/MathUtilities.h" using std::string; using std::vector; @@ -177,7 +178,9 @@ config.max = Pitch::getFrequencyForPitch(96, 0, 440); config.BPO = 12; config.CQThresh = 0.0054; - config.isNormalised = true; + // We don't normalise the chromagram's columns individually; + // we normalise the mean at the end instead + config.normalise = MathUtilities::NormaliseNone; m_chromagram = new Chromagram(config); m_fftSize = m_chromagram->getFrameSize(); @@ -273,7 +276,7 @@ config.max = Pitch::getFrequencyForPitch(96, 0, 440); config.BPO = 12; config.CQThresh = 0.0054; - config.isNormalised = false; + config.normalise = MathUtilities::NormaliseNone; Chromagram *c = new Chromagram(config); size_t sz = c->getFrameSize(); delete c; @@ -685,7 +688,12 @@ } else { - // Chroma are histograms already + // We use the KL divergence for distributions of discrete + // variables, as chroma are histograms already. Or at least, + // they will be when we've normalised them like this: + for (int i = 0; i < m_channels; ++i) { + MathUtilities::normalise(m[i], MathUtilities::NormaliseUnitSum); + } KLDivergence kld;
--- a/plugins/TonalChangeDetect.cpp Mon Jan 21 18:05:28 2008 +0000 +++ b/plugins/TonalChangeDetect.cpp Tue Jan 22 17:27:48 2008 +0000 @@ -204,7 +204,7 @@ (m_maxMIDIPitch, 0, m_tuningFrequency); m_config.BPO = 12; m_config.CQThresh = 0.0054; - m_config.isNormalised = false; + m_config.normalise = MathUtilities::NormaliseNone; m_step = 0; m_block = 0;