# HG changeset patch # User Chris Cannam # Date 1200663056 0 # Node ID 5d7ce1d87301e6e4eafa3232eb1efa5650c6a73b # Parent 1dc00e4dbae6af004338ab98da24955f62ef151c * Add MFCC plugin * Add means output to Chromagram plugin * Update similarity plugin for MFCC changes diff -r 1dc00e4dbae6 -r 5d7ce1d87301 libmain.cpp --- a/libmain.cpp Thu Jan 17 15:37:37 2008 +0000 +++ b/libmain.cpp Fri Jan 18 13:30:56 2008 +0000 @@ -16,6 +16,7 @@ #include "plugins/ConstantQSpectrogram.h" #include "plugins/TonalChangeDetect.h" #include "plugins/KeyDetect.h" +#include "plugins/MFCCPlugin.h" #include "plugins/SegmenterPlugin.h" #include "plugins/SimilarityPlugin.h" @@ -25,6 +26,7 @@ static Vamp::PluginAdapter constantQAdapter; static Vamp::PluginAdapter tonalChangeDetectorAdapter; static Vamp::PluginAdapter keyDetectorAdapter; +static Vamp::PluginAdapter mfccPluginAdapter; static Vamp::PluginAdapter segmenterPluginAdapter; static Vamp::PluginAdapter similarityPluginAdapter; @@ -42,6 +44,7 @@ case 5: return keyDetectorAdapter.getDescriptor(); case 6: return segmenterPluginAdapter.getDescriptor(); case 7: return similarityPluginAdapter.getDescriptor(); + case 8: return mfccPluginAdapter.getDescriptor(); default: return 0; } } diff -r 1dc00e4dbae6 -r 5d7ce1d87301 plugins/ChromagramPlugin.cpp --- a/plugins/ChromagramPlugin.cpp Thu Jan 17 15:37:37 2008 +0000 +++ b/plugins/ChromagramPlugin.cpp Fri Jan 18 13:30:56 2008 +0000 @@ -81,12 +81,13 @@ int ChromagramPlugin::getPluginVersion() const { - return 2; + return 3; } string ChromagramPlugin::getCopyright() const { + //!!! update return "Copyright (c) 2006 - All Rights Reserved"; } @@ -99,6 +100,7 @@ desc.identifier = "minpitch"; desc.name = "Minimum Pitch"; desc.unit = "MIDI units"; + //!!! 
descriptions desc.minValue = 0; desc.maxValue = 127; desc.defaultValue = 12; @@ -208,6 +210,13 @@ << blockSize << std::endl; m_chromagram = new Chromagram(m_config); + m_binsums = vector(m_config.BPO); + + for (int i = 0; i < m_config.BPO; ++i) { + m_binsums[i] = 0.0; + } + + m_count = 0; m_step = m_chromagram->getHopSize(); m_block = m_chromagram->getFrameSize(); @@ -290,6 +299,13 @@ d.sampleType = OutputDescriptor::OneSamplePerStep; list.push_back(d); + d.identifier = "chromameans"; + d.name = "Chroma Means"; + //!!! descriptions + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = 1; + list.push_back(d); + return list; } @@ -346,9 +362,11 @@ for (size_t i = 0; i < m_config.BPO; ++i) { double value = output[i]; if (isnan(value)) value = 0.0; + m_binsums[i] += value; feature.values.push_back(value); } feature.label = ""; + ++m_count; FeatureSet returnFeatures; returnFeatures[0].push_back(feature); @@ -358,6 +376,19 @@ ChromagramPlugin::FeatureSet ChromagramPlugin::getRemainingFeatures() { - return FeatureSet(); + Feature feature; + feature.hasTimestamp = true; + feature.timestamp = Vamp::RealTime::zeroTime; + + for (size_t i = 0; i < m_config.BPO; ++i) { + double v = m_binsums[i]; + if (m_count > 0) v /= m_count; + feature.values.push_back(v); + } + feature.label = "Chromagram bin means"; + + FeatureSet returnFeatures; + returnFeatures[1].push_back(feature); + return returnFeatures; } diff -r 1dc00e4dbae6 -r 5d7ce1d87301 plugins/ChromagramPlugin.h --- a/plugins/ChromagramPlugin.h Thu Jan 17 15:37:37 2008 +0000 +++ b/plugins/ChromagramPlugin.h Fri Jan 18 13:30:56 2008 +0000 @@ -13,8 +13,6 @@ #include #include -#include - class ChromagramPlugin : public Vamp::Plugin { public: @@ -61,6 +59,9 @@ mutable size_t m_step; mutable size_t m_block; + vector m_binsums; + size_t m_count; + Feature normalize(const Feature &); }; diff -r 1dc00e4dbae6 -r 5d7ce1d87301 plugins/MFCCPlugin.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plugins/MFCCPlugin.cpp 
Fri Jan 18 13:30:56 2008 +0000
@@ -0,0 +1,361 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
+
+/*
+    QM Vamp Plugin Set
+
+    Centre for Digital Music, Queen Mary, University of London.
+    All rights reserved.
+*/
+
+#include "MFCCPlugin.h"
+
+// NOTE(review): the target of the next #include was stripped from the copy of
+// the patch under review; <dsp/mfcc/MFCC.h> is presumed -- confirm.
+#include <dsp/mfcc/MFCC.h>
+
+#include <math.h>   // lrintf, isnan
+#include <iostream> // std::cerr
+
+using std::string;
+using std::vector;
+using std::cerr;
+using std::endl;
+
+/**
+ * Create the plugin with its default parameters: 20 output bins including
+ * C0 and a log power of 1.  The MFCC processor itself is not created until
+ * initialise() is called.
+ */
+MFCCPlugin::MFCCPlugin(float inputSampleRate) :
+    Vamp::Plugin(inputSampleRate),
+    m_bins(20),
+    m_wantC0(true),
+    m_logpower(1),
+    m_config(lrintf(inputSampleRate)),
+    m_mfcc(0),
+    m_step(1024),
+    m_block(2048),
+    m_count(0) // divisor for the means; must not start out indeterminate
+{
+    setupConfig();
+}
+
+/**
+ * Copy the sample rate, block size and current parameter values into
+ * m_config, from which initialise() and reset() construct the MFCC object.
+ */
+void
+MFCCPlugin::setupConfig()
+{
+    m_config.FS = lrintf(m_inputSampleRate);
+    m_config.fftsize = m_block;
+    // m_bins counts C0 when it is requested, so one fewer cepstral
+    // coefficient is asked for in that case.
+    m_config.nceps = (m_wantC0 ? m_bins-1 : m_bins);
+    m_config.want_c0 = m_wantC0;
+    m_config.logpower = m_logpower;
+}
+
+MFCCPlugin::~MFCCPlugin()
+{
+    delete m_mfcc;
+}
+
+string
+MFCCPlugin::getIdentifier() const
+{
+    return "qm-mfcc";
+}
+
+string
+MFCCPlugin::getName() const
+{
+    return "Mel-Frequency Cepstral Coefficients";
+}
+
+string
+MFCCPlugin::getDescription() const
+{
+    //!!!
+    return "";
+}
+
+string
+MFCCPlugin::getMaker() const
+{
+    return "Queen Mary, University of London";
+}
+
+int
+MFCCPlugin::getPluginVersion() const
+{
+    return 1;
+}
+
+string
+MFCCPlugin::getCopyright() const
+{
+    //!!! update
+    return "Copyright (c) 2008 - All Rights Reserved";
+}
+
+/**
+ * Describe the three parameters: "nceps" (number of output coefficients),
+ * "logpower" and "wantc0".
+ */
+MFCCPlugin::ParameterList
+MFCCPlugin::getParameterDescriptors() const
+{
+    ParameterList list;
+
+    ParameterDescriptor desc;
+    desc.identifier = "nceps";
+    desc.name = "Number of Coefficients";
+    desc.unit = "";
+    //!!! descriptions -- "including C0 if requested"
+    desc.minValue = 1;
+    desc.maxValue = 40;
+    desc.defaultValue = 20;
+    desc.isQuantized = true;
+    desc.quantizeStep = 1;
+    list.push_back(desc);
+
+    desc.identifier = "logpower";
+    desc.name = "Power for Mel Amplitude Logs";
+    desc.unit = "";
+    desc.minValue = 0;
+    desc.maxValue = 5;
+    desc.defaultValue = 1;
+    desc.isQuantized = false;
+    desc.quantizeStep = 0;
+    list.push_back(desc);
+
+    desc.identifier = "wantc0";
+    desc.name = "Include C0";
+    desc.unit = "";
+    //!!! description
+    desc.minValue = 0;
+    desc.maxValue = 1;
+    desc.defaultValue = 1;
+    desc.isQuantized = true;
+    desc.quantizeStep = 1;
+    list.push_back(desc);
+
+    return list;
+}
+
+/**
+ * Return the current value of the named parameter, or 0 (with a warning on
+ * stderr) if the name is not recognised.
+ */
+float
+MFCCPlugin::getParameter(std::string param) const
+{
+    if (param == "nceps") {
+        return m_bins;
+    }
+    if (param == "logpower") {
+        return m_logpower;
+    }
+    if (param == "wantc0") {
+        return m_wantC0 ? 1 : 0;
+    }
+    std::cerr << "WARNING: MFCCPlugin::getParameter: unknown parameter \""
+              << param << "\"" << std::endl;
+    return 0.0;
+}
+
+/**
+ * Set the named parameter and refresh m_config.  Unknown names are reported
+ * on stderr and otherwise ignored.
+ */
+void
+MFCCPlugin::setParameter(std::string param, float value)
+{
+    if (param == "nceps") {
+        m_bins = lrintf(value);
+    } else if (param == "logpower") {
+        // "logpower" is declared unquantized and stored as a float, so the
+        // value is kept as given rather than rounded to an integer.
+        m_logpower = value;
+    } else if (param == "wantc0") {
+        m_wantC0 = (value > 0.5);
+    } else {
+        std::cerr << "WARNING: MFCCPlugin::setParameter: unknown parameter \""
+                  << param << "\"" << std::endl;
+    }
+
+    setupConfig();
+}
+
+/**
+ * Prepare for processing: record the step and block sizes, build a new MFCC
+ * object from the current configuration and clear the running totals.
+ * Returns false if the channel count is outside the supported range.
+ */
+bool
+MFCCPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
+{
+    if (m_mfcc) {
+        delete m_mfcc;
+        m_mfcc = 0;
+    }
+
+    if (channels < getMinChannelCount() ||
+        channels > getMaxChannelCount()) return false;
+
+    std::cerr << "MFCCPlugin::initialise: step " << stepSize << ", block "
+              << blockSize << std::endl;
+
+    m_step = stepSize;
+    m_block = blockSize;
+    setupConfig();
+
+    m_mfcc = new MFCC(m_config);
+
+    m_binsums.assign(m_bins, 0.0);
+    m_count = 0;
+
+    return true;
+}
+
+/**
+ * Discard all accumulated state so that processing can start again with the
+ * same configuration.  Does nothing if no MFCC object exists yet.
+ */
+void
+MFCCPlugin::reset()
+{
+    if (m_mfcc) {
+        delete m_mfcc;
+        m_mfcc = new MFCC(m_config);
+        m_binsums.assign(m_bins, 0.0);
+        // The block count restarts with the sums; otherwise means reported
+        // after a reset would be divided by a stale count.
+        m_count = 0;
+    }
+}
+
+size_t
+MFCCPlugin::getPreferredStepSize() const
+{
+    return 1024;
+}
+
+size_t
+MFCCPlugin::getPreferredBlockSize() const
+{
+    return 2048;
+}
+
+/**
+ * Describe the two outputs: 0 carries the coefficients for each block, 1
+ * carries the coefficient means returned by getRemainingFeatures().
+ */
+MFCCPlugin::OutputList
+MFCCPlugin::getOutputDescriptors() const
+{
+    OutputList list;
+
+    OutputDescriptor d;
+    d.identifier = "coefficients";
+    d.name = "Coefficients";
+    d.unit = "";
+    d.hasFixedBinCount = true;
+    d.binCount = m_bins;
+    d.hasKnownExtents = false;
+    d.isQuantized = false;
+    d.sampleType = OutputDescriptor::OneSamplePerStep;
+    list.push_back(d);
+
+    d.identifier = "means";
+    d.name = "Means of Coefficients";
+    //!!! descriptions
+    d.sampleType = OutputDescriptor::FixedSampleRate;
+    d.sampleRate = 1;
+    list.push_back(d);
+
+    return list;
+}
+
+/**
+ * Compute the coefficients for one block of frequency-domain input and add
+ * them to the running totals.  Returns one feature on output 0, or an empty
+ * set if the plugin has not been initialised.
+ */
+MFCCPlugin::FeatureSet
+MFCCPlugin::process(const float *const *inputBuffers,
+                    Vamp::RealTime /* timestamp */)
+{
+    if (!m_mfcc) {
+        cerr << "ERROR: MFCCPlugin::process: "
+             << "MFCC has not been initialised"
+             << endl;
+        return FeatureSet();
+    }
+
+    double *real = new double[m_block];
+    double *imag = new double[m_block];
+
+    // Unpack the interleaved (real, imaginary) input pairs and mirror them
+    // into the upper half of each buffer.
+    // NOTE(review): index m_block/2 is never written by this loop;
+    // presumably MFCC::process() does not read it -- confirm.
+    for (size_t i = 0; i < m_block/2; ++i) {
+        real[i] = inputBuffers[0][i*2];
+        if (i > 0) real[m_block - i] = real[i];
+        imag[i] = inputBuffers[0][i*2+1];
+        if (i > 0) imag[m_block - i] = imag[i];
+    }
+
+    double *output = new double[m_bins];
+
+    m_mfcc->process(real, imag, output);
+
+    delete[] real;
+    delete[] imag;
+
+    Feature feature;
+    feature.hasTimestamp = false;
+    for (int i = 0; i < m_bins; ++i) {
+        double value = output[i];
+        if (isnan(value)) value = 0.0;
+        m_binsums[i] += value;
+        feature.values.push_back(value);
+    }
+    feature.label = "";
+    ++m_count;
+
+    // Release the coefficient buffer; without this every call leaks it.
+    delete[] output;
+
+    FeatureSet returnFeatures;
+    returnFeatures[0].push_back(feature);
+    return returnFeatures;
+}
+
+/**
+ * Return, on output 1, a single feature at time zero holding the mean of
+ * each coefficient over all blocks processed so far.
+ */
+MFCCPlugin::FeatureSet
+MFCCPlugin::getRemainingFeatures()
+{
+    Feature feature;
+    feature.hasTimestamp = true;
+    feature.timestamp = Vamp::RealTime::zeroTime;
+
+    // Walk the totals actually held, so that nothing is read out of range
+    // if this is called before initialise().
+    for (size_t i = 0; i < m_binsums.size(); ++i) {
+        double v = m_binsums[i];
+        if (m_count > 0) v /= m_count;
+        feature.values.push_back(v);
+    }
+    feature.label = "Coefficient means";
+
+    FeatureSet returnFeatures;
+    returnFeatures[1].push_back(feature);
+    return returnFeatures;
+}
+
diff -r 1dc00e4dbae6 -r 5d7ce1d87301 plugins/MFCCPlugin.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/plugins/MFCCPlugin.h Fri Jan 18 13:30:56 2008 +0000
@@ -0,0 +1,78 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
+
+/*
+    QM Vamp Plugin Set
+
+    Centre for Digital Music, Queen Mary, University of London.
+    All rights reserved.
+*/
+
+#ifndef _MFCC_PLUGIN_H_
+#define _MFCC_PLUGIN_H_
+
+// NOTE(review): the targets of the three #include lines below were stripped
+// from the copy of the patch under review and are restored by inference from
+// the names this header uses -- confirm the two project paths against the tree.
+#include <vamp-sdk/Plugin.h>
+#include <dsp/mfcc/MFCC.h>
+
+#include <vector>
+
+/**
+ * Vamp plugin that calculates mel-frequency cepstral coefficients for each
+ * block of frequency-domain input and, at the end of processing, the mean of
+ * each coefficient over all blocks.
+ */
+class MFCCPlugin : public Vamp::Plugin
+{
+public:
+    MFCCPlugin(float inputSampleRate);
+    virtual ~MFCCPlugin();
+
+    bool initialise(size_t channels, size_t stepSize, size_t blockSize);
+    void reset();
+
+    InputDomain getInputDomain() const { return FrequencyDomain; }
+
+    std::string getIdentifier() const;
+    std::string getName() const;
+    std::string getDescription() const;
+    std::string getMaker() const;
+    int getPluginVersion() const;
+    std::string getCopyright() const;
+
+    ParameterList getParameterDescriptors() const;
+    float getParameter(std::string) const;
+    void setParameter(std::string, float);
+
+    size_t getPreferredStepSize() const;
+    size_t getPreferredBlockSize() const;
+
+    OutputList getOutputDescriptors() const;
+
+    FeatureSet process(const float *const *inputBuffers,
+                       Vamp::RealTime timestamp);
+
+    FeatureSet getRemainingFeatures();
+
+protected:
+    int m_bins; // == nceps if m_wantC0 false or nceps+1 if m_wantC0 true
+    bool m_wantC0;
+    float m_logpower;
+
+    void setupConfig();
+
+    MFCCConfig m_config;
+    MFCC *m_mfcc;
+    mutable size_t m_step;
+    mutable size_t m_block;
+
+    std::vector<double> m_binsums; // per-bin running totals
+    size_t m_count;                // number of blocks added to m_binsums
+
+    // NOTE(review): declared but not defined in MFCCPlugin.cpp as added here.
+    Feature normalize(const Feature &);
+};
+
+
+#endif
diff -r 
1dc00e4dbae6 -r 5d7ce1d87301 plugins/SegmenterPlugin.cpp --- a/plugins/SegmenterPlugin.cpp Thu Jan 17 15:37:37 2008 +0000 +++ b/plugins/SegmenterPlugin.cpp Fri Jan 18 13:30:56 2008 +0000 @@ -34,6 +34,21 @@ delete segmenter; } +std::string SegmenterPlugin::getIdentifier() const +{ + return "qm-segmenter"; +} + +std::string SegmenterPlugin::getName() const +{ + return "Segmenter"; +} + +std::string SegmenterPlugin::getDescription() const +{ + return "Divide the track into a sequence of consistent segments"; +} + string SegmenterPlugin::getMaker() const { @@ -202,8 +217,8 @@ hopsize = segmenter->getHopsize(); windowsize = segmenter->getWindowsize(); - std::cerr << "segmenter window size: " << segmenter->getWindowsize() - << std::endl; +// std::cerr << "segmenter window size: " << segmenter->getWindowsize() +// << std::endl; } SegmenterPlugin::OutputList diff -r 1dc00e4dbae6 -r 5d7ce1d87301 plugins/SegmenterPlugin.h --- a/plugins/SegmenterPlugin.h Thu Jan 17 15:37:37 2008 +0000 +++ b/plugins/SegmenterPlugin.h Fri Jan 18 13:30:56 2008 +0000 @@ -27,9 +27,9 @@ bool initialise(size_t channels, size_t stepSize, size_t blockSize); void reset(); - std::string getIdentifier() const { return "qm-segmenter"; } - std::string getName() const { return "Segmenter"; } - std::string getDescription() const { return "Divide the track into a sequence of consistent segments"; } + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; std::string getMaker() const; int getPluginVersion() const; std::string getCopyright() const; diff -r 1dc00e4dbae6 -r 5d7ce1d87301 plugins/SimilarityPlugin.cpp --- a/plugins/SimilarityPlugin.cpp Thu Jan 17 15:37:37 2008 +0000 +++ b/plugins/SimilarityPlugin.cpp Fri Jan 18 13:30:56 2008 +0000 @@ -63,7 +63,7 @@ string SimilarityPlugin::getMaker() const { - return "Chris Cannam, Queen Mary, University of London"; + return "Mark Levy and Chris Cannam, Queen Mary, University of London"; } int @@ -88,7 +88,6 @@ 
SimilarityPlugin::getMaxChannelCount() const { return 1024; -// return 1; } bool @@ -129,11 +128,11 @@ m_featureColumnSize = 20; - MFCCConfig config; - config.FS = lrintf(m_inputSampleRate) / decimationFactor; + MFCCConfig config(lrintf(m_inputSampleRate) / decimationFactor); config.fftsize = 2048; config.nceps = m_featureColumnSize - 1; config.want_c0 = true; + config.logpower = 1; m_mfcc = new MFCC(config); m_fftSize = m_mfcc->getfftlength(); @@ -192,13 +191,7 @@ SimilarityPlugin::getPreferredStepSize() const { if (m_blockSize == 0) calculateBlockSize(); - if (m_type == TypeChroma) { - return m_blockSize/2; - } else { - // for compatibility with old-skool Soundbite, which doesn't - // overlap blocks on input - return m_blockSize; - } + return m_blockSize/2; } size_t @@ -237,7 +230,7 @@ ParameterDescriptor desc; desc.identifier = "featureType"; desc.name = "Feature Type"; - desc.description = "";//!!! + desc.description = "Audio feature used for similarity measure. Timbral: use the first 20 MFCCs (19 plus C0). 
Chromatic: use 12 bin-per-octave chroma."; desc.unit = ""; desc.minValue = 0; desc.maxValue = 1; @@ -399,7 +392,7 @@ } if (m_type == TypeMFCC) { - m_mfcc->process(m_fftSize, decbuf, raw); + m_mfcc->process(decbuf, raw); } else if (m_type == TypeChroma) { raw = m_chromagram->process(decbuf); } @@ -575,7 +568,7 @@ for (std::map::iterator i = sorted.begin(); i != sorted.end(); ++i) { - feature.values.push_back(i->second); + feature.values.push_back(i->second + 1); } returnFeatures[m_sortedVectorOutput].push_back(feature); diff -r 1dc00e4dbae6 -r 5d7ce1d87301 qm-vamp-plugins.pro --- a/qm-vamp-plugins.pro Thu Jan 17 15:37:37 2008 +0000 +++ b/qm-vamp-plugins.pro Fri Jan 18 13:30:56 2008 +0000 @@ -24,6 +24,7 @@ plugins/ChromagramPlugin.h \ plugins/ConstantQSpectrogram.h \ plugins/KeyDetect.h \ + plugins/MFCCPlugin.h \ plugins/SegmenterPlugin.h \ plugins/SimilarityPlugin.h \ plugins/TonalChangeDetect.h @@ -32,6 +33,7 @@ plugins/ChromagramPlugin.cpp \ plugins/ConstantQSpectrogram.cpp \ plugins/KeyDetect.cpp \ + plugins/MFCCPlugin.cpp \ plugins/SegmenterPlugin.cpp \ plugins/SimilarityPlugin.cpp \ plugins/TonalChangeDetect.cpp \