changeset 45:5d7ce1d87301

* Add MFCC plugin * Add means output to Chromagram plugin * Update similarity plugin for MFCC changes
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 18 Jan 2008 13:30:56 +0000
parents 1dc00e4dbae6
children 26a2e341d358
files libmain.cpp plugins/ChromagramPlugin.cpp plugins/ChromagramPlugin.h plugins/MFCCPlugin.cpp plugins/MFCCPlugin.h plugins/SegmenterPlugin.cpp plugins/SegmenterPlugin.h plugins/SimilarityPlugin.cpp qm-vamp-plugins.pro
diffstat 9 files changed, 438 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/libmain.cpp	Thu Jan 17 15:37:37 2008 +0000
+++ b/libmain.cpp	Fri Jan 18 13:30:56 2008 +0000
@@ -16,6 +16,7 @@
 #include "plugins/ConstantQSpectrogram.h"
 #include "plugins/TonalChangeDetect.h"
 #include "plugins/KeyDetect.h"
+#include "plugins/MFCCPlugin.h"
 #include "plugins/SegmenterPlugin.h"
 #include "plugins/SimilarityPlugin.h"
 
@@ -25,6 +26,7 @@
 static Vamp::PluginAdapter<ConstantQSpectrogram> constantQAdapter;
 static Vamp::PluginAdapter<TonalChangeDetect> tonalChangeDetectorAdapter;
 static Vamp::PluginAdapter<KeyDetector> keyDetectorAdapter;
+static Vamp::PluginAdapter<MFCCPlugin> mfccPluginAdapter;
 static Vamp::PluginAdapter<SegmenterPlugin> segmenterPluginAdapter;
 static Vamp::PluginAdapter<SimilarityPlugin> similarityPluginAdapter;
 
@@ -42,6 +44,7 @@
     case  5: return keyDetectorAdapter.getDescriptor();
     case  6: return segmenterPluginAdapter.getDescriptor();
     case  7: return similarityPluginAdapter.getDescriptor();
+    case  8: return mfccPluginAdapter.getDescriptor();
     default: return 0;
     }
 }
--- a/plugins/ChromagramPlugin.cpp	Thu Jan 17 15:37:37 2008 +0000
+++ b/plugins/ChromagramPlugin.cpp	Fri Jan 18 13:30:56 2008 +0000
@@ -81,12 +81,13 @@
 int
 ChromagramPlugin::getPluginVersion() const
 {
-    return 2;
+    return 3;
 }
 
 string
 ChromagramPlugin::getCopyright() const
 {
+    //!!! update
     return "Copyright (c) 2006 - All Rights Reserved";
 }
 
@@ -99,6 +100,7 @@
     desc.identifier = "minpitch";
     desc.name = "Minimum Pitch";
     desc.unit = "MIDI units";
+    //!!! descriptions
     desc.minValue = 0;
     desc.maxValue = 127;
     desc.defaultValue = 12;
@@ -208,6 +210,13 @@
 	      << blockSize << std::endl;
 
     m_chromagram = new Chromagram(m_config);
+    m_binsums = vector<double>(m_config.BPO);
+
+    for (int i = 0; i < m_config.BPO; ++i) {
+        m_binsums[i] = 0.0;
+    }
+
+    m_count = 0;
 
     m_step = m_chromagram->getHopSize();
     m_block = m_chromagram->getFrameSize();
@@ -290,6 +299,13 @@
     d.sampleType = OutputDescriptor::OneSamplePerStep;
     list.push_back(d);
 
+    d.identifier = "chromameans";
+    d.name = "Chroma Means";
+    //!!! descriptions
+    d.sampleType = OutputDescriptor::FixedSampleRate;
+    d.sampleRate = 1;
+    list.push_back(d);
+
     return list;
 }
 
@@ -346,9 +362,11 @@
     for (size_t i = 0; i < m_config.BPO; ++i) {
         double value = output[i];
         if (isnan(value)) value = 0.0;
+        m_binsums[i] += value;
 	feature.values.push_back(value);
     }
     feature.label = "";
+    ++m_count;
 
     FeatureSet returnFeatures;
     returnFeatures[0].push_back(feature);
@@ -358,6 +376,19 @@
 ChromagramPlugin::FeatureSet
 ChromagramPlugin::getRemainingFeatures()
 {
-    return FeatureSet();
+    Feature feature;
+    feature.hasTimestamp = true;
+    feature.timestamp = Vamp::RealTime::zeroTime;
+  
+    for (size_t i = 0; i < m_config.BPO; ++i) {
+        double v = m_binsums[i];
+        if (m_count > 0) v /= m_count;
+        feature.values.push_back(v);
+    }
+    feature.label = "Chromagram bin means";
+
+    FeatureSet returnFeatures;
+    returnFeatures[1].push_back(feature);
+    return returnFeatures;
 }
 
--- a/plugins/ChromagramPlugin.h	Thu Jan 17 15:37:37 2008 +0000
+++ b/plugins/ChromagramPlugin.h	Fri Jan 18 13:30:56 2008 +0000
@@ -13,8 +13,6 @@
 #include <vamp-sdk/Plugin.h>
 #include <dsp/chromagram/Chromagram.h>
 
-#include <queue>
-
 class ChromagramPlugin : public Vamp::Plugin
 {
 public:
@@ -61,6 +59,9 @@
     mutable size_t m_step;
     mutable size_t m_block;
 
+    vector<double> m_binsums;
+    size_t m_count;
+
     Feature normalize(const Feature &);
 };
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plugins/MFCCPlugin.cpp	Fri Jan 18 13:30:56 2008 +0000
@@ -0,0 +1,301 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    QM Vamp Plugin Set
+
+    Centre for Digital Music, Queen Mary, University of London.
+    All rights reserved.
+*/
+
+#include "MFCCPlugin.h"
+
+#include <dsp/mfcc/MFCC.h>
+
+using std::string;
+using std::vector;
+using std::cerr;
+using std::endl;
+
+// Sets the defaults (20 coefficients including C0, log power 1); the
+// MFCC object itself is only created later, by initialise().
+MFCCPlugin::MFCCPlugin(float inputSampleRate) :
+    Vamp::Plugin(inputSampleRate),
+    m_bins(20), m_wantC0(true), m_logpower(1),
+    m_config(lrintf(inputSampleRate)),
+    m_mfcc(0),
+    m_step(1024),
+    m_block(2048),
+    m_count(0) // was left uninitialised although process() increments it
+{
+    setupConfig();
+}
+
+void
+MFCCPlugin::setupConfig() // mirror the plugin's current settings into m_config
+{
+    m_config.logpower = m_logpower;
+    m_config.want_c0 = m_wantC0;
+    m_config.nceps = m_bins - (m_wantC0 ? 1 : 0); // m_bins counts C0, nceps does not
+    m_config.fftsize = m_block;
+    m_config.FS = lrintf(m_inputSampleRate);
+}
+
+MFCCPlugin::~MFCCPlugin() // frees the MFCC object, if one has been created
+{
+    delete m_mfcc;
+}
+
+string
+MFCCPlugin::getIdentifier() const // identifier string reported for this plugin
+{
+    return string("qm-mfcc");
+}
+
+string
+MFCCPlugin::getName() const // display name reported for this plugin
+{
+    return string("Mel-Frequency Cepstral Coefficients");
+}
+
+string
+MFCCPlugin::getDescription() const
+{
+    //!!! TODO: no description text has been written yet
+    return string();
+}
+
+string
+MFCCPlugin::getMaker() const // maker string reported for this plugin
+{
+    return string("Queen Mary, University of London");
+}
+
+int
+MFCCPlugin::getPluginVersion() const // version number reported for this plugin
+{
+    return 1;
+}
+
+string
+MFCCPlugin::getCopyright() const // copyright notice reported for this plugin
+{
+    //!!! update
+    return string("Copyright (c) 2008 - All Rights Reserved");
+}
+
+MFCCPlugin::ParameterList
+MFCCPlugin::getParameterDescriptors() const
+{
+    // Publishes three parameters, refilling one descriptor for each in turn.
+    ParameterDescriptor p;
+    ParameterList params;
+    //!!! descriptions -- "including C0 if requested"
+    p.identifier = "nceps";
+    p.name = "Number of Coefficients";
+    p.unit = "";
+    p.isQuantized = true;
+    p.quantizeStep = 1;
+    p.minValue = 1;
+    p.maxValue = 40;
+    p.defaultValue = 20;
+    params.push_back(p);
+
+    p.identifier = "logpower";
+    p.name = "Power for Mel Amplitude Logs";
+    p.unit = "";
+    p.isQuantized = false;
+    p.quantizeStep = 0;
+    p.minValue = 0;
+    p.maxValue = 5;
+    p.defaultValue = 1;
+    params.push_back(p);
+
+    //!!! description
+    p.identifier = "wantc0";
+    p.name = "Include C0";
+    p.unit = "";
+    p.isQuantized = true;
+    p.quantizeStep = 1;
+    p.minValue = 0;
+    p.maxValue = 1;
+    p.defaultValue = 1;
+    params.push_back(p);
+
+    return params;
+}
+
+float
+MFCCPlugin::getParameter(std::string param) const // unknown names warn and yield 0
+{
+    float result = 0.0;
+    if (param == "wantc0") {
+        result = (m_wantC0 ? 1 : 0);
+    } else if (param == "logpower") {
+        result = m_logpower;
+    } else if (param == "nceps") {
+        result = m_bins;
+    } else {
+        std::cerr << "WARNING: MFCCPlugin::getParameter: unknown parameter \""
+                  << param << "\"" << std::endl;
+    }
+    return result;
+}
+
+void
+MFCCPlugin::setParameter(std::string param, float value) // store value, then refresh m_config
+{
+    if (param == "nceps") {
+        m_bins = lrintf(value);
+    } else if (param == "logpower") {
+        m_logpower = value; // declared unquantized, so no rounding (was lrintf)
+    } else if (param == "wantc0") {
+        m_wantC0 = (value > 0.5);
+    } else {
+        std::cerr << "WARNING: MFCCPlugin::setParameter: unknown parameter \""
+                  << param << "\"" << std::endl;
+    }
+
+    setupConfig();
+}
+
+// Prepare for processing with the host's step and block sizes.  Any MFCC
+// object from an earlier call is discarded first.  Returns false, leaving
+// the plugin uninitialised, if the channel count is unsupported.
+bool
+MFCCPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
+{
+    delete m_mfcc; // deleting a null pointer is harmless
+    m_mfcc = 0;
+
+    if (channels < getMinChannelCount() ||
+        channels > getMaxChannelCount()) return false;
+
+    std::cerr << "MFCCPlugin::initialise: step " << stepSize << ", block "
+              << blockSize << std::endl;
+
+    m_step = stepSize;
+    m_block = blockSize;
+    setupConfig();
+
+    m_mfcc = new MFCC(m_config);
+
+    // Start the running means from scratch.  m_count used to be left
+    // unset here, so the means were divided by an indeterminate value.
+    m_binsums.assign(m_bins, 0.0);
+    m_count = 0;
+    return true;
+}
+
+void
+MFCCPlugin::reset() // does nothing unless initialise() has built an MFCC object
+{
+    if (!m_mfcc) return;
+
+    delete m_mfcc;
+    m_mfcc = new MFCC(m_config);
+    // Restart the means; the frame count used to be left running here.
+    m_binsums.assign(m_bins, 0.0);
+    m_count = 0;
+}
+
+size_t
+MFCCPlugin::getPreferredStepSize() const // same value the constructor gives m_step
+{
+    return 1024;
+}
+
+size_t
+MFCCPlugin::getPreferredBlockSize() const // same value the constructor gives m_block
+{
+    return 2048;
+}
+
+MFCCPlugin::OutputList
+MFCCPlugin::getOutputDescriptors() const
+{
+    OutputList outputs;
+    OutputDescriptor out;
+    // Output 0: one m_bins-wide vector of coefficients per processing step.
+    out.sampleType = OutputDescriptor::OneSamplePerStep;
+    out.isQuantized = false;
+    out.hasKnownExtents = false;
+    out.binCount = m_bins;
+    out.hasFixedBinCount = true;
+    out.unit = "";
+    out.name = "Coefficients";
+    out.identifier = "coefficients";
+    outputs.push_back(out);
+
+    // Output 1: same layout, declared at a fixed sample rate of 1 (descriptions still missing).
+    out.sampleRate = 1;
+    out.sampleType = OutputDescriptor::FixedSampleRate;
+    out.name = "Means of Coefficients";
+    out.identifier = "means";
+    outputs.push_back(out);
+
+    return outputs;
+}
+
+// Compute one frame of coefficients from channel 0 of the frequency-domain
+// input, add it to the running sums, and return it on output 0.
+MFCCPlugin::FeatureSet
+MFCCPlugin::process(const float *const *inputBuffers,
+                    Vamp::RealTime /* timestamp */)
+{
+    if (!m_mfcc) {
+        cerr << "ERROR: MFCCPlugin::process: "
+             << "MFCC has not been initialised"
+             << endl;
+        return FeatureSet();
+    }
+
+    // Vectors rather than new[]: they start zeroed and free themselves
+    // (the output array used to be leaked on every call).
+    vector<double> real(m_block), imag(m_block), output(m_bins);
+
+    // Per the Vamp API the input holds m_block/2 + 1 interleaved (re, im) pairs;
+    // "<=" takes in the final pair, whose slot used to be left unset.
+    for (size_t i = 0; i <= m_block/2; ++i) {
+        real[i] = inputBuffers[0][i*2];
+        imag[i] = inputBuffers[0][i*2+1];
+        if (i > 0) {
+            real[m_block - i] = real[i];
+            imag[m_block - i] = imag[i];
+        }
+    }
+
+    m_mfcc->process(&real[0], &imag[0], &output[0]);
+    Feature feature;
+    feature.hasTimestamp = false;
+    for (size_t i = 0; i < output.size(); ++i) {
+        double value = output[i];
+        if (isnan(value)) value = 0.0;
+        m_binsums[i] += value;
+        feature.values.push_back(value);
+    }
+    feature.label = "";
+    ++m_count;
+    FeatureSet returnFeatures;
+    returnFeatures[0].push_back(feature);
+    return returnFeatures;
+}
+
+MFCCPlugin::FeatureSet
+MFCCPlugin::getRemainingFeatures()
+{
+    // Emit a single feature on output 1, stamped at time zero, holding each
+    // accumulated sum divided by the number of processed frames (or the raw
+    // sum when no frames were counted).
+    Feature means;
+    means.label = "Coefficient means";
+    means.timestamp = Vamp::RealTime::zeroTime;
+    means.hasTimestamp = true;
+    for (size_t bin = 0; bin < m_bins; ++bin) {
+        means.values.push_back(m_count > 0 ? m_binsums[bin] / m_count : m_binsums[bin]);
+    }
+
+    FeatureSet remaining;
+    remaining[1].push_back(means);
+    return remaining;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plugins/MFCCPlugin.h	Fri Jan 18 13:30:56 2008 +0000
@@ -0,0 +1,69 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    QM Vamp Plugin Set
+
+    Centre for Digital Music, Queen Mary, University of London.
+    All rights reserved.
+*/
+
+#ifndef _MFCC_PLUGIN_H_
+#define _MFCC_PLUGIN_H_
+
+#include <vamp-sdk/Plugin.h>
+#include <dsp/mfcc/MFCC.h>
+
+#include <vector>
+
+class MFCCPlugin : public Vamp::Plugin // Vamp plugin wrapping the MFCC class from <dsp/mfcc/MFCC.h>
+{
+public:
+    MFCCPlugin(float inputSampleRate);
+    virtual ~MFCCPlugin();
+
+    bool initialise(size_t channels, size_t stepSize, size_t blockSize); // false if the channel count is unsupported
+    void reset(); // once initialised: rebuilds the MFCC object and zeroes the running sums
+
+    InputDomain getInputDomain() const { return FrequencyDomain; } // process() receives frequency-domain input
+
+    std::string getIdentifier() const;
+    std::string getName() const;
+    std::string getDescription() const;
+    std::string getMaker() const;
+    int getPluginVersion() const;
+    std::string getCopyright() const;
+
+    ParameterList getParameterDescriptors() const; // "nceps", "logpower" and "wantc0"
+    float getParameter(std::string) const;
+    void setParameter(std::string, float);
+
+    size_t getPreferredStepSize() const;
+    size_t getPreferredBlockSize() const;
+
+    OutputList getOutputDescriptors() const; // output 0: "coefficients", output 1: "means"
+
+    FeatureSet process(const float *const *inputBuffers,
+                       Vamp::RealTime timestamp); // one coefficient vector per call, on output 0
+
+    FeatureSet getRemainingFeatures(); // per-coefficient means over all processed frames, on output 1
+
+protected:
+    int m_bins; // == nceps if m_wantC0 is false, or nceps+1 if m_wantC0 is true
+    bool m_wantC0; // copied to MFCCConfig::want_c0
+    float m_logpower; // copied to MFCCConfig::logpower
+
+    void setupConfig(); // copies the settings above, the sample rate and m_block into m_config
+
+    MFCCConfig m_config;
+    MFCC *m_mfcc; // owned; null until initialise() succeeds
+    mutable size_t m_step;
+    mutable size_t m_block; // block size; also used as MFCCConfig::fftsize
+
+    std::vector<double> m_binsums; // running per-coefficient sums accumulated by process()
+    size_t m_count; // number of frames accumulated into m_binsums
+
+    Feature normalize(const Feature &); // NOTE(review): no definition appears in MFCCPlugin.cpp -- confirm it is needed
+};
+
+
+#endif
--- a/plugins/SegmenterPlugin.cpp	Thu Jan 17 15:37:37 2008 +0000
+++ b/plugins/SegmenterPlugin.cpp	Fri Jan 18 13:30:56 2008 +0000
@@ -34,6 +34,21 @@
     delete segmenter;
 }
 
+std::string SegmenterPlugin::getIdentifier() const // identifier string reported for this plugin
+{
+    return std::string("qm-segmenter");
+}
+
+std::string SegmenterPlugin::getName() const // display name reported for this plugin
+{
+    return std::string("Segmenter");
+}
+
+std::string SegmenterPlugin::getDescription() const // description text reported for this plugin
+{
+    return std::string("Divide the track into a sequence of consistent segments");
+}
+
 string
 SegmenterPlugin::getMaker() const
 {
@@ -202,8 +217,8 @@
     hopsize = segmenter->getHopsize();
     windowsize = segmenter->getWindowsize();
 
-    std::cerr << "segmenter window size: " << segmenter->getWindowsize()
-              << std::endl;
+//    std::cerr << "segmenter window size: " << segmenter->getWindowsize()
+//              << std::endl;
 }
 
 SegmenterPlugin::OutputList
--- a/plugins/SegmenterPlugin.h	Thu Jan 17 15:37:37 2008 +0000
+++ b/plugins/SegmenterPlugin.h	Fri Jan 18 13:30:56 2008 +0000
@@ -27,9 +27,9 @@
     bool initialise(size_t channels, size_t stepSize, size_t blockSize);
     void reset();
 	
-    std::string getIdentifier() const { return "qm-segmenter"; }
-    std::string getName() const { return "Segmenter"; }
-    std::string getDescription() const { return "Divide the track into a sequence of consistent segments"; }
+    std::string getIdentifier() const;
+    std::string getName() const;
+    std::string getDescription() const;
     std::string getMaker() const;
     int getPluginVersion() const;
     std::string getCopyright() const;
--- a/plugins/SimilarityPlugin.cpp	Thu Jan 17 15:37:37 2008 +0000
+++ b/plugins/SimilarityPlugin.cpp	Fri Jan 18 13:30:56 2008 +0000
@@ -63,7 +63,7 @@
 string
 SimilarityPlugin::getMaker() const
 {
-    return "Chris Cannam, Queen Mary, University of London";
+    return "Mark Levy and Chris Cannam, Queen Mary, University of London";
 }
 
 int
@@ -88,7 +88,6 @@
 SimilarityPlugin::getMaxChannelCount() const
 {
     return 1024;
-//    return 1;
 }
 
 bool
@@ -129,11 +128,11 @@
 
         m_featureColumnSize = 20;
 
-        MFCCConfig config;
-        config.FS = lrintf(m_inputSampleRate) / decimationFactor;
+        MFCCConfig config(lrintf(m_inputSampleRate) / decimationFactor);
         config.fftsize = 2048;
         config.nceps = m_featureColumnSize - 1;
         config.want_c0 = true;
+        config.logpower = 1;
         m_mfcc = new MFCC(config);
         m_fftSize = m_mfcc->getfftlength();
 
@@ -192,13 +191,7 @@
 SimilarityPlugin::getPreferredStepSize() const
 {
     if (m_blockSize == 0) calculateBlockSize();
-    if (m_type == TypeChroma) {
-        return m_blockSize/2;
-    } else {
-        // for compatibility with old-skool Soundbite, which doesn't
-        // overlap blocks on input
-        return m_blockSize;
-    }
+    return m_blockSize/2;
 }
 
 size_t
@@ -237,7 +230,7 @@
     ParameterDescriptor desc;
     desc.identifier = "featureType";
     desc.name = "Feature Type";
-    desc.description = "";//!!!
+    desc.description = "Audio feature used for similarity measure.  Timbral: use the first 20 MFCCs (19 plus C0).  Chromatic: use 12 bin-per-octave chroma.";
     desc.unit = "";
     desc.minValue = 0;
     desc.maxValue = 1;
@@ -399,7 +392,7 @@
         }
 
         if (m_type == TypeMFCC) {
-            m_mfcc->process(m_fftSize, decbuf, raw);
+            m_mfcc->process(decbuf, raw);
         } else if (m_type == TypeChroma) {
             raw = m_chromagram->process(decbuf);
         }                
@@ -575,7 +568,7 @@
 
     for (std::map<double, int>::iterator i = sorted.begin();
          i != sorted.end(); ++i) {
-        feature.values.push_back(i->second);
+        feature.values.push_back(i->second + 1);
     }
 
     returnFeatures[m_sortedVectorOutput].push_back(feature);
--- a/qm-vamp-plugins.pro	Thu Jan 17 15:37:37 2008 +0000
+++ b/qm-vamp-plugins.pro	Fri Jan 18 13:30:56 2008 +0000
@@ -24,6 +24,7 @@
            plugins/ChromagramPlugin.h \
            plugins/ConstantQSpectrogram.h \
            plugins/KeyDetect.h \
+           plugins/MFCCPlugin.h \
            plugins/SegmenterPlugin.h \
            plugins/SimilarityPlugin.h \
            plugins/TonalChangeDetect.h
@@ -32,6 +33,7 @@
            plugins/ChromagramPlugin.cpp \
            plugins/ConstantQSpectrogram.cpp \
            plugins/KeyDetect.cpp \
+           plugins/MFCCPlugin.cpp \
            plugins/SegmenterPlugin.cpp \
            plugins/SimilarityPlugin.cpp \
            plugins/TonalChangeDetect.cpp \