changeset 49:fc88b465548a

* Normalise type option for chromagram * Minimum segment duration option for segmenter * Bit more documentation
author Chris Cannam <c.cannam@qmul.ac.uk>
date Tue, 22 Jan 2008 17:27:48 +0000
parents 3b4572153ce3
children df7a0bc46592
files README.txt plugins/ChromagramPlugin.cpp plugins/ChromagramPlugin.h plugins/SegmenterPlugin.cpp plugins/SegmenterPlugin.h plugins/SimilarityPlugin.cpp plugins/TonalChangeDetect.cpp
diffstat 7 files changed, 97 insertions(+), 64 deletions(-) [+]
line wrap: on
line diff
--- a/README.txt	Mon Jan 21 18:05:28 2008 +0000
+++ b/README.txt	Tue Jan 22 17:27:48 2008 +0000
@@ -9,6 +9,22 @@
 and http://www.sonicvisualiser.org/.
 
 
+License
+=======
+
+These plugins are provided in binary form only.  You may install and
+use the plugin binaries without fee for any purpose, commercial or
+non-commercial.  You may redistribute the plugin binaries provided you
+do so without fee and you retain this README file with your
+distribution.  You may not bundle these plugins with a commercial
+product or distribute them on commercial terms.  If you wish to
+arrange commercial licensing terms, please contact the Centre for
+Digital Music at Queen Mary, University of London.
+
+Copyright (c) 2006-2008 Queen Mary, University of London.  All rights
+reserved.
+
+
 New In This Release
 ===================
 
@@ -80,8 +96,12 @@
 		Detection and Spectral Modulation.
 		ISSC 2005
 
-This plugin analyses a single channel of audio and estimates the
-locations of note onsets within the music.
+The Note Onset Detector plugin analyses a single channel of audio and
+estimates the locations of note onsets within the music.
+
+It calculates an onset likelihood function for each spectral frame,
+and picks peaks in a smoothed version of this function.  The plugin is
+non-causal, returning all results at the end of processing.
 
 It has three outputs: the note onset positions, the onset detection
 function used in estimating onset positions, and a smoothed version of
@@ -105,8 +125,8 @@
 		on Acoustics, Speech and Signal Processing (ICASSP 2005),
 		Vol. 3, pp241-244 Philadelphia, USA, March 19-23, 2005.
 
-This plugin analyses a single channel of audio and estimates the
-locations of metrical beats and the resulting tempo of the music.
+The Tempo and Beat Tracker plugin analyses a single channel of audio
+and estimates the locations of metrical beats and the resulting tempo.
 
 It has three outputs: the beat positions, an ongoing estimate of tempo
 where available, and the onset detection function used in estimating
@@ -126,8 +146,8 @@
 		In Proceedings of Audio Engineering Society 122nd Convention,
 		Vienna, 2007.
 
-This plugin analyses a single channel of audio and continuously
-estimates the key of the music.
+The Key Detector plugin analyses a single channel of audio and
+continuously estimates the key of the music.
 
 It has three outputs: the tonic pitch of the key; a major or minor
 mode flag; and key (combining the tonic and major/minor into a single
@@ -169,6 +189,8 @@
 
 
 
+
+
 Similarity
 ----------
 
@@ -183,8 +205,8 @@
 
 		K. Jacobson.
 		A Multifaceted Approach to Music Similarity.
-		In Proceedings of the Seventh International Conference on Music
-		Information Retrieval (ISMIR), 2006.
+		In Proceedings of the Seventh International Conference on
+		Music Information Retrieval (ISMIR), 2006.
 
 
 Constant-Q Spectrogram
--- a/plugins/ChromagramPlugin.cpp	Mon Jan 21 18:05:28 2008 +0000
+++ b/plugins/ChromagramPlugin.cpp	Tue Jan 22 17:27:48 2008 +0000
@@ -26,7 +26,7 @@
     m_minMIDIPitch = 12;
     m_maxMIDIPitch = 96;
     m_tuningFrequency = 440;
-    m_normalized = true;
+    m_normalise = MathUtilities::NormaliseUnitMax;
     m_bpo = 12;
 
     setupConfig();
@@ -42,7 +42,7 @@
         (m_maxMIDIPitch, 0, m_tuningFrequency);
     m_config.BPO = m_bpo;
     m_config.CQThresh = 0.0054;
-    m_config.isNormalised = m_normalized;
+    m_config.normalise = m_normalise;
 
     m_step = 0;
     m_block = 0;
@@ -137,14 +137,17 @@
     desc.quantizeStep = 1;
     list.push_back(desc);
 
-    desc.identifier = "normalized";
-    desc.name = "Normalized";
+    desc.identifier = "normalization";
+    desc.name = "Normalization";
     desc.unit = "";
     desc.minValue = 0;
-    desc.maxValue = 1;
-    desc.defaultValue = 1;
+    desc.maxValue = 2;
+    desc.defaultValue = 2;
     desc.isQuantized = true;
     desc.quantizeStep = 1;
+    desc.valueNames.push_back("None");
+    desc.valueNames.push_back("Unit Sum");
+    desc.valueNames.push_back("Unit Maximum");
     list.push_back(desc);
 
     return list;
@@ -165,8 +168,8 @@
     if (param == "bpo") {
         return m_bpo;
     }
-    if (param == "normalized") {
-        return m_normalized;
+    if (param == "normalization") {
+        return int(m_normalise);
     }
     std::cerr << "WARNING: ChromagramPlugin::getParameter: unknown parameter \""
               << param << "\"" << std::endl;
@@ -184,8 +187,8 @@
         m_tuningFrequency = value;
     } else if (param == "bpo") {
         m_bpo = lrintf(value);
-    } else if (param == "normalized") {
-        m_normalized = (value > 0.0001);
+    } else if (param == "normalization") {
+        m_normalise = MathUtilities::NormaliseType(int(value + 0.0001));
     } else {
         std::cerr << "WARNING: ChromagramPlugin::setParameter: unknown parameter \""
                   << param << "\"" << std::endl;
@@ -292,9 +295,9 @@
         d.binNames.push_back(names[m_minMIDIPitch % 12]);
     }
 
-    d.hasKnownExtents = m_normalized;
+    d.hasKnownExtents = (m_normalise != MathUtilities::NormaliseNone);
     d.minValue = 0.0;
-    d.maxValue = (m_normalized ? 1.0 : 0.0);
+    d.maxValue = (d.hasKnownExtents ? 1.0 : 0.0);
     d.isQuantized = false;
     d.sampleType = OutputDescriptor::OneSamplePerStep;
     list.push_back(d);
@@ -309,28 +312,6 @@
     return list;
 }
 
-ChromagramPlugin::Feature
-ChromagramPlugin::normalize(const Feature &feature)
-{
-    float min = 0.0, max = 0.0;
-
-    for (size_t i = 0; i < feature.values.size(); ++i) {
-	if (i == 0 || feature.values[i] < min) min = feature.values[i];
-	if (i == 0 || feature.values[i] > max) max = feature.values[i];
-    }
-	
-    if (max == 0.0 || max == min) return feature;
-
-    Feature normalized;
-    normalized.hasTimestamp = false;
-
-    for (size_t i = 0; i < feature.values.size(); ++i) {
-	normalized.values.push_back((feature.values[i] - min) / (max - min));
-    }
-
-    return normalized;
-}
-
 ChromagramPlugin::FeatureSet
 ChromagramPlugin::process(const float *const *inputBuffers,
                           Vamp::RealTime /* timestamp */)
--- a/plugins/ChromagramPlugin.h	Mon Jan 21 18:05:28 2008 +0000
+++ b/plugins/ChromagramPlugin.h	Tue Jan 22 17:27:48 2008 +0000
@@ -49,7 +49,7 @@
     int m_minMIDIPitch;
     int m_maxMIDIPitch;
     float m_tuningFrequency;
-    bool m_normalized;
+    MathUtilities::NormaliseType m_normalise;
     int m_bpo;
 
     void setupConfig();
@@ -61,8 +61,6 @@
 
     vector<double> m_binsums;
     size_t m_count;
-
-    Feature normalize(const Feature &);
 };
 
 
--- a/plugins/SegmenterPlugin.cpp	Mon Jan 21 18:05:28 2008 +0000
+++ b/plugins/SegmenterPlugin.cpp	Tue Jan 22 17:27:48 2008 +0000
@@ -24,6 +24,7 @@
     Plugin(inputSampleRate),
     segmenter(0),
     nSegmentTypes(10),
+    neighbourhoodLimit(4),
     featureType(feature_types(1))
 {
 	
@@ -143,6 +144,18 @@
     desc2.valueNames.push_back("Timbral (MFCC)");
     list.push_back(desc2);	
 	
+    ParameterDescriptor desc3;
+    desc3.identifier = "neighbourhoodLimit";
+    desc3.name = "Minimum segment duration";
+    desc3.description = "Approximate expected minimum duration for each segment";
+    desc3.unit = "s";
+    desc3.minValue = 1;
+    desc3.maxValue = 15;
+    desc3.defaultValue = 4;
+    desc3.isQuantized = true;
+    desc3.quantizeStep = 0.2;
+    list.push_back(desc3);
+
     return list;
 }
 
@@ -156,6 +169,10 @@
     if (param == "featureType") {
         return featureType;
     }
+
+    if (param == "neighbourhoodLimit") {
+        return neighbourhoodLimit;
+    }
     
     std::cerr << "WARNING: SegmenterPlugin::getParameter: unknown parameter \""
               << param << "\"" << std::endl;
@@ -168,22 +185,28 @@
     if (param == "nSegmentTypes") {
 
         nSegmentTypes = int(value + 0.0001);
+        return;
+    }
 
-    } else {
+    if (param == "featureType") {
+        if (featureType != feature_types(value)) // feature type changed, create a new segmenter
+        {
+            featureType = feature_types(value);
+            makeSegmenter();
+        }
+        return;
+    }
 
-        if (param == "featureType") {
-            if (featureType != feature_types(value))	// feature type changed, create a new segmenter
-            {
-                featureType = feature_types(value);
-                makeSegmenter();
-            }
+    if (param == "neighbourhoodLimit") {
+        if (neighbourhoodLimit != value) {
+            neighbourhoodLimit = value;
+            makeSegmenter();
         }
-        else
-        {
-            std::cerr << "WARNING: SegmenterPlugin::setParameter: unknown parameter \""
-                      << param << "\"" << std::endl;
-        }
+        return;
     }
+    
+    std::cerr << "WARNING: SegmenterPlugin::setParameter: unknown parameter \""
+              << param << "\"" << std::endl;
 }
 
 void
@@ -195,7 +218,6 @@
     if (params.featureType == FEATURE_TYPE_CONSTQ)
     {
         params.ncomponents = 20;
-        params.neighbourhoodLimit = 30; 
     }
     if (params.featureType == FEATURE_TYPE_CHROMA)
     {
@@ -203,15 +225,16 @@
         params.windowSize = 0.372;
         params.nbins = 12;
         params.histogramLength = 20;
-        params.neighbourhoodLimit = 40;
     }
     if (params.featureType == FEATURE_TYPE_MFCC)
     {
         params.ncomponents = 20;
-        params.neighbourhoodLimit = 30; 
     }
     delete segmenter;
 
+    params.neighbourhoodLimit =
+        int(neighbourhoodLimit / params.hopSize + 0.0001);
+
     segmenter = new ClusterMeltSegmenter(params);
     segmenter->initialise(m_inputSampleRate);
     hopsize = segmenter->getHopsize();
--- a/plugins/SegmenterPlugin.h	Mon Jan 21 18:05:28 2008 +0000
+++ b/plugins/SegmenterPlugin.h	Tue Jan 22 17:27:48 2008 +0000
@@ -52,6 +52,7 @@
     mutable Segmenter* segmenter;
     mutable int hopsize;
     mutable int windowsize;
+    mutable int neighbourhoodLimit; // in sec
     int nSegmentTypes;
     feature_types featureType;	// 1 = constant-Q, 2 = chroma
     
--- a/plugins/SimilarityPlugin.cpp	Mon Jan 21 18:05:28 2008 +0000
+++ b/plugins/SimilarityPlugin.cpp	Tue Jan 22 17:27:48 2008 +0000
@@ -18,6 +18,7 @@
 #include "dsp/rhythm/BeatSpectrum.h"
 #include "maths/KLDivergence.h"
 #include "maths/CosineDistance.h"
+#include "maths/MathUtilities.h"
 
 using std::string;
 using std::vector;
@@ -177,7 +178,9 @@
         config.max = Pitch::getFrequencyForPitch(96, 0, 440);
         config.BPO = 12;
         config.CQThresh = 0.0054;
-        config.isNormalised = true;
+        // We don't normalise the chromagram's columns individually;
+        // we normalise the mean at the end instead
+        config.normalise = MathUtilities::NormaliseNone;
         m_chromagram = new Chromagram(config);
         m_fftSize = m_chromagram->getFrameSize();
 
@@ -273,7 +276,7 @@
         config.max = Pitch::getFrequencyForPitch(96, 0, 440);
         config.BPO = 12;
         config.CQThresh = 0.0054;
-        config.isNormalised = false;
+        config.normalise = MathUtilities::NormaliseNone;
         Chromagram *c = new Chromagram(config);
         size_t sz = c->getFrameSize();
         delete c;
@@ -685,7 +688,12 @@
 
     } else {
 
-        // Chroma are histograms already
+        // We use the KL divergence for distributions of discrete
+        // variables, as chroma are histograms already.  Or at least,
+        // they will be when we've normalised them like this:
+        for (int i = 0; i < m_channels; ++i) {
+            MathUtilities::normalise(m[i], MathUtilities::NormaliseUnitSum);
+        }
 
         KLDivergence kld;
 
--- a/plugins/TonalChangeDetect.cpp	Mon Jan 21 18:05:28 2008 +0000
+++ b/plugins/TonalChangeDetect.cpp	Tue Jan 22 17:27:48 2008 +0000
@@ -204,7 +204,7 @@
         (m_maxMIDIPitch, 0, m_tuningFrequency);
     m_config.BPO = 12;
     m_config.CQThresh = 0.0054;
-    m_config.isNormalised = false;
+    m_config.normalise = MathUtilities::NormaliseNone;
 
     m_step = 0;
     m_block = 0;