changeset 60:90fa946fda40

* Add key strength plot to key detector * Fix vector overrun in similarity plugin if some empty frames have been encountered * Fix uninitialised m_count in MFCC plugin * Doc update
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 01 Feb 2008 16:47:39 +0000
parents af3ec1585323
children c78ec2a208e2
files README.txt plugins/KeyDetect.cpp plugins/KeyDetect.h plugins/MFCCPlugin.cpp plugins/SimilarityPlugin.cpp plugins/SimilarityPlugin.h qm-vamp-plugins.pro
diffstat 7 files changed, 92 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/README.txt	Thu Jan 31 12:04:59 2008 +0000
+++ b/README.txt	Fri Feb 01 16:47:39 2008 +0000
@@ -33,8 +33,8 @@
 segmentation of music audio, and a Mel-frequency cepstral coefficients
 calculation plugin.
 
-This release also includes significant fixes to the existing key
-detector and chromagram plugins.
+This release also includes significant updates to the existing key
+detector, tempo tracker, and chromagram plugins.
 
 
 To Install
@@ -122,7 +122,8 @@
 
  References:    M. E. P. Davies and M. D. Plumbley.
                 Context-dependent beat tracking of musical audio.
-                Technical Report C4DM-TR-06-02. 5 April 2006.
+                In IEEE Transactions on Audio, Speech and Language
+                Processing. Vol. 15, No. 3, pp. 1009-1020, 2007.
 
                 M. E. P. Davies and M. D. Plumbley.
                 Beat Tracking With A Two State Model.
@@ -153,15 +154,30 @@
 The Key Detector plugin analyses a single channel of audio and
 continuously estimates the key of the music.
 
-It has three outputs: the tonic pitch of the key; a major or minor
-mode flag; and key (combining the tonic and major/minor into a single
-value).  These outputs have the values:
+It has four outputs: the tonic pitch of the key; a major or minor mode
+flag; the key (combining the tonic and major/minor into a single
+value); and a key strength plot which reports the degree to which the
+chroma vector extracted from each input block correlates with the stored
+key profiles for each major and minor key.  The key profiles are drawn
+from analysis of Book I of the Well-Tempered Clavier by J. S. Bach,
+recorded at A=440 equal temperament.
+
+The outputs have the values:
 
   Tonic pitch: C = 1, C#/Db = 2, ..., B = 12
+
   Major/minor mode: major = 0, minor = 1
+
   Key: C major = 1, C#/Db major = 2, ..., B major = 12
        C minor = 13, C#/Db minor = 14, ..., B minor = 24
 
+  Key Strength Plot: 25 separate bins per feature: 1-12 (major keys)
+       and 14-25 (minor keys), each in circle-of-fifths order.  Bin 13
+       is unused, not for superstitious reasons but simply so as to
+       delimit the major and minor areas if they are displayed on a
+       single plot by the plugin host.  Higher bin values show
+       increased correlation with the key profile for that key.
+
 The outputs are also labelled with pitch or key as text.
 
 
--- a/plugins/KeyDetect.cpp	Thu Jan 31 12:04:59 2008 +0000
+++ b/plugins/KeyDetect.cpp	Fri Feb 01 16:47:39 2008 +0000
@@ -17,6 +17,12 @@
 #include <cmath>
 
 
+// Plot bin order (circle of fifths) -> 1-based key index: 1-12 major, 13-24 minor
+static int conversion[24] =
+{ 7, 12, 5, 10, 3, 8, 1, 6, 11, 4, 9, 2,
+  16, 21, 14, 19, 24, 17, 22, 15, 20, 13, 18, 23 };
+
+
 KeyDetector::KeyDetector(float inputSampleRate) :
     Plugin(inputSampleRate),
     m_stepSize(0),
@@ -232,6 +238,25 @@
     d.sampleType = OutputDescriptor::OneSamplePerStep;
     list.push_back(d);
 
+    d.identifier = "keystrength";
+    d.name = "Key Strength Plot";
+    d.unit = "";
+    d.description = "Correlation of the chroma vector with stored key profile for each major and minor key";
+    d.hasFixedBinCount = true;
+    d.binCount = 25;
+    d.hasKnownExtents = false;
+    d.isQuantized = false;
+    d.sampleType = OutputDescriptor::OneSamplePerStep;
+    for (int i = 0; i < 24; ++i) {
+        if (i == 12) d.binNames.push_back(" ");
+        int idx = conversion[i];
+        std::string label = getKeyName(idx > 12 ? idx-12 : idx);
+        if (i < 12) label += " major";
+        else label += " minor";
+        d.binNames.push_back(label);
+    }
+    list.push_back(d);
+
     return list;
 }
 
@@ -255,7 +280,6 @@
     int tonic = key;
     if (tonic > 12) tonic -= 12;
 
-
     int prevTonic = m_prevKey;
     if (prevTonic > 12) prevTonic -= 12;
 
@@ -293,6 +317,16 @@
 
     m_prevKey = key;
 
+    Feature ksf;
+    ksf.values.reserve(25);
+    double *keystrengths = m_getKeyMode->getKeyStrengths();
+    for (int i = 0; i < 24; ++i) {
+        if (i == 12) ksf.values.push_back(-1);
+        ksf.values.push_back(keystrengths[conversion[i]-1]);
+    }
+    ksf.hasTimestamp = false;
+    returnFeatures[3].push_back(ksf);
+
     return returnFeatures;
 }
 
@@ -328,7 +362,7 @@
 }
 
 const char *
-KeyDetector::getKeyName(int index)
+KeyDetector::getKeyName(int index) const
 {
     // Keys are numbered with 1 => C, 12 => B
     // This is based on chromagram base set to a C in qm-dsp's GetKeyMode.cpp
--- a/plugins/KeyDetect.h	Thu Jan 31 12:04:59 2008 +0000
+++ b/plugins/KeyDetect.h	Fri Feb 01 16:47:39 2008 +0000
@@ -79,7 +79,7 @@
     float m_tuningFrequency;
     int m_length;
 
-    const char *getKeyName(int index);
+    const char *getKeyName(int index) const;
 
     GetKeyMode* m_getKeyMode;
     double* m_inputFrame;
--- a/plugins/MFCCPlugin.cpp	Thu Jan 31 12:04:59 2008 +0000
+++ b/plugins/MFCCPlugin.cpp	Fri Feb 01 16:47:39 2008 +0000
@@ -25,7 +25,8 @@
     m_config(lrintf(inputSampleRate)),
     m_mfcc(0),
     m_step(1024),
-    m_block(2048)
+    m_block(2048),
+    m_count(0)
 {
     m_bins = 20;
     m_wantC0 = true;
@@ -199,6 +200,7 @@
             m_binsums[i] = 0.0;
         }
     }
+    m_count = 0;
 }
 
 size_t
--- a/plugins/SimilarityPlugin.cpp	Thu Jan 31 12:04:59 2008 +0000
+++ b/plugins/SimilarityPlugin.cpp	Fri Feb 01 16:47:39 2008 +0000
@@ -148,6 +148,10 @@
 
     m_lastNonEmptyFrame = std::vector<int>(m_channels);
     for (int i = 0; i < m_channels; ++i) m_lastNonEmptyFrame[i] = -1;
+
+    m_emptyFrameCount = std::vector<int>(m_channels);
+    for (int i = 0; i < m_channels; ++i) m_emptyFrameCount[i] = 0;
+
     m_frameNo = 0;
 
     int decimationFactor = getDecimationFactor();
@@ -204,12 +208,12 @@
         m_rhythmClipFrames =
             int(ceil((m_rhythmClipDuration * m_processRate) 
                      / m_rhythmClipFrameSize));
-        std::cerr << "SimilarityPlugin::initialise: rhythm clip is "
-                  << m_rhythmClipFrames << " frames of size "
-                  << m_rhythmClipFrameSize << " at process rate "
-                  << m_processRate << " ( = "
-                  << (float(m_rhythmClipFrames * m_rhythmClipFrameSize) / m_processRate) << " sec )"
-                  << std::endl;
+//        std::cerr << "SimilarityPlugin::initialise: rhythm clip requires "
+//                  << m_rhythmClipFrames << " frames of size "
+//                  << m_rhythmClipFrameSize << " at process rate "
+//                  << m_processRate << " ( = "
+//                  << (float(m_rhythmClipFrames * m_rhythmClipFrameSize) / m_processRate) << " sec )"
+//                  << std::endl;
 
         MFCCConfig config(m_processRate);
         config.fftsize = m_rhythmClipFrameSize;
@@ -245,6 +249,14 @@
         m_rhythmValues[i].clear();
     }
 
+    for (int i = 0; i < m_lastNonEmptyFrame.size(); ++i) {
+        m_lastNonEmptyFrame[i] = -1;
+    }
+
+    for (int i = 0; i < m_emptyFrameCount.size(); ++i) {
+        m_emptyFrameCount[i] = 0;
+    }
+
     m_done = false;
 }
 
@@ -536,6 +548,7 @@
                     }
                 }
             }
+            m_emptyFrameCount[c]++;
             continue;
         }
 
@@ -645,8 +658,9 @@
             // We want to take values up to, but not including, the
             // last non-empty frame (which may be partial)
 
-            int sz = m_lastNonEmptyFrame[i];
+            int sz = m_lastNonEmptyFrame[i] - m_emptyFrameCount[i];
             if (sz < 0) sz = 0;
+            if (sz >= m_values[i].size()) sz = m_values[i].size()-1;
 
             count = 0;
             for (int k = 0; k < sz; ++k) {
@@ -752,6 +766,13 @@
 {
     if (!needRhythm()) return FeatureMatrix();
 
+//        std::cerr << "SimilarityPlugin::initialise: rhythm clip for channel 0 contains "
+//                  << m_rhythmValues[0].size() << " frames of size "
+//                  << m_rhythmClipFrameSize << " at process rate "
+//                  << m_processRate << " ( = "
+//                  << (float(m_rhythmValues[0].size() * m_rhythmClipFrameSize) / m_processRate) << " sec )"
+//                  << std::endl;
+
     BeatSpectrum bscalc;
     CosineDistance cd;
 
--- a/plugins/SimilarityPlugin.h	Thu Jan 31 12:04:59 2008 +0000
+++ b/plugins/SimilarityPlugin.h	Fri Feb 01 16:47:39 2008 +0000
@@ -88,6 +88,7 @@
     static const float m_allRhythm;
 
     std::vector<int> m_lastNonEmptyFrame; // per channel
+    std::vector<int> m_emptyFrameCount; // per channel
 
     mutable int m_distanceMatrixOutput;
     mutable int m_distanceVectorOutput;
--- a/qm-vamp-plugins.pro	Thu Jan 31 12:04:59 2008 +0000
+++ b/qm-vamp-plugins.pro	Fri Feb 01 16:47:39 2008 +0000
@@ -4,7 +4,7 @@
 CONFIG += plugin warn_on release
 CONFIG -= qt
 
-linux-g++:QMAKE_CXXFLAGS_RELEASE += -DNDEBUG -O3 -march=pentium3 -msse 
+linux-g++:QMAKE_CXXFLAGS_RELEASE += -DNDEBUG -O3 -march=pentium3 -mfpmath=sse -msse -ffast-math
 
 OBJECTS_DIR = tmp_obj
 MOC_DIR = tmp_moc