diff examples/FixedTempoEstimator.cpp @ 255:88ef5ffdbe8d

* docs
author cannam
date Wed, 12 Nov 2008 14:11:01 +0000
parents 5bfed156b45d
children 23352e424631
line wrap: on
line diff
--- a/examples/FixedTempoEstimator.cpp	Wed Nov 12 10:39:05 2008 +0000
+++ b/examples/FixedTempoEstimator.cpp	Wed Nov 12 14:11:01 2008 +0000
@@ -47,6 +47,7 @@
 
 
 class FixedTempoEstimator::D
+// this class just avoids us having to declare any data members in the header
 {
 public:
     D(float inputSampleRate);
@@ -314,16 +315,22 @@
     m_lasttime = ts;
 
     if (m_n == m_dfsize) {
+        // If we have seen enough input, do the estimation and return
         calculate();
         fs = assembleFeatures();
         ++m_n;
         return fs;
     }
 
+    // If we have seen more than enough, just discard and return!
     if (m_n > m_dfsize) return FeatureSet();
 
     float value = 0.f;
 
+    // m_df will contain an onset detection function based on the rise
+    // in overall power from one spectral frame to the next --
+    // simplistic but reasonably effective for our purposes.
+
     for (size_t i = 1; i < m_blockSize/2; ++i) {
 
         float real = inputBuffers[0][i*2];
@@ -378,18 +385,25 @@
         return;
     }
 
-    int n = m_n;
+    // This function takes m_df (the detection function array filled
+    // out in process()) and calculates m_r (the raw autocorrelation)
+    // and m_fr (the filtered autocorrelation from whose peaks tempo
+    // estimates will be taken).
 
-    m_r = new float[n/2];
-    m_fr = new float[n/2];
-    m_t = new float[n/2];
+    int n = m_n; // length of actual df array (m_dfsize is the theoretical max)
+
+    m_r  = new float[n/2]; // raw autocorrelation
+    m_fr = new float[n/2]; // filtered autocorrelation
+    m_t  = new float[n/2]; // averaged tempo estimate for each lag value
 
     for (int i = 0; i < n/2; ++i) {
-        m_r[i] = 0.f;
+        m_r[i]  = 0.f;
         m_fr[i] = 0.f;
-        m_t[i] = lag2tempo(i);
+        m_t[i]  = lag2tempo(i);
     }
 
+    // Calculate the raw autocorrelation of the detection function
+
     for (int i = 0; i < n/2; ++i) {
 
         for (int j = i; j < n-1; ++j) {
@@ -399,20 +413,20 @@
         m_r[i] /= n - i - 1;
     }
 
+    // Filter the autocorrelation and average out the tempo estimates
+    
     float related[] = { 0.5, 2, 4, 8 };
 
     for (int i = 1; i < n/2-1; ++i) {
 
-        float weight = 1.f - fabsf(128.f - lag2tempo(i)) * 0.005;
-        if (weight < 0.f) weight = 0.f;
-        weight = weight * weight * weight;
-
         m_fr[i] = m_r[i];
 
         int div = 1;
 
         for (int j = 0; j < int(sizeof(related)/sizeof(related[0])); ++j) {
 
+            // Check for an obvious peak at each metrically related lag
+
             int k0 = int(i * related[j] + 0.5);
 
             if (k0 >= 0 && k0 < int(n/2)) {
@@ -431,11 +445,18 @@
                     have = true;
                 }
                 
+                // Boost the original lag according to the strongest
+                // value found close to this related lag
+
                 m_fr[i] += m_r[kmax] / 5;
 
                 if ((kmax == 0 || m_r[kmax] > m_r[kmax-1]) &&
                     (kmax == n/2-1 || m_r[kmax] > m_r[kmax+1]) &&
                     kvmax > kvmin * 1.05) {
+
+                    // The strongest value close to the related lag is
+                    // also a pretty good looking peak, so use it to
+                    // improve our tempo estimate for the original lag
                     
                     m_t[i] = m_t[i] + lag2tempo(kmax) * related[j];
                     ++div;
@@ -445,6 +466,13 @@
         
         m_t[i] /= div;
         
+        // Finally apply a primitive perceptual weighting (to prefer
+        // tempi of around 120-130)
+
+        float weight = 1.f - fabsf(128.f - lag2tempo(i)) * 0.005;
+        if (weight < 0.f) weight = 0.f;
+        weight = weight * weight * weight;
+
         m_fr[i] += m_fr[i] * (weight / 3);
     }
 }
@@ -453,7 +481,7 @@
 FixedTempoEstimator::D::assembleFeatures()
 {
     FeatureSet fs;
-    if (!m_r) return fs; // No results
+    if (!m_r) return fs; // No autocorrelation: no results
 
     Feature feature;
     feature.hasTimestamp = true;
@@ -467,6 +495,9 @@
     int n = m_n;
 
     for (int i = 0; i < n; ++i) {
+
+        // Return the detection function in the DF output
+
         feature.timestamp = m_start +
             RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
         feature.values[0] = m_df[i];
@@ -475,6 +506,10 @@
     }
 
     for (int i = 1; i < n/2; ++i) {
+
+        // Return the raw autocorrelation in the ACF output, each
+        // value labelled according to its corresponding tempo
+
         feature.timestamp = m_start +
             RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
         feature.values[0] = m_r[i];
@@ -496,9 +531,16 @@
 
         if (m_fr[i] > m_fr[i-1] &&
             m_fr[i] > m_fr[i+1]) {
+
+            // This is a peak in the filtered autocorrelation: stick
+            // it into the map from filtered autocorrelation to lag
+            // index -- this sorts our peaks by filtered acf value
+
             candidates[m_fr[i]] = i;
         }
 
+        // Also return the filtered autocorrelation in its own output
+
         feature.timestamp = m_start +
             RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
         feature.values[0] = m_fr[i];
@@ -519,15 +561,25 @@
     feature.hasDuration = true;
     feature.duration = m_lasttime - m_start;
 
+    // The map contains only peaks and is sorted by filtered acf
+    // value, so the final element in it is our "best" tempo guess
+
     std::map<float, int>::const_iterator ci = candidates.end();
     --ci;
     int maxpi = ci->second;
 
     if (m_t[maxpi] > 0) {
-        cerr << "*** Using adjusted tempo " << m_t[maxpi] << " instead of lag tempo " << lag2tempo(maxpi) << endl;
+
+        // This lag has an adjusted tempo from the averaging process:
+        // use it
+
         feature.values[0] = m_t[maxpi];
+
     } else {
-        // shouldn't happen -- it would imply that this high value was not a peak!
+
+        // shouldn't happen -- it would imply that this high value was
+        // not a peak!
+
         feature.values[0] = lag2tempo(maxpi);
         cerr << "WARNING: No stored tempo for index " << maxpi << endl;
     }
@@ -535,12 +587,17 @@
     sprintf(buffer, "%.1f bpm", feature.values[0]);
     feature.label = buffer;
 
+    // Return the best tempo in the main output
+
     fs[TempoOutput].push_back(feature);
 
+    // And return the other estimates (up to the arbitrarily chosen
+    // number of 10 of them) in the candidates output
+
     feature.values.clear();
     feature.label = "";
 
-    while (feature.values.size() < 8) {
+    while (feature.values.size() < 10) {
         if (m_t[ci->second] > 0) {
             feature.values.push_back(m_t[ci->second]);
         } else {