changeset 20:7786d595d2f2 track

Introduce a peak-to-second-peak ratio, which looks like a reasonable proxy for harmonicity. Use it to assign a confidence to each estimate in the pitch tracker, and rely on that confidence to determine how many similar estimates are needed to satisfy a hypothesis.
author Chris Cannam
date Mon, 02 Jul 2012 21:37:02 +0100
parents c9cac05ef9f2
children df41333abbc9
files CepstrumPitchTracker.cpp CepstrumPitchTracker.h SimpleCepstrum.cpp SimpleCepstrum.h
diffstat 4 files changed, 69 insertions(+), 143 deletions(-) [+]
line wrap: on
line diff
--- a/CepstrumPitchTracker.cpp	Sun Jul 01 11:33:37 2012 +0100
+++ b/CepstrumPitchTracker.cpp	Mon Jul 02 21:37:02 2012 +0100
@@ -57,7 +57,18 @@
 bool 
 CepstrumPitchTracker::Hypothesis::isSatisfied()
 {
-    return (m_pending.size() > 2);
+    if (m_pending.empty()) return false;
+    
+    double meanConfidence = 0.0;
+    for (int i = 0; i < m_pending.size(); ++i) {
+        meanConfidence += m_pending[i].confidence;
+    }
+    meanConfidence /= m_pending.size();
+
+    int lengthRequired = int(2.0 / meanConfidence + 0.5);
+    std::cerr << "meanConfidence = " << meanConfidence << ", lengthRequired = " << lengthRequired << ", length = " << m_pending.size() << std::endl;
+
+    return (m_pending.size() > lengthRequired);
 }
 
 void
@@ -153,25 +164,15 @@
     m_blockSize(1024),
     m_fmin(50),
     m_fmax(1000),
-    m_histlen(1),
     m_vflen(3),
     m_binFrom(0),
     m_binTo(0),
-    m_bins(0),
-    m_history(0),
-    m_prevpeak(0),
-    m_prevprop(0)
+    m_bins(0)
 {
 }
 
 CepstrumPitchTracker::~CepstrumPitchTracker()
 {
-    if (m_history) {
-        for (int i = 0; i < m_histlen; ++i) {
-            delete[] m_history[i];
-        }
-        delete[] m_history;
-    }
 }
 
 string
@@ -328,11 +329,6 @@
 
     m_bins = (m_binTo - m_binFrom) + 1;
 
-    m_history = new double *[m_histlen];
-    for (int i = 0; i < m_histlen; ++i) {
-        m_history[i] = new double[m_bins];
-    }
-
     reset();
 
     return true;
@@ -341,28 +337,11 @@
 void
 CepstrumPitchTracker::reset()
 {
-    for (int i = 0; i < m_histlen; ++i) {
-        for (int j = 0; j < m_bins; ++j) {
-            m_history[i][j] = 0.0;
-        }
-    }
 }
 
 void
-CepstrumPitchTracker::filter(const double *cep, double *result)
+CepstrumPitchTracker::filter(const double *cep, double *data)
 {
-    int hix = m_histlen - 1; // current history index
-
-    // roll back the history
-    if (m_histlen > 1) {
-        double *oldest = m_history[0];
-        for (int i = 1; i < m_histlen; ++i) {
-            m_history[i-1] = m_history[i];
-        }
-        // and stick this back in the newest spot, to recycle
-        m_history[hix] = oldest;
-    }
-
     for (int i = 0; i < m_bins; ++i) {
         double v = 0;
         int n = 0;
@@ -374,52 +353,8 @@
                 ++n;
             }
         }
-        m_history[hix][i] = v / n;
+        data[i] = v / n;
     }
-
-    for (int i = 0; i < m_bins; ++i) {
-        double mean = 0.0;
-        for (int j = 0; j < m_histlen; ++j) {
-            mean += m_history[j][i];
-        }
-        mean /= m_histlen;
-        result[i] = mean;
-    }
-}
-
-double
-CepstrumPitchTracker::calculatePeakProportion(const double *data, double abstot, int n)
-{
-    double aroundPeak = data[n];
-    double peakProportion = 0.0;
-
-    int i = n - 1;
-    while (i > 0 && data[i] <= data[i+1]) {
-        aroundPeak += fabs(data[i]);
-        --i;
-    }
-    i = n + 1;
-    while (i < m_bins && data[i] <= data[i-1]) {
-        aroundPeak += fabs(data[i]);
-        ++i;
-    }
-    peakProportion = aroundPeak / abstot;
-
-    return peakProportion;
-}
-
-bool
-CepstrumPitchTracker::acceptPeak(int n, double peakProportion)
-{
-    bool accept = false;
-
-    if (abs(n - m_prevpeak) < 10) { //!!! should depend on bin count
-        accept = true;
-    } else if (peakProportion > m_prevprop * 2) {
-        accept = true;
-    }
-
-    return accept;
 }
 
 CepstrumPitchTracker::FeatureSet
@@ -475,12 +410,33 @@
         }
     }
 
-    if (maxbin < 0) return fs;
+    if (maxbin < 0) {
+        delete[] data;
+        return fs;
+    }
+
+    double nextPeakVal = 0.0;
+    for (int i = 1; i+1 < n; ++i) {
+        if (data[i] > data[i-1] &&
+            data[i] > data[i+1] &&
+            i != maxbin &&
+            data[i] > nextPeakVal) {
+            nextPeakVal = data[i];
+        }
+    }
 
     double peakfreq = m_inputSampleRate / (maxbin + m_binFrom);
+
+    double confidence = 0.0;
+    if (nextPeakVal != 0.0) {
+        confidence = ((maxval / nextPeakVal) - 1.0) / 4.0;
+        if (confidence > 1.0) confidence = 1.0;
+    }
+
     Hypothesis::Estimate e;
     e.freq = peakfreq;
     e.time = timestamp;
+    e.confidence = confidence;
 
     m_accepted.advanceTime();
 
@@ -534,59 +490,10 @@
         }
     }  
 
-        std::cerr << "accepted length = " << m_accepted.getPendingLength()
-                  << ", state = " << m_accepted.getState()
-                  << ", hypothesis count = " << m_possible.size() << std::endl;
+    std::cerr << "accepted length = " << m_accepted.getPendingLength()
+              << ", state = " << m_accepted.getState()
+              << ", hypothesis count = " << m_possible.size() << std::endl;
 
-            
-
-/*
-    bool accepted = false;
-
-    if (maxbin >= 0) {
-        double pp = calculatePeakProportion(data, abstot, maxbin);
-        if (acceptPeak(maxbin, pp)) {
-            accepted = true;
-        } else {
-            // try a secondary peak
-            maxval = 0.0;
-            int secondbin = 0;
-            for (int i = 1; i < n-1; ++i) {
-                if (i != maxbin &&
-                    data[i] > data[i-1] &&
-                    data[i] > data[i+1] &&
-                    data[i] > maxval) {
-                    maxval = data[i];
-                    secondbin = i;
-                }
-            }
-            double spp = calculatePeakProportion(data, abstot, secondbin);
-            if (acceptPeak(secondbin, spp)) {
-                maxbin = secondbin;
-                pp = spp;
-                accepted = true;
-            }
-        }
-        if (accepted) {
-            m_prevpeak = maxbin;
-            m_prevprop = pp;
-        }
-    }
-*/
-//    std::cerr << "peakProportion = " << peakProportion << std::endl;
-//    std::cerr << "peak = " << m_inputSampleRate / (maxbin + m_binFrom) << std::endl;
-//    std::cerr << "bins = " << m_bins << std::endl;
-
-//    if (peakProportion >= (0.00006 * m_bins)) {
-/*
-    if (accepted) {
-	Feature f;
-	f.hasTimestamp = true;
-	f.timestamp = timestamp;
-	f.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
-	fs[0].push_back(f);
-    }
-*/
     delete[] data;
     return fs;
 }
@@ -595,7 +502,7 @@
 CepstrumPitchTracker::getRemainingFeatures()
 {
     FeatureSet fs;
-    if (m_accepted.getState() != Hypothesis::New) {
+    if (m_accepted.getState() == Hypothesis::Satisfied) {
         m_accepted.addFeatures(fs[0]);
     }
     return fs;
--- a/CepstrumPitchTracker.h	Sun Jul 01 11:33:37 2012 +0100
+++ b/CepstrumPitchTracker.h	Mon Jul 02 21:37:02 2012 +0100
@@ -68,7 +68,6 @@
     size_t m_blockSize;
     float m_fmin;
     float m_fmax;
-    int m_histlen;
     int m_vflen;
 
     int m_binFrom;
@@ -81,6 +80,7 @@
         struct Estimate {
             double freq;
             Vamp::RealTime time;
+            double confidence;
         };
         typedef std::vector<Estimate> Estimates;
         
@@ -119,14 +119,6 @@
     Hypotheses m_possible;
     Hypothesis m_accepted;
 
-    double **m_history;
-    
-    int m_prevpeak;
-    double m_prevprop;
-
-    double calculatePeakProportion(const double *data, double abstot, int n);
-    bool acceptPeak(int n, double peakProportion);
-
     void filter(const double *in, double *out);
     void fft(unsigned int n, bool inverse,
              double *ri, double *ii, double *ro, double *io);
--- a/SimpleCepstrum.cpp	Sun Jul 01 11:33:37 2012 +0100
+++ b/SimpleCepstrum.cpp	Mon Jul 02 21:37:02 2012 +0100
@@ -299,6 +299,13 @@
     m_ppOutput = n++;
     outputs.push_back(d);
 
+    d.identifier = "peak_to_second_peak";
+    d.name = "Peak to second-peak ratio";
+    d.unit = "";
+    d.description = "Return the ratio of the value found in the peak bin within the specified range of the cepstrum, to the value found in the next highest peak";
+    m_pkoOutput = n++;
+    outputs.push_back(d);
+
     d.identifier = "total";
     d.name = "Total energy";
     d.unit = "";
@@ -454,6 +461,17 @@
         }
     }
 
+    double nextPeakVal = 0.0;
+
+    for (int i = 1; i+1 < n; ++i) {
+        if (data[i] > data[i-1] &&
+            data[i] > data[i+1] &&
+            i != maxbin &&
+            data[i] > nextPeakVal) {
+            nextPeakVal = data[i];
+        }
+    }
+
     Feature rf;
     if (maxval > 0.0) {
         rf.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
@@ -520,6 +538,14 @@
     pv.values.push_back(maxval);
     fs[m_pvOutput].push_back(pv);
 
+    Feature pko;
+    if (nextPeakVal != 0.0) {
+        pko.values.push_back(maxval / nextPeakVal);
+    } else {
+        pko.values.push_back(0.0);
+    }
+    fs[m_pkoOutput].push_back(pko);
+
     Feature am;
     for (int i = 0; i < n; ++i) {
         if (data[i] < rms) am.values.push_back(0);
--- a/SimpleCepstrum.h	Sun Jul 01 11:33:37 2012 +0100
+++ b/SimpleCepstrum.h	Mon Jul 02 21:37:02 2012 +0100
@@ -92,6 +92,7 @@
     mutable int m_esOutput;
     mutable int m_ppOutput;
     mutable int m_totOutput;
+    mutable int m_pkoOutput;
 
     int m_binFrom;
     int m_binTo;