changeset 46:e17206dafa65 tony

frantically fixed some things -- at least pyin is not behaving randomly any more.
author matthiasm
date Wed, 19 Feb 2014 22:48:08 +0000
parents 68812db649e6
children 42bafbc673b9
files LocalCandidatePYIN.cpp LocalCandidatePYIN.h PYinVamp.cpp Yin.cpp YinUtil.cpp
diffstat 5 files changed, 210 insertions(+), 107 deletions(-) [+]
line wrap: on
line diff
--- a/LocalCandidatePYIN.cpp	Wed Feb 05 11:25:35 2014 +0000
+++ b/LocalCandidatePYIN.cpp	Wed Feb 19 22:48:08 2014 +0000
@@ -27,6 +27,8 @@
 #include <complex>
 #include <map>
 
+#include <boost/math/distributions.hpp>
+
 using std::string;
 using std::vector;
 using std::map;
@@ -46,7 +48,7 @@
     m_outputUnvoiced(0.0f),
     m_pitchProb(0),
     m_timestamp(0),
-    m_nCandidate(20)
+    m_nCandidate(10)
 {
 }
 
@@ -265,7 +267,7 @@
     m_pitchProb.clear();
     for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
     {
-        m_pitchProb.push_back(vector<vector<pair<double, double> > >());
+        m_pitchProb.push_back(vector<pair<double, double> >());
     }
     m_timestamp.clear();
 /*    
@@ -278,7 +280,7 @@
 LocalCandidatePYIN::FeatureSet
 LocalCandidatePYIN::process(const float *const *inputBuffers, RealTime timestamp)
 {
-    timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate));
+    timestamp = timestamp - Vamp::RealTime::frame2RealTime(m_blockSize, lrintf(m_inputSampleRate));
     
     double *dInputBuffers = new double[m_blockSize];
     for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
@@ -291,30 +293,28 @@
 
     YinUtil::cumulativeDifference(yinBuffer, yinBufferSize);
     
-    for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
+    float minFrequency = 60;
+    float maxFrequency = 900;
+    vector<double> peakProbability = YinUtil::yinProb(yinBuffer, 
+                                                      m_threshDistr, 
+                                                      yinBufferSize, 
+                                                      m_inputSampleRate/maxFrequency, 
+                                                      m_inputSampleRate/minFrequency);
+
+    vector<pair<double, double> > tempPitchProb;
+    for (size_t iBuf = 0; iBuf < yinBufferSize; ++iBuf)
     {
-        float minFrequency = m_fmin * std::pow(2,(3.0*iCandidate)/12);
-        float maxFrequency = m_fmin * std::pow(2,(3.0*iCandidate+9)/12);
-        vector<double> peakProbability = YinUtil::yinProb(yinBuffer, 
-                                                          m_threshDistr, 
-                                                          yinBufferSize, 
-                                                          m_inputSampleRate/maxFrequency, 
-                                                          m_inputSampleRate/minFrequency);
-
-        vector<pair<double, double> > tempPitchProb;
-        for (size_t iBuf = 0; iBuf < yinBufferSize; ++iBuf)
+        if (peakProbability[iBuf] > 0)
         {
-            if (peakProbability[iBuf] > 0)
-            {
-                double currentF0 = 
-                    m_inputSampleRate * (1.0 /
-                    YinUtil::parabolicInterpolation(yinBuffer, iBuf, yinBufferSize));
-                double tempPitch = 12 * std::log(currentF0/440)/std::log(2.) + 69;
-                tempPitchProb.push_back(pair<double, double>(tempPitch, peakProbability[iBuf]));
-            }
+            double currentF0 = 
+                m_inputSampleRate * (1.0 /
+                YinUtil::parabolicInterpolation(yinBuffer, iBuf, yinBufferSize));
+            double tempPitch = 12 * std::log(currentF0/440)/std::log(2.) + 69;
+            if (tempPitch != tempPitch) std::cerr << "AAAAAAAAA! " << currentF0 << " " << (m_inputSampleRate * 1.0 / iBuf) << std::endl;
+            tempPitchProb.push_back(pair<double, double>(tempPitch, peakProbability[iBuf]));
         }
-        m_pitchProb[iCandidate].push_back(tempPitchProb);
     }
+    m_pitchProb.push_back(tempPitchProb);
     m_timestamp.push_back(timestamp);
 
     return FeatureSet();
@@ -340,21 +340,46 @@
     vector<float> freqNumber = vector<float>(m_nCandidate);
     vector<float> freqMean = vector<float>(m_nCandidate);
     
+    boost::math::normal normalDist(0, 8); // semitones sd
+    float maxNormalDist = boost::math::pdf(normalDist, 0);
+    
     for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
     {
         pitchTracks.push_back(vector<float>(nFrame));
-        vector<float> mpOut = mp.process(m_pitchProb[iCandidate]);
+        vector<vector<pair<double,double> > > tempPitchProb;
+        float centrePitch = 45 + 3 * iCandidate;
+        for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) {
+            tempPitchProb.push_back(vector<pair<double,double> >(0));
+            float sumProb = 0;
+            float pitch = 0;
+            float prob = 0;
+            for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb) {
+                pitch = m_pitchProb[iFrame][iProb].first;
+                // std::cerr << pitch << " " << m_pitchProb[iFrame][iProb].second << std::endl;
+                prob  = m_pitchProb[iFrame][iProb].second * boost::math::pdf(normalDist, pitch-centrePitch) / maxNormalDist;
+                // if (abs(pitch-centrePitch) > 5) prob *= 0.5;
+                // if (abs(pitch-centrePitch) > 7) prob *= 0.5;
+                // if (abs(pitch-centrePitch) > 9) prob *= 0.5;
+                sumProb += prob;
+                tempPitchProb[iFrame].push_back(pair<double,double>(pitch,prob));
+                // std::cerr << m_timestamp[iFrame] << " " << iCandidate << " " << centrePitch << " " << pitch << " " << prob << std::endl;
+            }
+            for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb) {
+                tempPitchProb[iFrame][iProb].second /= sumProb;
+            }
+        }
+        vector<float> mpOut = mp.process(tempPitchProb);
         float prevFreq = 0;
         for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
         {
             if (mpOut[iFrame] > 0) {
-                if (prevFreq>0 && fabs(log2(mpOut[iFrame]/prevFreq)) > 0.1) {
-                    for (int jFrame = iFrame; jFrame != -1; --jFrame) {
-                        // hack: setting all freqs to 0 -- will be eliminated later
-                        pitchTracks[iCandidate][jFrame] = 0;
-                    }
-                    break;
-                }
+                // if (prevFreq>0 && fabs(log2(mpOut[iFrame]/prevFreq)) > 0.1) {
+                //     for (int jFrame = iFrame; jFrame != -1; --jFrame) {
+                //         // hack: setting all freqs to 0 -- will be eliminated later
+                //         pitchTracks[iCandidate][jFrame] = 0;
+                //     }
+                //     break;
+                // }
                 pitchTracks[iCandidate][iFrame] = mpOut[iFrame];
                 freqSum[iCandidate] += mpOut[iFrame];
                 freqNumber[iCandidate]++;
@@ -371,23 +396,33 @@
             size_t countEqual = 0;
             for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) 
             {
-                if (fabs(pitchTracks[iCandidate][iFrame]/pitchTracks[jCandidate][iFrame]-1)<0.01)
+                if ((pitchTracks[jCandidate][iFrame] == 0 && pitchTracks[iCandidate][iFrame] == 0) ||
+                fabs(pitchTracks[iCandidate][iFrame]/pitchTracks[jCandidate][iFrame]-1)<0.01)
                 countEqual++;
             }
+            // std::cerr << "proportion equal: " << (countEqual * 1.0 / nFrame) << std::endl;    
             if (countEqual * 1.0 / nFrame > 0.8) {
                 if (freqNumber[iCandidate] > freqNumber[jCandidate]) {
                     duplicates.push_back(jCandidate);
-                } else {
+                } else if (iCandidate < jCandidate) {
                     duplicates.push_back(iCandidate);
                 }
             }
         }
     }
 
+    // std::cerr << "n duplicate: " << duplicates.size() << std::endl;    
+    for (size_t iDup = 0; iDup < duplicates.size(); ++ iDup) {
+        // std::cerr << "duplicate: " << iDup << std::endl;
+    }
+
     // now find non-duplicate pitch tracks
     map<int, int> candidateActuals;
     map<int, std::string> candidateLabels;
 
+    vector<vector<float> > outputFrequencies;
+    for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) outputFrequencies.push_back(vector<float>(0));
+
     int actualCandidateNumber = 0;
     for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) {
         bool isDuplicate = false;
@@ -398,19 +433,20 @@
                 break;
             }
         }
-        if (!isDuplicate && freqNumber[iCandidate] > 0.8*nFrame)
+        if (!isDuplicate && freqNumber[iCandidate] > 0.5*nFrame)
         {
             std::ostringstream convert;
             convert << actualCandidateNumber++;
             candidateLabels[iCandidate] = convert.str();
             candidateActuals[iCandidate] = actualCandidateNumber;
-            std::cerr << freqNumber[iCandidate] << " " << freqMean[iCandidate] << std::endl;
+            // std::cerr << iCandidate << " " << actualCandidateNumber << " " << freqNumber[iCandidate] << " " << freqMean[iCandidate] << std::endl;
             for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) 
             {
                 if (pitchTracks[iCandidate][iFrame] > 0)
                 {
-                    featureValues[m_timestamp[iFrame]][iCandidate] = 
-                        pitchTracks[iCandidate][iFrame];
+                    // featureValues[m_timestamp[iFrame]][iCandidate] = 
+                    //     pitchTracks[iCandidate][iFrame];
+                    outputFrequencies[iFrame].push_back(pitchTracks[iCandidate][iFrame]);
                 }
             }
         }
@@ -422,24 +458,34 @@
 
     FeatureSet fs;
 
-    for (map<RealTime, map<int, float> >::const_iterator i =
-             featureValues.begin(); i != featureValues.end(); ++i) {
+    for (size_t iFrame = 0; iFrame < nFrame; ++iFrame){
         Feature f;
         f.hasTimestamp = true;
-        f.timestamp = i->first;
-        int nextCandidate = candidateActuals.begin()->second;
-        for (map<int, float>::const_iterator j = 
-                 i->second.begin(); j != i->second.end(); ++j) {
-            while (candidateActuals[j->first] > nextCandidate) {
-                f.values.push_back(0);
-                ++nextCandidate;
-            }
-            f.values.push_back(j->second);
-            nextCandidate = j->first + 1;
-        }
-        //!!! can't use labels?
+        f.timestamp = m_timestamp[iFrame];
+        f.values = outputFrequencies[iFrame];
         fs[0].push_back(f);
     }
+    
+    // I stopped using Chris's map stuff below because I couldn't get my head around it
+    //
+    // for (map<RealTime, map<int, float> >::const_iterator i =
+    //          featureValues.begin(); i != featureValues.end(); ++i) {
+    //     Feature f;
+    //     f.hasTimestamp = true;
+    //     f.timestamp = i->first;
+    //     int nextCandidate = candidateActuals.begin()->second;
+    //     for (map<int, float>::const_iterator j = 
+    //              i->second.begin(); j != i->second.end(); ++j) {
+    //         while (candidateActuals[j->first] > nextCandidate) {
+    //             f.values.push_back(0);
+    //             ++nextCandidate;
+    //         }
+    //         f.values.push_back(j->second);
+    //         nextCandidate = j->first + 1;
+    //     }
+    //     //!!! can't use labels?
+    //     fs[0].push_back(f);
+    // }
 
     return fs;
 }
--- a/LocalCandidatePYIN.h	Wed Feb 05 11:25:35 2014 +0000
+++ b/LocalCandidatePYIN.h	Wed Feb 19 22:48:08 2014 +0000
@@ -67,7 +67,7 @@
     
     float m_threshDistr;
     float m_outputUnvoiced;
-    vector<vector<vector<pair<double, double> > > > m_pitchProb;
+    vector<vector<pair<double, double> > > m_pitchProb;
     vector<Vamp::RealTime> m_timestamp;
     size_t m_nCandidate;
 };
--- a/PYinVamp.cpp	Wed Feb 05 11:25:35 2014 +0000
+++ b/PYinVamp.cpp	Wed Feb 19 22:48:08 2014 +0000
@@ -357,8 +357,16 @@
     timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate));
     FeatureSet fs;
     
+    float rms = 0;
+    
     double *dInputBuffers = new double[m_blockSize];
-    for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
+    for (size_t i = 0; i < m_blockSize; ++i) {
+        dInputBuffers[i] = inputBuffers[0][i];
+        rms += inputBuffers[0][i] * inputBuffers[0][i];
+    }
+    rms /= m_blockSize;
+    rms = sqrt(rms);
+    bool isLowVolume = (rms < 0.01);
     
     Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers);
     delete [] dInputBuffers;
@@ -369,8 +377,12 @@
     for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate)
     {
         double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69;
-        tempPitchProb.push_back(pair<double, double>
-            (tempPitch, yo.freqProb[iCandidate].second));
+        if (!isLowVolume)
+            tempPitchProb.push_back(pair<double, double>
+                (tempPitch, yo.freqProb[iCandidate].second));
+        else
+            tempPitchProb.push_back(pair<double, double>
+                (tempPitch, yo.freqProb[iCandidate].second*.01));
     }
     m_pitchProb.push_back(tempPitchProb);
     m_timestamp.push_back(timestamp);
--- a/Yin.cpp	Wed Feb 05 11:25:35 2014 +0000
+++ b/Yin.cpp	Wed Feb 19 22:48:08 2014 +0000
@@ -57,7 +57,7 @@
     double aperiodicity;
     double f0;
     
-    if (tau!=0)
+    if (tau!=0 && tau!=m_yinBufferSize-1)
     {
         interpolatedTau = YinUtil::parabolicInterpolation(yinBuffer, abs(tau), m_yinBufferSize);
         f0 = m_inputSampleRate * (1.0 / interpolatedTau);
@@ -93,7 +93,7 @@
         
     // basic yin output
     Yin::YinOutput yo(0,0,0);
-    for (size_t iBuf = 0; iBuf < m_yinBufferSize; ++iBuf)
+    for (size_t iBuf = 1; iBuf < m_yinBufferSize-1; ++iBuf)
     {
         if (peakProbability[iBuf] > 0)
         {
--- a/YinUtil.cpp	Wed Feb 05 11:25:35 2014 +0000
+++ b/YinUtil.cpp	Wed Feb 19 22:48:08 2014 +0000
@@ -233,9 +233,37 @@
     // double factor = 1.0 / (0.25 * (nThresholdInt+1) * (nThresholdInt + 1)); // factor to scale down triangular weight
     size_t minInd = 0;
     float minVal = 42.f;
-    while (currThreshInd != -1 && tau < maxTau)
+    // while (currThreshInd != -1 && tau < maxTau)
+    // {
+    //     if (yinBuffer[tau] < thresholds[currThreshInd])
+    //     {
+    //         while (tau + 1 < maxTau && yinBuffer[tau+1] < yinBuffer[tau])
+    //         {
+    //             tau++;
+    //         }
+    //         // tau is now local minimum
+    //         // std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl;
+    //         if (yinBuffer[tau] < minVal && tau > 2){
+    //             minVal = yinBuffer[tau];
+    //             minInd = tau;
+    //         }
+    //         peakProb[tau] += distribution[currThreshInd];
+    //         currThreshInd--;
+    //     } else {
+    //         tau++;
+    //     }
+    // }
+    // double nonPeakProb = 1;
+    // for (size_t i = minTau; i < maxTau; ++i)
+    // {
+    //     nonPeakProb -= peakProb[i];
+    // }
+    // 
+    // std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl;
+    float sumProb = 0;
+    while (tau < maxTau)
     {
-        if (yinBuffer[tau] < thresholds[currThreshInd])
+        if (yinBuffer[tau] < thresholds[thresholds.size()-1] && yinBuffer[tau+1] < yinBuffer[tau])
         {
             while (tau + 1 < maxTau && yinBuffer[tau+1] < yinBuffer[tau])
             {
@@ -247,18 +275,29 @@
                 minVal = yinBuffer[tau];
                 minInd = tau;
             }
-            peakProb[tau] += distribution[currThreshInd];
-            currThreshInd--;
+            currThreshInd = nThresholdInt-1;
+            while (thresholds[currThreshInd] > yinBuffer[tau] && currThreshInd > -1) {
+                // std::cerr << distribution[currThreshInd] << std::endl;
+                peakProb[tau] += distribution[currThreshInd];
+                currThreshInd--;
+            }
+            // peakProb[tau] = 1 - yinBuffer[tau];
+            sumProb += peakProb[tau];
+            tau++;
         } else {
             tau++;
         }
     }
+    
     double nonPeakProb = 1;
-    for (size_t i = minTau; i < maxTau; ++i)
-    {
-        nonPeakProb -= peakProb[i];
+    if (sumProb > 0) {
+        for (size_t i = minTau; i < maxTau; ++i)
+        {
+            peakProb[i] = peakProb[i] / sumProb * peakProb[minInd];
+            nonPeakProb -= peakProb[i];
+        }
     }
-    // std::cerr << nonPeakProb << std::endl;
+    std::cerr << nonPeakProb << std::endl;
     if (minInd > 0)
     {
         // std::cerr << "min set " << minVal << " " << minInd << " " << nonPeakProb << std::endl; 
@@ -278,55 +317,61 @@
     }
     
     double betterTau = 0.0;
-    size_t x0;
-    size_t x2;
+    // size_t x0;
+    // size_t x2;
 
-    if (tau < 1) 
-    {
-        x0 = tau;
-    } else {
-        x0 = tau - 1;
-    }
-    
-    if (tau + 1 < yinBufferSize) 
-    {
-        x2 = tau + 1;
-    } else {
-        x2 = tau;
-    }
-    
-    if (x0 == tau) 
-    {
-        if (yinBuffer[tau] <= yinBuffer[x2]) 
-        {
-            betterTau = tau;
-        } else {
-            betterTau = x2;
-        }
-    } 
-    else if (x2 == tau) 
-    {
-        if (yinBuffer[tau] <= yinBuffer[x0]) 
-        {
-            betterTau = tau;
-        } 
-        else 
-        {
-            betterTau = x0;
-        }
-    } 
-    else 
-    {
+    // if (tau < 1) 
+    // {
+    //     x0 = tau;
+    // } else {
+    //     x0 = tau - 1;
+    // }
+    // 
+    // if (tau + 1 < yinBufferSize) 
+    // {
+    //     x2 = tau + 1;
+    // } else {
+    //     x2 = tau;
+    // }
+    // 
+    // if (x0 == tau) 
+    // {
+    //     if (yinBuffer[tau] <= yinBuffer[x2]) 
+    //     {
+    //         betterTau = tau;
+    //     } else {
+    //         betterTau = x2;
+    //     }
+    // } 
+    // else if (x2 == tau) 
+    // {
+    //     if (yinBuffer[tau] <= yinBuffer[x0]) 
+    //     {
+    //         betterTau = tau;
+    //     } 
+    //     else 
+    //     {
+    //         betterTau = x0;
+    //     }
+    // } 
+    // else 
+    // {
+    if (tau > 0 && tau < yinBufferSize-1) {
         float s0, s1, s2;
-        s0 = yinBuffer[x0];
+        s0 = yinBuffer[tau-1];
         s1 = yinBuffer[tau];
-        s2 = yinBuffer[x2];
+        s2 = yinBuffer[tau+1];
         // fixed AUBIO implementation, thanks to Karl Helgason:
         // (2.0f * s1 - s2 - s0) was incorrectly multiplied with -1
-        betterTau = tau + (s2 - s0) / (2 * (2 * s1 - s2 - s0));
         
-        // std::cerr << tau << " --> " << betterTau << std::endl;
+        double adjustment = (s2 - s0) / (2 * (2 * s1 - s2 - s0));
         
+        if (abs(adjustment)>1) adjustment = 0;
+        
+        betterTau = tau + adjustment;
+    } else {
+        std::cerr << "WARNING: can't do interpolation at the edge (tau = " << tau << "), will return un-interpolated value.\n";
+        betterTau = tau;
     }
     return betterTau;
 }