changeset 131:b877df85ad9e fixedlag

mono pitch works now with the refactored HMM implementation
author Matthias Mauch <mail@matthiasmauch.net>
date Fri, 03 Jul 2015 14:09:05 +0100
parents 080fe18f5ebf
children 926c292fa3ff
files PYinVamp.cpp PYinVamp.h SparseHMM.cpp SparseHMM.h
diffstat 4 files changed, 87 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/PYinVamp.cpp	Fri Jul 03 12:22:44 2015 +0100
+++ b/PYinVamp.cpp	Fri Jul 03 14:09:05 2015 +0100
@@ -14,6 +14,7 @@
 #include "PYinVamp.h"
 #include "MonoNote.h"
 #include "MonoPitch.h"
+#include "MonoPitchHMM.h"
 
 #include "vamp-sdk/FFT.h"
 
@@ -50,6 +51,7 @@
     m_lowAmp(0.1f),
     m_onsetSensitivity(0.7f),
     m_pruneThresh(0.1f),
+    m_pitchHmm(),
     m_pitchProb(0),
     m_timestamp(0),
     m_level(0)
@@ -493,6 +495,13 @@
 
     if (m_fixedLag == 0.f)
     {
+        vector<double> tempObsProb = m_pitchHmm.calculateObsProb(tempPitchProb);
+        if (m_timestamp.empty())
+        {
+            m_pitchHmm.initialise(tempObsProb);
+        } else {
+            m_pitchHmm.process(tempObsProb);
+        }
         m_pitchProb.push_back(tempPitchProb);
     } else {
         // Damn, so I need the hmm right here! Sadly it isn't defined here yet.
@@ -549,26 +558,46 @@
         return fs;
     }
 
-    // MONO-PITCH STUFF
-    MonoPitch mp;
-    vector<float> mpOut = mp.process(m_pitchProb);
-    for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame)
+    // ================== P I T C H  T R A C K =================================
+
+    vector<int> rawPitchPath = m_pitchHmm.finalise();
+    vector<float> mpOut; 
+    
+    for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame)
     {
-        if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue;
+        float freq = pitchState2Freq(rawPitchPath[iFrame], m_pitchProb[iFrame]);
+        mpOut.push_back(freq); // for note processing below
+        
         f.timestamp = m_timestamp[iFrame];
+        // std::cerr << f.timestamp << std::endl;
         f.values.clear();
+
+        // different output modes
+        if (freq < 0 && (m_outputUnvoiced==0)) continue;
         if (m_outputUnvoiced == 1)
         {
-            f.values.push_back(fabs(mpOut[iFrame]));
+            f.values.push_back(fabs(freq));
         } else {
-            f.values.push_back(mpOut[iFrame]);
+            f.values.push_back(freq);
         }
-        
         fs[m_oSmoothedPitchTrack].push_back(f);
     }
+
+    // for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame)
+    // {
+    //     if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue;
+
+    //     if (m_outputUnvoiced == 1)
+    //     {
+    //         f.values.push_back(fabs(mpOut[iFrame]));
+    //     } else {
+    //         f.values.push_back(mpOut[iFrame]);
+    //     }
+        
+    //     fs[m_oSmoothedPitchTrack].push_back(f);
+    // }
     
-    // MONO-NOTE STUFF
-//    std::cerr << "Mono Note Stuff" << std::endl;
+    // ======================== N O T E S ======================================
     MonoNote mn;
     std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
     for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) {
@@ -634,3 +663,33 @@
     }
     return fs;
 }
+
+float // returns the output frequency for HMM state `state`, matched against this frame's candidates
+PYinVamp::pitchState2Freq(int state, vector<pair<double, double> > pitchProb)
+{ // NOTE(review): pitchProb is taken by value, so each call copies the candidate vector
+    float hmmFreq = m_pitchHmm.m_freqs[state]; // frequency stored for this state; `state` is not range-checked here
+    float bestFreq = 0; // stays 0 if hmmFreq > 0 but no candidate is closer than leastDist (e.g. empty pitchProb)
+    float leastDist = 10000; // initial upper bound for the nearest-candidate search below
+    if (hmmFreq > 0)
+    {
+        // This was a Yin estimate, so try to get original pitch estimate back
+        // ... a bit hacky, since we could have directly saved the frequency
+        // that was assigned to the HMM bin in hmm.calculateObsProb -- but would
+        // have had to rethink the interface of that method.
+        for (size_t iPt = 0; iPt < pitchProb.size(); ++iPt)
+        {
+            float freq = 440. * // converts candidate .first with 440 * 2^((p - 69) / 12), i.e. p == 69 maps to 440
+                         std::pow(2, 
+                                  (pitchProb[iPt].first - 69)/12);
+            float dist = std::abs(hmmFreq-freq); // absolute distance between state frequency and candidate
+            if (dist < leastDist) // strict comparison: on ties the earlier candidate is kept
+            {
+                leastDist = dist;
+                bestFreq = freq;
+            }
+        }
+    } else {
+        bestFreq = hmmFreq; // non-positive state frequency is passed through unchanged
+    }
+    return bestFreq;
+}
\ No newline at end of file
--- a/PYinVamp.h	Fri Jul 03 12:22:44 2015 +0100
+++ b/PYinVamp.h	Fri Jul 03 14:09:05 2015 +0100
@@ -17,6 +17,7 @@
 #include <vamp-sdk/Plugin.h>
 
 #include "Yin.h"
+#include "MonoPitchHMM.h"
 
 class PYinVamp : public Vamp::Plugin
 {
@@ -55,6 +56,8 @@
 
     FeatureSet getRemainingFeatures();
 
+    float pitchState2Freq(int state, vector<pair<double, double> > pitchProb);
+
 protected:
     size_t m_channels;
     size_t m_stepSize;
@@ -77,6 +80,9 @@
     float m_lowAmp;
     float m_onsetSensitivity;
     float m_pruneThresh;
+
+    MonoPitchHMM m_pitchHmm;
+
     vector<vector<pair<double, double> > > m_pitchProb;
     vector<Vamp::RealTime> m_timestamp;
     vector<float> m_level;
--- a/SparseHMM.cpp	Fri Jul 03 12:22:44 2015 +0100
+++ b/SparseHMM.cpp	Fri Jul 03 14:09:05 2015 +0100
@@ -52,8 +52,7 @@
     if (nFrame < 1) {
         return vector<int>();
     }
-    
-    
+
     initialise(obsProb[0]);
 
     // rest of forward step
@@ -66,10 +65,20 @@
     return(path);
 }
 
+void // clears the decoder's accumulated state; initialise() calls this first
+SparseHMM::reset()
+{
+    m_scale.clear(); // remove all stored elements
+    m_psi.clear(); // remove all stored elements
+    for (size_t i = 0; i < m_delta.size(); ++i) m_delta[i] = 0; // zero in place; size is kept
+    for (size_t i = 0; i < m_oldDelta.size(); ++i) m_oldDelta[i] = 0; // zero in place; size is kept
+}
 
 void
 SparseHMM::initialise(vector<double> firstObs)
 {
+    reset();
+
     double deltasum = 0;
 
     // initialise first frame
--- a/SparseHMM.h	Fri Jul 03 12:22:44 2015 +0100
+++ b/SparseHMM.h	Fri Jul 03 14:09:05 2015 +0100
@@ -30,6 +30,7 @@
                            calculateObsProb(const vector<pair<double, double> >);
     virtual void           build();
     const std::vector<int> decodeViterbi(std::vector<vector<double> > obs);
+    void                   reset();
     void                   initialise(vector<double> firstObs);
     int                    process(vector<double> newObs);
     vector<int>            finalise();