Mercurial > hg > pyin

--- a/LocalCandidatePYIN.cpp	Fri Aug 19 13:40:11 2016 +0100
+++ b/LocalCandidatePYIN.cpp	Fri Mar 24 14:50:44 2017 +0000
@@ -12,7 +12,7 @@
 */

 #include "LocalCandidatePYIN.h"
-#include "MonoPitch.h"
+#include "MonoPitchHMM.h"
 #include "YinUtil.h"

 #include "vamp-sdk/FFT.h"
@@ -348,7 +348,7 @@
     }

     // MONO-PITCH STUFF
-    MonoPitch mp;
+    MonoPitchHMM hmm(0);
     size_t nFrame = m_timestamp.size();
     vector<vector<float> > pitchTracks;
     vector<float> freqSum = vector<float>(m_nCandidate);
@@ -362,11 +362,11 @@
     for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
     {
         pitchTracks.push_back(vector<float>(nFrame));
-        vector<vector<pair<double,double> > > tempPitchProb;
+        vector<vector<double> > tempObsProb;
         float centrePitch = 45 + 3 * iCandidate;

         for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) {
-            tempPitchProb.push_back(vector<pair<double,double> >());
+            vector<pair<double,double> > tempPitchProb; // fresh for every frame
             float sumProb = 0;
             float pitch = 0;
             float prob = 0;
@@ -377,17 +377,27 @@
                     boost::math::pdf(normalDist, pitch-centrePitch) /
                     maxNormalDist * 2;
                 sumProb += prob;
-                tempPitchProb[iFrame].push_back(
+                tempPitchProb.push_back(
                     pair<double,double>(pitch,prob));
             }
             for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb)
             {
-                tempPitchProb[iFrame][iProb].second /= sumProb;
+                tempPitchProb[iProb].second /= sumProb;
             }
+            tempObsProb.push_back(hmm.calculateObsProb(tempPitchProb));
         }

-        vector<float> mpOut = mp.process(tempPitchProb);
-        for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
+        vector<int> rawPitchPath = hmm.decodeViterbi(tempObsProb);
+        vector<float> mpOut;
+
+        for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame)
+        {
+            float freq = hmm.nearestFreq(rawPitchPath[iFrame],
+                                         m_pitchProb[iFrame]);
+            mpOut.push_back(freq); // for note processing below
+        }
+
+        for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame)
         {
             if (mpOut[iFrame] > 0) {
--- a/Makefile.inc	Fri Aug 19 13:40:11 2016 +0100
+++ b/Makefile.inc	Fri Mar 24 14:50:44 2017 +0000
@@ -15,7 +15,6 @@
            Yin.cpp \
            YinUtil.cpp \
            MonoNote.cpp \
-           MonoPitch.cpp \
            MonoNoteParameters.cpp \
            SparseHMM.cpp \
            MonoNoteHMM.cpp \
@@ -66,15 +65,14 @@
 # DO NOT DELETE

 libmain.o: PYinVamp.h Yin.h MeanFilter.h YinVamp.h LocalCandidatePYIN.h
-LocalCandidatePYIN.o: LocalCandidatePYIN.h Yin.h MeanFilter.h MonoPitch.h
+LocalCandidatePYIN.o: LocalCandidatePYIN.h Yin.h MeanFilter.h
 LocalCandidatePYIN.o: MonoPitchHMM.h SparseHMM.h YinUtil.h
 MonoNote.o: MonoNote.h MonoNoteHMM.h MonoNoteParameters.h SparseHMM.h
 MonoNoteHMM.o: MonoNoteHMM.h MonoNoteParameters.h SparseHMM.h
 MonoNoteParameters.o: MonoNoteParameters.h
-MonoPitch.o: MonoPitch.h MonoPitchHMM.h SparseHMM.h
 MonoPitchHMM.o: MonoPitchHMM.h SparseHMM.h
 PYinVamp.o: PYinVamp.h Yin.h MeanFilter.h MonoNote.h MonoNoteHMM.h
-PYinVamp.o: MonoNoteParameters.h SparseHMM.h MonoPitch.h MonoPitchHMM.h
+PYinVamp.o: MonoNoteParameters.h SparseHMM.h MonoPitchHMM.h
 SparseHMM.o: SparseHMM.h
 Yin.o: Yin.h MeanFilter.h YinUtil.h
 YinUtil.o: YinUtil.h MeanFilter.h
@@ -91,7 +89,6 @@
 LocalCandidatePYIN.o: Yin.h MeanFilter.h
 MonoNote.o: MonoNoteHMM.h MonoNoteParameters.h SparseHMM.h
 MonoNoteHMM.o: MonoNoteParameters.h SparseHMM.h
-MonoPitch.o: MonoPitchHMM.h SparseHMM.h
 MonoPitchHMM.o: SparseHMM.h
 PYinVamp.o: Yin.h MeanFilter.h
 Yin.o: MeanFilter.h
--- a/Makefile.osx	Fri Aug 19 13:40:11 2016 +0100
+++ b/Makefile.osx	Fri Mar 24 14:50:44 2017 +0000
@@ -2,7 +2,7 @@
 CFLAGS := $(ARCHFLAGS) -O3 -I../vamp-plugin-sdk -I../../vamp-plugin-sdk -I/usr/local/boost -Wall -fPIC
 CXXFLAGS := $(CFLAGS)

-LDFLAGS := -L../vamp-plugin-sdk -L../../vamp-plugin-sdk -lvamp-sdk $(ARCHFLAGS)
+LDFLAGS := -L../vamp-plugin-sdk -L../../vamp-plugin-sdk -lvamp-sdk $(ARCHFLAGS) -L/usr/local/lib
 PLUGIN_LDFLAGS := -dynamiclib $(LDFLAGS) -exported_symbols_list vamp-plugin.list
 TEST_LDFLAGS := $(LDFLAGS) -lboost_unit_test_framework
 PLUGIN_EXT := .dylib
--- a/MonoNote.cpp	Fri Aug 19 13:40:11 2016 +0100
+++ b/MonoNote.cpp	Fri Mar 24 14:50:44 2017 +0000
@@ -22,7 +22,7 @@
 using std::pair;

 MonoNote::MonoNote() :
-    hmm()
+    hmm(0)
 {
 }

@@ -39,11 +39,9 @@
         obsProb.push_back(hmm.calculateObsProb(pitchProb[iFrame]));
     }

-    vector<double> *scale = new vector<double>(pitchProb.size());
-
     vector<MonoNote::FrameOutput> out;

-    vector<int> path = hmm.decodeViterbi(obsProb, scale);
+    vector<int> path = hmm.decodeViterbi(obsProb);

     for (size_t iFrame = 0; iFrame < path.size(); ++iFrame)
     {
@@ -54,8 +52,6 @@
         stateKind = (path[iFrame]) % hmm.par.nSPP + 1;

         out.push_back(FrameOutput(iFrame, currPitch, stateKind));
-        // std::cerr << path[iFrame] << " -- "<< pitchProb[iFrame][0].first << " -- "<< currPitch << " -- " << stateKind << std::endl;
     }
-    delete scale;
     return(out);
 }
--- a/MonoNoteHMM.cpp	Fri Aug 19 13:40:11 2016 +0100
+++ b/MonoNoteHMM.cpp	Fri Mar 24 14:50:44 2017 +0000
@@ -21,7 +21,8 @@
 using std::vector;
 using std::pair;

-MonoNoteHMM::MonoNoteHMM() :
+MonoNoteHMM::MonoNoteHMM(int fixedLag) :
+    SparseHMM(fixedLag),
     par()
 {
     build();
@@ -36,14 +37,13 @@

     // what is the probability of pitched
     double pIsPitched = 0;
-    for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate)
+    for (size_t iCand = 0; iCand < nCandidate; ++iCand)
     {
-        // pIsPitched = pitchProb[iCandidate].second > pIsPitched ? pitchProb[iCandidate].second : pIsPitched;
-        pIsPitched += pitchProb[iCandidate].second;
+        pIsPitched += pitchProb[iCand].second;
     }

-    // pIsPitched = std::pow(pIsPitched, (1-par.priorWeight)) * std::pow(par.priorPitchedProb, par.priorWeight);
-    pIsPitched = pIsPitched * (1-par.priorWeight) + par.priorPitchedProb * par.priorWeight;
+    pIsPitched = pIsPitched * (1-par.priorWeight) +
+                     par.priorPitchedProb * par.priorWeight;

     vector<double> out = vector<double>(par.n);
     double tempProbSum = 0;
@@ -58,14 +58,15 @@
                 double minDist = 10000.0;
                 double minDistProb = 0;
                 size_t minDistCandidate = 0;
-                for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate)
+                for (size_t iCand = 0; iCand < nCandidate; ++iCand)
                 {
-                    double currDist = std::abs(getMidiPitch(i)-pitchProb[iCandidate].first);
+                    double currDist = std::abs(getMidiPitch(i)-
+                                               pitchProb[iCand].first);
                     if (currDist < minDist)
                     {
                         minDist = currDist;
-                        minDistProb = pitchProb[iCandidate].second;
-                        minDistCandidate = iCandidate;
+                        minDistProb = pitchProb[iCand].second;
+                        minDistCandidate = iCand;
                     }
                 }
                 tempProb = std::pow(minDistProb, par.yinTrust) *
@@ -107,6 +108,8 @@
     //    3. attack state
     //    ...

+    m_nState = par.n;
+
     // observation distributions
     for (size_t iState = 0; iState < par.n; ++iState)
     {
@@ -114,9 +117,9 @@
         if (iState % par.nSPP == 2)
         {
             // silent state starts tracking
-            init.push_back(1.0/(par.nS * par.nPPS));
+            m_init.push_back(1.0/(par.nS * par.nPPS));
         } else {
-            init.push_back(0.0);
+            m_init.push_back(0.0);
         }
     }

@@ -137,27 +140,27 @@
         size_t index = iPitch * par.nSPP;

         // transitions from attack state
-        from.push_back(index);
-        to.push_back(index);
-        transProb.push_back(par.pAttackSelftrans);
+        m_from.push_back(index);
+        m_to.push_back(index);
+        m_transProb.push_back(par.pAttackSelftrans);

-        from.push_back(index);
-        to.push_back(index+1);
-        transProb.push_back(1-par.pAttackSelftrans);
+        m_from.push_back(index);
+        m_to.push_back(index+1);
+        m_transProb.push_back(1-par.pAttackSelftrans);

         // transitions from stable state
-        from.push_back(index+1);
-        to.push_back(index+1); // to itself
-        transProb.push_back(par.pStableSelftrans);
+        m_from.push_back(index+1);
+        m_to.push_back(index+1); // to itself
+        m_transProb.push_back(par.pStableSelftrans);

-        from.push_back(index+1);
-        to.push_back(index+2); // to silent
-        transProb.push_back(par.pStable2Silent);
+        m_from.push_back(index+1);
+        m_to.push_back(index+2); // to silent
+        m_transProb.push_back(par.pStable2Silent);

         // the "easy" transitions from silent state
-        from.push_back(index+2);
-        to.push_back(index+2);
-        transProb.push_back(par.pSilentSelftrans);
+        m_from.push_back(index+2);
+        m_to.push_back(index+2);
+        m_transProb.push_back(par.pSilentSelftrans);


         // the more complicated transitions from the silent
@@ -171,7 +174,7 @@
             double semitoneDistance =
                 std::abs(fromPitch - toPitch) * 1.0 / par.nPPS;

-            // if (std::fmod(semitoneDistance, 1) == 0 && semitoneDistance > par.minSemitoneDistance)
+
             if (semitoneDistance == 0 ||
                 (semitoneDistance > par.minSemitoneDistance
                  && semitoneDistance < par.maxJump))
@@ -184,15 +187,19 @@

                 tempTransProbSilent.push_back(tempWeightSilent);

-                from.push_back(index+2);
-                to.push_back(toIndex);
+                m_from.push_back(index+2);
+                m_to.push_back(toIndex);
             }
         }
         for (size_t i = 0; i < tempTransProbSilent.size(); ++i)
         {
-            transProb.push_back((1-par.pSilentSelftrans) * tempTransProbSilent[i]/probSumSilent);
+            m_transProb.push_back((1-par.pSilentSelftrans) *
+                                  tempTransProbSilent[i]/probSumSilent);
         }
     }
+    m_nTrans = m_transProb.size();
+    m_delta = vector<double>(m_nState);
+    m_oldDelta = vector<double>(m_nState);
 }

 double
--- a/MonoNoteHMM.h	Fri Aug 19 13:40:11 2016 +0100
+++ b/MonoNoteHMM.h	Fri Mar 24 14:50:44 2017 +0000
@@ -27,8 +27,9 @@
 class MonoNoteHMM : public SparseHMM
 {
 public:
-    MonoNoteHMM();
+    MonoNoteHMM(int fixedLag);
     const std::vector<double> calculateObsProb(const vector<pair<double, double> >);
+
     double getMidiPitch(size_t index);
     double getFrequency(size_t index);
     void build();
--- a/MonoPitch.cpp	Fri Aug 19 13:40:11 2016 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
-
-/*
-    pYIN - A fundamental frequency estimator for monophonic audio
-    Centre for Digital Music, Queen Mary, University of London.
-
-    This program is free software; you can redistribute it and/or
-    modify it under the terms of the GNU General Public License as
-    published by the Free Software Foundation; either version 2 of the
-    License, or (at your option) any later version.  See the file
-    COPYING included with this distribution for more information.
-*/
-
-#include "MonoPitch.h"
-#include "MonoPitchHMM.h"
-#include <vector>
-
-#include <cstdio>
-#include <cmath>
-#include <complex>
-
-using std::vector;
-using std::pair;
-
-MonoPitch::MonoPitch() :
-    hmm()
-{
-}
-
-MonoPitch::~MonoPitch()
-{
-}
-
-const vector<float>
-MonoPitch::process(const vector<vector<pair<double, double> > > pitchProb)
-{
-    // std::cerr << "before observation prob calculation" << std::endl;
-    vector<vector<double> > obsProb;
-    for (size_t iFrame = 0; iFrame < pitchProb.size(); ++iFrame)
-    {
-        obsProb.push_back(hmm.calculateObsProb(pitchProb[iFrame]));
-    }
-
-    vector<double> *scale = new vector<double>(0);
-
-    vector<float> out;
-
-    // std::cerr << "before Viterbi decoding" << obsProb.size() << "ng" << obsProb[1].size() << std::endl;
-    vector<int> path = hmm.decodeViterbi(obsProb, scale);
-    // std::cerr << "after Viterbi decoding" << std::endl;
-
-    for (size_t iFrame = 0; iFrame < path.size(); ++iFrame)
-    {
-        // std::cerr << path[iFrame] << " " << hmm.m_freqs[path[iFrame]] << std::endl;
-        float hmmFreq = hmm.m_freqs[path[iFrame]];
-        float bestFreq = 0;
-        float leastDist = 10000;
-        if (hmmFreq > 0)
-        {
-            // This was a Yin estimate, so try to get original pitch estimate back
-            // ... a bit hacky, since we could have direclty saved the frequency
-            // that was assigned to the HMM bin in hmm.calculateObsProb -- but would
-            // have had to rethink the interface of that method.
-            for (size_t iPitch = 0; iPitch < pitchProb[iFrame].size(); ++iPitch)
-            {
-                float freq = 440. * std::pow(2, (pitchProb[iFrame][iPitch].first - 69)/12);
-                float dist = std::abs(hmmFreq-freq);
-                if (dist < leastDist)
-                {
-                    leastDist = dist;
-                    bestFreq = freq;
-                }
-            }
-        } else {
-            bestFreq = hmmFreq;
-        }
-        out.push_back(bestFreq);
-    }
-    delete scale;
-    return(out);
-}
--- a/MonoPitch.h	Fri Aug 19 13:40:11 2016 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
-
-/*
-    pYIN - A fundamental frequency estimator for monophonic audio
-    Centre for Digital Music, Queen Mary, University of London.
-
-    This program is free software; you can redistribute it and/or
-    modify it under the terms of the GNU General Public License as
-    published by the Free Software Foundation; either version 2 of the
-    License, or (at your option) any later version.  See the file
-    COPYING included with this distribution for more information.
-*/
-
-#ifndef _MONOPITCH_H_
-#define _MONOPITCH_H_
-
-#include "MonoPitchHMM.h"
-
-#include <iostream>
-#include <vector>
-#include <exception>
-
-using std::vector;
-using std::pair;
-
-class MonoPitch {
-public:
-    MonoPitch();
-    virtual ~MonoPitch();
-
-    // pitchProb is a frame-wise vector carrying a vector of pitch-probability pairs
-    const vector<float> process(const vector<vector<pair<double, double> > > pitchProb);
-private:
-    MonoPitchHMM hmm;
-};
-
-#endif
--- a/MonoPitchHMM.cpp	Fri Aug 19 13:40:11 2016 +0100
+++ b/MonoPitchHMM.cpp	Fri Mar 24 14:50:44 2017 +0000
@@ -17,11 +17,13 @@

 #include <cstdio>
 #include <cmath>
+#include <iostream>

 using std::vector;
 using std::pair;

-MonoPitchHMM::MonoPitchHMM() :
+MonoPitchHMM::MonoPitchHMM(int fixedLag) :
+    SparseHMM(fixedLag),
     m_minFreq(61.735),
     m_nBPS(5),
     m_nPitch(0),
@@ -32,6 +34,7 @@
 {
     m_transitionWidth = 5*(m_nBPS/2) + 1;
     m_nPitch = 69 * m_nBPS;
+    m_nState = 2 * m_nPitch; // voiced and unvoiced
     m_freqs = vector<double>(2*m_nPitch);
     for (int iPitch = 0; iPitch < m_nPitch; ++iPitch)
     {
@@ -83,7 +86,7 @@
 MonoPitchHMM::build()
 {
     // INITIAL VECTOR
-    init = vector<double>(2*m_nPitch, 1.0 / 2*m_nPitch);
+    m_init = vector<double>(2*m_nPitch, 1.0 / (2*m_nPitch)); // uniform prior

     // TRANSITIONS
     for (int iPitch = 0; iPitch < int(m_nPitch); ++iPitch)
@@ -112,22 +115,22 @@
         // TRANSITIONS TO CLOSE PITCH
         for (int i = minNextPitch; i <= maxNextPitch; ++i)
         {
-            from.push_back(iPitch);
-            to.push_back(i);
-            transProb.push_back(weights[i-minNextPitch] / weightSum * m_selfTrans);
+            m_from.push_back(iPitch);
+            m_to.push_back(i);
+            m_transProb.push_back(weights[i-minNextPitch] / weightSum * m_selfTrans);

-            from.push_back(iPitch);
-            to.push_back(i+m_nPitch);
-            transProb.push_back(weights[i-minNextPitch] / weightSum * (1-m_selfTrans));
+            m_from.push_back(iPitch);
+            m_to.push_back(i+m_nPitch);
+            m_transProb.push_back(weights[i-minNextPitch] / weightSum * (1-m_selfTrans));

-            from.push_back(iPitch+m_nPitch);
-            to.push_back(i+m_nPitch);
-            transProb.push_back(weights[i-minNextPitch] / weightSum * m_selfTrans);
+            m_from.push_back(iPitch+m_nPitch);
+            m_to.push_back(i+m_nPitch);
+            m_transProb.push_back(weights[i-minNextPitch] / weightSum * m_selfTrans);
             // transProb.push_back(weights[i-minNextPitch] / weightSum * 0.5);

-            from.push_back(iPitch+m_nPitch);
-            to.push_back(i);
-            transProb.push_back(weights[i-minNextPitch] / weightSum * (1-m_selfTrans));
+            m_from.push_back(iPitch+m_nPitch);
+            m_to.push_back(i);
+            m_transProb.push_back(weights[i-minNextPitch] / weightSum * (1-m_selfTrans));
             // transProb.push_back(weights[i-minNextPitch] / weightSum * 0.5);
         }

@@ -149,5 +152,45 @@
     // for (int i = 0; i < from.size(); ++i) {
     //     std::cerr << "P(["<< from[i] << " --> " << to[i] << "]) = " << transProb[i] << std::endl;
     // }
-
+    m_nTrans = m_transProb.size();
+    m_delta = vector<double>(m_nState);
+    m_oldDelta = vector<double>(m_nState);
 }
+
+/*
+Returns the pitchProb candidate frequency (Hz, from MIDI pitch in .first) that
+is nearest to m_freqs[state]; non-positive m_freqs values are returned as-is.
+*/
+const float
+MonoPitchHMM::nearestFreq(int state, vector<pair<double, double> > pitchProb)
+{
+    float hmmFreq = m_freqs[state];
+    // bestFreq stays 0 if hmmFreq > 0 but no candidate qualifies below
+    float bestFreq = 0;
+    float leastDist = 10000;
+    if (hmmFreq > 0)
+    {
+        // This was a Yin estimate, so try to get original pitch estimate back
+        // ... a bit hacky, since we could have directly saved the frequency
+        // that was assigned to the HMM bin in hmm.calculateObsProb -- but would
+        // have had to rethink the interface of that method.
+
+        // linear scan; candidates 10000 Hz or more away are never picked
+
+        for (size_t iPt = 0; iPt < pitchProb.size(); ++iPt)
+        {
+            float freq = 440. *
+                         std::pow(2,
+                                  (pitchProb[iPt].first - 69)/12);
+            float dist = std::abs(hmmFreq-freq);
+            if (dist < leastDist)
+            {
+                leastDist = dist;
+                bestFreq = freq;
+            }
+        }
+    } else {
+        bestFreq = hmmFreq; // non-positive state frequency passed through
+    }
+    return bestFreq;
+}
--- a/MonoPitchHMM.h	Fri Aug 19 13:40:11 2016 +0100
+++ b/MonoPitchHMM.h	Fri Mar 24 14:50:44 2017 +0000
@@ -26,8 +26,9 @@
 class MonoPitchHMM : public SparseHMM
 {
 public:
-    MonoPitchHMM();
+    MonoPitchHMM(int fixedLag);
     const std::vector<double> calculateObsProb(const vector<pair<double, double> >);
+    const float nearestFreq(int state, vector<pair<double, double> > pitchProb);
     void build();
     double m_minFreq; // 82.40689f/2
     int m_nBPS;
--- a/PYinVamp.cpp	Fri Aug 19 13:40:11 2016 +0100
+++ b/PYinVamp.cpp	Fri Mar 24 14:50:44 2017 +0000
@@ -13,7 +13,7 @@

 #include "PYinVamp.h"
 #include "MonoNote.h"
-#include "MonoPitch.h"
+#include "MonoPitchHMM.h"

 #include <vector>
 #include <algorithm>
@@ -42,14 +42,17 @@
     m_oSmoothedPitchTrack(0),
     m_oNotes(0),
     m_threshDistr(2.0f),
+    m_fixedLag(1.0f),
     m_outputUnvoiced(0.0f),
     m_preciseTime(0.0f),
     m_lowAmp(0.1f),
     m_onsetSensitivity(0.7f),
     m_pruneThresh(0.1f),
+    m_pitchHmm(0),
     m_pitchProb(0),
     m_timestamp(0),
-    m_level(0)
+    m_level(0),
+    m_pitchTrack(0)
 {
 }

@@ -151,6 +154,19 @@
     d.valueNames.push_back("Single Value 0.20");
     list.push_back(d);

+    d.valueNames.clear();
+
+    d.identifier = "fixedlag";
+    d.name = "Fixed-lag smoothing";
+    d.description = "Use fixed lag smoothing, not full Viterbi smoothing.";
+    d.unit = "";
+    d.minValue = 0.0f;
+    d.maxValue = 1.0f;
+    d.defaultValue = 1.0f; // must equal the constructor's m_fixedLag(1.0f)
+    d.isQuantized = true;
+    d.quantizeStep = 1.0f;
+    list.push_back(d);
+
     d.identifier = "outputunvoiced";
     d.valueNames.clear();
     d.name = "Output estimates classified as unvoiced?";
@@ -220,6 +236,9 @@
     if (identifier == "threshdistr") {
             return m_threshDistr;
     }
+    if (identifier == "fixedlag") {
+            return m_fixedLag;
+    }
     if (identifier == "outputunvoiced") {
             return m_outputUnvoiced;
     }
@@ -245,6 +264,10 @@
     {
         m_threshDistr = value;
     }
+    if (identifier == "fixedlag")
+    {
+        m_fixedLag = value;
+    }
     if (identifier == "outputunvoiced")
     {
         m_outputUnvoiced = value;
@@ -417,10 +440,14 @@
     m_yin.setThresholdDistr(m_threshDistr);
     m_yin.setFrameSize(m_blockSize);
     m_yin.setFast(!m_preciseTime);
+
+    if (m_fixedLag == 1.f) m_pitchHmm = MonoPitchHMM(100);
+    else                   m_pitchHmm = MonoPitchHMM(0);

     m_pitchProb.clear();
     m_timestamp.clear();
     m_level.clear();
+    m_pitchTrack.clear();
 /*
     std::cerr << "PYinVamp::reset"
           << ", blockSize = " << m_blockSize
@@ -431,8 +458,10 @@
 PYinVamp::FeatureSet
 PYinVamp::process(const float *const *inputBuffers, RealTime timestamp)
 {
+    // std::cerr << timestamp << std::endl; // per-block debug trace, disabled
     int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
-    timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate));
+    timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset,
+        lrintf(m_inputSampleRate));

     FeatureSet fs;

@@ -453,8 +482,6 @@

     m_level.push_back(yo.rms);

-    // First, get the things out of the way that we don't want to output
-    // immediately, but instead save for later.
     vector<pair<double, double> > tempPitchProb;
     for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate)
     {
@@ -469,9 +496,53 @@
                 (tempPitch, yo.freqProb[iCandidate].second*factor));
         }
     }
+
+    vector<double> tempObsProb = m_pitchHmm.calculateObsProb(tempPitchProb);
+    if (m_timestamp.empty())
+    {
+        m_pitchHmm.initialise(tempObsProb);
+    } else {
+        m_pitchHmm.process(tempObsProb);
+    }
+
     m_pitchProb.push_back(tempPitchProb);
     m_timestamp.push_back(timestamp);

+    int lag = m_pitchHmm.m_fixedLag;
+
+    if (m_fixedLag == 1.f) // do fixed-lag smoothing instead of full Viterbi
+    {
+        if (int(m_timestamp.size()) == lag + 1)
+        {
+            m_timestamp.pop_front();
+            m_pitchProb.pop_front();
+
+            Feature f;
+            f.hasTimestamp = true;
+            vector<int> rawPitchPath = m_pitchHmm.track();
+            float freq = m_pitchHmm.nearestFreq(rawPitchPath[0],
+                                                m_pitchProb[0]);
+            m_pitchTrack.push_back(freq);
+            f.timestamp = m_timestamp[0];
+            f.values.clear();
+
+            // different output modes
+            if (freq < 0 && (m_outputUnvoiced==0))
+            {
+
+            } else {
+                if (m_outputUnvoiced == 1)
+                {
+                    f.values.push_back(fabs(freq));
+                } else {
+                    f.values.push_back(freq);
+                }
+                fs[m_oSmoothedPitchTrack].push_back(f);
+            }
+        }
+    }
+
+
     // F0 CANDIDATES
     Feature f;
     f.hasTimestamp = true;
@@ -521,39 +592,49 @@
         return fs;
     }

-    // MONO-PITCH STUFF
-    MonoPitch mp;
-    vector<float> mpOut = mp.process(m_pitchProb);
-    for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame)
+    // ================== P I T C H  T R A C K =================================
+
+    vector<int> rawPitchPath = m_pitchHmm.track();
+
+    for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame)
     {
-        if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue;
+        float freq = m_pitchHmm.nearestFreq(rawPitchPath[iFrame],
+                                            m_pitchProb[iFrame]);
+        m_pitchTrack.push_back(freq); // for note processing below
+
         f.timestamp = m_timestamp[iFrame];
         f.values.clear();
+
+        // different output modes
+        if (freq < 0 && (m_outputUnvoiced==0)) continue;
         if (m_outputUnvoiced == 1)
         {
-            f.values.push_back(fabs(mpOut[iFrame]));
+            f.values.push_back(fabs(freq));
         } else {
-            f.values.push_back(mpOut[iFrame]);
+            f.values.push_back(freq);
         }
-
         fs[m_oSmoothedPitchTrack].push_back(f);
     }

-    // MONO-NOTE STUFF
-//    std::cerr << "Mono Note Stuff" << std::endl;
+    // ======================== N O T E S ======================================
     MonoNote mn;
     std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
-    for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) {
+    for (size_t iFrame = 0; iFrame < m_pitchTrack.size(); ++iFrame) {
         std::vector<std::pair<double, double> > temp;
-        if (mpOut[iFrame] > 0)
+        if (m_pitchTrack[iFrame] > 0)
         {
-            double tempPitch = 12 * std::log(mpOut[iFrame]/440)/std::log(2.) + 69;
+            double tempPitch = 12 *
+                std::log(m_pitchTrack[iFrame]/440)/std::log(2.) + 69;
             temp.push_back(std::pair<double,double>(tempPitch, .9));
+            // std::cerr << "tempPitch: " << tempPitch << std::endl;
         }
+        // std::cerr << "temp size: " << temp.size() << std::endl;
         smoothedPitch.push_back(temp);
     }
-    // vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb);
+
     vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
+    // std::cerr << "mnOut size: " << mnOut.size() << std::endl;
+    // std::cerr << "m_pitchTrack size: " << m_pitchTrack.size() << std::endl;

     // turning feature into a note feature
     f.hasTimestamp = true;
@@ -563,18 +644,30 @@
     int onsetFrame = 0;
     bool isVoiced = 0;
     bool oldIsVoiced = 0;
-    size_t nFrame = m_pitchProb.size();
+    size_t nFrame = m_pitchTrack.size();

     float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize;

-    std::vector<float> notePitchTrack; // collects pitches for one note at a time
+    // TODO: the body of the loop below should move into its own method.
+    // What it does:
+    // * takes the result of the note tracking HMM (mnOut)
+    // * collects the pitches of contiguous voiced frames
+    // * writes a note once it notices the voiced segment has ended
+    // Complications:
+    // * it needs a lookahead of two frames into m_level
+    // * it needs each frame's timestamp, which is rebuilt here from the
+    //   frame number, m_stepSize and the block offset computed below
+    int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
+    RealTime timestampOffset = Vamp::RealTime::frame2RealTime(offset,
+        lrintf(m_inputSampleRate));
+
+    std::vector<float> notePitchTrack; // collects pitches for 1 note at a time
     for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
     {
-        isVoiced = mnOut[iFrame].noteState < 3
-                   && smoothedPitch[iFrame].size() > 0
-                   && (iFrame >= nFrame-2
-                       || ((m_level[iFrame]/m_level[iFrame+2]) > m_onsetSensitivity));
-        // std::cerr << m_level[iFrame]/m_level[iFrame-1] << " " << isVoiced << std::endl;
+        isVoiced = mnOut[iFrame].noteState < 3
+            && smoothedPitch[iFrame].size() > 0
+            && (iFrame >= nFrame-2
+                || ((m_level[iFrame]/m_level[iFrame+2]) > m_onsetSensitivity));
         if (isVoiced && iFrame != nFrame-1)
         {
             if (oldIsVoiced == 0) // beginning of a note
@@ -586,16 +679,22 @@
         } else { // not currently voiced
             if (oldIsVoiced == 1) // end of note
             {
-                // std::cerr << notePitchTrack.size() << " " << minNoteFrames << std::endl;
                 if (notePitchTrack.size() >= minNoteFrames)
                 {
                     std::sort(notePitchTrack.begin(), notePitchTrack.end());
                     float medianPitch = notePitchTrack[notePitchTrack.size()/2];
-                    float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440;
+                    float medianFreq =
+                        std::pow(2,(medianPitch - 69) / 12) * 440;
                     f.values.clear();
                     f.values.push_back(medianFreq);
-                    f.timestamp = m_timestamp[onsetFrame];
-                    f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame];
+                    RealTime start = RealTime::frame2RealTime(
+                        onsetFrame * m_stepSize, lrintf(m_inputSampleRate)) +
+                        timestampOffset;
+                    RealTime end   = RealTime::frame2RealTime(
+                            iFrame * m_stepSize, lrintf(m_inputSampleRate)) +
+                        timestampOffset;
+                    f.timestamp = start;
+                    f.duration = end - start;
                     fs[m_oNotes].push_back(f);
                 }
                 notePitchTrack.clear();
--- a/PYinVamp.h	Fri Aug 19 13:40:11 2016 +0100
+++ b/PYinVamp.h	Fri Mar 24 14:50:44 2017 +0000
@@ -17,6 +17,7 @@
 #include <vamp-sdk/Plugin.h>

 #include "Yin.h"
+#include "MonoPitchHMM.h"

 class PYinVamp : public Vamp::Plugin
 {
@@ -71,14 +72,23 @@
     mutable int m_oNotes;

     float m_threshDistr;
+    float m_fixedLag;
     float m_outputUnvoiced;
     float m_preciseTime;
     float m_lowAmp;
     float m_onsetSensitivity;
     float m_pruneThresh;
-    vector<vector<pair<double, double> > > m_pitchProb;
-    vector<Vamp::RealTime> m_timestamp;
+
+    MonoPitchHMM m_pitchHmm;
+
+    deque<vector<pair<double, double> > > m_pitchProb;
+    deque<Vamp::RealTime> m_timestamp;
     vector<float> m_level;
+    vector<float> m_pitchTrack;
+
+    // for note writing
+    // vector<float> m_notePitchTrack; // contains pitches of one current note
+    // bool m_oldIsVoiced;
 };

 #endif
--- a/SparseHMM.cpp	Fri Aug 19 13:40:11 2016 +0100
+++ b/SparseHMM.cpp	Fri Mar 24 14:50:44 2017 +0000
@@ -19,6 +19,22 @@
 using std::vector;
 using std::pair;

+SparseHMM::SparseHMM(int fixedLag) :
+    m_fixedLag(fixedLag), // PYinVamp passes 0 when it wants full Viterbi
+    m_nState(0),
+    m_nTrans(0),
+    m_init(0),
+    m_from(0),
+    m_to(0),
+    m_transProb(0),
+    m_scale(0),
+    m_psi(0),
+    m_delta(0),
+    m_oldDelta(0)
+{
+    // starts empty; MonoNoteHMM / MonoPitchHMM fill in states and transitions
+}
+
 const vector<double>
 SparseHMM::calculateObsProb(const vector<pair<double, double> > )
 {
@@ -26,103 +42,144 @@
     return(vector<double>());
 }

+// Base-class build() does nothing; the header declares it virtual.
+void SparseHMM::build()
+{}
+
 const std::vector<int>
-SparseHMM::decodeViterbi(std::vector<vector<double> > obsProb,
-                         vector<double> *scale)
+SparseHMM::decodeViterbi(std::vector<vector<double> > obsProb)
 {
-    if (obsProb.size() < 1) {
+    size_t nFrame = obsProb.size();
+    if (nFrame < 1) {
         return vector<int>();
     }

-    size_t nState = init.size();
-    size_t nFrame = obsProb.size();
-
-    // check for consistency
-    size_t nTrans = transProb.size();
-
-    // declaring variables
-    std::vector<double> delta = std::vector<double>(nState);
-    std::vector<double> oldDelta = std::vector<double>(nState);
-    vector<vector<int> > psi; //  "matrix" of remembered indices of the best transitions
-    vector<int> path = vector<int>(nFrame, nState-1); // the final output path (current assignment arbitrary, makes sense only for Chordino, where nChord-1 is the "no chord" label)
+    initialise(obsProb[0]);
+
+    // rest of forward step
+    for (size_t iFrame = 1; iFrame < nFrame; ++iFrame)
+    {
+        process(obsProb[iFrame]);
+    }
+
+    vector<int> path = track();
+    return(path);
+}
+
+// Drop the stored scale/psi history and zero both delta buffers in place.
+void SparseHMM::reset()
+{
+    m_psi.clear();
+    m_scale.clear();
+    m_oldDelta.assign(m_oldDelta.size(), 0.0);
+    m_delta.assign(m_delta.size(), 0.0);
+}
+
+// Seed decoding with the first frame; an all-zero frame falls back to uniform, as in process().
+void SparseHMM::initialise(vector<double> firstObs)
+{
+    reset();

     double deltasum = 0;

     // initialise first frame
-    for (size_t iState = 0; iState < nState; ++iState)
+    for (size_t iState = 0; iState < m_nState; ++iState)
     {
-        oldDelta[iState] = init[iState] * obsProb[0][iState];
-        // std::cerr << iState << " ----- " << init[iState] << std::endl;
-        deltasum += oldDelta[iState];
+        m_oldDelta[iState] = m_init[iState] * firstObs[iState];
+        deltasum += m_oldDelta[iState];
     }

-    for (size_t iState = 0; iState < nState; ++iState)
+    for (size_t iState = 0; iState < m_nState; ++iState)
     {
-        oldDelta[iState] /= deltasum; // normalise (scale)
-        // std::cerr << oldDelta[iState] << std::endl;
+        m_oldDelta[iState] = (deltasum > 0) ? m_oldDelta[iState] / deltasum : 1.0 / m_nState;
     }

-    scale->push_back(1.0/deltasum);
-    psi.push_back(vector<int>(nState,0));
+    m_scale.push_back(deltasum > 0 ? 1.0/deltasum : 1.0); // unit scale when nothing to normalise
+    m_psi.push_back(vector<int>(m_nState,0));
+}

-    // rest of forward step
-    for (size_t iFrame = 1; iFrame < nFrame; ++iFrame)
+int
+SparseHMM::process(vector<double> newObs)
+{
+    vector<int> tempPsi = vector<int>(m_nState,0);
+
+    // calculate best previous state for every current state
+    size_t fromState;
+    size_t toState;
+    double currentTransProb;
+    double currentValue;
+
+    // this is the "sparse" loop
+    for (size_t iTrans = 0; iTrans < m_nTrans; ++iTrans)
     {
-        deltasum = 0;
-        psi.push_back(vector<int>(nState,0));
-
-        // calculate best previous state for every current state
-        size_t fromState;
-        size_t toState;
-        double currentTransProb;
-        double currentValue;
+        fromState = m_from[iTrans];
+        toState = m_to[iTrans];
+        currentTransProb = m_transProb[iTrans];

-        // this is the "sparse" loop
-        for (size_t iTrans = 0; iTrans < nTrans; ++iTrans)
+        currentValue = m_oldDelta[fromState] * currentTransProb;
+        if (currentValue > m_delta[toState])
         {
-            fromState = from[iTrans];
-            toState = to[iTrans];
-            currentTransProb = transProb[iTrans];
-
-            currentValue = oldDelta[fromState] * currentTransProb;
-            if (currentValue > delta[toState])
-            {
-                delta[toState] = currentValue; // will be multiplied by the right obs later!
-                psi[iFrame][toState] = fromState;
-            }
-        }
-
-        for (size_t jState = 0; jState < nState; ++jState)
-        {
-            delta[jState] *= obsProb[iFrame][jState];
-            deltasum += delta[jState];
-        }
-
-        if (deltasum > 0)
-        {
-            for (size_t iState = 0; iState < nState; ++iState)
-            {
-                oldDelta[iState] = delta[iState] / deltasum; // normalise (scale)
-                delta[iState] = 0;
-            }
-            scale->push_back(1.0/deltasum);
-        } else
-        {
-            std::cerr << "WARNING: Viterbi has been fed some zero probabilities, at least they become zero at frame " <<  iFrame << " in combination with the model." << std::endl;
-            for (size_t iState = 0; iState < nState; ++iState)
-            {
-                oldDelta[iState] = 1.0/nState;
-                delta[iState] = 0;
-            }
-            scale->push_back(1.0);
+            // will be multiplied by the right obs later!
+            m_delta[toState] = currentValue;
+            tempPsi[toState] = fromState;
         }
     }
+    m_psi.push_back(tempPsi);

+
+    double deltasum = 0;
+    for (size_t jState = 0; jState < m_nState; ++jState)
+    {
+        m_delta[jState] *= newObs[jState];
+        deltasum += m_delta[jState];
+    }
+
+    if (deltasum > 0)
+    {
+        for (size_t iState = 0; iState < m_nState; ++iState)
+        {
+            m_oldDelta[iState] = m_delta[iState] / deltasum;// normalise (scale)
+            m_delta[iState] = 0;
+        }
+        m_scale.push_back(1.0/deltasum);
+    } else
+    {
+        std::cerr << "WARNING: Viterbi has been fed some zero "
+            "probabilities, at least they become zero "
+            "in combination with the model." << std::endl;
+        for (size_t iState = 0; iState < m_nState; ++iState)
+        {
+            m_oldDelta[iState] = 1.0/m_nState;
+            m_delta[iState] = 0;
+        }
+        m_scale.push_back(1.0);
+    }
+
+    if (m_fixedLag > 0 && m_psi.size() > m_fixedLag)
+    {
+        m_psi.pop_front();
+        m_scale.pop_front();
+    }
+
+    // std::cerr << m_fixedLag << " " << m_psi.size() << std::endl;
+
+    return 0;
+}
+
+const vector<int>
+SparseHMM::track()
+{
     // initialise backward step
+    size_t nFrame = m_psi.size();
+
+    // The final output path (current assignment arbitrary, makes sense only for
+    // Chordino, where nChord-1 is the "no chord" label)
+    vector<int> path = vector<int>(nFrame, m_nState-1);
+
     double bestValue = 0;
-    for (size_t iState = 0; iState < nState; ++iState)
+    for (size_t iState = 0; iState < m_nState; ++iState)
     {
-        double currentValue = oldDelta[iState];
+        double currentValue = m_oldDelta[iState];
         if (currentValue > bestValue)
         {
             bestValue = currentValue;
@@ -130,16 +187,11 @@
         }
     }

-    // rest of backward step
+    // Rest of backward step
     for (int iFrame = nFrame-2; iFrame != -1; --iFrame)
     {
-        path[iFrame] = psi[iFrame+1][path[iFrame+1]];
+        path[iFrame] = m_psi[iFrame+1][path[iFrame+1]];
     }
-
-    // for (size_t iState = 0; iState < nState; ++iState)
-    // {
-    //     // std::cerr << psi[2][iState] << std::endl;
-    // }
-
+
     return path;
-}
+}
\ No newline at end of file
--- a/SparseHMM.h	Fri Aug 19 13:40:11 2016 +0100
+++ b/SparseHMM.h	Fri Mar 24 14:50:44 2017 +0000
@@ -15,21 +15,39 @@
 #define _SPARSEHMM_H_

 #include <vector>
+#include <queue>
 #include <cstdio>

 using std::vector;
+using std::deque;
 using std::pair;

 class SparseHMM
 {
 public:
-    virtual const std::vector<double> calculateObsProb(const vector<pair<double, double> >);
-    const std::vector<int> decodeViterbi(std::vector<vector<double> > obs,
-                                   vector<double> *scale);
-    vector<double> init;
-    vector<size_t> from;
-    vector<size_t> to;
-    vector<double> transProb;
+    SparseHMM(int fixedLag);
+    virtual ~SparseHMM() {} // class has virtual members: make deletion via SparseHMM* well-defined
+    virtual const std::vector<double>
+                           calculateObsProb(const vector<pair<double, double> >);
+    virtual void           build();
+    const std::vector<int> decodeViterbi(std::vector<vector<double> > obs);
+    void                   reset();
+    void                   initialise(vector<double> firstObs);
+    int                    process(vector<double> newObs);
+    const vector<int>      track();
+    // "sparse" HMM definition
+    int m_fixedLag;
+    int m_nState;
+    int m_nTrans;
+    vector<double> m_init;
+    vector<size_t> m_from;
+    vector<size_t> m_to;
+    vector<double> m_transProb;
+    // variables for decoding
+    deque<double> m_scale;
+    deque<vector<int> > m_psi;
+    vector<double> m_delta;
+    vector<double> m_oldDelta;
 };

 #endif
--- a/win32-build/pyin.pro	Fri Aug 19 13:40:11 2016 +0100
+++ b/win32-build/pyin.pro	Fri Mar 24 14:50:44 2017 +0000
@@ -13,7 +13,6 @@
     ../Yin.cpp \
     ../SparseHMM.cpp \
     ../MonoPitchHMM.cpp \
-    ../MonoPitch.cpp \
     ../MonoNoteParameters.cpp \
     ../MonoNoteHMM.cpp \
     ../MonoNote.cpp \
@@ -27,7 +26,6 @@
     ../Yin.h \
     ../SparseHMM.h \
     ../MonoPitchHMM.h \
-    ../MonoPitch.h \
     ../MonoNoteParameters.h \
     ../MonoNoteHMM.h \
     ../MonoNote.h \