Mercurial > hg > pyin

--- a/MonoNote.cpp	Fri Mar 24 15:35:32 2017 +0000
+++ b/MonoNote.cpp	Wed May 17 14:50:10 2017 +0100
@@ -33,16 +33,37 @@
 const vector<MonoNote::FrameOutput>
 MonoNote::process(const vector<vector<pair<double, double> > > pitchProb)
 {
-    vector<vector<double> > obsProb;
-    for (size_t iFrame = 0; iFrame < pitchProb.size(); ++iFrame)
-    {
-        obsProb.push_back(hmm.calculateObsProb(pitchProb[iFrame]));
+    // Previously, this built up a single matrix of probabilities, by
+    // calling calculateObsProb to get a column for each frame in
+    // pitchProb.
+    //
+    // The number of distinct states depends on MonoNoteParameters,
+    // but the defaults have 3 states per pitch, 3 pitches per MIDI
+    // note, and 69 MIDI notes, giving 621 states per frame. With a
+    // frame step size of 256 at 44100Hz sample rate, a 3-minute song
+    // has about 30K frames leading to a 20 million element
+    // probability matrix.
+    //
+    // Since the matrix is very sparse, we can avoid some of this by
+    // feeding the (sparse implementation of) HMM one column at a
+    // time.
+
+    vector<int> path;
+
+    if (!pitchProb.empty()) {
+
+        hmm.initialise(hmm.calculateObsProb(pitchProb[0]));
+
+        for (size_t iFrame = 1; iFrame < pitchProb.size(); ++iFrame)
+        {
+            hmm.process(hmm.calculateObsProb(pitchProb[iFrame]));
+        }
+
+        path = hmm.track();
     }

     vector<MonoNote::FrameOutput> out;
-
-    vector<int> path = hmm.decodeViterbi(obsProb);
-
+
     for (size_t iFrame = 0; iFrame < path.size(); ++iFrame)
     {
         double currPitch = -1;
@@ -53,5 +74,6 @@

         out.push_back(FrameOutput(iFrame, currPitch, stateKind));
     }
+
     return(out);
 }
--- a/MonoNoteHMM.cpp	Fri Mar 24 15:35:32 2017 +0000
+++ b/MonoNoteHMM.cpp	Wed May 17 14:50:10 2017 +0100
@@ -92,7 +92,7 @@
             out[i] = (1-pIsPitched) / (par.nPPS * par.nS);
         }
     }
-
+
     return(out);
 }
--- a/PYinVamp.cpp	Fri Mar 24 15:35:32 2017 +0000
+++ b/PYinVamp.cpp	Wed May 17 14:50:10 2017 +0100
@@ -322,7 +322,6 @@
     d.description = "Estimated fundamental frequency candidates.";
     d.unit = "Hz";
     d.hasFixedBinCount = false;
-    // d.binCount = 1;
     d.hasKnownExtents = true;
     d.minValue = m_fmin;
     d.maxValue = 500;
@@ -335,10 +334,9 @@

     d.identifier = "f0probs";
     d.name = "Candidate Probabilities";
-    d.description = "Probabilities  of estimated fundamental frequency candidates.";
+    d.description = "Probabilities of estimated fundamental frequency candidates.";
     d.unit = "";
     d.hasFixedBinCount = false;
-    // d.binCount = 1;
     d.hasKnownExtents = true;
     d.minValue = 0;
     d.maxValue = 1;
@@ -382,13 +380,11 @@

     d.identifier = "smoothedpitchtrack";
     d.name = "Smoothed Pitch Track";
-    d.description = ".";
+    d.description = "Frame-by-frame pitch estimate after smoothing";
     d.unit = "Hz";
     d.hasFixedBinCount = true;
     d.binCount = 1;
     d.hasKnownExtents = false;
-    // d.minValue = 0;
-    // d.maxValue = 1;
     d.isQuantized = false;
     d.sampleType = OutputDescriptor::FixedSampleRate;
     d.sampleRate = (m_inputSampleRate / m_stepSize);
@@ -399,7 +395,6 @@
     d.identifier = "notes";
     d.name = "Notes";
     d.description = "Derived fixed-pitch note frequencies";
-    // d.unit = "MIDI unit";
     d.unit = "Hz";
     d.hasFixedBinCount = true;
     d.binCount = 1;
@@ -420,11 +415,6 @@
     if (channels < getMinChannelCount() ||
 	channels > getMaxChannelCount()) return false;

-/*
-    std::cerr << "PYinVamp::initialise: channels = " << channels
-          << ", stepSize = " << stepSize << ", blockSize = " << blockSize
-          << std::endl;
-*/
     m_channels = channels;
     m_stepSize = stepSize;
     m_blockSize = blockSize;
@@ -448,17 +438,11 @@
     m_timestamp.clear();
     m_level.clear();
     m_pitchTrack.clear();
-/*
-    std::cerr << "PYinVamp::reset"
-          << ", blockSize = " << m_blockSize
-          << std::endl;
-*/
 }

 PYinVamp::FeatureSet
 PYinVamp::process(const float *const *inputBuffers, RealTime timestamp)
 {
-//    std::cerr << timestamp << std::endl;
     int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
     timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset,
         lrintf(m_inputSampleRate));
@@ -542,7 +526,6 @@
         }
     }

-
     // F0 CANDIDATES
     Feature f;
     f.hasTimestamp = true;
@@ -584,16 +567,20 @@
 PYinVamp::getRemainingFeatures()
 {
     FeatureSet fs;
-    Feature f;
-    f.hasTimestamp = true;
-    f.hasDuration = false;
-
+
     if (m_pitchProb.empty()) {
         return fs;
     }

+    Feature f;
+    f.hasTimestamp = true;
+    f.hasDuration = false;
+
     // ================== P I T C H  T R A C K =================================

+    // NB we do this even in fixed-lag mode, as we still have the last
+    // lag's-worth of pitch probs to consume
+
     vector<int> rawPitchPath = m_pitchHmm.track();

     for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame)
@@ -601,10 +588,10 @@
         float freq = m_pitchHmm.nearestFreq(rawPitchPath[iFrame],
                                             m_pitchProb[iFrame]);
         m_pitchTrack.push_back(freq); // for note processing below
-
+
         f.timestamp = m_timestamp[iFrame];
         f.values.clear();
-
+
         // different output modes
         if (freq < 0 && (m_outputUnvoiced==0)) continue;
         if (m_outputUnvoiced == 1)
@@ -615,9 +602,15 @@
         }
         fs[m_oSmoothedPitchTrack].push_back(f);
     }
-
-    // ======================== N O T E S ======================================
-    MonoNote mn;
+
+    addNoteFeatures(fs);
+
+    return fs;
+}
+
+void
+PYinVamp::addNoteFeatures(FeatureSet &fs)
+{
     std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
     for (size_t iFrame = 0; iFrame < m_pitchTrack.size(); ++iFrame) {
         std::vector<std::pair<double, double> > temp;
@@ -626,17 +619,18 @@
             double tempPitch = 12 *
                 std::log(m_pitchTrack[iFrame]/440)/std::log(2.) + 69;
             temp.push_back(std::pair<double,double>(tempPitch, .9));
-            // std::cerr << "tempPitch: " << tempPitch << std::endl;
         }
-        // std::cerr << "temp size: " << temp.size() << std::endl;
         smoothedPitch.push_back(temp);
     }

+    MonoNote mn;
     vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
+
     std::cerr << "mnOut size: " << mnOut.size() << std::endl;
     std::cerr << "m_pitchTrack size: " << m_pitchTrack.size() << std::endl;

     // turning feature into a note feature
+    Feature f;
     f.hasTimestamp = true;
     f.hasDuration = true;
     f.values.clear();
@@ -702,5 +696,4 @@
         }
         oldIsVoiced = isVoiced;
     }
-    return fs;
 }
--- a/PYinVamp.h	Fri Mar 24 15:35:32 2017 +0000
+++ b/PYinVamp.h	Wed May 17 14:50:10 2017 +0100
@@ -86,9 +86,7 @@
     vector<float> m_level;
     vector<float> m_pitchTrack;

-    // for note writing
-    // vector<float> m_notePitchTrack; // contains pitches of one current note
-    // bool m_oldIsVoiced;
+    void addNoteFeatures(FeatureSet &fs);
 };

 #endif
--- a/SparseHMM.cpp	Fri Mar 24 15:35:32 2017 +0000
+++ b/SparseHMM.cpp	Wed May 17 14:50:10 2017 +0100
@@ -35,13 +35,6 @@

 }

-vector<double>
-SparseHMM::calculateObsProb(const vector<pair<double, double> > )
-{
-    // dummy (virtual?) implementation to be overloaded
-    return(vector<double>());
-}
-
 void
 SparseHMM::build()
 { }
@@ -126,7 +119,6 @@
     }
     m_psi.push_back(tempPsi);

-
     double deltasum = 0;
     for (int jState = 0; jState < m_nState; ++jState)
     {
--- a/SparseHMM.h	Fri Mar 24 15:35:32 2017 +0000
+++ b/SparseHMM.h	Wed May 17 14:50:10 2017 +0100
@@ -25,15 +25,18 @@
 class SparseHMM
 {
 public:
-    SparseHMM(int fixedLag);
-    virtual std::vector<double>
-                           calculateObsProb(const vector<pair<double, double> >);
+    SparseHMM(int fixedLag); // set fixedLag == 0 when doing full Viterbi
+
+    virtual std::vector<double> calculateObsProb
+    (const vector<pair<double, double> >) = 0;
+
     virtual void           build();
     std::vector<int>       decodeViterbi(std::vector<vector<double> > obs);
     void                   reset();
     void                   initialise(vector<double> firstObs);
     int                    process(vector<double> newObs);
     vector<int>            track();
+
     // "sparse" HMM definition
     int m_fixedLag;
     int m_nState;