Mercurial > hg > pyin

--- a/MonoNote.cpp	Fri Jul 03 17:34:38 2015 +0100
+++ b/MonoNote.cpp	Mon Jul 13 12:10:06 2015 +0100
@@ -52,7 +52,6 @@
         stateKind = (path[iFrame]) % hmm.par.nSPP + 1;

         out.push_back(FrameOutput(iFrame, currPitch, stateKind));
-        // std::cerr << path[iFrame] << " -- "<< pitchProb[iFrame][0].first << " -- "<< currPitch << " -- " << stateKind << std::endl;
     }
     return(out);
 }
--- a/MonoNoteHMM.cpp	Fri Jul 03 17:34:38 2015 +0100
+++ b/MonoNoteHMM.cpp	Mon Jul 13 12:10:06 2015 +0100
@@ -37,14 +37,13 @@

     // what is the probability of pitched
     double pIsPitched = 0;
-    for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate)
+    for (size_t iCand = 0; iCand < nCandidate; ++iCand)
     {
-        // pIsPitched = pitchProb[iCandidate].second > pIsPitched ? pitchProb[iCandidate].second : pIsPitched;
-        pIsPitched += pitchProb[iCandidate].second;
+        pIsPitched += pitchProb[iCand].second;
     }

-    // pIsPitched = std::pow(pIsPitched, (1-par.priorWeight)) * std::pow(par.priorPitchedProb, par.priorWeight);
-    pIsPitched = pIsPitched * (1-par.priorWeight) + par.priorPitchedProb * par.priorWeight;
+    pIsPitched = pIsPitched * (1-par.priorWeight) +
+                     par.priorPitchedProb * par.priorWeight;

     vector<double> out = vector<double>(par.n);
     double tempProbSum = 0;
@@ -59,14 +58,15 @@
                 double minDist = 10000.0;
                 double minDistProb = 0;
                 size_t minDistCandidate = 0;
-                for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate)
+                for (size_t iCand = 0; iCand < nCandidate; ++iCand)
                 {
-                    double currDist = std::abs(getMidiPitch(i)-pitchProb[iCandidate].first);
+                    double currDist = std::abs(getMidiPitch(i)-
+                                               pitchProb[iCand].first);
                     if (currDist < minDist)
                     {
                         minDist = currDist;
-                        minDistProb = pitchProb[iCandidate].second;
-                        minDistCandidate = iCandidate;
+                        minDistProb = pitchProb[iCand].second;
+                        minDistCandidate = iCand;
                     }
                 }
                 tempProb = std::pow(minDistProb, par.yinTrust) *
@@ -174,7 +174,7 @@
             double semitoneDistance =
                 std::abs(fromPitch - toPitch) * 1.0 / par.nPPS;

-            // if (std::fmod(semitoneDistance, 1) == 0 && semitoneDistance > par.minSemitoneDistance)
+
             if (semitoneDistance == 0 ||
                 (semitoneDistance > par.minSemitoneDistance
                  && semitoneDistance < par.maxJump))
@@ -193,7 +193,8 @@
         }
         for (size_t i = 0; i < tempTransProbSilent.size(); ++i)
         {
-            m_transProb.push_back((1-par.pSilentSelftrans) * tempTransProbSilent[i]/probSumSilent);
+            m_transProb.push_back((1-par.pSilentSelftrans) *
+                                  tempTransProbSilent[i]/probSumSilent);
         }
     }
     m_nTrans = m_transProb.size();
--- a/MonoNoteHMM.h	Fri Jul 03 17:34:38 2015 +0100
+++ b/MonoNoteHMM.h	Mon Jul 13 12:10:06 2015 +0100
@@ -29,6 +29,7 @@
 public:
     MonoNoteHMM(int fixedLag);
     const std::vector<double> calculateObsProb(const vector<pair<double, double> >);
+
     double getMidiPitch(size_t index);
     double getFrequency(size_t index);
     void build();
--- a/PYinVamp.cpp	Fri Jul 03 17:34:38 2015 +0100
+++ b/PYinVamp.cpp	Mon Jul 13 12:10:06 2015 +0100
@@ -53,7 +53,8 @@
     m_pitchHmm(0),
     m_pitchProb(0),
     m_timestamp(0),
-    m_level(0)
+    m_level(0),
+    m_pitchTrack(0)
 {
 }

@@ -448,6 +449,7 @@
     m_pitchProb.clear();
     m_timestamp.clear();
     m_level.clear();
+    m_pitchTrack.clear();
 /*
     std::cerr << "PYinVamp::reset"
           << ", blockSize = " << m_blockSize
@@ -458,8 +460,10 @@
 PYinVamp::FeatureSet
 PYinVamp::process(const float *const *inputBuffers, RealTime timestamp)
 {
+    std::cerr << timestamp << std::endl;
     int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
-    timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate));
+    timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset,
+        lrintf(m_inputSampleRate));

     FeatureSet fs;

@@ -508,7 +512,7 @@

     int lag = m_pitchHmm.m_fixedLag;

-    if (m_fixedLag == 1.f)
+    if (m_fixedLag == 1.f) // do fixed-lag smoothing instead of full Viterbi
     {
         if (m_timestamp.size() == lag + 1)
         {
@@ -520,6 +524,7 @@
             vector<int> rawPitchPath = m_pitchHmm.track();
             float freq = m_pitchHmm.nearestFreq(rawPitchPath[0],
                                                 m_pitchProb[0]);
+            m_pitchTrack.push_back(freq);
             f.timestamp = m_timestamp[0];
             f.values.clear();

@@ -592,14 +597,13 @@
     // ================== P I T C H  T R A C K =================================

     vector<int> rawPitchPath = m_pitchHmm.track();
-    vector<float> mpOut;

     for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame)
     {
         float freq = m_pitchHmm.nearestFreq(rawPitchPath[iFrame],
                                             m_pitchProb[iFrame]);
-        mpOut.push_back(freq); // for note processing below
-
+        m_pitchTrack.push_back(freq); // for note processing below
+
         f.timestamp = m_timestamp[iFrame];
         f.values.clear();

@@ -615,68 +619,90 @@
     }

     // ======================== N O T E S ======================================
-    // MonoNote mn;
-    // std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
-    // for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) {
-    //     std::vector<std::pair<double, double> > temp;
-    //     if (mpOut[iFrame] > 0)
-    //     {
-    //         double tempPitch = 12 *
-    //                            std::log(mpOut[iFrame]/440)/std::log(2.) + 69;
-    //         temp.push_back(std::pair<double,double>(tempPitch, .9));
-    //     }
-    //     smoothedPitch.push_back(temp);
-    // }
-    // // vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb);
-    // vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
+    MonoNote mn;
+    std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
+    for (size_t iFrame = 0; iFrame < m_pitchTrack.size(); ++iFrame) {
+        std::vector<std::pair<double, double> > temp;
+        if (m_pitchTrack[iFrame] > 0)
+        {
+            double tempPitch = 12 *
+                std::log(m_pitchTrack[iFrame]/440)/std::log(2.) + 69;
+            temp.push_back(std::pair<double,double>(tempPitch, .9));
+            // std::cerr << "tempPitch: " << tempPitch << std::endl;
+        }
+        // std::cerr << "temp size: " << temp.size() << std::endl;
+        smoothedPitch.push_back(temp);
+    }
+
+    vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
+    std::cerr << "mnOut size: " << mnOut.size() << std::endl;
+    std::cerr << "m_pitchTrack size: " << m_pitchTrack.size() << std::endl;

-    // // turning feature into a note feature
-    // f.hasTimestamp = true;
-    // f.hasDuration = true;
-    // f.values.clear();
+    // turning feature into a note feature
+    f.hasTimestamp = true;
+    f.hasDuration = true;
+    f.values.clear();

-    // int onsetFrame = 0;
-    // bool isVoiced = 0;
-    // bool oldIsVoiced = 0;
-    // size_t nFrame = m_pitchProb.size();
+    int onsetFrame = 0;
+    bool isVoiced = 0;
+    bool oldIsVoiced = 0;
+    size_t nFrame = m_pitchTrack.size();

-    // float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize;
+    float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize;

-    // // the body of the loop below should be in a function/method
-    // std::vector<float> notePitchTrack; // collects pitches for one note at a time
-    // for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
-    // {
-    //     isVoiced = mnOut[iFrame].noteState < 3
-    //                && smoothedPitch[iFrame].size() > 0
-    //                && (iFrame >= nFrame-2
-    //                    || ((m_level[iFrame]/m_level[iFrame+2]) >
-    //                     m_onsetSensitivity));
-    //     if (isVoiced && iFrame != nFrame-1)
-    //     {
-    //         if (oldIsVoiced == 0) // beginning of a note
-    //         {
-    //             onsetFrame = iFrame;
-    //         }
-    //         float pitch = smoothedPitch[iFrame][0].first;
-    //         notePitchTrack.push_back(pitch); // add to the note's pitch track
-    //     } else { // not currently voiced
-    //         if (oldIsVoiced == 1) // end of note
-    //         {
-    //             if (notePitchTrack.size() >= minNoteFrames)
-    //             {
-    //                 std::sort(notePitchTrack.begin(), notePitchTrack.end());
-    //                 float medianPitch = notePitchTrack[notePitchTrack.size()/2];
-    //                 float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440;
-    //                 f.values.clear();
-    //                 f.values.push_back(medianFreq);
-    //                 f.timestamp = m_timestamp[onsetFrame];
-    //                 f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame];
-    //                 fs[m_oNotes].push_back(f);
-    //             }
-    //             notePitchTrack.clear();
-    //         }
-    //     }
-    //     oldIsVoiced = isVoiced;
-    // }
+    // The body of the loop below should be moved into a function/method.
+    // What the loop does:
+    // * takes the result of the note-tracking HMM
+    // * collects the pitches of contiguous voiced frames
+    // * writes a note once it notices the voiced segment has ended
+    // Complications:
+    // * it needs a lookahead of two frames for m_level (a questionable design)
+    // * it needs the timestamp (which can be derived from the frame number)
+    // *
+    int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
+    RealTime timestampOffset = Vamp::RealTime::frame2RealTime(offset,
+        lrintf(m_inputSampleRate));
+
+    std::vector<float> notePitchTrack; // collects pitches for 1 note at a time
+    for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
+    {
+        isVoiced = mnOut[iFrame].noteState < 3
+            && smoothedPitch[iFrame].size() > 0
+            && (iFrame >= nFrame-2
+                || ((m_level[iFrame]/m_level[iFrame+2]) > m_onsetSensitivity));
+        if (isVoiced && iFrame != nFrame-1)
+        {
+            if (oldIsVoiced == 0) // beginning of a note
+            {
+                onsetFrame = iFrame;
+            }
+            float pitch = smoothedPitch[iFrame][0].first;
+            notePitchTrack.push_back(pitch); // add to the note's pitch track
+        } else { // not currently voiced
+            if (oldIsVoiced == 1) // end of note
+            {
+                if (notePitchTrack.size() >= minNoteFrames)
+                {
+                    std::sort(notePitchTrack.begin(), notePitchTrack.end());
+                    float medianPitch = notePitchTrack[notePitchTrack.size()/2];
+                    float medianFreq =
+                        std::pow(2,(medianPitch - 69) / 12) * 440;
+                    f.values.clear();
+                    f.values.push_back(medianFreq);
+                    RealTime start = RealTime::frame2RealTime(
+                        onsetFrame * m_stepSize, lrintf(m_inputSampleRate)) +
+                        timestampOffset;
+                    RealTime end   = RealTime::frame2RealTime(
+                            iFrame * m_stepSize, lrintf(m_inputSampleRate)) +
+                        timestampOffset;
+                    f.timestamp = start;
+                    f.duration = end - start;
+                    fs[m_oNotes].push_back(f);
+                }
+                notePitchTrack.clear();
+            }
+        }
+        oldIsVoiced = isVoiced;
+    }
     return fs;
 }
--- a/PYinVamp.h	Fri Jul 03 17:34:38 2015 +0100
+++ b/PYinVamp.h	Mon Jul 13 12:10:06 2015 +0100
@@ -84,6 +84,11 @@
     deque<vector<pair<double, double> > > m_pitchProb;
     deque<Vamp::RealTime> m_timestamp;
     vector<float> m_level;
+    vector<float> m_pitchTrack;
+
+    // for note writing
+    // vector<float> m_notePitchTrack; // contains pitches of one current note
+    // bool m_oldIsVoiced;
 };

 #endif
--- a/YinUtil.cpp	Fri Jul 03 17:34:38 2015 +0100
+++ b/YinUtil.cpp	Mon Jul 13 12:10:06 2015 +0100
@@ -187,7 +187,9 @@
 static float single20[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000};

 std::vector<double>
-YinUtil::yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize, const size_t minTau0, const size_t maxTau0)
+YinUtil::yinProb(const double *yinBuffer, const size_t prior,
+                 const size_t yinBufferSize, const size_t minTau0,
+                 const size_t maxTau0)
 {
     size_t minTau = 2;
     size_t maxTau = yinBufferSize;