changeset 91:854d9403c5be

note separation based on RMS
author matthiasm
date Wed, 07 Jan 2015 16:30:16 +0000
parents b087967c4417
children 37e59aa69322
files MonoNoteHMM.cpp PYIN.cpp PYIN.h Yin.cpp
diffstat 4 files changed, 22 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/MonoNoteHMM.cpp	Wed Jan 07 15:22:03 2015 +0000
+++ b/MonoNoteHMM.cpp	Wed Jan 07 16:30:16 2015 +0000
@@ -49,7 +49,7 @@
     double tempProbSum = 0;
     for (size_t i = 0; i < par.n; ++i)
     {
-        if (i % 4 != 2)
+        if (i % par.nSPP != 2)
         {
             // std::cerr << getMidiPitch(i) << std::endl;
             double tempProb = 0;
@@ -81,7 +81,7 @@
     
     for (size_t i = 0; i < par.n; ++i)
     {
-        if (i % 4 != 2)
+        if (i % par.nSPP != 2)
         {
             if (tempProbSum > 0) 
             {
@@ -103,16 +103,15 @@
     //    0. attack state
     //    1. stable state
     //    2. silent state
-    //    3. inter state
-    // 4-6. second-lowest pitch
-    //    4. attack state
+    // 3-5. second-lowest pitch
+    //    3. attack state
     //    ...
     
     // observation distributions
     for (size_t iState = 0; iState < par.n; ++iState)
     {
         pitchDistr.push_back(boost::math::normal(0,1));
-        if (iState % 4 == 2)
+        if (iState % par.nSPP == 2)
         {
             // silent state starts tracking
             init.push_back(1.0/(par.nS * par.nPPS));
@@ -156,9 +155,9 @@
         to.push_back(index+2); // to silent
         transProb.push_back(par.pStable2Silent);
 
-        from.push_back(index+1);
-        to.push_back(index+3); // to inter-note
-        transProb.push_back(1-par.pStableSelftrans-par.pStable2Silent);
+        // from.push_back(index+1);
+        // to.push_back(index+3); // to inter-note
+        // transProb.push_back(1-par.pStableSelftrans-par.pStable2Silent);
 
         // the "easy" transitions from silent state
         from.push_back(index+2);
--- a/PYIN.cpp	Wed Jan 07 15:22:03 2015 +0000
+++ b/PYIN.cpp	Wed Jan 07 16:30:16 2015 +0000
@@ -46,7 +46,8 @@
     m_threshDistr(2.0f),
     m_outputUnvoiced(0.0f),
     m_pitchProb(0),
-    m_timestamp(0)
+    m_timestamp(0),
+    m_level(0)
 {
 }
 
@@ -344,6 +345,7 @@
     
     m_pitchProb.clear();
     m_timestamp.clear();
+    m_level.clear();
 /*    
     std::cerr << "PYIN::reset"
           << ", blockSize = " << m_blockSize
@@ -363,6 +365,8 @@
     Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers);
     delete [] dInputBuffers;
 
+    m_level.push_back(yo.rms);
+
     // First, get the things out of the way that we don't want to output 
     // immediately, but instead save for later.
     vector<pair<double, double> > tempPitchProb;
@@ -470,7 +474,11 @@
     std::vector<float> notePitchTrack; // collects pitches for one note at a time
     for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
     {
-        isVoiced = mnOut[iFrame].noteState < 3 && smoothedPitch[iFrame].size() > 0;
+        isVoiced = mnOut[iFrame].noteState < 3
+                   && smoothedPitch[iFrame].size() > 0
+                   && (iFrame == nFrame-1 
+                       || ((m_level[iFrame+1]/m_level[iFrame]) < 1.25));
+        // std::cerr << m_level[iFrame]/m_level[iFrame-1] << std::endl;
         if (isVoiced && iFrame != nFrame-1)
         {
             if (oldIsVoiced == 0) // beginning of a note
@@ -481,7 +489,7 @@
             float pitch = smoothedPitch[iFrame][0].first;
             notePitchTrack.push_back(pitch); // add to the note's pitch track
         } else { // not currently voiced
-            if (oldIsVoiced == 1 && notePitchTrack.size() > 17) // end of note
+            if (oldIsVoiced == 1 && notePitchTrack.size() > 14) // end of note
             {
                 std::sort(notePitchTrack.begin(), notePitchTrack.end());
                 float medianPitch = notePitchTrack[notePitchTrack.size()/2];
--- a/PYIN.h	Wed Jan 07 15:22:03 2015 +0000
+++ b/PYIN.h	Wed Jan 07 16:30:16 2015 +0000
@@ -74,6 +74,7 @@
     float m_outputUnvoiced;
     vector<vector<pair<double, double> > > m_pitchProb;
     vector<Vamp::RealTime> m_timestamp;
+    vector<float> m_level;
 };
 
 #endif
--- a/Yin.cpp	Wed Jan 07 15:22:03 2015 +0000
+++ b/Yin.cpp	Wed Jan 07 16:30:16 2015 +0000
@@ -97,8 +97,8 @@
     {
         probSum += peakProbability[iBin];
     }
-        
-    Yin::YinOutput yo(0,0,0);
+    double rms = std::sqrt(YinUtil::sumSquare(in, 0, m_yinBufferSize)/m_yinBufferSize);
+    Yin::YinOutput yo(0,0,rms);
     for (size_t iBuf = 0; iBuf < m_yinBufferSize; ++iBuf)
     {
         yo.salience.push_back(peakProbability[iBuf]);