# HG changeset patch # User matthiasm # Date 1420648216 0 # Node ID 228aae2253c372206d65db189f6d32fbd1e03756 # Parent 686d59c7c3bef3742bf4ca126f8fdcbbaa0508c4 note separation based on RMS diff -r 686d59c7c3be -r 228aae2253c3 MonoNoteHMM.cpp --- a/MonoNoteHMM.cpp Wed Jan 07 15:22:03 2015 +0000 +++ b/MonoNoteHMM.cpp Wed Jan 07 16:30:16 2015 +0000 @@ -49,7 +49,7 @@ double tempProbSum = 0; for (size_t i = 0; i < par.n; ++i) { - if (i % 4 != 2) + if (i % par.nSPP != 2) { // std::cerr << getMidiPitch(i) << std::endl; double tempProb = 0; @@ -81,7 +81,7 @@ for (size_t i = 0; i < par.n; ++i) { - if (i % 4 != 2) + if (i % par.nSPP != 2) { if (tempProbSum > 0) { @@ -103,16 +103,15 @@ // 0. attack state // 1. stable state // 2. silent state - // 3. inter state - // 4-6. second-lowest pitch - // 4. attack state + // 3-5. second-lowest pitch + // 3. attack state // ... // observation distributions for (size_t iState = 0; iState < par.n; ++iState) { pitchDistr.push_back(boost::math::normal(0,1)); - if (iState % 4 == 2) + if (iState % par.nSPP == 2) { // silent state starts tracking init.push_back(1.0/(par.nS * par.nPPS)); @@ -156,9 +155,9 @@ to.push_back(index+2); // to silent transProb.push_back(par.pStable2Silent); - from.push_back(index+1); - to.push_back(index+3); // to inter-note - transProb.push_back(1-par.pStableSelftrans-par.pStable2Silent); + // from.push_back(index+1); + // to.push_back(index+3); // to inter-note + // transProb.push_back(1-par.pStableSelftrans-par.pStable2Silent); // the "easy" transitions from silent state from.push_back(index+2); diff -r 686d59c7c3be -r 228aae2253c3 PYIN.cpp --- a/PYIN.cpp Wed Jan 07 15:22:03 2015 +0000 +++ b/PYIN.cpp Wed Jan 07 16:30:16 2015 +0000 @@ -46,7 +46,8 @@ m_threshDistr(2.0f), m_outputUnvoiced(0.0f), m_pitchProb(0), - m_timestamp(0) + m_timestamp(0), + m_level(0) { } @@ -344,6 +345,7 @@ m_pitchProb.clear(); m_timestamp.clear(); + m_level.clear(); /* std::cerr << "PYIN::reset" << ", blockSize = " << m_blockSize @@ -363,6 +365,8 @@ Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers); delete [] dInputBuffers; + m_level.push_back(yo.rms); + // First, get the things out of the way that we don't want to output // immediately, but instead save for later. vector > tempPitchProb; @@ -470,7 +474,11 @@ std::vector notePitchTrack; // collects pitches for one note at a time for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) { - isVoiced = mnOut[iFrame].noteState < 3 && smoothedPitch[iFrame].size() > 0; + isVoiced = mnOut[iFrame].noteState < 3 + && smoothedPitch[iFrame].size() > 0 + && (iFrame == nFrame-1 + || ((m_level[iFrame+1]/m_level[iFrame]) < 1.25)); + // std::cerr << m_level[iFrame]/m_level[iFrame-1] << std::endl; if (isVoiced && iFrame != nFrame-1) { if (oldIsVoiced == 0) // beginning of a note @@ -481,7 +489,7 @@ float pitch = smoothedPitch[iFrame][0].first; notePitchTrack.push_back(pitch); // add to the note's pitch track } else { // not currently voiced - if (oldIsVoiced == 1 && notePitchTrack.size() > 17) // end of note + if (oldIsVoiced == 1 && notePitchTrack.size() > 14) // end of note { std::sort(notePitchTrack.begin(), notePitchTrack.end()); float medianPitch = notePitchTrack[notePitchTrack.size()/2]; diff -r 686d59c7c3be -r 228aae2253c3 PYIN.h --- a/PYIN.h Wed Jan 07 15:22:03 2015 +0000 +++ b/PYIN.h Wed Jan 07 16:30:16 2015 +0000 @@ -74,6 +74,7 @@ float m_outputUnvoiced; vector > > m_pitchProb; vector m_timestamp; + vector m_level; }; #endif diff -r 686d59c7c3be -r 228aae2253c3 Yin.cpp --- a/Yin.cpp Wed Jan 07 15:22:03 2015 +0000 +++ b/Yin.cpp Wed Jan 07 16:30:16 2015 +0000 @@ -97,8 +97,8 @@ { probSum += peakProbability[iBin]; } - - Yin::YinOutput yo(0,0,0); + double rms = std::sqrt(YinUtil::sumSquare(in, 0, m_yinBufferSize)/m_yinBufferSize); + Yin::YinOutput yo(0,0,rms); for (size_t iBuf = 0; iBuf < m_yinBufferSize; ++iBuf) { yo.salience.push_back(peakProbability[iBuf]);