Mercurial > hg > pyin
changeset 91:854d9403c5be
note separation based on RMS
author | matthiasm |
---|---|
date | Wed, 07 Jan 2015 16:30:16 +0000 |
parents | b087967c4417 |
children | 37e59aa69322 |
files | MonoNoteHMM.cpp PYIN.cpp PYIN.h Yin.cpp |
diffstat | 4 files changed, 22 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/MonoNoteHMM.cpp Wed Jan 07 15:22:03 2015 +0000
+++ b/MonoNoteHMM.cpp Wed Jan 07 16:30:16 2015 +0000
@@ -49,7 +49,7 @@
     double tempProbSum = 0;
     for (size_t i = 0; i < par.n; ++i)
     {
-        if (i % 4 != 2)
+        if (i % par.nSPP != 2)
         {
             // std::cerr << getMidiPitch(i) << std::endl;
             double tempProb = 0;
@@ -81,7 +81,7 @@
 
     for (size_t i = 0; i < par.n; ++i)
     {
-        if (i % 4 != 2)
+        if (i % par.nSPP != 2)
         {
             if (tempProbSum > 0)
             {
@@ -103,16 +103,15 @@
     // 0. attack state
     // 1. stable state
     // 2. silent state
-    // 3. inter state
-    // 4-6. second-lowest pitch
-    // 4. attack state
+    // 3-5. second-lowest pitch
+    // 3. attack state
     // ...
 
     // observation distributions
     for (size_t iState = 0; iState < par.n; ++iState)
     {
         pitchDistr.push_back(boost::math::normal(0,1));
-        if (iState % 4 == 2)
+        if (iState % par.nSPP == 2)
         {
             // silent state starts tracking
             init.push_back(1.0/(par.nS * par.nPPS));
@@ -156,9 +155,9 @@
         to.push_back(index+2); // to silent
         transProb.push_back(par.pStable2Silent);
 
-        from.push_back(index+1);
-        to.push_back(index+3); // to inter-note
-        transProb.push_back(1-par.pStableSelftrans-par.pStable2Silent);
+        // from.push_back(index+1);
+        // to.push_back(index+3); // to inter-note
+        // transProb.push_back(1-par.pStableSelftrans-par.pStable2Silent);
 
         // the "easy" transitions from silent state
         from.push_back(index+2);
--- a/PYIN.cpp Wed Jan 07 15:22:03 2015 +0000
+++ b/PYIN.cpp Wed Jan 07 16:30:16 2015 +0000
@@ -46,7 +46,8 @@
     m_threshDistr(2.0f),
     m_outputUnvoiced(0.0f),
     m_pitchProb(0),
-    m_timestamp(0)
+    m_timestamp(0),
+    m_level(0)
 {
 }
 
@@ -344,6 +345,7 @@
 
     m_pitchProb.clear();
     m_timestamp.clear();
+    m_level.clear();
 /*
     std::cerr << "PYIN::reset"
           << ", blockSize = " << m_blockSize
@@ -363,6 +365,8 @@
     Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers);
     delete [] dInputBuffers;
 
+    m_level.push_back(yo.rms);
+
     // First, get the things out of the way that we don't want to output
     // immediately, but instead save for later.
     vector<pair<double, double> > tempPitchProb;
@@ -470,7 +474,11 @@
     std::vector<float> notePitchTrack; // collects pitches for one note at a time
     for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
     {
-        isVoiced = mnOut[iFrame].noteState < 3 && smoothedPitch[iFrame].size() > 0;
+        isVoiced = mnOut[iFrame].noteState < 3
+                   && smoothedPitch[iFrame].size() > 0
+                   && (iFrame == nFrame-1
+                       || ((m_level[iFrame+1]/m_level[iFrame]) < 1.25));
+        // std::cerr << m_level[iFrame]/m_level[iFrame-1] << std::endl;
         if (isVoiced && iFrame != nFrame-1)
         {
             if (oldIsVoiced == 0) // beginning of a note
@@ -481,7 +489,7 @@
             float pitch = smoothedPitch[iFrame][0].first;
             notePitchTrack.push_back(pitch); // add to the note's pitch track
         } else { // not currently voiced
-            if (oldIsVoiced == 1 && notePitchTrack.size() > 17) // end of note
+            if (oldIsVoiced == 1 && notePitchTrack.size() > 14) // end of note
             {
                 std::sort(notePitchTrack.begin(), notePitchTrack.end());
                 float medianPitch = notePitchTrack[notePitchTrack.size()/2];
--- a/PYIN.h Wed Jan 07 15:22:03 2015 +0000
+++ b/PYIN.h Wed Jan 07 16:30:16 2015 +0000
@@ -74,6 +74,7 @@
     float m_outputUnvoiced;
     vector<vector<pair<double, double> > > m_pitchProb;
     vector<Vamp::RealTime> m_timestamp;
+    vector<float> m_level;
 };
 
 #endif
--- a/Yin.cpp Wed Jan 07 15:22:03 2015 +0000
+++ b/Yin.cpp Wed Jan 07 16:30:16 2015 +0000
@@ -97,8 +97,8 @@
     {
         probSum += peakProbability[iBin];
     }
-
-    Yin::YinOutput yo(0,0,0);
+    double rms = std::sqrt(YinUtil::sumSquare(in, 0, m_yinBufferSize)/m_yinBufferSize);
+    Yin::YinOutput yo(0,0,rms);
     for (size_t iBuf = 0; iBuf < m_yinBufferSize; ++iBuf)
     {
         yo.salience.push_back(peakProbability[iBuf]);