# HG changeset patch # User Matthias Mauch # Date 1436785806 -3600 # Node ID 83978b93aac16c3b9d41f702ab5eb4b904cefe64 # Parent 926c292fa3ffffbc62284f59c58fa6e8c2f8b6fa ah, didn't commit when I stopped working... what did I do? diff -r 926c292fa3ff -r 83978b93aac1 MonoNote.cpp --- a/MonoNote.cpp Fri Jul 03 17:34:38 2015 +0100 +++ b/MonoNote.cpp Mon Jul 13 12:10:06 2015 +0100 @@ -52,7 +52,6 @@ stateKind = (path[iFrame]) % hmm.par.nSPP + 1; out.push_back(FrameOutput(iFrame, currPitch, stateKind)); - // std::cerr << path[iFrame] << " -- "<< pitchProb[iFrame][0].first << " -- "<< currPitch << " -- " << stateKind << std::endl; } return(out); } diff -r 926c292fa3ff -r 83978b93aac1 MonoNoteHMM.cpp --- a/MonoNoteHMM.cpp Fri Jul 03 17:34:38 2015 +0100 +++ b/MonoNoteHMM.cpp Mon Jul 13 12:10:06 2015 +0100 @@ -37,14 +37,13 @@ // what is the probability of pitched double pIsPitched = 0; - for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate) + for (size_t iCand = 0; iCand < nCandidate; ++iCand) { - // pIsPitched = pitchProb[iCandidate].second > pIsPitched ? pitchProb[iCandidate].second : pIsPitched; - pIsPitched += pitchProb[iCandidate].second; + pIsPitched += pitchProb[iCand].second; } - // pIsPitched = std::pow(pIsPitched, (1-par.priorWeight)) * std::pow(par.priorPitchedProb, par.priorWeight); - pIsPitched = pIsPitched * (1-par.priorWeight) + par.priorPitchedProb * par.priorWeight; + pIsPitched = pIsPitched * (1-par.priorWeight) + + par.priorPitchedProb * par.priorWeight; vector out = vector(par.n); double tempProbSum = 0; @@ -59,14 +58,15 @@ double minDist = 10000.0; double minDistProb = 0; size_t minDistCandidate = 0; - for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate) + for (size_t iCand = 0; iCand < nCandidate; ++iCand) { - double currDist = std::abs(getMidiPitch(i)-pitchProb[iCandidate].first); + double currDist = std::abs(getMidiPitch(i)- + pitchProb[iCand].first); if (currDist < minDist) { minDist = currDist; - minDistProb = pitchProb[iCandidate].second; - minDistCandidate = iCandidate; + minDistProb = pitchProb[iCand].second; + minDistCandidate = iCand; } } tempProb = std::pow(minDistProb, par.yinTrust) * @@ -174,7 +174,7 @@ double semitoneDistance = std::abs(fromPitch - toPitch) * 1.0 / par.nPPS; - // if (std::fmod(semitoneDistance, 1) == 0 && semitoneDistance > par.minSemitoneDistance) + if (semitoneDistance == 0 || (semitoneDistance > par.minSemitoneDistance && semitoneDistance < par.maxJump)) @@ -193,7 +193,8 @@ } for (size_t i = 0; i < tempTransProbSilent.size(); ++i) { - m_transProb.push_back((1-par.pSilentSelftrans) * tempTransProbSilent[i]/probSumSilent); + m_transProb.push_back((1-par.pSilentSelftrans) * + tempTransProbSilent[i]/probSumSilent); } } m_nTrans = m_transProb.size(); diff -r 926c292fa3ff -r 83978b93aac1 MonoNoteHMM.h --- a/MonoNoteHMM.h Fri Jul 03 17:34:38 2015 +0100 +++ b/MonoNoteHMM.h Mon Jul 13 12:10:06 2015 +0100 @@ -29,6 +29,7 @@ public: MonoNoteHMM(int fixedLag); const std::vector calculateObsProb(const vector >); + double getMidiPitch(size_t index); double getFrequency(size_t index); void build(); diff -r 926c292fa3ff -r 83978b93aac1 PYinVamp.cpp --- a/PYinVamp.cpp Fri Jul 03 17:34:38 2015 +0100 +++ b/PYinVamp.cpp Mon Jul 13 12:10:06 2015 +0100 @@ -53,7 +53,8 @@ m_pitchHmm(0), m_pitchProb(0), m_timestamp(0), - m_level(0) + m_level(0), + m_pitchTrack(0) { } @@ -448,6 +449,7 @@ m_pitchProb.clear(); m_timestamp.clear(); m_level.clear(); + m_pitchTrack.clear(); /* std::cerr << "PYinVamp::reset" << ", blockSize = " << m_blockSize @@ -458,8 +460,10 @@ PYinVamp::FeatureSet PYinVamp::process(const float *const *inputBuffers, RealTime timestamp) { + std::cerr << timestamp << std::endl; int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4; - timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate)); + timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, + lrintf(m_inputSampleRate)); FeatureSet fs; @@ -508,7 +512,7 @@ int lag = m_pitchHmm.m_fixedLag; - if (m_fixedLag == 1.f) + if (m_fixedLag == 1.f) // do fixed-lag smoothing instead of full Viterbi { if (m_timestamp.size() == lag + 1) { @@ -520,6 +524,7 @@ vector rawPitchPath = m_pitchHmm.track(); float freq = m_pitchHmm.nearestFreq(rawPitchPath[0], m_pitchProb[0]); + m_pitchTrack.push_back(freq); f.timestamp = m_timestamp[0]; f.values.clear(); @@ -592,14 +597,13 @@ // ================== P I T C H T R A C K ================================= vector rawPitchPath = m_pitchHmm.track(); - vector mpOut; for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame) { float freq = m_pitchHmm.nearestFreq(rawPitchPath[iFrame], m_pitchProb[iFrame]); - mpOut.push_back(freq); // for note processing below - + m_pitchTrack.push_back(freq); // for note processing below + f.timestamp = m_timestamp[iFrame]; f.values.clear(); @@ -615,68 +619,90 @@ } // ======================== N O T E S ====================================== - // MonoNote mn; - // std::vector > > smoothedPitch; - // for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) { - // std::vector > temp; - // if (mpOut[iFrame] > 0) - // { - // double tempPitch = 12 * - // std::log(mpOut[iFrame]/440)/std::log(2.) + 69; - // temp.push_back(std::pair(tempPitch, .9)); - // } - // smoothedPitch.push_back(temp); - // } - // // vector mnOut = mn.process(m_pitchProb); - // vector mnOut = mn.process(smoothedPitch); + MonoNote mn; + std::vector > > smoothedPitch; + for (size_t iFrame = 0; iFrame < m_pitchTrack.size(); ++iFrame) { + std::vector > temp; + if (m_pitchTrack[iFrame] > 0) + { + double tempPitch = 12 * + std::log(m_pitchTrack[iFrame]/440)/std::log(2.) + 69; + temp.push_back(std::pair(tempPitch, .9)); + // std::cerr << "tempPitch: " << tempPitch << std::endl; + } + // std::cerr << "temp size: " << temp.size() << std::endl; + smoothedPitch.push_back(temp); + } + + vector mnOut = mn.process(smoothedPitch); + std::cerr << "mnOut size: " << mnOut.size() << std::endl; + std::cerr << "m_pitchTrack size: " << m_pitchTrack.size() << std::endl; - // // turning feature into a note feature - // f.hasTimestamp = true; - // f.hasDuration = true; - // f.values.clear(); + // turning feature into a note feature + f.hasTimestamp = true; + f.hasDuration = true; + f.values.clear(); - // int onsetFrame = 0; - // bool isVoiced = 0; - // bool oldIsVoiced = 0; - // size_t nFrame = m_pitchProb.size(); + int onsetFrame = 0; + bool isVoiced = 0; + bool oldIsVoiced = 0; + size_t nFrame = m_pitchTrack.size(); - // float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize; + float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize; - // // the body of the loop below should be in a function/method - // std::vector notePitchTrack; // collects pitches for one note at a time - // for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) - // { - // isVoiced = mnOut[iFrame].noteState < 3 - // && smoothedPitch[iFrame].size() > 0 - // && (iFrame >= nFrame-2 - // || ((m_level[iFrame]/m_level[iFrame+2]) > - // m_onsetSensitivity)); - // if (isVoiced && iFrame != nFrame-1) - // { - // if (oldIsVoiced == 0) // beginning of a note - // { - // onsetFrame = iFrame; - // } - // float pitch = smoothedPitch[iFrame][0].first; - // notePitchTrack.push_back(pitch); // add to the note's pitch track - // } else { // not currently voiced - // if (oldIsVoiced == 1) // end of note - // { - // if (notePitchTrack.size() >= minNoteFrames) - // { - // std::sort(notePitchTrack.begin(), notePitchTrack.end()); - // float medianPitch = notePitchTrack[notePitchTrack.size()/2]; - // float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440; - // f.values.clear(); - // f.values.push_back(medianFreq); - // f.timestamp = m_timestamp[onsetFrame]; - // f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame]; - // fs[m_oNotes].push_back(f); - // } - // notePitchTrack.clear(); - // } - // } - // oldIsVoiced = isVoiced; - // } + // the body of the loop below should be in a function/method + // but what does it actually do?? + // * takes the result of the note tracking HMM + // * collects contiguously pitched pitches + // * writes a note once it notices the voiced segment has ended + // complications: + // * it needs a lookahead of two frames for m_level (wtf was I thinking) + // * it needs to know the timestamp (which can be guessed from the frame no) + // * + int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4; + RealTime timestampOffset = Vamp::RealTime::frame2RealTime(offset, + lrintf(m_inputSampleRate)); + + std::vector notePitchTrack; // collects pitches for 1 note at a time + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) + { + isVoiced = mnOut[iFrame].noteState < 3 + && smoothedPitch[iFrame].size() > 0 + && (iFrame >= nFrame-2 + || ((m_level[iFrame]/m_level[iFrame+2]) > m_onsetSensitivity)); + if (isVoiced && iFrame != nFrame-1) + { + if (oldIsVoiced == 0) // beginning of a note + { + onsetFrame = iFrame; + } + float pitch = smoothedPitch[iFrame][0].first; + notePitchTrack.push_back(pitch); // add to the note's pitch track + } else { // not currently voiced + if (oldIsVoiced == 1) // end of note + { + if (notePitchTrack.size() >= minNoteFrames) + { + std::sort(notePitchTrack.begin(), notePitchTrack.end()); + float medianPitch = notePitchTrack[notePitchTrack.size()/2]; + float medianFreq = + std::pow(2,(medianPitch - 69) / 12) * 440; + f.values.clear(); + f.values.push_back(medianFreq); + RealTime start = RealTime::frame2RealTime( + onsetFrame * m_stepSize, lrintf(m_inputSampleRate)) + + timestampOffset; + RealTime end = RealTime::frame2RealTime( + iFrame * m_stepSize, lrintf(m_inputSampleRate)) + + timestampOffset; + f.timestamp = start; + f.duration = end - start; + fs[m_oNotes].push_back(f); + } + notePitchTrack.clear(); + } + } + oldIsVoiced = isVoiced; + } return fs; } diff -r 926c292fa3ff -r 83978b93aac1 PYinVamp.h --- a/PYinVamp.h Fri Jul 03 17:34:38 2015 +0100 +++ b/PYinVamp.h Mon Jul 13 12:10:06 2015 +0100 @@ -84,6 +84,11 @@ deque > > m_pitchProb; deque m_timestamp; vector m_level; + vector m_pitchTrack; + + // for note writing + // vector m_notePitchTrack; // contains pitches of one current note + // bool m_oldIsVoiced; }; #endif diff -r 926c292fa3ff -r 83978b93aac1 YinUtil.cpp --- a/YinUtil.cpp Fri Jul 03 17:34:38 2015 +0100 +++ b/YinUtil.cpp Mon Jul 13 12:10:06 2015 +0100 @@ -187,7 +187,9 @@ static float single20[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; std::vector -YinUtil::yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize, const size_t minTau0, const size_t maxTau0) +YinUtil::yinProb(const double *yinBuffer, const size_t prior, + const size_t yinBufferSize, const size_t minTau0, + const size_t maxTau0) { size_t minTau = 2; size_t maxTau = yinBufferSize;