# HG changeset patch # User Matthias Mauch # Date 1435922564 -3600 # Node ID 080fe18f5ebf52eacec8e7a4f97997fd5c3804ac # Parent 72b3892c1d511e7122d37fd31ebd27a7718172e1 refactored Viterbi * perhaps I even discovered a bug (probablity sum was not reset for every frame) diff -r 72b3892c1d51 -r 080fe18f5ebf Makefile.osx --- a/Makefile.osx Wed Jul 01 19:13:05 2015 +0100 +++ b/Makefile.osx Fri Jul 03 12:22:44 2015 +0100 @@ -2,7 +2,7 @@ CFLAGS := $(ARCHFLAGS) -O3 -I../vamp-plugin-sdk -I/usr/local/include -Wall -fPIC CXXFLAGS := $(CFLAGS) -LDFLAGS := -L../vamp-plugin-sdk -L../vamp-plugin-sdk -lvamp-sdk $(ARCHFLAGS) +LDFLAGS := -L../vamp-plugin-sdk -L../vamp-plugin-sdk -lvamp-sdk $(ARCHFLAGS) -L/usr/local/lib PLUGIN_LDFLAGS := -dynamiclib $(LDFLAGS) -exported_symbols_list vamp-plugin.list TEST_LDFLAGS := $(LDFLAGS) -lboost_unit_test_framework PLUGIN_EXT := .dylib diff -r 72b3892c1d51 -r 080fe18f5ebf MonoNote.cpp --- a/MonoNote.cpp Wed Jul 01 19:13:05 2015 +0100 +++ b/MonoNote.cpp Fri Jul 03 12:22:44 2015 +0100 @@ -39,11 +39,9 @@ obsProb.push_back(hmm.calculateObsProb(pitchProb[iFrame])); } - vector *scale = new vector(pitchProb.size()); - vector out; - vector path = hmm.decodeViterbi(obsProb, scale); + vector path = hmm.decodeViterbi(obsProb); for (size_t iFrame = 0; iFrame < path.size(); ++iFrame) { @@ -56,6 +54,5 @@ out.push_back(FrameOutput(iFrame, currPitch, stateKind)); // std::cerr << path[iFrame] << " -- "<< pitchProb[iFrame][0].first << " -- "<< currPitch << " -- " << stateKind << std::endl; } - delete scale; return(out); } diff -r 72b3892c1d51 -r 080fe18f5ebf MonoNoteHMM.cpp --- a/MonoNoteHMM.cpp Wed Jul 01 19:13:05 2015 +0100 +++ b/MonoNoteHMM.cpp Fri Jul 03 12:22:44 2015 +0100 @@ -107,6 +107,8 @@ // 3. attack state // ... + m_nState = par.n; + // observation distributions for (size_t iState = 0; iState < par.n; ++iState) { @@ -114,9 +116,9 @@ if (iState % par.nSPP == 2) { // silent state starts tracking - init.push_back(1.0/(par.nS * par.nPPS)); + m_init.push_back(1.0/(par.nS * par.nPPS)); } else { - init.push_back(0.0); + m_init.push_back(0.0); } } @@ -137,27 +139,27 @@ size_t index = iPitch * par.nSPP; // transitions from attack state - from.push_back(index); - to.push_back(index); - transProb.push_back(par.pAttackSelftrans); + m_from.push_back(index); + m_to.push_back(index); + m_transProb.push_back(par.pAttackSelftrans); - from.push_back(index); - to.push_back(index+1); - transProb.push_back(1-par.pAttackSelftrans); + m_from.push_back(index); + m_to.push_back(index+1); + m_transProb.push_back(1-par.pAttackSelftrans); // transitions from stable state - from.push_back(index+1); - to.push_back(index+1); // to itself - transProb.push_back(par.pStableSelftrans); + m_from.push_back(index+1); + m_to.push_back(index+1); // to itself + m_transProb.push_back(par.pStableSelftrans); - from.push_back(index+1); - to.push_back(index+2); // to silent - transProb.push_back(par.pStable2Silent); + m_from.push_back(index+1); + m_to.push_back(index+2); // to silent + m_transProb.push_back(par.pStable2Silent); // the "easy" transitions from silent state - from.push_back(index+2); - to.push_back(index+2); - transProb.push_back(par.pSilentSelftrans); + m_from.push_back(index+2); + m_to.push_back(index+2); + m_transProb.push_back(par.pSilentSelftrans); // the more complicated transitions from the silent @@ -184,15 +186,18 @@ tempTransProbSilent.push_back(tempWeightSilent); - from.push_back(index+2); - to.push_back(toIndex); + m_from.push_back(index+2); + m_to.push_back(toIndex); } } for (size_t i = 0; i < tempTransProbSilent.size(); ++i) { - transProb.push_back((1-par.pSilentSelftrans) * tempTransProbSilent[i]/probSumSilent); + m_transProb.push_back((1-par.pSilentSelftrans) * tempTransProbSilent[i]/probSumSilent); } } + m_nTrans = m_transProb.size(); + m_delta = vector(m_nState); + m_oldDelta = vector(m_nState); } double diff -r 72b3892c1d51 -r 080fe18f5ebf MonoPitch.cpp --- a/MonoPitch.cpp Wed Jul 01 19:13:05 2015 +0100 +++ b/MonoPitch.cpp Fri Jul 03 12:22:44 2015 +0100 @@ -40,13 +40,11 @@ { obsProb.push_back(hmm.calculateObsProb(pitchProb[iFrame])); } - - vector *scale = new vector(0); - + vector out; // std::cerr << "before Viterbi decoding" << obsProb.size() << "ng" << obsProb[1].size() << std::endl; - vector path = hmm.decodeViterbi(obsProb, scale); + vector path = hmm.decodeViterbi(obsProb); // std::cerr << "after Viterbi decoding" << std::endl; for (size_t iFrame = 0; iFrame < path.size(); ++iFrame) @@ -76,6 +74,5 @@ } out.push_back(bestFreq); } - delete scale; return(out); } diff -r 72b3892c1d51 -r 080fe18f5ebf MonoPitchHMM.cpp --- a/MonoPitchHMM.cpp Wed Jul 01 19:13:05 2015 +0100 +++ b/MonoPitchHMM.cpp Fri Jul 03 12:22:44 2015 +0100 @@ -17,11 +17,13 @@ #include #include +#include using std::vector; using std::pair; MonoPitchHMM::MonoPitchHMM() : +SparseHMM(), m_minFreq(61.735), m_nBPS(5), m_nPitch(0), @@ -32,6 +34,7 @@ { m_transitionWidth = 5*(m_nBPS/2) + 1; m_nPitch = 69 * m_nBPS; + m_nState = 2 * m_nPitch; // voiced and unvoiced m_freqs = vector(2*m_nPitch); for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch) { @@ -83,7 +86,7 @@ MonoPitchHMM::build() { // INITIAL VECTOR - init = vector(2*m_nPitch, 1.0 / 2*m_nPitch); + m_init = vector(2*m_nPitch, 1.0 / 2*m_nPitch); // TRANSITIONS for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch) @@ -112,22 +115,22 @@ // TRANSITIONS TO CLOSE PITCH for (size_t i = minNextPitch; i <= maxNextPitch; ++i) { - from.push_back(iPitch); - to.push_back(i); - transProb.push_back(weights[i-minNextPitch] / weightSum * m_selfTrans); + m_from.push_back(iPitch); + m_to.push_back(i); + m_transProb.push_back(weights[i-minNextPitch] / weightSum * m_selfTrans); - from.push_back(iPitch); - to.push_back(i+m_nPitch); - transProb.push_back(weights[i-minNextPitch] / weightSum * (1-m_selfTrans)); + m_from.push_back(iPitch); + m_to.push_back(i+m_nPitch); + m_transProb.push_back(weights[i-minNextPitch] / weightSum * (1-m_selfTrans)); - from.push_back(iPitch+m_nPitch); - to.push_back(i+m_nPitch); - transProb.push_back(weights[i-minNextPitch] / weightSum * m_selfTrans); + m_from.push_back(iPitch+m_nPitch); + m_to.push_back(i+m_nPitch); + m_transProb.push_back(weights[i-minNextPitch] / weightSum * m_selfTrans); // transProb.push_back(weights[i-minNextPitch] / weightSum * 0.5); - from.push_back(iPitch+m_nPitch); - to.push_back(i); - transProb.push_back(weights[i-minNextPitch] / weightSum * (1-m_selfTrans)); + m_from.push_back(iPitch+m_nPitch); + m_to.push_back(i); + m_transProb.push_back(weights[i-minNextPitch] / weightSum * (1-m_selfTrans)); // transProb.push_back(weights[i-minNextPitch] / weightSum * 0.5); } @@ -149,5 +152,7 @@ // for (size_t i = 0; i < from.size(); ++i) { // std::cerr << "P(["<< from[i] << " --> " << to[i] << "]) = " << transProb[i] << std::endl; // } - + m_nTrans = m_transProb.size(); + m_delta = vector(m_nState); + m_oldDelta = vector(m_nState); } diff -r 72b3892c1d51 -r 080fe18f5ebf PYinVamp.cpp --- a/PYinVamp.cpp Wed Jul 01 19:13:05 2015 +0100 +++ b/PYinVamp.cpp Fri Jul 03 12:22:44 2015 +0100 @@ -44,6 +44,7 @@ m_oSmoothedPitchTrack(0), m_oNotes(0), m_threshDistr(2.0f), + m_fixedLag(0.0f), m_outputUnvoiced(0.0f), m_preciseTime(0.0f), m_lowAmp(0.1f), @@ -153,6 +154,19 @@ d.valueNames.push_back("Single Value 0.20"); list.push_back(d); + d.valueNames.clear(); + + d.identifier = "fixedlag"; + d.name = "Fixed-lag smoothing"; + d.description = "Use fixed lag smoothing, not full Viterbi smoothing."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 1.0f; + d.defaultValue = 0.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + list.push_back(d); + d.identifier = "outputunvoiced"; d.valueNames.clear(); d.name = "Output estimates classified as unvoiced?"; @@ -222,6 +236,9 @@ if (identifier == "threshdistr") { return m_threshDistr; } + if (identifier == "fixedlag") { + return m_fixedLag; + } if (identifier == "outputunvoiced") { return m_outputUnvoiced; } @@ -247,6 +264,10 @@ { m_threshDistr = value; } + if (identifier == "fixedlag") + { + m_fixedLag = value; + } if (identifier == "outputunvoiced") { m_outputUnvoiced = value; @@ -455,8 +476,6 @@ m_level.push_back(yo.rms); - // First, get the things out of the way that we don't want to output - // immediately, but instead save for later. vector > tempPitchProb; for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate) { @@ -471,7 +490,14 @@ (tempPitch, yo.freqProb[iCandidate].second*factor)); } } - m_pitchProb.push_back(tempPitchProb); + + if (m_fixedLag == 0.f) + { + m_pitchProb.push_back(tempPitchProb); + } else { + // Damn, so I need the hmm right here! Sadly it isn't defined here yet. + // Perhaps I could re-design the whole shabang + } m_timestamp.push_back(timestamp); // F0 CANDIDATES @@ -549,7 +575,8 @@ std::vector > temp; if (mpOut[iFrame] > 0) { - double tempPitch = 12 * std::log(mpOut[iFrame]/440)/std::log(2.) + 69; + double tempPitch = 12 * + std::log(mpOut[iFrame]/440)/std::log(2.) + 69; temp.push_back(std::pair(tempPitch, .9)); } smoothedPitch.push_back(temp); @@ -569,14 +596,15 @@ float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize; + // the body of the loop below should be in a function/method std::vector notePitchTrack; // collects pitches for one note at a time for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) { isVoiced = mnOut[iFrame].noteState < 3 && smoothedPitch[iFrame].size() > 0 && (iFrame >= nFrame-2 - || ((m_level[iFrame]/m_level[iFrame+2]) > m_onsetSensitivity)); - // std::cerr << m_level[iFrame]/m_level[iFrame-1] << " " << isVoiced << std::endl; + || ((m_level[iFrame]/m_level[iFrame+2]) > + m_onsetSensitivity)); if (isVoiced && iFrame != nFrame-1) { if (oldIsVoiced == 0) // beginning of a note @@ -588,7 +616,6 @@ } else { // not currently voiced if (oldIsVoiced == 1) // end of note { - // std::cerr << notePitchTrack.size() << " " << minNoteFrames << std::endl; if (notePitchTrack.size() >= minNoteFrames) { std::sort(notePitchTrack.begin(), notePitchTrack.end()); diff -r 72b3892c1d51 -r 080fe18f5ebf PYinVamp.h --- a/PYinVamp.h Wed Jul 01 19:13:05 2015 +0100 +++ b/PYinVamp.h Fri Jul 03 12:22:44 2015 +0100 @@ -71,6 +71,7 @@ mutable int m_oNotes; float m_threshDistr; + float m_fixedLag; float m_outputUnvoiced; float m_preciseTime; float m_lowAmp; diff -r 72b3892c1d51 -r 080fe18f5ebf SparseHMM.cpp --- a/SparseHMM.cpp Wed Jul 01 19:13:05 2015 +0100 +++ b/SparseHMM.cpp Fri Jul 03 12:22:44 2015 +0100 @@ -19,6 +19,21 @@ using std::vector; using std::pair; +SparseHMM::SparseHMM() : + m_nState(0), + m_nTrans(0), + m_init(0), + m_from(0), + m_to(0), + m_transProb(0), + m_scale(0), + m_psi(0), + m_delta(0), + m_oldDelta(0) +{ + +} + const vector SparseHMM::calculateObsProb(const vector > data) { @@ -26,103 +41,126 @@ return(vector()); } +void +SparseHMM::build() +{ } + const std::vector -SparseHMM::decodeViterbi(std::vector > obsProb, - vector *scale) +SparseHMM::decodeViterbi(std::vector > obsProb) { - if (obsProb.size() < 1) { + size_t nFrame = obsProb.size(); + if (nFrame < 1) { return vector(); } - - size_t nState = init.size(); - size_t nFrame = obsProb.size(); - // check for consistency - size_t nTrans = transProb.size(); - // declaring variables - std::vector delta = std::vector(nState); - std::vector oldDelta = std::vector(nState); - vector > psi; // "matrix" of remembered indices of the best transitions - vector path = vector(nFrame, nState-1); // the final output path (current assignment arbitrary, makes sense only for Chordino, where nChord-1 is the "no chord" label) - - double deltasum = 0; - - // initialise first frame - for (size_t iState = 0; iState < nState; ++iState) - { - oldDelta[iState] = init[iState] * obsProb[0][iState]; - // std::cerr << iState << " ----- " << init[iState] << std::endl; - deltasum += oldDelta[iState]; - } - - for (size_t iState = 0; iState < nState; ++iState) - { - oldDelta[iState] /= deltasum; // normalise (scale) - // std::cerr << oldDelta[iState] << std::endl; - } - - scale->push_back(1.0/deltasum); - psi.push_back(vector(nState,0)); + initialise(obsProb[0]); // rest of forward step for (size_t iFrame = 1; iFrame < nFrame; ++iFrame) { - deltasum = 0; - psi.push_back(vector(nState,0)); + process(obsProb[iFrame]); + } - // calculate best previous state for every current state - size_t fromState; - size_t toState; - double currentTransProb; - double currentValue; + vector path = finalise(); + return(path); +} + + +void +SparseHMM::initialise(vector firstObs) +{ + double deltasum = 0; + + // initialise first frame + for (size_t iState = 0; iState < m_nState; ++iState) + { + m_oldDelta[iState] = m_init[iState] * firstObs[iState]; + deltasum += m_oldDelta[iState]; + } + + for (size_t iState = 0; iState < m_nState; ++iState) + { + m_oldDelta[iState] /= deltasum; // normalise (scale) + } + + m_scale.push_back(1.0/deltasum); + m_psi.push_back(vector(m_nState,0)); +} + +int +SparseHMM::process(vector newObs) +{ + vector tempPsi = vector(m_nState,0); + + // calculate best previous state for every current state + size_t fromState; + size_t toState; + double currentTransProb; + double currentValue; + + // this is the "sparse" loop + for (size_t iTrans = 0; iTrans < m_nTrans; ++iTrans) + { + fromState = m_from[iTrans]; + toState = m_to[iTrans]; + currentTransProb = m_transProb[iTrans]; - // this is the "sparse" loop - for (size_t iTrans = 0; iTrans < nTrans; ++iTrans) + currentValue = m_oldDelta[fromState] * currentTransProb; + if (currentValue > m_delta[toState]) { - fromState = from[iTrans]; - toState = to[iTrans]; - currentTransProb = transProb[iTrans]; - - currentValue = oldDelta[fromState] * currentTransProb; - if (currentValue > delta[toState]) - { - delta[toState] = currentValue; // will be multiplied by the right obs later! - psi[iFrame][toState] = fromState; - } - } - - for (size_t jState = 0; jState < nState; ++jState) - { - delta[jState] *= obsProb[iFrame][jState]; - deltasum += delta[jState]; - } - - if (deltasum > 0) - { - for (size_t iState = 0; iState < nState; ++iState) - { - oldDelta[iState] = delta[iState] / deltasum; // normalise (scale) - delta[iState] = 0; - } - scale->push_back(1.0/deltasum); - } else - { - std::cerr << "WARNING: Viterbi has been fed some zero probabilities, at least they become zero at frame " << iFrame << " in combination with the model." << std::endl; - for (size_t iState = 0; iState < nState; ++iState) - { - oldDelta[iState] = 1.0/nState; - delta[iState] = 0; - } - scale->push_back(1.0); + // will be multiplied by the right obs later! + m_delta[toState] = currentValue; + tempPsi[toState] = fromState; } } + m_psi.push_back(tempPsi); + + double deltasum = 0; + for (size_t jState = 0; jState < m_nState; ++jState) + { + m_delta[jState] *= newObs[jState]; + deltasum += m_delta[jState]; + } + + if (deltasum > 0) + { + for (size_t iState = 0; iState < m_nState; ++iState) + { + m_oldDelta[iState] = m_delta[iState] / deltasum;// normalise (scale) + m_delta[iState] = 0; + } + m_scale.push_back(1.0/deltasum); + } else + { + std::cerr << "WARNING: Viterbi has been fed some zero " + "probabilities, at least they become zero " + "in combination with the model." << std::endl; + for (size_t iState = 0; iState < m_nState; ++iState) + { + m_oldDelta[iState] = 1.0/m_nState; + m_delta[iState] = 0; + } + m_scale.push_back(1.0); + } + return 0; +} + +vector +SparseHMM::finalise() +{ // initialise backward step + size_t nFrame = m_psi.size(); + + // The final output path (current assignment arbitrary, makes sense only for + // Chordino, where nChord-1 is the "no chord" label) + vector path = vector(nFrame, m_nState-1); + double bestValue = 0; - for (size_t iState = 0; iState < nState; ++iState) + for (size_t iState = 0; iState < m_nState; ++iState) { - double currentValue = oldDelta[iState]; + double currentValue = m_oldDelta[iState]; if (currentValue > bestValue) { bestValue = currentValue; @@ -130,16 +168,11 @@ } } - // rest of backward step + // Rest of backward step for (int iFrame = nFrame-2; iFrame != -1; --iFrame) { - path[iFrame] = psi[iFrame+1][path[iFrame+1]]; + path[iFrame] = m_psi[iFrame+1][path[iFrame+1]]; } - - // for (size_t iState = 0; iState < nState; ++iState) - // { - // // std::cerr << psi[2][iState] << std::endl; - // } - + return path; -} +} \ No newline at end of file diff -r 72b3892c1d51 -r 080fe18f5ebf SparseHMM.h --- a/SparseHMM.h Wed Jul 01 19:13:05 2015 +0100 +++ b/SparseHMM.h Fri Jul 03 12:22:44 2015 +0100 @@ -15,21 +15,37 @@ #define _SPARSEHMM_H_ #include +#include #include using std::vector; +using std::deque; using std::pair; class SparseHMM { public: - virtual const std::vector calculateObsProb(const vector >); - const std::vector decodeViterbi(std::vector > obs, - vector *scale); - vector init; - vector from; - vector to; - vector transProb; + SparseHMM(); + virtual const std::vector + calculateObsProb(const vector >); + virtual void build(); + const std::vector decodeViterbi(std::vector > obs); + void initialise(vector firstObs); + int process(vector newObs); + vector finalise(); + // "sparse" HMM definition + int m_nState; + int m_nTrans; + vector m_init; + vector m_from; + vector m_to; + vector m_transProb; + + // variables for decoding + deque m_scale; + deque > m_psi; + vector m_delta; + vector m_oldDelta; }; #endif