Mercurial > hg > nnls-chroma
changeset 43:131801714118 matthiasm-plugin
* HMM implementation for chord recognition
author | matthiasm |
---|---|
date | Mon, 25 Oct 2010 00:52:39 +0900 |
parents | d01f94d58ef0 |
children | 109d3b2c7105 |
files | Chordino.cpp Makefile NNLSChroma.cpp viterbi.cpp viterbi.h viterbi.o viterbi_path.m |
diffstat | 7 files changed, 387 insertions(+), 183 deletions(-) [+] |
line wrap: on
line diff
--- a/Chordino.cpp Sun Oct 24 20:43:11 2010 +0900 +++ b/Chordino.cpp Mon Oct 25 00:52:39 2010 +0900 @@ -19,6 +19,7 @@ #include "Chordino.h" #include "chromamethods.h" +#include "viterbi.h" #include <cstdlib> #include <fstream> @@ -161,8 +162,8 @@ /** Tune Log-Frequency Spectrogram - calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to - perform linear interpolation on the existing log-frequency spectrogram (kinda f1). + calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to + perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectum). **/ cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... "; @@ -173,13 +174,17 @@ int count = 0; FeatureList tunedSpec; + int nFrame = m_logSpectrum.size(); + + vector<Vamp::RealTime> timestamps; for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) { - Feature f1 = *i; - Feature f2; // tuned log-frequency spectrum - f2.hasTimestamp = true; - f2.timestamp = f1.timestamp; - f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero + Feature currentLogSpectum = *i; + Feature currentTunedSpec; // tuned log-frequency spectrum + currentTunedSpec.hasTimestamp = true; + currentTunedSpec.timestamp = currentLogSpectum.timestamp; + timestamps.push_back(currentLogSpectum.timestamp); + currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero if (m_tuneLocal) { intShift = floor(m_localTuning[count] * 3); @@ -188,31 +193,31 @@ // cerr << intShift << " " << intFactor << endl; - for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins - tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor; - f2.values.push_back(tempValue); + for (unsigned k = 2; k < currentLogSpectum.values.size() - 3; ++k) { // interpolate all inner bins + tempValue = currentLogSpectum.values[k + intShift] * (1-intFactor) + currentLogSpectum.values[k+intShift+1] * intFactor; + currentTunedSpec.values.push_back(tempValue); } - f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge - vector<float> runningmean = SpecialConvolution(f2.values,hw); + currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge + vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw); vector<float> runningstd; for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance) - runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i])); + runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i])); } runningstd = SpecialConvolution(runningstd,hw); // second step convolve for (int i = 0; i < 256; i++) { runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std if (runningstd[i] > 0) { - // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ? - // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0; - f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ? - (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0; + // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ? + // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0; + currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ? + (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0; } - if (f2.values[i] < 0) { + if (currentTunedSpec.values[i] < 0) { cerr << "ERROR: negative value in logfreq spectrum" << endl; } } - tunedSpec.push_back(f2); + tunedSpec.push_back(currentTunedSpec); count++; } cerr << "done." << endl; @@ -230,19 +235,21 @@ } - vector<vector<float> > chordogram; + vector<vector<double> > chordogram; vector<vector<int> > scoreChordogram; vector<float> chordchange = vector<float>(tunedSpec.size(),0); count = 0; FeatureList chromaList; + + for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) { - Feature f2 = *it; // logfreq spectrum - Feature f6; // treble and bass chromagram + Feature currentTunedSpec = *it; // logfreq spectrum + Feature currentChromas; // treble and bass chromagram - f6.hasTimestamp = true; - f6.timestamp = f2.timestamp; + currentChromas.hasTimestamp = true; + currentChromas.timestamp = currentTunedSpec.timestamp; float b[256]; @@ -250,7 +257,7 @@ float sumb = 0; for (int i = 0; i < 256; i++) { // b[i] = m_dict[(256 * count + i) % (256 * 84)]; - b[i] = f2.values[i]; + b[i] = currentTunedSpec.values[i]; sumb += b[i]; if (b[i] > 0) { some_b_greater_zero = true; @@ -314,7 +321,7 @@ vector<float> origchroma = chroma; chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas - f6.values = chroma; + currentChromas.values = chroma; if (m_doNormalizeChroma > 0) { vector<float> chromanorm = vector<float>(3,0); @@ -340,17 +347,17 @@ } if (chromanorm[2] > 0) { for (int i = 0; i < chroma.size(); i++) { - f6.values[i] /= chromanorm[2]; + currentChromas.values[i] /= chromanorm[2]; } } } - chromaList.push_back(f6); + chromaList.push_back(currentChromas); // local chord estimation - vector<float> currentChordSalience; - float tempchordvalue = 0; - float sumchordvalue = 0; + vector<double> currentChordSalience; + double tempchordvalue = 0; + double sumchordvalue = 0; for (int iChord = 0; iChord < nChord; iChord++) { tempchordvalue = 0; @@ -377,165 +384,186 @@ cerr << "done." << endl; - /* Simple chord estimation - I just take the local chord estimates ("currentChordSalience") and average them over time, then - take the maximum. Very simple, don't do this at home... - */ - cerr << "[Chordino Plugin] Chord Estimation ... "; - count = 0; - int halfwindowlength = m_inputSampleRate / m_stepSize; - vector<int> chordSequence; - - for (FeatureList::iterator it = chromaList.begin(); it != chromaList.end(); ++it) { // initialise the score chordogram - vector<int> temp = vector<int>(nChord,0); - scoreChordogram.push_back(temp); - } - - for (FeatureList::iterator it = chromaList.begin(); it < chromaList.end()-2*halfwindowlength-1; ++it) { - int startIndex = count + 1; - int endIndex = count + 2 * halfwindowlength; - - float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1); - - vector<int> chordCandidates; - for (unsigned iChord = 0; iChord < nChord-1; iChord++) { - // float currsum = 0; - // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { - // currsum += chordogram[iFrame][iChord]; - // } - // if (currsum > chordThreshold) chordCandidates.push_back(iChord); - for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { - if (chordogram[iFrame][iChord] > chordThreshold) { - chordCandidates.push_back(iChord); - break; - } + bool m_useHMM = true; // this will go into the chordino header file. + if (m_useHMM) { + + int oldchord = nChord-1; + double selftransprob = 0.1; + + vector<double> init = vector<double>(nChord,1.0/nChord); + vector<vector<double> > trans; + for (int iChord = 0; iChord < nChord; iChord++) { + vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1)); + temp[iChord] = selftransprob; + trans.push_back(temp); + } + vector<int> chordpath = ViterbiPath(init,trans,chordogram); + + for (int iFrame = 0; iFrame < chordpath.size(); ++iFrame) { + // cerr << chordpath[iFrame] << endl; + if (chordpath[iFrame] != oldchord) { + Feature chord_feature; // chord estimate + chord_feature.hasTimestamp = true; + chord_feature.timestamp = timestamps[iFrame]; + chord_feature.label = m_chordnames[chordpath[iFrame]]; + fsOut[0].push_back(chord_feature); + oldchord = chordpath[iFrame]; } } - chordCandidates.push_back(nChord-1); -// cerr << chordCandidates.size() << endl; - - float maxval = 0; // will be the value of the most salient *chord change* in this frame - float maxindex = 0; //... and the index thereof - unsigned bestchordL = nChord-1; // index of the best "left" chord - unsigned bestchordR = nChord-1; // index of the best "right" chord - - for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { - // now find the max values on both sides of iWF - // left side: - float maxL = 0; - unsigned maxindL = nChord-1; - for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { - unsigned iChord = chordCandidates[kChord]; - float currsum = 0; - for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) { - currsum += chordogram[count+iFrame][iChord]; - } - if (iChord == nChord-1) currsum *= 0.8; - if (currsum > maxL) { - maxL = currsum; - maxindL = iChord; - } - } - // right side: - float maxR = 0; - unsigned maxindR = nChord-1; - for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { - unsigned iChord = chordCandidates[kChord]; - float currsum = 0; - for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { - currsum += chordogram[count+iFrame][iChord]; - } - if (iChord == nChord-1) currsum *= 0.8; - if (currsum > maxR) { - maxR = currsum; - maxindR = iChord; + + // cerr << chordpath[0] << endl; + } else { + /* Simple chord estimation + I just take the local chord estimates ("currentChordSalience") and average them over time, then + take the maximum. Very simple, don't do this at home... + */ + cerr << "[NNLS Chroma Plugin] Chord Estimation ... "; + count = 0; + int halfwindowlength = m_inputSampleRate / m_stepSize; + vector<int> chordSequence; + for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram + vector<int> temp = vector<int>(nChord,0); + scoreChordogram.push_back(temp); + } + for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) { + int startIndex = count + 1; + int endIndex = count + 2 * halfwindowlength; + + float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1); + + vector<int> chordCandidates; + for (unsigned iChord = 0; iChord < nChord-1; iChord++) { + // float currsum = 0; + // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { + // currsum += chordogram[iFrame][iChord]; + // } + // if (currsum > chordThreshold) chordCandidates.push_back(iChord); + for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { + if (chordogram[iFrame][iChord] > chordThreshold) { + chordCandidates.push_back(iChord); + break; + } } } - if (maxL+maxR > maxval) { - maxval = maxL+maxR; - maxindex = iWF; - bestchordL = maxindL; - bestchordR = maxindR; + chordCandidates.push_back(nChord-1); + // cerr << chordCandidates.size() << endl; + + float maxval = 0; // will be the value of the most salient *chord change* in this frame + float maxindex = 0; //... and the index thereof + unsigned bestchordL = nChord-1; // index of the best "left" chord + unsigned bestchordR = nChord-1; // index of the best "right" chord + + for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { + // now find the max values on both sides of iWF + // left side: + float maxL = 0; + unsigned maxindL = nChord-1; + for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { + unsigned iChord = chordCandidates[kChord]; + float currsum = 0; + for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) { + currsum += chordogram[count+iFrame][iChord]; + } + if (iChord == nChord-1) currsum *= 0.8; + if (currsum > maxL) { + maxL = currsum; + maxindL = iChord; + } + } + // right side: + float maxR = 0; + unsigned maxindR = nChord-1; + for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { + unsigned iChord = chordCandidates[kChord]; + float currsum = 0; + for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { + currsum += chordogram[count+iFrame][iChord]; + } + if (iChord == nChord-1) currsum *= 0.8; + if (currsum > maxR) { + maxR = currsum; + maxindR = iChord; + } + } + if (maxL+maxR > maxval) { + maxval = maxL+maxR; + maxindex = iWF; + bestchordL = maxindL; + bestchordR = maxindR; + } + } - + // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; + // add a score to every chord-frame-point that was part of a maximum + for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) { + scoreChordogram[iFrame+count][bestchordL]++; + } + for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { + scoreChordogram[iFrame+count][bestchordR]++; + } + if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; + count++; } -// cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; - // add a score to every chord-frame-point that was part of a maximum - for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) { - scoreChordogram[iFrame+count][bestchordL]++; + // cerr << "******* agent finished *******" << endl; + count = 0; + for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { + float maxval = 0; // will be the value of the most salient chord in this frame + float maxindex = 0; //... and the index thereof + for (unsigned iChord = 0; iChord < nChord; iChord++) { + if (scoreChordogram[count][iChord] > maxval) { + maxval = scoreChordogram[count][iChord]; + maxindex = iChord; + // cerr << iChord << endl; + } + } + chordSequence.push_back(maxindex); + count++; } - for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { - scoreChordogram[iFrame+count][bestchordR]++; + + + // mode filter on chordSequence + count = 0; + string oldChord = ""; + for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { + Feature chord_feature; // chord estimate + chord_feature.hasTimestamp = true; + chord_feature.timestamp = *it; + // Feature currentChord; // chord estimate + // currentChord.hasTimestamp = true; + // currentChord.timestamp = currentChromas.timestamp; + + vector<int> chordCount = vector<int>(nChord,0); + int maxChordCount = 0; + int maxChordIndex = nChord-1; + string maxChord; + int startIndex = max(count - halfwindowlength/2,0); + int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); + for (int i = startIndex; i < endIndex; i++) { + chordCount[chordSequence[i]]++; + if (chordCount[chordSequence[i]] > maxChordCount) { + // cerr << "start index " << startIndex << endl; + maxChordCount++; + maxChordIndex = chordSequence[i]; + maxChord = m_chordnames[maxChordIndex]; + } + } + // chordSequence[count] = maxChordIndex; + // cerr << maxChordIndex << endl; + // cerr << chordchange[count] << endl; + // fsOut[9].push_back(currentChord); + if (oldChord != maxChord) { + oldChord = maxChord; + chord_feature.label = m_chordnames[maxChordIndex]; + fsOut[0].push_back(chord_feature); + } + count++; } - if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; - count++; } -// cerr << "******* agent finished *******" << endl; - count = 0; - for (FeatureList::iterator it = chromaList.begin(); it != chromaList.end(); ++it) { - float maxval = 0; // will be the value of the most salient chord in this frame - float maxindex = 0; //... and the index thereof - for (unsigned iChord = 0; iChord < nChord; iChord++) { - if (scoreChordogram[count][iChord] > maxval) { - maxval = scoreChordogram[count][iChord]; - maxindex = iChord; - // cerr << iChord << endl; - } - } - chordSequence.push_back(maxindex); - // cerr << "before modefilter, maxindex: " << maxindex << endl; - count++; - } -// cerr << "******* mode filter done *******" << endl; - - - // mode filter on chordSequence - count = 0; - string oldChord = ""; - for (FeatureList::iterator it = chromaList.begin(); it != chromaList.end(); ++it) { - Feature f6 = *it; - Feature f7; // chord estimate - f7.hasTimestamp = true; - f7.timestamp = f6.timestamp; - Feature f8; // chord estimate - f8.hasTimestamp = true; - f8.timestamp = f6.timestamp; - - vector<int> chordCount = vector<int>(nChord,0); - int maxChordCount = 0; - int maxChordIndex = nChord-1; - string maxChord; - int startIndex = max(count - halfwindowlength/2,0); - int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); - for (int i = startIndex; i < endIndex; i++) { - chordCount[chordSequence[i]]++; - if (chordCount[chordSequence[i]] > maxChordCount) { - // cerr << "start index " << startIndex << endl; - maxChordCount++; - maxChordIndex = chordSequence[i]; - maxChord = m_chordnames[maxChordIndex]; - } - } - // chordSequence[count] = maxChordIndex; - // cerr << maxChordIndex << endl; - f8.values.push_back(chordchange[count]/(halfwindowlength*2)); - // cerr << chordchange[count] << endl; - fsOut[m_outputHarmonicChange].push_back(f8); - if (oldChord != maxChord) { - oldChord = maxChord; - f7.label = m_chordnames[maxChordIndex]; - fsOut[m_outputChords].push_back(f7); - } - count++; - } - Feature f7; // last chord estimate - f7.hasTimestamp = true; - f7.timestamp = chromaList[chromaList.size()-1].timestamp; - f7.label = "N"; - fsOut[m_outputChords].push_back(f7); + Feature chord_feature; // last chord estimate + chord_feature.hasTimestamp = true; + chord_feature.timestamp = timestamps[timestamps.size()-1]; + chord_feature.label = "N"; + fsOut[0].push_back(chord_feature); cerr << "done." << endl; - return fsOut; - } -
--- a/Makefile Sun Oct 24 20:43:11 2010 +0900 +++ b/Makefile Mon Oct 25 00:52:39 2010 +0900 @@ -2,7 +2,7 @@ # Edit this to list one .o file for each .cpp file in your plugin project # -PLUGIN_CODE_OBJECTS = NNLSBase.o NNLSChroma.o Chordino.o Tuning.o plugins.o nnls.o chromamethods.o +PLUGIN_CODE_OBJECTS = NNLSBase.o NNLSChroma.o Chordino.o Tuning.o plugins.o nnls.o chromamethods.o viterbi.o # Edit this to the location of the Vamp plugin SDK, relative to your # project directory @@ -41,3 +41,4 @@ chromamethods.o: nnls.h NNLSChroma.o: NNLSBase.h Tuning.o: NNLSBase.h +viterbi.o: viterbi.h
--- a/NNLSChroma.cpp Sun Oct 24 20:43:11 2010 +0900 +++ b/NNLSChroma.cpp Mon Oct 25 00:52:39 2010 +0900 @@ -74,7 +74,7 @@ for (int iNote = 0; iNote < 24; iNote++) { bothchromanames.push_back(notenames[iNote]); if (iNote < 12) { - chromanames.push_back(notenames[iNote]); + chromanames.push_back(notenames[iNote+12]); } }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/viterbi.cpp Mon Oct 25 00:52:39 2010 +0900 @@ -0,0 +1,85 @@ + +#include "viterbi.h" +#include <iostream> + +std::vector<int> ViterbiPath(std::vector<double> init, std::vector<vector<double> > trans, std::vector<vector<double> > obs) { + + int nState = init.size(); + int nFrame = obs.size(); + // cerr << nState << " " << nFrame << endl; + + // check for consistency + if (trans[0].size() != nState || trans.size() != nState || obs[0].size() != nState) { + cerr << "ERROR: matrix sizes inconsistent." << endl; + } + + vector<vector<double> > delta; // "matrix" of conditional probabilities + vector<vector<int> > psi; // "matrix" of remembered indices of the best transitions + vector<int> path = vector<int>(nFrame, nState-1); // the final output path (current assignment arbitrary, makes sense only for Chordino, where nChord-1 is the "no chord" label) + vector<double> scale = vector<double>(nFrame, 0); // remembers by how much the vectors in delta are scaled. + + double deltasum = 0; + + /* initialise first frame */ + delta.push_back(init); + for (int iState = 0; iState < nState; ++iState) { + delta[0][iState] *= obs[0][iState]; + deltasum += delta[0][iState]; + } + for (int iState = 0; iState < nState; ++iState) delta[0][iState] /= deltasum; // normalise (scale) + scale.push_back(1.0/deltasum); + psi.push_back(vector<int>(nState,0)); + + /* rest of the forward step */ + for (int iFrame = 1; iFrame < nFrame; ++iFrame) { + delta.push_back(vector<double>(nState,0)); + deltasum = 0; + psi.push_back(vector<int>(nState,0)); + /* every state wants to know which previous state suits him best */ + for (int jState = 0; jState < nState; ++jState) { + int bestState = nState - 1; + double bestValue = 0; + for (int iState = 0; iState < nState; ++iState) { + double currentValue = delta[iFrame-1][iState] * trans[iState][jState]; + if (currentValue > bestValue) { + bestValue = currentValue; + bestState = iState; + } + } + // cerr << bestState <<" ::: " << bestValue << endl ; + delta[iFrame][jState] = bestValue * obs[iFrame][jState]; + deltasum += delta[iFrame][jState]; + psi[iFrame][jState] = bestState; + } + if (deltasum > 0) { + for (int iState = 0; iState < nState; ++iState) { + delta[iFrame][iState] /= deltasum; // normalise (scale) + } + scale.push_back(1.0/deltasum); + } else { + for (int iState = 0; iState < nState; ++iState) { + delta[iFrame][iState] = 1.0/nState; + } + scale.push_back(1.0); + } + + } + + /* initialise backward step */ + int bestValue = 0; + for (int iState = 0; iState < nState; ++iState) { + double currentValue = delta[nFrame-1][iState]; + if (currentValue > path[nFrame-1]) { + bestValue = currentValue; + path[nFrame-1] = iState; + } + } + + /* rest of backward step */ + for (int iFrame = nFrame-2; iFrame > -1; --iFrame) { + path[iFrame] = psi[iFrame+1][path[iFrame+1]]; + // cerr << path[iFrame] << endl; + } + + return path; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/viterbi.h Mon Oct 25 00:52:39 2010 +0900 @@ -0,0 +1,28 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + NNLS-Chroma / Chordino + + Audio feature extraction plugins for chromagram and chord + estimation. + + Centre for Digital Music, Queen Mary University of London. + This file copyright 2008-2010 Matthias Mauch and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _VITERBI_H_ +#define _VITERBI_H_ + +#include <vector> +#include <string> +using namespace std; + +extern std::vector<int> ViterbiPath(std::vector<double> init, std::vector<vector<double> > trans, std::vector<vector<double> > obs); + +#endif \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/viterbi_path.m Mon Oct 25 00:52:39 2010 +0900 @@ -0,0 +1,62 @@ +function path = viterbi_path(prior, transmat, obslik) +% VITERBI Find the most-probable (Viterbi) path through the HMM state trellis. +% path = viterbi(prior, transmat, obslik) +% +% Inputs: +% prior(i) = Pr(Q(1) = i) +% transmat(i,j) = Pr(Q(t+1)=j | Q(t)=i) +% obslik(i,t) = Pr(y(t) | Q(t)=i) +% +% Outputs: +% path(t) = q(t), where q1 ... qT is the argmax of the above expression. + + +% delta(j,t) = prob. of the best sequence of length t-1 and then going to state j, and O(1:t) +% psi(j,t) = the best predecessor state, given that we ended up in state j at t + +scaled = 1; + +T = size(obslik, 2); +prior = prior(:); +Q = length(prior); + +delta = zeros(Q,T); +psi = zeros(Q,T); +path = zeros(1,T); +scale = ones(1,T); + + +t=1; +delta(:,t) = prior .* obslik(:,t); +if scaled + [delta(:,t), n] = normalise(delta(:,t)); + scale(t) = 1/n; +end +psi(:,t) = 0; % arbitrary value, since there is no predecessor to t=1 +for t=2:T + for j=1:Q + [delta(j,t), psi(j,t)] = max(delta(:,t-1) .* transmat(:,j)); + delta(j,t) = delta(j,t) * obslik(j,t); + end + if scaled + [delta(:,t), n] = normalise(delta(:,t)); + scale(t) = 1/n; + end +end +[p, path(T)] = max(delta(:,T)); +for t=T-1:-1:1 + path(t) = psi(path(t+1),t+1); +end + +% If scaled==0, p = prob_path(best_path) +% If scaled==1, p = Pr(replace sum with max and proceed as in the scaled forwards algo) +% Both are different from p(data) as computed using the sum-product (forwards) algorithm + +if 0 +if scaled + loglik = -sum(log(scale)); + %loglik = prob_path(prior, transmat, obslik, path); +else + loglik = log(p); +end +end