# HG changeset patch # User matthiasm # Date 1287999284 -32400 # Node ID b6cddb10948247f1ce4cb4e038a9527fba45351b # Parent 6e76c7710fa148d9d15a03a6eed284a34fb5a96f added chord change value, some other tweaks diff -r 6e76c7710fa1 -r b6cddb109482 Chordino.cpp --- a/Chordino.cpp Mon Oct 25 16:58:32 2010 +0900 +++ b/Chordino.cpp Mon Oct 25 18:34:44 2010 +0900 @@ -63,6 +63,104 @@ return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate."; } +Chordino::ParameterList +Chordino::getParameterDescriptors() const +{ + if (debug_on) cerr << "--> getParameterDescriptors" << endl; + ParameterList list; + + ParameterDescriptor d; + d.identifier = "useNNLS"; + d.name = "use approximate transcription (NNLS)"; + d.description = "Toggles approximate transcription (NNLS)."; + d.unit = ""; + d.minValue = 0.0; + d.maxValue = 1.0; + d.defaultValue = 1.0; + d.isQuantized = true; + d.quantizeStep = 1.0; + list.push_back(d); + + ParameterDescriptor d4; + d4.identifier = "useHMM"; + d4.name = "Viterbi decoding"; + d4.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used)."; + d4.unit = ""; + d4.minValue = 0.0; + d4.maxValue = 1.0; + d4.defaultValue = 1.0; + d4.isQuantized = true; + d4.quantizeStep = 1.0; + list.push_back(d4); + + ParameterDescriptor d0; + d0.identifier = "rollon"; + d0.name = "spectral roll-on"; + d0.description = "The bins below the spectral roll-on quantile will be set to 0."; + d0.unit = ""; + d0.minValue = 0; + d0.maxValue = 0.05; + d0.defaultValue = 0; + d0.isQuantized = true; + d0.quantizeStep = 0.005; + list.push_back(d0); + + ParameterDescriptor d1; + d1.identifier = "tuningmode"; + d1.name = "tuning mode"; + d1.description = "Tuning can be performed locally or on the 
whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing."; + d1.unit = ""; + d1.minValue = 0; + d1.maxValue = 1; + d1.defaultValue = 0; + d1.isQuantized = true; + d1.valueNames.push_back("global tuning"); + d1.valueNames.push_back("local tuning"); + d1.quantizeStep = 1.0; + list.push_back(d1); + + ParameterDescriptor d2; + d2.identifier = "whitening"; + d2.name = "spectral whitening"; + d2.description = "Spectral whitening: no whitening - 0; whitening - 1."; + d2.unit = ""; + d2.isQuantized = true; + d2.minValue = 0.0; + d2.maxValue = 1.0; + d2.defaultValue = 1.0; + d2.isQuantized = false; + list.push_back(d2); + + ParameterDescriptor d3; + d3.identifier = "s"; + d3.name = "spectral shape"; + d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics."; + d3.unit = ""; + d3.minValue = 0.5; + d3.maxValue = 0.9; + d3.defaultValue = 0.7; + d3.isQuantized = false; + list.push_back(d3); + + // ParameterDescriptor d4; + // d4.identifier = "chromanormalize"; + // d4.name = "chroma normalization"; + // d4.description = "How shall the chroma vector be normalized?"; + // d4.unit = ""; + // d4.minValue = 0; + // d4.maxValue = 3; + // d4.defaultValue = 0; + // d4.isQuantized = true; + // d4.valueNames.push_back("none"); + // d4.valueNames.push_back("maximum norm"); + // d4.valueNames.push_back("L1 norm"); + // d4.valueNames.push_back("L2 norm"); + // d4.quantizeStep = 1.0; + // list.push_back(d4); + + return list; +} + Chordino::OutputList Chordino::getOutputDescriptors() const { @@ -369,7 +467,7 @@ } if (iChord == nChord-1) tempchordvalue *= .7; if (tempchordvalue < 0) tempchordvalue = 0.0; - tempchordvalue = pow(1.5,tempchordvalue); + tempchordvalue = pow(1.3,tempchordvalue); sumchordvalue+=tempchordvalue; currentChordSalience.push_back(tempchordvalue); } @@ -387,8 +485,8 @@ cerr << "done." 
<< endl; - bool m_useHMM = true; // this will go into the chordino header file. - if (m_useHMM) { + // bool m_useHMM = true; // this will go into the chordino header file. + if (m_useHMM == 1.0) { cerr << "[Chordino Plugin] HMM Chord Estimation ... "; int oldchord = nChord-1; double selftransprob = 0.99; @@ -396,13 +494,16 @@ // vector init = vector(nChord,1.0/nChord); vector init = vector(nChord,0); init[nChord-1] = 1; + double *delta; + delta = (double *)malloc(sizeof(double)*nFrame*nChord); + vector > trans; for (int iChord = 0; iChord < nChord; iChord++) { vector temp = vector(nChord,(1-selftransprob)/(nChord-1)); temp[iChord] = selftransprob; trans.push_back(temp); } - vector chordpath = ViterbiPath(init,trans,chordogram); + vector chordpath = ViterbiPath(init, trans, chordogram, delta); Feature chord_feature; // chord estimate @@ -411,7 +512,7 @@ chord_feature.label = m_chordnames[chordpath[0]]; fsOut[0].push_back(chord_feature); - for (int iFrame = 0; iFrame < chordpath.size(); ++iFrame) { + for (int iFrame = 1; iFrame < chordpath.size(); ++iFrame) { // cerr << chordpath[iFrame] << endl; if (chordpath[iFrame] != oldchord ) { Feature chord_feature; // chord estimate @@ -421,6 +522,10 @@ fsOut[0].push_back(chord_feature); oldchord = chordpath[iFrame]; } + /* calculating simple chord change prob */ + for (int iChord = 0; iChord < nChord; iChord++) { + chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]); + } } // cerr << chordpath[0] << endl; @@ -513,7 +618,9 @@ for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { scoreChordogram[iFrame+count][bestchordR]++; } - if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; + if (bestchordL != bestchordR) { + chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; + } count++; } // cerr 
<< "******* agent finished *******" << endl; @@ -561,8 +668,7 @@ } // chordSequence[count] = maxChordIndex; // cerr << maxChordIndex << endl; - // cerr << chordchange[count] << endl; - // fsOut[9].push_back(currentChord); + // cerr << chordchange[count] << endl; if (oldChord != maxChord) { oldChord = maxChord; chord_feature.label = m_chordnames[maxChordIndex]; @@ -577,5 +683,17 @@ chord_feature.label = "N"; fsOut[0].push_back(chord_feature); cerr << "done." << endl; + + for (int iFrame = 0; iFrame < nFrame; iFrame++) { + Feature chordchange_feature; + chordchange_feature.hasTimestamp = true; + chordchange_feature.timestamp = timestamps[iFrame]; + chordchange_feature.values.push_back(chordchange[iFrame]); + fsOut[1].push_back(chordchange_feature); + } + + + + return fsOut; } diff -r 6e76c7710fa1 -r b6cddb109482 Chordino.h --- a/Chordino.h Mon Oct 25 16:58:32 2010 +0900 +++ b/Chordino.h Mon Oct 25 18:34:44 2010 +0900 @@ -34,6 +34,7 @@ string getName() const; string getDescription() const; + ParameterList getParameterDescriptors() const; OutputList getOutputDescriptors() const; FeatureSet process(const float *const *inputBuffers, diff -r 6e76c7710fa1 -r b6cddb109482 NNLSBase.cpp --- a/NNLSBase.cpp Mon Oct 25 16:58:32 2010 +0900 +++ b/NNLSBase.cpp Mon Oct 25 18:34:44 2010 +0900 @@ -55,7 +55,8 @@ m_doNormalizeChroma(0), m_rollon(0.0), m_s(0.7), - m_useNNLS(1) + m_useNNLS(1), + m_useHMM(1) { if (debug_on) cerr << "--> NNLSBase" << endl; @@ -259,6 +260,11 @@ if (identifier == "chromanormalize") { return m_doNormalizeChroma; } + + if (identifier == "useHMM") { + return m_useHMM; + } + return 0; } @@ -279,6 +285,10 @@ m_s = value; } + if (identifier == "useHMM") { + m_useHMM = value; + } + if (identifier == "tuningmode") { m_tuneLocal = (value > 0) ? 
true : false; // cerr << "m_tuneLocal :" << m_tuneLocal << endl; diff -r 6e76c7710fa1 -r b6cddb109482 NNLSBase.h --- a/NNLSBase.h Mon Oct 25 16:58:32 2010 +0900 +++ b/NNLSBase.h Mon Oct 25 18:34:44 2010 +0900 @@ -70,6 +70,7 @@ float m_preset; float m_s; float m_useNNLS; + float m_useHMM; vector m_localTuning; vector m_kernelValue; vector m_kernelFftIndex; diff -r 6e76c7710fa1 -r b6cddb109482 README --- a/README Mon Oct 25 16:58:32 2010 +0900 +++ b/README Mon Oct 25 18:34:44 2010 +0900 @@ -31,8 +31,6 @@ * Chromagram: a 12-dimensional chromagram, restricted with mid-range emphasis. At each frame the Semitone Spectrum is multiplied by a mid-range pattern and then mapped to the 12 chroma bins. * Chromagram and Bass Chromagram: a 24-dimensional chromagram, consisting of the both Bass Chromgram and Chromagram, see above. When normalisation is used, this representation will however be scaled differently, and hence be different from the individual chromagrams. -### References and Credits ### - ## Chordino ## System identifier – vamp:nnls-chroma:chordino @@ -40,13 +38,14 @@ ### General Description ### -Chordino provides a simple chord transcription based on NNLS Chroma (described above). +Chordino provides a simple chord transcription based on NNLS Chroma (described above). Chord profiles given by the user in the file "chord.dict" are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach. ### Parameters ### The default settings (in brackets, below) are those used for Matthias Mauch's 2010 MIREX submissions. * use approximate transcription (NNLS) (on or off; default: on): toggle between NNLS approximate transcription and linear spectral mapping. +* Viterbi decoding (on or off; default: on): uses HMM/Viterbi smoothing. Otherwise: heuristic chord change smoothing. 
* spectral roll on (0.00 -- 0.05; default: 0.0): consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed. * tuning mode (global or local; default: global): local uses a local average for tuning, global uses ... exactly. * spectral whitening (0.0 -- 1.0; default: 1.0): determines how much the log-frequency spectrum is whitened. A value of 0.0 means no whitening. For values other than 0.0 the log-freq spectral bins are divided by [standard deviation of their neighbours]^[spectral whitening], where "^" means "to the power of". @@ -55,5 +54,8 @@ ### Outputs ### -### References and Credits ### +* Chord Estimate: estimated chord times and labels. +* Harmonic Change Value: an indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate. +## References and Credits ## + diff -r 6e76c7710fa1 -r b6cddb109482 viterbi.cpp --- a/viterbi.cpp Mon Oct 25 16:58:32 2010 +0900 +++ b/viterbi.cpp Mon Oct 25 18:34:44 2010 +0900 @@ -2,7 +2,7 @@ #include "viterbi.h" #include -std::vector ViterbiPath(std::vector init, std::vector > trans, std::vector > obs) { +std::vector ViterbiPath(std::vector init, std::vector > trans, std::vector > obs, double *delta) { int nState = init.size(); int nFrame = obs.size(); @@ -12,8 +12,13 @@ if (trans[0].size() != nState || trans.size() != nState || obs[0].size() != nState) { cerr << "ERROR: matrix sizes inconsistent." 
<< endl; } + + for (int iState = 0; iState < nState; ++iState) delta[iState] = init[iState]; + for (int iFrame = 1; iFrame < nFrame; ++iFrame) { + for (int iState = 0; iState < nState; ++iState) delta[iFrame*nState + iState]; + } - vector > delta; // "matrix" of conditional probabilities + // vector > delta; // "matrix" of conditional probabilities vector > psi; // "matrix" of remembered indices of the best transitions vector path = vector(nFrame, nState-1); // the final output path (current assignment arbitrary, makes sense only for Chordino, where nChord-1 is the "no chord" label) vector scale = vector(nFrame, 0); // remembers by how much the vectors in delta are scaled. @@ -21,18 +26,18 @@ double deltasum = 0; /* initialise first frame */ - delta.push_back(init); + // delta.push_back(init); for (int iState = 0; iState < nState; ++iState) { - delta[0][iState] *= obs[0][iState]; - deltasum += delta[0][iState]; + delta[iState] *= obs[0][iState]; + deltasum += delta[iState]; } - for (int iState = 0; iState < nState; ++iState) delta[0][iState] /= deltasum; // normalise (scale) + for (int iState = 0; iState < nState; ++iState) delta[iState] /= deltasum; // normalise (scale) scale.push_back(1.0/deltasum); psi.push_back(vector(nState,0)); /* rest of the forward step */ for (int iFrame = 1; iFrame < nFrame; ++iFrame) { - delta.push_back(vector(nState,0)); + // delta.push_back(vector(nState,0)); deltasum = 0; psi.push_back(vector(nState,0)); /* every state wants to know which previous state suits him best */ @@ -41,7 +46,7 @@ double bestValue = 0; if (obs[iFrame][jState] > 0) { for (int iState = 0; iState < nState; ++iState) { - double currentValue = delta[iFrame-1][iState] * trans[iState][jState]; + double currentValue = delta[(iFrame-1) * nState + iState] * trans[iState][jState]; if (currentValue > bestValue) { bestValue = currentValue; bestState = iState; @@ -49,18 +54,18 @@ } } // cerr << bestState <<" ::: " << bestValue << endl ; - delta[iFrame][jState] = bestValue * 
obs[iFrame][jState]; - deltasum += delta[iFrame][jState]; + delta[iFrame * nState + jState] = bestValue * obs[iFrame][jState]; + deltasum += delta[iFrame * nState + jState]; psi[iFrame][jState] = bestState; } if (deltasum > 0) { for (int iState = 0; iState < nState; ++iState) { - delta[iFrame][iState] /= deltasum; // normalise (scale) + delta[iFrame * nState + iState] /= deltasum; // normalise (scale) } scale.push_back(1.0/deltasum); } else { for (int iState = 0; iState < nState; ++iState) { - delta[iFrame][iState] = 1.0/nState; + delta[iFrame * nState + iState] = 1.0/nState; } scale.push_back(1.0); } @@ -70,7 +75,7 @@ /* initialise backward step */ int bestValue = 0; for (int iState = 0; iState < nState; ++iState) { - double currentValue = delta[nFrame-1][iState]; + double currentValue = delta[(nFrame-1) * nState + iState]; if (currentValue > path[nFrame-1]) { bestValue = currentValue; path[nFrame-1] = iState; diff -r 6e76c7710fa1 -r b6cddb109482 viterbi.h --- a/viterbi.h Mon Oct 25 16:58:32 2010 +0900 +++ b/viterbi.h Mon Oct 25 18:34:44 2010 +0900 @@ -23,6 +23,6 @@ #include using namespace std; -extern std::vector ViterbiPath(std::vector init, std::vector > trans, std::vector > obs); +extern std::vector ViterbiPath(std::vector init, std::vector > trans, std::vector > obs, double *delta); #endif \ No newline at end of file diff -r 6e76c7710fa1 -r b6cddb109482 viterbi.o Binary file viterbi.o has changed