# HG changeset patch # User matthiasm # Date 1287993512 -32400 # Node ID 6e76c7710fa148d9d15a03a6eed284a34fb5a96f # Parent ea35e6e804f11bf7f9bc9e1b1f14253932fd0ca0 removed subtraction in chroma dictionary, added to-the-power-of-1.5 in chordino diff -r ea35e6e804f1 -r 6e76c7710fa1 Chordino.cpp --- a/Chordino.cpp Mon Oct 25 02:33:47 2010 +0900 +++ b/Chordino.cpp Mon Oct 25 16:58:32 2010 +0900 @@ -367,7 +367,9 @@ for (int iBin = 12; iBin < 24; iBin++) { tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; } - if (tempchordvalue < 0) tempchordvalue = 0; + if (iChord == nChord-1) tempchordvalue *= .7; + if (tempchordvalue < 0) tempchordvalue = 0.0; + tempchordvalue = pow(1.5,tempchordvalue); sumchordvalue+=tempchordvalue; currentChordSalience.push_back(tempchordvalue); } @@ -389,9 +391,11 @@ if (m_useHMM) { cerr << "[Chordino Plugin] HMM Chord Estimation ... "; int oldchord = nChord-1; - double selftransprob = 0.9; + double selftransprob = 0.99; - vector init = vector(nChord,1.0/nChord); + // vector init = vector(nChord,1.0/nChord); + vector init = vector(nChord,0); init[nChord-1] = 1; + vector > trans; for (int iChord = 0; iChord < nChord; iChord++) { vector temp = vector(nChord,(1-selftransprob)/(nChord-1)); @@ -399,10 +403,17 @@ trans.push_back(temp); } vector chordpath = ViterbiPath(init,trans,chordogram); + + + Feature chord_feature; // chord estimate + chord_feature.hasTimestamp = true; + chord_feature.timestamp = timestamps[0]; + chord_feature.label = m_chordnames[chordpath[0]]; + fsOut[0].push_back(chord_feature); for (int iFrame = 0; iFrame < chordpath.size(); ++iFrame) { // cerr << chordpath[iFrame] << endl; - if (chordpath[iFrame] != oldchord) { + if (chordpath[iFrame] != oldchord ) { Feature chord_feature; // chord estimate chord_feature.hasTimestamp = true; chord_feature.timestamp = timestamps[iFrame]; diff -r ea35e6e804f1 -r 6e76c7710fa1 NNLSBase.cpp --- a/NNLSBase.cpp Mon Oct 25 02:33:47 2010 +0900 +++ b/NNLSBase.cpp Mon Oct 25 16:58:32 2010 
+0900 @@ -166,7 +166,8 @@ d0.minValue = 0; d0.maxValue = 0.05; d0.defaultValue = 0; - d0.isQuantized = false; + d0.isQuantized = true; + d0.quantizeStep = 0.005; list.push_back(d0); ParameterDescriptor d1; diff -r ea35e6e804f1 -r 6e76c7710fa1 README --- a/README Mon Oct 25 02:33:47 2010 +0900 +++ b/README Mon Oct 25 16:58:32 2010 +0900 @@ -1,7 +1,7 @@ ## NNLS Chroma ## -System identifier – vamp:nnls-chroma:nnls_chroma -RDF URI – http://vamp-plugins.org/rdf/plugins/matthiasm#nnls_chroma (not yet available) +System identifier – vamp:nnls-chroma:nnls-chroma +RDF URI – http://vamp-plugins.org/rdf/plugins/nnls-chroma#nnls-chroma (not yet available) ### General Description ### @@ -14,6 +14,7 @@ ### Parameters ### The default settings (in brackets, below) are those used for Matthias Mauch's 2010 MIREX submissions. + * use approximate transcription (NNLS) (on or off; default: on): toggle between NNLS approximate transcription and linear spectral mapping. * spectral roll on (0.00 -- 0.05; default: 0.0): consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed. * tuning mode (global or local; default: global): local uses a local average for tuning, global uses ... exactly. @@ -23,4 +24,36 @@ ### Outputs ### -### References and Credits ### \ No newline at end of file +* Log-frequency Spectrum: a spectrum similar to the well-known constant Q spectrum, in which bins are linear in log-frequency. Three bins per semitone. +* Tuned Log-frequency Spectrum: has the same format as Log-frequency Spectrum, but has been processed by the following processes: tuning, subtraction of background spectrum, spectral whitening. +* Semitone Spectrum: a spectral representation with one bin per semitone. If NNLS is selected in the parameters, this is the note activation, otherwise just a linear mapping to semitones. 
+* Bass Chromagram: a 12-dimensional chromagram, restricted to the bass range. At each frame the Semitone Spectrum is multiplied by a bass pattern and then mapped to the 12 chroma bins. +* Chromagram: a 12-dimensional chromagram, restricted with mid-range emphasis. At each frame the Semitone Spectrum is multiplied by a mid-range pattern and then mapped to the 12 chroma bins. +* Chromagram and Bass Chromagram: a 24-dimensional chromagram, consisting of both the Bass Chromagram and the Chromagram, see above. When normalisation is used, this representation will however be scaled differently, and hence be different from the individual chromagrams. + +### References and Credits ### + +## Chordino ## + +System identifier – vamp:nnls-chroma:chordino +RDF URI – http://vamp-plugins.org/rdf/plugins/nnls-chroma#chordino (not yet available) + +### General Description ### + +Chordino provides a simple chord transcription based on NNLS Chroma (described above). + +### Parameters ### + +The default settings (in brackets, below) are those used for Matthias Mauch's 2010 MIREX submissions. + +* use approximate transcription (NNLS) (on or off; default: on): toggle between NNLS approximate transcription and linear spectral mapping. +* spectral roll on (0.00 -- 0.05; default: 0.0): consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed. +* tuning mode (global or local; default: global): local uses a local average for tuning, global uses ... exactly. +* spectral whitening (0.0 -- 1.0; default: 1.0): determines how much the log-frequency spectrum is whitened. A value of 0.0 means no whitening. For values other than 0.0 the log-freq spectral bins are divided by [standard deviation of their neighbours]^[spectral whitening], where "^" means "to the power of". 
+* spectral shape (0.5 -- 0.9; default: 0.7): the shape of the notes in the NNLS dictionary. Their harmonic amplitude follows a geometrically decreasing pattern, in which the i-th harmonic has an amplitude of [spectral shape]^[i-1], where "^" means "to the power of". +* chroma normalisation (none, maximum norm, L1 norm, L2 norm; default: none): determines whether or how the chromagrams are normalised. If the setting is not 'none', then each chroma frame separately is divided by the chosen vector norm. Note that normalisation implies that the joint 24-dim. "Chroma and Bass Chromagram" output will be different from the individual 12-dim. "Chromagram" and "Bass Chromagram" outputs. + +### Outputs ### + +### References and Credits ### + diff -r ea35e6e804f1 -r 6e76c7710fa1 chord.dict --- a/chord.dict Mon Oct 25 02:33:47 2010 +0900 +++ b/chord.dict Mon Oct 25 16:58:32 2010 +0900 @@ -16,13 +16,15 @@ 7=1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0 maj7=1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1 m7=1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0 +m6=1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0 =0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0 =0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0 dim=1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0 aug=1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0 =0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0 =0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0 -7=0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0 +7=0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0 +# sus4=1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0 ### Rock'n'Roll # :1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0 diff -r ea35e6e804f1 -r 6e76c7710fa1 chromamethods.cpp --- a/chromamethods.cpp Mon Oct 25 02:33:47 2010 +0900 +++ b/chromamethods.cpp Mon Oct 25 16:58:32 2010 +0900 @@ -378,6 +378,7 @@ // // } + float exponent = 2.0; for (int iChord = 0; iChord < loadedChordDict.size()/24; iChord++) { float sum = 0; @@ -386,10 +387,10 @@ sum += loadedChordDict[24 * iChord + 
iST]; } for (int iST = 0; iST < 24; ++iST) { - loadedChordDict[24 * iChord + iST] -= sum/24; - stand += pow(loadedChordDict[24 * iChord + iST],2)/24; + // loadedChordDict[24 * iChord + iST] -= sum/24; + stand += pow(abs(loadedChordDict[24 * iChord + iST]),exponent)/24; } - stand = sqrt(stand); + stand = pow(stand,(float)1.0/exponent); for (int iST = 0; iST < 24; ++iST) { loadedChordDict[24 * iChord + iST] /= stand; } diff -r ea35e6e804f1 -r 6e76c7710fa1 viterbi.o Binary file viterbi.o has changed