changeset 48:6e76c7710fa1 matthiasm-plugin

removed subtraction in chroma dictionary, added to-the-power-of-1.5 in chordino
author matthiasm
date Mon, 25 Oct 2010 16:58:32 +0900
parents ea35e6e804f1
children f29747403cb1 b6cddb109482
files Chordino.cpp NNLSBase.cpp README chord.dict chromamethods.cpp viterbi.o
diffstat 6 files changed, 60 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/Chordino.cpp	Mon Oct 25 02:33:47 2010 +0900
+++ b/Chordino.cpp	Mon Oct 25 16:58:32 2010 +0900
@@ -367,7 +367,9 @@
             for (int iBin = 12; iBin < 24; iBin++) {
                 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
             }
-            if (tempchordvalue < 0) tempchordvalue = 0;
+            if (iChord == nChord-1) tempchordvalue *= .7;
+            if (tempchordvalue < 0) tempchordvalue = 0.0;
+            tempchordvalue = pow(1.5,tempchordvalue);
             sumchordvalue+=tempchordvalue;
             currentChordSalience.push_back(tempchordvalue);
         }
@@ -389,9 +391,11 @@
 	if (m_useHMM) {
         cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
         int oldchord = nChord-1;
-        double selftransprob = 0.9;
+        double selftransprob = 0.99;
 	    
-        vector<double> init = vector<double>(nChord,1.0/nChord);
+        // vector<double> init = vector<double>(nChord,1.0/nChord);
+        vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
+        
         vector<vector<double> > trans;
         for (int iChord = 0; iChord < nChord; iChord++) {
             vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));            
@@ -399,10 +403,17 @@
             trans.push_back(temp);
         }
         vector<int> chordpath = ViterbiPath(init,trans,chordogram);
+
+
+        Feature chord_feature; // chord estimate
+        chord_feature.hasTimestamp = true;
+        chord_feature.timestamp = timestamps[0];
+        chord_feature.label = m_chordnames[chordpath[0]];
+        fsOut[0].push_back(chord_feature);
         
         for (int iFrame = 0; iFrame < chordpath.size(); ++iFrame) {
             // cerr << chordpath[iFrame] << endl;
-            if (chordpath[iFrame] != oldchord) {
+            if (chordpath[iFrame] != oldchord ) {
                 Feature chord_feature; // chord estimate
                 chord_feature.hasTimestamp = true;
                 chord_feature.timestamp = timestamps[iFrame];
--- a/NNLSBase.cpp	Mon Oct 25 02:33:47 2010 +0900
+++ b/NNLSBase.cpp	Mon Oct 25 16:58:32 2010 +0900
@@ -166,7 +166,8 @@
     d0.minValue = 0;
     d0.maxValue = 0.05;
     d0.defaultValue = 0;
-    d0.isQuantized = false;
+    d0.isQuantized = true;
+	d0.quantizeStep = 0.005;
     list.push_back(d0);
 
     ParameterDescriptor d1;
--- a/README	Mon Oct 25 02:33:47 2010 +0900
+++ b/README	Mon Oct 25 16:58:32 2010 +0900
@@ -1,7 +1,7 @@
 ## NNLS Chroma ##
 
-System identifier – vamp:nnls-chroma:nnls_chroma
-RDF URI – http://vamp-plugins.org/rdf/plugins/matthiasm#nnls_chroma (not yet available)
+System identifier – vamp:nnls-chroma:nnls-chroma
+RDF URI – http://vamp-plugins.org/rdf/plugins/nnls-chroma#nnls-chroma (not yet available)
 
 ### General Description ###
 
@@ -14,6 +14,7 @@
 ### Parameters ###
 
 The default settings (in brackets, below) are those used for Matthias Mauch's 2010 MIREX submissions.
+
 * use approximate transcription (NNLS) (on or off; default: on): toggle between NNLS approximate transcription and linear spectral mapping.
 * spectral roll on (0.00 -- 0.05; default: 0.0): consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.
 * tuning mode (global or local; default: global): local uses a local average for tuning, global uses ... exactly.
@@ -23,4 +24,36 @@
 
 ### Outputs ###
 
-### References and Credits ###
\ No newline at end of file
+* Log-frequency Spectrum: a spectrum similar to the well-known constant Q spectrum, in which bins are linear in log-frequency. Three bins per semitone.
+* Tuned Log-frequency Spectrum: has the same format as the Log-frequency Spectrum, but has undergone the following processing steps: tuning, subtraction of the background spectrum, and spectral whitening.
+* Semitone Spectrum: a spectral representation with one bin per semitone. If NNLS is selected in the parameters, this is the note activation, otherwise just a linear mapping to semitones.
+* Bass Chromagram: a 12-dimensional chromagram, restricted to the bass range. At each frame the Semitone Spectrum is multiplied by a bass pattern and then mapped to the 12 chroma bins. 
+* Chromagram: a 12-dimensional chromagram with mid-range emphasis. At each frame the Semitone Spectrum is multiplied by a mid-range pattern and then mapped to the 12 chroma bins.
+* Chromagram and Bass Chromagram: a 24-dimensional chromagram, consisting of both the Bass Chromagram and the Chromagram (see above). When normalisation is used, however, this representation is scaled differently and hence differs from the individual chromagrams.
+
+### References and Credits ###
+
+## Chordino ##
+
+System identifier – vamp:nnls-chroma:chordino
+RDF URI – http://vamp-plugins.org/rdf/plugins/nnls-chroma#chordino (not yet available)
+
+### General Description ###
+
+Chordino provides a simple chord transcription based on NNLS Chroma (described above). 
+
+### Parameters ###
+
+The default settings (in brackets, below) are those used for Matthias Mauch's 2010 MIREX submissions.
+
+* use approximate transcription (NNLS) (on or off; default: on): toggle between NNLS approximate transcription and linear spectral mapping.
+* spectral roll on (0.00 -- 0.05; default: 0.0): consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.
+* tuning mode (global or local; default: global): local uses a local average for tuning, global uses ... exactly.
+* spectral whitening (0.0 -- 1.0; default: 1.0): determines how much the log-frequency spectrum is whitened. A value of 0.0 means no whitening. For values other than 0.0 the log-freq spectral bins are divided by  [standard deviation of their neighbours]^[spectral whitening], where "^" means "to the power of".
+* spectral shape (0.5 -- 0.9; default: 0.7): the shape of the notes in the NNLS dictionary. Their harmonic amplitude follows a geometrically decreasing pattern, in which the i-th harmonic has an amplitude of [spectral shape]^[i-1], where "^" means "to the power of".
+* chroma normalisation (none, maximum norm, L1 norm, L2 norm; default: none): determines whether or how the chromagrams are normalised. If the setting is not 'none', then each chroma frame is separately divided by the chosen vector norm. Note that normalisation implies that the joint 24-dim. "Chromagram and Bass Chromagram" output will be different from the individual 12-dim. "Chromagram" and "Bass Chromagram" outputs.
+
+### Outputs ###
+
+### References and Credits ###
+
--- a/chord.dict	Mon Oct 25 02:33:47 2010 +0900
+++ b/chord.dict	Mon Oct 25 16:58:32 2010 +0900
@@ -16,13 +16,15 @@
 7=1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0
 maj7=1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1
 m7=1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0
+m6=1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0
 =0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0
 =0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0
 dim=1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0
 aug=1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0
 =0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0
 =0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0 
-7=0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0
+7=0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0
+# sus4=1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0
 
 ### Rock'n'Roll
 # :1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0
--- a/chromamethods.cpp	Mon Oct 25 02:33:47 2010 +0900
+++ b/chromamethods.cpp	Mon Oct 25 16:58:32 2010 +0900
@@ -378,6 +378,7 @@
         //      
         // }
         
+        float exponent = 2.0;
         
         for (int iChord = 0; iChord < loadedChordDict.size()/24; iChord++) {
             float sum = 0;
@@ -386,10 +387,10 @@
                 sum += loadedChordDict[24 * iChord + iST];
             }
             for (int iST = 0; iST < 24; ++iST) {
-                loadedChordDict[24 * iChord + iST] -= sum/24;
-                stand += pow(loadedChordDict[24 * iChord + iST],2)/24;
+                // loadedChordDict[24 * iChord + iST] -= sum/24;
+                stand += pow(abs(loadedChordDict[24 * iChord + iST]),exponent)/24;
             }
-            stand = sqrt(stand);
+            stand = pow(stand,(float)1.0/exponent);
             for (int iST = 0; iST < 24; ++iST) {
                 loadedChordDict[24 * iChord + iST] /= stand;            
             }
Binary file viterbi.o has changed