changeset 41:d6bb9b43ac1c matthiasm-plugin

README and new parameters, not tested yet
author Matthias Mauch <mail@matthiasmauch.net>
date Fri, 22 Oct 2010 21:43:57 +0900
parents 61d35e59ee2b
children d01f94d58ef0
files Chordino.cpp NNLSBase.cpp NNLSBase.h NNLSChroma.cpp README chromamethods.cpp chromamethods.h
diffstat 7 files changed, 89 insertions(+), 72 deletions(-) [+]
line wrap: on
line diff
--- a/Chordino.cpp	Fri Oct 22 11:58:16 2010 +0100
+++ b/Chordino.cpp	Fri Oct 22 21:43:57 2010 +0900
@@ -204,9 +204,9 @@
             runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
             if (runningstd[i] > 0) {
                 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ? 
-                // 		                    (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
+                // 		                    (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
                 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
-                    (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
+                    (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
             }
             if (f2.values[i] < 0) {
                 cerr << "ERROR: negative value in logfreq spectrum" << endl;
--- a/NNLSBase.cpp	Fri Oct 22 11:58:16 2010 +0100
+++ b/NNLSBase.cpp	Fri Oct 22 21:43:57 2010 +0900
@@ -42,7 +42,7 @@
     m_localTuning0(0),
     m_localTuning1(0),
     m_localTuning2(0),
-    m_paling(1.0),
+    m_whitening(1.0),
     m_preset(0.0),
     m_localTuning(0),
     m_kernelValue(0),
@@ -54,14 +54,15 @@
     m_chorddict(0),
     m_chordnames(0),
     m_doNormalizeChroma(0),
-    m_rollon(0.01)
+    m_rollon(0.0),
+	m_s(0.7)
 {
     if (debug_on) cerr << "--> NNLSBase" << endl;
 
     // make the *note* dictionary matrix
     m_dict = new float[nNote * 84];
     for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
-    dictionaryMatrix(m_dict);
+    dictionaryMatrix(m_dict, 0.7);
 	
     // get the *chord* dictionary from file (if the file exists)
     m_chordnames = chordDictionary(&m_chorddict);
@@ -145,46 +146,16 @@
     if (debug_on) cerr << "--> getParameterDescriptors" << endl;
     ParameterList list;
 
-    ParameterDescriptor d3;
-    d3.identifier = "preset";
-    d3.name = "preset";
-    d3.description = "Spectral paling: no paling - 0; whitening - 1.";
-    d3.unit = "";
-    d3.isQuantized = true;
-    d3.quantizeStep = 1;
-    d3.minValue = 0.0;
-    d3.maxValue = 3.0;
-    d3.defaultValue = 0.0;
-    d3.valueNames.push_back("polyphonic pop");
-    d3.valueNames.push_back("polyphonic pop (fast)");
-    d3.valueNames.push_back("solo keyboard");
-    d3.valueNames.push_back("manual");
-    list.push_back(d3);
-
-    ParameterDescriptor d5;
-    d5.identifier = "rollon";
-    d5.name = "spectral roll-on";
-    d5.description = "The bins below the spectral roll-on quantile will be set to 0.";
-    d5.unit = "";
-    d5.minValue = 0;
-    d5.maxValue = 1;
-    d5.defaultValue = 0;
-    d5.isQuantized = false;
-    list.push_back(d5);
-
-    // ParameterDescriptor d0;
-    //  d0.identifier = "notedict";
-    //  d0.name = "note dictionary";
-    //  d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
-    //  d0.unit = "";
-    //  d0.minValue = 0;
-    //  d0.maxValue = 1;
-    //  d0.defaultValue = 0;
-    //  d0.isQuantized = true;
-    //  d0.valueNames.push_back("s = 0.6");
-    //  d0.valueNames.push_back("no NNLS");
-    //  d0.quantizeStep = 1.0;
-    //  list.push_back(d0);
+    ParameterDescriptor d0;
+    d0.identifier = "rollon";
+    d0.name = "spectral roll-on";
+    d0.description = "The bins below the spectral roll-on quantile will be set to 0.";
+    d0.unit = "";
+    d0.minValue = 0;
+    d0.maxValue = 0.05;
+    d0.defaultValue = 0;
+    d0.isQuantized = false;
+    list.push_back(d0);
 
     ParameterDescriptor d1;
     d1.identifier = "tuningmode";
@@ -200,18 +171,29 @@
     d1.quantizeStep = 1.0;
     list.push_back(d1);
 
-    //     ParameterDescriptor d2;
-    //     d2.identifier = "paling";
-    //     d2.name = "spectral paling";
-    //     d2.description = "Spectral paling: no paling - 0; whitening - 1.";
-    //     d2.unit = "";
-    // d2.isQuantized = true;
-    // // d2.quantizeStep = 0.1;
-    //     d2.minValue = 0.0;
-    //     d2.maxValue = 1.0;
-    //     d2.defaultValue = 1.0;
-    //     d2.isQuantized = false;
-    //     list.push_back(d2);
+    ParameterDescriptor d2;
+    d2.identifier = "whitening";
+    d2.name = "spectral whitening";
+    d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
+    d2.unit = "";
+    d2.isQuantized = true;
+    d2.minValue = 0.0;
+    d2.maxValue = 1.0;
+    d2.defaultValue = 1.0;
+    d2.isQuantized = false;
+    list.push_back(d2);
+
+    ParameterDescriptor d3;
+    d3.identifier = "s";
+    d3.name = "spectral shape";
+    d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
+    d3.unit = "";
+    d3.minValue = 0.5;
+    d3.maxValue = 0.9;
+    d3.defaultValue = 0.7;
+    d3.isQuantized = false;
+    list.push_back(d3);
+
     ParameterDescriptor d4;
     d4.identifier = "chromanormalize";
     d4.name = "chroma normalization";
@@ -239,8 +221,12 @@
         return m_dictID; 
     }
     
-    if (identifier == "paling") {
-        return m_paling; 
+    if (identifier == "whitening") {
+        return m_whitening; 
+    }
+
+    if (identifier == "s") {
+        return m_s; 
     }
 
     if (identifier == "rollon") {
@@ -272,10 +258,14 @@
         m_dictID = (int) value;
     }
     
-    if (identifier == "paling") {
-        m_paling = value;
+    if (identifier == "whitening") {
+        m_whitening = value;
     }
     
+    if (identifier == "s") {
+        m_s = value;
+    }
+
     if (identifier == "tuningmode") {
         m_tuneLocal = (value > 0) ? true : false;
         // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
@@ -284,17 +274,17 @@
         m_preset = value;
         if (m_preset == 0.0) {
             m_tuneLocal = false;
-            m_paling = 1.0;
+            m_whitening = 1.0;
             m_dictID = 0.0;
         }
         if (m_preset == 1.0) {
             m_tuneLocal = false;
-            m_paling = 1.0;
+            m_whitening = 1.0;
             m_dictID = 1.0;
         }
         if (m_preset == 2.0) {
             m_tuneLocal = false;
-            m_paling = 0.7;
+            m_whitening = 0.7;
             m_dictID = 0.0;
         }
     }
@@ -565,9 +555,9 @@
             runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
             if (runningstd[i] > 0) {
                 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ? 
-                // 		                    (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
+                // 		                    (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
                 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
-                    (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
+                    (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
             }
             if (f2.values[i] < 0) {
                 cerr << "ERROR: negative value in logfreq spectrum" << endl;
--- a/NNLSBase.h	Fri Oct 22 11:58:16 2010 +0100
+++ b/NNLSBase.h	Fri Oct 22 21:43:57 2010 +0900
@@ -66,8 +66,9 @@
     float m_localTuning0;
     float m_localTuning1;
     float m_localTuning2;
-    float m_paling;
+    float m_whitening;
     float m_preset;
+	float m_s;
     vector<float> m_localTuning;
     vector<float> m_kernelValue;
     vector<int> m_kernelFftIndex;
--- a/NNLSChroma.cpp	Fri Oct 22 11:58:16 2010 +0100
+++ b/NNLSChroma.cpp	Fri Oct 22 21:43:57 2010 +0900
@@ -276,9 +276,9 @@
             runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
             if (runningstd[i] > 0) {
                 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ? 
-                // 		                    (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
+                // 		                    (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
                 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
-                    (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
+                    (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
             }
             if (f2.values[i] < 0) {
                 cerr << "ERROR: negative value in logfreq spectrum" << endl;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README	Fri Oct 22 21:43:57 2010 +0900
@@ -0,0 +1,26 @@
+## NNLS Chroma ##
+
+System identifier – vamp:matthiasm:nnls_chroma
+RDF URI – http://vamp-plugins.org/rdf/plugins/matthiasm#nnls_chroma (not yet available)
+
+### General Description ###
+
+NNLS Chroma analyses a single channel of audio using frame-wise spectral input from the Vamp host. The plugin was originally developed to extract treble and bass chromagrams for subsequent use in chord extraction methods. The spectrum is transformed to a log-frequency spectrum (constant-Q) with three bins per semitone. On this representation, two processing steps are performed:
+* tuning, after which each centre bin (i.e. bin 2, 5, 8, ...) corresponds to a semitone, even if the tuning of the piece deviates from 440 Hz standard pitch.
+* running standardisation: subtraction of the running mean, division by the running standard deviation. This has a spectral whitening effect.
+
+The processed log-frequency spectrum is then used as an input for NNLS approximate transcription (using a dictionary of harmonic notes with geometrically decaying harmonic magnitudes). The output of the NNLS approximate transcription is semitone-spaced. To get the chroma, this semitone spectrum is multiplied (element-wise) with the desired profile (chroma or bass chroma) and then mapped to 12 bins. The resulting chroma frames can be normalised by (dividing by) their norm (L1, L2 and maximum norm are available).
+
+### Parameters ###
+
+The default settings (in brackets, below) are those used for Matthias Mauch's 2010 MIREX submissions.
+
+* spectral roll-on (0.00 -- 0.05; default: 0.0): consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds [spectral roll-on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.
+* tuning mode (global or local; default: global): local uses a local average for tuning, global uses ... exactly.
+* spectral whitening (0.0 -- 1.0; default: 1.0): determines how much the log-frequency spectrum is whitened. A value of 0.0 means no whitening. For values other than 0.0 the log-frequency spectral bins are divided by [standard deviation of their neighbours]^[spectral whitening], where "^" means "to the power of".
+* spectral shape (0.5 -- 0.9; default: 0.7): the shape of the notes in the NNLS dictionary. Their harmonic amplitude follows a geometrically decreasing pattern, in which the i-th harmonic has an amplitude of [spectral shape]^[i-1], where "^" means "to the power of".
+* chroma normalisation (none, maximum norm, L1 norm, L2 norm; default: none): determines whether or how the chromagrams are normalised. If the setting is not 'none', then each chroma frame is separately divided by the chosen vector norm. Note that normalisation implies that the joint 24-dim. "Chroma and Bass Chromagram" output will be different from the individual 12-dim. "Chromagram" and "Bass Chromagram" outputs.
+
+### Outputs ###
+
+### References and Credits ###
\ No newline at end of file
--- a/chromamethods.cpp	Fri Oct 22 11:58:16 2010 +0100
+++ b/chromamethods.cpp	Fri Oct 22 21:43:57 2010 +0900
@@ -170,11 +170,11 @@
     return true;	
 }
 
-void dictionaryMatrix(float* dm) {
+void dictionaryMatrix(float* dm, float s_param) {
     int binspersemitone = 3; // this must be 3
     int minoctave = 0; // this must be 0
     int maxoctave = 7; // this must be 7
-    float s_param = 0.7;
+    // float s_param = 0.7;
 	
     // pitch-spaced frequency vector
     int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
--- a/chromamethods.h	Fri Oct 22 11:58:16 2010 +0100
+++ b/chromamethods.h	Fri Oct 22 21:43:57 2010 +0900
@@ -25,7 +25,7 @@
 const int nNote = 256;
 
 extern std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel);
-extern void dictionaryMatrix(float* dm);
+extern void dictionaryMatrix(float* dm, float s_param);
 extern std::vector<std::string> chordDictionary(std::vector<float> *mchorddict);
 extern bool logFreqMatrix(int fs, int blocksize, float *outmatrix);