diff NNLSChroma.cpp @ 13:9ae90fa5fa74 matthiasm-plugin

NNLS is now taken from a file without GPL. More chroma normalisation options.
author matthiasm
date Wed, 16 Jun 2010 10:16:13 +0000
parents 54f28d8ac098
children 75fb80542cd2
line wrap: on
line diff
--- a/NNLSChroma.cpp	Wed Jun 09 03:33:36 2010 +0000
+++ b/NNLSChroma.cpp	Wed Jun 16 10:16:13 2010 +0000
@@ -433,7 +433,7 @@
 {
     // Return something helpful here!
 	if (debug_on) cerr << "--> getDescription" << endl;
-    return "This plugin provides a number of features derived from a log-frequency amplitude spectrum (LAS) of the DFT: the LAS itself, a standard-tuned version thereof (the local and global tuning estimates can are also be output), an approximate transcription to semitone activation using non-linear least squares (NNLS). Furthermore chroma features and a simple chord estimate derived from this NNLS semitone transcription.";
+    return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate.";
 }
 
 string
@@ -569,11 +569,13 @@
     d4.description = "How shall the chroma vector be normalized?";
     d4.unit = "";
     d4.minValue = 0;
-    d4.maxValue = 1;
+    d4.maxValue = 3;
     d4.defaultValue = 0;
     d4.isQuantized = true;
-    d4.valueNames.push_back("no normalization");
-    d4.valueNames.push_back("maximum normalization");
+    d4.valueNames.push_back("none");
+    d4.valueNames.push_back("maximum norm");
+	d4.valueNames.push_back("L1 norm");
+	d4.valueNames.push_back("L2 norm");
     d4.quantizeStep = 1.0;
     list.push_back(d4);
 
@@ -1045,8 +1047,9 @@
 		    /** Tune Log-Frequency Spectrogram
 		    calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to 
 		    perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
-		    **/    
-		
+		    **/
+			cerr << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
+					
 		    float tempValue = 0;
 		    float dbThreshold = 0; // relative to the background spectrum
 		    float thresh = pow(10,dbThreshold/20);
@@ -1094,6 +1097,7 @@
 		        fsOut[2].push_back(f2);
 		        count++;
 		    }
+			cerr << "done." << endl;
 	    
 	    /** Semitone spectrum and chromagrams
 	    Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
@@ -1101,7 +1105,12 @@
 	    Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means 
 	    bass and treble stacked onto each other).
 	    **/
-	    // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n);
+		if (m_dictID == 1) {
+			cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
+		} else {
+			cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
+		}
+
 	    
 	    vector<vector<float> > chordogram;
 		vector<vector<int> > scoreChordogram;
@@ -1199,19 +1208,69 @@
 					}
 				}	
 			}
+			
             
 	        
-			if (m_doNormalizeChroma > 0) {
-				float chromamax = *max_element(chroma.begin(), chroma.end());
-				for (int i = 0; i < chroma.size(); i++) {
-					chroma[i] /= chromamax;
-				}
-			}
+			
 			f4.values = chroma; 
 	        f5.values = basschroma;
 	        chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas 
 	        f6.values = chroma; 
 	        
+			if (m_doNormalizeChroma > 0) {
+				vector<float> chromanorm = vector<float>(3,0);			
+				switch (int(m_doNormalizeChroma)) {
+					case 0: // should never end up here
+						break;
+					case 1:
+						chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
+						chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
+						chromanorm[2] = max(chromanorm[0], chromanorm[1]);
+						break;
+					case 2:
+						for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
+							chromanorm[0] += *it; 						
+						}
+						for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
+							chromanorm[1] += *it; 						
+						}
+						for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
+							chromanorm[2] += *it; 						
+						}
+						break;
+					case 3:
+						for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
+							chromanorm[0] += pow(*it,2); 						
+						}
+						chromanorm[0] = sqrt(chromanorm[0]);
+						for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
+							chromanorm[1] += pow(*it,2); 						
+						}
+						chromanorm[1] = sqrt(chromanorm[1]);
+						for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
+							chromanorm[2] += pow(*it,2); 						
+						}
+						chromanorm[2] = sqrt(chromanorm[2]);
+						break;
+				}
+				if (chromanorm[0] > 0) {
+					for (int i = 0; i < f4.values.size(); i++) {
+						f4.values[i] /= chromanorm[0];
+					}
+				}
+				if (chromanorm[1] > 0) {
+					for (int i = 0; i < f5.values.size(); i++) {
+						f5.values[i] /= chromanorm[1];
+					}
+				}
+				if (chromanorm[2] > 0) {
+					for (int i = 0; i < f6.values.size(); i++) {
+						f6.values[i] /= chromanorm[2];
+					}
+				}
+				
+			}
+	
 	        // local chord estimation
 	        vector<float> currentChordSalience;
 	        float tempchordvalue = 0;
@@ -1239,12 +1298,14 @@
 	        fsOut[6].push_back(f6);
 	        count++;
 	    }
-	    cerr << "*******    NNLS done      *******" << endl;
+	    cerr << "done." << endl;
+		
 
 	    /* Simple chord estimation
 	    I just take the local chord estimates ("currentChordSalience") and average them over time, then
 	    take the maximum. Very simple, don't do this at home...
 	    */
+		cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
 	    count = 0; 
 	    int halfwindowlength = m_inputSampleRate / m_stepSize;
 	    vector<int> chordSequence;
@@ -1330,7 +1391,7 @@
 			}
 			count++;	
 	    }
-        cerr << "*******  agent finished   *******" << endl;
+        // cerr << "*******  agent finished   *******" << endl;
 		count = 0;
 	 	for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { 
 			float maxval = 0; // will be the value of the most salient chord in this frame
@@ -1346,7 +1407,7 @@
 			// cerr << "before modefilter, maxindex: " << maxindex << endl;
 			count++;
 		}
-		cerr << "*******  mode filter done *******" << endl;
+		// cerr << "*******  mode filter done *******" << endl;
 
 	
 	    // mode filter on chordSequence
@@ -1389,6 +1450,7 @@
 	        }
 	        count++;
 	    }
+		cerr << "done." << endl;
 	//     // musicity
 	//     count = 0;
 	//     int oldlabeltype = 0; // start value is 0, music is 1, speech is 2