Mercurial > hg > nnls-chroma

diff NNLSChroma.cpp @ 3:8360483a026e matthiasm-plugin
new simple chord estimation
author: matthiasm
date: Mon, 31 May 2010 14:12:37 +0000
parents: 2a491d71057d
children: 266d23a41cdc
--- a/NNLSChroma.cpp	Wed May 19 11:39:23 2010 +0000
+++ b/NNLSChroma.cpp	Mon May 31 14:12:37 2010 +0000
@@ -3,6 +3,7 @@
 #include <cmath>
 #include <list>
 #include <iostream>
+#include <fstream>
 #include <sstream>
 #include <cassert>
 #include <cstdio>
@@ -156,7 +157,7 @@
 	return true;	
 }
 
-bool dictionaryMatrix(double* dm) {
+bool dictionaryMatrix(float* dm) {
 	int binspersemitone = 3; // this must be 3
 	int minoctave = 0; // this must be 0
 	int maxoctave = 7; // this must be 7
@@ -177,27 +178,29 @@
 	cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
 	cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
 
-	// make out frequency vector
-	vector<float> out_f;
-
 	float curr_f;
 	float floatbin;
 	float curr_amp;
 	// now for every combination calculate the matrix element
-	unsigned countElement = 0;
 	for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
+		// cerr << iOut << endl;
 		for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
 			curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm;
-			if (curr_f > cq_f[nNote-1])  break;
-			floatbin = (iOut * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
+			// if (curr_f > cq_f[nNote-1])  break;
+			floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
+			// cerr << floatbin << endl;
 			curr_amp = pow(s_param,float(iHarm-1));
+			// cerr << "curramp" << curr_amp << endl;
 			for (unsigned iNote = 0; iNote < nNote; ++iNote) {
-				// cerr << dm[countElement] << endl;
-				dm[countElement] = cospuls(iNote+1.0, floatbin, binspersemitone + 0.0);
-				countElement++;
+				if (abs(iNote+1.0-floatbin)<2) {
+					dm[iNote  + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp;
+					// dm[iNote + nNote * iOut] += 1 * curr_amp;
+				}
 			}
-	   }
+		}
 	}
+
+
 }
 
 
@@ -213,7 +216,8 @@
   m_localTuning0(0),
   m_localTuning1(0),
   m_localTuning2(0),
-  m_paling(0),
+  m_paling(0.8),
+  m_preset(0.0),
   m_localTuning(0),
   m_kernelValue(0),
   m_kernelFftIndex(0),
@@ -223,7 +227,8 @@
   m_dictID(0)
 {
 	if (debug_on) cerr << "--> NNLSChroma" << endl;
-	m_dict = new double[nNote * 84];
+	m_dict = new float[nNote * 84];
+	for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
 	dictionaryMatrix(m_dict);
 }
 
@@ -346,7 +351,7 @@
     ParameterDescriptor d1;
     d1.identifier = "tuningmode";
     d1.name = "tuning mode";
-    d1.description = "Tuning can be performed locally or on the whole extraction area.";
+    d1.description = "Tuning can be performed locally or on the whole extraction segment.";
     d1.unit = "";
     d1.minValue = 0;
     d1.maxValue = 1;
@@ -362,22 +367,36 @@
     d2.name = "spectral paling";
     d2.description = "Spectral paling: no paling - 0; whitening - 1.";
     d2.unit = "";
-    d2.minValue = 0;
-    d2.maxValue = 1;
+	d2.isQuantized = true;
+	d2.quantizeStep = 0.1;
+    d2.minValue = 0.0;
+    d2.maxValue = 1.0;
     d2.defaultValue = 0.5;
-    d2.isQuantized = false;
-    // d1.valueNames.push_back("global tuning");
-    // d1.valueNames.push_back("local tuning");
-    // d1.quantizeStep = 0.1;
+    // d2.isQuantized = false;
     list.push_back(d2);
 
+    ParameterDescriptor d3;
+    d3.identifier = "preset";
+    d3.name = "preset";
+    d3.description = "Spectral paling: no paling - 0; whitening - 1.";
+    d3.unit = "";
+	d3.isQuantized = true;
+	d3.quantizeStep = 1;
+    d3.minValue = 0.0;
+    d3.maxValue = 2.0;
+    d3.defaultValue = 0.0;
+    d3.valueNames.push_back("polyphonic pop");
+	d3.valueNames.push_back("polyphonic pop (fast)");
+    d3.valueNames.push_back("solo keyboard");
+	d3.valueNames.push_back("manual");
+    list.push_back(d3);
     return list;
 }
 
 float
 NNLSChroma::getParameter(string identifier) const
 {
-		if (debug_on) cerr << "--> getParameter" << endl;
+	if (debug_on) cerr << "--> getParameter" << endl;
     if (identifier == "notedict") {
         return m_dictID; 
     }
@@ -393,7 +412,9 @@
             return 0.0;
         }
     }
-
+	if (identifier == "preset") {
+		return m_preset;
+    }
     return 0;
     
 }
@@ -401,7 +422,7 @@
 void
 NNLSChroma::setParameter(string identifier, float value) 
 {
-		if (debug_on) cerr << "--> setParameter" << endl;
+	if (debug_on) cerr << "--> setParameter" << endl;
     if (identifier == "notedict") {
         m_dictID = (int) value;
     }
@@ -414,6 +435,24 @@
         m_tuneLocal = (value > 0) ? true : false;
         // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
     }
+    if (identifier == "preset") {
+        m_preset = value;
+		if (m_preset == 0.0) {
+			m_tuneLocal = false;
+			m_paling = 1.0;
+			m_dictID = 0.0;
+		}
+		if (m_preset == 1.0) {
+			m_tuneLocal = false;
+			m_paling = 1.0;
+			m_dictID = 1.0;
+		}
+		if (m_preset == 2.0) {
+			m_tuneLocal = false;
+			m_paling = 0.7;
+			m_dictID = 0.0;
+		}
+    }
 }
 
 NNLSChroma::ProgramList
@@ -620,9 +659,9 @@
 		d10.minValue = 427.47;
   		d10.maxValue = 452.89;
   	    d10.isQuantized = false;
-  	    d10.sampleType = OutputDescriptor::OneSamplePerStep;
+  	    d10.sampleType = OutputDescriptor::FixedSampleRate;
   	    d10.hasDuration = false;
-  	    d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
+  	    // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
   	    list.push_back(d10);
   
     return list;
@@ -666,8 +705,13 @@
 	}
 	cerr << "nonzero count : " << countNonzero << endl;
 	delete [] tempkernel;
-
-
+	ofstream myfile;
+	myfile.open ("matrix.txt");
+    // myfile << "Writing this to a file.\n";	
+	for (int i = 0; i < nNote * 84; ++i) {
+		myfile << m_dict[i] << endl;		
+	}
+    myfile.close();
     return true;
 }
 
@@ -693,7 +737,8 @@
 	float *magnitude = new float[m_blockSize/2];
 	
 	Feature f10; // local tuning
-	
+	f10.hasTimestamp = true;
+	f10.timestamp = timestamp - Vamp::RealTime::fromSeconds(0);
 	const float *fbuf = inputBuffers[0];	
 	
 	// make magnitude
@@ -730,9 +775,10 @@
         m_meanTuning0 += nm[iTone + 0]*one_over_N;
     	m_meanTuning1 += nm[iTone + 1]*one_over_N;
     	m_meanTuning2 += nm[iTone + 2]*one_over_N;
-        m_localTuning0 *= 0.99994; m_localTuning0 += nm[iTone + 0];
-        m_localTuning1 *= 0.99994; m_localTuning1 += nm[iTone + 1];
-        m_localTuning2 *= 0.99994; m_localTuning2 += nm[iTone + 2];
+		float ratioOld = 0.997;
+        m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
+        m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
+        m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
     }
 	
     // if (m_tuneLocal) {
@@ -743,6 +789,7 @@
         m_localTuning.push_back(normalisedtuning);
         float tuning440 = 440 * pow(2,normalisedtuning/12);
         f10.values.push_back(tuning440);
+		// cerr << tuning440 << endl;
     // }
     
 	Feature f1; // logfreqspec
@@ -754,7 +801,7 @@
 	
 	FeatureSet fs;
 	fs[1].push_back(f1);
-    fs[10].push_back(f10);
+    fs[8].push_back(f10);
 
     // deletes
     delete[] magnitude;
@@ -856,6 +903,7 @@
 	    // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n);
 	    
 	    vector<vector<float> > chordogram;
+		vector<vector<int> > scoreChordogram;
 	    vector<float> oldchroma = vector<float>(12,0);
 	    vector<float> oldbasschroma = vector<float>(12,0);
 	    count = 0;
@@ -879,11 +927,14 @@
 	        f6.hasTimestamp = true;
 	        f6.timestamp = f2.timestamp;
 	        
-			double b[256];
+			float b[256];
 	
 	        bool some_b_greater_zero = false;
+			float sumb = 0;
 	        for (int i = 0; i < 256; i++) {
-	            b[i] = f2.values[i];
+				// b[i] = m_dict[(256 * count + i) % (256 * 84)];
+				b[i] = f2.values[i];
+				sumb += b[i];
 	            if (b[i] > 0) {
 	                some_b_greater_zero = true;
 	            }            
@@ -897,12 +948,12 @@
 			unsigned iSemitone = 0;
 			
 			if (some_b_greater_zero) {
-				if (m_dictID == 0) {
+				if (m_dictID == 1) {
 					for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
 						currval = 0;
-						for (unsigned iBin = 0; iBin < 3; ++iBin) {
-							currval += b[iNote + iBin];						
-						}
+						currval += b[iNote + 1 + -1] * 0.5;						
+						currval += b[iNote + 1 +  0] * 1.0;						
+						currval += b[iNote + 1 +  1] * 0.5;						
 						f3.values.push_back(currval);
 						chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
 						basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
@@ -910,14 +961,31 @@
 					}
 		        
 				} else {
-					double x[84+1] = {1.0};
-				    double rnorm;
-				    double w[84+1];
-				    double zz[84+1];
-				    int indx[84+2];
+					float x[84+1000];
+					for (int i = 1; i < 1084; ++i) x[i] = 1.0;
+					// for (int i = 0; i < 84; ++i) {
+					// 	x[i] = b[3*i+3];
+					// }
+				    float rnorm;
+				    float w[84+1000];
+				    float zz[84+1000];
+				    int indx[84+1000];
 				    int mode;
-				  
-					nnls(m_dict, nNote, nNote, 84, b, x, &rnorm, w, zz, indx, &mode);
+					float curr_dict[256*84];
+					for (unsigned i = 0; i < 256 * 84; ++i) {
+						curr_dict[i] = 1.0 * m_dict[i];
+					}
+					nnls(curr_dict, nNote, nNote, 84, b, x, &rnorm, w, zz, indx, &mode);
+					for (unsigned iNote = 0; iNote < 84; ++iNote) {
+						// for	(unsigned kNote = 0; kNote < 256; ++kNote) {
+						// 						x[iNote] += m_dict[kNote + nNote * iNote] * b[kNote];
+						// 					}					
+						f3.values.push_back(x[iNote]);
+						// cerr << mode << endl;
+						chroma[iNote % 12] += x[iNote] * treblewindow[iNote];
+						basschroma[iNote % 12] += x[iNote] * basswindow[iNote];
+						// iSemitone++;
+					}
 				}	
 			}
 	
@@ -951,86 +1019,131 @@
 	        fsOut[4].push_back(f4);
 	        fsOut[5].push_back(f5);
 	        fsOut[6].push_back(f6);
-	// if (x) free(x);
-	        // delete[] b;
 	        count++;
 	    }
-	//     // cerr << m_stepSize << endl<< endl;
-	//     count = 0;
-	//     int kernelwidth = (49 * 2048) / m_stepSize;
-	//     int nChord = nChorddict / 24;
 	//     int musicitykernelwidth = (50 * 2048) / m_stepSize;
 	//     
-	//     /* Simple chord estimation
-	//     I just take the local chord estimates ("currentChordSalience") and average them over time, then
-	//     take the maximum. Very simple, don't do this at home...
-	//     */
-	//     vector<int> chordSequence;
-	//     for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
-	//     
-	//         int startIndex = max(count - kernelwidth/2 + 1,0);
-	//         int endIndex = min(int(chordogram.size()), startIndex + kernelwidth - 1 + 1);
-	//         vector<float> temp = vector<float>(nChord,0);
-	//         for (int iChord = 0; iChord < nChord; iChord++) {
-	//             float val = 0;
-	//             for (int i = startIndex; i < endIndex; i++) {
-	//                 val += chordogram[i][iChord] * 
-	//                     (kernelwidth - abs(i - startIndex - kernelwidth * 0.5)); // weigthed sum (triangular window)
-	//             }
-	//             temp[iChord] = val; // sum
-	//         }
-	//         
-	//         // get maximum for "chord estimate"
-	//         
-	//         float bestChordValue = 0;
-	//         int bestChordIndex = nChord-1; // "no chord" is default
-	//         for (int iChord = 0; iChord < nChord; iChord++) {
-	//             if (temp[iChord] > bestChordValue) {
-	//                 bestChordValue = temp[iChord];
-	//                 bestChordIndex = iChord;
-	//             }
-	//         }
-	//         // cerr << bestChordIndex << endl;
-	//         chordSequence.push_back(bestChordIndex);
-	//         count++;
-	//     }
-	//     // mode filter on chordSequence
-	//     count = 0;
-	//     int oldChordIndex = -1;
-	//     for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
-	//         Feature f6 = *it;
-	//         Feature f7; // chord estimate
-	//         
-	//         f7.hasTimestamp = true;
-	//         f7.timestamp = f6.timestamp;
-	//         
-	//         vector<int> chordCount = vector<int>(121,0);
-	//         
-	//         int maxChordCount = 0;
-	//         int maxChordIndex = 120;
-	//         int startIndex = max(count - kernelwidth/2,0);
-	//         int endIndex = min(int(chordogram.size()), startIndex + kernelwidth - 1);
-	//         for (int i = startIndex; i < endIndex; i++) {
-	//             chordCount[chordSequence[i]]++;
-	//             if (chordCount[chordSequence[i]] > maxChordCount) {
-	//                 maxChordCount++;
-	//                 maxChordIndex = chordSequence[i];
-	//             }
-	//         }
-	//         if (oldChordIndex != maxChordIndex) {
-	//             oldChordIndex = maxChordIndex;
-	// 
-	//             char buffer1 [50];
-	//             if (maxChordIndex < nChord - 1) {
-	//                 sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
-	//             } else {
-	//                 sprintf(buffer1, "N");
-	//             }
-	//             f7.label = buffer1;
-	//             fsOut[7].push_back(f7);
-	//         }
-	//         count++;
-	//     }
+	    /* Simple chord estimation
+	    I just take the local chord estimates ("currentChordSalience") and average them over time, then
+	    take the maximum. Very simple, don't do this at home...
+	    */
+	    count = 0; 
+	    int halfwindowlength = m_inputSampleRate / m_stepSize;
+	    int nChord = nChorddict / 24;
+	    vector<int> chordSequence;
+  	 	for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
+			vector<int> temp = vector<int>(nChord,0);
+			scoreChordogram.push_back(temp);
+		}
+	    for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end()-2*halfwindowlength-1; ++it) {		
+			int startIndex = count + 1;
+			int endIndex = count + 2 * halfwindowlength;
+	        vector<float> temp = vector<float>(nChord,0);
+			float maxval = 0; // will be the value of the most salient chord in this frame
+			float maxindex = nChord-1; //... and the index thereof
+			unsigned bestchordL = 0; // index of the best "left" chord
+ 	 		unsigned bestchordR = 0; // index of the best "right" chord
+			for (unsigned iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
+				// now find the max values on both sides of iWF
+				// left side:
+				float maxL = 0;
+				unsigned maxindL = nChord-1;
+				for (unsigned iChord = 0; iChord < nChord; iChord++) {
+					float currsum = 0;
+					for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
+						currsum += chordogram[count+iFrame][iChord];
+					}
+					if (iChord == nChord-1) currsum *= 0.8;
+					if (currsum > maxL) {
+						maxL = currsum;
+						maxindL = iChord;
+					}
+				}				
+				// right side:
+				float maxR = 0;
+				unsigned maxindR = nChord-1;
+				for (unsigned iChord = 0; iChord < nChord; iChord++) {
+					float currsum = 0;
+					for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
+						currsum += chordogram[count+iFrame][iChord];
+					}
+					if (iChord == nChord-1) currsum *= 0.8;
+					if (currsum > maxR) {
+						maxR = currsum;
+						maxindR = iChord;
+					}
+				}
+				if (maxL+maxR > maxval) {					
+					maxval = maxL+maxR;
+					maxindex = iWF;
+					bestchordL = maxindL;
+					bestchordR = maxindR;
+				}
+				
+			}
+			// cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
+			// add a score to every chord-frame-point that was part of a maximum 
+			for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
+				scoreChordogram[iFrame+count][bestchordL]++;
+			}
+			for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
+				scoreChordogram[iFrame+count][bestchordR]++;
+			}
+			count++;	
+	    }
+
+		count = 0;
+	 	for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { 
+			float maxval = 0; // will be the value of the most salient chord in this frame
+			float maxindex = 0; //... and the index thereof
+			for (unsigned iChord = 0; iChord < nChord; iChord++) {
+				if (scoreChordogram[count][iChord] > maxval) {
+					maxval = scoreChordogram[count][iChord];
+					maxindex = iChord;
+					cerr << iChord << endl;
+				}
+			}
+			chordSequence.push_back(maxindex);
+			cerr << "before modefilter, maxindex: " << maxindex << endl;
+			count++;
+		}
+	
+	
+	    // mode filter on chordSequence
+	    count = 0;
+	    int oldChordIndex = -1;
+	    for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
+			Feature f6 = *it;
+			Feature f7; // chord estimate
+			f7.hasTimestamp = true;
+			f7.timestamp = f6.timestamp;
+			vector<int> chordCount = vector<int>(nChord,0);
+	        int maxChordCount = 0;
+	        int maxChordIndex = nChord-1;
+	        // int startIndex = max(count - halfwindowlength,0);
+	        // int endIndex = min(int(chordogram.size()), startIndex + halfwindowlength);
+	        // for (int i = startIndex; i < endIndex; i++) {
+	        //     chordCount[chordSequence[i]]++;
+	        //     if (chordCount[chordSequence[i]] > maxChordCount) {
+	        //         maxChordCount++;
+	        //         maxChordIndex = chordSequence[i];
+	        //     }
+	        // }
+			maxChordIndex = chordSequence[count];
+	        if (oldChordIndex != maxChordIndex) {
+	            oldChordIndex = maxChordIndex;
+	
+	            char buffer1 [50];
+	            if (maxChordIndex < nChord - 1) {
+	                sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
+	            } else {
+	                sprintf(buffer1, "N");
+	            }
+	            f7.label = buffer1;
+	            fsOut[7].push_back(f7);
+	        }
+	        count++;
+	    }
 	//     // musicity
 	//     count = 0;
 	//     int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
author	matthiasm
date	Mon, 31 May 2010 14:12:37 +0000
parents	2a491d71057d
children	266d23a41cdc