nnls-chroma: diff NNLSBase.cpp @ 81:4270f3039ab0 matthiasm-plugin
dont remember, sorry
author | Matthias Mauch <mail@matthiasmauch.net>
date | Mon, 15 Nov 2010 11:01:36 +0900
parents | 026a5c0ee2c2
children | e5c16976513d
--- a/NNLSBase.cpp Thu Nov 11 15:11:05 2010 +0900 +++ b/NNLSBase.cpp Mon Nov 15 11:01:36 2010 +0900 @@ -529,526 +529,526 @@ NNLSBase::FeatureSet NNLSBase::getRemainingFeatures() { - if (debug_on) cerr << "--> getRemainingFeatures" << endl; - FeatureSet fsOut; - if (m_logSpectrum.size() == 0) return fsOut; - int nChord = m_chordnames.size(); + // if (debug_on) cerr << "--> getRemainingFeatures" << endl; + FeatureSet fsOut; + // if (m_logSpectrum.size() == 0) return fsOut; + // int nChord = m_chordnames.size(); + // // + // /** Calculate Tuning + // calculate tuning from (using the angle of the complex number defined by the + // cumulative mean real and imag values) + // **/ + // float meanTuningImag = sinvalue * m_meanTunings[1] - sinvalue * m_meanTunings[2]; + // float meanTuningReal = m_meanTunings[0] + cosvalue * m_meanTunings[1] + cosvalue * m_meanTunings[2]; + // float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); + // float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); + // int intShift = floor(normalisedtuning * 3); + // float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this + // + // char buffer0 [50]; + // + // sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); + // + // // cerr << "normalisedtuning: " << normalisedtuning << '\n'; + // + // // push tuning to FeatureSet fsOut + // Feature f0; // tuning + // f0.hasTimestamp = true; + // f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));; + // f0.label = buffer0; + // fsOut[0].push_back(f0); + // + // /** Tune Log-Frequency Spectrogram + // calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to + // perform linear interpolation on the existing log-frequency spectrogram (kinda f1). + // **/ + // cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... "; + // + // float tempValue = 0; + // float dbThreshold = 0; // relative to the background spectrum + // float thresh = pow(10,dbThreshold/20); + // // cerr << "tune local ? 
" << m_tuneLocal << endl; + // int count = 0; + // + // for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) { + // Feature f1 = *i; + // Feature f2; // tuned log-frequency spectrum + // f2.hasTimestamp = true; + // f2.timestamp = f1.timestamp; + // f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero + // + // if (m_tuneLocal == 1.0) { + // intShift = floor(m_localTuning[count] * 3); + // floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this + // } + // + // // cerr << intShift << " " << floatShift << endl; + // + // for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins + // tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift; + // f2.values.push_back(tempValue); + // } + // + // f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge + // vector<float> runningmean = SpecialConvolution(f2.values,hw); + // vector<float> runningstd; + // for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance) + // runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i])); + // } + // runningstd = SpecialConvolution(runningstd,hw); // second step convolve + // for (int i = 0; i < nNote; i++) { + // runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std + // if (runningstd[i] > 0) { + // // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ? + // // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0; + // f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ? + // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0; + // } + // if (f2.values[i] < 0) { + // cerr << "ERROR: negative value in logfreq spectrum" << endl; + // } + // } + // fsOut[2].push_back(f2); + // count++; + // } + // cerr << "done." << endl; + // + // /** Semitone spectrum and chromagrams + // Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum + // is inferred using a non-negative least squares algorithm. + // Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means + // bass and treble stacked onto each other). + // **/ + // if (m_useNNLS == 0) { + // cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... "; + // } else { + // cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... 
"; + // } // - /** Calculate Tuning - calculate tuning from (using the angle of the complex number defined by the - cumulative mean real and imag values) - **/ - float meanTuningImag = sinvalue * m_meanTunings[1] - sinvalue * m_meanTunings[2]; - float meanTuningReal = m_meanTunings[0] + cosvalue * m_meanTunings[1] + cosvalue * m_meanTunings[2]; - float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); - float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); - int intShift = floor(normalisedtuning * 3); - float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this - - char buffer0 [50]; - - sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); - - // cerr << "normalisedtuning: " << normalisedtuning << '\n'; - - // push tuning to FeatureSet fsOut - Feature f0; // tuning - f0.hasTimestamp = true; - f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));; - f0.label = buffer0; - fsOut[0].push_back(f0); - - /** Tune Log-Frequency Spectrogram - calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to - perform linear interpolation on the existing log-frequency spectrogram (kinda f1). - **/ - cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... "; - - float tempValue = 0; - float dbThreshold = 0; // relative to the background spectrum - float thresh = pow(10,dbThreshold/20); - // cerr << "tune local ? " << m_tuneLocal << endl; - int count = 0; - - for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) { - Feature f1 = *i; - Feature f2; // tuned log-frequency spectrum - f2.hasTimestamp = true; - f2.timestamp = f1.timestamp; - f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero - - if (m_tuneLocal == 1.0) { - intShift = floor(m_localTuning[count] * 3); - floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this - } - - // cerr << intShift << " " << floatShift << endl; - - for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins - tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift; - f2.values.push_back(tempValue); - } - - f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge - vector<float> runningmean = SpecialConvolution(f2.values,hw); - vector<float> runningstd; - for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance) - runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i])); - } - runningstd = SpecialConvolution(runningstd,hw); // second step convolve - for (int i = 0; i < nNote; i++) { - runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std - if (runningstd[i] > 0) { - // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ? - // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0; - f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ? - (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0; - } - if (f2.values[i] < 0) { - cerr << "ERROR: negative value in logfreq spectrum" << endl; - } - } - fsOut[2].push_back(f2); - count++; - } - cerr << "done." << endl; - - /** Semitone spectrum and chromagrams - Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum - is inferred using a non-negative least squares algorithm. 
- Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means - bass and treble stacked onto each other). - **/ - if (m_useNNLS == 0) { - cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... "; - } else { - cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... "; - } - - - vector<vector<float> > chordogram; - vector<vector<int> > scoreChordogram; - vector<float> chordchange = vector<float>(fsOut[2].size(),0); - vector<float> oldchroma = vector<float>(12,0); - vector<float> oldbasschroma = vector<float>(12,0); - count = 0; - - for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) { - Feature f2 = *it; // logfreq spectrum - Feature f3; // semitone spectrum - Feature f4; // treble chromagram - Feature f5; // bass chromagram - Feature f6; // treble and bass chromagram - - f3.hasTimestamp = true; - f3.timestamp = f2.timestamp; - - f4.hasTimestamp = true; - f4.timestamp = f2.timestamp; - - f5.hasTimestamp = true; - f5.timestamp = f2.timestamp; - - f6.hasTimestamp = true; - f6.timestamp = f2.timestamp; - - float b[nNote]; - - bool some_b_greater_zero = false; - float sumb = 0; - for (int i = 0; i < nNote; i++) { - // b[i] = m_dict[(nNote * count + i) % (nNote * 84)]; - b[i] = f2.values[i]; - sumb += b[i]; - if (b[i] > 0) { - some_b_greater_zero = true; - } - } - - // here's where the non-negative least squares algorithm calculates the note activation x - - vector<float> chroma = vector<float>(12, 0); - vector<float> basschroma = vector<float>(12, 0); - float currval; - unsigned iSemitone = 0; - - if (some_b_greater_zero) { - if (m_useNNLS == 0) { - for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { - currval = 0; - currval += b[iNote + 1 + -1] * 0.5; - currval += b[iNote + 1 + 0] * 1.0; - currval += b[iNote + 1 + 1] * 0.5; - f3.values.push_back(currval); - chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; - basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; - iSemitone++; - } - - } else { - float x[84+1000]; - for (int i = 1; i < 1084; ++i) x[i] = 1.0; - vector<int> signifIndex; - int index=0; - sumb /= 84.0; - for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { - float currval = 0; - currval += b[iNote + 1 + -1]; - currval += b[iNote + 1 + 0]; - currval += b[iNote + 1 + 1]; - if (currval > 0) signifIndex.push_back(index); - f3.values.push_back(0); // fill the values, change later - index++; - } - float rnorm; - float w[84+1000]; - float zz[84+1000]; - int indx[84+1000]; - int mode; - int dictsize = nNote*signifIndex.size(); - // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl; - float *curr_dict = new float[dictsize]; - for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { - for (unsigned iBin = 0; iBin < nNote; iBin++) { - curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin]; - } - } - nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode); - delete [] curr_dict; - for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { - f3.values[signifIndex[iNote]] = x[iNote]; - // cerr << mode << endl; - chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; - basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; - } - } - } - - - - - f4.values = chroma; - f5.values = basschroma; - chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas - f6.values = chroma; - - if 
(m_doNormalizeChroma > 0) { - vector<float> chromanorm = vector<float>(3,0); - switch (int(m_doNormalizeChroma)) { - case 0: // should never end up here - break; - case 1: - chromanorm[0] = *max_element(f4.values.begin(), f4.values.end()); - chromanorm[1] = *max_element(f5.values.begin(), f5.values.end()); - chromanorm[2] = max(chromanorm[0], chromanorm[1]); - break; - case 2: - for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) { - chromanorm[0] += *it; - } - for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) { - chromanorm[1] += *it; - } - for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) { - chromanorm[2] += *it; - } - break; - case 3: - for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) { - chromanorm[0] += pow(*it,2); - } - chromanorm[0] = sqrt(chromanorm[0]); - for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) { - chromanorm[1] += pow(*it,2); - } - chromanorm[1] = sqrt(chromanorm[1]); - for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) { - chromanorm[2] += pow(*it,2); - } - chromanorm[2] = sqrt(chromanorm[2]); - break; - } - if (chromanorm[0] > 0) { - for (int i = 0; i < f4.values.size(); i++) { - f4.values[i] /= chromanorm[0]; - } - } - if (chromanorm[1] > 0) { - for (int i = 0; i < f5.values.size(); i++) { - f5.values[i] /= chromanorm[1]; - } - } - if (chromanorm[2] > 0) { - for (int i = 0; i < f6.values.size(); i++) { - f6.values[i] /= chromanorm[2]; - } - } - - } - - // local chord estimation - vector<float> currentChordSalience; - float tempchordvalue = 0; - float sumchordvalue = 0; - - for (int iChord = 0; iChord < nChord; iChord++) { - tempchordvalue = 0; - for (int iBin = 0; iBin < 12; iBin++) { - tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; - } - for (int iBin = 12; iBin < 24; iBin++) { - tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; - } - sumchordvalue+=tempchordvalue; - currentChordSalience.push_back(tempchordvalue); - } - if (sumchordvalue > 0) { - for (int iChord = 0; iChord < nChord; iChord++) { - currentChordSalience[iChord] /= sumchordvalue; - } - } else { - currentChordSalience[nChord-1] = 1.0; - } - chordogram.push_back(currentChordSalience); - - fsOut[3].push_back(f3); - fsOut[4].push_back(f4); - fsOut[5].push_back(f5); - fsOut[6].push_back(f6); - count++; - } - cerr << "done." << endl; - - - /* Simple chord estimation - I just take the local chord estimates ("currentChordSalience") and average them over time, then - take the maximum. Very simple, don't do this at home... - */ - cerr << "[NNLS Chroma Plugin] Chord Estimation ... 
"; - count = 0; - int halfwindowlength = m_inputSampleRate / m_stepSize; - vector<int> chordSequence; - for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram - vector<int> temp = vector<int>(nChord,0); - scoreChordogram.push_back(temp); - } - for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) { - int startIndex = count + 1; - int endIndex = count + 2 * halfwindowlength; - - float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1); - - vector<int> chordCandidates; - for (unsigned iChord = 0; iChord < nChord-1; iChord++) { - // float currsum = 0; - // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { - // currsum += chordogram[iFrame][iChord]; - // } - // if (currsum > chordThreshold) chordCandidates.push_back(iChord); - for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { - if (chordogram[iFrame][iChord] > chordThreshold) { - chordCandidates.push_back(iChord); - break; - } - } - } - chordCandidates.push_back(nChord-1); - // cerr << chordCandidates.size() << endl; - - float maxval = 0; // will be the value of the most salient *chord change* in this frame - float maxindex = 0; //... and the index thereof - unsigned bestchordL = nChord-1; // index of the best "left" chord - unsigned bestchordR = nChord-1; // index of the best "right" chord - - for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { - // now find the max values on both sides of iWF - // left side: - float maxL = 0; - unsigned maxindL = nChord-1; - for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { - unsigned iChord = chordCandidates[kChord]; - float currsum = 0; - for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) { - currsum += chordogram[count+iFrame][iChord]; - } - if (iChord == nChord-1) currsum *= 0.8; - if (currsum > maxL) { - maxL = currsum; - maxindL = iChord; - } - } - // right side: - float maxR = 0; - unsigned maxindR = nChord-1; - for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { - unsigned iChord = chordCandidates[kChord]; - float currsum = 0; - for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { - currsum += chordogram[count+iFrame][iChord]; - } - if (iChord == nChord-1) currsum *= 0.8; - if (currsum > maxR) { - maxR = currsum; - maxindR = iChord; - } - } - if (maxL+maxR > maxval) { - maxval = maxL+maxR; - maxindex = iWF; - bestchordL = maxindL; - bestchordR = maxindR; - } - - } - // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; - // add a score to every chord-frame-point that was part of a maximum - for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) { - scoreChordogram[iFrame+count][bestchordL]++; - } - for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { - scoreChordogram[iFrame+count][bestchordR]++; - } - if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; - count++; - } - // cerr << "******* agent finished *******" << endl; - count = 0; - for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { - float maxval = 0; // will be the value of the most salient chord in this frame - float maxindex = 0; //... 
and the index thereof - for (unsigned iChord = 0; iChord < nChord; iChord++) { - if (scoreChordogram[count][iChord] > maxval) { - maxval = scoreChordogram[count][iChord]; - maxindex = iChord; - // cerr << iChord << endl; - } - } - chordSequence.push_back(maxindex); - // cerr << "before modefilter, maxindex: " << maxindex << endl; - count++; - } - // cerr << "******* mode filter done *******" << endl; - - - // mode filter on chordSequence - count = 0; - string oldChord = ""; - for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { - Feature f6 = *it; - Feature f7; // chord estimate - f7.hasTimestamp = true; - f7.timestamp = f6.timestamp; - Feature f8; // chord estimate - f8.hasTimestamp = true; - f8.timestamp = f6.timestamp; - - vector<int> chordCount = vector<int>(nChord,0); - int maxChordCount = 0; - int maxChordIndex = nChord-1; - string maxChord; - int startIndex = max(count - halfwindowlength/2,0); - int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); - for (int i = startIndex; i < endIndex; i++) { - chordCount[chordSequence[i]]++; - if (chordCount[chordSequence[i]] > maxChordCount) { - // cerr << "start index " << startIndex << endl; - maxChordCount++; - maxChordIndex = chordSequence[i]; - maxChord = m_chordnames[maxChordIndex]; - } - } - // chordSequence[count] = maxChordIndex; - // cerr << maxChordIndex << endl; - f8.values.push_back(chordchange[count]/(halfwindowlength*2)); - // cerr << chordchange[count] << endl; - fsOut[9].push_back(f8); - if (oldChord != maxChord) { - oldChord = maxChord; - - // char buffer1 [50]; - // if (maxChordIndex < nChord - 1) { - // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]); - // } else { - // sprintf(buffer1, "N"); - // } - // f7.label = buffer1; - f7.label = m_chordnames[maxChordIndex]; - fsOut[7].push_back(f7); - } - count++; - } - Feature f7; // last chord estimate - f7.hasTimestamp = true; - f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp; - f7.label = "N"; - fsOut[7].push_back(f7); - cerr << "done." 
<< endl; - // // musicity - // count = 0; - // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2 - // vector<float> musicityValue; - // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { - // Feature f4 = *it; - // - // int startIndex = max(count - musicitykernelwidth/2,0); - // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); - // float chromasum = 0; - // float diffsum = 0; - // for (int k = 0; k < 12; k++) { - // for (int i = startIndex + 1; i < endIndex; i++) { - // chromasum += pow(fsOut[4][i].values[k],2); - // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]); - // } - // } - // diffsum /= chromasum; - // musicityValue.push_back(diffsum); - // count++; - // } - // - // float musicityThreshold = 0.44; - // if (m_stepSize == 4096) { - // musicityThreshold = 0.74; - // } - // if (m_stepSize == 4410) { - // musicityThreshold = 0.77; - // } - // - // count = 0; - // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { - // Feature f4 = *it; - // Feature f8; // musicity - // Feature f9; // musicity segmenter - // - // f8.hasTimestamp = true; - // f8.timestamp = f4.timestamp; - // f9.hasTimestamp = true; - // f9.timestamp = f4.timestamp; - // - // int startIndex = max(count - musicitykernelwidth/2,0); - // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); - // int musicityCount = 0; - // for (int i = startIndex; i <= endIndex; i++) { - // if (musicityValue[i] > musicityThreshold) musicityCount++; - // } - // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1); - // - // if (isSpeech) { - // if (oldlabeltype != 2) { - // f9.label = "Speech"; - // fsOut[9].push_back(f9); - // oldlabeltype = 2; - // } - // } else { - // if (oldlabeltype != 1) { - // f9.label = "Music"; - // fsOut[9].push_back(f9); - // oldlabeltype = 1; - // } - // } - // f8.values.push_back(musicityValue[count]); - // fsOut[8].push_back(f8); - // count++; - // } + // + // vector<vector<float> > chordogram; + // vector<vector<int> > scoreChordogram; + // vector<float> chordchange = vector<float>(fsOut[2].size(),0); + // vector<float> oldchroma = vector<float>(12,0); + // vector<float> oldbasschroma = vector<float>(12,0); + // count = 0; + // + // for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) { + // Feature f2 = *it; // logfreq spectrum + // Feature f3; // semitone spectrum + // Feature f4; // treble chromagram + // Feature f5; // bass chromagram + // Feature f6; // treble and bass chromagram + // + // f3.hasTimestamp = true; + // f3.timestamp = f2.timestamp; + // + // f4.hasTimestamp = true; + // f4.timestamp = f2.timestamp; + // + // f5.hasTimestamp = true; + // f5.timestamp = f2.timestamp; + // + // f6.hasTimestamp = true; + // f6.timestamp = f2.timestamp; + // + // float b[nNote]; + // + // bool some_b_greater_zero = false; + // float sumb = 0; + // for (int i = 0; i < nNote; i++) { + // // b[i] = m_dict[(nNote * count + i) % (nNote * 84)]; + // b[i] = f2.values[i]; + // sumb += b[i]; + // if (b[i] > 0) { + // some_b_greater_zero = true; + // } + // } + // + // // here's where the non-negative least squares algorithm calculates the note activation x + // + // vector<float> chroma = vector<float>(12, 0); + // vector<float> basschroma = vector<float>(12, 0); + // float currval; + // unsigned iSemitone = 0; + // + // if (some_b_greater_zero) { + // if (m_useNNLS == 0) { + // for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { + 
// currval = 0; + // currval += b[iNote + 1 + -1] * 0.5; + // currval += b[iNote + 1 + 0] * 1.0; + // currval += b[iNote + 1 + 1] * 0.5; + // f3.values.push_back(currval); + // chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; + // basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; + // iSemitone++; + // } + // + // } else { + // float x[84+1000]; + // for (int i = 1; i < 1084; ++i) x[i] = 1.0; + // vector<int> signifIndex; + // int index=0; + // sumb /= 84.0; + // for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { + // float currval = 0; + // currval += b[iNote + 1 + -1]; + // currval += b[iNote + 1 + 0]; + // currval += b[iNote + 1 + 1]; + // if (currval > 0) signifIndex.push_back(index); + // f3.values.push_back(0); // fill the values, change later + // index++; + // } + // float rnorm; + // float w[84+1000]; + // float zz[84+1000]; + // int indx[84+1000]; + // int mode; + // int dictsize = nNote*signifIndex.size(); + // // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl; + // float *curr_dict = new float[dictsize]; + // for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { + // for (unsigned iBin = 0; iBin < nNote; iBin++) { + // curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin]; + // } + // } + // nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode); + // delete [] curr_dict; + // for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { + // f3.values[signifIndex[iNote]] = x[iNote]; + // // cerr << mode << endl; + // chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; + // basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; + // } + // } + // } + // + // + // + // + // f4.values = chroma; + // f5.values = basschroma; + // chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas + // f6.values = chroma; + // + // if (m_doNormalizeChroma > 0) { + // vector<float> chromanorm = vector<float>(3,0); + // switch (int(m_doNormalizeChroma)) { + // case 0: // should never end up here + // break; + // case 1: + // chromanorm[0] = *max_element(f4.values.begin(), f4.values.end()); + // chromanorm[1] = *max_element(f5.values.begin(), f5.values.end()); + // chromanorm[2] = max(chromanorm[0], chromanorm[1]); + // break; + // case 2: + // for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) { + // chromanorm[0] += *it; + // } + // for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) { + // chromanorm[1] += *it; + // } + // for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) { + // chromanorm[2] += *it; + // } + // break; + // case 3: + // for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) { + // chromanorm[0] += pow(*it,2); + // } + // chromanorm[0] = sqrt(chromanorm[0]); + // for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) { + // chromanorm[1] += pow(*it,2); + // } + // chromanorm[1] = sqrt(chromanorm[1]); + // for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) { + // chromanorm[2] += pow(*it,2); + // } + // chromanorm[2] = sqrt(chromanorm[2]); + // break; + // } + // if (chromanorm[0] > 0) { + // for (int i = 0; i < f4.values.size(); i++) { + // f4.values[i] /= chromanorm[0]; + // } + // } + // if (chromanorm[1] > 0) { + // for (int i = 0; i < f5.values.size(); i++) { + // 
f5.values[i] /= chromanorm[1]; + // } + // } + // if (chromanorm[2] > 0) { + // for (int i = 0; i < f6.values.size(); i++) { + // f6.values[i] /= chromanorm[2]; + // } + // } + // + // } + // + // // local chord estimation + // vector<float> currentChordSalience; + // float tempchordvalue = 0; + // float sumchordvalue = 0; + // + // for (int iChord = 0; iChord < nChord; iChord++) { + // tempchordvalue = 0; + // for (int iBin = 0; iBin < 12; iBin++) { + // tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; + // } + // for (int iBin = 12; iBin < 24; iBin++) { + // tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; + // } + // sumchordvalue+=tempchordvalue; + // currentChordSalience.push_back(tempchordvalue); + // } + // if (sumchordvalue > 0) { + // for (int iChord = 0; iChord < nChord; iChord++) { + // currentChordSalience[iChord] /= sumchordvalue; + // } + // } else { + // currentChordSalience[nChord-1] = 1.0; + // } + // chordogram.push_back(currentChordSalience); + // + // fsOut[3].push_back(f3); + // fsOut[4].push_back(f4); + // fsOut[5].push_back(f5); + // fsOut[6].push_back(f6); + // count++; + // } + // cerr << "done." << endl; + // + // + // /* Simple chord estimation + // I just take the local chord estimates ("currentChordSalience") and average them over time, then + // take the maximum. Very simple, don't do this at home... + // */ + // cerr << "[NNLS Chroma Plugin] Chord Estimation ... "; + // count = 0; + // int halfwindowlength = m_inputSampleRate / m_stepSize; + // vector<int> chordSequence; + // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram + // vector<int> temp = vector<int>(nChord,0); + // scoreChordogram.push_back(temp); + // } + // for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) { + // int startIndex = count + 1; + // int endIndex = count + 2 * halfwindowlength; + // + // float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1); + // + // vector<int> chordCandidates; + // for (unsigned iChord = 0; iChord < nChord-1; iChord++) { + // // float currsum = 0; + // // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { + // // currsum += chordogram[iFrame][iChord]; + // // } + // // if (currsum > chordThreshold) chordCandidates.push_back(iChord); + // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { + // if (chordogram[iFrame][iChord] > chordThreshold) { + // chordCandidates.push_back(iChord); + // break; + // } + // } + // } + // chordCandidates.push_back(nChord-1); + // // cerr << chordCandidates.size() << endl; + // + // float maxval = 0; // will be the value of the most salient *chord change* in this frame + // float maxindex = 0; //... 
and the index thereof + // unsigned bestchordL = nChord-1; // index of the best "left" chord + // unsigned bestchordR = nChord-1; // index of the best "right" chord + // + // for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { + // // now find the max values on both sides of iWF + // // left side: + // float maxL = 0; + // unsigned maxindL = nChord-1; + // for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { + // unsigned iChord = chordCandidates[kChord]; + // float currsum = 0; + // for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) { + // currsum += chordogram[count+iFrame][iChord]; + // } + // if (iChord == nChord-1) currsum *= 0.8; + // if (currsum > maxL) { + // maxL = currsum; + // maxindL = iChord; + // } + // } + // // right side: + // float maxR = 0; + // unsigned maxindR = nChord-1; + // for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { + // unsigned iChord = chordCandidates[kChord]; + // float currsum = 0; + // for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { + // currsum += chordogram[count+iFrame][iChord]; + // } + // if (iChord == nChord-1) currsum *= 0.8; + // if (currsum > maxR) { + // maxR = currsum; + // maxindR = iChord; + // } + // } + // if (maxL+maxR > maxval) { + // maxval = maxL+maxR; + // maxindex = iWF; + // bestchordL = maxindL; + // bestchordR = maxindR; + // } + // + // } + // // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; + // // add a score to every chord-frame-point that was part of a maximum + // for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) { + // scoreChordogram[iFrame+count][bestchordL]++; + // } + // for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { + // scoreChordogram[iFrame+count][bestchordR]++; + // } + // if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; + // count++; + // } + // // cerr << "******* agent finished *******" << endl; + // count = 0; + // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { + // float maxval = 0; // will be the value of the most salient chord in this frame + // float maxindex = 0; //... 
and the index thereof + // for (unsigned iChord = 0; iChord < nChord; iChord++) { + // if (scoreChordogram[count][iChord] > maxval) { + // maxval = scoreChordogram[count][iChord]; + // maxindex = iChord; + // // cerr << iChord << endl; + // } + // } + // chordSequence.push_back(maxindex); + // // cerr << "before modefilter, maxindex: " << maxindex << endl; + // count++; + // } + // // cerr << "******* mode filter done *******" << endl; + // + // + // // mode filter on chordSequence + // count = 0; + // string oldChord = ""; + // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { + // Feature f6 = *it; + // Feature f7; // chord estimate + // f7.hasTimestamp = true; + // f7.timestamp = f6.timestamp; + // Feature f8; // chord estimate + // f8.hasTimestamp = true; + // f8.timestamp = f6.timestamp; + // + // vector<int> chordCount = vector<int>(nChord,0); + // int maxChordCount = 0; + // int maxChordIndex = nChord-1; + // string maxChord; + // int startIndex = max(count - halfwindowlength/2,0); + // int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); + // for (int i = startIndex; i < endIndex; i++) { + // chordCount[chordSequence[i]]++; + // if (chordCount[chordSequence[i]] > maxChordCount) { + // // cerr << "start index " << startIndex << endl; + // maxChordCount++; + // maxChordIndex = chordSequence[i]; + // maxChord = m_chordnames[maxChordIndex]; + // } + // } + // // chordSequence[count] = maxChordIndex; + // // cerr << maxChordIndex << endl; + // f8.values.push_back(chordchange[count]/(halfwindowlength*2)); + // // cerr << chordchange[count] << endl; + // fsOut[9].push_back(f8); + // if (oldChord != maxChord) { + // oldChord = maxChord; + // + // // char buffer1 [50]; + // // if (maxChordIndex < nChord - 1) { + // // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]); + // // } else { + // // sprintf(buffer1, "N"); + // // } + // // f7.label = buffer1; + // f7.label = m_chordnames[maxChordIndex]; + // fsOut[7].push_back(f7); + // } + // count++; + // } + // Feature f7; // last chord estimate + // f7.hasTimestamp = true; + // f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp; + // f7.label = "N"; + // fsOut[7].push_back(f7); + // cerr << "done." 
<< endl; + // // // musicity + // // count = 0; + // // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2 + // // vector<float> musicityValue; + // // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { + // // Feature f4 = *it; + // // + // // int startIndex = max(count - musicitykernelwidth/2,0); + // // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); + // // float chromasum = 0; + // // float diffsum = 0; + // // for (int k = 0; k < 12; k++) { + // // for (int i = startIndex + 1; i < endIndex; i++) { + // // chromasum += pow(fsOut[4][i].values[k],2); + // // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]); + // // } + // // } + // // diffsum /= chromasum; + // // musicityValue.push_back(diffsum); + // // count++; + // // } + // // + // // float musicityThreshold = 0.44; + // // if (m_stepSize == 4096) { + // // musicityThreshold = 0.74; + // // } + // // if (m_stepSize == 4410) { + // // musicityThreshold = 0.77; + // // } + // // + // // count = 0; + // // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { + // // Feature f4 = *it; + // // Feature f8; // musicity + // // Feature f9; // musicity segmenter + // // + // // f8.hasTimestamp = true; + // // f8.timestamp = f4.timestamp; + // // f9.hasTimestamp = true; + // // f9.timestamp = f4.timestamp; + // // + // // int startIndex = max(count - musicitykernelwidth/2,0); + // // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); + // // int musicityCount = 0; + // // for (int i = startIndex; i <= endIndex; i++) { + // // if (musicityValue[i] > musicityThreshold) musicityCount++; + // // } + // // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1); + // // + // // if (isSpeech) { + // // if (oldlabeltype != 2) { + // // f9.label = "Speech"; + // // fsOut[9].push_back(f9); + // // oldlabeltype = 2; + // // } + // // } else { + // // if (oldlabeltype != 1) { + // // f9.label = "Music"; + // // fsOut[9].push_back(f9); + // // oldlabeltype = 1; + // // } + // // } + // // f8.values.push_back(musicityValue[count]); + // // fsOut[8].push_back(f8); + // // count++; + // // } return fsOut; }
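For reference, below is a minimal standalone sketch of the tuning estimate that the block commented out in this changeset computes: the three accumulated bin means per semitone are combined as a complex phasor, and the phase of that phasor, normalised to a fraction of a semitone, gives the deviation from 440 Hz concert pitch. The input values and the 120-degree phase factors used here are illustrative assumptions, not values taken from the plugin.

// Standalone sketch (not part of this changeset) of the commented-out tuning estimate.
// Assumed inputs stand in for the plugin's m_meanTunings[0..2] and its sin/cos phase factors.
#include <cmath>
#include <cstdio>

int main()
{
    const double PI = 3.14159265358979323846;

    // assumed example values for the accumulated per-bin means (m_meanTunings in the plugin)
    double meanTunings[3] = { 10.0, 6.0, 4.0 };

    // phase factors for the three-bins-per-semitone grid (assumed 2*pi/3 spacing)
    double cosvalue = std::cos(2.0 * PI / 3.0);
    double sinvalue = std::sin(2.0 * PI / 3.0);

    // combine the three bin means into one complex phasor
    double imag = sinvalue * meanTunings[1] - sinvalue * meanTunings[2];
    double real = meanTunings[0] + cosvalue * meanTunings[1] + cosvalue * meanTunings[2];

    // phase/(2*pi) is the tuning deviation in semitones; 440 * 2^(deviation/12) converts to Hz
    double tuningHz = 440.0 * std::pow(2.0, std::atan2(imag, real) / (24.0 * PI));

    std::printf("estimated tuning: %0.1f Hz\n", tuningHz);
    return 0;
}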