changeset 80:026a5c0ee2c2 matthiasm-plugin

bins per semitone can now be chosen in chromamethods.h
author Matthias Mauch <mail@matthiasmauch.net>
date Thu, 11 Nov 2010 15:11:05 +0900
parents d52884de7d79
children 4270f3039ab0
files Chordino.cpp NNLSBase.cpp NNLSBase.h NNLSChroma.cpp Tuning.cpp chromamethods.cpp chromamethods.h
diffstat 7 files changed, 101 insertions(+), 79 deletions(-) [+]
line wrap: on
line diff
--- a/Chordino.cpp	Thu Nov 11 10:29:35 2010 +0900
+++ b/Chordino.cpp	Thu Nov 11 15:11:05 2010 +0900
@@ -246,12 +246,16 @@
          calculate tuning from (using the angle of the complex number defined by the 
          cumulative mean real and imag values)
     **/
-    float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
-    float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
+    float meanTuningImag = 0;
+    float meanTuningReal = 0;
+    for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
+        meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
+        meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
+    }
     float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
     float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
     int intShift = floor(normalisedtuning * 3);
-    float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
+    float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
 		    
     char buffer0 [50];
 		
@@ -285,13 +289,13 @@
 		
         if (m_tuneLocal) {
             intShift = floor(m_localTuning[count] * 3);
-            intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
+            floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
         }
 		        
-        // cerr << intShift << " " << intFactor << endl;
+        // cerr << intShift << " " << floatShift << endl;
 		        
         for (unsigned k = 2; k < currentLogSpectum.values.size() - 3; ++k) { // interpolate all inner bins
-            tempValue = currentLogSpectum.values[k + intShift] * (1-intFactor) + currentLogSpectum.values[k+intShift+1] * intFactor;
+            tempValue = currentLogSpectum.values[k + intShift] * (1-floatShift) + currentLogSpectum.values[k+intShift+1] * floatShift;
             currentTunedSpec.values.push_back(tempValue);
         }
 		        
--- a/NNLSBase.cpp	Thu Nov 11 10:29:35 2010 +0900
+++ b/NNLSBase.cpp	Thu Nov 11 15:11:05 2010 +0900
@@ -34,12 +34,8 @@
     m_blockSize(0),
     m_stepSize(0),
     m_lengthOfNoteIndex(0),
-    m_meanTuning0(0),
-    m_meanTuning1(0),
-    m_meanTuning2(0),
-    m_localTuning0(0),
-    m_localTuning1(0),
-    m_localTuning2(0),
+    m_meanTunings(0),
+    m_localTunings(0),
     m_whitening(1.0),
     m_preset(0.0),
     m_localTuning(0),
@@ -54,7 +50,9 @@
     m_rollon(0),
 	m_s(0.7),
 	m_useNNLS(1),
-	m_useHMM(1)
+	m_useHMM(1),
+	sinvalues(0),
+	cosvalues(0)
 {
     if (debug_on) cerr << "--> NNLSBase" << endl;
 
@@ -352,6 +350,14 @@
         cerr << "--> initialise";
     }
 	
+	// make things for tuning estimation
+	for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
+        sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS)));
+        cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS)));
+    }
+    
+	
+	// make hamming window of length 1/2 octave
 	int hamwinlength = nBPS * 6 + 1;
     float hamwinsum = 0;
     for (int i = 0; i < hamwinlength; ++i) { 
@@ -359,6 +365,13 @@
         hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1));
     }
     for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum;
+    
+    
+    // initialise the tuning
+    for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
+        m_meanTunings.push_back(0);
+        m_localTunings.push_back(0);
+    }
 	
     if (channels < getMinChannelCount() ||
 	channels > getMaxChannelCount()) return false;
@@ -411,12 +424,10 @@
     m_frameCount = 0;
     // m_dictID = 0;
     m_logSpectrum.clear();
-    m_meanTuning0 = 0;
-    m_meanTuning1 = 0;
-    m_meanTuning2 = 0;
-    m_localTuning0 = 0;
-    m_localTuning1 = 0;
-    m_localTuning2 = 0;
+    for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
+        m_meanTunings[iBPS] = 0;
+        m_localTunings[iBPS] = 0;
+    }
     m_localTuning.clear();
 }
 
@@ -473,24 +484,28 @@
 	
     float one_over_N = 1.0/m_frameCount;
     // update means of complex tuning variables
-    m_meanTuning0 *= float(m_frameCount-1)*one_over_N;
-    m_meanTuning1 *= float(m_frameCount-1)*one_over_N;
-    m_meanTuning2 *= float(m_frameCount-1)*one_over_N;
-	
-    for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
-        m_meanTuning0 += nm[iTone + 0]*one_over_N;
-    	m_meanTuning1 += nm[iTone + 1]*one_over_N;
-    	m_meanTuning2 += nm[iTone + 2]*one_over_N;
+    for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] *= float(m_frameCount-1)*one_over_N;
+    
+    for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) {
+        for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N;
         float ratioOld = 0.997;
-        m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
-        m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
-        m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
+        for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
+            m_localTunings[iBPS] *= ratioOld; 
+            m_localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld);
+        }
     }
-	
     // if (m_tuneLocal) {
     // local tuning
-    float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
-    float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
+    // float localTuningImag = sinvalue * m_localTunings[1] - sinvalue * m_localTunings[2];
+    // float localTuningReal = m_localTunings[0] + cosvalue * m_localTunings[1] + cosvalue * m_localTunings[2];
+    
+    float localTuningImag = 0;
+    float localTuningReal = 0;
+    for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
+        localTuningReal += m_localTunings[iBPS] * cosvalues[iBPS];
+        localTuningImag += m_localTunings[iBPS] * sinvalues[iBPS];
+    }
+    
     float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
     m_localTuning.push_back(normalisedtuning);
     
@@ -523,12 +538,12 @@
          calculate tuning from (using the angle of the complex number defined by the 
          cumulative mean real and imag values)
     **/
-    float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
-    float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
+    float meanTuningImag = sinvalue * m_meanTunings[1] - sinvalue * m_meanTunings[2];
+    float meanTuningReal = m_meanTunings[0] + cosvalue * m_meanTunings[1] + cosvalue * m_meanTunings[2];
     float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
     float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
     int intShift = floor(normalisedtuning * 3);
-    float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
+    float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
 		    
     char buffer0 [50];
 		
@@ -564,13 +579,13 @@
 		
         if (m_tuneLocal == 1.0) {
             intShift = floor(m_localTuning[count] * 3);
-            intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
+            floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
         }
 		        
-        // cerr << intShift << " " << intFactor << endl;
+        // cerr << intShift << " " << floatShift << endl;
 		        
         for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
-            tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
+            tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
             f2.values.push_back(tempValue);
         }
 		        
--- a/NNLSBase.h	Thu Nov 11 10:29:35 2010 +0900
+++ b/NNLSBase.h	Thu Nov 11 15:11:05 2010 +0900
@@ -60,12 +60,8 @@
     size_t m_blockSize;
     size_t m_stepSize;
     int m_lengthOfNoteIndex;
-    float m_meanTuning0;
-    float m_meanTuning1;
-    float m_meanTuning2;
-    float m_localTuning0;
-    float m_localTuning1;
-    float m_localTuning2;
+    vector<float> m_meanTunings;
+    vector<float> m_localTunings;
     float m_whitening;
     float m_preset;
 	float m_s;
@@ -82,6 +78,8 @@
     float m_doNormalizeChroma;
     float m_rollon;
     vector<float> hw;
+    vector<float> sinvalues;
+    vector<float> cosvalues;
 };
 
 
--- a/NNLSChroma.cpp	Thu Nov 11 10:29:35 2010 +0900
+++ b/NNLSChroma.cpp	Thu Nov 11 15:11:05 2010 +0900
@@ -219,12 +219,16 @@
          calculate tuning from (using the angle of the complex number defined by the 
          cumulative mean real and imag values)
     **/
-    float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
-    float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
+    float meanTuningImag = 0;
+    float meanTuningReal = 0;
+    for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
+        meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
+        meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
+    }
     float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
     float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
     int intShift = floor(normalisedtuning * 3);
-    float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
+    float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
 		    
     char buffer0 [50];
 		
@@ -244,8 +248,6 @@
     // cerr << "tune local ? " << m_tuneLocal << endl;
     int count = 0;
 
-    cerr << nNote;
-    cerr << endl << "-------------------------------------"<< endl;
 		
     for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
         Feature f1 = *i;
@@ -256,13 +258,13 @@
 		
         if (m_tuneLocal) {
             intShift = floor(m_localTuning[count] * 3);
-            intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
+            floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
         }
 		        
-        // cerr << intShift << " " << intFactor << endl;
+        // cerr << intShift << " " << floatShift << endl;
 		        
         for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
-            tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
+            tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
             f2.values.push_back(tempValue);
         }
 		        
@@ -349,11 +351,11 @@
 			
         if (some_b_greater_zero) {
             if (m_useNNLS == 0) {
-                for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
+                for (unsigned iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
                     currval = 0;
-                    currval += b[iNote + 1 + -1] * 0.5;						
-                    currval += b[iNote + 1 +  0] * 1.0;						
-                    currval += b[iNote + 1 +  1] * 0.5;						
+                    for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
+                        currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));						
+                    }
                     f3.values.push_back(currval);
                     chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
                     basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
@@ -366,11 +368,11 @@
                 vector<int> signifIndex;
                 int index=0;
                 sumb /= 84.0;
-                for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
+                for (unsigned iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
                     float currval = 0;
-                    currval += b[iNote + 1 + -1]; 
-                    currval += b[iNote + 1 +  0]; 
-                    currval += b[iNote + 1 +  1];
+                    for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
+                        currval += b[iNote + iBPS]; 
+                    }
                     if (currval > 0) signifIndex.push_back(index);
                     f3.values.push_back(0); // fill the values, change later
                     index++;
--- a/Tuning.cpp	Thu Nov 11 10:29:35 2010 +0900
+++ b/Tuning.cpp	Thu Nov 11 15:11:05 2010 +0900
@@ -187,8 +187,15 @@
          calculate tuning from (using the angle of the complex number defined by the 
          cumulative mean real and imag values)
     **/
-    float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
-    float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
+    
+    float meanTuningImag = 0;
+    float meanTuningReal = 0;
+    for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
+        meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
+        meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
+    }
+    
+
     float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
 		    
     char buffer0 [50];
--- a/chromamethods.cpp	Thu Nov 11 10:29:35 2010 +0900
+++ b/chromamethods.cpp	Thu Nov 11 15:11:05 2010 +0900
@@ -105,7 +105,7 @@
 
 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
 	
-    int binspersemitone = 3; // this must be 3
+    int binspersemitone = nBPS; 
     int minoctave = 0; // this must be 0
     int maxoctave = 7; // this must be 7
     int oversampling = 80;
@@ -128,14 +128,14 @@
     int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
     vector<float> cq_f;
     float oob = 1.0/binspersemitone; // one over binspersemitone
-    cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
-    cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
-    for (int i = minMIDI + 1; i < maxMIDI; ++i) {
-        for (int k = -1; k < 2; ++k)	 {
+    // cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
+    // cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
+    for (int i = minMIDI; i < maxMIDI; ++i) {
+        for (int k = 0; k < binspersemitone; ++k)	 {
             cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
         }
     }
-    cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
+    // cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
     cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
 
     int nFFT = fft_f.size();
@@ -171,7 +171,7 @@
 }
 
 void dictionaryMatrix(float* dm, float s_param) {
-    int binspersemitone = 3; // this must be 3
+    int binspersemitone = nBPS;
     int minoctave = 0; // this must be 0
     int maxoctave = 7; // this must be 7
     // float s_param = 0.7;
@@ -181,14 +181,14 @@
     int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
     vector<float> cq_f;
     float oob = 1.0/binspersemitone; // one over binspersemitone
-    cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
-    cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
-    for (int i = minMIDI + 1; i < maxMIDI; ++i) {
-        for (int k = -1; k < 2; ++k)	 {
+    // cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
+    // cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
+    for (int i = minMIDI; i < maxMIDI; ++i) {
+        for (int k = 0; k < binspersemitone; ++k)	 {
             cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
         }
     }
-    cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
+    // cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
     cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
 
     float curr_f;
--- a/chromamethods.h	Thu Nov 11 10:29:35 2010 +0900
+++ b/chromamethods.h	Thu Nov 11 15:11:05 2010 +0900
@@ -53,10 +53,6 @@
 static const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000};
 static const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350};
 
-static const float sinvalue = 0.866025404;
-static const float cosvalue = -0.5;
-
-
 
 #include "nnls.h"