Mercurial > hg > nnls-chroma
changeset 80:026a5c0ee2c2 matthiasm-plugin
bins per semitone can now be chosen in chromamethods.h
author | Matthias Mauch <mail@matthiasmauch.net> |
---|---|
date | Thu, 11 Nov 2010 15:11:05 +0900 |
parents | d52884de7d79 |
children | 4270f3039ab0 |
files | Chordino.cpp NNLSBase.cpp NNLSBase.h NNLSChroma.cpp Tuning.cpp chromamethods.cpp chromamethods.h |
diffstat | 7 files changed, 101 insertions(+), 79 deletions(-) [+] |
line wrap: on
line diff
--- a/Chordino.cpp Thu Nov 11 10:29:35 2010 +0900 +++ b/Chordino.cpp Thu Nov 11 15:11:05 2010 +0900 @@ -246,12 +246,16 @@ calculate tuning from (using the angle of the complex number defined by the cumulative mean real and imag values) **/ - float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2; - float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2; + float meanTuningImag = 0; + float meanTuningReal = 0; + for (int iBPS = 0; iBPS < nBPS; ++iBPS) { + meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS]; + meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS]; + } float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); int intShift = floor(normalisedtuning * 3); - float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this + float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this char buffer0 [50]; @@ -285,13 +289,13 @@ if (m_tuneLocal) { intShift = floor(m_localTuning[count] * 3); - intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this + floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this } - // cerr << intShift << " " << intFactor << endl; + // cerr << intShift << " " << floatShift << endl; for (unsigned k = 2; k < currentLogSpectum.values.size() - 3; ++k) { // interpolate all inner bins - tempValue = currentLogSpectum.values[k + intShift] * (1-intFactor) + currentLogSpectum.values[k+intShift+1] * intFactor; + tempValue = currentLogSpectum.values[k + intShift] * (1-floatShift) + currentLogSpectum.values[k+intShift+1] * floatShift; currentTunedSpec.values.push_back(tempValue); }
--- a/NNLSBase.cpp Thu Nov 11 10:29:35 2010 +0900 +++ b/NNLSBase.cpp Thu Nov 11 15:11:05 2010 +0900 @@ -34,12 +34,8 @@ m_blockSize(0), m_stepSize(0), m_lengthOfNoteIndex(0), - m_meanTuning0(0), - m_meanTuning1(0), - m_meanTuning2(0), - m_localTuning0(0), - m_localTuning1(0), - m_localTuning2(0), + m_meanTunings(0), + m_localTunings(0), m_whitening(1.0), m_preset(0.0), m_localTuning(0), @@ -54,7 +50,9 @@ m_rollon(0), m_s(0.7), m_useNNLS(1), - m_useHMM(1) + m_useHMM(1), + sinvalues(0), + cosvalues(0) { if (debug_on) cerr << "--> NNLSBase" << endl; @@ -352,6 +350,14 @@ cerr << "--> initialise"; } + // make things for tuning estimation + for (int iBPS = 0; iBPS < nBPS; ++iBPS) { + sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS))); + cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS))); + } + + + // make hamming window of length 1/2 octave int hamwinlength = nBPS * 6 + 1; float hamwinsum = 0; for (int i = 0; i < hamwinlength; ++i) { @@ -359,6 +365,13 @@ hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)); } for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum; + + + // initialise the tuning + for (int iBPS = 0; iBPS < nBPS; ++iBPS) { + m_meanTunings.push_back(0); + m_localTunings.push_back(0); + } if (channels < getMinChannelCount() || channels > getMaxChannelCount()) return false; @@ -411,12 +424,10 @@ m_frameCount = 0; // m_dictID = 0; m_logSpectrum.clear(); - m_meanTuning0 = 0; - m_meanTuning1 = 0; - m_meanTuning2 = 0; - m_localTuning0 = 0; - m_localTuning1 = 0; - m_localTuning2 = 0; + for (int iBPS = 0; iBPS < nBPS; ++iBPS) { + m_meanTunings[iBPS] = 0; + m_localTunings[iBPS] = 0; + } m_localTuning.clear(); } @@ -473,24 +484,28 @@ float one_over_N = 1.0/m_frameCount; // update means of complex tuning variables - m_meanTuning0 *= float(m_frameCount-1)*one_over_N; - m_meanTuning1 *= float(m_frameCount-1)*one_over_N; - m_meanTuning2 *= float(m_frameCount-1)*one_over_N; - - for (int iTone = 0; iTone < 160; iTone = iTone + 3) { - m_meanTuning0 += nm[iTone + 0]*one_over_N; - m_meanTuning1 += nm[iTone + 1]*one_over_N; - m_meanTuning2 += nm[iTone + 2]*one_over_N; + for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] *= float(m_frameCount-1)*one_over_N; + + for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) { + for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N; float ratioOld = 0.997; - m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld); - m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld); - m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld); + for (int iBPS = 0; iBPS < nBPS; ++iBPS) { + m_localTunings[iBPS] *= ratioOld; + m_localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld); + } } - // if (m_tuneLocal) { // local tuning - float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2; - float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2; + // float localTuningImag = sinvalue * m_localTunings[1] - sinvalue * m_localTunings[2]; + // float localTuningReal = m_localTunings[0] + cosvalue * m_localTunings[1] + cosvalue * m_localTunings[2]; + + float localTuningImag = 0; + float localTuningReal = 0; + for (int iBPS = 0; iBPS < nBPS; ++iBPS) { + localTuningReal += m_localTunings[iBPS] * cosvalues[iBPS]; + localTuningImag += m_localTunings[iBPS] * sinvalues[iBPS]; + } + float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI); m_localTuning.push_back(normalisedtuning); @@ -523,12 +538,12 @@ calculate tuning from (using the angle of the complex number defined by the cumulative mean real and imag values) **/ - float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2; - float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2; + float meanTuningImag = sinvalue * m_meanTunings[1] - sinvalue * m_meanTunings[2]; + float meanTuningReal = m_meanTunings[0] + cosvalue * m_meanTunings[1] + cosvalue * m_meanTunings[2]; float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); int intShift = floor(normalisedtuning * 3); - float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this + float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this char buffer0 [50]; @@ -564,13 +579,13 @@ if (m_tuneLocal == 1.0) { intShift = floor(m_localTuning[count] * 3); - intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this + floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this } - // cerr << intShift << " " << intFactor << endl; + // cerr << intShift << " " << floatShift << endl; for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins - tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor; + tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift; f2.values.push_back(tempValue); }
--- a/NNLSBase.h Thu Nov 11 10:29:35 2010 +0900 +++ b/NNLSBase.h Thu Nov 11 15:11:05 2010 +0900 @@ -60,12 +60,8 @@ size_t m_blockSize; size_t m_stepSize; int m_lengthOfNoteIndex; - float m_meanTuning0; - float m_meanTuning1; - float m_meanTuning2; - float m_localTuning0; - float m_localTuning1; - float m_localTuning2; + vector<float> m_meanTunings; + vector<float> m_localTunings; float m_whitening; float m_preset; float m_s; @@ -82,6 +78,8 @@ float m_doNormalizeChroma; float m_rollon; vector<float> hw; + vector<float> sinvalues; + vector<float> cosvalues; };
--- a/NNLSChroma.cpp Thu Nov 11 10:29:35 2010 +0900 +++ b/NNLSChroma.cpp Thu Nov 11 15:11:05 2010 +0900 @@ -219,12 +219,16 @@ calculate tuning from (using the angle of the complex number defined by the cumulative mean real and imag values) **/ - float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2; - float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2; + float meanTuningImag = 0; + float meanTuningReal = 0; + for (int iBPS = 0; iBPS < nBPS; ++iBPS) { + meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS]; + meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS]; + } float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); int intShift = floor(normalisedtuning * 3); - float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this + float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this char buffer0 [50]; @@ -244,8 +248,6 @@ // cerr << "tune local ? " << m_tuneLocal << endl; int count = 0; - cerr << nNote; - cerr << endl << "-------------------------------------"<< endl; for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) { Feature f1 = *i; @@ -256,13 +258,13 @@ if (m_tuneLocal) { intShift = floor(m_localTuning[count] * 3); - intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this + floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this } - // cerr << intShift << " " << intFactor << endl; + // cerr << intShift << " " << floatShift << endl; for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins - tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor; + tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift; f2.values.push_back(tempValue); } @@ -349,11 +351,11 @@ if (some_b_greater_zero) { if (m_useNNLS == 0) { - for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { + for (unsigned iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) { currval = 0; - currval += b[iNote + 1 + -1] * 0.5; - currval += b[iNote + 1 + 0] * 1.0; - currval += b[iNote + 1 + 1] * 0.5; + for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) { + currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1))); + } f3.values.push_back(currval); chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; @@ -366,11 +368,11 @@ vector<int> signifIndex; int index=0; sumb /= 84.0; - for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { + for (unsigned iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) { float currval = 0; - currval += b[iNote + 1 + -1]; - currval += b[iNote + 1 + 0]; - currval += b[iNote + 1 + 1]; + for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) { + currval += b[iNote + iBPS]; + } if (currval > 0) signifIndex.push_back(index); f3.values.push_back(0); // fill the values, change later index++;
--- a/Tuning.cpp Thu Nov 11 10:29:35 2010 +0900 +++ b/Tuning.cpp Thu Nov 11 15:11:05 2010 +0900 @@ -187,8 +187,15 @@ calculate tuning from (using the angle of the complex number defined by the cumulative mean real and imag values) **/ - float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2; - float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2; + + float meanTuningImag = 0; + float meanTuningReal = 0; + for (int iBPS = 0; iBPS < nBPS; ++iBPS) { + meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS]; + meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS]; + } + + float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); char buffer0 [50];
--- a/chromamethods.cpp Thu Nov 11 10:29:35 2010 +0900 +++ b/chromamethods.cpp Thu Nov 11 15:11:05 2010 +0900 @@ -105,7 +105,7 @@ bool logFreqMatrix(int fs, int blocksize, float *outmatrix) { - int binspersemitone = 3; // this must be 3 + int binspersemitone = nBPS; int minoctave = 0; // this must be 0 int maxoctave = 7; // this must be 7 int oversampling = 80; @@ -128,14 +128,14 @@ int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone! vector<float> cq_f; float oob = 1.0/binspersemitone; // one over binspersemitone - cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 - cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); - for (int i = minMIDI + 1; i < maxMIDI; ++i) { - for (int k = -1; k < 2; ++k) { + // cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 + // cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); + for (int i = minMIDI; i < maxMIDI; ++i) { + for (int k = 0; k < binspersemitone; ++k) { cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69))); } } - cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); + // cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69))); int nFFT = fft_f.size(); @@ -171,7 +171,7 @@ } void dictionaryMatrix(float* dm, float s_param) { - int binspersemitone = 3; // this must be 3 + int binspersemitone = nBPS; int minoctave = 0; // this must be 0 int maxoctave = 7; // this must be 7 // float s_param = 0.7; @@ -181,14 +181,14 @@ int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone! vector<float> cq_f; float oob = 1.0/binspersemitone; // one over binspersemitone - cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 - cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); - for (int i = minMIDI + 1; i < maxMIDI; ++i) { - for (int k = -1; k < 2; ++k) { + // cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 + // cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); + for (int i = minMIDI; i < maxMIDI; ++i) { + for (int k = 0; k < binspersemitone; ++k) { cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69))); } } - cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); + // cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69))); float curr_f;
--- a/chromamethods.h Thu Nov 11 10:29:35 2010 +0900 +++ b/chromamethods.h Thu Nov 11 15:11:05 2010 +0900 @@ -53,10 +53,6 @@ static const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000}; static const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350}; -static const float sinvalue = 0.866025404; -static const float cosvalue = -0.5; - - #include "nnls.h"