# HG changeset patch # User matthiasm # Date 1275385291 0 # Node ID 266d23a41cdcc840c3e00ac195f95e8a8204bd6b # Parent 8360483a026ef6cd7a885deb22480e118ee79bfc tested almost finished plugin, chord est with mode filter diff -r 8360483a026e -r 266d23a41cdc NNLSChroma.cpp --- a/NNLSChroma.cpp Mon May 31 14:12:37 2010 +0000 +++ b/NNLSChroma.cpp Tue Jun 01 09:41:31 2010 +0000 @@ -161,7 +161,7 @@ int binspersemitone = 3; // this must be 3 int minoctave = 0; // this must be 0 int maxoctave = 7; // this must be 7 - float s_param = 0.6; + float s_param = 0.7; // pitch-spaced frequency vector int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone! @@ -216,7 +216,7 @@ m_localTuning0(0), m_localTuning1(0), m_localTuning2(0), - m_paling(0.8), + m_paling(1.0), m_preset(0.0), m_localTuning(0), m_kernelValue(0), @@ -258,7 +258,7 @@ { // Return something helpful here! if (debug_on) cerr << "--> getDescription" << endl; - return ""; + return "This plugin provides a number of features derived from a log-frequency amplitude spectrum (LAS) of the DFT: the LAS itself, a standard-tuned version thereof (the local and global tuning estimates can are also be output), an approximate transcription to semitone activation using non-linear least squares (NNLS). Furthermore chroma features and a simple chord estimate derived from this NNLS semitone transcription."; } string @@ -332,49 +332,6 @@ if (debug_on) cerr << "--> getParameterDescriptors" << endl; ParameterList list; - ParameterDescriptor d0; - d0.identifier = "notedict"; - d0.name = "note dictionary"; - d0.description = "Notes in different note dictionaries differ by their spectral shapes."; - d0.unit = ""; - d0.minValue = 0; - d0.maxValue = 1; - d0.defaultValue = 0; - d0.isQuantized = true; - d0.valueNames.push_back("s = 0.6"); - // d0.valueNames.push_back("s = 0.9"); - // d0.valueNames.push_back("s linearly spaced"); - d0.valueNames.push_back("no NNLS"); - d0.quantizeStep = 1.0; - list.push_back(d0); - - ParameterDescriptor d1; - d1.identifier = "tuningmode"; - d1.name = "tuning mode"; - d1.description = "Tuning can be performed locally or on the whole extraction segment."; - d1.unit = ""; - d1.minValue = 0; - d1.maxValue = 1; - d1.defaultValue = 1; - d1.isQuantized = true; - d1.valueNames.push_back("global tuning"); - d1.valueNames.push_back("local tuning"); - d1.quantizeStep = 1.0; - list.push_back(d1); - - ParameterDescriptor d2; - d2.identifier = "paling"; - d2.name = "spectral paling"; - d2.description = "Spectral paling: no paling - 0; whitening - 1."; - d2.unit = ""; - d2.isQuantized = true; - d2.quantizeStep = 0.1; - d2.minValue = 0.0; - d2.maxValue = 1.0; - d2.defaultValue = 0.5; - // d2.isQuantized = false; - list.push_back(d2); - ParameterDescriptor d3; d3.identifier = "preset"; d3.name = "preset"; @@ -383,13 +340,55 @@ d3.isQuantized = true; d3.quantizeStep = 1; d3.minValue = 0.0; - d3.maxValue = 2.0; + d3.maxValue = 3.0; d3.defaultValue = 0.0; d3.valueNames.push_back("polyphonic pop"); d3.valueNames.push_back("polyphonic pop (fast)"); d3.valueNames.push_back("solo keyboard"); d3.valueNames.push_back("manual"); list.push_back(d3); + + // ParameterDescriptor d0; + // d0.identifier = "notedict"; + // d0.name = "note dictionary"; + // d0.description = "Notes in different note dictionaries differ by their spectral shapes."; + // d0.unit = ""; + // d0.minValue = 0; + // d0.maxValue = 1; + // d0.defaultValue = 0; + // d0.isQuantized = true; + // d0.valueNames.push_back("s = 0.6"); + // d0.valueNames.push_back("no NNLS"); + // d0.quantizeStep = 1.0; + // list.push_back(d0); + + ParameterDescriptor d1; + d1.identifier = "tuningmode"; + d1.name = "tuning mode"; + d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing."; + d1.unit = ""; + d1.minValue = 0; + d1.maxValue = 1; + d1.defaultValue = 0; + d1.isQuantized = true; + d1.valueNames.push_back("global tuning"); + d1.valueNames.push_back("local tuning"); + d1.quantizeStep = 1.0; + list.push_back(d1); + + // ParameterDescriptor d2; + // d2.identifier = "paling"; + // d2.name = "spectral paling"; + // d2.description = "Spectral paling: no paling - 0; whitening - 1."; + // d2.unit = ""; + // d2.isQuantized = true; + // // d2.quantizeStep = 0.1; + // d2.minValue = 0.0; + // d2.maxValue = 1.0; + // d2.defaultValue = 1.0; + // d2.isQuantized = false; + // list.push_back(d2); + return list; } @@ -651,7 +650,7 @@ OutputDescriptor d10; d10.identifier = "localtuning"; d10.name = "Local tuning"; - d10.description = ""; + d10.description = "Tuning based on the history up to this timestamp."; d10.unit = "Hz"; d10.hasFixedBinCount = true; d10.binCount = 1; @@ -681,7 +680,7 @@ m_stepSize = stepSize; frameCount = 0; int tempn = 256 * m_blockSize/2; - cerr << "length of tempkernel : " << tempn << endl; + // cerr << "length of tempkernel : " << tempn << endl; float *tempkernel; tempkernel = new float[tempn]; @@ -703,7 +702,7 @@ } } } - cerr << "nonzero count : " << countNonzero << endl; + // cerr << "nonzero count : " << countNonzero << endl; delete [] tempkernel; ofstream myfile; myfile.open ("matrix.txt"); @@ -718,37 +717,40 @@ void NNLSChroma::reset() { - if (debug_on) cerr << "--> reset"; + if (debug_on) cerr << "--> reset"; + // Clear buffers, reset stored values, etc - frameCount = 0; - m_dictID = 0; - m_kernelValue.clear(); - m_kernelFftIndex.clear(); - m_kernelNoteIndex.clear(); + frameCount = 0; + m_dictID = 0; + m_fl.clear(); + m_meanTuning0 = 0; + m_meanTuning1 = 0; + m_meanTuning2 = 0; + m_localTuning0 = 0; + m_localTuning1 = 0; + m_localTuning2 = 0; + m_localTuning.clear(); } NNLSChroma::FeatureSet NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp) { - if (debug_on) cerr << "--> process" << endl; - // int nNote = 84; // TODO: this should be globally set and/or depend on the kernel matrix - + if (debug_on) cerr << "--> process" << endl; + frameCount++; float *magnitude = new float[m_blockSize/2]; Feature f10; // local tuning f10.hasTimestamp = true; - f10.timestamp = timestamp - Vamp::RealTime::fromSeconds(0); + f10.timestamp = timestamp; const float *fbuf = inputBuffers[0]; // make magnitude for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] + fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]); - // magnitude[iBin] = (iBin == frameCount - 1 || frameCount < 2) ? 1.0 : 0.0; } - - + // note magnitude mapping using pre-calculated matrix float *nm = new float[nNote]; // note magnitude for (size_t iNote = 0; iNote < nNote; iNote++) { @@ -814,8 +816,9 @@ NNLSChroma::FeatureSet NNLSChroma::getRemainingFeatures() { - if (debug_on) cerr << "--> getRemainingFeatures" << endl; - FeatureSet fsOut; + if (debug_on) cerr << "--> getRemainingFeatures" << endl; + FeatureSet fsOut; + if (m_fl.size() == 0) return fsOut; // /** Calculate Tuning calculate tuning from (using the angle of the complex number defined by the @@ -866,7 +869,7 @@ // cerr << intShift << " " << intFactor << endl; - for (int k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins + for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor; f2.values.push_back(tempValue); } @@ -1035,15 +1038,15 @@ vector temp = vector(nChord,0); scoreChordogram.push_back(temp); } - for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end()-2*halfwindowlength-1; ++it) { + for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) { int startIndex = count + 1; int endIndex = count + 2 * halfwindowlength; vector temp = vector(nChord,0); float maxval = 0; // will be the value of the most salient chord in this frame - float maxindex = nChord-1; //... and the index thereof + float maxindex = 0; //... and the index thereof unsigned bestchordL = 0; // index of the best "left" chord unsigned bestchordR = 0; // index of the best "right" chord - for (unsigned iWF = 1; iWF < 2*halfwindowlength; ++iWF) { + for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { // now find the max values on both sides of iWF // left side: float maxL = 0; @@ -1100,11 +1103,11 @@ if (scoreChordogram[count][iChord] > maxval) { maxval = scoreChordogram[count][iChord]; maxindex = iChord; - cerr << iChord << endl; + // cerr << iChord << endl; } } chordSequence.push_back(maxindex); - cerr << "before modefilter, maxindex: " << maxindex << endl; + // cerr << "before modefilter, maxindex: " << maxindex << endl; count++; } @@ -1120,16 +1123,18 @@ vector chordCount = vector(nChord,0); int maxChordCount = 0; int maxChordIndex = nChord-1; - // int startIndex = max(count - halfwindowlength,0); - // int endIndex = min(int(chordogram.size()), startIndex + halfwindowlength); - // for (int i = startIndex; i < endIndex; i++) { - // chordCount[chordSequence[i]]++; - // if (chordCount[chordSequence[i]] > maxChordCount) { - // maxChordCount++; - // maxChordIndex = chordSequence[i]; - // } - // } - maxChordIndex = chordSequence[count]; + int startIndex = max(count - halfwindowlength/2,0); + int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); + for (int i = startIndex; i < endIndex; i++) { + chordCount[chordSequence[i]]++; + if (chordCount[chordSequence[i]] > maxChordCount) { + cerr << "start index " << startIndex << endl; + maxChordCount++; + maxChordIndex = chordSequence[i]; + } + } + // chordSequence[count] = maxChordIndex; + cerr << maxChordIndex << endl; if (oldChordIndex != maxChordIndex) { oldChordIndex = maxChordIndex;