comparison NNLSChroma.cpp @ 4:266d23a41cdc matthiasm-plugin

tested almost finished plugin, chord est with mode filter
author matthiasm
date Tue, 01 Jun 2010 09:41:31 +0000
parents 8360483a026e
children 84db8ce38fd3
comparison
equal deleted inserted replaced
3:8360483a026e 4:266d23a41cdc
159 159
160 bool dictionaryMatrix(float* dm) { 160 bool dictionaryMatrix(float* dm) {
161 int binspersemitone = 3; // this must be 3 161 int binspersemitone = 3; // this must be 3
162 int minoctave = 0; // this must be 0 162 int minoctave = 0; // this must be 0
163 int maxoctave = 7; // this must be 7 163 int maxoctave = 7; // this must be 7
164 float s_param = 0.6; 164 float s_param = 0.7;
165 165
166 // pitch-spaced frequency vector 166 // pitch-spaced frequency vector
167 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone! 167 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
168 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone! 168 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
169 vector<float> cq_f; 169 vector<float> cq_f;
214 m_meanTuning1(0), 214 m_meanTuning1(0),
215 m_meanTuning2(0), 215 m_meanTuning2(0),
216 m_localTuning0(0), 216 m_localTuning0(0),
217 m_localTuning1(0), 217 m_localTuning1(0),
218 m_localTuning2(0), 218 m_localTuning2(0),
219 m_paling(0.8), 219 m_paling(1.0),
220 m_preset(0.0), 220 m_preset(0.0),
221 m_localTuning(0), 221 m_localTuning(0),
222 m_kernelValue(0), 222 m_kernelValue(0),
223 m_kernelFftIndex(0), 223 m_kernelFftIndex(0),
224 m_kernelNoteIndex(0), 224 m_kernelNoteIndex(0),
256 string 256 string
257 NNLSChroma::getDescription() const 257 NNLSChroma::getDescription() const
258 { 258 {
259 // Return something helpful here! 259 // Return something helpful here!
260 if (debug_on) cerr << "--> getDescription" << endl; 260 if (debug_on) cerr << "--> getDescription" << endl;
261 return ""; 261 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum (LAS) of the DFT: the LAS itself, a standard-tuned version thereof (the local and global tuning estimates can are also be output), an approximate transcription to semitone activation using non-linear least squares (NNLS). Furthermore chroma features and a simple chord estimate derived from this NNLS semitone transcription.";
262 } 262 }
263 263
264 string 264 string
265 NNLSChroma::getMaker() const 265 NNLSChroma::getMaker() const
266 { 266 {
329 NNLSChroma::ParameterList 329 NNLSChroma::ParameterList
330 NNLSChroma::getParameterDescriptors() const 330 NNLSChroma::getParameterDescriptors() const
331 { 331 {
332 if (debug_on) cerr << "--> getParameterDescriptors" << endl; 332 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
333 ParameterList list; 333 ParameterList list;
334
335 ParameterDescriptor d0;
336 d0.identifier = "notedict";
337 d0.name = "note dictionary";
338 d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
339 d0.unit = "";
340 d0.minValue = 0;
341 d0.maxValue = 1;
342 d0.defaultValue = 0;
343 d0.isQuantized = true;
344 d0.valueNames.push_back("s = 0.6");
345 // d0.valueNames.push_back("s = 0.9");
346 // d0.valueNames.push_back("s linearly spaced");
347 d0.valueNames.push_back("no NNLS");
348 d0.quantizeStep = 1.0;
349 list.push_back(d0);
350
351 ParameterDescriptor d1;
352 d1.identifier = "tuningmode";
353 d1.name = "tuning mode";
354 d1.description = "Tuning can be performed locally or on the whole extraction segment.";
355 d1.unit = "";
356 d1.minValue = 0;
357 d1.maxValue = 1;
358 d1.defaultValue = 1;
359 d1.isQuantized = true;
360 d1.valueNames.push_back("global tuning");
361 d1.valueNames.push_back("local tuning");
362 d1.quantizeStep = 1.0;
363 list.push_back(d1);
364
365 ParameterDescriptor d2;
366 d2.identifier = "paling";
367 d2.name = "spectral paling";
368 d2.description = "Spectral paling: no paling - 0; whitening - 1.";
369 d2.unit = "";
370 d2.isQuantized = true;
371 d2.quantizeStep = 0.1;
372 d2.minValue = 0.0;
373 d2.maxValue = 1.0;
374 d2.defaultValue = 0.5;
375 // d2.isQuantized = false;
376 list.push_back(d2);
377 334
378 ParameterDescriptor d3; 335 ParameterDescriptor d3;
379 d3.identifier = "preset"; 336 d3.identifier = "preset";
380 d3.name = "preset"; 337 d3.name = "preset";
381 d3.description = "Spectral paling: no paling - 0; whitening - 1."; 338 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
382 d3.unit = ""; 339 d3.unit = "";
383 d3.isQuantized = true; 340 d3.isQuantized = true;
384 d3.quantizeStep = 1; 341 d3.quantizeStep = 1;
385 d3.minValue = 0.0; 342 d3.minValue = 0.0;
386 d3.maxValue = 2.0; 343 d3.maxValue = 3.0;
387 d3.defaultValue = 0.0; 344 d3.defaultValue = 0.0;
388 d3.valueNames.push_back("polyphonic pop"); 345 d3.valueNames.push_back("polyphonic pop");
389 d3.valueNames.push_back("polyphonic pop (fast)"); 346 d3.valueNames.push_back("polyphonic pop (fast)");
390 d3.valueNames.push_back("solo keyboard"); 347 d3.valueNames.push_back("solo keyboard");
391 d3.valueNames.push_back("manual"); 348 d3.valueNames.push_back("manual");
392 list.push_back(d3); 349 list.push_back(d3);
350
351 // ParameterDescriptor d0;
352 // d0.identifier = "notedict";
353 // d0.name = "note dictionary";
354 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
355 // d0.unit = "";
356 // d0.minValue = 0;
357 // d0.maxValue = 1;
358 // d0.defaultValue = 0;
359 // d0.isQuantized = true;
360 // d0.valueNames.push_back("s = 0.6");
361 // d0.valueNames.push_back("no NNLS");
362 // d0.quantizeStep = 1.0;
363 // list.push_back(d0);
364
365 ParameterDescriptor d1;
366 d1.identifier = "tuningmode";
367 d1.name = "tuning mode";
368 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
369 d1.unit = "";
370 d1.minValue = 0;
371 d1.maxValue = 1;
372 d1.defaultValue = 0;
373 d1.isQuantized = true;
374 d1.valueNames.push_back("global tuning");
375 d1.valueNames.push_back("local tuning");
376 d1.quantizeStep = 1.0;
377 list.push_back(d1);
378
379 // ParameterDescriptor d2;
380 // d2.identifier = "paling";
381 // d2.name = "spectral paling";
382 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
383 // d2.unit = "";
384 // d2.isQuantized = true;
385 // // d2.quantizeStep = 0.1;
386 // d2.minValue = 0.0;
387 // d2.maxValue = 1.0;
388 // d2.defaultValue = 1.0;
389 // d2.isQuantized = false;
390 // list.push_back(d2);
391
393 return list; 392 return list;
394 } 393 }
395 394
396 float 395 float
397 NNLSChroma::getParameter(string identifier) const 396 NNLSChroma::getParameter(string identifier) const
649 // list.push_back(d9); 648 // list.push_back(d9);
650 // 649 //
651 OutputDescriptor d10; 650 OutputDescriptor d10;
652 d10.identifier = "localtuning"; 651 d10.identifier = "localtuning";
653 d10.name = "Local tuning"; 652 d10.name = "Local tuning";
654 d10.description = ""; 653 d10.description = "Tuning based on the history up to this timestamp.";
655 d10.unit = "Hz"; 654 d10.unit = "Hz";
656 d10.hasFixedBinCount = true; 655 d10.hasFixedBinCount = true;
657 d10.binCount = 1; 656 d10.binCount = 1;
658 d10.hasKnownExtents = true; 657 d10.hasKnownExtents = true;
659 d10.minValue = 427.47; 658 d10.minValue = 427.47;
679 channels > getMaxChannelCount()) return false; 678 channels > getMaxChannelCount()) return false;
680 m_blockSize = blockSize; 679 m_blockSize = blockSize;
681 m_stepSize = stepSize; 680 m_stepSize = stepSize;
682 frameCount = 0; 681 frameCount = 0;
683 int tempn = 256 * m_blockSize/2; 682 int tempn = 256 * m_blockSize/2;
684 cerr << "length of tempkernel : " << tempn << endl; 683 // cerr << "length of tempkernel : " << tempn << endl;
685 float *tempkernel; 684 float *tempkernel;
686 685
687 tempkernel = new float[tempn]; 686 tempkernel = new float[tempn];
688 687
689 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel); 688 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
701 m_kernelFftIndex.push_back(iFFT); 700 m_kernelFftIndex.push_back(iFFT);
702 m_kernelNoteIndex.push_back(iNote); 701 m_kernelNoteIndex.push_back(iNote);
703 } 702 }
704 } 703 }
705 } 704 }
706 cerr << "nonzero count : " << countNonzero << endl; 705 // cerr << "nonzero count : " << countNonzero << endl;
707 delete [] tempkernel; 706 delete [] tempkernel;
708 ofstream myfile; 707 ofstream myfile;
709 myfile.open ("matrix.txt"); 708 myfile.open ("matrix.txt");
710 // myfile << "Writing this to a file.\n"; 709 // myfile << "Writing this to a file.\n";
711 for (int i = 0; i < nNote * 84; ++i) { 710 for (int i = 0; i < nNote * 84; ++i) {
716 } 715 }
717 716
718 void 717 void
719 NNLSChroma::reset() 718 NNLSChroma::reset()
720 { 719 {
721 if (debug_on) cerr << "--> reset"; 720 if (debug_on) cerr << "--> reset";
721
722 // Clear buffers, reset stored values, etc 722 // Clear buffers, reset stored values, etc
723 frameCount = 0; 723 frameCount = 0;
724 m_dictID = 0; 724 m_dictID = 0;
725 m_kernelValue.clear(); 725 m_fl.clear();
726 m_kernelFftIndex.clear(); 726 m_meanTuning0 = 0;
727 m_kernelNoteIndex.clear(); 727 m_meanTuning1 = 0;
728 m_meanTuning2 = 0;
729 m_localTuning0 = 0;
730 m_localTuning1 = 0;
731 m_localTuning2 = 0;
732 m_localTuning.clear();
728 } 733 }
729 734
730 NNLSChroma::FeatureSet 735 NNLSChroma::FeatureSet
731 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp) 736 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
732 { 737 {
733 if (debug_on) cerr << "--> process" << endl; 738 if (debug_on) cerr << "--> process" << endl;
734 // int nNote = 84; // TODO: this should be globally set and/or depend on the kernel matrix 739
735
736 frameCount++; 740 frameCount++;
737 float *magnitude = new float[m_blockSize/2]; 741 float *magnitude = new float[m_blockSize/2];
738 742
739 Feature f10; // local tuning 743 Feature f10; // local tuning
740 f10.hasTimestamp = true; 744 f10.hasTimestamp = true;
741 f10.timestamp = timestamp - Vamp::RealTime::fromSeconds(0); 745 f10.timestamp = timestamp;
742 const float *fbuf = inputBuffers[0]; 746 const float *fbuf = inputBuffers[0];
743 747
744 // make magnitude 748 // make magnitude
745 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { 749 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
746 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] + 750 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
747 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]); 751 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
748 // magnitude[iBin] = (iBin == frameCount - 1 || frameCount < 2) ? 1.0 : 0.0; 752 }
749 } 753
750
751
752 // note magnitude mapping using pre-calculated matrix 754 // note magnitude mapping using pre-calculated matrix
753 float *nm = new float[nNote]; // note magnitude 755 float *nm = new float[nNote]; // note magnitude
754 for (size_t iNote = 0; iNote < nNote; iNote++) { 756 for (size_t iNote = 0; iNote < nNote; iNote++) {
755 nm[iNote] = 0; // initialise as 0 757 nm[iNote] = 0; // initialise as 0
756 } 758 }
812 } 814 }
813 815
814 NNLSChroma::FeatureSet 816 NNLSChroma::FeatureSet
815 NNLSChroma::getRemainingFeatures() 817 NNLSChroma::getRemainingFeatures()
816 { 818 {
817 if (debug_on) cerr << "--> getRemainingFeatures" << endl; 819 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
818 FeatureSet fsOut; 820 FeatureSet fsOut;
821 if (m_fl.size() == 0) return fsOut;
819 // 822 //
820 /** Calculate Tuning 823 /** Calculate Tuning
821 calculate tuning from (using the angle of the complex number defined by the 824 calculate tuning from (using the angle of the complex number defined by the
822 cumulative mean real and imag values) 825 cumulative mean real and imag values)
823 **/ 826 **/
864 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this 867 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
865 } 868 }
866 869
867 // cerr << intShift << " " << intFactor << endl; 870 // cerr << intShift << " " << intFactor << endl;
868 871
869 for (int k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins 872 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
870 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor; 873 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
871 f2.values.push_back(tempValue); 874 f2.values.push_back(tempValue);
872 } 875 }
873 876
874 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge 877 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
1033 vector<int> chordSequence; 1036 vector<int> chordSequence;
1034 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram 1037 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
1035 vector<int> temp = vector<int>(nChord,0); 1038 vector<int> temp = vector<int>(nChord,0);
1036 scoreChordogram.push_back(temp); 1039 scoreChordogram.push_back(temp);
1037 } 1040 }
1038 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end()-2*halfwindowlength-1; ++it) { 1041 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
1039 int startIndex = count + 1; 1042 int startIndex = count + 1;
1040 int endIndex = count + 2 * halfwindowlength; 1043 int endIndex = count + 2 * halfwindowlength;
1041 vector<float> temp = vector<float>(nChord,0); 1044 vector<float> temp = vector<float>(nChord,0);
1042 float maxval = 0; // will be the value of the most salient chord in this frame 1045 float maxval = 0; // will be the value of the most salient chord in this frame
1043 float maxindex = nChord-1; //... and the index thereof 1046 float maxindex = 0; //... and the index thereof
1044 unsigned bestchordL = 0; // index of the best "left" chord 1047 unsigned bestchordL = 0; // index of the best "left" chord
1045 unsigned bestchordR = 0; // index of the best "right" chord 1048 unsigned bestchordR = 0; // index of the best "right" chord
1046 for (unsigned iWF = 1; iWF < 2*halfwindowlength; ++iWF) { 1049 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
1047 // now find the max values on both sides of iWF 1050 // now find the max values on both sides of iWF
1048 // left side: 1051 // left side:
1049 float maxL = 0; 1052 float maxL = 0;
1050 unsigned maxindL = nChord-1; 1053 unsigned maxindL = nChord-1;
1051 for (unsigned iChord = 0; iChord < nChord; iChord++) { 1054 for (unsigned iChord = 0; iChord < nChord; iChord++) {
1098 float maxindex = 0; //... and the index thereof 1101 float maxindex = 0; //... and the index thereof
1099 for (unsigned iChord = 0; iChord < nChord; iChord++) { 1102 for (unsigned iChord = 0; iChord < nChord; iChord++) {
1100 if (scoreChordogram[count][iChord] > maxval) { 1103 if (scoreChordogram[count][iChord] > maxval) {
1101 maxval = scoreChordogram[count][iChord]; 1104 maxval = scoreChordogram[count][iChord];
1102 maxindex = iChord; 1105 maxindex = iChord;
1103 cerr << iChord << endl; 1106 // cerr << iChord << endl;
1104 } 1107 }
1105 } 1108 }
1106 chordSequence.push_back(maxindex); 1109 chordSequence.push_back(maxindex);
1107 cerr << "before modefilter, maxindex: " << maxindex << endl; 1110 // cerr << "before modefilter, maxindex: " << maxindex << endl;
1108 count++; 1111 count++;
1109 } 1112 }
1110 1113
1111 1114
1112 // mode filter on chordSequence 1115 // mode filter on chordSequence
1118 f7.hasTimestamp = true; 1121 f7.hasTimestamp = true;
1119 f7.timestamp = f6.timestamp; 1122 f7.timestamp = f6.timestamp;
1120 vector<int> chordCount = vector<int>(nChord,0); 1123 vector<int> chordCount = vector<int>(nChord,0);
1121 int maxChordCount = 0; 1124 int maxChordCount = 0;
1122 int maxChordIndex = nChord-1; 1125 int maxChordIndex = nChord-1;
1123 // int startIndex = max(count - halfwindowlength,0); 1126 int startIndex = max(count - halfwindowlength/2,0);
1124 // int endIndex = min(int(chordogram.size()), startIndex + halfwindowlength); 1127 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
1125 // for (int i = startIndex; i < endIndex; i++) { 1128 for (int i = startIndex; i < endIndex; i++) {
1126 // chordCount[chordSequence[i]]++; 1129 chordCount[chordSequence[i]]++;
1127 // if (chordCount[chordSequence[i]] > maxChordCount) { 1130 if (chordCount[chordSequence[i]] > maxChordCount) {
1128 // maxChordCount++; 1131 cerr << "start index " << startIndex << endl;
1129 // maxChordIndex = chordSequence[i]; 1132 maxChordCount++;
1130 // } 1133 maxChordIndex = chordSequence[i];
1131 // } 1134 }
1132 maxChordIndex = chordSequence[count]; 1135 }
1136 // chordSequence[count] = maxChordIndex;
1137 cerr << maxChordIndex << endl;
1133 if (oldChordIndex != maxChordIndex) { 1138 if (oldChordIndex != maxChordIndex) {
1134 oldChordIndex = maxChordIndex; 1139 oldChordIndex = maxChordIndex;
1135 1140
1136 char buffer1 [50]; 1141 char buffer1 [50];
1137 if (maxChordIndex < nChord - 1) { 1142 if (maxChordIndex < nChord - 1) {