Mercurial > hg > nnls-chroma
comparison Chordino.cpp @ 164:3c731acad404
Fix some problems reported by the plugin tester: zero sample rate on outputs with a fixed sample type; mismatched parameter id ("spectralshape" in Chordino, "s" in NNLSBase -- changing this in Chordino won't break anything, as it would never have worked under that name anyway); some NaN values
author | Chris Cannam |
---|---|
date | Fri, 04 Sep 2015 16:45:37 +0100 |
parents | 59b26f52550d |
children | 25db325b6cf9 |
comparison
equal
deleted
inserted
replaced
163:59b26f52550d | 164:3c731acad404 |
---|---|
119 whiteningParam.defaultValue = 1.0; | 119 whiteningParam.defaultValue = 1.0; |
120 whiteningParam.isQuantized = false; | 120 whiteningParam.isQuantized = false; |
121 list.push_back(whiteningParam); | 121 list.push_back(whiteningParam); |
122 | 122 |
123 ParameterDescriptor spectralShapeParam; | 123 ParameterDescriptor spectralShapeParam; |
124 spectralShapeParam.identifier = "spectralshape"; | 124 spectralShapeParam.identifier = "s"; |
125 spectralShapeParam.name = "spectral shape"; | 125 spectralShapeParam.name = "spectral shape"; |
126 spectralShapeParam.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics."; | 126 spectralShapeParam.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics."; |
127 spectralShapeParam.unit = ""; | 127 spectralShapeParam.unit = ""; |
128 spectralShapeParam.minValue = 0.5; | 128 spectralShapeParam.minValue = 0.5; |
129 spectralShapeParam.maxValue = 0.9; | 129 spectralShapeParam.maxValue = 0.9; |
165 if (debug_on) cerr << "--> getOutputDescriptors" << endl; | 165 if (debug_on) cerr << "--> getOutputDescriptors" << endl; |
166 OutputList list; | 166 OutputList list; |
167 | 167 |
168 int index = 0; | 168 int index = 0; |
169 | 169 |
170 float featureRate = | |
171 (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | |
172 | |
170 OutputDescriptor d7; | 173 OutputDescriptor d7; |
171 d7.identifier = "simplechord"; | 174 d7.identifier = "simplechord"; |
172 d7.name = "Chord Estimate"; | 175 d7.name = "Chord Estimate"; |
173 d7.description = "Estimated chord times and labels."; | 176 d7.description = "Estimated chord times and labels."; |
174 d7.unit = ""; | 177 d7.unit = ""; |
176 d7.binCount = 0; | 179 d7.binCount = 0; |
177 d7.hasKnownExtents = false; | 180 d7.hasKnownExtents = false; |
178 d7.isQuantized = false; | 181 d7.isQuantized = false; |
179 d7.sampleType = OutputDescriptor::VariableSampleRate; | 182 d7.sampleType = OutputDescriptor::VariableSampleRate; |
180 d7.hasDuration = false; | 183 d7.hasDuration = false; |
181 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 184 d7.sampleRate = featureRate; |
182 list.push_back(d7); | 185 list.push_back(d7); |
183 m_outputChords = index++; | 186 m_outputChords = index++; |
184 | 187 |
185 OutputDescriptor chordnotes; | 188 OutputDescriptor chordnotes; |
186 chordnotes.identifier = "chordnotes"; | 189 chordnotes.identifier = "chordnotes"; |
194 chordnotes.maxValue = 127; | 197 chordnotes.maxValue = 127; |
195 chordnotes.isQuantized = true; | 198 chordnotes.isQuantized = true; |
196 chordnotes.quantizeStep = 1; | 199 chordnotes.quantizeStep = 1; |
197 chordnotes.sampleType = OutputDescriptor::VariableSampleRate; | 200 chordnotes.sampleType = OutputDescriptor::VariableSampleRate; |
198 chordnotes.hasDuration = true; | 201 chordnotes.hasDuration = true; |
199 chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 202 chordnotes.sampleRate = featureRate; |
200 list.push_back(chordnotes); | 203 list.push_back(chordnotes); |
201 m_outputChordnotes = index++; | 204 m_outputChordnotes = index++; |
202 | 205 |
203 OutputDescriptor d8; | 206 OutputDescriptor d8; |
204 d8.identifier = "harmonicchange"; | 207 d8.identifier = "harmonicchange"; |
208 d8.hasFixedBinCount = true; | 211 d8.hasFixedBinCount = true; |
209 d8.binCount = 1; | 212 d8.binCount = 1; |
210 d8.hasKnownExtents = false; | 213 d8.hasKnownExtents = false; |
211 d8.isQuantized = false; | 214 d8.isQuantized = false; |
212 d8.sampleType = OutputDescriptor::FixedSampleRate; | 215 d8.sampleType = OutputDescriptor::FixedSampleRate; |
216 d8.sampleRate = featureRate; | |
213 d8.hasDuration = false; | 217 d8.hasDuration = false; |
214 list.push_back(d8); | 218 list.push_back(d8); |
215 m_outputHarmonicChange = index++; | 219 m_outputHarmonicChange = index++; |
216 | 220 |
217 OutputDescriptor loglikelihood; | 221 OutputDescriptor loglikelihood; |
222 loglikelihood.hasFixedBinCount = true; | 226 loglikelihood.hasFixedBinCount = true; |
223 loglikelihood.binCount = 1; | 227 loglikelihood.binCount = 1; |
224 loglikelihood.hasKnownExtents = false; | 228 loglikelihood.hasKnownExtents = false; |
225 loglikelihood.isQuantized = false; | 229 loglikelihood.isQuantized = false; |
226 loglikelihood.sampleType = OutputDescriptor::FixedSampleRate; | 230 loglikelihood.sampleType = OutputDescriptor::FixedSampleRate; |
231 loglikelihood.sampleRate = featureRate; | |
227 loglikelihood.hasDuration = false; | 232 loglikelihood.hasDuration = false; |
228 list.push_back(loglikelihood); | 233 list.push_back(loglikelihood); |
229 m_outputLoglikelihood = index++; | 234 m_outputLoglikelihood = index++; |
230 | 235 |
231 return list; | 236 return list; |
366 vector<float> chordchange = vector<float>(tunedSpec.size(),0); | 371 vector<float> chordchange = vector<float>(tunedSpec.size(),0); |
367 count = 0; | 372 count = 0; |
368 | 373 |
369 FeatureList chromaList; | 374 FeatureList chromaList; |
370 | 375 |
371 | 376 bool clipwarned = false; |
372 | 377 |
373 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) { | 378 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) { |
374 Feature currentTunedSpec = *it; // logfreq spectrum | 379 Feature currentTunedSpec = *it; // logfreq spectrum |
375 Feature currentChromas; // treble and bass chromagram | 380 Feature currentChromas; // treble and bass chromagram |
376 | 381 |
447 } | 452 } |
448 | 453 |
449 vector<float> origchroma = chroma; | 454 vector<float> origchroma = chroma; |
450 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas | 455 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas |
451 currentChromas.values = chroma; | 456 currentChromas.values = chroma; |
452 | 457 |
453 if (m_doNormalizeChroma > 0) { | 458 if (m_doNormalizeChroma > 0) { |
454 vector<float> chromanorm = vector<float>(3,0); | 459 vector<float> chromanorm = vector<float>(3,0); |
455 switch (int(m_doNormalizeChroma)) { | 460 switch (int(m_doNormalizeChroma)) { |
456 case 0: // should never end up here | 461 case 0: // should never end up here |
457 break; | 462 break; |
487 double sumchordvalue = 0; | 492 double sumchordvalue = 0; |
488 | 493 |
489 for (int iChord = 0; iChord < nChord; iChord++) { | 494 for (int iChord = 0; iChord < nChord; iChord++) { |
490 tempchordvalue = 0; | 495 tempchordvalue = 0; |
491 for (int iBin = 0; iBin < 12; iBin++) { | 496 for (int iBin = 0; iBin < 12; iBin++) { |
492 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; | 497 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; |
493 } | 498 } |
494 for (int iBin = 12; iBin < 24; iBin++) { | 499 for (int iBin = 12; iBin < 24; iBin++) { |
495 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; | 500 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; |
496 } | 501 } |
497 if (iChord == nChord-1) tempchordvalue *= .7; | 502 if (iChord == nChord-1) tempchordvalue *= .7; |
498 if (tempchordvalue < 0) tempchordvalue = 0.0; | 503 if (tempchordvalue < 0) tempchordvalue = 0.0; |
499 tempchordvalue = pow(1.3,tempchordvalue); | 504 if (tempchordvalue > 20.0) { |
500 sumchordvalue+=tempchordvalue; | 505 if (!clipwarned) { |
506 cerr << "WARNING: interim chroma contains extreme chord value " << tempchordvalue << ", clipping this and any others that appear" << endl; | |
507 clipwarned = true; | |
508 } | |
509 tempchordvalue = 10.0; | |
510 } | |
511 tempchordvalue = pow(1.3, tempchordvalue); | |
512 sumchordvalue += tempchordvalue; | |
501 currentChordSalience.push_back(tempchordvalue); | 513 currentChordSalience.push_back(tempchordvalue); |
502 } | 514 } |
503 if (sumchordvalue > 0) { | 515 if (sumchordvalue > 0) { |
504 for (int iChord = 0; iChord < nChord; iChord++) { | 516 for (int iChord = 0; iChord < nChord; iChord++) { |
505 currentChordSalience[iChord] /= sumchordvalue; | 517 currentChordSalience[iChord] /= sumchordvalue; |
566 oldnotes.push_back(chordnote_feature); | 578 oldnotes.push_back(chordnote_feature); |
567 } | 579 } |
568 } | 580 } |
569 /* calculating simple chord change prob */ | 581 /* calculating simple chord change prob */ |
570 for (int iChord = 0; iChord < nChord; iChord++) { | 582 for (int iChord = 0; iChord < nChord; iChord++) { |
571 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]); | 583 double num = delta[(iFrame-1) * nChord + iChord]; |
584 double denom = delta[iFrame * nChord + iChord]; | |
585 double eps = 1e-7; | |
586 if (denom < eps) denom = eps; | |
587 chordchange[iFrame-1] += num * log(num / denom + eps); | |
572 } | 588 } |
573 } | 589 } |
574 | 590 |
575 float logscale = 0; | 591 float logscale = 0; |
576 for (int iFrame = 0; iFrame < nFrame; ++iFrame) { | 592 for (int iFrame = 0; iFrame < nFrame; ++iFrame) { |
599 for (int iFrame = 0; iFrame < nFrame; iFrame++) { | 615 for (int iFrame = 0; iFrame < nFrame; iFrame++) { |
600 Feature chordchange_feature; | 616 Feature chordchange_feature; |
601 chordchange_feature.hasTimestamp = true; | 617 chordchange_feature.hasTimestamp = true; |
602 chordchange_feature.timestamp = timestamps[iFrame]; | 618 chordchange_feature.timestamp = timestamps[iFrame]; |
603 chordchange_feature.values.push_back(chordchange[iFrame]); | 619 chordchange_feature.values.push_back(chordchange[iFrame]); |
604 // cerr << chordchange[iFrame] << endl; | 620 // cerr << "putting value " << chordchange[iFrame] << " at time " << chordchange_feature.timestamp << endl; |
605 fsOut[m_outputHarmonicChange].push_back(chordchange_feature); | 621 fsOut[m_outputHarmonicChange].push_back(chordchange_feature); |
606 } | 622 } |
607 | 623 |
608 free(delta); | 624 free(delta); |
609 | 625 |