Mercurial > hg > nnls-chroma
comparison Chordino.cpp @ 91:b56dde3417d4 matthiasm-plugin
* Fix the "comparison between signed and unsigned" warnings; remove some ifdef'd-out old code
author | Chris Cannam |
---|---|
date | Thu, 02 Dec 2010 13:05:23 +0000 |
parents | 7af5312e66f8 |
children | a76598852303 |
comparison
legend: equal | deleted | inserted | replaced
90:b095d83585c9 | 91:b56dde3417d4 |
---|---|
280 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); | 280 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); |
281 | 281 |
282 | 282 |
283 /** Tune Log-Frequency Spectrogram | 283 /** Tune Log-Frequency Spectrogram |
284 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to | 284 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to |
285 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectum). | 285 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum). |
286 **/ | 286 **/ |
287 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... "; | 287 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... "; |
288 | 288 |
289 float tempValue = 0; | 289 float tempValue = 0; |
290 float dbThreshold = 0; // relative to the background spectrum | 290 float dbThreshold = 0; // relative to the background spectrum |
296 int nFrame = m_logSpectrum.size(); | 296 int nFrame = m_logSpectrum.size(); |
297 | 297 |
298 vector<Vamp::RealTime> timestamps; | 298 vector<Vamp::RealTime> timestamps; |
299 | 299 |
300 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) { | 300 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) { |
301 Feature currentLogSpectum = *i; | 301 Feature currentLogSpectrum = *i; |
302 Feature currentTunedSpec; // tuned log-frequency spectrum | 302 Feature currentTunedSpec; // tuned log-frequency spectrum |
303 currentTunedSpec.hasTimestamp = true; | 303 currentTunedSpec.hasTimestamp = true; |
304 currentTunedSpec.timestamp = currentLogSpectum.timestamp; | 304 currentTunedSpec.timestamp = currentLogSpectrum.timestamp; |
305 timestamps.push_back(currentLogSpectum.timestamp); | 305 timestamps.push_back(currentLogSpectrum.timestamp); |
306 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero | 306 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero |
307 | 307 |
308 if (m_tuneLocal) { | 308 if (m_tuneLocal) { |
309 intShift = floor(m_localTuning[count] * 3); | 309 intShift = floor(m_localTuning[count] * 3); |
310 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this | 310 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this |
311 } | 311 } |
312 | 312 |
313 // cerr << intShift << " " << floatShift << endl; | 313 // cerr << intShift << " " << floatShift << endl; |
314 | 314 |
315 for (unsigned k = 2; k < currentLogSpectum.values.size() - 3; ++k) { // interpolate all inner bins | 315 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins |
316 tempValue = currentLogSpectum.values[k + intShift] * (1-floatShift) + currentLogSpectum.values[k+intShift+1] * floatShift; | 316 tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift; |
317 currentTunedSpec.values.push_back(tempValue); | 317 currentTunedSpec.values.push_back(tempValue); |
318 } | 318 } |
319 | 319 |
320 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge | 320 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge |
321 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw); | 321 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw); |
386 // here's where the non-negative least squares algorithm calculates the note activation x | 386 // here's where the non-negative least squares algorithm calculates the note activation x |
387 | 387 |
388 vector<float> chroma = vector<float>(12, 0); | 388 vector<float> chroma = vector<float>(12, 0); |
389 vector<float> basschroma = vector<float>(12, 0); | 389 vector<float> basschroma = vector<float>(12, 0); |
390 float currval; | 390 float currval; |
391 unsigned iSemitone = 0; | 391 int iSemitone = 0; |
392 | 392 |
393 if (some_b_greater_zero) { | 393 if (some_b_greater_zero) { |
394 if (m_useNNLS == 0) { | 394 if (m_useNNLS == 0) { |
395 for (unsigned iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) { | 395 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) { |
396 currval = 0; | 396 currval = 0; |
397 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) { | 397 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) { |
398 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1))); | 398 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1))); |
399 } | 399 } |
400 chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; | 400 chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; |
406 float x[84+1000]; | 406 float x[84+1000]; |
407 for (int i = 1; i < 1084; ++i) x[i] = 1.0; | 407 for (int i = 1; i < 1084; ++i) x[i] = 1.0; |
408 vector<int> signifIndex; | 408 vector<int> signifIndex; |
409 int index=0; | 409 int index=0; |
410 sumb /= 84.0; | 410 sumb /= 84.0; |
411 for (unsigned iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) { | 411 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) { |
412 float currval = 0; | 412 float currval = 0; |
413 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) { | 413 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) { |
414 currval += b[iNote + iBPS]; | 414 currval += b[iNote + iBPS]; |
415 } | 415 } |
416 if (currval > 0) signifIndex.push_back(index); | 416 if (currval > 0) signifIndex.push_back(index); |
422 int indx[84+1000]; | 422 int indx[84+1000]; |
423 int mode; | 423 int mode; |
424 int dictsize = nNote*signifIndex.size(); | 424 int dictsize = nNote*signifIndex.size(); |
425 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl; | 425 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl; |
426 float *curr_dict = new float[dictsize]; | 426 float *curr_dict = new float[dictsize]; |
427 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { | 427 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) { |
428 for (unsigned iBin = 0; iBin < nNote; iBin++) { | 428 for (int iBin = 0; iBin < nNote; iBin++) { |
429 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin]; | 429 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin]; |
430 } | 430 } |
431 } | 431 } |
432 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode); | 432 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode); |
433 delete [] curr_dict; | 433 delete [] curr_dict; |
434 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { | 434 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) { |
435 // cerr << mode << endl; | 435 // cerr << mode << endl; |
436 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; | 436 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; |
437 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; | 437 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; |
438 } | 438 } |
439 } | 439 } |
464 } | 464 } |
465 chromanorm[2] = sqrt(chromanorm[2]); | 465 chromanorm[2] = sqrt(chromanorm[2]); |
466 break; | 466 break; |
467 } | 467 } |
468 if (chromanorm[2] > 0) { | 468 if (chromanorm[2] > 0) { |
469 for (int i = 0; i < chroma.size(); i++) { | 469 for (int i = 0; i < (int)chroma.size(); i++) { |
470 currentChromas.values[i] /= chromanorm[2]; | 470 currentChromas.values[i] /= chromanorm[2]; |
471 } | 471 } |
472 } | 472 } |
473 } | 473 } |
474 | 474 |
534 chord_feature.timestamp = timestamps[0]; | 534 chord_feature.timestamp = timestamps[0]; |
535 chord_feature.label = m_chordnames[chordpath[0]]; | 535 chord_feature.label = m_chordnames[chordpath[0]]; |
536 fsOut[m_outputChords].push_back(chord_feature); | 536 fsOut[m_outputChords].push_back(chord_feature); |
537 | 537 |
538 chordchange[0] = 0; | 538 chordchange[0] = 0; |
539 for (int iFrame = 1; iFrame < chordpath.size(); ++iFrame) { | 539 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) { |
540 // cerr << chordpath[iFrame] << endl; | 540 // cerr << chordpath[iFrame] << endl; |
541 if (chordpath[iFrame] != oldchord ) { | 541 if (chordpath[iFrame] != oldchord ) { |
542 // chord | 542 // chord |
543 Feature chord_feature; // chord estimate | 543 Feature chord_feature; // chord estimate |
544 chord_feature.hasTimestamp = true; | 544 chord_feature.hasTimestamp = true; |
545 chord_feature.timestamp = timestamps[iFrame]; | 545 chord_feature.timestamp = timestamps[iFrame]; |
546 chord_feature.label = m_chordnames[chordpath[iFrame]]; | 546 chord_feature.label = m_chordnames[chordpath[iFrame]]; |
547 fsOut[m_outputChords].push_back(chord_feature); | 547 fsOut[m_outputChords].push_back(chord_feature); |
548 oldchord = chordpath[iFrame]; | 548 oldchord = chordpath[iFrame]; |
549 // chord notes | 549 // chord notes |
550 for (int iNote = 0; iNote < oldnotes.size(); ++iNote) { // finish duration of old chord | 550 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord |
551 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame]; | 551 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame]; |
552 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]); | 552 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]); |
553 } | 553 } |
554 oldnotes.clear(); | 554 oldnotes.clear(); |
555 for (int iNote = 0; iNote < m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord | 555 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord |
556 Feature chordnote_feature; | 556 Feature chordnote_feature; |
557 chordnote_feature.hasTimestamp = true; | 557 chordnote_feature.hasTimestamp = true; |
558 chordnote_feature.timestamp = timestamps[iFrame]; | 558 chordnote_feature.timestamp = timestamps[iFrame]; |
559 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]); | 559 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]); |
560 chordnote_feature.hasDuration = true; | 560 chordnote_feature.hasDuration = true; |
587 int endIndex = count + 2 * halfwindowlength; | 587 int endIndex = count + 2 * halfwindowlength; |
588 | 588 |
589 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1); | 589 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1); |
590 | 590 |
591 vector<int> chordCandidates; | 591 vector<int> chordCandidates; |
592 for (unsigned iChord = 0; iChord < nChord-1; iChord++) { | 592 for (int iChord = 0; iChord+1 < nChord; iChord++) { |
593 // float currsum = 0; | 593 // float currsum = 0; |
594 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { | 594 // for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) { |
595 // currsum += chordogram[iFrame][iChord]; | 595 // currsum += chordogram[iFrame][iChord]; |
596 // } | 596 // } |
597 // if (currsum > chordThreshold) chordCandidates.push_back(iChord); | 597 // if (currsum > chordThreshold) chordCandidates.push_back(iChord); |
598 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { | 598 for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) { |
599 if (chordogram[iFrame][iChord] > chordThreshold) { | 599 if (chordogram[iFrame][iChord] > chordThreshold) { |
600 chordCandidates.push_back(iChord); | 600 chordCandidates.push_back(iChord); |
601 break; | 601 break; |
602 } | 602 } |
603 } | 603 } |
605 chordCandidates.push_back(nChord-1); | 605 chordCandidates.push_back(nChord-1); |
606 // cerr << chordCandidates.size() << endl; | 606 // cerr << chordCandidates.size() << endl; |
607 | 607 |
608 float maxval = 0; // will be the value of the most salient *chord change* in this frame | 608 float maxval = 0; // will be the value of the most salient *chord change* in this frame |
609 float maxindex = 0; //... and the index thereof | 609 float maxindex = 0; //... and the index thereof |
610 unsigned bestchordL = nChord-1; // index of the best "left" chord | 610 int bestchordL = nChord-1; // index of the best "left" chord |
611 unsigned bestchordR = nChord-1; // index of the best "right" chord | 611 int bestchordR = nChord-1; // index of the best "right" chord |
612 | 612 |
613 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { | 613 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { |
614 // now find the max values on both sides of iWF | 614 // now find the max values on both sides of iWF |
615 // left side: | 615 // left side: |
616 float maxL = 0; | 616 float maxL = 0; |
617 unsigned maxindL = nChord-1; | 617 int maxindL = nChord-1; |
618 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { | 618 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) { |
619 unsigned iChord = chordCandidates[kChord]; | 619 int iChord = chordCandidates[kChord]; |
620 float currsum = 0; | 620 float currsum = 0; |
621 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) { | 621 for (int iFrame = 0; iFrame < iWF-1; ++iFrame) { |
622 currsum += chordogram[count+iFrame][iChord]; | 622 currsum += chordogram[count+iFrame][iChord]; |
623 } | 623 } |
624 if (iChord == nChord-1) currsum *= 0.8; | 624 if (iChord == nChord-1) currsum *= 0.8; |
625 if (currsum > maxL) { | 625 if (currsum > maxL) { |
626 maxL = currsum; | 626 maxL = currsum; |
627 maxindL = iChord; | 627 maxindL = iChord; |
628 } | 628 } |
629 } | 629 } |
630 // right side: | 630 // right side: |
631 float maxR = 0; | 631 float maxR = 0; |
632 unsigned maxindR = nChord-1; | 632 int maxindR = nChord-1; |
633 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { | 633 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) { |
634 unsigned iChord = chordCandidates[kChord]; | 634 int iChord = chordCandidates[kChord]; |
635 float currsum = 0; | 635 float currsum = 0; |
636 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { | 636 for (int iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { |
637 currsum += chordogram[count+iFrame][iChord]; | 637 currsum += chordogram[count+iFrame][iChord]; |
638 } | 638 } |
639 if (iChord == nChord-1) currsum *= 0.8; | 639 if (iChord == nChord-1) currsum *= 0.8; |
640 if (currsum > maxR) { | 640 if (currsum > maxR) { |
641 maxR = currsum; | 641 maxR = currsum; |
650 } | 650 } |
651 | 651 |
652 } | 652 } |
653 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; | 653 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; |
654 // add a score to every chord-frame-point that was part of a maximum | 654 // add a score to every chord-frame-point that was part of a maximum |
655 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) { | 655 for (int iFrame = 0; iFrame < maxindex-1; ++iFrame) { |
656 scoreChordogram[iFrame+count][bestchordL]++; | 656 scoreChordogram[iFrame+count][bestchordL]++; |
657 } | 657 } |
658 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { | 658 for (int iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { |
659 scoreChordogram[iFrame+count][bestchordR]++; | 659 scoreChordogram[iFrame+count][bestchordR]++; |
660 } | 660 } |
661 if (bestchordL != bestchordR) { | 661 if (bestchordL != bestchordR) { |
662 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; | 662 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; |
663 } | 663 } |
666 // cerr << "******* agent finished *******" << endl; | 666 // cerr << "******* agent finished *******" << endl; |
667 count = 0; | 667 count = 0; |
668 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { | 668 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { |
669 float maxval = 0; // will be the value of the most salient chord in this frame | 669 float maxval = 0; // will be the value of the most salient chord in this frame |
670 float maxindex = 0; //... and the index thereof | 670 float maxindex = 0; //... and the index thereof |
671 for (unsigned iChord = 0; iChord < nChord; iChord++) { | 671 for (int iChord = 0; iChord < nChord; iChord++) { |
672 if (scoreChordogram[count][iChord] > maxval) { | 672 if (scoreChordogram[count][iChord] > maxval) { |
673 maxval = scoreChordogram[count][iChord]; | 673 maxval = scoreChordogram[count][iChord]; |
674 maxindex = iChord; | 674 maxindex = iChord; |
675 // cerr << iChord << endl; | 675 // cerr << iChord << endl; |
676 } | 676 } |
711 // cerr << chordchange[count] << endl; | 711 // cerr << chordchange[count] << endl; |
712 if (oldChord != maxChord) { | 712 if (oldChord != maxChord) { |
713 oldChord = maxChord; | 713 oldChord = maxChord; |
714 chord_feature.label = m_chordnames[maxChordIndex]; | 714 chord_feature.label = m_chordnames[maxChordIndex]; |
715 fsOut[m_outputChords].push_back(chord_feature); | 715 fsOut[m_outputChords].push_back(chord_feature); |
716 for (int iNote = 0; iNote < oldnotes.size(); ++iNote) { // finish duration of old chord | 716 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord |
717 oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp; | 717 oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp; |
718 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]); | 718 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]); |
719 } | 719 } |
720 oldnotes.clear(); | 720 oldnotes.clear(); |
721 for (int iNote = 0; iNote < m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord | 721 for (int iNote = 0; iNote < (int)m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord |
722 Feature chordnote_feature; | 722 Feature chordnote_feature; |
723 chordnote_feature.hasTimestamp = true; | 723 chordnote_feature.hasTimestamp = true; |
724 chordnote_feature.timestamp = chord_feature.timestamp; | 724 chordnote_feature.timestamp = chord_feature.timestamp; |
725 chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]); | 725 chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]); |
726 chordnote_feature.hasDuration = true; | 726 chordnote_feature.hasDuration = true; |
735 chord_feature.hasTimestamp = true; | 735 chord_feature.hasTimestamp = true; |
736 chord_feature.timestamp = timestamps[timestamps.size()-1]; | 736 chord_feature.timestamp = timestamps[timestamps.size()-1]; |
737 chord_feature.label = "N"; | 737 chord_feature.label = "N"; |
738 fsOut[m_outputChords].push_back(chord_feature); | 738 fsOut[m_outputChords].push_back(chord_feature); |
739 | 739 |
740 for (int iNote = 0; iNote < oldnotes.size(); ++iNote) { // finish duration of old chord | 740 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord |
741 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1]; | 741 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1]; |
742 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]); | 742 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]); |
743 } | 743 } |
744 | 744 |
745 cerr << "done." << endl; | 745 cerr << "done." << endl; |