comparison Chordino.cpp @ 131:b547e7238bf5 darwintunes

removed old chord inference
author matthiasm
date Thu, 16 Jun 2011 17:28:22 +0100
parents a5ee5fe71e52
children 5c1a25b3daf0
comparison
equal deleted inserted replaced
130:014e62613e75 131:b547e7238bf5
79 useNNLSParam.maxValue = 1.0; 79 useNNLSParam.maxValue = 1.0;
80 useNNLSParam.defaultValue = 1.0; 80 useNNLSParam.defaultValue = 1.0;
81 useNNLSParam.isQuantized = true; 81 useNNLSParam.isQuantized = true;
82 useNNLSParam.quantizeStep = 1.0; 82 useNNLSParam.quantizeStep = 1.0;
83 list.push_back(useNNLSParam); 83 list.push_back(useNNLSParam);
84
85 ParameterDescriptor useHMMParam;
86 useHMMParam.identifier = "useHMM";
87 useHMMParam.name = "HMM (Viterbi decoding)";
88 useHMMParam.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used).";
89 useHMMParam.unit = "";
90 useHMMParam.minValue = 0.0;
91 useHMMParam.maxValue = 1.0;
92 useHMMParam.defaultValue = 1.0;
93 useHMMParam.isQuantized = true;
94 useHMMParam.quantizeStep = 1.0;
95 list.push_back(useHMMParam);
96 84
97 ParameterDescriptor rollonParam; 85 ParameterDescriptor rollonParam;
98 rollonParam.identifier = "rollon"; 86 rollonParam.identifier = "rollon";
99 rollonParam.name = "bass noise threshold"; 87 rollonParam.name = "bass noise threshold";
100 rollonParam.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed."; 88 rollonParam.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed.";
525 } 513 }
526 cerr << "done." << endl; 514 cerr << "done." << endl;
527 515
528 vector<Feature> oldnotes; 516 vector<Feature> oldnotes;
529 517
530 // bool m_useHMM = true; // this will go into the chordino header file. 518 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
531 if (m_useHMM == 1.0) { 519 int oldchord = nChord-1;
532 cerr << "[Chordino Plugin] HMM Chord Estimation ... "; 520 double selftransprob = 0.99;
533 int oldchord = nChord-1; 521
534 double selftransprob = 0.99; 522 // vector<double> init = vector<double>(nChord,1.0/nChord);
535 523 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
536 // vector<double> init = vector<double>(nChord,1.0/nChord); 524
537 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1; 525 double *delta;
538 526 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
539 double *delta; 527
540 delta = (double *)malloc(sizeof(double)*nFrame*nChord); 528 vector<vector<double> > trans;
541 529 for (int iChord = 0; iChord < nChord; iChord++) {
542 vector<vector<double> > trans; 530 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
543 for (int iChord = 0; iChord < nChord; iChord++) { 531 temp[iChord] = selftransprob;
544 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1)); 532 trans.push_back(temp);
545 temp[iChord] = selftransprob; 533 }
546 trans.push_back(temp); 534 vector<double> scale;
547 } 535 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale);
548 vector<double> scale; 536
549 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale); 537
550 538 Feature chord_feature; // chord estimate
551 539 chord_feature.hasTimestamp = true;
552 Feature chord_feature; // chord estimate 540 chord_feature.timestamp = timestamps[0];
553 chord_feature.hasTimestamp = true; 541 chord_feature.label = m_chordnames[chordpath[0]];
554 chord_feature.timestamp = timestamps[0]; 542 fsOut[m_outputChords].push_back(chord_feature);
555 chord_feature.label = m_chordnames[chordpath[0]]; 543
556 fsOut[m_outputChords].push_back(chord_feature); 544 chordchange[0] = 0;
557 545 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
558 chordchange[0] = 0; 546 // cerr << chordpath[iFrame] << endl;
559 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) { 547 if (chordpath[iFrame] != oldchord ) {
560 // cerr << chordpath[iFrame] << endl; 548 // chord
561 if (chordpath[iFrame] != oldchord ) {
562 // chord
563 Feature chord_feature; // chord estimate
564 chord_feature.hasTimestamp = true;
565 chord_feature.timestamp = timestamps[iFrame];
566 chord_feature.label = m_chordnames[chordpath[iFrame]];
567 fsOut[m_outputChords].push_back(chord_feature);
568 oldchord = chordpath[iFrame];
569 // chord notes
570 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
571 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
572 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
573 }
574 oldnotes.clear();
575 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
576 Feature chordnote_feature;
577 chordnote_feature.hasTimestamp = true;
578 chordnote_feature.timestamp = timestamps[iFrame];
579 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
580 chordnote_feature.hasDuration = true;
581 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
582 oldnotes.push_back(chordnote_feature);
583 }
584 }
585 /* calculating simple chord change prob */
586 for (int iChord = 0; iChord < nChord; iChord++) {
587 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
588 }
589 }
590
591 float logscale = 0;
592 for (int iFrame = 0; iFrame < nFrame; ++iFrame) {
593 logscale -= log(scale[iFrame]);
594 Feature loglikelihood;
595 loglikelihood.hasTimestamp = true;
596 loglikelihood.timestamp = timestamps[iFrame];
597 loglikelihood.values.push_back(-log(scale[iFrame]));
598 // cerr << chordchange[iFrame] << endl;
599 fsOut[m_outputLoglikelihood].push_back(loglikelihood);
600 }
601 logscale /= nFrame;
602 // cerr << "loglik" << logscale << endl;
603
604
605 // cerr << chordpath[0] << endl;
606 } else {
607 /* Simple chord estimation
608 I just take the local chord estimates ("currentChordSalience") and average them over time, then
609 take the maximum. Very simple, don't do this at home...
610 */
611 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
612 count = 0;
613 int halfwindowlength = m_inputSampleRate / m_stepSize;
614 vector<int> chordSequence;
615 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
616 vector<int> temp = vector<int>(nChord,0);
617 scoreChordogram.push_back(temp);
618 }
619 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
620 int startIndex = count + 1;
621 int endIndex = count + 2 * halfwindowlength;
622
623 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
624
625 vector<int> chordCandidates;
626 for (int iChord = 0; iChord+1 < nChord; iChord++) {
627 // float currsum = 0;
628 // for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
629 // currsum += chordogram[iFrame][iChord];
630 // }
631 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
632 for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
633 if (chordogram[iFrame][iChord] > chordThreshold) {
634 chordCandidates.push_back(iChord);
635 break;
636 }
637 }
638 }
639 chordCandidates.push_back(nChord-1);
640 // cerr << chordCandidates.size() << endl;
641
642 float maxval = 0; // will be the value of the most salient *chord change* in this frame
643 float maxindex = 0; //... and the index thereof
644 int bestchordL = nChord-1; // index of the best "left" chord
645 int bestchordR = nChord-1; // index of the best "right" chord
646
647 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
648 // now find the max values on both sides of iWF
649 // left side:
650 float maxL = 0;
651 int maxindL = nChord-1;
652 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
653 int iChord = chordCandidates[kChord];
654 float currsum = 0;
655 for (int iFrame = 0; iFrame < iWF-1; ++iFrame) {
656 currsum += chordogram[count+iFrame][iChord];
657 }
658 if (iChord == nChord-1) currsum *= 0.8;
659 if (currsum > maxL) {
660 maxL = currsum;
661 maxindL = iChord;
662 }
663 }
664 // right side:
665 float maxR = 0;
666 int maxindR = nChord-1;
667 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
668 int iChord = chordCandidates[kChord];
669 float currsum = 0;
670 for (int iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
671 currsum += chordogram[count+iFrame][iChord];
672 }
673 if (iChord == nChord-1) currsum *= 0.8;
674 if (currsum > maxR) {
675 maxR = currsum;
676 maxindR = iChord;
677 }
678 }
679 if (maxL+maxR > maxval) {
680 maxval = maxL+maxR;
681 maxindex = iWF;
682 bestchordL = maxindL;
683 bestchordR = maxindR;
684 }
685
686 }
687 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
688 // add a score to every chord-frame-point that was part of a maximum
689 for (int iFrame = 0; iFrame < maxindex-1; ++iFrame) {
690 scoreChordogram[iFrame+count][bestchordL]++;
691 }
692 for (int iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
693 scoreChordogram[iFrame+count][bestchordR]++;
694 }
695 if (bestchordL != bestchordR) {
696 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
697 }
698 count++;
699 }
700 // cerr << "******* agent finished *******" << endl;
701 count = 0;
702 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
703 float maxval = 0; // will be the value of the most salient chord in this frame
704 float maxindex = 0; //... and the index thereof
705 for (int iChord = 0; iChord < nChord; iChord++) {
706 if (scoreChordogram[count][iChord] > maxval) {
707 maxval = scoreChordogram[count][iChord];
708 maxindex = iChord;
709 // cerr << iChord << endl;
710 }
711 }
712 chordSequence.push_back(maxindex);
713 count++;
714 }
715
716
717 // mode filter on chordSequence
718 count = 0;
719 string oldChord = "";
720 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
721 Feature chord_feature; // chord estimate 549 Feature chord_feature; // chord estimate
722 chord_feature.hasTimestamp = true; 550 chord_feature.hasTimestamp = true;
723 chord_feature.timestamp = *it; 551 chord_feature.timestamp = timestamps[iFrame];
724 // Feature currentChord; // chord estimate 552 chord_feature.label = m_chordnames[chordpath[iFrame]];
725 // currentChord.hasTimestamp = true; 553 fsOut[m_outputChords].push_back(chord_feature);
726 // currentChord.timestamp = currentChromas.timestamp; 554 oldchord = chordpath[iFrame];
727 555 // chord notes
728 vector<int> chordCount = vector<int>(nChord,0); 556 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
729 int maxChordCount = 0; 557 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
730 int maxChordIndex = nChord-1; 558 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
731 string maxChord; 559 }
732 int startIndex = max(count - halfwindowlength/2,0); 560 oldnotes.clear();
733 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); 561 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
734 for (int i = startIndex; i < endIndex; i++) { 562 Feature chordnote_feature;
735 chordCount[chordSequence[i]]++; 563 chordnote_feature.hasTimestamp = true;
736 if (chordCount[chordSequence[i]] > maxChordCount) { 564 chordnote_feature.timestamp = timestamps[iFrame];
737 // cerr << "start index " << startIndex << endl; 565 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
738 maxChordCount++; 566 chordnote_feature.hasDuration = true;
739 maxChordIndex = chordSequence[i]; 567 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
740 maxChord = m_chordnames[maxChordIndex]; 568 oldnotes.push_back(chordnote_feature);
741 } 569 }
742 } 570 }
743 // chordSequence[count] = maxChordIndex; 571 /* calculating simple chord change prob */
744 // cerr << maxChordIndex << endl; 572 for (int iChord = 0; iChord < nChord; iChord++) {
745 // cerr << chordchange[count] << endl; 573 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
746 if (oldChord != maxChord) { 574 }
747 oldChord = maxChord; 575 }
748 chord_feature.label = m_chordnames[maxChordIndex]; 576
749 fsOut[m_outputChords].push_back(chord_feature); 577 float logscale = 0;
750 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord 578 for (int iFrame = 0; iFrame < nFrame; ++iFrame) {
751 oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp; 579 logscale -= log(scale[iFrame]);
752 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]); 580 Feature loglikelihood;
753 } 581 loglikelihood.hasTimestamp = true;
754 oldnotes.clear(); 582 loglikelihood.timestamp = timestamps[iFrame];
755 for (int iNote = 0; iNote < (int)m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord 583 loglikelihood.values.push_back(-log(scale[iFrame]));
756 Feature chordnote_feature; 584 // cerr << chordchange[iFrame] << endl;
757 chordnote_feature.hasTimestamp = true; 585 fsOut[m_outputLoglikelihood].push_back(loglikelihood);
758 chordnote_feature.timestamp = chord_feature.timestamp; 586 }
759 chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]); 587 logscale /= nFrame;
760 chordnote_feature.hasDuration = true; 588
761 chordnote_feature.duration = -chord_feature.timestamp; // this will be corrected at the next chord
762 oldnotes.push_back(chordnote_feature);
763 }
764 }
765 count++;
766 }
767 }
768 Feature chord_feature; // last chord estimate
769 chord_feature.hasTimestamp = true; 589 chord_feature.hasTimestamp = true;
770 chord_feature.timestamp = timestamps[timestamps.size()-1]; 590 chord_feature.timestamp = timestamps[timestamps.size()-1];
771 chord_feature.label = "N"; 591 chord_feature.label = "N";
772 fsOut[m_outputChords].push_back(chord_feature); 592 fsOut[m_outputChords].push_back(chord_feature);
773 593