Mercurial > hg > nnls-chroma
comparison Chordino.cpp @ 131:b547e7238bf5 darwintunes
removed old chord inference
author | matthiasm |
---|---|
date | Thu, 16 Jun 2011 17:28:22 +0100 |
parents | a5ee5fe71e52 |
children | 5c1a25b3daf0 |
comparison
equal
deleted
inserted
replaced
130:014e62613e75 | 131:b547e7238bf5 |
---|---|
79 useNNLSParam.maxValue = 1.0; | 79 useNNLSParam.maxValue = 1.0; |
80 useNNLSParam.defaultValue = 1.0; | 80 useNNLSParam.defaultValue = 1.0; |
81 useNNLSParam.isQuantized = true; | 81 useNNLSParam.isQuantized = true; |
82 useNNLSParam.quantizeStep = 1.0; | 82 useNNLSParam.quantizeStep = 1.0; |
83 list.push_back(useNNLSParam); | 83 list.push_back(useNNLSParam); |
84 | |
85 ParameterDescriptor useHMMParam; | |
86 useHMMParam.identifier = "useHMM"; | |
87 useHMMParam.name = "HMM (Viterbi decoding)"; | |
88 useHMMParam.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used)."; | |
89 useHMMParam.unit = ""; | |
90 useHMMParam.minValue = 0.0; | |
91 useHMMParam.maxValue = 1.0; | |
92 useHMMParam.defaultValue = 1.0; | |
93 useHMMParam.isQuantized = true; | |
94 useHMMParam.quantizeStep = 1.0; | |
95 list.push_back(useHMMParam); | |
96 | 84 |
97 ParameterDescriptor rollonParam; | 85 ParameterDescriptor rollonParam; |
98 rollonParam.identifier = "rollon"; | 86 rollonParam.identifier = "rollon"; |
99 rollonParam.name = "bass noise threshold"; | 87 rollonParam.name = "bass noise threshold"; |
100 rollonParam.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed."; | 88 rollonParam.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed."; |
525 } | 513 } |
526 cerr << "done." << endl; | 514 cerr << "done." << endl; |
527 | 515 |
528 vector<Feature> oldnotes; | 516 vector<Feature> oldnotes; |
529 | 517 |
530 // bool m_useHMM = true; // this will go into the chordino header file. | 518 cerr << "[Chordino Plugin] HMM Chord Estimation ... "; |
531 if (m_useHMM == 1.0) { | 519 int oldchord = nChord-1; |
532 cerr << "[Chordino Plugin] HMM Chord Estimation ... "; | 520 double selftransprob = 0.99; |
533 int oldchord = nChord-1; | 521 |
534 double selftransprob = 0.99; | 522 // vector<double> init = vector<double>(nChord,1.0/nChord); |
535 | 523 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1; |
536 // vector<double> init = vector<double>(nChord,1.0/nChord); | 524 |
537 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1; | 525 double *delta; |
538 | 526 delta = (double *)malloc(sizeof(double)*nFrame*nChord); |
539 double *delta; | 527 |
540 delta = (double *)malloc(sizeof(double)*nFrame*nChord); | 528 vector<vector<double> > trans; |
541 | 529 for (int iChord = 0; iChord < nChord; iChord++) { |
542 vector<vector<double> > trans; | 530 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1)); |
543 for (int iChord = 0; iChord < nChord; iChord++) { | 531 temp[iChord] = selftransprob; |
544 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1)); | 532 trans.push_back(temp); |
545 temp[iChord] = selftransprob; | 533 } |
546 trans.push_back(temp); | 534 vector<double> scale; |
547 } | 535 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale); |
548 vector<double> scale; | 536 |
549 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale); | 537 |
550 | 538 Feature chord_feature; // chord estimate |
551 | 539 chord_feature.hasTimestamp = true; |
552 Feature chord_feature; // chord estimate | 540 chord_feature.timestamp = timestamps[0]; |
553 chord_feature.hasTimestamp = true; | 541 chord_feature.label = m_chordnames[chordpath[0]]; |
554 chord_feature.timestamp = timestamps[0]; | 542 fsOut[m_outputChords].push_back(chord_feature); |
555 chord_feature.label = m_chordnames[chordpath[0]]; | 543 |
556 fsOut[m_outputChords].push_back(chord_feature); | 544 chordchange[0] = 0; |
557 | 545 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) { |
558 chordchange[0] = 0; | 546 // cerr << chordpath[iFrame] << endl; |
559 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) { | 547 if (chordpath[iFrame] != oldchord ) { |
560 // cerr << chordpath[iFrame] << endl; | 548 // chord |
561 if (chordpath[iFrame] != oldchord ) { | |
562 // chord | |
563 Feature chord_feature; // chord estimate | |
564 chord_feature.hasTimestamp = true; | |
565 chord_feature.timestamp = timestamps[iFrame]; | |
566 chord_feature.label = m_chordnames[chordpath[iFrame]]; | |
567 fsOut[m_outputChords].push_back(chord_feature); | |
568 oldchord = chordpath[iFrame]; | |
569 // chord notes | |
570 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord | |
571 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame]; | |
572 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]); | |
573 } | |
574 oldnotes.clear(); | |
575 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord | |
576 Feature chordnote_feature; | |
577 chordnote_feature.hasTimestamp = true; | |
578 chordnote_feature.timestamp = timestamps[iFrame]; | |
579 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]); | |
580 chordnote_feature.hasDuration = true; | |
581 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord | |
582 oldnotes.push_back(chordnote_feature); | |
583 } | |
584 } | |
585 /* calculating simple chord change prob */ | |
586 for (int iChord = 0; iChord < nChord; iChord++) { | |
587 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]); | |
588 } | |
589 } | |
590 | |
591 float logscale = 0; | |
592 for (int iFrame = 0; iFrame < nFrame; ++iFrame) { | |
593 logscale -= log(scale[iFrame]); | |
594 Feature loglikelihood; | |
595 loglikelihood.hasTimestamp = true; | |
596 loglikelihood.timestamp = timestamps[iFrame]; | |
597 loglikelihood.values.push_back(-log(scale[iFrame])); | |
598 // cerr << chordchange[iFrame] << endl; | |
599 fsOut[m_outputLoglikelihood].push_back(loglikelihood); | |
600 } | |
601 logscale /= nFrame; | |
602 // cerr << "loglik" << logscale << endl; | |
603 | |
604 | |
605 // cerr << chordpath[0] << endl; | |
606 } else { | |
607 /* Simple chord estimation | |
608 I just take the local chord estimates ("currentChordSalience") and average them over time, then | |
609 take the maximum. Very simple, don't do this at home... | |
610 */ | |
611 cerr << "[Chordino Plugin] Simple Chord Estimation ... "; | |
612 count = 0; | |
613 int halfwindowlength = m_inputSampleRate / m_stepSize; | |
614 vector<int> chordSequence; | |
615 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram | |
616 vector<int> temp = vector<int>(nChord,0); | |
617 scoreChordogram.push_back(temp); | |
618 } | |
619 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) { | |
620 int startIndex = count + 1; | |
621 int endIndex = count + 2 * halfwindowlength; | |
622 | |
623 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1); | |
624 | |
625 vector<int> chordCandidates; | |
626 for (int iChord = 0; iChord+1 < nChord; iChord++) { | |
627 // float currsum = 0; | |
628 // for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) { | |
629 // currsum += chordogram[iFrame][iChord]; | |
630 // } | |
631 // if (currsum > chordThreshold) chordCandidates.push_back(iChord); | |
632 for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) { | |
633 if (chordogram[iFrame][iChord] > chordThreshold) { | |
634 chordCandidates.push_back(iChord); | |
635 break; | |
636 } | |
637 } | |
638 } | |
639 chordCandidates.push_back(nChord-1); | |
640 // cerr << chordCandidates.size() << endl; | |
641 | |
642 float maxval = 0; // will be the value of the most salient *chord change* in this frame | |
643 float maxindex = 0; //... and the index thereof | |
644 int bestchordL = nChord-1; // index of the best "left" chord | |
645 int bestchordR = nChord-1; // index of the best "right" chord | |
646 | |
647 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { | |
648 // now find the max values on both sides of iWF | |
649 // left side: | |
650 float maxL = 0; | |
651 int maxindL = nChord-1; | |
652 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) { | |
653 int iChord = chordCandidates[kChord]; | |
654 float currsum = 0; | |
655 for (int iFrame = 0; iFrame < iWF-1; ++iFrame) { | |
656 currsum += chordogram[count+iFrame][iChord]; | |
657 } | |
658 if (iChord == nChord-1) currsum *= 0.8; | |
659 if (currsum > maxL) { | |
660 maxL = currsum; | |
661 maxindL = iChord; | |
662 } | |
663 } | |
664 // right side: | |
665 float maxR = 0; | |
666 int maxindR = nChord-1; | |
667 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) { | |
668 int iChord = chordCandidates[kChord]; | |
669 float currsum = 0; | |
670 for (int iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { | |
671 currsum += chordogram[count+iFrame][iChord]; | |
672 } | |
673 if (iChord == nChord-1) currsum *= 0.8; | |
674 if (currsum > maxR) { | |
675 maxR = currsum; | |
676 maxindR = iChord; | |
677 } | |
678 } | |
679 if (maxL+maxR > maxval) { | |
680 maxval = maxL+maxR; | |
681 maxindex = iWF; | |
682 bestchordL = maxindL; | |
683 bestchordR = maxindR; | |
684 } | |
685 | |
686 } | |
687 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; | |
688 // add a score to every chord-frame-point that was part of a maximum | |
689 for (int iFrame = 0; iFrame < maxindex-1; ++iFrame) { | |
690 scoreChordogram[iFrame+count][bestchordL]++; | |
691 } | |
692 for (int iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { | |
693 scoreChordogram[iFrame+count][bestchordR]++; | |
694 } | |
695 if (bestchordL != bestchordR) { | |
696 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; | |
697 } | |
698 count++; | |
699 } | |
700 // cerr << "******* agent finished *******" << endl; | |
701 count = 0; | |
702 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { | |
703 float maxval = 0; // will be the value of the most salient chord in this frame | |
704 float maxindex = 0; //... and the index thereof | |
705 for (int iChord = 0; iChord < nChord; iChord++) { | |
706 if (scoreChordogram[count][iChord] > maxval) { | |
707 maxval = scoreChordogram[count][iChord]; | |
708 maxindex = iChord; | |
709 // cerr << iChord << endl; | |
710 } | |
711 } | |
712 chordSequence.push_back(maxindex); | |
713 count++; | |
714 } | |
715 | |
716 | |
717 // mode filter on chordSequence | |
718 count = 0; | |
719 string oldChord = ""; | |
720 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { | |
721 Feature chord_feature; // chord estimate | 549 Feature chord_feature; // chord estimate |
722 chord_feature.hasTimestamp = true; | 550 chord_feature.hasTimestamp = true; |
723 chord_feature.timestamp = *it; | 551 chord_feature.timestamp = timestamps[iFrame]; |
724 // Feature currentChord; // chord estimate | 552 chord_feature.label = m_chordnames[chordpath[iFrame]]; |
725 // currentChord.hasTimestamp = true; | 553 fsOut[m_outputChords].push_back(chord_feature); |
726 // currentChord.timestamp = currentChromas.timestamp; | 554 oldchord = chordpath[iFrame]; |
727 | 555 // chord notes |
728 vector<int> chordCount = vector<int>(nChord,0); | 556 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord |
729 int maxChordCount = 0; | 557 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame]; |
730 int maxChordIndex = nChord-1; | 558 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]); |
731 string maxChord; | 559 } |
732 int startIndex = max(count - halfwindowlength/2,0); | 560 oldnotes.clear(); |
733 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); | 561 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord |
734 for (int i = startIndex; i < endIndex; i++) { | 562 Feature chordnote_feature; |
735 chordCount[chordSequence[i]]++; | 563 chordnote_feature.hasTimestamp = true; |
736 if (chordCount[chordSequence[i]] > maxChordCount) { | 564 chordnote_feature.timestamp = timestamps[iFrame]; |
737 // cerr << "start index " << startIndex << endl; | 565 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]); |
738 maxChordCount++; | 566 chordnote_feature.hasDuration = true; |
739 maxChordIndex = chordSequence[i]; | 567 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord |
740 maxChord = m_chordnames[maxChordIndex]; | 568 oldnotes.push_back(chordnote_feature); |
741 } | 569 } |
742 } | 570 } |
743 // chordSequence[count] = maxChordIndex; | 571 /* calculating simple chord change prob */ |
744 // cerr << maxChordIndex << endl; | 572 for (int iChord = 0; iChord < nChord; iChord++) { |
745 // cerr << chordchange[count] << endl; | 573 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]); |
746 if (oldChord != maxChord) { | 574 } |
747 oldChord = maxChord; | 575 } |
748 chord_feature.label = m_chordnames[maxChordIndex]; | 576 |
749 fsOut[m_outputChords].push_back(chord_feature); | 577 float logscale = 0; |
750 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord | 578 for (int iFrame = 0; iFrame < nFrame; ++iFrame) { |
751 oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp; | 579 logscale -= log(scale[iFrame]); |
752 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]); | 580 Feature loglikelihood; |
753 } | 581 loglikelihood.hasTimestamp = true; |
754 oldnotes.clear(); | 582 loglikelihood.timestamp = timestamps[iFrame]; |
755 for (int iNote = 0; iNote < (int)m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord | 583 loglikelihood.values.push_back(-log(scale[iFrame])); |
756 Feature chordnote_feature; | 584 // cerr << chordchange[iFrame] << endl; |
757 chordnote_feature.hasTimestamp = true; | 585 fsOut[m_outputLoglikelihood].push_back(loglikelihood); |
758 chordnote_feature.timestamp = chord_feature.timestamp; | 586 } |
759 chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]); | 587 logscale /= nFrame; |
760 chordnote_feature.hasDuration = true; | 588 |
761 chordnote_feature.duration = -chord_feature.timestamp; // this will be corrected at the next chord | |
762 oldnotes.push_back(chordnote_feature); | |
763 } | |
764 } | |
765 count++; | |
766 } | |
767 } | |
768 Feature chord_feature; // last chord estimate | |
769 chord_feature.hasTimestamp = true; | 589 chord_feature.hasTimestamp = true; |
770 chord_feature.timestamp = timestamps[timestamps.size()-1]; | 590 chord_feature.timestamp = timestamps[timestamps.size()-1]; |
771 chord_feature.label = "N"; | 591 chord_feature.label = "N"; |
772 fsOut[m_outputChords].push_back(chord_feature); | 592 fsOut[m_outputChords].push_back(chord_feature); |
773 | 593 |