Mercurial > hg > nnls-chroma
comparison NNLSBase.cpp @ 91:b56dde3417d4 matthiasm-plugin
* Fix the "comparison between signed and unsigned" warnings; remove some ifdef'd-out old code
| field | value |
|---|---|
| author | Chris Cannam |
| date | Thu, 02 Dec 2010 13:05:23 +0000 |
| parents | 7af5312e66f8 |
| children | a76598852303 |
Comparison legend: equal, deleted, inserted, replaced
90:b095d83585c9 | 91:b56dde3417d4 |
---|---|
392 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel); | 392 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel); |
393 m_kernelValue.clear(); | 393 m_kernelValue.clear(); |
394 m_kernelFftIndex.clear(); | 394 m_kernelFftIndex.clear(); |
395 m_kernelNoteIndex.clear(); | 395 m_kernelNoteIndex.clear(); |
396 int countNonzero = 0; | 396 int countNonzero = 0; |
397 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix | 397 for (int iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix |
398 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) { | 398 for (int iFFT = 0; iFFT < blockSize/2; ++iFFT) { |
399 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { | 399 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { |
400 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]); | 400 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]); |
401 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { | 401 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { |
402 countNonzero++; | 402 countNonzero++; |
403 } | 403 } |
471 } | 471 } |
472 } | 472 } |
473 | 473 |
474 // note magnitude mapping using pre-calculated matrix | 474 // note magnitude mapping using pre-calculated matrix |
475 float *nm = new float[nNote]; // note magnitude | 475 float *nm = new float[nNote]; // note magnitude |
476 for (size_t iNote = 0; iNote < nNote; iNote++) { | 476 for (int iNote = 0; iNote < nNote; iNote++) { |
477 nm[iNote] = 0; // initialise as 0 | 477 nm[iNote] = 0; // initialise as 0 |
478 } | 478 } |
479 int binCount = 0; | 479 int binCount = 0; |
480 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) { | 480 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) { |
481 // cerr << "."; | 481 // cerr << "."; |
515 m_localTuning.push_back(normalisedtuning); | 515 m_localTuning.push_back(normalisedtuning); |
516 | 516 |
517 Feature f1; // logfreqspec | 517 Feature f1; // logfreqspec |
518 f1.hasTimestamp = true; | 518 f1.hasTimestamp = true; |
519 f1.timestamp = timestamp; | 519 f1.timestamp = timestamp; |
520 for (size_t iNote = 0; iNote < nNote; iNote++) { | 520 for (int iNote = 0; iNote < nNote; iNote++) { |
521 f1.values.push_back(nm[iNote]); | 521 f1.values.push_back(nm[iNote]); |
522 } | 522 } |
523 | 523 |
524 // deletes | 524 // deletes |
525 delete[] magnitude; | 525 delete[] magnitude; |
526 delete[] nm; | 526 delete[] nm; |
527 | 527 |
528 m_logSpectrum.push_back(f1); // remember note magnitude | 528 m_logSpectrum.push_back(f1); // remember note magnitude |
529 } | 529 } |
530 | 530 |
531 | |
532 #ifdef NOT_DEFINED | |
533 | |
534 NNLSBase::FeatureSet | |
535 NNLSBase::getRemainingFeatures() | |
536 { | |
537 // if (debug_on) cerr << "--> getRemainingFeatures" << endl; | |
538 FeatureSet fsOut; | |
539 // if (m_logSpectrum.size() == 0) return fsOut; | |
540 // int nChord = m_chordnames.size(); | |
541 // // | |
542 // /** Calculate Tuning | |
543 // calculate tuning from (using the angle of the complex number defined by the | |
544 // cumulative mean real and imag values) | |
545 // **/ | |
546 // float meanTuningImag = sinvalue * m_meanTunings[1] - sinvalue * m_meanTunings[2]; | |
547 // float meanTuningReal = m_meanTunings[0] + cosvalue * m_meanTunings[1] + cosvalue * m_meanTunings[2]; | |
548 // float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); | |
549 // float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); | |
550 // int intShift = floor(normalisedtuning * 3); | |
551 // float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this | |
552 // | |
553 // char buffer0 [50]; | |
554 // | |
555 // sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); | |
556 // | |
557 // // cerr << "normalisedtuning: " << normalisedtuning << '\n'; | |
558 // | |
559 // // push tuning to FeatureSet fsOut | |
560 // Feature f0; // tuning | |
561 // f0.hasTimestamp = true; | |
562 // f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));; | |
563 // f0.label = buffer0; | |
564 // fsOut[0].push_back(f0); | |
565 // | |
566 // /** Tune Log-Frequency Spectrogram | |
567 // calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to | |
568 // perform linear interpolation on the existing log-frequency spectrogram (kinda f1). | |
569 // **/ | |
570 // cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... "; | |
571 // | |
572 // float tempValue = 0; | |
573 // float dbThreshold = 0; // relative to the background spectrum | |
574 // float thresh = pow(10,dbThreshold/20); | |
575 // // cerr << "tune local ? " << m_tuneLocal << endl; | |
576 // int count = 0; | |
577 // | |
578 // for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) { | |
579 // Feature f1 = *i; | |
580 // Feature f2; // tuned log-frequency spectrum | |
581 // f2.hasTimestamp = true; | |
582 // f2.timestamp = f1.timestamp; | |
583 // f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero | |
584 // | |
585 // if (m_tuneLocal == 1.0) { | |
586 // intShift = floor(m_localTuning[count] * 3); | |
587 // floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this | |
588 // } | |
589 // | |
590 // // cerr << intShift << " " << floatShift << endl; | |
591 // | |
592 // for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins | |
593 // tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift; | |
594 // f2.values.push_back(tempValue); | |
595 // } | |
596 // | |
597 // f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge | |
598 // vector<float> runningmean = SpecialConvolution(f2.values,hw); | |
599 // vector<float> runningstd; | |
600 // for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance) | |
601 // runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i])); | |
602 // } | |
603 // runningstd = SpecialConvolution(runningstd,hw); // second step convolve | |
604 // for (int i = 0; i < nNote; i++) { | |
605 // runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std | |
606 // if (runningstd[i] > 0) { | |
607 // // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ? | |
608 // // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0; | |
609 // f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ? | |
610 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0; | |
611 // } | |
612 // if (f2.values[i] < 0) { | |
613 // cerr << "ERROR: negative value in logfreq spectrum" << endl; | |
614 // } | |
615 // } | |
616 // fsOut[2].push_back(f2); | |
617 // count++; | |
618 // } | |
619 // cerr << "done." << endl; | |
620 // | |
621 // /** Semitone spectrum and chromagrams | |
622 // Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum | |
623 // is inferred using a non-negative least squares algorithm. | |
624 // Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means | |
625 // bass and treble stacked onto each other). | |
626 // **/ | |
627 // if (m_useNNLS == 0) { | |
628 // cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... "; | |
629 // } else { | |
630 // cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... "; | |
631 // } | |
632 // | |
633 // | |
634 // vector<vector<float> > chordogram; | |
635 // vector<vector<int> > scoreChordogram; | |
636 // vector<float> chordchange = vector<float>(fsOut[2].size(),0); | |
637 // vector<float> oldchroma = vector<float>(12,0); | |
638 // vector<float> oldbasschroma = vector<float>(12,0); | |
639 // count = 0; | |
640 // | |
641 // for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) { | |
642 // Feature f2 = *it; // logfreq spectrum | |
643 // Feature f3; // semitone spectrum | |
644 // Feature f4; // treble chromagram | |
645 // Feature f5; // bass chromagram | |
646 // Feature f6; // treble and bass chromagram | |
647 // | |
648 // f3.hasTimestamp = true; | |
649 // f3.timestamp = f2.timestamp; | |
650 // | |
651 // f4.hasTimestamp = true; | |
652 // f4.timestamp = f2.timestamp; | |
653 // | |
654 // f5.hasTimestamp = true; | |
655 // f5.timestamp = f2.timestamp; | |
656 // | |
657 // f6.hasTimestamp = true; | |
658 // f6.timestamp = f2.timestamp; | |
659 // | |
660 // float b[nNote]; | |
661 // | |
662 // bool some_b_greater_zero = false; | |
663 // float sumb = 0; | |
664 // for (int i = 0; i < nNote; i++) { | |
665 // // b[i] = m_dict[(nNote * count + i) % (nNote * 84)]; | |
666 // b[i] = f2.values[i]; | |
667 // sumb += b[i]; | |
668 // if (b[i] > 0) { | |
669 // some_b_greater_zero = true; | |
670 // } | |
671 // } | |
672 // | |
673 // // here's where the non-negative least squares algorithm calculates the note activation x | |
674 // | |
675 // vector<float> chroma = vector<float>(12, 0); | |
676 // vector<float> basschroma = vector<float>(12, 0); | |
677 // float currval; | |
678 // unsigned iSemitone = 0; | |
679 // | |
680 // if (some_b_greater_zero) { | |
681 // if (m_useNNLS == 0) { | |
682 // for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { | |
683 // currval = 0; | |
684 // currval += b[iNote + 1 + -1] * 0.5; | |
685 // currval += b[iNote + 1 + 0] * 1.0; | |
686 // currval += b[iNote + 1 + 1] * 0.5; | |
687 // f3.values.push_back(currval); | |
688 // chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; | |
689 // basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; | |
690 // iSemitone++; | |
691 // } | |
692 // | |
693 // } else { | |
694 // float x[84+1000]; | |
695 // for (int i = 1; i < 1084; ++i) x[i] = 1.0; | |
696 // vector<int> signifIndex; | |
697 // int index=0; | |
698 // sumb /= 84.0; | |
699 // for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { | |
700 // float currval = 0; | |
701 // currval += b[iNote + 1 + -1]; | |
702 // currval += b[iNote + 1 + 0]; | |
703 // currval += b[iNote + 1 + 1]; | |
704 // if (currval > 0) signifIndex.push_back(index); | |
705 // f3.values.push_back(0); // fill the values, change later | |
706 // index++; | |
707 // } | |
708 // float rnorm; | |
709 // float w[84+1000]; | |
710 // float zz[84+1000]; | |
711 // int indx[84+1000]; | |
712 // int mode; | |
713 // int dictsize = nNote*signifIndex.size(); | |
714 // // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl; | |
715 // float *curr_dict = new float[dictsize]; | |
716 // for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { | |
717 // for (unsigned iBin = 0; iBin < nNote; iBin++) { | |
718 // curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin]; | |
719 // } | |
720 // } | |
721 // nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode); | |
722 // delete [] curr_dict; | |
723 // for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { | |
724 // f3.values[signifIndex[iNote]] = x[iNote]; | |
725 // // cerr << mode << endl; | |
726 // chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; | |
727 // basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; | |
728 // } | |
729 // } | |
730 // } | |
731 // | |
732 // | |
733 // | |
734 // | |
735 // f4.values = chroma; | |
736 // f5.values = basschroma; | |
737 // chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas | |
738 // f6.values = chroma; | |
739 // | |
740 // if (m_doNormalizeChroma > 0) { | |
741 // vector<float> chromanorm = vector<float>(3,0); | |
742 // switch (int(m_doNormalizeChroma)) { | |
743 // case 0: // should never end up here | |
744 // break; | |
745 // case 1: | |
746 // chromanorm[0] = *max_element(f4.values.begin(), f4.values.end()); | |
747 // chromanorm[1] = *max_element(f5.values.begin(), f5.values.end()); | |
748 // chromanorm[2] = max(chromanorm[0], chromanorm[1]); | |
749 // break; | |
750 // case 2: | |
751 // for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) { | |
752 // chromanorm[0] += *it; | |
753 // } | |
754 // for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) { | |
755 // chromanorm[1] += *it; | |
756 // } | |
757 // for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) { | |
758 // chromanorm[2] += *it; | |
759 // } | |
760 // break; | |
761 // case 3: | |
762 // for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) { | |
763 // chromanorm[0] += pow(*it,2); | |
764 // } | |
765 // chromanorm[0] = sqrt(chromanorm[0]); | |
766 // for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) { | |
767 // chromanorm[1] += pow(*it,2); | |
768 // } | |
769 // chromanorm[1] = sqrt(chromanorm[1]); | |
770 // for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) { | |
771 // chromanorm[2] += pow(*it,2); | |
772 // } | |
773 // chromanorm[2] = sqrt(chromanorm[2]); | |
774 // break; | |
775 // } | |
776 // if (chromanorm[0] > 0) { | |
777 // for (int i = 0; i < f4.values.size(); i++) { | |
778 // f4.values[i] /= chromanorm[0]; | |
779 // } | |
780 // } | |
781 // if (chromanorm[1] > 0) { | |
782 // for (int i = 0; i < f5.values.size(); i++) { | |
783 // f5.values[i] /= chromanorm[1]; | |
784 // } | |
785 // } | |
786 // if (chromanorm[2] > 0) { | |
787 // for (int i = 0; i < f6.values.size(); i++) { | |
788 // f6.values[i] /= chromanorm[2]; | |
789 // } | |
790 // } | |
791 // | |
792 // } | |
793 // | |
794 // // local chord estimation | |
795 // vector<float> currentChordSalience; | |
796 // float tempchordvalue = 0; | |
797 // float sumchordvalue = 0; | |
798 // | |
799 // for (int iChord = 0; iChord < nChord; iChord++) { | |
800 // tempchordvalue = 0; | |
801 // for (int iBin = 0; iBin < 12; iBin++) { | |
802 // tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; | |
803 // } | |
804 // for (int iBin = 12; iBin < 24; iBin++) { | |
805 // tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; | |
806 // } | |
807 // sumchordvalue+=tempchordvalue; | |
808 // currentChordSalience.push_back(tempchordvalue); | |
809 // } | |
810 // if (sumchordvalue > 0) { | |
811 // for (int iChord = 0; iChord < nChord; iChord++) { | |
812 // currentChordSalience[iChord] /= sumchordvalue; | |
813 // } | |
814 // } else { | |
815 // currentChordSalience[nChord-1] = 1.0; | |
816 // } | |
817 // chordogram.push_back(currentChordSalience); | |
818 // | |
819 // fsOut[3].push_back(f3); | |
820 // fsOut[4].push_back(f4); | |
821 // fsOut[5].push_back(f5); | |
822 // fsOut[6].push_back(f6); | |
823 // count++; | |
824 // } | |
825 // cerr << "done." << endl; | |
826 // | |
827 // | |
828 // /* Simple chord estimation | |
829 // I just take the local chord estimates ("currentChordSalience") and average them over time, then | |
830 // take the maximum. Very simple, don't do this at home... | |
831 // */ | |
832 // cerr << "[NNLS Chroma Plugin] Chord Estimation ... "; | |
833 // count = 0; | |
834 // int halfwindowlength = m_inputSampleRate / m_stepSize; | |
835 // vector<int> chordSequence; | |
836 // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram | |
837 // vector<int> temp = vector<int>(nChord,0); | |
838 // scoreChordogram.push_back(temp); | |
839 // } | |
840 // for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) { | |
841 // int startIndex = count + 1; | |
842 // int endIndex = count + 2 * halfwindowlength; | |
843 // | |
844 // float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1); | |
845 // | |
846 // vector<int> chordCandidates; | |
847 // for (unsigned iChord = 0; iChord < nChord-1; iChord++) { | |
848 // // float currsum = 0; | |
849 // // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { | |
850 // // currsum += chordogram[iFrame][iChord]; | |
851 // // } | |
852 // // if (currsum > chordThreshold) chordCandidates.push_back(iChord); | |
853 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { | |
854 // if (chordogram[iFrame][iChord] > chordThreshold) { | |
855 // chordCandidates.push_back(iChord); | |
856 // break; | |
857 // } | |
858 // } | |
859 // } | |
860 // chordCandidates.push_back(nChord-1); | |
861 // // cerr << chordCandidates.size() << endl; | |
862 // | |
863 // float maxval = 0; // will be the value of the most salient *chord change* in this frame | |
864 // float maxindex = 0; //... and the index thereof | |
865 // unsigned bestchordL = nChord-1; // index of the best "left" chord | |
866 // unsigned bestchordR = nChord-1; // index of the best "right" chord | |
867 // | |
868 // for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { | |
869 // // now find the max values on both sides of iWF | |
870 // // left side: | |
871 // float maxL = 0; | |
872 // unsigned maxindL = nChord-1; | |
873 // for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { | |
874 // unsigned iChord = chordCandidates[kChord]; | |
875 // float currsum = 0; | |
876 // for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) { | |
877 // currsum += chordogram[count+iFrame][iChord]; | |
878 // } | |
879 // if (iChord == nChord-1) currsum *= 0.8; | |
880 // if (currsum > maxL) { | |
881 // maxL = currsum; | |
882 // maxindL = iChord; | |
883 // } | |
884 // } | |
885 // // right side: | |
886 // float maxR = 0; | |
887 // unsigned maxindR = nChord-1; | |
888 // for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { | |
889 // unsigned iChord = chordCandidates[kChord]; | |
890 // float currsum = 0; | |
891 // for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { | |
892 // currsum += chordogram[count+iFrame][iChord]; | |
893 // } | |
894 // if (iChord == nChord-1) currsum *= 0.8; | |
895 // if (currsum > maxR) { | |
896 // maxR = currsum; | |
897 // maxindR = iChord; | |
898 // } | |
899 // } | |
900 // if (maxL+maxR > maxval) { | |
901 // maxval = maxL+maxR; | |
902 // maxindex = iWF; | |
903 // bestchordL = maxindL; | |
904 // bestchordR = maxindR; | |
905 // } | |
906 // | |
907 // } | |
908 // // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; | |
909 // // add a score to every chord-frame-point that was part of a maximum | |
910 // for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) { | |
911 // scoreChordogram[iFrame+count][bestchordL]++; | |
912 // } | |
913 // for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { | |
914 // scoreChordogram[iFrame+count][bestchordR]++; | |
915 // } | |
916 // if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; | |
917 // count++; | |
918 // } | |
919 // // cerr << "******* agent finished *******" << endl; | |
920 // count = 0; | |
921 // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { | |
922 // float maxval = 0; // will be the value of the most salient chord in this frame | |
923 // float maxindex = 0; //... and the index thereof | |
924 // for (unsigned iChord = 0; iChord < nChord; iChord++) { | |
925 // if (scoreChordogram[count][iChord] > maxval) { | |
926 // maxval = scoreChordogram[count][iChord]; | |
927 // maxindex = iChord; | |
928 // // cerr << iChord << endl; | |
929 // } | |
930 // } | |
931 // chordSequence.push_back(maxindex); | |
932 // // cerr << "before modefilter, maxindex: " << maxindex << endl; | |
933 // count++; | |
934 // } | |
935 // // cerr << "******* mode filter done *******" << endl; | |
936 // | |
937 // | |
938 // // mode filter on chordSequence | |
939 // count = 0; | |
940 // string oldChord = ""; | |
941 // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { | |
942 // Feature f6 = *it; | |
943 // Feature f7; // chord estimate | |
944 // f7.hasTimestamp = true; | |
945 // f7.timestamp = f6.timestamp; | |
946 // Feature f8; // chord estimate | |
947 // f8.hasTimestamp = true; | |
948 // f8.timestamp = f6.timestamp; | |
949 // | |
950 // vector<int> chordCount = vector<int>(nChord,0); | |
951 // int maxChordCount = 0; | |
952 // int maxChordIndex = nChord-1; | |
953 // string maxChord; | |
954 // int startIndex = max(count - halfwindowlength/2,0); | |
955 // int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); | |
956 // for (int i = startIndex; i < endIndex; i++) { | |
957 // chordCount[chordSequence[i]]++; | |
958 // if (chordCount[chordSequence[i]] > maxChordCount) { | |
959 // // cerr << "start index " << startIndex << endl; | |
960 // maxChordCount++; | |
961 // maxChordIndex = chordSequence[i]; | |
962 // maxChord = m_chordnames[maxChordIndex]; | |
963 // } | |
964 // } | |
965 // // chordSequence[count] = maxChordIndex; | |
966 // // cerr << maxChordIndex << endl; | |
967 // f8.values.push_back(chordchange[count]/(halfwindowlength*2)); | |
968 // // cerr << chordchange[count] << endl; | |
969 // fsOut[9].push_back(f8); | |
970 // if (oldChord != maxChord) { | |
971 // oldChord = maxChord; | |
972 // | |
973 // // char buffer1 [50]; | |
974 // // if (maxChordIndex < nChord - 1) { | |
975 // // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]); | |
976 // // } else { | |
977 // // sprintf(buffer1, "N"); | |
978 // // } | |
979 // // f7.label = buffer1; | |
980 // f7.label = m_chordnames[maxChordIndex]; | |
981 // fsOut[7].push_back(f7); | |
982 // } | |
983 // count++; | |
984 // } | |
985 // Feature f7; // last chord estimate | |
986 // f7.hasTimestamp = true; | |
987 // f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp; | |
988 // f7.label = "N"; | |
989 // fsOut[7].push_back(f7); | |
990 // cerr << "done." << endl; | |
991 // // // musicity | |
992 // // count = 0; | |
993 // // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2 | |
994 // // vector<float> musicityValue; | |
995 // // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { | |
996 // // Feature f4 = *it; | |
997 // // | |
998 // // int startIndex = max(count - musicitykernelwidth/2,0); | |
999 // // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); | |
1000 // // float chromasum = 0; | |
1001 // // float diffsum = 0; | |
1002 // // for (int k = 0; k < 12; k++) { | |
1003 // // for (int i = startIndex + 1; i < endIndex; i++) { | |
1004 // // chromasum += pow(fsOut[4][i].values[k],2); | |
1005 // // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]); | |
1006 // // } | |
1007 // // } | |
1008 // // diffsum /= chromasum; | |
1009 // // musicityValue.push_back(diffsum); | |
1010 // // count++; | |
1011 // // } | |
1012 // // | |
1013 // // float musicityThreshold = 0.44; | |
1014 // // if (m_stepSize == 4096) { | |
1015 // // musicityThreshold = 0.74; | |
1016 // // } | |
1017 // // if (m_stepSize == 4410) { | |
1018 // // musicityThreshold = 0.77; | |
1019 // // } | |
1020 // // | |
1021 // // count = 0; | |
1022 // // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { | |
1023 // // Feature f4 = *it; | |
1024 // // Feature f8; // musicity | |
1025 // // Feature f9; // musicity segmenter | |
1026 // // | |
1027 // // f8.hasTimestamp = true; | |
1028 // // f8.timestamp = f4.timestamp; | |
1029 // // f9.hasTimestamp = true; | |
1030 // // f9.timestamp = f4.timestamp; | |
1031 // // | |
1032 // // int startIndex = max(count - musicitykernelwidth/2,0); | |
1033 // // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); | |
1034 // // int musicityCount = 0; | |
1035 // // for (int i = startIndex; i <= endIndex; i++) { | |
1036 // // if (musicityValue[i] > musicityThreshold) musicityCount++; | |
1037 // // } | |
1038 // // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1); | |
1039 // // | |
1040 // // if (isSpeech) { | |
1041 // // if (oldlabeltype != 2) { | |
1042 // // f9.label = "Speech"; | |
1043 // // fsOut[9].push_back(f9); | |
1044 // // oldlabeltype = 2; | |
1045 // // } | |
1046 // // } else { | |
1047 // // if (oldlabeltype != 1) { | |
1048 // // f9.label = "Music"; | |
1049 // // fsOut[9].push_back(f9); | |
1050 // // oldlabeltype = 1; | |
1051 // // } | |
1052 // // } | |
1053 // // f8.values.push_back(musicityValue[count]); | |
1054 // // fsOut[8].push_back(f8); | |
1055 // // count++; | |
1056 // // } | |
1057 return fsOut; | |
1058 | |
1059 } | |
1060 | |
1061 #endif |