comparison src/Silvet.cpp @ 325:4cf4313d7e30 livemode

Always use q=0.8 and accept the hit on speed -- the templates are made for that configuration and it does work better. Also some adjustments to thresholding and peak picking for live mode in particular.
author Chris Cannam
date Mon, 18 May 2015 13:58:27 +0100
parents 6f8fa7fc8fdc
children df9a8e16bae6 19c17cd0c7d8
comparison
equal deleted inserted replaced
324:71ffe35578fa 325:4cf4313d7e30
510 CQParameters params(processingSampleRate, 510 CQParameters params(processingSampleRate,
511 minFreq, 511 minFreq,
512 maxFreq, 512 maxFreq,
513 bpo); 513 bpo);
514 514
515 // For params.q, the MIREX code uses 0.8, but it seems that with 515 params.q = 0.8;
516 // atomHopFactor of 0.3, using q == 0.9 or lower drops the FFT 516 params.atomHopFactor = (m_mode == LiveMode ? 1.0 : 0.3);
517 // size to 512 from 1024 and alters some other processing
518 // parameters, making everything much, much slower. Could be a
519 // flaw in the CQ parameter calculations, must check. For
520 // atomHopFactor == 1, q == 0.8 is fine
521 params.q = (m_mode == HighQualityMode ? 0.95 : 0.8);
522 params.atomHopFactor = (m_mode == HighQualityMode ? 0.3 : 1.0);
523 params.threshold = 0.0005; 517 params.threshold = 0.0005;
524 params.decimator = 518 params.decimator =
525 (m_mode == LiveMode ? 519 (m_mode == LiveMode ?
526 CQParameters::FasterDecimator : CQParameters::BetterDecimator); 520 CQParameters::FasterDecimator : CQParameters::BetterDecimator);
527 params.window = CQParameters::Hann; 521 params.window = CQParameters::Hann;
643 637
644 if (filtered.empty()) return; 638 if (filtered.empty()) return;
645 639
646 const InstrumentPack &pack(getPack(m_instrument)); 640 const InstrumentPack &pack(getPack(m_instrument));
647 641
648 for (int i = 0; i < (int)filtered.size(); ++i) { 642 int width = filtered.size();
643
644 double silenceThreshold = 0.01;
645
646 for (int i = 0; i < width; ++i) {
647
648 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1 + i);
649 float inputGain = getInputGainAt(timestamp);
650
649 Feature f; 651 Feature f;
652 double rms = 0.0;
653
650 for (int j = 0; j < pack.templateHeight; ++j) { 654 for (int j = 0; j < pack.templateHeight; ++j) {
651 f.values.push_back(float(filtered[i][j])); 655 double v = filtered[i][j];
652 } 656 rms += v * v;
657 f.values.push_back(float(v));
658 }
659
660 rms = sqrt(rms / pack.templateHeight);
661 if (rms / inputGain < silenceThreshold) {
662 filtered[i].clear();
663 }
664
653 fs[m_fcqOutputNo].push_back(f); 665 fs[m_fcqOutputNo].push_back(f);
654 } 666 }
655 667
656 int width = filtered.size();
657
658 Grid localPitches(width); 668 Grid localPitches(width);
659 669
660 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning; 670 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
661 int shiftCount = 1; 671 int shiftCount = 1;
662 if (wantShifts) { 672 if (wantShifts) {
685 vector<EMFuture> results; 695 vector<EMFuture> results;
686 for (int j = 0; j < emThreadCount && i + j < width; ++j) { 696 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
687 results.push_back 697 results.push_back
688 (async(std::launch::async, 698 (async(std::launch::async,
689 [&](int index) { 699 [&](int index) {
690 return applyEM(pack, filtered.at(index), wantShifts); 700 return applyEM
701 (pack, filtered.at(index), wantShifts);
691 }, i + j)); 702 }, i + j));
692 } 703 }
693 for (int j = 0; j < emThreadCount && i + j < width; ++j) { 704 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
694 auto out = results[j].get(); 705 auto out = results[j].get();
695 localPitches[i+j] = out.first; 706 localPitches[i+j] = out.first;
761 bool wantShifts) 772 bool wantShifts)
762 { 773 {
763 double columnThreshold = 1e-5; 774 double columnThreshold = 1e-5;
764 775
765 if (m_mode == LiveMode) { 776 if (m_mode == LiveMode) {
766 columnThreshold /= 20; 777 columnThreshold /= 15;
767 } 778 }
768 779
769 vector<double> pitches(pack.templateNoteCount, 0.0); 780 vector<double> pitches(pack.templateNoteCount, 0.0);
770 vector<int> bestShifts; 781 vector<int> bestShifts;
782
783 if (column.empty()) return { pitches, bestShifts };
771 784
772 double sum = 0.0; 785 double sum = 0.0;
773 for (int j = 0; j < pack.templateHeight; ++j) { 786 for (int j = 0; j < pack.templateHeight; ++j) {
774 sum += column.at(j); 787 sum += column.at(j);
775 } 788 }
921 if (strength < pack.levelThreshold) continue; 934 if (strength < pack.levelThreshold) continue;
922 935
923 // In live mode with only a 12-bpo CQ, we are very likely to 936 // In live mode with only a 12-bpo CQ, we are very likely to
924 // get clusters of two or three high scores at a time for 937 // get clusters of two or three high scores at a time for
925 // neighbouring semitones. Eliminate these by picking only the 938 // neighbouring semitones. Eliminate these by picking only the
926 // peaks. This means we can't recognise actual semitone chords 939 // peaks (except that we never eliminate a note that has
927 // if they ever appear, but it's not as if live mode is good 940 // already been established as currently playing). This means
928 // enough for that to be a big deal anyway. 941 // we can't recognise actual semitone chords if they ever
942 // appear, but it's not as if live mode is good enough for
943 // that to be a big deal anyway.
929 if (m_mode == LiveMode) { 944 if (m_mode == LiveMode) {
930 if (j == 0 || 945 if (m_current.find(j) == m_current.end() &&
931 j + 1 == pack.templateNoteCount || 946 (j == 0 ||
932 pitches[j] < pitches[j-1] || 947 j + 1 == pack.templateNoteCount ||
933 pitches[j] < pitches[j+1]) { 948 pitches[j] < pitches[j-1] ||
949 pitches[j] < pitches[j+1])) {
950 // not a peak or a currently-playing note: skip it
934 continue; 951 continue;
935 } 952 }
936 } 953 }
937 954
938 strengths.insert(ValueIndexMap::value_type(strength, j)); 955 strengths.insert(ValueIndexMap::value_type(strength, j));
1014 if (duration < durationThreshold) { 1031 if (duration < durationThreshold) {
1015 continue; 1032 continue;
1016 } 1033 }
1017 1034
1018 if (duration == durationThreshold) { 1035 if (duration == durationThreshold) {
1036 m_current.insert(note);
1019 emitOnset(start, note, shiftCount, onsetFeatures); 1037 emitOnset(start, note, shiftCount, onsetFeatures);
1020 } 1038 }
1021 1039
1022 if (active.find(note) == active.end()) { 1040 if (active.find(note) == active.end()) {
1023 // the note was playing but just ended 1041 // the note was playing but just ended
1042 m_current.erase(note);
1024 emitNote(start, end, note, shiftCount, noteFeatures); 1043 emitNote(start, end, note, shiftCount, noteFeatures);
1025 } 1044 }
1026 } 1045 }
1027 1046
1028 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl; 1047 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;