comparison src/Silvet.cpp @ 316:f3e10617a60d livemode

Live mode enhancements: Adjust processing parameters (for speed) and peak-pick pitch activations across frequency (to avoid neighbouring-semitone clusters)
author Chris Cannam
date Tue, 28 Apr 2015 12:21:40 +0100
parents f98ba4f47e49
children 92293058368a
comparison
equal deleted inserted replaced
315:dec47312ed40 316:f3e10617a60d
43 static int binsPerSemitoneNormal = 5; 43 static int binsPerSemitoneNormal = 5;
44 44
45 static int minInputSampleRate = 100; 45 static int minInputSampleRate = 100;
46 static int maxInputSampleRate = 192000; 46 static int maxInputSampleRate = 192000;
47 47
48 static const Silvet::ProcessingMode defaultMode = Silvet::HighQualityMode;
49
48 Silvet::Silvet(float inputSampleRate) : 50 Silvet::Silvet(float inputSampleRate) :
49 Plugin(inputSampleRate), 51 Plugin(inputSampleRate),
50 m_instruments(InstrumentPack::listInstrumentPacks()), 52 m_instruments(InstrumentPack::listInstrumentPacks()),
51 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)), 53 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
52 m_resampler(0), 54 m_resampler(0),
53 m_flattener(0), 55 m_flattener(0),
54 m_cq(0), 56 m_cq(0),
55 m_mode(HighQualityMode), 57 m_mode(defaultMode),
56 m_fineTuning(false), 58 m_fineTuning(false),
57 m_instrument(0), 59 m_instrument(0),
58 m_colsPerSec(50), 60 m_colsPerSec(50),
59 m_haveStartTime(false) 61 m_haveStartTime(false)
60 { 62 {
146 desc.name = "Processing mode"; 148 desc.name = "Processing mode";
147 desc.unit = ""; 149 desc.unit = "";
148 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results."; 150 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
149 desc.minValue = 0; 151 desc.minValue = 0;
150 desc.maxValue = 2; 152 desc.maxValue = 2;
151 desc.defaultValue = 1; 153 desc.defaultValue = int(defaultMode);
152 desc.isQuantized = true; 154 desc.isQuantized = true;
153 desc.quantizeStep = 1; 155 desc.quantizeStep = 1;
154 desc.valueNames.push_back("Draft (faster)"); 156 desc.valueNames.push_back("Draft (faster)");
155 desc.valueNames.push_back("Intensive (higher quality)"); 157 desc.valueNames.push_back("Intensive (higher quality)");
156 desc.valueNames.push_back("Live (lower latency)"); 158 desc.valueNames.push_back("Live (lower latency)");
492 CQParameters params(processingSampleRate, 494 CQParameters params(processingSampleRate,
493 minFreq, 495 minFreq,
494 maxFreq, 496 maxFreq,
495 bpo); 497 bpo);
496 498
497 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower 499 // For params.q, the MIREX code uses 0.8, but it seems that with
498 // drops the FFT size to 512 from 1024 and alters 500 // atomHopFactor of 0.3, using q == 0.9 or lower drops the FFT
499 // some other processing parameters, making 501 // size to 512 from 1024 and alters some other processing
500 // everything much, much slower. Could be a flaw 502 // parameters, making everything much, much slower. Could be a
501 // in the CQ parameter calculations, must check 503 // flaw in the CQ parameter calculations, must check. For
502 params.atomHopFactor = 0.3; 504 // atomHopFactor == 1, q == 0.8 is fine
505 params.q = (m_mode == HighQualityMode ? 0.95 : 0.8);
506 params.atomHopFactor = (m_mode == HighQualityMode ? 0.3 : 1.0);
503 params.threshold = 0.0005; 507 params.threshold = 0.0005;
504 params.window = CQParameters::Hann; 508 params.window = CQParameters::Hann;
505 509
506 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear); 510 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
507 511
868 for (int j = 0; j < pack.templateNoteCount; ++j) { 872 for (int j = 0; j < pack.templateNoteCount; ++j) {
869 m_postFilter[j]->push(pitches[j]); 873 m_postFilter[j]->push(pitches[j]);
870 filtered.push_back(m_postFilter[j]->get()); 874 filtered.push_back(m_postFilter[j]->get());
871 } 875 }
872 876
877 if (m_mode == LiveMode) {
878 // In live mode with only a 12-bpo CQ, we are very likely to
879 // get clusters of two or three high scores at a time for
880 // neighbouring semitones. Eliminate these by picking only the
881 // peaks. This means we can't recognise actual semitone chords
882 // if they ever appear, but it's not as if live mode is good
883 // enough for that to be a big deal anyway.
884 for (int j = 0; j < pack.templateNoteCount; ++j) {
885 if (j > 0 && j + 1 < pack.templateNoteCount &&
886 filtered[j] >= filtered[j-1] &&
887 filtered[j] >= filtered[j+1]) {
888 } else {
889 filtered[j] = 0.0;
890 }
891 }
892 }
893
873 // Threshold for level and reduce number of candidate pitches 894 // Threshold for level and reduce number of candidate pitches
874 895
875 typedef std::multimap<double, int> ValueIndexMap; 896 typedef std::multimap<double, int> ValueIndexMap;
876 897
877 ValueIndexMap strengths; 898 ValueIndexMap strengths;
1010 } 1031 }
1011 } 1032 }
1012 1033
1013 int v; 1034 int v;
1014 if (m_mode == LiveMode) { 1035 if (m_mode == LiveMode) {
1015 v = round(strength * 30); 1036 v = round(strength * 20);
1016 } else { 1037 } else {
1017 v = round(strength * 2); 1038 v = round(strength * 2);
1018 } 1039 }
1019 if (v > partVelocity) { 1040 if (v > partVelocity) {
1020 partVelocity = v; 1041 partVelocity = v;