Mercurial > hg > silvet
comparison src/Silvet.cpp @ 316:f3e10617a60d livemode
Live mode enhancements: Adjust processing parameters (for speed) and peak-pick pitch activations across frequency (to avoid neighbouring-semitone clusters)
author | Chris Cannam |
---|---|
date | Tue, 28 Apr 2015 12:21:40 +0100 |
parents | f98ba4f47e49 |
children | 92293058368a |
comparison
legend: equal, deleted, inserted, replaced
315:dec47312ed40 | 316:f3e10617a60d |
---|---|
43 static int binsPerSemitoneNormal = 5; | 43 static int binsPerSemitoneNormal = 5; |
44 | 44 |
45 static int minInputSampleRate = 100; | 45 static int minInputSampleRate = 100; |
46 static int maxInputSampleRate = 192000; | 46 static int maxInputSampleRate = 192000; |
47 | 47 |
| | 48 static const Silvet::ProcessingMode defaultMode = Silvet::HighQualityMode; |
| | 49 |
48 Silvet::Silvet(float inputSampleRate) : | 50 Silvet::Silvet(float inputSampleRate) : |
49 Plugin(inputSampleRate), | 51 Plugin(inputSampleRate), |
50 m_instruments(InstrumentPack::listInstrumentPacks()), | 52 m_instruments(InstrumentPack::listInstrumentPacks()), |
51 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)), | 53 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)), |
52 m_resampler(0), | 54 m_resampler(0), |
53 m_flattener(0), | 55 m_flattener(0), |
54 m_cq(0), | 56 m_cq(0), |
55 m_mode(HighQualityMode), | 57 m_mode(defaultMode), |
56 m_fineTuning(false), | 58 m_fineTuning(false), |
57 m_instrument(0), | 59 m_instrument(0), |
58 m_colsPerSec(50), | 60 m_colsPerSec(50), |
59 m_haveStartTime(false) | 61 m_haveStartTime(false) |
60 { | 62 { |
146 desc.name = "Processing mode"; | 148 desc.name = "Processing mode"; |
147 desc.unit = ""; | 149 desc.unit = ""; |
148 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results."; | 150 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results."; |
149 desc.minValue = 0; | 151 desc.minValue = 0; |
150 desc.maxValue = 2; | 152 desc.maxValue = 2; |
151 desc.defaultValue = 1; | 153 desc.defaultValue = int(defaultMode); |
152 desc.isQuantized = true; | 154 desc.isQuantized = true; |
153 desc.quantizeStep = 1; | 155 desc.quantizeStep = 1; |
154 desc.valueNames.push_back("Draft (faster)"); | 156 desc.valueNames.push_back("Draft (faster)"); |
155 desc.valueNames.push_back("Intensive (higher quality)"); | 157 desc.valueNames.push_back("Intensive (higher quality)"); |
156 desc.valueNames.push_back("Live (lower latency)"); | 158 desc.valueNames.push_back("Live (lower latency)"); |
492 CQParameters params(processingSampleRate, | 494 CQParameters params(processingSampleRate, |
493 minFreq, | 495 minFreq, |
494 maxFreq, | 496 maxFreq, |
495 bpo); | 497 bpo); |
496 | 498 |
497 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower | 499 // For params.q, the MIREX code uses 0.8, but it seems that with |
498 // drops the FFT size to 512 from 1024 and alters | 500 // atomHopFactor of 0.3, using q == 0.9 or lower drops the FFT |
499 // some other processing parameters, making | 501 // size to 512 from 1024 and alters some other processing |
500 // everything much, much slower. Could be a flaw | 502 // parameters, making everything much, much slower. Could be a |
501 // in the CQ parameter calculations, must check | 503 // flaw in the CQ parameter calculations, must check. For |
502 params.atomHopFactor = 0.3; | 504 // atomHopFactor == 1, q == 0.8 is fine |
| | 505 params.q = (m_mode == HighQualityMode ? 0.95 : 0.8); |
| | 506 params.atomHopFactor = (m_mode == HighQualityMode ? 0.3 : 1.0); |
503 params.threshold = 0.0005; | 507 params.threshold = 0.0005; |
504 params.window = CQParameters::Hann; | 508 params.window = CQParameters::Hann; |
505 | 509 |
506 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear); | 510 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear); |
507 | 511 |
868 for (int j = 0; j < pack.templateNoteCount; ++j) { | 872 for (int j = 0; j < pack.templateNoteCount; ++j) { |
869 m_postFilter[j]->push(pitches[j]); | 873 m_postFilter[j]->push(pitches[j]); |
870 filtered.push_back(m_postFilter[j]->get()); | 874 filtered.push_back(m_postFilter[j]->get()); |
871 } | 875 } |
872 | 876 |
| | 877 if (m_mode == LiveMode) { |
| | 878 // In live mode with only a 12-bpo CQ, we are very likely to |
| | 879 // get clusters of two or three high scores at a time for |
| | 880 // neighbouring semitones. Eliminate these by picking only the |
| | 881 // peaks. This means we can't recognise actual semitone chords |
| | 882 // if they ever appear, but it's not as if live mode is good |
| | 883 // enough for that to be a big deal anyway. |
| | 884 for (int j = 0; j < pack.templateNoteCount; ++j) { |
| | 885 if (j > 0 && j + 1 < pack.templateNoteCount && |
| | 886 filtered[j] >= filtered[j-1] && |
| | 887 filtered[j] >= filtered[j+1]) { |
| | 888 } else { |
| | 889 filtered[j] = 0.0; |
| | 890 } |
| | 891 } |
| | 892 } |
| | 893 |
873 // Threshold for level and reduce number of candidate pitches | 894 // Threshold for level and reduce number of candidate pitches |
874 | 895 |
875 typedef std::multimap<double, int> ValueIndexMap; | 896 typedef std::multimap<double, int> ValueIndexMap; |
876 | 897 |
877 ValueIndexMap strengths; | 898 ValueIndexMap strengths; |
1010 } | 1031 } |
1011 } | 1032 } |
1012 | 1033 |
1013 int v; | 1034 int v; |
1014 if (m_mode == LiveMode) { | 1035 if (m_mode == LiveMode) { |
1015 v = round(strength * 30); | 1036 v = round(strength * 20); |
1016 } else { | 1037 } else { |
1017 v = round(strength * 2); | 1038 v = round(strength * 2); |
1018 } | 1039 } |
1019 if (v > partVelocity) { | 1040 if (v > partVelocity) { |
1020 partVelocity = v; | 1041 partVelocity = v; |