Mercurial > hg > silvet
comparison src/Silvet.cpp @ 298:ebe5e0942bb8 livemode
More toward a possible live mode
author | Chris Cannam |
---|---|
date | Fri, 28 Nov 2014 10:18:22 +0000 |
parents | d6ab1b4918bd |
children | ba5f3b084466 |
comparison
equal
deleted
inserted
replaced
297:d6ab1b4918bd | 298:ebe5e0942bb8 |
---|---|
19 #include <cq/CQSpectrogram.h> | 19 #include <cq/CQSpectrogram.h> |
20 | 20 |
21 #include "MedianFilter.h" | 21 #include "MedianFilter.h" |
22 #include "constant-q-cpp/src/dsp/Resampler.h" | 22 #include "constant-q-cpp/src/dsp/Resampler.h" |
23 #include "flattendynamics-ladspa.h" | 23 #include "flattendynamics-ladspa.h" |
| | 24 #include "LiveInstruments.h" |
24 | 25 |
25 #include <vector> | 26 #include <vector> |
26 | 27 |
27 #include <cstdio> | 28 #include <cstdio> |
28 | 29 |
31 using std::cerr; | 32 using std::cerr; |
32 using std::endl; | 33 using std::endl; |
33 using Vamp::RealTime; | 34 using Vamp::RealTime; |
34 | 35 |
35 static int processingSampleRate = 44100; | 36 static int processingSampleRate = 44100; |
36 static int processingBPO = 60; | 37 |
| | 38 static int binsPerSemitoneLive = 1; |
| | 39 static int binsPerSemitoneNormal = 5; |
37 | 40 |
38 static int minInputSampleRate = 100; | 41 static int minInputSampleRate = 100; |
39 static int maxInputSampleRate = 192000; | 42 static int maxInputSampleRate = 192000; |
40 | 43 |
41 Silvet::Silvet(float inputSampleRate) : | 44 Silvet::Silvet(float inputSampleRate) : |
42 Plugin(inputSampleRate), | 45 Plugin(inputSampleRate), |
43 m_instruments(InstrumentPack::listInstrumentPacks()), | 46 m_instruments(InstrumentPack::listInstrumentPacks()), |
| | 47 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)), |
44 m_resampler(0), | 48 m_resampler(0), |
45 m_flattener(0), | 49 m_flattener(0), |
46 m_cq(0), | 50 m_cq(0), |
47 m_mode(HighQualityMode), | 51 m_mode(HighQualityMode), |
48 m_fineTuning(false), | 52 m_fineTuning(false), |
245 d.identifier = "timefreq"; | 249 d.identifier = "timefreq"; |
246 d.name = "Time-frequency distribution"; | 250 d.name = "Time-frequency distribution"; |
247 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm."; | 251 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm."; |
248 d.unit = ""; | 252 d.unit = ""; |
249 d.hasFixedBinCount = true; | 253 d.hasFixedBinCount = true; |
250 d.binCount = m_instruments[0].templateHeight; | 254 d.binCount = getPack(0).templateHeight; |
251 d.binNames.clear(); | 255 d.binNames.clear(); |
252 if (m_cq) { | 256 if (m_cq) { |
253 char name[50]; | 257 char name[50]; |
254 for (int i = 0; i < m_instruments[0].templateHeight; ++i) { | 258 for (int i = 0; i < getPack(0).templateHeight; ++i) { |
255 // We have a 600-bin (10 oct 60-bin CQ) of which the | 259 // We have a 600-bin (10 oct 60-bin CQ) of which the |
256 // lowest-frequency 55 bins have been dropped, for a | 260 // lowest-frequency 55 bins have been dropped, for a |
257 // 545-bin template. The native CQ bins go high->low | 261 // 545-bin template. The native CQ bins go high->low |
258 // frequency though, so these are still the first 545 bins | 262 // frequency though, so these are still the first 545 bins |
259 // as reported by getBinFrequency, though in reverse order | 263 // as reported by getBinFrequency, though in reverse order |
260 float freq = m_cq->getBinFrequency | 264 float freq = m_cq->getBinFrequency |
261 (m_instruments[0].templateHeight - i - 1); | 265 (getPack(0).templateHeight - i - 1); |
262 sprintf(name, "%.1f Hz", freq); | 266 sprintf(name, "%.1f Hz", freq); |
263 d.binNames.push_back(name); | 267 d.binNames.push_back(name); |
264 } | 268 } |
265 } | 269 } |
266 d.hasKnownExtents = false; | 270 d.hasKnownExtents = false; |
274 d.identifier = "pitchactivation"; | 278 d.identifier = "pitchactivation"; |
275 d.name = "Pitch activation distribution"; | 279 d.name = "Pitch activation distribution"; |
276 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction."; | 280 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction."; |
277 d.unit = ""; | 281 d.unit = ""; |
278 d.hasFixedBinCount = true; | 282 d.hasFixedBinCount = true; |
279 d.binCount = m_instruments[0].templateNoteCount; | 283 d.binCount = getPack(0).templateNoteCount; |
280 d.binNames.clear(); | 284 d.binNames.clear(); |
281 if (m_cq) { | 285 if (m_cq) { |
282 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) { | 286 for (int i = 0; i < getPack(0).templateNoteCount; ++i) { |
283 d.binNames.push_back(noteName(i, 0, 1)); | 287 d.binNames.push_back(noteName(i, 0, 1)); |
284 } | 288 } |
285 } | 289 } |
286 d.hasKnownExtents = false; | 290 d.hasKnownExtents = false; |
287 d.isQuantized = false; | 291 d.isQuantized = false; |
404 // We don't actually return any notes from the bottom octave, | 408 // We don't actually return any notes from the bottom octave, |
405 // so we can just pad with zeros | 409 // so we can just pad with zeros |
406 minFreq *= 2; | 410 minFreq *= 2; |
407 } | 411 } |
408 | 412 |
| | 413 int bpo = 12 * |
| | 414 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal); |
| | 415 |
409 CQParameters params(processingSampleRate, | 416 CQParameters params(processingSampleRate, |
410 minFreq, | 417 minFreq, |
411 processingSampleRate / 3, | 418 processingSampleRate / 3, |
412 processingBPO); | 419 bpo); |
413 | 420 |
414 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower | 421 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower |
415 // drops the FFT size to 512 from 1024 and alters | 422 // drops the FFT size to 512 from 1024 and alters |
416 // some other processing parameters, making | 423 // some other processing parameters, making |
417 // everything much, much slower. Could be a flaw | 424 // everything much, much slower. Could be a flaw |
428 | 435 |
429 for (int i = 0; i < (int)m_postFilter.size(); ++i) { | 436 for (int i = 0; i < (int)m_postFilter.size(); ++i) { |
430 delete m_postFilter[i]; | 437 delete m_postFilter[i]; |
431 } | 438 } |
432 m_postFilter.clear(); | 439 m_postFilter.clear(); |
433 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) { | 440 for (int i = 0; i < getPack(0).templateNoteCount; ++i) { |
434 m_postFilter.push_back(new MedianFilter<double>(3)); | 441 m_postFilter.push_back(new MedianFilter<double>(3)); |
435 } | 442 } |
436 m_pianoRoll.clear(); | 443 m_pianoRoll.clear(); |
437 m_inputGains.clear(); | 444 m_inputGains.clear(); |
438 m_columnCount = 0; | 445 m_columnCount = 0; |
504 | 511 |
505 FeatureSet fs; | 512 FeatureSet fs; |
506 | 513 |
507 if (filtered.empty()) return fs; | 514 if (filtered.empty()) return fs; |
508 | 515 |
509 const InstrumentPack &pack = m_instruments[m_instrument]; | 516 const InstrumentPack &pack(getPack(m_instrument)); |
510 | 517 |
511 for (int i = 0; i < (int)filtered.size(); ++i) { | 518 for (int i = 0; i < (int)filtered.size(); ++i) { |
512 Feature f; | 519 Feature f; |
513 for (int j = 0; j < pack.templateHeight; ++j) { | 520 for (int j = 0; j < pack.templateHeight; ++j) { |
514 f.values.push_back(float(filtered[i][j])); | 521 f.values.push_back(float(filtered[i][j])); |
631 // isn't quite accurate. But the small constant offset is | 638 // isn't quite accurate. But the small constant offset is |
632 // practically irrelevant compared to the jitter from the frame | 639 // practically irrelevant compared to the jitter from the frame |
633 // size we reduce to in a moment | 640 // size we reduce to in a moment |
634 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop(); | 641 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop(); |
635 | 642 |
636 const InstrumentPack &pack = m_instruments[m_instrument]; | 643 const InstrumentPack &pack(getPack(m_instrument)); |
637 | 644 |
638 for (int i = 0; i < width; ++i) { | 645 for (int i = 0; i < width; ++i) { |
639 | 646 |
640 if (m_columnCount < latentColumns) { | 647 if (m_columnCount < latentColumns) { |
641 ++m_columnCount; | 648 ++m_columnCount; |
650 if (select) { | 657 if (select) { |
651 vector<double> inCol = in[i]; | 658 vector<double> inCol = in[i]; |
652 vector<double> outCol(pack.templateHeight); | 659 vector<double> outCol(pack.templateHeight); |
653 | 660 |
654 // In HQ mode, the CQ returns 600 bins and we ignore the | 661 // In HQ mode, the CQ returns 600 bins and we ignore the |
655 // lowest 55 of them. | 662 // lowest 55 of them (assuming binsPerSemitone == 5). |
656 // | 663 // |
657 // In draft and live mode the CQ is an octave shorter, | 664 // In draft and live mode the CQ is an octave shorter, |
658 // returning 540 bins, so we instead pad them with an | 665 // returning 540 bins, so we instead pad them with an |
659 // additional 5 zeros. | 666 // additional 5 zeros. |
660 // | 667 // |
661 // We also need to reverse the column as we go, since the | 668 // We also need to reverse the column as we go, since the |
662 // raw CQ has the high frequencies first and we need it | 669 // raw CQ has the high frequencies first and we need it |
663 // the other way around. | 670 // the other way around. |
664 | 671 |
| | 672 int bps = (m_mode == LiveMode ? |
| | 673 binsPerSemitoneLive : binsPerSemitoneNormal); |
| | 674 |
665 if (m_mode == HighQualityMode) { | 675 if (m_mode == HighQualityMode) { |
666 for (int j = 0; j < pack.templateHeight; ++j) { | 676 for (int j = 0; j < pack.templateHeight; ++j) { |
667 int ix = inCol.size() - j - 55; | 677 int ix = inCol.size() - j - (11 * bps); |
668 outCol[j] = inCol[ix]; | 678 outCol[j] = inCol[ix]; |
669 } | 679 } |
670 } else { | 680 } else { |
671 for (int j = 0; j < 5; ++j) { | 681 for (int j = 0; j < bps; ++j) { |
672 outCol[j] = 0.0; | 682 outCol[j] = 0.0; |
673 } | 683 } |
674 for (int j = 5; j < pack.templateHeight; ++j) { | 684 for (int j = bps; j < pack.templateHeight; ++j) { |
675 int ix = inCol.size() - j + 4; | 685 int ix = inCol.size() - j + (bps-1); |
676 outCol[j] = inCol[ix]; | 686 outCol[j] = inCol[ix]; |
677 } | 687 } |
678 } | 688 } |
679 | 689 |
680 vector<double> noiseLevel1 = | 690 vector<double> noiseLevel1 = |
681 MedianFilter<double>::filter(40, outCol); | 691 MedianFilter<double>::filter(8 * bps, outCol); |
682 for (int j = 0; j < pack.templateHeight; ++j) { | 692 for (int j = 0; j < pack.templateHeight; ++j) { |
683 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]); | 693 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]); |
684 } | 694 } |
685 | 695 |
686 vector<double> noiseLevel2 = | 696 vector<double> noiseLevel2 = |
687 MedianFilter<double>::filter(40, noiseLevel1); | 697 MedianFilter<double>::filter(8 * bps, noiseLevel1); |
688 for (int j = 0; j < pack.templateHeight; ++j) { | 698 for (int j = 0; j < pack.templateHeight; ++j) { |
689 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0); | 699 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0); |
690 } | 700 } |
691 | 701 |
692 out.push_back(outCol); | 702 out.push_back(outCol); |
701 vector<double> | 711 vector<double> |
702 Silvet::postProcess(const vector<double> &pitches, | 712 Silvet::postProcess(const vector<double> &pitches, |
703 const vector<int> &bestShifts, | 713 const vector<int> &bestShifts, |
704 bool wantShifts) | 714 bool wantShifts) |
705 { | 715 { |
706 const InstrumentPack &pack = m_instruments[m_instrument]; | 716 const InstrumentPack &pack(getPack(m_instrument)); |
707 | 717 |
708 vector<double> filtered; | 718 vector<double> filtered; |
709 | 719 |
710 for (int j = 0; j < pack.templateNoteCount; ++j) { | 720 for (int j = 0; j < pack.templateNoteCount; ++j) { |
711 m_postFilter[j]->push(pitches[j]); | 721 m_postFilter[j]->push(pitches[j]); |