comparison src/Silvet.cpp @ 298:ebe5e0942bb8 livemode

More toward a possible live mode
author Chris Cannam
date Fri, 28 Nov 2014 10:18:22 +0000
parents d6ab1b4918bd
children ba5f3b084466
comparison
equal deleted inserted replaced
297:d6ab1b4918bd 298:ebe5e0942bb8
19 #include <cq/CQSpectrogram.h> 19 #include <cq/CQSpectrogram.h>
20 20
21 #include "MedianFilter.h" 21 #include "MedianFilter.h"
22 #include "constant-q-cpp/src/dsp/Resampler.h" 22 #include "constant-q-cpp/src/dsp/Resampler.h"
23 #include "flattendynamics-ladspa.h" 23 #include "flattendynamics-ladspa.h"
24 #include "LiveInstruments.h"
24 25
25 #include <vector> 26 #include <vector>
26 27
27 #include <cstdio> 28 #include <cstdio>
28 29
31 using std::cerr; 32 using std::cerr;
32 using std::endl; 33 using std::endl;
33 using Vamp::RealTime; 34 using Vamp::RealTime;
34 35
35 static int processingSampleRate = 44100; 36 static int processingSampleRate = 44100;
36 static int processingBPO = 60; 37
38 static int binsPerSemitoneLive = 1;
39 static int binsPerSemitoneNormal = 5;
37 40
38 static int minInputSampleRate = 100; 41 static int minInputSampleRate = 100;
39 static int maxInputSampleRate = 192000; 42 static int maxInputSampleRate = 192000;
40 43
41 Silvet::Silvet(float inputSampleRate) : 44 Silvet::Silvet(float inputSampleRate) :
42 Plugin(inputSampleRate), 45 Plugin(inputSampleRate),
43 m_instruments(InstrumentPack::listInstrumentPacks()), 46 m_instruments(InstrumentPack::listInstrumentPacks()),
47 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
44 m_resampler(0), 48 m_resampler(0),
45 m_flattener(0), 49 m_flattener(0),
46 m_cq(0), 50 m_cq(0),
47 m_mode(HighQualityMode), 51 m_mode(HighQualityMode),
48 m_fineTuning(false), 52 m_fineTuning(false),
245 d.identifier = "timefreq"; 249 d.identifier = "timefreq";
246 d.name = "Time-frequency distribution"; 250 d.name = "Time-frequency distribution";
247 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm."; 251 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
248 d.unit = ""; 252 d.unit = "";
249 d.hasFixedBinCount = true; 253 d.hasFixedBinCount = true;
250 d.binCount = m_instruments[0].templateHeight; 254 d.binCount = getPack(0).templateHeight;
251 d.binNames.clear(); 255 d.binNames.clear();
252 if (m_cq) { 256 if (m_cq) {
253 char name[50]; 257 char name[50];
254 for (int i = 0; i < m_instruments[0].templateHeight; ++i) { 258 for (int i = 0; i < getPack(0).templateHeight; ++i) {
255 // We have a 600-bin (10 oct 60-bin CQ) of which the 259 // We have a 600-bin (10 oct 60-bin CQ) of which the
256 // lowest-frequency 55 bins have been dropped, for a 260 // lowest-frequency 55 bins have been dropped, for a
257 // 545-bin template. The native CQ bins go high->low 261 // 545-bin template. The native CQ bins go high->low
258 // frequency though, so these are still the first 545 bins 262 // frequency though, so these are still the first 545 bins
259 // as reported by getBinFrequency, though in reverse order 263 // as reported by getBinFrequency, though in reverse order
260 float freq = m_cq->getBinFrequency 264 float freq = m_cq->getBinFrequency
261 (m_instruments[0].templateHeight - i - 1); 265 (getPack(0).templateHeight - i - 1);
262 sprintf(name, "%.1f Hz", freq); 266 sprintf(name, "%.1f Hz", freq);
263 d.binNames.push_back(name); 267 d.binNames.push_back(name);
264 } 268 }
265 } 269 }
266 d.hasKnownExtents = false; 270 d.hasKnownExtents = false;
274 d.identifier = "pitchactivation"; 278 d.identifier = "pitchactivation";
275 d.name = "Pitch activation distribution"; 279 d.name = "Pitch activation distribution";
276 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction."; 280 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
277 d.unit = ""; 281 d.unit = "";
278 d.hasFixedBinCount = true; 282 d.hasFixedBinCount = true;
279 d.binCount = m_instruments[0].templateNoteCount; 283 d.binCount = getPack(0).templateNoteCount;
280 d.binNames.clear(); 284 d.binNames.clear();
281 if (m_cq) { 285 if (m_cq) {
282 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) { 286 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
283 d.binNames.push_back(noteName(i, 0, 1)); 287 d.binNames.push_back(noteName(i, 0, 1));
284 } 288 }
285 } 289 }
286 d.hasKnownExtents = false; 290 d.hasKnownExtents = false;
287 d.isQuantized = false; 291 d.isQuantized = false;
404 // We don't actually return any notes from the bottom octave, 408 // We don't actually return any notes from the bottom octave,
405 // so we can just pad with zeros 409 // so we can just pad with zeros
406 minFreq *= 2; 410 minFreq *= 2;
407 } 411 }
408 412
413 int bpo = 12 *
414 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
415
409 CQParameters params(processingSampleRate, 416 CQParameters params(processingSampleRate,
410 minFreq, 417 minFreq,
411 processingSampleRate / 3, 418 processingSampleRate / 3,
412 processingBPO); 419 bpo);
413 420
414 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower 421 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
415 // drops the FFT size to 512 from 1024 and alters 422 // drops the FFT size to 512 from 1024 and alters
416 // some other processing parameters, making 423 // some other processing parameters, making
417 // everything much, much slower. Could be a flaw 424 // everything much, much slower. Could be a flaw
428 435
429 for (int i = 0; i < (int)m_postFilter.size(); ++i) { 436 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
430 delete m_postFilter[i]; 437 delete m_postFilter[i];
431 } 438 }
432 m_postFilter.clear(); 439 m_postFilter.clear();
433 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) { 440 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
434 m_postFilter.push_back(new MedianFilter<double>(3)); 441 m_postFilter.push_back(new MedianFilter<double>(3));
435 } 442 }
436 m_pianoRoll.clear(); 443 m_pianoRoll.clear();
437 m_inputGains.clear(); 444 m_inputGains.clear();
438 m_columnCount = 0; 445 m_columnCount = 0;
504 511
505 FeatureSet fs; 512 FeatureSet fs;
506 513
507 if (filtered.empty()) return fs; 514 if (filtered.empty()) return fs;
508 515
509 const InstrumentPack &pack = m_instruments[m_instrument]; 516 const InstrumentPack &pack(getPack(m_instrument));
510 517
511 for (int i = 0; i < (int)filtered.size(); ++i) { 518 for (int i = 0; i < (int)filtered.size(); ++i) {
512 Feature f; 519 Feature f;
513 for (int j = 0; j < pack.templateHeight; ++j) { 520 for (int j = 0; j < pack.templateHeight; ++j) {
514 f.values.push_back(float(filtered[i][j])); 521 f.values.push_back(float(filtered[i][j]));
631 // isn't quite accurate. But the small constant offset is 638 // isn't quite accurate. But the small constant offset is
632 // practically irrelevant compared to the jitter from the frame 639 // practically irrelevant compared to the jitter from the frame
633 // size we reduce to in a moment 640 // size we reduce to in a moment
634 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop(); 641 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
635 642
636 const InstrumentPack &pack = m_instruments[m_instrument]; 643 const InstrumentPack &pack(getPack(m_instrument));
637 644
638 for (int i = 0; i < width; ++i) { 645 for (int i = 0; i < width; ++i) {
639 646
640 if (m_columnCount < latentColumns) { 647 if (m_columnCount < latentColumns) {
641 ++m_columnCount; 648 ++m_columnCount;
650 if (select) { 657 if (select) {
651 vector<double> inCol = in[i]; 658 vector<double> inCol = in[i];
652 vector<double> outCol(pack.templateHeight); 659 vector<double> outCol(pack.templateHeight);
653 660
654 // In HQ mode, the CQ returns 600 bins and we ignore the 661 // In HQ mode, the CQ returns 600 bins and we ignore the
655 // lowest 55 of them. 662 // lowest 55 of them (assuming binsPerSemitone == 5).
656 // 663 //
657 // In draft and live mode the CQ is an octave shorter, 664 // In draft and live mode the CQ is an octave shorter,
658 // returning 540 bins, so we instead pad them with an 665 // returning 540 bins, so we instead pad them with an
659 // additional 5 zeros. 666 // additional 5 zeros.
660 // 667 //
661 // We also need to reverse the column as we go, since the 668 // We also need to reverse the column as we go, since the
662 // raw CQ has the high frequencies first and we need it 669 // raw CQ has the high frequencies first and we need it
663 // the other way around. 670 // the other way around.
664 671
672 int bps = (m_mode == LiveMode ?
673 binsPerSemitoneLive : binsPerSemitoneNormal);
674
665 if (m_mode == HighQualityMode) { 675 if (m_mode == HighQualityMode) {
666 for (int j = 0; j < pack.templateHeight; ++j) { 676 for (int j = 0; j < pack.templateHeight; ++j) {
667 int ix = inCol.size() - j - 55; 677 int ix = inCol.size() - j - (11 * bps);
668 outCol[j] = inCol[ix]; 678 outCol[j] = inCol[ix];
669 } 679 }
670 } else { 680 } else {
671 for (int j = 0; j < 5; ++j) { 681 for (int j = 0; j < bps; ++j) {
672 outCol[j] = 0.0; 682 outCol[j] = 0.0;
673 } 683 }
674 for (int j = 5; j < pack.templateHeight; ++j) { 684 for (int j = bps; j < pack.templateHeight; ++j) {
675 int ix = inCol.size() - j + 4; 685 int ix = inCol.size() - j + (bps-1);
676 outCol[j] = inCol[ix]; 686 outCol[j] = inCol[ix];
677 } 687 }
678 } 688 }
679 689
680 vector<double> noiseLevel1 = 690 vector<double> noiseLevel1 =
681 MedianFilter<double>::filter(40, outCol); 691 MedianFilter<double>::filter(8 * bps, outCol);
682 for (int j = 0; j < pack.templateHeight; ++j) { 692 for (int j = 0; j < pack.templateHeight; ++j) {
683 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]); 693 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
684 } 694 }
685 695
686 vector<double> noiseLevel2 = 696 vector<double> noiseLevel2 =
687 MedianFilter<double>::filter(40, noiseLevel1); 697 MedianFilter<double>::filter(8 * bps, noiseLevel1);
688 for (int j = 0; j < pack.templateHeight; ++j) { 698 for (int j = 0; j < pack.templateHeight; ++j) {
689 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0); 699 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
690 } 700 }
691 701
692 out.push_back(outCol); 702 out.push_back(outCol);
701 vector<double> 711 vector<double>
702 Silvet::postProcess(const vector<double> &pitches, 712 Silvet::postProcess(const vector<double> &pitches,
703 const vector<int> &bestShifts, 713 const vector<int> &bestShifts,
704 bool wantShifts) 714 bool wantShifts)
705 { 715 {
706 const InstrumentPack &pack = m_instruments[m_instrument]; 716 const InstrumentPack &pack(getPack(m_instrument));
707 717
708 vector<double> filtered; 718 vector<double> filtered;
709 719
710 for (int j = 0; j < pack.templateNoteCount; ++j) { 720 for (int j = 0; j < pack.templateNoteCount; ++j) {
711 m_postFilter[j]->push(pitches[j]); 721 m_postFilter[j]->push(pitches[j]);