comparison src/Silvet.cpp @ 267:a8c05709e486

Merge from branch "norm"
author Chris Cannam
date Wed, 23 Jul 2014 17:51:58 +0100
parents 34e69544691b
children 10d8bd634a77
comparison
equal deleted inserted replaced
250:ec296f783a6f 267:a8c05709e486
18 18
19 #include <cq/CQSpectrogram.h> 19 #include <cq/CQSpectrogram.h>
20 20
21 #include "MedianFilter.h" 21 #include "MedianFilter.h"
22 #include "constant-q-cpp/src/dsp/Resampler.h" 22 #include "constant-q-cpp/src/dsp/Resampler.h"
23 #include "flattendynamics-ladspa.h"
23 24
24 #include <vector> 25 #include <vector>
25 26
26 #include <cstdio> 27 #include <cstdio>
27 28
36 37
37 Silvet::Silvet(float inputSampleRate) : 38 Silvet::Silvet(float inputSampleRate) :
38 Plugin(inputSampleRate), 39 Plugin(inputSampleRate),
39 m_instruments(InstrumentPack::listInstrumentPacks()), 40 m_instruments(InstrumentPack::listInstrumentPacks()),
40 m_resampler(0), 41 m_resampler(0),
42 m_flattener(0),
41 m_cq(0), 43 m_cq(0),
42 m_hqMode(true), 44 m_hqMode(true),
43 m_fineTuning(false), 45 m_fineTuning(false),
44 m_instrument(0), 46 m_instrument(0),
45 m_colsPerSec(50) 47 m_colsPerSec(50)
47 } 49 }
48 50
49 Silvet::~Silvet() 51 Silvet::~Silvet()
50 { 52 {
51 delete m_resampler; 53 delete m_resampler;
54 delete m_flattener;
52 delete m_cq; 55 delete m_cq;
53 for (int i = 0; i < (int)m_postFilter.size(); ++i) { 56 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
54 delete m_postFilter[i]; 57 delete m_postFilter[i];
55 } 58 }
56 } 59 }
228 d.binNames.push_back("Frequency"); 231 d.binNames.push_back("Frequency");
229 d.binNames.push_back("Velocity"); 232 d.binNames.push_back("Velocity");
230 d.hasKnownExtents = false; 233 d.hasKnownExtents = false;
231 d.isQuantized = false; 234 d.isQuantized = false;
232 d.sampleType = OutputDescriptor::VariableSampleRate; 235 d.sampleType = OutputDescriptor::VariableSampleRate;
233 d.sampleRate = m_inputSampleRate / (m_cq ? m_cq->getColumnHop() : 62); 236 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
234 d.hasDuration = true; 237 d.hasDuration = true;
235 m_notesOutputNo = list.size(); 238 m_notesOutputNo = list.size();
236 list.push_back(d); 239 list.push_back(d);
237 240
238 d.identifier = "timefreq"; 241 d.identifier = "timefreq";
344 347
345 void 348 void
346 Silvet::reset() 349 Silvet::reset()
347 { 350 {
348 delete m_resampler; 351 delete m_resampler;
352 delete m_flattener;
349 delete m_cq; 353 delete m_cq;
350 354
351 if (m_inputSampleRate != processingSampleRate) { 355 if (m_inputSampleRate != processingSampleRate) {
352 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate); 356 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
353 } else { 357 } else {
354 m_resampler = 0; 358 m_resampler = 0;
355 } 359 }
360
361 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
362 m_flattener->reset();
356 363
357 double minFreq = 27.5; 364 double minFreq = 27.5;
358 365
359 if (!m_hqMode) { 366 if (!m_hqMode) {
360 // We don't actually return any notes from the bottom octave, 367 // We don't actually return any notes from the bottom octave,
386 m_postFilter.clear(); 393 m_postFilter.clear();
387 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) { 394 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) {
388 m_postFilter.push_back(new MedianFilter<double>(3)); 395 m_postFilter.push_back(new MedianFilter<double>(3));
389 } 396 }
390 m_pianoRoll.clear(); 397 m_pianoRoll.clear();
398 m_inputGains.clear();
391 m_columnCount = 0; 399 m_columnCount = 0;
392 m_startTime = RealTime::zeroTime; 400 m_startTime = RealTime::zeroTime;
393 } 401 }
394 402
395 Silvet::FeatureSet 403 Silvet::FeatureSet
396 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp) 404 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
397 { 405 {
398 if (m_columnCount == 0) { 406 if (m_columnCount == 0) {
399 m_startTime = timestamp; 407 m_startTime = timestamp;
400 } 408 }
409
410 vector<float> flattened(m_blockSize);
411 float gain = 1.f;
412 m_flattener->connectInputPort
413 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
414 m_flattener->connectOutputPort
415 (FlattenDynamics::AudioOutputPort, &flattened[0]);
416 m_flattener->connectOutputPort
417 (FlattenDynamics::GainOutputPort, &gain);
418 m_flattener->process(m_blockSize);
419
420 m_inputGains[timestamp] = gain;
401 421
402 vector<double> data; 422 vector<double> data;
403 for (int i = 0; i < m_blockSize; ++i) { 423 for (int i = 0; i < m_blockSize; ++i) {
404 data.push_back(inputBuffers[0][i]); 424 double d = flattened[i];
425 data.push_back(d);
405 } 426 }
406 427
407 if (m_resampler) { 428 if (m_resampler) {
408 data = m_resampler->process(data.data(), data.size()); 429 data = m_resampler->process(data.data(), data.size());
409 } 430 }
410 431
411 Grid cqout = m_cq->process(data); 432 Grid cqout = m_cq->process(data);
412 FeatureSet fs = transcribe(cqout); 433 FeatureSet fs = transcribe(cqout);
413 return fs; 434 return fs;
414 } 435 }
415 436
731 { 752 {
732 int partStart = start; 753 int partStart = start;
733 int partShift = 0; 754 int partShift = 0;
734 int partVelocity = 0; 755 int partVelocity = 0;
735 756
736 Feature f; 757 int partThreshold = floor(0.05 * m_colsPerSec);
737 f.hasTimestamp = true;
738 f.hasDuration = true;
739
740 double columnDuration = 1.0 / m_colsPerSec;
741 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
742 int partThreshold = floor(0.05 / columnDuration);
743 758
744 for (int i = start; i != end; ++i) { 759 for (int i = start; i != end; ++i) {
745 760
746 double strength = m_pianoRoll[i][note]; 761 double strength = m_pianoRoll[i][note];
747 762
758 if (i > partStart + partThreshold && shift != partShift) { 773 if (i > partStart + partThreshold && shift != partShift) {
759 774
760 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl; 775 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
761 776
762 // pitch has changed, emit an intermediate note 777 // pitch has changed, emit an intermediate note
763 f.timestamp = RealTime::fromSeconds 778 noteFeatures.push_back(makeNoteFeature(partStart,
764 (columnDuration * (partStart - postFilterLatency) + 0.02); 779 i,
765 f.duration = RealTime::fromSeconds 780 note,
766 (columnDuration * (i - partStart)); 781 partShift,
767 f.values.clear(); 782 shiftCount,
768 f.values.push_back 783 partVelocity));
769 (noteFrequency(note, partShift, shiftCount));
770 f.values.push_back(partVelocity);
771 f.label = noteName(note, partShift, shiftCount);
772 noteFeatures.push_back(f);
773 partStart = i; 784 partStart = i;
774 partShift = shift; 785 partShift = shift;
775 partVelocity = 0; 786 partVelocity = 0;
776 } 787 }
777 } 788 }
778 789
779 int v = strength * 2; 790 int v = round(strength * 2);
780 if (v > 127) v = 127;
781
782 if (v > partVelocity) { 791 if (v > partVelocity) {
783 partVelocity = v; 792 partVelocity = v;
784 } 793 }
785 } 794 }
786 795
787 if (end >= partStart + partThreshold) { 796 if (end >= partStart + partThreshold) {
788 f.timestamp = RealTime::fromSeconds 797 noteFeatures.push_back(makeNoteFeature(partStart,
789 (columnDuration * (partStart - postFilterLatency) + 0.02); 798 end,
790 f.duration = RealTime::fromSeconds 799 note,
791 (columnDuration * (end - partStart)); 800 partShift,
792 f.values.clear(); 801 shiftCount,
793 f.values.push_back 802 partVelocity));
794 (noteFrequency(note, partShift, shiftCount)); 803 }
795 f.values.push_back(partVelocity); 804 }
796 f.label = noteName(note, partShift, shiftCount); 805
797 noteFeatures.push_back(f); 806 Silvet::Feature
798 } 807 Silvet::makeNoteFeature(int start,
799 } 808 int end,
809 int note,
810 int shift,
811 int shiftCount,
812 int velocity)
813 {
814 double columnDuration = 1.0 / m_colsPerSec;
815 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
816
817 Feature f;
818
819 f.hasTimestamp = true;
820 f.timestamp = RealTime::fromSeconds
821 (columnDuration * (start - postFilterLatency) + 0.02);
822
823 f.hasDuration = true;
824 f.duration = RealTime::fromSeconds
825 (columnDuration * (end - start));
826
827 f.values.clear();
828
829 f.values.push_back
830 (noteFrequency(note, shift, shiftCount));
831
832 float inputGain = getInputGainAt(f.timestamp);
833 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
834 velocity = round(velocity / inputGain);
835 if (velocity > 127) velocity = 127;
836 if (velocity < 1) velocity = 1;
837 f.values.push_back(velocity);
838
839 f.label = noteName(note, shift, shiftCount);
840
841 return f;
842 }
843
844 float
845 Silvet::getInputGainAt(RealTime t)
846 {
847 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
848
849 if (i == m_inputGains.end()) {
850 if (i != m_inputGains.begin()) {
851 --i;
852 } else {
853 return 1.f; // no data
854 }
855 }
856
857 // cerr << "gain at time " << t << " = " << i->second << endl;
858
859 return i->second;
860 }
861