comparison src/Silvet.cpp @ 342:ad45b18427e0

Merge from branch livemode
author Chris Cannam
date Mon, 06 Jul 2015 09:15:21 +0100
parents 705d807ca2ca
children 460cabb27bf7
comparison
equal deleted inserted replaced
313:fa2ffbb786df 342:ad45b18427e0
19 #include <cq/CQSpectrogram.h> 19 #include <cq/CQSpectrogram.h>
20 20
21 #include "MedianFilter.h" 21 #include "MedianFilter.h"
22 #include "constant-q-cpp/src/dsp/Resampler.h" 22 #include "constant-q-cpp/src/dsp/Resampler.h"
23 #include "flattendynamics-ladspa.h" 23 #include "flattendynamics-ladspa.h"
24 #include "LiveInstruments.h"
24 25
25 #include <vector> 26 #include <vector>
26 #include <future> 27 #include <future>
27 28
28 #include <cstdio> 29 #include <cstdio>
35 using std::future; 36 using std::future;
36 using std::async; 37 using std::async;
37 using Vamp::RealTime; 38 using Vamp::RealTime;
38 39
39 static int processingSampleRate = 44100; 40 static int processingSampleRate = 44100;
40 static int processingBPO = 60; 41
42 static int binsPerSemitoneLive = 1;
43 static int binsPerSemitoneNormal = 5;
41 44
42 static int minInputSampleRate = 100; 45 static int minInputSampleRate = 100;
43 static int maxInputSampleRate = 192000; 46 static int maxInputSampleRate = 192000;
47
48 static const Silvet::ProcessingMode defaultMode = Silvet::HighQualityMode;
44 49
45 Silvet::Silvet(float inputSampleRate) : 50 Silvet::Silvet(float inputSampleRate) :
46 Plugin(inputSampleRate), 51 Plugin(inputSampleRate),
47 m_instruments(InstrumentPack::listInstrumentPacks()), 52 m_instruments(InstrumentPack::listInstrumentPacks()),
53 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
48 m_resampler(0), 54 m_resampler(0),
49 m_flattener(0), 55 m_flattener(0),
50 m_cq(0), 56 m_cq(0),
51 m_hqMode(true), 57 m_mode(defaultMode),
52 m_fineTuning(false), 58 m_fineTuning(false),
53 m_instrument(0), 59 m_instrument(0),
54 m_colsPerSec(50), 60 m_colsPerSec(50),
55 m_haveStartTime(false) 61 m_haveStartTime(false)
56 { 62 {
139 145
140 ParameterDescriptor desc; 146 ParameterDescriptor desc;
141 desc.identifier = "mode"; 147 desc.identifier = "mode";
142 desc.name = "Processing mode"; 148 desc.name = "Processing mode";
143 desc.unit = ""; 149 desc.unit = "";
144 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode modifies a number of internal parameters in favour of speed. Intensive mode (the default) will almost always produce better results."; 150 desc.description = "Sets the tradeoff of processing speed against transcription quality. Live mode is much faster and detects notes with relatively low latency; Intensive mode (the default) is slower but will almost always produce better results.";
145 desc.minValue = 0; 151 desc.minValue = 0;
146 desc.maxValue = 1; 152 desc.maxValue = 2;
147 desc.defaultValue = 1; 153 desc.defaultValue = int(defaultMode);
148 desc.isQuantized = true; 154 desc.isQuantized = true;
149 desc.quantizeStep = 1; 155 desc.quantizeStep = 1;
150 desc.valueNames.push_back("Draft (faster)"); 156 desc.valueNames.push_back("Live (faster and lower latency)");
151 desc.valueNames.push_back("Intensive (higher quality)"); 157 desc.valueNames.push_back("Intensive (higher quality)");
152 list.push_back(desc); 158 list.push_back(desc);
153 159
154 desc.identifier = "instrument"; 160 desc.identifier = "instrument";
155 desc.name = "Instrument"; 161 desc.name = "Instrument";
183 189
184 float 190 float
185 Silvet::getParameter(string identifier) const 191 Silvet::getParameter(string identifier) const
186 { 192 {
187 if (identifier == "mode") { 193 if (identifier == "mode") {
188 return m_hqMode ? 1.f : 0.f; 194 return (float)(int)m_mode;
189 } else if (identifier == "finetune") { 195 } else if (identifier == "finetune") {
190 return m_fineTuning ? 1.f : 0.f; 196 return m_fineTuning ? 1.f : 0.f;
191 } else if (identifier == "instrument") { 197 } else if (identifier == "instrument") {
192 return m_instrument; 198 return m_instrument;
193 } 199 }
196 202
197 void 203 void
198 Silvet::setParameter(string identifier, float value) 204 Silvet::setParameter(string identifier, float value)
199 { 205 {
200 if (identifier == "mode") { 206 if (identifier == "mode") {
201 m_hqMode = (value > 0.5); 207 m_mode = (ProcessingMode)(int)(value + 0.5);
202 } else if (identifier == "finetune") { 208 } else if (identifier == "finetune") {
203 m_fineTuning = (value > 0.5); 209 m_fineTuning = (value > 0.5);
204 } else if (identifier == "instrument") { 210 } else if (identifier == "instrument") {
205 m_instrument = lrintf(value); 211 m_instrument = lrintf(value);
206 } 212 }
230 OutputList list; 236 OutputList list;
231 237
232 OutputDescriptor d; 238 OutputDescriptor d;
233 d.identifier = "notes"; 239 d.identifier = "notes";
234 d.name = "Note transcription"; 240 d.name = "Note transcription";
235 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture."; 241 d.description = "Overall note transcription. Each note has time, duration, estimated fundamental frequency, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
236 d.unit = "Hz"; 242 d.unit = "Hz";
237 d.hasFixedBinCount = true; 243 d.hasFixedBinCount = true;
238 d.binCount = 2; 244 d.binCount = 2;
239 d.binNames.push_back("Frequency"); 245 d.binNames.push_back("Frequency");
240 d.binNames.push_back("Velocity"); 246 d.binNames.push_back("Velocity");
244 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62); 250 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
245 d.hasDuration = true; 251 d.hasDuration = true;
246 m_notesOutputNo = list.size(); 252 m_notesOutputNo = list.size();
247 list.push_back(d); 253 list.push_back(d);
248 254
255 d.identifier = "onsets";
256 d.name = "Note onsets";
257 d.description = "Note onsets, without durations. These can be calculated sooner than complete notes, because it isn't necessary to wait for a note to finish before returning its feature. Each event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
258 d.unit = "Hz";
259 d.hasFixedBinCount = true;
260 d.binCount = 2;
261 d.binNames.push_back("Frequency");
262 d.binNames.push_back("Velocity");
263 d.hasKnownExtents = false;
264 d.isQuantized = false;
265 d.sampleType = OutputDescriptor::VariableSampleRate;
266 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
267 d.hasDuration = false;
268 m_onsetsOutputNo = list.size();
269 list.push_back(d);
270
271 d.identifier = "onoffsets";
272 d.name = "Note onsets and offsets";
273 d.description = "Note onsets and offsets as separate events. Each onset event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture. Offsets are represented in the same way but with a velocity of 0.";
274 d.unit = "Hz";
275 d.hasFixedBinCount = true;
276 d.binCount = 2;
277 d.binNames.push_back("Frequency");
278 d.binNames.push_back("Velocity");
279 d.hasKnownExtents = false;
280 d.isQuantized = false;
281 d.sampleType = OutputDescriptor::VariableSampleRate;
282 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
283 d.hasDuration = false;
284 m_onOffsetsOutputNo = list.size();
285 list.push_back(d);
286
249 d.identifier = "timefreq"; 287 d.identifier = "timefreq";
250 d.name = "Time-frequency distribution"; 288 d.name = "Time-frequency distribution";
251 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm."; 289 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
252 d.unit = ""; 290 d.unit = "";
253 d.hasFixedBinCount = true; 291 d.hasFixedBinCount = true;
254 d.binCount = m_instruments[0].templateHeight; 292 d.binCount = getPack(0).templateHeight;
255 d.binNames.clear(); 293 d.binNames.clear();
256 if (m_cq) { 294 if (m_cq) {
257 char name[50]; 295 char name[50];
258 for (int i = 0; i < m_instruments[0].templateHeight; ++i) { 296 for (int i = 0; i < getPack(0).templateHeight; ++i) {
259 // We have a 600-bin (10 oct 60-bin CQ) of which the 297 // We have a 600-bin (10 oct 60-bin CQ) of which the
260 // lowest-frequency 55 bins have been dropped, for a 298 // lowest-frequency 55 bins have been dropped, for a
261 // 545-bin template. The native CQ bins go high->low 299 // 545-bin template. The native CQ bins go high->low
262 // frequency though, so these are still the first 545 bins 300 // frequency though, so these are still the first 545 bins
263 // as reported by getBinFrequency, though in reverse order 301 // as reported by getBinFrequency, though in reverse order
264 float freq = m_cq->getBinFrequency 302 float freq = m_cq->getBinFrequency
265 (m_instruments[0].templateHeight - i - 1); 303 (getPack(0).templateHeight - i - 1);
266 sprintf(name, "%.1f Hz", freq); 304 sprintf(name, "%.1f Hz", freq);
267 d.binNames.push_back(name); 305 d.binNames.push_back(name);
268 } 306 }
269 } 307 }
270 d.hasKnownExtents = false; 308 d.hasKnownExtents = false;
278 d.identifier = "pitchactivation"; 316 d.identifier = "pitchactivation";
279 d.name = "Pitch activation distribution"; 317 d.name = "Pitch activation distribution";
280 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction."; 318 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
281 d.unit = ""; 319 d.unit = "";
282 d.hasFixedBinCount = true; 320 d.hasFixedBinCount = true;
283 d.binCount = m_instruments[0].templateNoteCount; 321 d.binCount = getPack(0).templateNoteCount;
284 d.binNames.clear(); 322 d.binNames.clear();
285 if (m_cq) { 323 if (m_cq) {
286 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) { 324 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
287 d.binNames.push_back(noteName(i, 0, 1)); 325 d.binNames.push_back(getNoteName(i, 0));
288 } 326 }
289 } 327 }
290 d.hasKnownExtents = false; 328 d.hasKnownExtents = false;
291 d.isQuantized = false; 329 d.isQuantized = false;
292 d.sampleType = OutputDescriptor::FixedSampleRate; 330 d.sampleType = OutputDescriptor::FixedSampleRate;
302 d.hasFixedBinCount = true; 340 d.hasFixedBinCount = true;
303 d.binCount = 12; 341 d.binCount = 12;
304 d.binNames.clear(); 342 d.binNames.clear();
305 if (m_cq) { 343 if (m_cq) {
306 for (int i = 0; i < 12; ++i) { 344 for (int i = 0; i < 12; ++i) {
307 d.binNames.push_back(chromaName(i)); 345 d.binNames.push_back(getChromaName(i));
308 } 346 }
309 } 347 }
310 d.hasKnownExtents = false; 348 d.hasKnownExtents = false;
311 d.isQuantized = false; 349 d.isQuantized = false;
312 d.sampleType = OutputDescriptor::FixedSampleRate; 350 d.sampleType = OutputDescriptor::FixedSampleRate;
313 d.sampleRate = m_colsPerSec; 351 d.sampleRate = m_colsPerSec;
314 d.hasDuration = false; 352 d.hasDuration = false;
315 m_chromaOutputNo = list.size(); 353 m_chromaOutputNo = list.size();
316 list.push_back(d); 354 list.push_back(d);
317 355
356 d.identifier = "templates";
357 d.name = "Templates";
358 d.description = "Constant-Q spectral templates for the selected instrument pack.";
359 d.unit = "";
360 d.hasFixedBinCount = true;
361 d.binCount = getPack(0).templateHeight;
362 d.binNames.clear();
363 if (m_cq) {
364 char name[50];
365 for (int i = 0; i < getPack(0).templateHeight; ++i) {
366 // We have a 600-bin (10 oct 60-bin CQ) of which the
367 // lowest-frequency 55 bins have been dropped, for a
368 // 545-bin template. The native CQ bins go high->low
369 // frequency though, so these are still the first 545 bins
370 // as reported by getBinFrequency, though in reverse order
371 float freq = m_cq->getBinFrequency
372 (getPack(0).templateHeight - i - 1);
373 sprintf(name, "%.1f Hz", freq);
374 d.binNames.push_back(name);
375 }
376 }
377 d.hasKnownExtents = false;
378 d.isQuantized = false;
379 d.sampleType = OutputDescriptor::FixedSampleRate;
380 d.sampleRate = m_colsPerSec;
381 d.hasDuration = false;
382 m_templateOutputNo = list.size();
383 list.push_back(d);
384
318 return list; 385 return list;
319 } 386 }
320 387
321 std::string 388 std::string
322 Silvet::chromaName(int pitch) const 389 Silvet::getChromaName(int pitch) const
323 { 390 {
324 static const char *names[] = { 391 static const char *names[] = {
325 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#" 392 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
326 }; 393 };
327 394
328 return names[pitch]; 395 return names[pitch];
329 } 396 }
330 397
331 std::string 398 std::string
332 Silvet::noteName(int note, int shift, int shiftCount) const 399 Silvet::getNoteName(int note, int shift) const
333 { 400 {
334 string n = chromaName(note % 12); 401 string n = getChromaName(note % 12);
335 402
336 int oct = (note + 9) / 12; 403 int oct = (note + 9) / 12;
337 404
338 char buf[30]; 405 char buf[30];
339 406
340 float pshift = 0.f; 407 float pshift = 0.f;
408 int shiftCount = getShiftCount();
341 if (shiftCount > 1) { 409 if (shiftCount > 1) {
342 // see noteFrequency below 410 // see getNoteFrequency below
343 pshift = 411 pshift =
344 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount; 412 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
345 } 413 }
346 414
347 if (pshift > 0.f) { 415 if (pshift > 0.f) {
354 422
355 return buf; 423 return buf;
356 } 424 }
357 425
358 float 426 float
359 Silvet::noteFrequency(int note, int shift, int shiftCount) const 427 Silvet::getNoteFrequency(int note, int shift) const
360 { 428 {
361 // Convert shift number to a pitch shift. The given shift number 429 // Convert shift number to a pitch shift. The given shift number
362 // is an offset into the template array, which starts with some 430 // is an offset into the template array, which starts with some
363 // zeros, followed by the template, then some trailing zeros. 431 // zeros, followed by the template, then some trailing zeros.
364 // 432 //
370 // zeros at the start, which is the low-frequency end), for a 438 // zeros at the start, which is the low-frequency end), for a
371 // positive pitch shift; and higher values represent moving it 439 // positive pitch shift; and higher values represent moving it
372 // down in pitch, for a negative pitch shift. 440 // down in pitch, for a negative pitch shift.
373 441
374 float pshift = 0.f; 442 float pshift = 0.f;
443 int shiftCount = getShiftCount();
375 if (shiftCount > 1) { 444 if (shiftCount > 1) {
376 pshift = 445 pshift =
377 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount; 446 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
378 } 447 }
379 448
380 return float(27.5 * pow(2.0, (note + pshift) / 12.0)); 449 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
450
451 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
452 // << shiftCount << ", obtained freq = " << freq << endl;
453
454 return freq;
381 } 455 }
382 456
383 bool 457 bool
384 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize) 458 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
385 { 459 {
426 } 500 }
427 501
428 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling 502 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
429 m_flattener->reset(); 503 m_flattener->reset();
430 504
505 // this happens to be processingSampleRate / 3, and is the top
506 // freq used for the EM templates:
507 double maxFreq = 14700;
508
509 if (m_mode == LiveMode) {
510 // We only have 12 bpo rather than 60, so we need the top bin
511 // to be the middle one of the top 5, i.e. 2/5 of a semitone
512 // lower than 14700
513 maxFreq *= powf(2.0, -1.0 / 30.0);
514 }
515
431 double minFreq = 27.5; 516 double minFreq = 27.5;
432 517
433 if (!m_hqMode) { 518 if (m_mode == LiveMode) {
434 // We don't actually return any notes from the bottom octave, 519 // We don't actually return any notes from the bottom octave,
435 // so we can just pad with zeros 520 // so we can just pad with zeros
436 minFreq *= 2; 521 minFreq *= 2;
437 } 522 }
438 523
524 int bpo = 12 *
525 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
526
439 CQParameters params(processingSampleRate, 527 CQParameters params(processingSampleRate,
440 minFreq, 528 minFreq,
441 processingSampleRate / 3, 529 maxFreq,
442 processingBPO); 530 bpo);
443 531
444 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower 532 params.q = 0.8;
445 // drops the FFT size to 512 from 1024 and alters 533 params.atomHopFactor = (m_mode == LiveMode ? 1.0 : 0.3);
446 // some other processing parameters, making
447 // everything much, much slower. Could be a flaw
448 // in the CQ parameter calculations, must check
449 params.atomHopFactor = 0.3;
450 params.threshold = 0.0005; 534 params.threshold = 0.0005;
535 params.decimator =
536 (m_mode == LiveMode ?
537 CQParameters::FasterDecimator : CQParameters::BetterDecimator);
451 params.window = CQParameters::Hann; 538 params.window = CQParameters::Hann;
452 539
453 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear); 540 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
454 541
455 m_colsPerSec = m_hqMode ? 50 : 25; 542 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
543 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
544
545 m_colsPerSec = 50;
456 546
457 for (int i = 0; i < (int)m_postFilter.size(); ++i) { 547 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
458 delete m_postFilter[i]; 548 delete m_postFilter[i];
459 } 549 }
460 m_postFilter.clear(); 550 m_postFilter.clear();
461 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) { 551 int postFilterLength = 3;
462 m_postFilter.push_back(new MedianFilter<double>(3)); 552 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
553 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
463 } 554 }
464 m_pianoRoll.clear(); 555 m_pianoRoll.clear();
465 m_inputGains.clear(); 556 m_inputGains.clear();
466 m_columnCount = 0; 557 m_columnCount = 0;
467 m_resampledCount = 0; 558 m_resampledCount = 0;
470 } 561 }
471 562
472 Silvet::FeatureSet 563 Silvet::FeatureSet
473 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp) 564 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
474 { 565 {
566 FeatureSet fs;
567
475 if (!m_haveStartTime) { 568 if (!m_haveStartTime) {
569
476 m_startTime = timestamp; 570 m_startTime = timestamp;
477 m_haveStartTime = true; 571 m_haveStartTime = true;
572
573 insertTemplateFeatures(fs);
478 } 574 }
479 575
480 vector<float> flattened(m_blockSize); 576 vector<float> flattened(m_blockSize);
481 float gain = 1.f; 577 float gain = 1.f;
482 m_flattener->connectInputPort 578 m_flattener->connectInputPort
505 int resamplerLatency = m_resampler->getLatency(); 601 int resamplerLatency = m_resampler->getLatency();
506 602
507 if (hadCount < resamplerLatency) { 603 if (hadCount < resamplerLatency) {
508 int stillToDrop = resamplerLatency - hadCount; 604 int stillToDrop = resamplerLatency - hadCount;
509 if (stillToDrop >= int(data.size())) { 605 if (stillToDrop >= int(data.size())) {
510 return FeatureSet(); 606 return fs;
511 } else { 607 } else {
512 data = vector<double>(data.begin() + stillToDrop, data.end()); 608 data = vector<double>(data.begin() + stillToDrop, data.end());
513 } 609 }
514 } 610 }
515 } 611 }
516 612
517 Grid cqout = m_cq->process(data); 613 Grid cqout = m_cq->process(data);
518 FeatureSet fs = transcribe(cqout); 614 transcribe(cqout, fs);
519 return fs; 615 return fs;
520 } 616 }
521 617
522 Silvet::FeatureSet 618 Silvet::FeatureSet
523 Silvet::getRemainingFeatures() 619 Silvet::getRemainingFeatures()
524 { 620 {
525 Grid cqout = m_cq->getRemainingOutput(); 621 Grid cqout = m_cq->getRemainingOutput();
526 FeatureSet fs = transcribe(cqout); 622 FeatureSet fs;
623
624 if (m_columnCount == 0) {
625 // process() was never called, but we still want these
626 insertTemplateFeatures(fs);
627 } else {
628
629 // Complete the transcription
630
631 transcribe(cqout, fs);
632
633 // And make sure any extant playing notes are finished and returned
634
635 m_pianoRoll.push_back({});
636
637 auto events = noteTrack();
638
639 for (const auto &f : events.notes) {
640 fs[m_notesOutputNo].push_back(f);
641 }
642
643 for (const auto &f : events.onsets) {
644 fs[m_onsetsOutputNo].push_back(f);
645 }
646
647 for (const auto &f : events.onOffsets) {
648 fs[m_onOffsetsOutputNo].push_back(f);
649 }
650 }
651
527 return fs; 652 return fs;
528 } 653 }
529 654
530 Silvet::FeatureSet 655 void
531 Silvet::transcribe(const Grid &cqout) 656 Silvet::insertTemplateFeatures(FeatureSet &fs)
532 { 657 {
533 Grid filtered = preProcess(cqout); 658 const InstrumentPack &pack = getPack(m_instrument);
534 659 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
535 FeatureSet fs; 660 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
536
537 if (filtered.empty()) return fs;
538
539 const InstrumentPack &pack = m_instruments[m_instrument];
540
541 for (int i = 0; i < (int)filtered.size(); ++i) {
542 Feature f; 661 Feature f;
543 for (int j = 0; j < pack.templateHeight; ++j) { 662 char buffer[50];
544 f.values.push_back(float(filtered[i][j])); 663 sprintf(buffer, "Note %d", i + 1);
545 } 664 f.label = buffer;
546 fs[m_fcqOutputNo].push_back(f); 665 f.hasTimestamp = true;
547 } 666 f.timestamp = timestamp;
548 667 f.values = pack.templates[i / pack.templateNoteCount]
549 int width = filtered.size(); 668 .data[i % pack.templateNoteCount];
550 669 fs[m_templateOutputNo].push_back(f);
551 Grid localPitches(width); 670 }
552 671 }
553 bool wantShifts = m_hqMode && m_fineTuning; 672
673 int
674 Silvet::getShiftCount() const
675 {
676 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
554 int shiftCount = 1; 677 int shiftCount = 1;
555 if (wantShifts) { 678 if (wantShifts) {
679 const InstrumentPack &pack(getPack(m_instrument));
556 shiftCount = pack.templateMaxShift * 2 + 1; 680 shiftCount = pack.templateMaxShift * 2 + 1;
557 } 681 }
682 return shiftCount;
683 }
684
685 void
686 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
687 {
688 Grid filtered = preProcess(cqout);
689
690 if (filtered.empty()) return;
691
692 const InstrumentPack &pack(getPack(m_instrument));
693
694 int width = filtered.size();
695
696 double silenceThreshold = 0.01;
697
698 for (int i = 0; i < width; ++i) {
699
700 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1 + i);
701 float inputGain = getInputGainAt(timestamp);
702
703 Feature f;
704 double rms = 0.0;
705
706 for (int j = 0; j < pack.templateHeight; ++j) {
707 double v = filtered[i][j];
708 rms += v * v;
709 f.values.push_back(float(v));
710 }
711
712 rms = sqrt(rms / pack.templateHeight);
713 if (rms / inputGain < silenceThreshold) {
714 filtered[i].clear();
715 }
716
717 fs[m_fcqOutputNo].push_back(f);
718 }
719
720 Grid localPitches(width);
721
722 int shiftCount = getShiftCount();
723 bool wantShifts = (shiftCount > 1);
558 724
559 vector<vector<int> > localBestShifts; 725 vector<vector<int> > localBestShifts;
560 if (wantShifts) { 726 if (wantShifts) {
561 localBestShifts = vector<vector<int> >(width); 727 localBestShifts = vector<vector<int> >(width);
562 } 728 }
563 729
564 #ifndef MAX_EM_THREADS 730 #ifndef MAX_EM_THREADS
565 #define MAX_EM_THREADS 8 731 #define MAX_EM_THREADS 8
566 #endif 732 #endif
567 733
734 int emThreadCount = MAX_EM_THREADS;
735 if (m_mode == LiveMode && pack.templates.size() == 1) {
736 // The EM step is probably not slow enough to merit it
737 emThreadCount = 1;
738 }
739
568 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1)) 740 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
569 for (int i = 0; i < width; ) { 741 if (emThreadCount > 1) {
570 typedef future<pair<vector<double>, vector<int>>> EMFuture; 742 for (int i = 0; i < width; ) {
571 vector<EMFuture> results; 743 typedef future<pair<vector<double>, vector<int>>> EMFuture;
572 for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) { 744 vector<EMFuture> results;
573 results.push_back 745 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
574 (async(std::launch::async, 746 results.push_back
575 [&](int index) { 747 (async(std::launch::async,
576 return applyEM(pack, filtered.at(index), wantShifts); 748 [&](int index) {
577 }, i + j)); 749 return applyEM(pack, filtered.at(index));
578 } 750 }, i + j));
579 for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) { 751 }
580 auto out = results[j].get(); 752 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
581 localPitches[i+j] = out.first; 753 auto out = results[j].get();
582 if (wantShifts) localBestShifts[i+j] = out.second; 754 localPitches[i+j] = out.first;
583 } 755 if (wantShifts) localBestShifts[i+j] = out.second;
584 i += MAX_EM_THREADS; 756 }
585 } 757 i += emThreadCount;
586 #else 758 }
759 }
760 #endif
761
762 if (emThreadCount == 1) {
763 for (int i = 0; i < width; ++i) {
764 auto out = applyEM(pack, filtered.at(i));
765 localPitches[i] = out.first;
766 if (wantShifts) localBestShifts[i] = out.second;
767 }
768 }
769
587 for (int i = 0; i < width; ++i) { 770 for (int i = 0; i < width; ++i) {
588 auto out = applyEM(pack, filtered.at(i), wantShifts); 771
589 localPitches[i] = out.first; 772 vector<double> filtered;
590 if (wantShifts) localBestShifts[i] = out.second; 773
591 } 774 for (int j = 0; j < pack.templateNoteCount; ++j) {
592 #endif 775 m_postFilter[j]->push(localPitches[i][j]);
593 776 filtered.push_back(m_postFilter[j]->get());
594 for (int i = 0; i < width; ++i) { 777 }
595
596 // This returns a filtered column, and pushes the
597 // up-to-max-polyphony activation column to m_pianoRoll
598 vector<double> filtered = postProcess
599 (localPitches[i], localBestShifts[i], wantShifts);
600 778
601 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1); 779 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
602 float inputGain = getInputGainAt(timestamp); 780 float inputGain = getInputGainAt(timestamp);
603 781
604 Feature f; 782 Feature f;
613 f.values.resize(12); 791 f.values.resize(12);
614 for (int j = 0; j < (int)filtered.size(); ++j) { 792 for (int j = 0; j < (int)filtered.size(); ++j) {
615 f.values[j % 12] += filtered[j] / inputGain; 793 f.values[j % 12] += filtered[j] / inputGain;
616 } 794 }
617 fs[m_chromaOutputNo].push_back(f); 795 fs[m_chromaOutputNo].push_back(f);
618 796
619 FeatureList noteFeatures = noteTrack(shiftCount); 797 // This pushes the up-to-max-polyphony activation column to
620 798 // m_pianoRoll
621 for (FeatureList::const_iterator fi = noteFeatures.begin(); 799 postProcess(filtered, localBestShifts[i]);
622 fi != noteFeatures.end(); ++fi) { 800
623 fs[m_notesOutputNo].push_back(*fi); 801 auto events = noteTrack();
624 } 802
625 } 803 for (const auto &f : events.notes) {
626 804 fs[m_notesOutputNo].push_back(f);
627 return fs; 805 }
806
807 for (const auto &f : events.onsets) {
808 fs[m_onsetsOutputNo].push_back(f);
809 }
810
811 for (const auto &f : events.onOffsets) {
812 fs[m_onOffsetsOutputNo].push_back(f);
813 }
814 }
628 } 815 }
629 816
630 pair<vector<double>, vector<int> > 817 pair<vector<double>, vector<int> >
631 Silvet::applyEM(const InstrumentPack &pack, 818 Silvet::applyEM(const InstrumentPack &pack,
632 const vector<double> &column, 819 const vector<double> &column)
633 bool wantShifts)
634 { 820 {
635 double columnThreshold = 1e-5; 821 double columnThreshold = 1e-5;
822
823 if (m_mode == LiveMode) {
824 columnThreshold /= 15;
825 }
636 826
637 vector<double> pitches(pack.templateNoteCount, 0.0); 827 vector<double> pitches(pack.templateNoteCount, 0.0);
638 vector<int> bestShifts; 828 vector<int> bestShifts;
829
830 if (column.empty()) return { pitches, bestShifts };
639 831
640 double sum = 0.0; 832 double sum = 0.0;
641 for (int j = 0; j < pack.templateHeight; ++j) { 833 for (int j = 0; j < pack.templateHeight; ++j) {
642 sum += column.at(j); 834 sum += column.at(j);
643 } 835 }
644 if (sum < columnThreshold) return { pitches, bestShifts }; 836 if (sum < columnThreshold) return { pitches, bestShifts };
645 837
646 EM em(&pack, m_hqMode); 838 EM em(&pack, m_mode == HighQualityMode);
647 839
648 em.setPitchSparsity(pack.pitchSparsity); 840 em.setPitchSparsity(pack.pitchSparsity);
649 em.setSourceSparsity(pack.sourceSparsity); 841 em.setSourceSparsity(pack.sourceSparsity);
650 842
651 int iterations = m_hqMode ? 20 : 10; 843 int iterations = (m_mode == HighQualityMode ? 20 : 10);
652 844
653 for (int j = 0; j < iterations; ++j) { 845 for (int j = 0; j < iterations; ++j) {
654 em.iterate(column.data()); 846 em.iterate(column.data());
655 } 847 }
656 848
657 const float *pitchDist = em.getPitchDistribution(); 849 const float *pitchDist = em.getPitchDistribution();
658 const float *const *shiftDist = em.getShifts(); 850 const float *const *shiftDist = em.getShifts();
659 851
660 int shiftCount = 1; 852 int shiftCount = getShiftCount();
661 if (wantShifts) {
662 shiftCount = pack.templateMaxShift * 2 + 1;
663 }
664 853
665 for (int j = 0; j < pack.templateNoteCount; ++j) { 854 for (int j = 0; j < pack.templateNoteCount; ++j) {
666 855
667 pitches[j] = pitchDist[j] * sum; 856 pitches[j] = pitchDist[j] * sum;
668 857
669 int bestShift = 0; 858 int bestShift = 0;
670 float bestShiftValue = 0.0; 859 float bestShiftValue = 0.0;
671 if (wantShifts) { 860 if (shiftCount > 1) {
672 for (int k = 0; k < shiftCount; ++k) { 861 for (int k = 0; k < shiftCount; ++k) {
673 float value = shiftDist[k][j]; 862 float value = shiftDist[k][j];
674 if (k == 0 || value > bestShiftValue) { 863 if (k == 0 || value > bestShiftValue) {
675 bestShiftValue = value; 864 bestShiftValue = value;
676 bestShift = k; 865 bestShift = k;
700 // isn't quite accurate. But the small constant offset is 889 // isn't quite accurate. But the small constant offset is
701 // practically irrelevant compared to the jitter from the frame 890 // practically irrelevant compared to the jitter from the frame
702 // size we reduce to in a moment 891 // size we reduce to in a moment
703 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop(); 892 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
704 893
705 const InstrumentPack &pack = m_instruments[m_instrument]; 894 const InstrumentPack &pack(getPack(m_instrument));
706 895
707 for (int i = 0; i < width; ++i) { 896 for (int i = 0; i < width; ++i) {
708 897
709 if (m_columnCount < latentColumns) { 898 if (m_columnCount < latentColumns) {
710 ++m_columnCount; 899 ++m_columnCount;
719 if (select) { 908 if (select) {
720 vector<double> inCol = in[i]; 909 vector<double> inCol = in[i];
721 vector<double> outCol(pack.templateHeight); 910 vector<double> outCol(pack.templateHeight);
722 911
723 // In HQ mode, the CQ returns 600 bins and we ignore the 912 // In HQ mode, the CQ returns 600 bins and we ignore the
724 // lowest 55 of them. 913 // lowest 55 of them (assuming binsPerSemitone == 5).
725 // 914 //
726 // In draft mode the CQ is an octave shorter, returning 915 // In live mode the CQ is an octave shorter, returning 540
727 // 540 bins, so we instead pad them with an additional 5 916 // bins or equivalent, so we instead pad them with an
728 // zeros. 917 // additional 5 or equivalent zeros.
729 // 918 //
730 // We also need to reverse the column as we go, since the 919 // We also need to reverse the column as we go, since the
731 // raw CQ has the high frequencies first and we need it 920 // raw CQ has the high frequencies first and we need it
732 // the other way around. 921 // the other way around.
733 922
734 if (m_hqMode) { 923 int bps = (m_mode == LiveMode ?
924 binsPerSemitoneLive : binsPerSemitoneNormal);
925
926 if (m_mode == HighQualityMode) {
735 for (int j = 0; j < pack.templateHeight; ++j) { 927 for (int j = 0; j < pack.templateHeight; ++j) {
736 int ix = inCol.size() - j - 55; 928 int ix = inCol.size() - j - (11 * bps);
737 outCol[j] = inCol[ix]; 929 outCol[j] = inCol[ix];
738 } 930 }
739 } else { 931 } else {
740 for (int j = 0; j < 5; ++j) { 932 for (int j = 0; j < bps; ++j) {
741 outCol[j] = 0.0; 933 outCol[j] = 0.0;
742 } 934 }
743 for (int j = 5; j < pack.templateHeight; ++j) { 935 for (int j = bps; j < pack.templateHeight; ++j) {
744 int ix = inCol.size() - j + 4; 936 int ix = inCol.size() - j + (bps-1);
745 outCol[j] = inCol[ix]; 937 outCol[j] = inCol[ix];
746 } 938 }
747 } 939 }
748 940
749 vector<double> noiseLevel1 = 941 vector<double> noiseLevel1 =
750 MedianFilter<double>::filter(40, outCol); 942 MedianFilter<double>::filter(8 * bps, outCol);
751 for (int j = 0; j < pack.templateHeight; ++j) { 943 for (int j = 0; j < pack.templateHeight; ++j) {
752 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]); 944 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
753 } 945 }
754 946
755 vector<double> noiseLevel2 = 947 vector<double> noiseLevel2 =
756 MedianFilter<double>::filter(40, noiseLevel1); 948 MedianFilter<double>::filter(8 * bps, noiseLevel1);
757 for (int j = 0; j < pack.templateHeight; ++j) { 949 for (int j = 0; j < pack.templateHeight; ++j) {
758 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0); 950 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
759 } 951 }
760 952
761 out.push_back(outCol); 953 out.push_back(outCol);
765 } 957 }
766 958
767 return out; 959 return out;
768 } 960 }
769 961
770 vector<double> 962 void
771 Silvet::postProcess(const vector<double> &pitches, 963 Silvet::postProcess(const vector<double> &pitches,
772 const vector<int> &bestShifts, 964 const vector<int> &bestShifts)
773 bool wantShifts) 965 {
774 { 966 const InstrumentPack &pack(getPack(m_instrument));
775 const InstrumentPack &pack = m_instruments[m_instrument]; 967
776 968 // Threshold for level and reduce number of candidate pitches
777 vector<double> filtered; 969
970 typedef std::multimap<double, int> ValueIndexMap;
971
972 ValueIndexMap strengths;
778 973
779 for (int j = 0; j < pack.templateNoteCount; ++j) { 974 for (int j = 0; j < pack.templateNoteCount; ++j) {
780 m_postFilter[j]->push(pitches[j]); 975
781 filtered.push_back(m_postFilter[j]->get()); 976 double strength = pitches[j];
782 }
783
784 // Threshold for level and reduce number of candidate pitches
785
786 typedef std::multimap<double, int> ValueIndexMap;
787
788 ValueIndexMap strengths;
789
790 for (int j = 0; j < pack.templateNoteCount; ++j) {
791 double strength = filtered[j];
792 if (strength < pack.levelThreshold) continue; 977 if (strength < pack.levelThreshold) continue;
978
979 // In live mode with only a 12-bpo CQ, we are very likely to
980 // get clusters of two or three high scores at a time for
981 // neighbouring semitones. Eliminate these by picking only the
982 // peaks (except that we never eliminate a note that has
983 // already been established as currently playing). This means
984 // we can't recognise actual semitone chords if they ever
985 // appear, but it's not as if live mode is good enough for
986 // that to be a big deal anyway.
987 if (m_mode == LiveMode) {
988 if (m_current.find(j) == m_current.end() &&
989 (j == 0 ||
990 j + 1 == pack.templateNoteCount ||
991 pitches[j] < pitches[j-1] ||
992 pitches[j] < pitches[j+1])) {
993 // not a peak or a currently-playing note: skip it
994 continue;
995 }
996 }
997
793 strengths.insert(ValueIndexMap::value_type(strength, j)); 998 strengths.insert(ValueIndexMap::value_type(strength, j));
794 } 999 }
795 1000
796 ValueIndexMap::const_iterator si = strengths.end(); 1001 ValueIndexMap::const_iterator si = strengths.end();
797 1002
798 map<int, double> active; 1003 map<int, double> active;
799 map<int, int> activeShifts; 1004 map<int, int> activeShifts;
800 1005
1006 int shiftCount = getShiftCount();
1007
801 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) { 1008 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
802 1009
803 --si; 1010 --si;
804 1011
805 double strength = si->first; 1012 double strength = si->first;
806 int j = si->second; 1013 int j = si->second;
807 1014
808 active[j] = strength; 1015 active[j] = strength;
809 1016
810 if (wantShifts) { 1017 if (shiftCount > 1) {
811 activeShifts[j] = bestShifts[j]; 1018 activeShifts[j] = bestShifts[j];
812 } 1019 }
813 } 1020 }
814 1021
815 m_pianoRoll.push_back(active); 1022 m_pianoRoll.push_back(active);
816 1023
817 if (wantShifts) { 1024 if (shiftCount > 1) {
818 m_pianoRollShifts.push_back(activeShifts); 1025 m_pianoRollShifts.push_back(activeShifts);
819 } 1026 }
820 1027
821 return filtered; 1028 return;
822 } 1029 }
823 1030
824 Vamp::Plugin::FeatureList 1031 Silvet::FeatureChunk
825 Silvet::noteTrack(int shiftCount) 1032 Silvet::noteTrack()
826 { 1033 {
827 // Minimum duration pruning, and conversion to notes. We can only 1034 // Minimum duration pruning, and conversion to notes. We can only
828 // report notes that have just ended (i.e. that are absent in the 1035 // report notes that have just ended (i.e. that are absent in the
829 // latest active set but present in the prior set in the piano 1036 // latest active set but present in the prior set in the piano
830 // roll) -- any notes that ended earlier will have been reported 1037 // roll) -- any notes that ended earlier will have been reported
836 const map<int, double> &active = m_pianoRoll[width]; 1043 const map<int, double> &active = m_pianoRoll[width];
837 1044
838 double columnDuration = 1.0 / m_colsPerSec; 1045 double columnDuration = 1.0 / m_colsPerSec;
839 1046
840 // only keep notes >= 100ms or thereabouts 1047 // only keep notes >= 100ms or thereabouts
841 int durationThreshold = floor(0.1 / columnDuration); // columns 1048 double durationThrSec = 0.1;
1049 int durationThreshold = floor(durationThrSec / columnDuration); // in cols
842 if (durationThreshold < 1) durationThreshold = 1; 1050 if (durationThreshold < 1) durationThreshold = 1;
843 1051
844 FeatureList noteFeatures; 1052 FeatureList noteFeatures, onsetFeatures, onOffsetFeatures;
845 1053
846 if (width < durationThreshold + 1) { 1054 if (width < durationThreshold + 1) {
847 return noteFeatures; 1055 return { noteFeatures, onsetFeatures, onOffsetFeatures };
848 } 1056 }
849 1057
850 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
851
852 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin(); 1058 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
853 ni != m_pianoRoll[width-1].end(); ++ni) { 1059 ni != m_pianoRoll[width-1].end(); ++ni) {
854 1060
855 int note = ni->first; 1061 int note = ni->first;
856 1062
857 if (active.find(note) != active.end()) {
858 // the note is still playing
859 continue;
860 }
861
862 // the note was playing but just ended
863 int end = width; 1063 int end = width;
864 int start = end-1; 1064 int start = end-1;
865 1065
866 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) { 1066 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
867 --start; 1067 --start;
868 } 1068 }
869 ++start; 1069 ++start;
870 1070
871 if ((end - start) < durationThreshold) { 1071 int duration = end - start;
1072
1073 if (duration < durationThreshold) {
872 continue; 1074 continue;
873 } 1075 }
874 1076
875 emitNote(start, end, note, shiftCount, noteFeatures); 1077 if (duration == durationThreshold) {
1078 m_current.insert(note);
1079 emitOnset(start, note, onsetFeatures);
1080 emitOnset(start, note, onOffsetFeatures);
1081 }
1082
1083 if (active.find(note) == active.end()) {
1084 // the note was playing but just ended
1085 m_current.erase(note);
1086 emitNote(start, end, note, noteFeatures);
1087 emitOffset(start, end, note, onOffsetFeatures);
1088 } else { // still playing
1089 // repeated note detection: if level is greater than this
1090 // multiple of its previous value, then we end the note and
1091 // restart it with the same pitch
1092 double restartFactor = 1.5;
1093 if (duration >= durationThreshold * 2 &&
1094 (active.find(note)->second >
1095 restartFactor * m_pianoRoll[width-1][note])) {
1096 m_current.erase(note);
1097 emitNote(start, end-1, note, noteFeatures);
1098 emitOffset(start, end-1, note, onOffsetFeatures);
1099 // and remove this so that we start counting the new
1100 // note's duration from the current position
1101 m_pianoRoll[width-1].erase(note);
1102 }
1103 }
876 } 1104 }
877 1105
878 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl; 1106 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
879 1107
880 return noteFeatures; 1108 return { noteFeatures, onsetFeatures, onOffsetFeatures };
881 } 1109 }
882 1110
883 void 1111 void
884 Silvet::emitNote(int start, int end, int note, int shiftCount, 1112 Silvet::emitNote(int start, int end, int note, FeatureList &noteFeatures)
885 FeatureList &noteFeatures)
886 { 1113 {
887 int partStart = start; 1114 int partStart = start;
888 int partShift = 0; 1115 int partShift = 0;
889 int partVelocity = 0; 1116 double partStrength = 0;
890 1117
891 int partThreshold = floor(0.05 * m_colsPerSec); 1118 int partThreshold = floor(0.05 * m_colsPerSec);
892 1119
893 for (int i = start; i != end; ++i) { 1120 for (int i = start; i != end; ++i) {
894 1121
895 double strength = m_pianoRoll[i][note]; 1122 double strength = m_pianoRoll[i][note];
896 1123
897 int shift = 0; 1124 int shift = 0;
898 1125
899 if (shiftCount > 1) { 1126 if (getShiftCount() > 1) {
900 1127
901 shift = m_pianoRollShifts[i][note]; 1128 shift = m_pianoRollShifts[i][note];
902 1129
903 if (i == partStart) { 1130 if (i == partStart) {
904 partShift = shift; 1131 partShift = shift;
911 // pitch has changed, emit an intermediate note 1138 // pitch has changed, emit an intermediate note
912 noteFeatures.push_back(makeNoteFeature(partStart, 1139 noteFeatures.push_back(makeNoteFeature(partStart,
913 i, 1140 i,
914 note, 1141 note,
915 partShift, 1142 partShift,
916 shiftCount, 1143 partStrength));
917 partVelocity));
918 partStart = i; 1144 partStart = i;
919 partShift = shift; 1145 partShift = shift;
920 partVelocity = 0; 1146 partStrength = 0;
921 } 1147 }
922 } 1148 }
923 1149
924 int v = round(strength * 2); 1150 if (strength > partStrength) {
925 if (v > partVelocity) { 1151 partStrength = strength;
926 partVelocity = v;
927 } 1152 }
928 } 1153 }
929 1154
930 if (end >= partStart + partThreshold) { 1155 if (end >= partStart + partThreshold) {
931 noteFeatures.push_back(makeNoteFeature(partStart, 1156 noteFeatures.push_back(makeNoteFeature(partStart,
932 end, 1157 end,
933 note, 1158 note,
934 partShift, 1159 partShift,
935 shiftCount, 1160 partStrength));
936 partVelocity)); 1161 }
937 } 1162 }
1163
1164 void
1165 Silvet::emitOnset(int start, int note, FeatureList &onOffsetFeatures)
1166 {
1167 int len = int(m_pianoRoll.size());
1168
1169 double onsetStrength = 0;
1170
1171 int shift = 0;
1172 if (getShiftCount() > 1) {
1173 shift = m_pianoRollShifts[start][note];
1174 }
1175
1176 for (int i = start; i < len; ++i) {
1177 double strength = m_pianoRoll[i][note];
1178 if (strength > onsetStrength) {
1179 onsetStrength = strength;
1180 }
1181 }
1182
1183 if (onsetStrength == 0) return;
1184
1185 onOffsetFeatures.push_back(makeOnsetFeature(start,
1186 note,
1187 shift,
1188 onsetStrength));
1189 }
1190
1191 void
1192 Silvet::emitOffset(int start, int end, int note, FeatureList &onOffsetFeatures)
1193 {
1194 int shift = 0;
1195 if (getShiftCount() > 1) {
1196 shift = m_pianoRollShifts[start][note];
1197 }
1198
1199 onOffsetFeatures.push_back(makeOffsetFeature(end,
1200 note,
1201 shift));
938 } 1202 }
939 1203
940 RealTime 1204 RealTime
941 Silvet::getColumnTimestamp(int column) 1205 Silvet::getColumnTimestamp(int column)
942 { 1206 {
950 Silvet::Feature 1214 Silvet::Feature
951 Silvet::makeNoteFeature(int start, 1215 Silvet::makeNoteFeature(int start,
952 int end, 1216 int end,
953 int note, 1217 int note,
954 int shift, 1218 int shift,
955 int shiftCount, 1219 double strength)
956 int velocity)
957 { 1220 {
958 Feature f; 1221 Feature f;
959 1222
960 f.hasTimestamp = true; 1223 f.hasTimestamp = true;
961 f.timestamp = getColumnTimestamp(start); 1224 f.timestamp = getColumnTimestamp(start);
962 1225
963 f.hasDuration = true; 1226 f.hasDuration = true;
964 f.duration = getColumnTimestamp(end) - f.timestamp; 1227 f.duration = getColumnTimestamp(end) - f.timestamp;
965 1228
966 f.values.clear(); 1229 f.values.clear();
967 1230 f.values.push_back(getNoteFrequency(note, shift));
968 f.values.push_back 1231 f.values.push_back(getVelocityFor(strength, start));
969 (noteFrequency(note, shift, shiftCount)); 1232
970 1233 f.label = getNoteName(note, shift);
971 float inputGain = getInputGainAt(f.timestamp);
972 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
973 velocity = round(velocity / inputGain);
974 if (velocity > 127) velocity = 127;
975 if (velocity < 1) velocity = 1;
976 f.values.push_back(velocity);
977
978 f.label = noteName(note, shift, shiftCount);
979 1234
980 return f; 1235 return f;
1236 }
1237
1238 Silvet::Feature
1239 Silvet::makeOnsetFeature(int start,
1240 int note,
1241 int shift,
1242 double strength)
1243 {
1244 Feature f;
1245
1246 f.hasTimestamp = true;
1247 f.timestamp = getColumnTimestamp(start);
1248
1249 f.hasDuration = false;
1250
1251 f.values.clear();
1252 f.values.push_back(getNoteFrequency(note, shift));
1253 f.values.push_back(getVelocityFor(strength, start));
1254
1255 f.label = getNoteName(note, shift);
1256
1257 return f;
1258 }
1259
1260 Silvet::Feature
1261 Silvet::makeOffsetFeature(int col,
1262 int note,
1263 int shift)
1264 {
1265 Feature f;
1266
1267 f.hasTimestamp = true;
1268 f.timestamp = getColumnTimestamp(col);
1269
1270 f.hasDuration = false;
1271
1272 f.values.clear();
1273 f.values.push_back(getNoteFrequency(note, shift));
1274 f.values.push_back(0); // velocity 0 for offset
1275
1276 f.label = getNoteName(note, shift) + " off";
1277
1278 return f;
1279 }
1280
1281 int
1282 Silvet::getVelocityFor(double strength, int column)
1283 {
1284 RealTime rt = getColumnTimestamp(column + 1);
1285
1286 float inputGain = getInputGainAt(rt);
1287
1288 double scale = 2.0;
1289 if (m_mode == LiveMode) scale = 20.0;
1290
1291 double velocity = round((strength * scale) / inputGain);
1292
1293 if (velocity > 127.0) velocity = 127.0;
1294 if (velocity < 1.0) velocity = 1.0; // assume surpassed 0 threshold already
1295
1296 return int(velocity);
981 } 1297 }
982 1298
983 float 1299 float
984 Silvet::getInputGainAt(RealTime t) 1300 Silvet::getInputGainAt(RealTime t)
985 { 1301 {