comparison src/Silvet.cpp @ 171:7c0ce480a7a4

Merge from branch finetune
author Chris Cannam
date Wed, 21 May 2014 11:53:54 +0100
parents 2114b2545c27
children 06d19c847a41
comparison
equal deleted inserted replaced
165:f73be84f5c90 171:7c0ce480a7a4
31 using std::endl; 31 using std::endl;
32 using Vamp::RealTime; 32 using Vamp::RealTime;
33 33
34 static int processingSampleRate = 44100; 34 static int processingSampleRate = 44100;
35 static int processingBPO = 60; 35 static int processingBPO = 60;
36
37 //!!! todo: replace these two with values from instrument pack
36 static int processingHeight = 545; 38 static int processingHeight = 545;
37 static int processingNotes = 88; 39 static int processingNotes = 88;
38 40
39 Silvet::Silvet(float inputSampleRate) : 41 Silvet::Silvet(float inputSampleRate) :
40 Plugin(inputSampleRate), 42 Plugin(inputSampleRate),
41 m_instruments(InstrumentPack::listInstrumentPacks()), 43 m_instruments(InstrumentPack::listInstrumentPacks()),
42 m_resampler(0), 44 m_resampler(0),
43 m_cq(0), 45 m_cq(0),
44 m_hqMode(true), 46 m_hqMode(true),
47 m_fineTuning(false),
45 m_instrument(0) 48 m_instrument(0)
46 { 49 {
47 } 50 }
48 51
49 Silvet::~Silvet() 52 Silvet::~Silvet()
140 desc.minValue = 0; 143 desc.minValue = 0;
141 desc.maxValue = 1; 144 desc.maxValue = 1;
142 desc.defaultValue = 1; 145 desc.defaultValue = 1;
143 desc.isQuantized = true; 146 desc.isQuantized = true;
144 desc.quantizeStep = 1; 147 desc.quantizeStep = 1;
145 desc.valueNames.push_back("Draft (faster)"); 148 desc.valueNames.push_back("Draft (faster)");
146 desc.valueNames.push_back("Intensive (higher quality)"); 149 desc.valueNames.push_back("Intensive (higher quality)");
147 list.push_back(desc); 150 list.push_back(desc);
148 151
149 desc.identifier = "soloinstrument"; 152 desc.identifier = "soloinstrument";
150 desc.name = "Solo instrument"; 153 desc.name = "Solo instrument";
157 desc.quantizeStep = 1; 160 desc.quantizeStep = 1;
158 desc.valueNames.clear(); 161 desc.valueNames.clear();
159 for (int i = 0; i < int(m_instruments.size()); ++i) { 162 for (int i = 0; i < int(m_instruments.size()); ++i) {
160 desc.valueNames.push_back(m_instruments[i].name); 163 desc.valueNames.push_back(m_instruments[i].name);
161 } 164 }
162 165 list.push_back(desc);
166
167 desc.identifier = "finetune";
168 desc.name = "Return fine pitch estimates";
169 desc.unit = "";
170 desc.description = "Return pitch estimates at finer than semitone resolution (works only in Intensive mode)";
171 desc.minValue = 0;
172 desc.maxValue = 1;
173 desc.defaultValue = 0;
174 desc.isQuantized = true;
175 desc.quantizeStep = 1;
176 desc.valueNames.clear();
163 list.push_back(desc); 177 list.push_back(desc);
164 178
165 return list; 179 return list;
166 } 180 }
167 181
168 float 182 float
169 Silvet::getParameter(string identifier) const 183 Silvet::getParameter(string identifier) const
170 { 184 {
171 if (identifier == "mode") { 185 if (identifier == "mode") {
172 return m_hqMode ? 1.f : 0.f; 186 return m_hqMode ? 1.f : 0.f;
187 } else if (identifier == "finetune") {
188 return m_fineTuning ? 1.f : 0.f;
173 } else if (identifier == "soloinstrument") { 189 } else if (identifier == "soloinstrument") {
174 return m_instrument; 190 return m_instrument;
175 } 191 }
176 return 0; 192 return 0;
177 } 193 }
179 void 195 void
180 Silvet::setParameter(string identifier, float value) 196 Silvet::setParameter(string identifier, float value)
181 { 197 {
182 if (identifier == "mode") { 198 if (identifier == "mode") {
183 m_hqMode = (value > 0.5); 199 m_hqMode = (value > 0.5);
200 } else if (identifier == "finetune") {
201 m_fineTuning = (value > 0.5);
184 } else if (identifier == "soloinstrument") { 202 } else if (identifier == "soloinstrument") {
185 m_instrument = lrintf(value); 203 m_instrument = lrintf(value);
186 } 204 }
187 } 205 }
188 206
245 263
246 return buf; 264 return buf;
247 } 265 }
248 266
249 float 267 float
250 Silvet::noteFrequency(int note) const 268 Silvet::noteFrequency(int note, int shift, int shiftCount) const
251 { 269 {
252 return float(27.5 * pow(2.0, note / 12.0)); 270 // Convert shift number to a pitch shift. The given shift number
271 // is an offset into the template array, which starts with some
272 // zeros, followed by the template, then some trailing zeros.
273 //
274 // Example: if we have templateMaxShift == 2 and thus shiftCount
275 // == 5, then the number will be in the range 0-4 and the template
276 // will have 2 zeros at either end. Thus number 2 represents the
277 // template "as recorded", for a pitch shift of 0; smaller indices
278 // represent moving the template *up* in pitch (by introducing
279 // zeros at the start, which is the low-frequency end), for a
280 // positive pitch shift; and higher values represent moving it
281 // down in pitch, for a negative pitch shift.
282
283 float pshift =
284 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
285
286 return float(27.5 * pow(2.0, (note + pshift) / 12.0));
253 } 287 }
254 288
255 bool 289 bool
256 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize) 290 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
257 { 291 {
348 Grid filtered = preProcess(cqout); 382 Grid filtered = preProcess(cqout);
349 383
350 FeatureSet fs; 384 FeatureSet fs;
351 385
352 if (filtered.empty()) return fs; 386 if (filtered.empty()) return fs;
387
388 const InstrumentPack &pack = m_instruments[m_instrument];
353 389
354 int width = filtered.size(); 390 int width = filtered.size();
355 391
356 int iterations = m_hqMode ? 20 : 10; 392 int iterations = m_hqMode ? 20 : 10;
357 393
358 Grid pitchMatrix(width, vector<double>(processingNotes)); 394 //!!! pitches or notes? [terminology]
395 Grid localPitches(width, vector<double>(processingNotes, 0.0));
396
397 bool wantShifts = m_hqMode && m_fineTuning;
398 int shiftCount = 1;
399 if (wantShifts) {
400 shiftCount = pack.templateMaxShift * 2 + 1;
401 }
402
403 vector<vector<int> > localBestShifts;
404 if (wantShifts) {
405 localBestShifts =
406 vector<vector<int> >(width, vector<int>(processingNotes, 0));
407 }
408
409 vector<bool> present(width, false);
359 410
360 #pragma omp parallel for 411 #pragma omp parallel for
361 for (int i = 0; i < width; ++i) { 412 for (int i = 0; i < width; ++i) {
362 413
363 double sum = 0.0; 414 double sum = 0.0;
364 for (int j = 0; j < processingHeight; ++j) { 415 for (int j = 0; j < processingHeight; ++j) {
365 sum += filtered.at(i).at(j); 416 sum += filtered.at(i).at(j);
366 } 417 }
367
368 if (sum < 1e-5) continue; 418 if (sum < 1e-5) continue;
369 419
370 EM em(&m_instruments[m_instrument], m_hqMode); 420 present[i] = true;
421
422 EM em(&pack, m_hqMode);
371 423
372 for (int j = 0; j < iterations; ++j) { 424 for (int j = 0; j < iterations; ++j) {
373 em.iterate(filtered.at(i).data()); 425 em.iterate(filtered.at(i).data());
374 } 426 }
427
428 const float *pitchDist = em.getPitchDistribution();
429 const float *const *shiftDist = em.getShifts();
430
431 for (int j = 0; j < processingNotes; ++j) {
432
433 localPitches[i][j] = pitchDist[j] * sum;
434
435 int bestShift = 0;
436 int bestShiftValue = 0.0;
437 if (wantShifts) {
438 for (int k = 0; k < shiftCount; ++k) {
439 if (k == 0 || shiftDist[k][j] > bestShiftValue) {
440 bestShiftValue = shiftDist[k][j];
441 bestShift = k;
442 }
443 }
444 localBestShifts[i][j] = bestShift;
445 }
446 }
447 }
375 448
376 const float *pitches = em.getPitchDistribution();
377
378 for (int j = 0; j < processingNotes; ++j) {
379 pitchMatrix[i][j] = pitches[j] * sum;
380 }
381 }
382
383 for (int i = 0; i < width; ++i) { 449 for (int i = 0; i < width; ++i) {
450
451 if (!present[i]) {
452 // silent column
453 for (int j = 0; j < processingNotes; ++j) {
454 m_postFilter[j]->push(0.0);
455 }
456 m_pianoRoll.push_back(map<int, double>());
457 if (wantShifts) {
458 m_pianoRollShifts.push_back(map<int, int>());
459 }
460 continue;
461 }
462
463 postProcess(localPitches[i], localBestShifts[i], wantShifts);
384 464
385 FeatureList noteFeatures = postProcess(pitchMatrix[i]); 465 FeatureList noteFeatures = noteTrack(shiftCount);
386 466
387 for (FeatureList::const_iterator fi = noteFeatures.begin(); 467 for (FeatureList::const_iterator fi = noteFeatures.begin();
388 fi != noteFeatures.end(); ++fi) { 468 fi != noteFeatures.end(); ++fi) {
389 fs[m_notesOutputNo].push_back(*fi); 469 fs[m_notesOutputNo].push_back(*fi);
390 } 470 }
458 } 538 }
459 539
460 return out; 540 return out;
461 } 541 }
462 542
463 Vamp::Plugin::FeatureList 543 void
464 Silvet::postProcess(const vector<double> &pitches) 544 Silvet::postProcess(const vector<double> &pitches,
465 { 545 const vector<int> &bestShifts,
546 bool wantShifts)
547 {
466 vector<double> filtered; 548 vector<double> filtered;
467 549
468 for (int j = 0; j < processingNotes; ++j) { 550 for (int j = 0; j < processingNotes; ++j) {
469 m_postFilter[j]->push(pitches[j]); 551 m_postFilter[j]->push(pitches[j]);
470 filtered.push_back(m_postFilter[j]->get()); 552 filtered.push_back(m_postFilter[j]->get());
471 } 553 }
472 554
473 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
474
475 // Threshold for level and reduce number of candidate pitches 555 // Threshold for level and reduce number of candidate pitches
476 556
477 int polyphony = 5; 557 int polyphony = 5;
478 558
479 //!!! make this a parameter (was 4.8, try adjusting, compare levels against matlab code) 559 //!!! make this a parameter (was 4.8, try adjusting, compare levels against matlab code)
481 // double threshold = 4.8; 561 // double threshold = 4.8;
482 562
483 typedef std::multimap<double, int> ValueIndexMap; 563 typedef std::multimap<double, int> ValueIndexMap;
484 564
485 ValueIndexMap strengths; 565 ValueIndexMap strengths;
566
486 for (int j = 0; j < processingNotes; ++j) { 567 for (int j = 0; j < processingNotes; ++j) {
487 strengths.insert(ValueIndexMap::value_type(filtered[j], j)); 568 double strength = filtered[j];
488 } 569 if (strength < threshold) continue;
570 strengths.insert(ValueIndexMap::value_type(strength, j));
571 }
572
573 ValueIndexMap::const_iterator si = strengths.end();
489 574
490 map<int, double> active; 575 map<int, double> active;
491 ValueIndexMap::const_iterator si = strengths.end(); 576 map<int, int> activeShifts;
492 while (int(active.size()) < polyphony) { 577
578 while (int(active.size()) < polyphony && si != strengths.begin()) {
579
493 --si; 580 --si;
494 if (si->first < threshold) break; 581
495 // cerr << si->second << " : " << si->first << endl; 582 double strength = si->first;
496 active[si->second] = si->first; 583 int j = si->second;
497 if (si == strengths.begin()) break; 584
498 } 585 active[j] = strength;
499 586
587 if (wantShifts) {
588 activeShifts[j] = bestShifts[j];
589 }
590 }
591
592 m_pianoRoll.push_back(active);
593
594 if (wantShifts) {
595 m_pianoRollShifts.push_back(activeShifts);
596 }
597 }
598
599 Vamp::Plugin::FeatureList
600 Silvet::noteTrack(int shiftCount)
601 {
500 // Minimum duration pruning, and conversion to notes. We can only 602 // Minimum duration pruning, and conversion to notes. We can only
501 // report notes that have just ended (i.e. that are absent in the 603 // report notes that have just ended (i.e. that are absent in the
502 // latest active set but present in the last set in the piano 604 // latest active set but present in the prior set in the piano
503 // roll) -- any notes that ended earlier will have been reported 605 // roll) -- any notes that ended earlier will have been reported
504 // already, and if they haven't ended, we don't know their 606 // already, and if they haven't ended, we don't know their
505 // duration. 607 // duration.
506 608
507 int width = m_pianoRoll.size(); 609 int width = m_pianoRoll.size() - 1;
610
611 const map<int, double> &active = m_pianoRoll[width];
508 612
509 double columnDuration = 1.0 / m_colsPerSec; 613 double columnDuration = 1.0 / m_colsPerSec;
510 614
511 // only keep notes >= 100ms or thereabouts 615 // only keep notes >= 100ms or thereabouts
512 int durationThreshold = floor(0.1 / columnDuration); // columns 616 int durationThreshold = floor(0.1 / columnDuration); // columns
513 if (durationThreshold < 1) durationThreshold = 1; 617 if (durationThreshold < 1) durationThreshold = 1;
514 618
515 FeatureList noteFeatures; 619 FeatureList noteFeatures;
516 620
517 if (width < durationThreshold + 1) { 621 if (width < durationThreshold + 1) {
518 m_pianoRoll.push_back(active);
519 return noteFeatures; 622 return noteFeatures;
520 } 623 }
521 624
522 //!!! try: 20ms intervals in intensive mode
523 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix) 625 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
524 626
525 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin(); 627 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
526 ni != m_pianoRoll[width-1].end(); ++ni) { 628 ni != m_pianoRoll[width-1].end(); ++ni) {
527 629
534 636
535 // the note was playing but just ended 637 // the note was playing but just ended
536 int end = width; 638 int end = width;
537 int start = end-1; 639 int start = end-1;
538 640
539 double maxStrength = 0.0;
540
541 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) { 641 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
542 double strength = m_pianoRoll[start][note]; 642 --start;
543 if (strength > maxStrength) { 643 }
544 maxStrength = strength; 644 ++start;
645
646 if ((end - start) < durationThreshold) {
647 continue;
648 }
649
650 emitNote(start, end, note, shiftCount, noteFeatures);
651 }
652
653 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
654
655 return noteFeatures;
656 }
657
658 void
659 Silvet::emitNote(int start, int end, int note, int shiftCount,
660 FeatureList &noteFeatures)
661 {
662 int partStart = start;
663 int partShift = 0;
664 int partVelocity = 0;
665
666 Feature f;
667 f.hasTimestamp = true;
668 f.hasDuration = true;
669
670 double columnDuration = 1.0 / m_colsPerSec;
671 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
672 int partThreshold = floor(0.05 / columnDuration);
673
674 for (int i = start; i != end; ++i) {
675
676 double strength = m_pianoRoll[i][note];
677
678 int shift = 0;
679
680 if (shiftCount > 1) {
681
682 shift = m_pianoRollShifts[i][note];
683
684 if (i == partStart) {
685 partShift = shift;
545 } 686 }
546 --start; 687
547 } 688 if (i > partStart + partThreshold && shift != partShift) {
548 ++start; 689
549 690 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
550 int duration = width - start; 691
551 // cerr << "duration " << duration << " for just-ended note " << note << endl; 692 // pitch has changed, emit an intermediate note
552 if (duration < durationThreshold) { 693 f.timestamp = RealTime::fromSeconds
553 // spurious 694 (columnDuration * (partStart - postFilterLatency) + 0.02);
554 continue; 695 f.duration = RealTime::fromSeconds
555 } 696 (columnDuration * (i - partStart));
556 697 f.values.clear();
557 int velocity = maxStrength * 2; 698 f.values.push_back
558 if (velocity > 127) velocity = 127; 699 (noteFrequency(note, partShift, shiftCount));
559 700 f.values.push_back(partVelocity);
560 // cerr << "Found a genuine note, starting at " << columnDuration * start << " with duration " << columnDuration * duration << endl; 701 f.label = noteName(note);
561 702 noteFeatures.push_back(f);
562 Feature nf; 703 partStart = i;
563 nf.hasTimestamp = true; 704 partShift = shift;
564 nf.timestamp = RealTime::fromSeconds 705 partVelocity = 0;
565 (columnDuration * (start - postFilterLatency) + 0.02); 706 }
566 nf.hasDuration = true; 707 }
567 nf.duration = RealTime::fromSeconds 708
568 (columnDuration * duration); 709 int v = strength * 2;
569 nf.values.push_back(noteFrequency(note)); 710 if (v > 127) v = 127;
570 nf.values.push_back(velocity); 711
571 nf.label = noteName(note); 712 if (v > partVelocity) {
572 noteFeatures.push_back(nf); 713 partVelocity = v;
573 } 714 }
574 715 }
575 m_pianoRoll.push_back(active); 716
576 717 if (end >= partStart + partThreshold) {
577 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl; 718 f.timestamp = RealTime::fromSeconds
578 719 (columnDuration * (partStart - postFilterLatency) + 0.02);
579 return noteFeatures; 720 f.duration = RealTime::fromSeconds
580 } 721 (columnDuration * (end - partStart));
581 722 f.values.clear();
723 f.values.push_back
724 (noteFrequency(note, partShift, shiftCount));
725 f.values.push_back(partVelocity);
726 f.label = noteName(note);
727 noteFeatures.push_back(f);
728 }
729 }