Mercurial > hg > silvet
comparison src/Silvet.cpp @ 171:7c0ce480a7a4
Merge from branch finetune
author | Chris Cannam |
---|---|
date | Wed, 21 May 2014 11:53:54 +0100 |
parents | 2114b2545c27 |
children | 06d19c847a41 |
comparison
equal
deleted
inserted
replaced
165:f73be84f5c90 | 171:7c0ce480a7a4 |
---|---|
31 using std::endl; | 31 using std::endl; |
32 using Vamp::RealTime; | 32 using Vamp::RealTime; |
33 | 33 |
34 static int processingSampleRate = 44100; | 34 static int processingSampleRate = 44100; |
35 static int processingBPO = 60; | 35 static int processingBPO = 60; |
36 | |
37 //!!! todo: replace these two with values from instrument pack | |
36 static int processingHeight = 545; | 38 static int processingHeight = 545; |
37 static int processingNotes = 88; | 39 static int processingNotes = 88; |
38 | 40 |
39 Silvet::Silvet(float inputSampleRate) : | 41 Silvet::Silvet(float inputSampleRate) : |
40 Plugin(inputSampleRate), | 42 Plugin(inputSampleRate), |
41 m_instruments(InstrumentPack::listInstrumentPacks()), | 43 m_instruments(InstrumentPack::listInstrumentPacks()), |
42 m_resampler(0), | 44 m_resampler(0), |
43 m_cq(0), | 45 m_cq(0), |
44 m_hqMode(true), | 46 m_hqMode(true), |
47 m_fineTuning(false), | |
45 m_instrument(0) | 48 m_instrument(0) |
46 { | 49 { |
47 } | 50 } |
48 | 51 |
49 Silvet::~Silvet() | 52 Silvet::~Silvet() |
140 desc.minValue = 0; | 143 desc.minValue = 0; |
141 desc.maxValue = 1; | 144 desc.maxValue = 1; |
142 desc.defaultValue = 1; | 145 desc.defaultValue = 1; |
143 desc.isQuantized = true; | 146 desc.isQuantized = true; |
144 desc.quantizeStep = 1; | 147 desc.quantizeStep = 1; |
145 desc.valueNames.push_back("Draft (faster)"); | 148 desc.valueNames.push_back("Draft (faster)"); |
146 desc.valueNames.push_back("Intensive (higher quality)"); | 149 desc.valueNames.push_back("Intensive (higher quality)"); |
147 list.push_back(desc); | 150 list.push_back(desc); |
148 | 151 |
149 desc.identifier = "soloinstrument"; | 152 desc.identifier = "soloinstrument"; |
150 desc.name = "Solo instrument"; | 153 desc.name = "Solo instrument"; |
157 desc.quantizeStep = 1; | 160 desc.quantizeStep = 1; |
158 desc.valueNames.clear(); | 161 desc.valueNames.clear(); |
159 for (int i = 0; i < int(m_instruments.size()); ++i) { | 162 for (int i = 0; i < int(m_instruments.size()); ++i) { |
160 desc.valueNames.push_back(m_instruments[i].name); | 163 desc.valueNames.push_back(m_instruments[i].name); |
161 } | 164 } |
162 | 165 list.push_back(desc); |
166 | |
167 desc.identifier = "finetune"; | |
168 desc.name = "Return fine pitch estimates"; | |
169 desc.unit = ""; | |
170 desc.description = "Return pitch estimates at finer than semitone resolution (works only in Intensive mode)"; | |
171 desc.minValue = 0; | |
172 desc.maxValue = 1; | |
173 desc.defaultValue = 0; | |
174 desc.isQuantized = true; | |
175 desc.quantizeStep = 1; | |
176 desc.valueNames.clear(); | |
163 list.push_back(desc); | 177 list.push_back(desc); |
164 | 178 |
165 return list; | 179 return list; |
166 } | 180 } |
167 | 181 |
168 float | 182 float |
169 Silvet::getParameter(string identifier) const | 183 Silvet::getParameter(string identifier) const |
170 { | 184 { |
171 if (identifier == "mode") { | 185 if (identifier == "mode") { |
172 return m_hqMode ? 1.f : 0.f; | 186 return m_hqMode ? 1.f : 0.f; |
187 } else if (identifier == "finetune") { | |
188 return m_fineTuning ? 1.f : 0.f; | |
173 } else if (identifier == "soloinstrument") { | 189 } else if (identifier == "soloinstrument") { |
174 return m_instrument; | 190 return m_instrument; |
175 } | 191 } |
176 return 0; | 192 return 0; |
177 } | 193 } |
179 void | 195 void |
180 Silvet::setParameter(string identifier, float value) | 196 Silvet::setParameter(string identifier, float value) |
181 { | 197 { |
182 if (identifier == "mode") { | 198 if (identifier == "mode") { |
183 m_hqMode = (value > 0.5); | 199 m_hqMode = (value > 0.5); |
200 } else if (identifier == "finetune") { | |
201 m_fineTuning = (value > 0.5); | |
184 } else if (identifier == "soloinstrument") { | 202 } else if (identifier == "soloinstrument") { |
185 m_instrument = lrintf(value); | 203 m_instrument = lrintf(value); |
186 } | 204 } |
187 } | 205 } |
188 | 206 |
245 | 263 |
246 return buf; | 264 return buf; |
247 } | 265 } |
248 | 266 |
249 float | 267 float |
250 Silvet::noteFrequency(int note) const | 268 Silvet::noteFrequency(int note, int shift, int shiftCount) const |
251 { | 269 { |
252 return float(27.5 * pow(2.0, note / 12.0)); | 270 // Convert shift number to a pitch shift. The given shift number |
271 // is an offset into the template array, which starts with some | |
272 // zeros, followed by the template, then some trailing zeros. | |
273 // | |
274 // Example: if we have templateMaxShift == 2 and thus shiftCount | |
275 // == 5, then the number will be in the range 0-4 and the template | |
276 // will have 2 zeros at either end. Thus number 2 represents the | |
277 // template "as recorded", for a pitch shift of 0; smaller indices | |
278 // represent moving the template *up* in pitch (by introducing | |
279 // zeros at the start, which is the low-frequency end), for a | |
280 // positive pitch shift; and higher values represent moving it | |
281 // down in pitch, for a negative pitch shift. | |
282 | |
283 float pshift = | |
284 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount; | |
285 | |
286 return float(27.5 * pow(2.0, (note + pshift) / 12.0)); | |
253 } | 287 } |
254 | 288 |
255 bool | 289 bool |
256 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize) | 290 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize) |
257 { | 291 { |
348 Grid filtered = preProcess(cqout); | 382 Grid filtered = preProcess(cqout); |
349 | 383 |
350 FeatureSet fs; | 384 FeatureSet fs; |
351 | 385 |
352 if (filtered.empty()) return fs; | 386 if (filtered.empty()) return fs; |
387 | |
388 const InstrumentPack &pack = m_instruments[m_instrument]; | |
353 | 389 |
354 int width = filtered.size(); | 390 int width = filtered.size(); |
355 | 391 |
356 int iterations = m_hqMode ? 20 : 10; | 392 int iterations = m_hqMode ? 20 : 10; |
357 | 393 |
358 Grid pitchMatrix(width, vector<double>(processingNotes)); | 394 //!!! pitches or notes? [terminology] |
395 Grid localPitches(width, vector<double>(processingNotes, 0.0)); | |
396 | |
397 bool wantShifts = m_hqMode && m_fineTuning; | |
398 int shiftCount = 1; | |
399 if (wantShifts) { | |
400 shiftCount = pack.templateMaxShift * 2 + 1; | |
401 } | |
402 | |
403 vector<vector<int> > localBestShifts; | |
404 if (wantShifts) { | |
405 localBestShifts = | |
406 vector<vector<int> >(width, vector<int>(processingNotes, 0)); | |
407 } | |
408 | |
409 vector<bool> present(width, false); | |
359 | 410 |
360 #pragma omp parallel for | 411 #pragma omp parallel for |
361 for (int i = 0; i < width; ++i) { | 412 for (int i = 0; i < width; ++i) { |
362 | 413 |
363 double sum = 0.0; | 414 double sum = 0.0; |
364 for (int j = 0; j < processingHeight; ++j) { | 415 for (int j = 0; j < processingHeight; ++j) { |
365 sum += filtered.at(i).at(j); | 416 sum += filtered.at(i).at(j); |
366 } | 417 } |
367 | |
368 if (sum < 1e-5) continue; | 418 if (sum < 1e-5) continue; |
369 | 419 |
370 EM em(&m_instruments[m_instrument], m_hqMode); | 420 present[i] = true; |
421 | |
422 EM em(&pack, m_hqMode); | |
371 | 423 |
372 for (int j = 0; j < iterations; ++j) { | 424 for (int j = 0; j < iterations; ++j) { |
373 em.iterate(filtered.at(i).data()); | 425 em.iterate(filtered.at(i).data()); |
374 } | 426 } |
427 | |
428 const float *pitchDist = em.getPitchDistribution(); | |
429 const float *const *shiftDist = em.getShifts(); | |
430 | |
431 for (int j = 0; j < processingNotes; ++j) { | |
432 | |
433 localPitches[i][j] = pitchDist[j] * sum; | |
434 | |
435 int bestShift = 0; | |
436 int bestShiftValue = 0.0; | |
437 if (wantShifts) { | |
438 for (int k = 0; k < shiftCount; ++k) { | |
439 if (k == 0 || shiftDist[k][j] > bestShiftValue) { | |
440 bestShiftValue = shiftDist[k][j]; | |
441 bestShift = k; | |
442 } | |
443 } | |
444 localBestShifts[i][j] = bestShift; | |
445 } | |
446 } | |
447 } | |
375 | 448 |
376 const float *pitches = em.getPitchDistribution(); | |
377 | |
378 for (int j = 0; j < processingNotes; ++j) { | |
379 pitchMatrix[i][j] = pitches[j] * sum; | |
380 } | |
381 } | |
382 | |
383 for (int i = 0; i < width; ++i) { | 449 for (int i = 0; i < width; ++i) { |
450 | |
451 if (!present[i]) { | |
452 // silent column | |
453 for (int j = 0; j < processingNotes; ++j) { | |
454 m_postFilter[j]->push(0.0); | |
455 } | |
456 m_pianoRoll.push_back(map<int, double>()); | |
457 if (wantShifts) { | |
458 m_pianoRollShifts.push_back(map<int, int>()); | |
459 } | |
460 continue; | |
461 } | |
462 | |
463 postProcess(localPitches[i], localBestShifts[i], wantShifts); | |
384 | 464 |
385 FeatureList noteFeatures = postProcess(pitchMatrix[i]); | 465 FeatureList noteFeatures = noteTrack(shiftCount); |
386 | 466 |
387 for (FeatureList::const_iterator fi = noteFeatures.begin(); | 467 for (FeatureList::const_iterator fi = noteFeatures.begin(); |
388 fi != noteFeatures.end(); ++fi) { | 468 fi != noteFeatures.end(); ++fi) { |
389 fs[m_notesOutputNo].push_back(*fi); | 469 fs[m_notesOutputNo].push_back(*fi); |
390 } | 470 } |
458 } | 538 } |
459 | 539 |
460 return out; | 540 return out; |
461 } | 541 } |
462 | 542 |
463 Vamp::Plugin::FeatureList | 543 void |
464 Silvet::postProcess(const vector<double> &pitches) | 544 Silvet::postProcess(const vector<double> &pitches, |
465 { | 545 const vector<int> &bestShifts, |
546 bool wantShifts) | |
547 { | |
466 vector<double> filtered; | 548 vector<double> filtered; |
467 | 549 |
468 for (int j = 0; j < processingNotes; ++j) { | 550 for (int j = 0; j < processingNotes; ++j) { |
469 m_postFilter[j]->push(pitches[j]); | 551 m_postFilter[j]->push(pitches[j]); |
470 filtered.push_back(m_postFilter[j]->get()); | 552 filtered.push_back(m_postFilter[j]->get()); |
471 } | 553 } |
472 | 554 |
473 int postFilterLatency = int(m_postFilter[0]->getSize() / 2); | |
474 | |
475 // Threshold for level and reduce number of candidate pitches | 555 // Threshold for level and reduce number of candidate pitches |
476 | 556 |
477 int polyphony = 5; | 557 int polyphony = 5; |
478 | 558 |
479 //!!! make this a parameter (was 4.8, try adjusting, compare levels against matlab code) | 559 //!!! make this a parameter (was 4.8, try adjusting, compare levels against matlab code) |
481 // double threshold = 4.8; | 561 // double threshold = 4.8; |
482 | 562 |
483 typedef std::multimap<double, int> ValueIndexMap; | 563 typedef std::multimap<double, int> ValueIndexMap; |
484 | 564 |
485 ValueIndexMap strengths; | 565 ValueIndexMap strengths; |
566 | |
486 for (int j = 0; j < processingNotes; ++j) { | 567 for (int j = 0; j < processingNotes; ++j) { |
487 strengths.insert(ValueIndexMap::value_type(filtered[j], j)); | 568 double strength = filtered[j]; |
488 } | 569 if (strength < threshold) continue; |
570 strengths.insert(ValueIndexMap::value_type(strength, j)); | |
571 } | |
572 | |
573 ValueIndexMap::const_iterator si = strengths.end(); | |
489 | 574 |
490 map<int, double> active; | 575 map<int, double> active; |
491 ValueIndexMap::const_iterator si = strengths.end(); | 576 map<int, int> activeShifts; |
492 while (int(active.size()) < polyphony) { | 577 |
578 while (int(active.size()) < polyphony && si != strengths.begin()) { | |
579 | |
493 --si; | 580 --si; |
494 if (si->first < threshold) break; | 581 |
495 // cerr << si->second << " : " << si->first << endl; | 582 double strength = si->first; |
496 active[si->second] = si->first; | 583 int j = si->second; |
497 if (si == strengths.begin()) break; | 584 |
498 } | 585 active[j] = strength; |
499 | 586 |
587 if (wantShifts) { | |
588 activeShifts[j] = bestShifts[j]; | |
589 } | |
590 } | |
591 | |
592 m_pianoRoll.push_back(active); | |
593 | |
594 if (wantShifts) { | |
595 m_pianoRollShifts.push_back(activeShifts); | |
596 } | |
597 } | |
598 | |
599 Vamp::Plugin::FeatureList | |
600 Silvet::noteTrack(int shiftCount) | |
601 { | |
500 // Minimum duration pruning, and conversion to notes. We can only | 602 // Minimum duration pruning, and conversion to notes. We can only |
501 // report notes that have just ended (i.e. that are absent in the | 603 // report notes that have just ended (i.e. that are absent in the |
502 // latest active set but present in the last set in the piano | 604 // latest active set but present in the prior set in the piano |
503 // roll) -- any notes that ended earlier will have been reported | 605 // roll) -- any notes that ended earlier will have been reported |
504 // already, and if they haven't ended, we don't know their | 606 // already, and if they haven't ended, we don't know their |
505 // duration. | 607 // duration. |
506 | 608 |
507 int width = m_pianoRoll.size(); | 609 int width = m_pianoRoll.size() - 1; |
610 | |
611 const map<int, double> &active = m_pianoRoll[width]; | |
508 | 612 |
509 double columnDuration = 1.0 / m_colsPerSec; | 613 double columnDuration = 1.0 / m_colsPerSec; |
510 | 614 |
511 // only keep notes >= 100ms or thereabouts | 615 // only keep notes >= 100ms or thereabouts |
512 int durationThreshold = floor(0.1 / columnDuration); // columns | 616 int durationThreshold = floor(0.1 / columnDuration); // columns |
513 if (durationThreshold < 1) durationThreshold = 1; | 617 if (durationThreshold < 1) durationThreshold = 1; |
514 | 618 |
515 FeatureList noteFeatures; | 619 FeatureList noteFeatures; |
516 | 620 |
517 if (width < durationThreshold + 1) { | 621 if (width < durationThreshold + 1) { |
518 m_pianoRoll.push_back(active); | |
519 return noteFeatures; | 622 return noteFeatures; |
520 } | 623 } |
521 | 624 |
522 //!!! try: 20ms intervals in intensive mode | |
523 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix) | 625 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix) |
524 | 626 |
525 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin(); | 627 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin(); |
526 ni != m_pianoRoll[width-1].end(); ++ni) { | 628 ni != m_pianoRoll[width-1].end(); ++ni) { |
527 | 629 |
534 | 636 |
535 // the note was playing but just ended | 637 // the note was playing but just ended |
536 int end = width; | 638 int end = width; |
537 int start = end-1; | 639 int start = end-1; |
538 | 640 |
539 double maxStrength = 0.0; | |
540 | |
541 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) { | 641 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) { |
542 double strength = m_pianoRoll[start][note]; | 642 --start; |
543 if (strength > maxStrength) { | 643 } |
544 maxStrength = strength; | 644 ++start; |
645 | |
646 if ((end - start) < durationThreshold) { | |
647 continue; | |
648 } | |
649 | |
650 emitNote(start, end, note, shiftCount, noteFeatures); | |
651 } | |
652 | |
653 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl; | |
654 | |
655 return noteFeatures; | |
656 } | |
657 | |
658 void | |
659 Silvet::emitNote(int start, int end, int note, int shiftCount, | |
660 FeatureList &noteFeatures) | |
661 { | |
662 int partStart = start; | |
663 int partShift = 0; | |
664 int partVelocity = 0; | |
665 | |
666 Feature f; | |
667 f.hasTimestamp = true; | |
668 f.hasDuration = true; | |
669 | |
670 double columnDuration = 1.0 / m_colsPerSec; | |
671 int postFilterLatency = int(m_postFilter[0]->getSize() / 2); | |
672 int partThreshold = floor(0.05 / columnDuration); | |
673 | |
674 for (int i = start; i != end; ++i) { | |
675 | |
676 double strength = m_pianoRoll[i][note]; | |
677 | |
678 int shift = 0; | |
679 | |
680 if (shiftCount > 1) { | |
681 | |
682 shift = m_pianoRollShifts[i][note]; | |
683 | |
684 if (i == partStart) { | |
685 partShift = shift; | |
545 } | 686 } |
546 --start; | 687 |
547 } | 688 if (i > partStart + partThreshold && shift != partShift) { |
548 ++start; | 689 |
549 | 690 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl; |
550 int duration = width - start; | 691 |
551 // cerr << "duration " << duration << " for just-ended note " << note << endl; | 692 // pitch has changed, emit an intermediate note |
552 if (duration < durationThreshold) { | 693 f.timestamp = RealTime::fromSeconds |
553 // spurious | 694 (columnDuration * (partStart - postFilterLatency) + 0.02); |
554 continue; | 695 f.duration = RealTime::fromSeconds |
555 } | 696 (columnDuration * (i - partStart)); |
556 | 697 f.values.clear(); |
557 int velocity = maxStrength * 2; | 698 f.values.push_back |
558 if (velocity > 127) velocity = 127; | 699 (noteFrequency(note, partShift, shiftCount)); |
559 | 700 f.values.push_back(partVelocity); |
560 // cerr << "Found a genuine note, starting at " << columnDuration * start << " with duration " << columnDuration * duration << endl; | 701 f.label = noteName(note); |
561 | 702 noteFeatures.push_back(f); |
562 Feature nf; | 703 partStart = i; |
563 nf.hasTimestamp = true; | 704 partShift = shift; |
564 nf.timestamp = RealTime::fromSeconds | 705 partVelocity = 0; |
565 (columnDuration * (start - postFilterLatency) + 0.02); | 706 } |
566 nf.hasDuration = true; | 707 } |
567 nf.duration = RealTime::fromSeconds | 708 |
568 (columnDuration * duration); | 709 int v = strength * 2; |
569 nf.values.push_back(noteFrequency(note)); | 710 if (v > 127) v = 127; |
570 nf.values.push_back(velocity); | 711 |
571 nf.label = noteName(note); | 712 if (v > partVelocity) { |
572 noteFeatures.push_back(nf); | 713 partVelocity = v; |
573 } | 714 } |
574 | 715 } |
575 m_pianoRoll.push_back(active); | 716 |
576 | 717 if (end >= partStart + partThreshold) { |
577 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl; | 718 f.timestamp = RealTime::fromSeconds |
578 | 719 (columnDuration * (partStart - postFilterLatency) + 0.02); |
579 return noteFeatures; | 720 f.duration = RealTime::fromSeconds |
580 } | 721 (columnDuration * (end - partStart)); |
581 | 722 f.values.clear(); |
723 f.values.push_back | |
724 (noteFrequency(note, partShift, shiftCount)); | |
725 f.values.push_back(partVelocity); | |
726 f.label = noteName(note); | |
727 noteFeatures.push_back(f); | |
728 } | |
729 } |