comparison src/Silvet.cpp @ 169:192c4ba3de45 finetune

More work on returning sensibly-shifted notes with fine tuning
author Chris Cannam
date Wed, 21 May 2014 11:15:53 +0100
parents 51bd3d833db8
children 2114b2545c27
comparison
equal deleted inserted replaced
168:51bd3d833db8 169:192c4ba3de45
263 } 263 }
264 264
265 float 265 float
266 Silvet::noteFrequency(int note, int shift, int shiftCount) const 266 Silvet::noteFrequency(int note, int shift, int shiftCount) const
267 { 267 {
268 float fineNote = float(note) + float(shift) / float(shiftCount); 268 // Convert shift number to a pitch shift. The given shift number
269 return float(27.5 * pow(2.0, fineNote / 12.0)); 269 // is an offset into the template array, which starts with some
270 // zeros, followed by the template, then some trailing zeros.
271 //
272 // Example: if we have templateMaxShift == 2 and thus shiftCount
273 // == 5, then the number will be in the range 0-4 and the template
274 // will have 2 zeros at either end. Thus number 2 represents the
275 // template "as recorded", for a pitch shift of 0; smaller indices
276 // represent moving the template *up* in pitch (by introducing
277 // zeros at the start, which is the low-frequency end), for a
278 // positive pitch shift; and higher values represent moving it
279 // down in pitch, for a negative pitch shift.
280
281 float pshift =
282 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
283
284 return float(27.5 * pow(2.0, (note + pshift) / 12.0));
270 } 285 }
271 286
272 bool 287 bool
273 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize) 288 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
274 { 289 {
541 556
542 if (shiftCount > 1) { 557 if (shiftCount > 1) {
543 558
544 // find preferred shift f for note j 559 // find preferred shift f for note j
545 int bestShift = 0; 560 int bestShift = 0;
546
547 float bestShiftValue = 0.f; 561 float bestShiftValue = 0.f;
548 for (int f = 0; f < shiftCount; ++f) { 562 for (int f = 0; f < shiftCount; ++f) {
549 if (f == 0 || shifts[f][j] > bestShiftValue) { 563 if (f == 0 || shifts[f][j] > bestShiftValue) {
550 bestShiftValue = shifts[f][j]; 564 bestShiftValue = shifts[f][j];
551 bestShift = f - int(shiftCount / 2); 565 bestShift = f;
552 } 566 }
553 } 567 }
554 //!!! I think our shift array per note is actually upside down, check this
555 568
556 activeShifts[j] = bestShift; 569 activeShifts[j] = bestShift;
557 } 570 }
558 } 571 }
559 572
571 // latest active set but present in the prior set in the piano 584 // latest active set but present in the prior set in the piano
572 // roll) -- any notes that ended earlier will have been reported 585 // roll) -- any notes that ended earlier will have been reported
573 // already, and if they haven't ended, we don't know their 586 // already, and if they haven't ended, we don't know their
574 // duration. 587 // duration.
575 588
576 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
577
578 int width = m_pianoRoll.size() - 1; 589 int width = m_pianoRoll.size() - 1;
579 590
580 const map<int, double> &active = m_pianoRoll[width]; 591 const map<int, double> &active = m_pianoRoll[width];
581 592
582 double columnDuration = 1.0 / m_colsPerSec; 593 double columnDuration = 1.0 / m_colsPerSec;
605 616
606 // the note was playing but just ended 617 // the note was playing but just ended
607 int end = width; 618 int end = width;
608 int start = end-1; 619 int start = end-1;
609 620
610 double maxStrength = 0.0;
611
612 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) { 621 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
613 double strength = m_pianoRoll[start][note]; 622 --start;
614 if (strength > maxStrength) { 623 }
615 maxStrength = strength; 624 ++start;
625
626 if ((end - start) < durationThreshold) {
627 continue;
628 }
629
630 emitNote(start, end, note, shiftCount, noteFeatures);
631 }
632
633 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
634
635 return noteFeatures;
636 }
637
638 void
639 Silvet::emitNote(int start, int end, int note, int shiftCount,
640 FeatureList &noteFeatures)
641 {
642 int partStart = start;
643 int partShift = 0;
644 int partVelocity = 0;
645
646 Feature f;
647 f.hasTimestamp = true;
648 f.hasDuration = true;
649
650 double columnDuration = 1.0 / m_colsPerSec;
651 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
652 int partThreshold = floor(0.05 / columnDuration);
653
654 for (int i = start; i != end; ++i) {
655
656 double strength = m_pianoRoll[i][note];
657
658 int shift = 0;
659
660 if (shiftCount > 1) {
661
662 shift = m_pianoRollShifts[i][note];
663
664 if (i == partStart) {
665 partShift = shift;
616 } 666 }
617 --start; 667
618 } 668 if (i > partStart + partThreshold && shift != partShift) {
619 ++start; 669
620 670 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
621 int duration = width - start; 671
622 // cerr << "duration " << duration << " for just-ended note " << note << endl; 672 // pitch has changed, emit an intermediate note
623 if (duration < durationThreshold) { 673 f.timestamp = RealTime::fromSeconds
624 // spurious 674 (columnDuration * (partStart - postFilterLatency) + 0.02);
625 continue; 675 f.duration = RealTime::fromSeconds
626 } 676 (columnDuration * (i - partStart));
627 677 f.values.clear();
628 int velocity = maxStrength * 2; 678 f.values.push_back
629 if (velocity > 127) velocity = 127; 679 (noteFrequency(note, partShift, shiftCount));
630 680 f.values.push_back(partVelocity);
631 // cerr << "Found a genuine note, starting at " << columnDuration * start << " with duration " << columnDuration * duration << endl; 681 f.label = noteName(note);
632 682 noteFeatures.push_back(f);
633 Feature nf; 683 partStart = i;
634 nf.hasTimestamp = true; 684 partShift = shift;
635 nf.timestamp = RealTime::fromSeconds 685 partVelocity = 0;
636 (columnDuration * (start - postFilterLatency) + 0.02); 686 }
637 nf.hasDuration = true; 687 }
638 nf.duration = RealTime::fromSeconds 688
639 (columnDuration * duration); 689 int v = strength * 2;
640 nf.values.push_back(noteFrequency(note, shiftCount)); 690 if (v > 127) v = 127;
641 nf.values.push_back(velocity); 691
642 nf.label = noteName(note); 692 if (v > partVelocity) {
643 noteFeatures.push_back(nf); 693 partVelocity = v;
644 } 694 }
645 695 }
646 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl; 696
647 697 if (end >= partStart + partThreshold) {
648 return noteFeatures; 698 f.timestamp = RealTime::fromSeconds
649 } 699 (columnDuration * (partStart - postFilterLatency) + 0.02);
650 700 f.duration = RealTime::fromSeconds
701 (columnDuration * (end - partStart));
702 f.values.clear();
703 f.values.push_back
704 (noteFrequency(note, partShift, shiftCount));
705 f.values.push_back(partVelocity);
706 f.label = noteName(note);
707 noteFeatures.push_back(f);
708 }
709 }