Mercurial > hg > pyin
comparison PYinVamp.cpp @ 133:83978b93aac1 fixedlag
ah, didn't commit when I stopped working... what did I do?
author | Matthias Mauch <mail@matthiasmauch.net> |
---|---|
date | Mon, 13 Jul 2015 12:10:06 +0100 |
parents | 926c292fa3ff |
children | 72bda34e0e64 |
comparison
equal
deleted
inserted
replaced
132:926c292fa3ff | 133:83978b93aac1 |
---|---|
51 m_onsetSensitivity(0.7f), | 51 m_onsetSensitivity(0.7f), |
52 m_pruneThresh(0.1f), | 52 m_pruneThresh(0.1f), |
53 m_pitchHmm(0), | 53 m_pitchHmm(0), |
54 m_pitchProb(0), | 54 m_pitchProb(0), |
55 m_timestamp(0), | 55 m_timestamp(0), |
56 m_level(0) | 56 m_level(0), |
57 m_pitchTrack(0) | |
57 { | 58 { |
58 } | 59 } |
59 | 60 |
60 PYinVamp::~PYinVamp() | 61 PYinVamp::~PYinVamp() |
61 { | 62 { |
446 else m_pitchHmm = MonoPitchHMM(0); | 447 else m_pitchHmm = MonoPitchHMM(0); |
447 | 448 |
448 m_pitchProb.clear(); | 449 m_pitchProb.clear(); |
449 m_timestamp.clear(); | 450 m_timestamp.clear(); |
450 m_level.clear(); | 451 m_level.clear(); |
452 m_pitchTrack.clear(); | |
451 /* | 453 /* |
452 std::cerr << "PYinVamp::reset" | 454 std::cerr << "PYinVamp::reset" |
453 << ", blockSize = " << m_blockSize | 455 << ", blockSize = " << m_blockSize |
454 << std::endl; | 456 << std::endl; |
455 */ | 457 */ |
456 } | 458 } |
457 | 459 |
458 PYinVamp::FeatureSet | 460 PYinVamp::FeatureSet |
459 PYinVamp::process(const float *const *inputBuffers, RealTime timestamp) | 461 PYinVamp::process(const float *const *inputBuffers, RealTime timestamp) |
460 { | 462 { |
463 std::cerr << timestamp << std::endl; | |
461 int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4; | 464 int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4; |
462 timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate)); | 465 timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, |
466 lrintf(m_inputSampleRate)); | |
463 | 467 |
464 FeatureSet fs; | 468 FeatureSet fs; |
465 | 469 |
466 float rms = 0; | 470 float rms = 0; |
467 | 471 |
506 m_pitchProb.push_back(tempPitchProb); | 510 m_pitchProb.push_back(tempPitchProb); |
507 m_timestamp.push_back(timestamp); | 511 m_timestamp.push_back(timestamp); |
508 | 512 |
509 int lag = m_pitchHmm.m_fixedLag; | 513 int lag = m_pitchHmm.m_fixedLag; |
510 | 514 |
511 if (m_fixedLag == 1.f) | 515 if (m_fixedLag == 1.f) // do fixed-lag smoothing instead of full Viterbi |
512 { | 516 { |
513 if (m_timestamp.size() == lag + 1) | 517 if (m_timestamp.size() == lag + 1) |
514 { | 518 { |
515 m_timestamp.pop_front(); | 519 m_timestamp.pop_front(); |
516 m_pitchProb.pop_front(); | 520 m_pitchProb.pop_front(); |
518 Feature f; | 522 Feature f; |
519 f.hasTimestamp = true; | 523 f.hasTimestamp = true; |
520 vector<int> rawPitchPath = m_pitchHmm.track(); | 524 vector<int> rawPitchPath = m_pitchHmm.track(); |
521 float freq = m_pitchHmm.nearestFreq(rawPitchPath[0], | 525 float freq = m_pitchHmm.nearestFreq(rawPitchPath[0], |
522 m_pitchProb[0]); | 526 m_pitchProb[0]); |
527 m_pitchTrack.push_back(freq); | |
523 f.timestamp = m_timestamp[0]; | 528 f.timestamp = m_timestamp[0]; |
524 f.values.clear(); | 529 f.values.clear(); |
525 | 530 |
526 // different output modes | 531 // different output modes |
527 if (freq < 0 && (m_outputUnvoiced==0)) | 532 if (freq < 0 && (m_outputUnvoiced==0)) |
590 } | 595 } |
591 | 596 |
592 // ================== P I T C H T R A C K ================================= | 597 // ================== P I T C H T R A C K ================================= |
593 | 598 |
594 vector<int> rawPitchPath = m_pitchHmm.track(); | 599 vector<int> rawPitchPath = m_pitchHmm.track(); |
595 vector<float> mpOut; | |
596 | 600 |
597 for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame) | 601 for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame) |
598 { | 602 { |
599 float freq = m_pitchHmm.nearestFreq(rawPitchPath[iFrame], | 603 float freq = m_pitchHmm.nearestFreq(rawPitchPath[iFrame], |
600 m_pitchProb[iFrame]); | 604 m_pitchProb[iFrame]); |
601 mpOut.push_back(freq); // for note processing below | 605 m_pitchTrack.push_back(freq); // for note processing below |
602 | 606 |
603 f.timestamp = m_timestamp[iFrame]; | 607 f.timestamp = m_timestamp[iFrame]; |
604 f.values.clear(); | 608 f.values.clear(); |
605 | 609 |
606 // different output modes | 610 // different output modes |
607 if (freq < 0 && (m_outputUnvoiced==0)) continue; | 611 if (freq < 0 && (m_outputUnvoiced==0)) continue; |
613 } | 617 } |
614 fs[m_oSmoothedPitchTrack].push_back(f); | 618 fs[m_oSmoothedPitchTrack].push_back(f); |
615 } | 619 } |
616 | 620 |
617 // ======================== N O T E S ====================================== | 621 // ======================== N O T E S ====================================== |
618 // MonoNote mn; | 622 MonoNote mn; |
619 // std::vector<std::vector<std::pair<double, double> > > smoothedPitch; | 623 std::vector<std::vector<std::pair<double, double> > > smoothedPitch; |
620 // for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) { | 624 for (size_t iFrame = 0; iFrame < m_pitchTrack.size(); ++iFrame) { |
621 // std::vector<std::pair<double, double> > temp; | 625 std::vector<std::pair<double, double> > temp; |
622 // if (mpOut[iFrame] > 0) | 626 if (m_pitchTrack[iFrame] > 0) |
623 // { | 627 { |
624 // double tempPitch = 12 * | 628 double tempPitch = 12 * |
625 // std::log(mpOut[iFrame]/440)/std::log(2.) + 69; | 629 std::log(m_pitchTrack[iFrame]/440)/std::log(2.) + 69; |
626 // temp.push_back(std::pair<double,double>(tempPitch, .9)); | 630 temp.push_back(std::pair<double,double>(tempPitch, .9)); |
627 // } | 631 // std::cerr << "tempPitch: " << tempPitch << std::endl; |
628 // smoothedPitch.push_back(temp); | 632 } |
629 // } | 633 // std::cerr << "temp size: " << temp.size() << std::endl; |
630 // // vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb); | 634 smoothedPitch.push_back(temp); |
631 // vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch); | 635 } |
632 | 636 |
633 // // turning feature into a note feature | 637 vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch); |
634 // f.hasTimestamp = true; | 638 std::cerr << "mnOut size: " << mnOut.size() << std::endl; |
635 // f.hasDuration = true; | 639 std::cerr << "m_pitchTrack size: " << m_pitchTrack.size() << std::endl; |
636 // f.values.clear(); | 640 |
641 // turning feature into a note feature | |
642 f.hasTimestamp = true; | |
643 f.hasDuration = true; | |
644 f.values.clear(); | |
637 | 645 |
638 // int onsetFrame = 0; | 646 int onsetFrame = 0; |
639 // bool isVoiced = 0; | 647 bool isVoiced = 0; |
640 // bool oldIsVoiced = 0; | 648 bool oldIsVoiced = 0; |
641 // size_t nFrame = m_pitchProb.size(); | 649 size_t nFrame = m_pitchTrack.size(); |
642 | 650 |
643 // float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize; | 651 float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize; |
644 | 652 |
645 // // the body of the loop below should be in a function/method | 653 // the body of the loop below should be in a function/method |
646 // std::vector<float> notePitchTrack; // collects pitches for one note at a time | 654 // but what does it actually do?? |
647 // for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) | 655 // * takes the result of the note tracking HMM |
648 // { | 656 // * collects contiguously pitched pitches |
649 // isVoiced = mnOut[iFrame].noteState < 3 | 657 // * writes a note once it notices the voiced segment has ended |
650 // && smoothedPitch[iFrame].size() > 0 | 658 // complications: |
651 // && (iFrame >= nFrame-2 | 659 // * it needs a lookahead of two frames for m_level (wtf was I thinking) |
652 // || ((m_level[iFrame]/m_level[iFrame+2]) > | 660 // * it needs to know the timestamp (which can be guessed from the frame no) |
653 // m_onsetSensitivity)); | 661 // * |
654 // if (isVoiced && iFrame != nFrame-1) | 662 int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4; |
655 // { | 663 RealTime timestampOffset = Vamp::RealTime::frame2RealTime(offset, |
656 // if (oldIsVoiced == 0) // beginning of a note | 664 lrintf(m_inputSampleRate)); |
657 // { | 665 |
658 // onsetFrame = iFrame; | 666 std::vector<float> notePitchTrack; // collects pitches for 1 note at a time |
659 // } | 667 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) |
660 // float pitch = smoothedPitch[iFrame][0].first; | 668 { |
661 // notePitchTrack.push_back(pitch); // add to the note's pitch track | 669 isVoiced = mnOut[iFrame].noteState < 3 |
662 // } else { // not currently voiced | 670 && smoothedPitch[iFrame].size() > 0 |
663 // if (oldIsVoiced == 1) // end of note | 671 && (iFrame >= nFrame-2 |
664 // { | 672 || ((m_level[iFrame]/m_level[iFrame+2]) > m_onsetSensitivity)); |
665 // if (notePitchTrack.size() >= minNoteFrames) | 673 if (isVoiced && iFrame != nFrame-1) |
666 // { | 674 { |
667 // std::sort(notePitchTrack.begin(), notePitchTrack.end()); | 675 if (oldIsVoiced == 0) // beginning of a note |
668 // float medianPitch = notePitchTrack[notePitchTrack.size()/2]; | 676 { |
669 // float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440; | 677 onsetFrame = iFrame; |
670 // f.values.clear(); | 678 } |
671 // f.values.push_back(medianFreq); | 679 float pitch = smoothedPitch[iFrame][0].first; |
672 // f.timestamp = m_timestamp[onsetFrame]; | 680 notePitchTrack.push_back(pitch); // add to the note's pitch track |
673 // f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame]; | 681 } else { // not currently voiced |
674 // fs[m_oNotes].push_back(f); | 682 if (oldIsVoiced == 1) // end of note |
675 // } | 683 { |
676 // notePitchTrack.clear(); | 684 if (notePitchTrack.size() >= minNoteFrames) |
677 // } | 685 { |
678 // } | 686 std::sort(notePitchTrack.begin(), notePitchTrack.end()); |
679 // oldIsVoiced = isVoiced; | 687 float medianPitch = notePitchTrack[notePitchTrack.size()/2]; |
680 // } | 688 float medianFreq = |
689 std::pow(2,(medianPitch - 69) / 12) * 440; | |
690 f.values.clear(); | |
691 f.values.push_back(medianFreq); | |
692 RealTime start = RealTime::frame2RealTime( | |
693 onsetFrame * m_stepSize, lrintf(m_inputSampleRate)) + | |
694 timestampOffset; | |
695 RealTime end = RealTime::frame2RealTime( | |
696 iFrame * m_stepSize, lrintf(m_inputSampleRate)) + | |
697 timestampOffset; | |
698 f.timestamp = start; | |
699 f.duration = end - start; | |
700 fs[m_oNotes].push_back(f); | |
701 } | |
702 notePitchTrack.clear(); | |
703 } | |
704 } | |
705 oldIsVoiced = isVoiced; | |
706 } | |
681 return fs; | 707 return fs; |
682 } | 708 } |