Mercurial > hg > pyin
comparison PYinVamp.cpp @ 131:b877df85ad9e fixedlag
mono pitch works now with the refactored HMM implementation
author | Matthias Mauch <mail@matthiasmauch.net> |
---|---|
date | Fri, 03 Jul 2015 14:09:05 +0100 |
parents | 080fe18f5ebf |
children | 926c292fa3ff |
comparison
equal
deleted
inserted
replaced
130:080fe18f5ebf | 131:b877df85ad9e |
---|---|
12 */ | 12 */ |
13 | 13 |
14 #include "PYinVamp.h" | 14 #include "PYinVamp.h" |
15 #include "MonoNote.h" | 15 #include "MonoNote.h" |
16 #include "MonoPitch.h" | 16 #include "MonoPitch.h" |
17 #include "MonoPitchHMM.h" | |
17 | 18 |
18 #include "vamp-sdk/FFT.h" | 19 #include "vamp-sdk/FFT.h" |
19 | 20 |
20 #include <vector> | 21 #include <vector> |
21 #include <algorithm> | 22 #include <algorithm> |
48 m_outputUnvoiced(0.0f), | 49 m_outputUnvoiced(0.0f), |
49 m_preciseTime(0.0f), | 50 m_preciseTime(0.0f), |
50 m_lowAmp(0.1f), | 51 m_lowAmp(0.1f), |
51 m_onsetSensitivity(0.7f), | 52 m_onsetSensitivity(0.7f), |
52 m_pruneThresh(0.1f), | 53 m_pruneThresh(0.1f), |
54 m_pitchHmm(), | |
53 m_pitchProb(0), | 55 m_pitchProb(0), |
54 m_timestamp(0), | 56 m_timestamp(0), |
55 m_level(0) | 57 m_level(0) |
56 { | 58 { |
57 } | 59 } |
491 } | 493 } |
492 } | 494 } |
493 | 495 |
494 if (m_fixedLag == 0.f) | 496 if (m_fixedLag == 0.f) |
495 { | 497 { |
498 vector<double> tempObsProb = m_pitchHmm.calculateObsProb(tempPitchProb); | |
499 if (m_timestamp.empty()) | |
500 { | |
501 m_pitchHmm.initialise(tempObsProb); | |
502 } else { | |
503 m_pitchHmm.process(tempObsProb); | |
504 } | |
496 m_pitchProb.push_back(tempPitchProb); | 505 m_pitchProb.push_back(tempPitchProb); |
497 } else { | 506 } else { |
498 // Damn, so I need the hmm right here! Sadly it isn't defined here yet. | 507 // Damn, so I need the hmm right here! Sadly it isn't defined here yet. |
499 // Perhaps I could re-design the whole shebang | 508 // Perhaps I could re-design the whole shebang |
500 } | 509 } |
547 | 556 |
548 if (m_pitchProb.empty()) { | 557 if (m_pitchProb.empty()) { |
549 return fs; | 558 return fs; |
550 } | 559 } |
551 | 560 |
552 // MONO-PITCH STUFF | 561 // ================== P I T C H T R A C K ================================= |
553 MonoPitch mp; | 562 |
554 vector<float> mpOut = mp.process(m_pitchProb); | 563 vector<int> rawPitchPath = m_pitchHmm.finalise(); |
555 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) | 564 vector<float> mpOut; |
556 { | 565 |
557 if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue; | 566 for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame) |
567 { | |
568 float freq = pitchState2Freq(rawPitchPath[iFrame], m_pitchProb[iFrame]); | |
569 mpOut.push_back(freq); // for note processing below | |
570 | |
558 f.timestamp = m_timestamp[iFrame]; | 571 f.timestamp = m_timestamp[iFrame]; |
572 // std::cerr << f.timestamp << std::endl; | |
559 f.values.clear(); | 573 f.values.clear(); |
574 | |
575 // different output modes | |
576 if (freq < 0 && (m_outputUnvoiced==0)) continue; | |
560 if (m_outputUnvoiced == 1) | 577 if (m_outputUnvoiced == 1) |
561 { | 578 { |
562 f.values.push_back(fabs(mpOut[iFrame])); | 579 f.values.push_back(fabs(freq)); |
563 } else { | 580 } else { |
564 f.values.push_back(mpOut[iFrame]); | 581 f.values.push_back(freq); |
565 } | 582 } |
583 fs[m_oSmoothedPitchTrack].push_back(f); | |
584 } | |
585 | |
586 // for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) | |
587 // { | |
588 // if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue; | |
589 | |
590 // if (m_outputUnvoiced == 1) | |
591 // { | |
592 // f.values.push_back(fabs(mpOut[iFrame])); | |
593 // } else { | |
594 // f.values.push_back(mpOut[iFrame]); | |
595 // } | |
566 | 596 |
567 fs[m_oSmoothedPitchTrack].push_back(f); | 597 // fs[m_oSmoothedPitchTrack].push_back(f); |
568 } | 598 // } |
569 | 599 |
570 // MONO-NOTE STUFF | 600 // ======================== N O T E S ====================================== |
571 // std::cerr << "Mono Note Stuff" << std::endl; | |
572 MonoNote mn; | 601 MonoNote mn; |
573 std::vector<std::vector<std::pair<double, double> > > smoothedPitch; | 602 std::vector<std::vector<std::pair<double, double> > > smoothedPitch; |
574 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) { | 603 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) { |
575 std::vector<std::pair<double, double> > temp; | 604 std::vector<std::pair<double, double> > temp; |
576 if (mpOut[iFrame] > 0) | 605 if (mpOut[iFrame] > 0) |
632 } | 661 } |
633 oldIsVoiced = isVoiced; | 662 oldIsVoiced = isVoiced; |
634 } | 663 } |
635 return fs; | 664 return fs; |
636 } | 665 } |
666 | |
667 float | |
668 PYinVamp::pitchState2Freq(int state, vector<pair<double, double> > pitchProb) | |
669 { | |
670 float hmmFreq = m_pitchHmm.m_freqs[state]; | |
671 float bestFreq = 0; | |
672 float leastDist = 10000; | |
673 if (hmmFreq > 0) | |
674 { | |
675 // This was a Yin estimate, so try to get original pitch estimate back | |
676 // ... a bit hacky, since we could have directly saved the frequency | |
677 // that was assigned to the HMM bin in hmm.calculateObsProb -- but would | |
678 // have had to rethink the interface of that method. | |
679 for (size_t iPt = 0; iPt < pitchProb.size(); ++iPt) | |
680 { | |
681 float freq = 440. * | |
682 std::pow(2, | |
683 (pitchProb[iPt].first - 69)/12); | |
684 float dist = std::abs(hmmFreq-freq); | |
685 if (dist < leastDist) | |
686 { | |
687 leastDist = dist; | |
688 bestFreq = freq; | |
689 } | |
690 } | |
691 } else { | |
692 bestFreq = hmmFreq; | |
693 } | |
694 return bestFreq; | |
695 } |