comparison PYinVamp.cpp @ 131:b877df85ad9e fixedlag

mono pitch works now with the refactored HMM implementation
author Matthias Mauch <mail@matthiasmauch.net>
date Fri, 03 Jul 2015 14:09:05 +0100
parents 080fe18f5ebf
children 926c292fa3ff
comparison
equal deleted inserted replaced
130:080fe18f5ebf 131:b877df85ad9e
12 */ 12 */
13 13
14 #include "PYinVamp.h" 14 #include "PYinVamp.h"
15 #include "MonoNote.h" 15 #include "MonoNote.h"
16 #include "MonoPitch.h" 16 #include "MonoPitch.h"
17 #include "MonoPitchHMM.h"
17 18
18 #include "vamp-sdk/FFT.h" 19 #include "vamp-sdk/FFT.h"
19 20
20 #include <vector> 21 #include <vector>
21 #include <algorithm> 22 #include <algorithm>
48 m_outputUnvoiced(0.0f), 49 m_outputUnvoiced(0.0f),
49 m_preciseTime(0.0f), 50 m_preciseTime(0.0f),
50 m_lowAmp(0.1f), 51 m_lowAmp(0.1f),
51 m_onsetSensitivity(0.7f), 52 m_onsetSensitivity(0.7f),
52 m_pruneThresh(0.1f), 53 m_pruneThresh(0.1f),
54 m_pitchHmm(),
53 m_pitchProb(0), 55 m_pitchProb(0),
54 m_timestamp(0), 56 m_timestamp(0),
55 m_level(0) 57 m_level(0)
56 { 58 {
57 } 59 }
491 } 493 }
492 } 494 }
493 495
494 if (m_fixedLag == 0.f) 496 if (m_fixedLag == 0.f)
495 { 497 {
498 vector<double> tempObsProb = m_pitchHmm.calculateObsProb(tempPitchProb);
499 if (m_timestamp.empty())
500 {
501 m_pitchHmm.initialise(tempObsProb);
502 } else {
503 m_pitchHmm.process(tempObsProb);
504 }
496 m_pitchProb.push_back(tempPitchProb); 505 m_pitchProb.push_back(tempPitchProb);
497 } else { 506 } else {
498 // Damn, so I need the hmm right here! Sadly it isn't defined here yet. 507 // Damn, so I need the hmm right here! Sadly it isn't defined here yet.
499 // Perhaps I could re-design the whole shebang 508 // Perhaps I could re-design the whole shebang
500 } 509 }
547 556
548 if (m_pitchProb.empty()) { 557 if (m_pitchProb.empty()) {
549 return fs; 558 return fs;
550 } 559 }
551 560
552 // MONO-PITCH STUFF 561 // ================== P I T C H T R A C K =================================
553 MonoPitch mp; 562
554 vector<float> mpOut = mp.process(m_pitchProb); 563 vector<int> rawPitchPath = m_pitchHmm.finalise();
555 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) 564 vector<float> mpOut;
556 { 565
557 if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue; 566 for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame)
567 {
568 float freq = pitchState2Freq(rawPitchPath[iFrame], m_pitchProb[iFrame]);
569 mpOut.push_back(freq); // for note processing below
570
558 f.timestamp = m_timestamp[iFrame]; 571 f.timestamp = m_timestamp[iFrame];
572 // std::cerr << f.timestamp << std::endl;
559 f.values.clear(); 573 f.values.clear();
574
575 // different output modes
576 if (freq < 0 && (m_outputUnvoiced==0)) continue;
560 if (m_outputUnvoiced == 1) 577 if (m_outputUnvoiced == 1)
561 { 578 {
562 f.values.push_back(fabs(mpOut[iFrame])); 579 f.values.push_back(fabs(freq));
563 } else { 580 } else {
564 f.values.push_back(mpOut[iFrame]); 581 f.values.push_back(freq);
565 } 582 }
583 fs[m_oSmoothedPitchTrack].push_back(f);
584 }
585
586 // for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame)
587 // {
588 // if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue;
589
590 // if (m_outputUnvoiced == 1)
591 // {
592 // f.values.push_back(fabs(mpOut[iFrame]));
593 // } else {
594 // f.values.push_back(mpOut[iFrame]);
595 // }
566 596
567 fs[m_oSmoothedPitchTrack].push_back(f); 597 // fs[m_oSmoothedPitchTrack].push_back(f);
568 } 598 // }
569 599
570 // MONO-NOTE STUFF 600 // ======================== N O T E S ======================================
571 // std::cerr << "Mono Note Stuff" << std::endl;
572 MonoNote mn; 601 MonoNote mn;
573 std::vector<std::vector<std::pair<double, double> > > smoothedPitch; 602 std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
574 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) { 603 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) {
575 std::vector<std::pair<double, double> > temp; 604 std::vector<std::pair<double, double> > temp;
576 if (mpOut[iFrame] > 0) 605 if (mpOut[iFrame] > 0)
632 } 661 }
633 oldIsVoiced = isVoiced; 662 oldIsVoiced = isVoiced;
634 } 663 }
635 return fs; 664 return fs;
636 } 665 }
666
// Map an HMM state index back to an output frequency.
//
// state      index into m_pitchHmm.m_freqs; it is not range-checked here.
// pitchProb  the frame's pitch candidates. Only .first of each pair is read,
//            and it is converted with 440 * 2^((x - 69) / 12) -- presumably a
//            MIDI-style pitch number mapped to Hz (TODO confirm).
//            NOTE(review): taken by value, so the vector is copied per call.
//
// Returns: when the HMM frequency is positive, the converted candidate that
// lies closest to it, or 0 if pitchProb is empty or no candidate is closer
// than 10000. When the HMM frequency is not positive, that value is returned
// unchanged.
667 float
668 PYinVamp::pitchState2Freq(int state, vector<pair<double, double> > pitchProb)
669 {
670 float hmmFreq = m_pitchHmm.m_freqs[state];
// Result if no candidate is selected below.
671 float bestFreq = 0;
// Starting distance for the nearest-candidate search; a candidate is only
// accepted if it is strictly closer than this.
672 float leastDist = 10000;
673 if (hmmFreq > 0)
674 {
675 // This was a Yin estimate, so try to get original pitch estimate back
676 // ... a bit hacky, since we could have directly saved the frequency
677 // that was assigned to the HMM bin in hmm.calculateObsProb -- but would
678 // have had to rethink the interface of that method.
679 for (size_t iPt = 0; iPt < pitchProb.size(); ++iPt)
680 {
// Convert the candidate's .first to a frequency (reference 440 at x == 69).
681 float freq = 440. *
682 std::pow(2,
683 (pitchProb[iPt].first - 69)/12);
// Keep the candidate nearest to the HMM bin frequency (linear distance).
684 float dist = std::abs(hmmFreq-freq);
685 if (dist < leastDist)
686 {
687 leastDist = dist;
688 bestFreq = freq;
689 }
690 }
691 } else {
// Non-positive HMM frequency: pass it through as-is, no candidate lookup.
692 bestFreq = hmmFreq;
693 }
694 return bestFreq;
695 }