Mercurial > hg > pyin
changeset 131:b877df85ad9e fixedlag
mono pitch works now with the refactored HMM implementation
author | Matthias Mauch <mail@matthiasmauch.net> |
---|---|
date | Fri, 03 Jul 2015 14:09:05 +0100 |
parents | 080fe18f5ebf |
children | 926c292fa3ff |
files | PYinVamp.cpp PYinVamp.h SparseHMM.cpp SparseHMM.h |
diffstat | 4 files changed, 87 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/PYinVamp.cpp Fri Jul 03 12:22:44 2015 +0100 +++ b/PYinVamp.cpp Fri Jul 03 14:09:05 2015 +0100 @@ -14,6 +14,7 @@ #include "PYinVamp.h" #include "MonoNote.h" #include "MonoPitch.h" +#include "MonoPitchHMM.h" #include "vamp-sdk/FFT.h" @@ -50,6 +51,7 @@ m_lowAmp(0.1f), m_onsetSensitivity(0.7f), m_pruneThresh(0.1f), + m_pitchHmm(), m_pitchProb(0), m_timestamp(0), m_level(0) @@ -493,6 +495,13 @@ if (m_fixedLag == 0.f) { + vector<double> tempObsProb = m_pitchHmm.calculateObsProb(tempPitchProb); + if (m_timestamp.empty()) + { + m_pitchHmm.initialise(tempObsProb); + } else { + m_pitchHmm.process(tempObsProb); + } m_pitchProb.push_back(tempPitchProb); } else { // Damn, so I need the hmm right here! Sadly it isn't defined here yet. @@ -549,26 +558,46 @@ return fs; } - // MONO-PITCH STUFF - MonoPitch mp; - vector<float> mpOut = mp.process(m_pitchProb); - for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) + // ================== P I T C H T R A C K ================================= + + vector<int> rawPitchPath = m_pitchHmm.finalise(); + vector<float> mpOut; + + for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame) { - if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue; + float freq = pitchState2Freq(rawPitchPath[iFrame], m_pitchProb[iFrame]); + mpOut.push_back(freq); // for note processing below + f.timestamp = m_timestamp[iFrame]; + // std::cerr << f.timestamp << std::endl; f.values.clear(); + + // different output modes + if (freq < 0 && (m_outputUnvoiced==0)) continue; if (m_outputUnvoiced == 1) { - f.values.push_back(fabs(mpOut[iFrame])); + f.values.push_back(fabs(freq)); } else { - f.values.push_back(mpOut[iFrame]); + f.values.push_back(freq); } - fs[m_oSmoothedPitchTrack].push_back(f); } + + // for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) + // { + // if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue; + + // if (m_outputUnvoiced == 1) + // { + // f.values.push_back(fabs(mpOut[iFrame])); + // } else { + // 
f.values.push_back(mpOut[iFrame]); + // } + + // fs[m_oSmoothedPitchTrack].push_back(f); + // } - // MONO-NOTE STUFF -// std::cerr << "Mono Note Stuff" << std::endl; + // ======================== N O T E S ====================================== MonoNote mn; std::vector<std::vector<std::pair<double, double> > > smoothedPitch; for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) { @@ -634,3 +663,33 @@ } return fs; } + +float +PYinVamp::pitchState2Freq(int state, vector<pair<double, double> > pitchProb) +{ + float hmmFreq = m_pitchHmm.m_freqs[state]; + float bestFreq = 0; + float leastDist = 10000; + if (hmmFreq > 0) + { + // This was a Yin estimate, so try to get original pitch estimate back + // ... a bit hacky, since we could have directly saved the frequency + // that was assigned to the HMM bin in hmm.calculateObsProb -- but would + // have had to rethink the interface of that method. + for (size_t iPt = 0; iPt < pitchProb.size(); ++iPt) + { + float freq = 440. * + std::pow(2, + (pitchProb[iPt].first - 69)/12); + float dist = std::abs(hmmFreq-freq); + if (dist < leastDist) + { + leastDist = dist; + bestFreq = freq; + } + } + } else { + bestFreq = hmmFreq; + } + return bestFreq; +} \ No newline at end of file
--- a/PYinVamp.h Fri Jul 03 12:22:44 2015 +0100 +++ b/PYinVamp.h Fri Jul 03 14:09:05 2015 +0100 @@ -17,6 +17,7 @@ #include <vamp-sdk/Plugin.h> #include "Yin.h" +#include "MonoPitchHMM.h" class PYinVamp : public Vamp::Plugin { @@ -55,6 +56,8 @@ FeatureSet getRemainingFeatures(); + float pitchState2Freq(int state, vector<pair<double, double> > pitchProb); + protected: size_t m_channels; size_t m_stepSize; @@ -77,6 +80,9 @@ float m_lowAmp; float m_onsetSensitivity; float m_pruneThresh; + + MonoPitchHMM m_pitchHmm; + vector<vector<pair<double, double> > > m_pitchProb; vector<Vamp::RealTime> m_timestamp; vector<float> m_level;
--- a/SparseHMM.cpp Fri Jul 03 12:22:44 2015 +0100 +++ b/SparseHMM.cpp Fri Jul 03 14:09:05 2015 +0100 @@ -52,8 +52,7 @@ if (nFrame < 1) { return vector<int>(); } - - + initialise(obsProb[0]); // rest of forward step @@ -66,10 +65,20 @@ return(path); } +void +SparseHMM::reset() +{ + m_scale.clear(); + m_psi.clear(); + for (size_t i = 0; i < m_delta.size(); ++i) m_delta[i] = 0; + for (size_t i = 0; i < m_oldDelta.size(); ++i) m_oldDelta[i] = 0; +} void SparseHMM::initialise(vector<double> firstObs) { + reset(); + double deltasum = 0; // initialise first frame
--- a/SparseHMM.h Fri Jul 03 12:22:44 2015 +0100 +++ b/SparseHMM.h Fri Jul 03 14:09:05 2015 +0100 @@ -30,6 +30,7 @@ calculateObsProb(const vector<pair<double, double> >); virtual void build(); const std::vector<int> decodeViterbi(std::vector<vector<double> > obs); + void reset(); void initialise(vector<double> firstObs); int process(vector<double> newObs); vector<int> finalise();