comparison PYinVamp.cpp @ 133:83978b93aac1 fixedlag

ah, didn't commit when I stopped working... what did I do?
author Matthias Mauch <mail@matthiasmauch.net>
date Mon, 13 Jul 2015 12:10:06 +0100
parents 926c292fa3ff
children 72bda34e0e64
--- PYinVamp.cpp@132:926c292fa3ff
+++ PYinVamp.cpp@133:83978b93aac1
@@ -51,11 +51,12 @@
     m_onsetSensitivity(0.7f),
     m_pruneThresh(0.1f),
     m_pitchHmm(0),
     m_pitchProb(0),
     m_timestamp(0),
-    m_level(0)
+    m_level(0),
+    m_pitchTrack(0)
 {
 }
 
 PYinVamp::~PYinVamp()
 {
@@ -446,22 +447,25 @@
     else m_pitchHmm = MonoPitchHMM(0);
 
     m_pitchProb.clear();
     m_timestamp.clear();
     m_level.clear();
+    m_pitchTrack.clear();
     /*
     std::cerr << "PYinVamp::reset"
               << ", blockSize = " << m_blockSize
               << std::endl;
     */
 }
 
 PYinVamp::FeatureSet
 PYinVamp::process(const float *const *inputBuffers, RealTime timestamp)
 {
+    std::cerr << timestamp << std::endl;
     int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
-    timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate));
+    timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset,
+                                                           lrintf(m_inputSampleRate));
 
     FeatureSet fs;
 
     float rms = 0;
 
@@ -506,11 +510,11 @@
     m_pitchProb.push_back(tempPitchProb);
     m_timestamp.push_back(timestamp);
 
     int lag = m_pitchHmm.m_fixedLag;
 
-    if (m_fixedLag == 1.f)
+    if (m_fixedLag == 1.f) // do fixed-lag smoothing instead of full Viterbi
     {
         if (m_timestamp.size() == lag + 1)
         {
             m_timestamp.pop_front();
             m_pitchProb.pop_front();
@@ -518,10 +522,11 @@
             Feature f;
             f.hasTimestamp = true;
             vector<int> rawPitchPath = m_pitchHmm.track();
             float freq = m_pitchHmm.nearestFreq(rawPitchPath[0],
                                                 m_pitchProb[0]);
+            m_pitchTrack.push_back(freq);
             f.timestamp = m_timestamp[0];
             f.values.clear();
 
             // different output modes
             if (freq < 0 && (m_outputUnvoiced==0))
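
Aside (not part of the changeset): the m_fixedLag branch above performs fixed-lag smoothing. The decision for the oldest buffered frame is committed as soon as lag further frames have been observed, instead of waiting for the end of the input as full Viterbi decoding would. The stand-alone sketch below shows only that window bookkeeping and is illustrative: a running median stands in for the MonoPitchHMM::track() decoding step, and all names and values are invented for the example.

    #include <algorithm>
    #include <cstddef>
    #include <deque>
    #include <iostream>
    #include <vector>

    int main()
    {
        const std::size_t lag = 3;     // how many future frames to wait for
        std::deque<float> window;      // frames still awaiting a decision
        std::vector<float> output;     // committed (smoothed) values

        const float input[] = { 1.f, 1.f, 9.f, 1.f, 1.f, 2.f, 2.f, 2.f };
        for (float x : input) {
            window.push_back(x);
            if (window.size() == lag + 1) {
                // decide the oldest frame using the lag frames that follow it;
                // the median here is only a stand-in for the HMM decoding step
                std::vector<float> sorted(window.begin(), window.end());
                std::sort(sorted.begin(), sorted.end());
                output.push_back(sorted[sorted.size() / 2]);
                window.pop_front();    // that frame is now committed
            }
        }
        for (float v : output) std::cout << v << " ";  // prints: 1 1 2 2 2
        std::cout << std::endl;                        // last lag frames stay pending
        return 0;
    }
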
@@ -590,18 +595,17 @@
     }
 
     // ================== P I T C H T R A C K =================================
 
     vector<int> rawPitchPath = m_pitchHmm.track();
-    vector<float> mpOut;
 
     for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame)
     {
         float freq = m_pitchHmm.nearestFreq(rawPitchPath[iFrame],
                                             m_pitchProb[iFrame]);
-        mpOut.push_back(freq); // for note processing below
+        m_pitchTrack.push_back(freq); // for note processing below
 
         f.timestamp = m_timestamp[iFrame];
         f.values.clear();
 
         // different output modes
         if (freq < 0 && (m_outputUnvoiced==0)) continue;
@@ -613,70 +617,92 @@
         }
         fs[m_oSmoothedPitchTrack].push_back(f);
     }
 
     // ======================== N O T E S ======================================
-    // MonoNote mn;
-    // std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
-    // for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) {
-    //     std::vector<std::pair<double, double> > temp;
-    //     if (mpOut[iFrame] > 0)
-    //     {
-    //         double tempPitch = 12 *
-    //             std::log(mpOut[iFrame]/440)/std::log(2.) + 69;
-    //         temp.push_back(std::pair<double,double>(tempPitch, .9));
-    //     }
-    //     smoothedPitch.push_back(temp);
-    // }
-    // // vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb);
-    // vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
-
-    // // turning feature into a note feature
-    // f.hasTimestamp = true;
-    // f.hasDuration = true;
-    // f.values.clear();
-
-    // int onsetFrame = 0;
-    // bool isVoiced = 0;
-    // bool oldIsVoiced = 0;
-    // size_t nFrame = m_pitchProb.size();
-
-    // float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize;
-
-    // // the body of the loop below should be in a function/method
-    // std::vector<float> notePitchTrack; // collects pitches for one note at a time
-    // for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
-    // {
-    //     isVoiced = mnOut[iFrame].noteState < 3
-    //         && smoothedPitch[iFrame].size() > 0
-    //         && (iFrame >= nFrame-2
-    //             || ((m_level[iFrame]/m_level[iFrame+2]) >
-    //                 m_onsetSensitivity));
-    //     if (isVoiced && iFrame != nFrame-1)
-    //     {
-    //         if (oldIsVoiced == 0) // beginning of a note
-    //         {
-    //             onsetFrame = iFrame;
-    //         }
-    //         float pitch = smoothedPitch[iFrame][0].first;
-    //         notePitchTrack.push_back(pitch); // add to the note's pitch track
-    //     } else { // not currently voiced
-    //         if (oldIsVoiced == 1) // end of note
-    //         {
-    //             if (notePitchTrack.size() >= minNoteFrames)
-    //             {
-    //                 std::sort(notePitchTrack.begin(), notePitchTrack.end());
-    //                 float medianPitch = notePitchTrack[notePitchTrack.size()/2];
-    //                 float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440;
-    //                 f.values.clear();
-    //                 f.values.push_back(medianFreq);
-    //                 f.timestamp = m_timestamp[onsetFrame];
-    //                 f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame];
-    //                 fs[m_oNotes].push_back(f);
-    //             }
-    //             notePitchTrack.clear();
-    //         }
-    //     }
-    //     oldIsVoiced = isVoiced;
-    // }
+    MonoNote mn;
+    std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
+    for (size_t iFrame = 0; iFrame < m_pitchTrack.size(); ++iFrame) {
+        std::vector<std::pair<double, double> > temp;
+        if (m_pitchTrack[iFrame] > 0)
+        {
+            double tempPitch = 12 *
+                std::log(m_pitchTrack[iFrame]/440)/std::log(2.) + 69;
+            temp.push_back(std::pair<double,double>(tempPitch, .9));
+            // std::cerr << "tempPitch: " << tempPitch << std::endl;
+        }
+        // std::cerr << "temp size: " << temp.size() << std::endl;
+        smoothedPitch.push_back(temp);
+    }
+
+    vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
+    std::cerr << "mnOut size: " << mnOut.size() << std::endl;
+    std::cerr << "m_pitchTrack size: " << m_pitchTrack.size() << std::endl;
+
+    // turning feature into a note feature
+    f.hasTimestamp = true;
+    f.hasDuration = true;
+    f.values.clear();
+
+    int onsetFrame = 0;
+    bool isVoiced = 0;
+    bool oldIsVoiced = 0;
+    size_t nFrame = m_pitchTrack.size();
+
+    float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize;
+
+    // the body of the loop below should be in a function/method
+    // but what does it actually do??
+    // * takes the result of the note tracking HMM
+    // * collects contiguously pitched pitches
+    // * writes a note once it notices the voiced segment has ended
+    // complications:
+    // * it needs a lookahead of two frames for m_level (wtf was I thinking)
+    // * it needs to know the timestamp (which can be guessed from the frame no)
+    // *
+    int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
+    RealTime timestampOffset = Vamp::RealTime::frame2RealTime(offset,
+                                                              lrintf(m_inputSampleRate));
+
+    std::vector<float> notePitchTrack; // collects pitches for 1 note at a time
+    for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
+    {
+        isVoiced = mnOut[iFrame].noteState < 3
+            && smoothedPitch[iFrame].size() > 0
+            && (iFrame >= nFrame-2
+                || ((m_level[iFrame]/m_level[iFrame+2]) > m_onsetSensitivity));
+        if (isVoiced && iFrame != nFrame-1)
+        {
+            if (oldIsVoiced == 0) // beginning of a note
+            {
+                onsetFrame = iFrame;
+            }
+            float pitch = smoothedPitch[iFrame][0].first;
+            notePitchTrack.push_back(pitch); // add to the note's pitch track
+        } else { // not currently voiced
+            if (oldIsVoiced == 1) // end of note
+            {
+                if (notePitchTrack.size() >= minNoteFrames)
+                {
+                    std::sort(notePitchTrack.begin(), notePitchTrack.end());
+                    float medianPitch = notePitchTrack[notePitchTrack.size()/2];
+                    float medianFreq =
+                        std::pow(2,(medianPitch - 69) / 12) * 440;
+                    f.values.clear();
+                    f.values.push_back(medianFreq);
+                    RealTime start = RealTime::frame2RealTime(
+                        onsetFrame * m_stepSize, lrintf(m_inputSampleRate)) +
+                        timestampOffset;
+                    RealTime end = RealTime::frame2RealTime(
+                        iFrame * m_stepSize, lrintf(m_inputSampleRate)) +
+                        timestampOffset;
+                    f.timestamp = start;
+                    f.duration = end - start;
+                    fs[m_oNotes].push_back(f);
+                }
+                notePitchTrack.clear();
+            }
+        }
+        oldIsVoiced = isVoiced;
+    }
     return fs;
 }
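
Aside (not part of the changeset): the note features above take the median of each note's pitch track in MIDI note-number space and convert it back to a frequency, and they derive onset time and duration from frame indices. The stand-alone sketch below shows just those conversions; the step size and sample rate are example values, not taken from the plugin, and the half/quarter-block timestamp offset added above is ignored.

    #include <cmath>
    #include <cstdio>

    // Hz -> MIDI note number, as in the smoothedPitch computation above
    static double hzToMidi(double hz)   { return 12.0 * std::log(hz / 440.0) / std::log(2.0) + 69.0; }

    // MIDI note number -> Hz, as in the medianFreq computation above
    static double midiToHz(double midi) { return std::pow(2.0, (midi - 69.0) / 12.0) * 440.0; }

    // frame index -> seconds, mirroring what frame2RealTime(frame * stepSize, sampleRate) computes
    static double frameToSec(int frame, int stepSize, double sampleRate)
    {
        return frame * stepSize / sampleRate;
    }

    int main()
    {
        std::printf("440 Hz  -> MIDI %.1f\n", hzToMidi(440.0));              // 69.0 (A4)
        std::printf("MIDI 60 -> %.2f Hz\n", midiToHz(60.0));                 // 261.63 (C4)
        std::printf("frame 100 -> %.3f s\n", frameToSec(100, 256, 44100.0)); // 0.580, with stepSize 256 at 44100 Hz
        return 0;
    }
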