Mercurial > hg > pyin
changeset 46:e17206dafa65 tony
frantically fixed some things -- at least pyin is not behaving randomly any more.
author | matthiasm |
---|---|
date | Wed, 19 Feb 2014 22:48:08 +0000 |
parents | 68812db649e6 |
children | 42bafbc673b9 |
files | LocalCandidatePYIN.cpp LocalCandidatePYIN.h PYinVamp.cpp Yin.cpp YinUtil.cpp |
diffstat | 5 files changed, 210 insertions(+), 107 deletions(-) [+] |
line wrap: on
line diff
--- a/LocalCandidatePYIN.cpp Wed Feb 05 11:25:35 2014 +0000 +++ b/LocalCandidatePYIN.cpp Wed Feb 19 22:48:08 2014 +0000 @@ -27,6 +27,8 @@ #include <complex> #include <map> +#include <boost/math/distributions.hpp> + using std::string; using std::vector; using std::map; @@ -46,7 +48,7 @@ m_outputUnvoiced(0.0f), m_pitchProb(0), m_timestamp(0), - m_nCandidate(20) + m_nCandidate(10) { } @@ -265,7 +267,7 @@ m_pitchProb.clear(); for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) { - m_pitchProb.push_back(vector<vector<pair<double, double> > >()); + m_pitchProb.push_back(vector<pair<double, double> >()); } m_timestamp.clear(); /* @@ -278,7 +280,7 @@ LocalCandidatePYIN::FeatureSet LocalCandidatePYIN::process(const float *const *inputBuffers, RealTime timestamp) { - timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate)); + timestamp = timestamp - Vamp::RealTime::frame2RealTime(m_blockSize, lrintf(m_inputSampleRate)); double *dInputBuffers = new double[m_blockSize]; for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i]; @@ -291,30 +293,28 @@ YinUtil::cumulativeDifference(yinBuffer, yinBufferSize); - for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) + float minFrequency = 60; + float maxFrequency = 900; + vector<double> peakProbability = YinUtil::yinProb(yinBuffer, + m_threshDistr, + yinBufferSize, + m_inputSampleRate/maxFrequency, + m_inputSampleRate/minFrequency); + + vector<pair<double, double> > tempPitchProb; + for (size_t iBuf = 0; iBuf < yinBufferSize; ++iBuf) { - float minFrequency = m_fmin * std::pow(2,(3.0*iCandidate)/12); - float maxFrequency = m_fmin * std::pow(2,(3.0*iCandidate+9)/12); - vector<double> peakProbability = YinUtil::yinProb(yinBuffer, - m_threshDistr, - yinBufferSize, - m_inputSampleRate/maxFrequency, - m_inputSampleRate/minFrequency); - - vector<pair<double, double> > tempPitchProb; - for (size_t iBuf = 0; iBuf < yinBufferSize; ++iBuf) + if (peakProbability[iBuf] > 0) { - if (peakProbability[iBuf] > 0) - { - double currentF0 = - m_inputSampleRate * (1.0 / - YinUtil::parabolicInterpolation(yinBuffer, iBuf, yinBufferSize)); - double tempPitch = 12 * std::log(currentF0/440)/std::log(2.) + 69; - tempPitchProb.push_back(pair<double, double>(tempPitch, peakProbability[iBuf])); - } + double currentF0 = + m_inputSampleRate * (1.0 / + YinUtil::parabolicInterpolation(yinBuffer, iBuf, yinBufferSize)); + double tempPitch = 12 * std::log(currentF0/440)/std::log(2.) + 69; + if (tempPitch != tempPitch) std::cerr << "AAAAAAAAA! " << currentF0 << " " << (m_inputSampleRate * 1.0 / iBuf) << std::endl; + tempPitchProb.push_back(pair<double, double>(tempPitch, peakProbability[iBuf])); } - m_pitchProb[iCandidate].push_back(tempPitchProb); } + m_pitchProb.push_back(tempPitchProb); m_timestamp.push_back(timestamp); return FeatureSet(); @@ -340,21 +340,46 @@ vector<float> freqNumber = vector<float>(m_nCandidate); vector<float> freqMean = vector<float>(m_nCandidate); + boost::math::normal normalDist(0, 8); // semitones sd + float maxNormalDist = boost::math::pdf(normalDist, 0); + for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) { pitchTracks.push_back(vector<float>(nFrame)); - vector<float> mpOut = mp.process(m_pitchProb[iCandidate]); + vector<vector<pair<double,double> > > tempPitchProb; + float centrePitch = 45 + 3 * iCandidate; + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) { + tempPitchProb.push_back(vector<pair<double,double> >(0)); + float sumProb = 0; + float pitch = 0; + float prob = 0; + for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb) { + pitch = m_pitchProb[iFrame][iProb].first; + // std::cerr << pitch << " " << m_pitchProb[iFrame][iProb].second << std::endl; + prob = m_pitchProb[iFrame][iProb].second * boost::math::pdf(normalDist, pitch-centrePitch) / maxNormalDist; + // if (abs(pitch-centrePitch) > 5) prob *= 0.5; + // if (abs(pitch-centrePitch) > 7) prob *= 0.5; + // if (abs(pitch-centrePitch) > 9) prob *= 0.5; + sumProb += prob; + tempPitchProb[iFrame].push_back(pair<double,double>(pitch,prob)); + // std::cerr << m_timestamp[iFrame] << " " << iCandidate << " " << centrePitch << " " << pitch << " " << prob << std::endl; + } + for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb) { + tempPitchProb[iFrame][iProb].second /= sumProb; + } + } + vector<float> mpOut = mp.process(tempPitchProb); float prevFreq = 0; for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) { if (mpOut[iFrame] > 0) { - if (prevFreq>0 && fabs(log2(mpOut[iFrame]/prevFreq)) > 0.1) { - for (int jFrame = iFrame; jFrame != -1; --jFrame) { - // hack: setting all freqs to 0 -- will be eliminated later - pitchTracks[iCandidate][jFrame] = 0; - } - break; - } + // if (prevFreq>0 && fabs(log2(mpOut[iFrame]/prevFreq)) > 0.1) { + // for (int jFrame = iFrame; jFrame != -1; --jFrame) { + // // hack: setting all freqs to 0 -- will be eliminated later + // pitchTracks[iCandidate][jFrame] = 0; + // } + // break; + // } pitchTracks[iCandidate][iFrame] = mpOut[iFrame]; freqSum[iCandidate] += mpOut[iFrame]; freqNumber[iCandidate]++; @@ -371,23 +396,33 @@ size_t countEqual = 0; for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) { - if (fabs(pitchTracks[iCandidate][iFrame]/pitchTracks[jCandidate][iFrame]-1)<0.01) + if ((pitchTracks[jCandidate][iFrame] == 0 && pitchTracks[iCandidate][iFrame] == 0) || + fabs(pitchTracks[iCandidate][iFrame]/pitchTracks[jCandidate][iFrame]-1)<0.01) countEqual++; } + // std::cerr << "proportion equal: " << (countEqual * 1.0 / nFrame) << std::endl; if (countEqual * 1.0 / nFrame > 0.8) { if (freqNumber[iCandidate] > freqNumber[jCandidate]) { duplicates.push_back(jCandidate); - } else { + } else if (iCandidate < jCandidate) { duplicates.push_back(iCandidate); } } } } + // std::cerr << "n duplicate: " << duplicates.size() << std::endl; + for (size_t iDup = 0; iDup < duplicates.size(); ++ iDup) { + // std::cerr << "duplicate: " << iDup << std::endl; + } + // now find non-duplicate pitch tracks map<int, int> candidateActuals; map<int, std::string> candidateLabels; + vector<vector<float> > outputFrequencies; + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) outputFrequencies.push_back(vector<float>(0)); + int actualCandidateNumber = 0; for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) { bool isDuplicate = false; @@ -398,19 +433,20 @@ break; } } - if (!isDuplicate && freqNumber[iCandidate] > 0.8*nFrame) + if (!isDuplicate && freqNumber[iCandidate] > 0.5*nFrame) { std::ostringstream convert; convert << actualCandidateNumber++; candidateLabels[iCandidate] = convert.str(); candidateActuals[iCandidate] = actualCandidateNumber; - std::cerr << freqNumber[iCandidate] << " " << freqMean[iCandidate] << std::endl; + // std::cerr << iCandidate << " " << actualCandidateNumber << " " << freqNumber[iCandidate] << " " << freqMean[iCandidate] << std::endl; for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) { if (pitchTracks[iCandidate][iFrame] > 0) { - featureValues[m_timestamp[iFrame]][iCandidate] = - pitchTracks[iCandidate][iFrame]; + // featureValues[m_timestamp[iFrame]][iCandidate] = + // pitchTracks[iCandidate][iFrame]; + outputFrequencies[iFrame].push_back(pitchTracks[iCandidate][iFrame]); } } } @@ -422,24 +458,34 @@ FeatureSet fs; - for (map<RealTime, map<int, float> >::const_iterator i = - featureValues.begin(); i != featureValues.end(); ++i) { + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame){ Feature f; f.hasTimestamp = true; - f.timestamp = i->first; - int nextCandidate = candidateActuals.begin()->second; - for (map<int, float>::const_iterator j = - i->second.begin(); j != i->second.end(); ++j) { - while (candidateActuals[j->first] > nextCandidate) { - f.values.push_back(0); - ++nextCandidate; - } - f.values.push_back(j->second); - nextCandidate = j->first + 1; - } - //!!! can't use labels? + f.timestamp = m_timestamp[iFrame]; + f.values = outputFrequencies[iFrame]; fs[0].push_back(f); } + + // I stopped using Chris's map stuff below because I couldn't get my head around it + // + // for (map<RealTime, map<int, float> >::const_iterator i = + // featureValues.begin(); i != featureValues.end(); ++i) { + // Feature f; + // f.hasTimestamp = true; + // f.timestamp = i->first; + // int nextCandidate = candidateActuals.begin()->second; + // for (map<int, float>::const_iterator j = + // i->second.begin(); j != i->second.end(); ++j) { + // while (candidateActuals[j->first] > nextCandidate) { + // f.values.push_back(0); + // ++nextCandidate; + // } + // f.values.push_back(j->second); + // nextCandidate = j->first + 1; + // } + // //!!! can't use labels? + // fs[0].push_back(f); + // } return fs; }
--- a/LocalCandidatePYIN.h Wed Feb 05 11:25:35 2014 +0000 +++ b/LocalCandidatePYIN.h Wed Feb 19 22:48:08 2014 +0000 @@ -67,7 +67,7 @@ float m_threshDistr; float m_outputUnvoiced; - vector<vector<vector<pair<double, double> > > > m_pitchProb; + vector<vector<pair<double, double> > > m_pitchProb; vector<Vamp::RealTime> m_timestamp; size_t m_nCandidate; };
--- a/PYinVamp.cpp Wed Feb 05 11:25:35 2014 +0000 +++ b/PYinVamp.cpp Wed Feb 19 22:48:08 2014 +0000 @@ -357,8 +357,16 @@ timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate)); FeatureSet fs; + float rms = 0; + double *dInputBuffers = new double[m_blockSize]; - for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i]; + for (size_t i = 0; i < m_blockSize; ++i) { + dInputBuffers[i] = inputBuffers[0][i]; + rms += inputBuffers[0][i] * inputBuffers[0][i]; + } + rms /= m_blockSize; + rms = sqrt(rms); + bool isLowVolume = (rms < 0.01); Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers); delete [] dInputBuffers; @@ -369,8 +377,12 @@ for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate) { double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69; - tempPitchProb.push_back(pair<double, double> - (tempPitch, yo.freqProb[iCandidate].second)); + if (!isLowVolume) + tempPitchProb.push_back(pair<double, double> + (tempPitch, yo.freqProb[iCandidate].second)); + else + tempPitchProb.push_back(pair<double, double> + (tempPitch, yo.freqProb[iCandidate].second*.01)); } m_pitchProb.push_back(tempPitchProb); m_timestamp.push_back(timestamp);
--- a/Yin.cpp Wed Feb 05 11:25:35 2014 +0000 +++ b/Yin.cpp Wed Feb 19 22:48:08 2014 +0000 @@ -57,7 +57,7 @@ double aperiodicity; double f0; - if (tau!=0) + if (tau!=0 && tau!=m_yinBufferSize-1) { interpolatedTau = YinUtil::parabolicInterpolation(yinBuffer, abs(tau), m_yinBufferSize); f0 = m_inputSampleRate * (1.0 / interpolatedTau); @@ -93,7 +93,7 @@ // basic yin output Yin::YinOutput yo(0,0,0); - for (size_t iBuf = 0; iBuf < m_yinBufferSize; ++iBuf) + for (size_t iBuf = 1; iBuf < m_yinBufferSize-1; ++iBuf) { if (peakProbability[iBuf] > 0) {
--- a/YinUtil.cpp Wed Feb 05 11:25:35 2014 +0000 +++ b/YinUtil.cpp Wed Feb 19 22:48:08 2014 +0000 @@ -233,9 +233,37 @@ // double factor = 1.0 / (0.25 * (nThresholdInt+1) * (nThresholdInt + 1)); // factor to scale down triangular weight size_t minInd = 0; float minVal = 42.f; - while (currThreshInd != -1 && tau < maxTau) + // while (currThreshInd != -1 && tau < maxTau) + // { + // if (yinBuffer[tau] < thresholds[currThreshInd]) + // { + // while (tau + 1 < maxTau && yinBuffer[tau+1] < yinBuffer[tau]) + // { + // tau++; + // } + // // tau is now local minimum + // // std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl; + // if (yinBuffer[tau] < minVal && tau > 2){ + // minVal = yinBuffer[tau]; + // minInd = tau; + // } + // peakProb[tau] += distribution[currThreshInd]; + // currThreshInd--; + // } else { + // tau++; + // } + // } + // double nonPeakProb = 1; + // for (size_t i = minTau; i < maxTau; ++i) + // { + // nonPeakProb -= peakProb[i]; + // } + // + // std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl; + float sumProb = 0; + while (tau < maxTau) { - if (yinBuffer[tau] < thresholds[currThreshInd]) + if (yinBuffer[tau] < thresholds[thresholds.size()-1] && yinBuffer[tau+1] < yinBuffer[tau]) { while (tau + 1 < maxTau && yinBuffer[tau+1] < yinBuffer[tau]) { @@ -247,18 +275,29 @@ minVal = yinBuffer[tau]; minInd = tau; } - peakProb[tau] += distribution[currThreshInd]; - currThreshInd--; + currThreshInd = nThresholdInt-1; + while (thresholds[currThreshInd] > yinBuffer[tau] && currThreshInd > -1) { + // std::cerr << distribution[currThreshInd] << std::endl; + peakProb[tau] += distribution[currThreshInd]; + currThreshInd--; + } + // peakProb[tau] = 1 - yinBuffer[tau]; + sumProb += peakProb[tau]; + tau++; } else { tau++; } } + double nonPeakProb = 1; - for (size_t i = minTau; i < maxTau; ++i) - { - nonPeakProb -= peakProb[i]; + if (sumProb > 0) { + for (size_t i = minTau; i < maxTau; ++i) + { + peakProb[i] = peakProb[i] / sumProb * peakProb[minInd]; + nonPeakProb -= peakProb[i]; + } } - // std::cerr << nonPeakProb << std::endl; + std::cerr << nonPeakProb << std::endl; if (minInd > 0) { // std::cerr << "min set " << minVal << " " << minInd << " " << nonPeakProb << std::endl; @@ -278,55 +317,61 @@ } double betterTau = 0.0; - size_t x0; - size_t x2; + // size_t x0; + // size_t x2; - if (tau < 1) - { - x0 = tau; - } else { - x0 = tau - 1; - } - - if (tau + 1 < yinBufferSize) - { - x2 = tau + 1; - } else { - x2 = tau; - } - - if (x0 == tau) - { - if (yinBuffer[tau] <= yinBuffer[x2]) - { - betterTau = tau; - } else { - betterTau = x2; - } - } - else if (x2 == tau) - { - if (yinBuffer[tau] <= yinBuffer[x0]) - { - betterTau = tau; - } - else - { - betterTau = x0; - } - } - else - { + // if (tau < 1) + // { + // x0 = tau; + // } else { + // x0 = tau - 1; + // } + // + // if (tau + 1 < yinBufferSize) + // { + // x2 = tau + 1; + // } else { + // x2 = tau; + // } + // + // if (x0 == tau) + // { + // if (yinBuffer[tau] <= yinBuffer[x2]) + // { + // betterTau = tau; + // } else { + // betterTau = x2; + // } + // } + // else if (x2 == tau) + // { + // if (yinBuffer[tau] <= yinBuffer[x0]) + // { + // betterTau = tau; + // } + // else + // { + // betterTau = x0; + // } + // } + // else + // { + if (tau > 0 && tau < yinBufferSize-1) { float s0, s1, s2; - s0 = yinBuffer[x0]; + s0 = yinBuffer[tau-1]; s1 = yinBuffer[tau]; - s2 = yinBuffer[x2]; + s2 = yinBuffer[tau+1]; // fixed AUBIO implementation, thanks to Karl Helgason: // (2.0f * s1 - s2 - s0) was incorrectly multiplied with -1 - betterTau = tau + (s2 - s0) / (2 * (2 * s1 - s2 - s0)); - // std::cerr << tau << " --> " << betterTau << std::endl; + double adjustment = (s2 - s0) / (2 * (2 * s1 - s2 - s0)); + if (abs(adjustment)>1) adjustment = 0; + + betterTau = tau + adjustment; + } else { + std::cerr << "WARNING: can't do interpolation at the edge (tau = " << tau << "), will return un-interpolated value.\n"; + betterTau = tau; } return betterTau; }