Mercurial > hg > pyin
changeset 54:619c01e3467e tony
Merge from the default branch
author | Chris Cannam <chris.cannam@eecs.qmul.ac.uk> |
---|---|
date | Thu, 06 Mar 2014 15:49:36 +0000 |
parents | 85eb802a8091 (diff) 71be7023e9d6 (current diff) |
children | cb268714b485 |
files | Yin.cpp |
diffstat | 22 files changed, 2119 insertions(+), 1123 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.hgignore Thu Mar 06 15:49:36 2014 +0000 @@ -0,0 +1,3 @@ +syntax: glob +*.o +*.so
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LocalCandidatePYIN.cpp Thu Mar 06 15:49:36 2014 +0000 @@ -0,0 +1,490 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COLocalCandidatePYING included with this distribution for more information. +*/ + +#include "LocalCandidatePYIN.h" +#include "MonoPitch.h" +#include "YinUtil.h" + +#include "vamp-sdk/FFT.h" + +#include <vector> +#include <algorithm> + +#include <cstdio> +#include <sstream> +// #include <iostream> +#include <cmath> +#include <complex> +#include <map> + +#include <boost/math/distributions.hpp> + +using std::string; +using std::vector; +using std::map; +using Vamp::RealTime; + + +LocalCandidatePYIN::LocalCandidatePYIN(float inputSampleRate) : + Plugin(inputSampleRate), + m_channels(0), + m_stepSize(256), + m_blockSize(2048), + m_fmin(40), + m_fmax(700), + m_yin(2048, inputSampleRate, 0.0), + m_oPitchTrackCandidates(0), + m_threshDistr(2.0f), + m_outputUnvoiced(0.0f), + m_pitchProb(0), + m_timestamp(0), + m_nCandidate(13) +{ +} + +LocalCandidatePYIN::~LocalCandidatePYIN() +{ +} + +string +LocalCandidatePYIN::getIdentifier() const +{ + return "localcandidatepyin"; +} + +string +LocalCandidatePYIN::getName() const +{ + return "Local Candidate PYIN"; +} + +string +LocalCandidatePYIN::getDescription() const +{ + return "Monophonic pitch and note tracking based on a probabilistic Yin extension."; +} + +string +LocalCandidatePYIN::getMaker() const +{ + return "Matthias Mauch"; +} + +int +LocalCandidatePYIN::getPluginVersion() const +{ + // Increment this each time you release a version that behaves + // differently from the previous one + return 1; +} + +string +LocalCandidatePYIN::getCopyright() const +{ + return "GPL"; +} + +LocalCandidatePYIN::InputDomain +LocalCandidatePYIN::getInputDomain() const +{ + return TimeDomain; +} + +size_t +LocalCandidatePYIN::getPreferredBlockSize() const +{ + return 2048; +} + +size_t +LocalCandidatePYIN::getPreferredStepSize() const +{ + return 256; +} + +size_t +LocalCandidatePYIN::getMinChannelCount() const +{ + return 1; +} + +size_t +LocalCandidatePYIN::getMaxChannelCount() const +{ + return 1; +} + +LocalCandidatePYIN::ParameterList +LocalCandidatePYIN::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor d; + + d.identifier = "threshdistr"; + d.name = "Yin threshold distribution"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 7.0f; + d.defaultValue = 2.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("Uniform"); + d.valueNames.push_back("Beta (mean 0.10)"); + d.valueNames.push_back("Beta (mean 0.15)"); + d.valueNames.push_back("Beta (mean 0.20)"); + d.valueNames.push_back("Beta (mean 0.30)"); + d.valueNames.push_back("Single Value 0.10"); + d.valueNames.push_back("Single Value 0.15"); + d.valueNames.push_back("Single Value 0.20"); + list.push_back(d); + + d.identifier = "outputunvoiced"; + d.valueNames.clear(); + d.name = "Output estimates classified as unvoiced?"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 2.0f; + d.defaultValue = 0.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("No"); + d.valueNames.push_back("Yes"); + d.valueNames.push_back("Yes, as negative frequencies"); + list.push_back(d); + + return list; +} + +float +LocalCandidatePYIN::getParameter(string identifier) const +{ + if (identifier == "threshdistr") { + return m_threshDistr; + } + if (identifier == "outputunvoiced") { + return m_outputUnvoiced; + } + return 0.f; +} + +void +LocalCandidatePYIN::setParameter(string identifier, float value) +{ + if (identifier == "threshdistr") + { + m_threshDistr = value; + } + if (identifier == "outputunvoiced") + { + m_outputUnvoiced = value; + } + +} + +LocalCandidatePYIN::ProgramList +LocalCandidatePYIN::getPrograms() const +{ + ProgramList list; + return list; +} + +string +LocalCandidatePYIN::getCurrentProgram() const +{ + return ""; // no programs +} + +void +LocalCandidatePYIN::selectProgram(string name) +{ +} + +LocalCandidatePYIN::OutputList +LocalCandidatePYIN::getOutputDescriptors() const +{ + OutputList outputs; + + OutputDescriptor d; + + int outputNumber = 0; + + d.identifier = "pitchtrackcandidates"; + d.name = "Pitch track candidates"; + d.description = "Multiple candidate pitch tracks."; + d.unit = "Hz"; + d.hasFixedBinCount = false; + d.hasKnownExtents = true; + d.minValue = m_fmin; + d.maxValue = 500; //!!!??? + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + + return outputs; +} + +bool +LocalCandidatePYIN::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + +/* + std::cerr << "LocalCandidatePYIN::initialise: channels = " << channels + << ", stepSize = " << stepSize << ", blockSize = " << blockSize + << std::endl; +*/ + m_channels = channels; + m_stepSize = stepSize; + m_blockSize = blockSize; + + reset(); + + return true; +} + +void +LocalCandidatePYIN::reset() +{ + m_yin.setThresholdDistr(m_threshDistr); + m_yin.setFrameSize(m_blockSize); + + m_pitchProb.clear(); + for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) + { + m_pitchProb.push_back(vector<pair<double, double> >()); + } + m_timestamp.clear(); +/* + std::cerr << "LocalCandidatePYIN::reset" + << ", blockSize = " << m_blockSize + << std::endl; +*/ +} + +LocalCandidatePYIN::FeatureSet +LocalCandidatePYIN::process(const float *const *inputBuffers, RealTime timestamp) +{ + // I don't understand why I should have to make this very weird 11 + // step-size left-shift, but it does get it in sync with the normal pYIN + timestamp = timestamp - Vamp::RealTime::frame2RealTime(11 * m_stepSize, lrintf(m_inputSampleRate)); + + double *dInputBuffers = new double[m_blockSize]; + for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i]; + + size_t yinBufferSize = m_blockSize/2; + double* yinBuffer = new double[yinBufferSize]; + YinUtil::fastDifference(dInputBuffers, yinBuffer, yinBufferSize); + + delete [] dInputBuffers; + + YinUtil::cumulativeDifference(yinBuffer, yinBufferSize); + + float minFrequency = 60; + float maxFrequency = 900; + vector<double> peakProbability = YinUtil::yinProb(yinBuffer, + m_threshDistr, + yinBufferSize, + m_inputSampleRate/maxFrequency, + m_inputSampleRate/minFrequency); + + vector<pair<double, double> > tempPitchProb; + for (size_t iBuf = 0; iBuf < yinBufferSize; ++iBuf) + { + if (peakProbability[iBuf] > 0) + { + double currentF0 = + m_inputSampleRate * (1.0 / + YinUtil::parabolicInterpolation(yinBuffer, iBuf, yinBufferSize)); + double tempPitch = 12 * std::log(currentF0/440)/std::log(2.) + 69; + if (tempPitch != tempPitch) std::cerr << "AAAAAAAAA! " << currentF0 << " " << (m_inputSampleRate * 1.0 / iBuf) << std::endl; + tempPitchProb.push_back(pair<double, double>(tempPitch, peakProbability[iBuf])); + } + } + m_pitchProb.push_back(tempPitchProb); + m_timestamp.push_back(timestamp); + + return FeatureSet(); +} + +LocalCandidatePYIN::FeatureSet +LocalCandidatePYIN::getRemainingFeatures() +{ + // timestamp -> candidate number -> value + map<RealTime, map<int, float> > featureValues; + + // std::cerr << "in remaining features" << std::endl; + + if (m_pitchProb.empty()) { + return FeatureSet(); + } + + // MONO-PITCH STUFF + MonoPitch mp; + size_t nFrame = m_timestamp.size(); + vector<vector<float> > pitchTracks; + vector<float> freqSum = vector<float>(m_nCandidate); + vector<float> freqNumber = vector<float>(m_nCandidate); + vector<float> freqMean = vector<float>(m_nCandidate); + + boost::math::normal normalDist(0, 8); // semitones sd + float maxNormalDist = boost::math::pdf(normalDist, 0); + + for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) + { + pitchTracks.push_back(vector<float>(nFrame)); + vector<vector<pair<double,double> > > tempPitchProb; + float centrePitch = 45 + 3 * iCandidate; + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) { + tempPitchProb.push_back(vector<pair<double,double> >(0)); + float sumProb = 0; + float pitch = 0; + float prob = 0; + for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb) { + pitch = m_pitchProb[iFrame][iProb].first; + // std::cerr << pitch << " " << m_pitchProb[iFrame][iProb].second << std::endl; + prob = m_pitchProb[iFrame][iProb].second * boost::math::pdf(normalDist, pitch-centrePitch) / maxNormalDist * 2; + sumProb += prob; + tempPitchProb[iFrame].push_back(pair<double,double>(pitch,prob)); + // std::cerr << m_timestamp[iFrame] << " " << iCandidate << " " << centrePitch << " " << pitch << " " << prob << std::endl; + } + for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb) { + tempPitchProb[iFrame][iProb].second /= sumProb; + } + } + vector<float> mpOut = mp.process(tempPitchProb); + float prevFreq = 0; + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) + { + if (mpOut[iFrame] > 0) { + // if (prevFreq>0 && fabs(log2(mpOut[iFrame]/prevFreq)) > 0.1) { + // for (int jFrame = iFrame; jFrame != -1; --jFrame) { + // // hack: setting all freqs to 0 -- will be eliminated later + // pitchTracks[iCandidate][jFrame] = 0; + // } + // break; + // } + pitchTracks[iCandidate][iFrame] = mpOut[iFrame]; + freqSum[iCandidate] += mpOut[iFrame]; + freqNumber[iCandidate]++; + prevFreq = mpOut[iFrame]; + } + } + freqMean[iCandidate] = freqSum[iCandidate]*1.0/freqNumber[iCandidate]; + } + + // find near duplicate pitch tracks + vector<size_t> duplicates; + for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) { + for (size_t jCandidate = iCandidate+1; jCandidate < m_nCandidate; ++jCandidate) { + size_t countEqual = 0; + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) + { + if ((pitchTracks[jCandidate][iFrame] == 0 && pitchTracks[iCandidate][iFrame] == 0) || + fabs(pitchTracks[iCandidate][iFrame]/pitchTracks[jCandidate][iFrame]-1)<0.01) + countEqual++; + } + // std::cerr << "proportion equal: " << (countEqual * 1.0 / nFrame) << std::endl; + if (countEqual * 1.0 / nFrame > 0.8) { + if (freqNumber[iCandidate] > freqNumber[jCandidate]) { + duplicates.push_back(jCandidate); + } else if (iCandidate < jCandidate) { + duplicates.push_back(iCandidate); + } + } + } + } + + // std::cerr << "n duplicate: " << duplicates.size() << std::endl; + for (size_t iDup = 0; iDup < duplicates.size(); ++ iDup) { + // std::cerr << "duplicate: " << iDup << std::endl; + } + + // now find non-duplicate pitch tracks + map<int, int> candidateActuals; + map<int, std::string> candidateLabels; + + vector<vector<float> > outputFrequencies; + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) outputFrequencies.push_back(vector<float>(0)); + + int actualCandidateNumber = 0; + for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) { + bool isDuplicate = false; + for (size_t i = 0; i < duplicates.size(); ++i) { + // std::cerr << duplicates[i] << std::endl; + if (duplicates[i] == iCandidate) { + isDuplicate = true; + break; + } + } + if (!isDuplicate && freqNumber[iCandidate] > 0.5*nFrame) + { + std::ostringstream convert; + convert << actualCandidateNumber++; + candidateLabels[iCandidate] = convert.str(); + candidateActuals[iCandidate] = actualCandidateNumber; + // std::cerr << iCandidate << " " << actualCandidateNumber << " " << freqNumber[iCandidate] << " " << freqMean[iCandidate] << std::endl; + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) + { + if (pitchTracks[iCandidate][iFrame] > 0) + { + // featureValues[m_timestamp[iFrame]][iCandidate] = + // pitchTracks[iCandidate][iFrame]; + outputFrequencies[iFrame].push_back(pitchTracks[iCandidate][iFrame]); + } + } + } + // fs[m_oPitchTrackCandidates].push_back(f); + } + + // adapt our features so as to return a stack of candidate values + // per frame + + FeatureSet fs; + + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame){ + Feature f; + f.hasTimestamp = true; + f.timestamp = m_timestamp[iFrame]; + f.values = outputFrequencies[iFrame]; + fs[0].push_back(f); + } + + // I stopped using Chris's map stuff below because I couldn't get my head around it + // + // for (map<RealTime, map<int, float> >::const_iterator i = + // featureValues.begin(); i != featureValues.end(); ++i) { + // Feature f; + // f.hasTimestamp = true; + // f.timestamp = i->first; + // int nextCandidate = candidateActuals.begin()->second; + // for (map<int, float>::const_iterator j = + // i->second.begin(); j != i->second.end(); ++j) { + // while (candidateActuals[j->first] > nextCandidate) { + // f.values.push_back(0); + // ++nextCandidate; + // } + // f.values.push_back(j->second); + // nextCandidate = j->first + 1; + // } + // //!!! can't use labels? + // fs[0].push_back(f); + // } + + return fs; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LocalCandidatePYIN.h Thu Mar 06 15:49:36 2014 +0000 @@ -0,0 +1,75 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COLocalCandidatePYING included with this distribution for more information. +*/ + +#ifndef _LOCALCANDIDATEPYIN_H_ +#define _LOCALCANDIDATEPYIN_H_ + +#include <vamp-sdk/Plugin.h> + +#include "Yin.h" + +class LocalCandidatePYIN : public Vamp::Plugin +{ +public: + LocalCandidatePYIN(float inputSampleRate); + virtual ~LocalCandidatePYIN(); + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + InputDomain getInputDomain() const; + size_t getPreferredBlockSize() const; + size_t getPreferredStepSize() const; + size_t getMinChannelCount() const; + size_t getMaxChannelCount() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string identifier) const; + void setParameter(std::string identifier, float value); + + ProgramList getPrograms() const; + std::string getCurrentProgram() const; + void selectProgram(std::string name); + + OutputList getOutputDescriptors() const; + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + size_t m_channels; + size_t m_stepSize; + size_t m_blockSize; + float m_fmin; + float m_fmax; + Yin m_yin; + + mutable int m_oPitchTrackCandidates; + + float m_threshDistr; + float m_outputUnvoiced; + vector<vector<pair<double, double> > > m_pitchProb; + vector<Vamp::RealTime> m_timestamp; + size_t m_nCandidate; +}; + +#endif
--- a/Makefile.inc Thu Mar 06 15:48:46 2014 +0000 +++ b/Makefile.inc Thu Mar 06 15:49:36 2014 +0000 @@ -9,8 +9,10 @@ PLUGIN := pyin$(PLUGIN_EXT) -SOURCES := PYIN.cpp \ - VampYin.cpp \ +SOURCES := PYinVamp.cpp \ + YinVamp.cpp \ + LocalCandidatePYIN.cpp \ + YinVampFreqConstrained.cpp \ Yin.cpp \ YinUtil.cpp \ MonoNote.cpp \ @@ -63,8 +65,10 @@ # DO NOT DELETE -PYIN.o: PYIN.h -VampYin.o: VampYin.h +PYinVamp.o: PYinVamp.h +YinVamp.o: YinVamp.h +YinVampFreqConstrained.o: YinVampFreqConstrained.h +LocalCandidatePYIN.o: LocalCandidatePYIN.h Yin.o: Yin.h MonoNoteParameters.o: MonoNoteParameters.h MonoNote.o: MonoNote.h @@ -72,7 +76,7 @@ MonoPitchHMM.o: MonoPitchHMM.h SparseHMM.o: SparseHMM.h MonoNoteHMM.o: MonoNoteHMM.h -libmain.o: PYIN.h VampYin.h +libmain.o: PYinVamp.h YinVamp.h LocalCandidatePYIN.h test/TestMeanFilter.o: MeanFilter.h test/TestYin.o: Yin.h
--- a/MonoPitch.cpp Thu Mar 06 15:48:46 2014 +0000 +++ b/MonoPitch.cpp Thu Mar 06 15:49:36 2014 +0000 @@ -41,7 +41,7 @@ obsProb.push_back(hmm.calculateObsProb(pitchProb[iFrame])); } - vector<double> *scale = new vector<double>(pitchProb.size()); + vector<double> *scale = new vector<double>(0); vector<float> out;
--- a/MonoPitchHMM.cpp Thu Mar 06 15:48:46 2014 +0000 +++ b/MonoPitchHMM.cpp Thu Mar 06 15:49:36 2014 +0000 @@ -30,7 +30,7 @@ m_yinTrust(.5), m_freqs(0) { - m_transitionWidth = 5*(m_nBPS/2) + 1; + m_transitionWidth = 9*(m_nBPS/2) + 1; m_nPitch = 48 * m_nBPS; m_freqs = vector<double>(2*m_nPitch); for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch)
--- a/PYIN.cpp Thu Mar 06 15:48:46 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,499 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - pYIN - A fundamental frequency estimator for monophonic audio - Centre for Digital Music, Queen Mary, University of London. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#include "PYIN.h" -#include "MonoNote.h" -#include "MonoPitch.h" - -#include "vamp-sdk/FFT.h" - -#include <vector> -#include <algorithm> - -#include <cstdio> -#include <cmath> -#include <complex> - -using std::string; -using std::vector; -using Vamp::RealTime; - - -PYIN::PYIN(float inputSampleRate) : - Plugin(inputSampleRate), - m_channels(0), - m_stepSize(256), - m_blockSize(2048), - m_fmin(40), - m_fmax(700), - m_yin(2048, inputSampleRate, 0.0), - m_oF0Candidates(0), - m_oF0Probs(0), - m_oVoicedProb(0), - m_oCandidateSalience(0), - m_oSmoothedPitchTrack(0), - m_oNotes(0), - m_threshDistr(2.0f), - m_outputUnvoiced(0.0f), - m_pitchProb(0), - m_timestamp(0) -{ -} - -PYIN::~PYIN() -{ -} - -string -PYIN::getIdentifier() const -{ - return "pyin"; -} - -string -PYIN::getName() const -{ - return "pYin"; -} - -string -PYIN::getDescription() const -{ - return "Monophonic pitch and note tracking based on a probabilistic Yin extension."; -} - -string -PYIN::getMaker() const -{ - return "Matthias Mauch"; -} - -int -PYIN::getPluginVersion() const -{ - // Increment this each time you release a version that behaves - // differently from the previous one - return 1; -} - -string -PYIN::getCopyright() const -{ - return "GPL"; -} - -PYIN::InputDomain -PYIN::getInputDomain() const -{ - return TimeDomain; -} - -size_t -PYIN::getPreferredBlockSize() const -{ - return 2048; -} - -size_t -PYIN::getPreferredStepSize() const -{ - return 256; -} - -size_t -PYIN::getMinChannelCount() const -{ - return 1; -} - -size_t -PYIN::getMaxChannelCount() const -{ - return 1; -} - -PYIN::ParameterList -PYIN::getParameterDescriptors() const -{ - ParameterList list; - - ParameterDescriptor d; - - d.identifier = "threshdistr"; - d.name = "Yin threshold distribution"; - d.description = "."; - d.unit = ""; - d.minValue = 0.0f; - d.maxValue = 7.0f; - d.defaultValue = 2.0f; - d.isQuantized = true; - d.quantizeStep = 1.0f; - d.valueNames.push_back("Uniform"); - d.valueNames.push_back("Beta (mean 0.10)"); - d.valueNames.push_back("Beta (mean 0.15)"); - d.valueNames.push_back("Beta (mean 0.20)"); - d.valueNames.push_back("Beta (mean 0.30)"); - d.valueNames.push_back("Single Value 0.10"); - d.valueNames.push_back("Single Value 0.15"); - d.valueNames.push_back("Single Value 0.20"); - list.push_back(d); - - d.identifier = "outputunvoiced"; - d.valueNames.clear(); - d.name = "Output estimates classified as unvoiced?"; - d.description = "."; - d.unit = ""; - d.minValue = 0.0f; - d.maxValue = 2.0f; - d.defaultValue = 0.0f; - d.isQuantized = true; - d.quantizeStep = 1.0f; - d.valueNames.push_back("No"); - d.valueNames.push_back("Yes"); - d.valueNames.push_back("Yes, as negative frequencies"); - list.push_back(d); - - return list; -} - -float -PYIN::getParameter(string identifier) const -{ - if (identifier == "threshdistr") { - return m_threshDistr; - } - if (identifier == "outputunvoiced") { - return m_outputUnvoiced; - } - return 0.f; -} - -void -PYIN::setParameter(string identifier, float value) -{ - if (identifier == "threshdistr") - { - m_threshDistr = value; - } - if (identifier == "outputunvoiced") - { - m_outputUnvoiced = value; - } - -} - -PYIN::ProgramList -PYIN::getPrograms() const -{ - ProgramList list; - return list; -} - -string -PYIN::getCurrentProgram() const -{ - return ""; // no programs -} - -void -PYIN::selectProgram(string name) -{ -} - -PYIN::OutputList -PYIN::getOutputDescriptors() const -{ - OutputList outputs; - - OutputDescriptor d; - - int outputNumber = 0; - - d.identifier = "f0candidates"; - d.name = "F0 Candidates"; - d.description = "Estimated fundamental frequency candidates."; - d.unit = "Hz"; - d.hasFixedBinCount = false; - // d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = m_fmin; - d.maxValue = 500; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_oF0Candidates = outputNumber++; - - d.identifier = "f0probs"; - d.name = "Candidate Probabilities"; - d.description = "Probabilities of estimated fundamental frequency candidates."; - d.unit = ""; - d.hasFixedBinCount = false; - // d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = 0; - d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_oF0Probs = outputNumber++; - - d.identifier = "voicedprob"; - d.name = "Voiced Probability"; - d.description = "Probability that the signal is voiced according to Probabilistic Yin."; - d.unit = ""; - d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = 0; - d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_oVoicedProb = outputNumber++; - - d.identifier = "candidatesalience"; - d.name = "Candidate Salience"; - d.description = "Candidate Salience"; - d.hasFixedBinCount = true; - d.binCount = m_blockSize / 2; - d.hasKnownExtents = true; - d.minValue = 0; - d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_oCandidateSalience = outputNumber++; - - d.identifier = "smoothedpitchtrack"; - d.name = "Smoothed Pitch Track"; - d.description = "."; - d.unit = "Hz"; - d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = false; - // d.minValue = 0; - // d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_oSmoothedPitchTrack = outputNumber++; - - d.identifier = "notes"; - d.name = "Notes"; - d.description = "Derived fixed-pitch note frequencies"; - // d.unit = "MIDI unit"; - d.unit = "Hz"; - d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = false; - d.isQuantized = false; - d.sampleType = OutputDescriptor::VariableSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = true; - outputs.push_back(d); - m_oNotes = outputNumber++; - - return outputs; -} - -bool -PYIN::initialise(size_t channels, size_t stepSize, size_t blockSize) -{ - if (channels < getMinChannelCount() || - channels > getMaxChannelCount()) return false; - -/* - std::cerr << "PYIN::initialise: channels = " << channels - << ", stepSize = " << stepSize << ", blockSize = " << blockSize - << std::endl; -*/ - m_channels = channels; - m_stepSize = stepSize; - m_blockSize = blockSize; - - reset(); - - return true; -} - -void -PYIN::reset() -{ - m_yin.setThresholdDistr(m_threshDistr); - m_yin.setFrameSize(m_blockSize); - - m_pitchProb.clear(); - m_timestamp.clear(); -/* - std::cerr << "PYIN::reset" - << ", blockSize = " << m_blockSize - << std::endl; -*/ -} - -PYIN::FeatureSet -PYIN::process(const float *const *inputBuffers, RealTime timestamp) -{ - timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate)); - FeatureSet fs; - - double *dInputBuffers = new double[m_blockSize]; - for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i]; - - Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers); - delete [] dInputBuffers; - - // First, get the things out of the way that we don't want to output - // immediately, but instead save for later. - vector<pair<double, double> > tempPitchProb; - for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate) - { - double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69; - tempPitchProb.push_back(pair<double, double> - (tempPitch, yo.freqProb[iCandidate].second)); - } - m_pitchProb.push_back(tempPitchProb); - m_timestamp.push_back(timestamp); - - // F0 CANDIDATES - Feature f; - f.hasTimestamp = true; - f.timestamp = timestamp; - for (size_t i = 0; i < yo.freqProb.size(); ++i) - { - f.values.push_back(yo.freqProb[i].first); - } - fs[m_oF0Candidates].push_back(f); - - // VOICEDPROB - f.values.clear(); - float voicedProb = 0; - for (size_t i = 0; i < yo.freqProb.size(); ++i) - { - f.values.push_back(yo.freqProb[i].second); - voicedProb += yo.freqProb[i].second; - } - fs[m_oF0Probs].push_back(f); - - f.values.clear(); - f.values.push_back(voicedProb); - fs[m_oVoicedProb].push_back(f); - - // SALIENCE -- maybe this should eventually disappear - f.values.clear(); - float salienceSum = 0; - for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin) - { - f.values.push_back(yo.salience[iBin]); - salienceSum += yo.salience[iBin]; - } - fs[m_oCandidateSalience].push_back(f); - - return fs; -} - -PYIN::FeatureSet -PYIN::getRemainingFeatures() -{ - FeatureSet fs; - Feature f; - f.hasTimestamp = true; - f.hasDuration = false; - - if (m_pitchProb.empty()) { - return fs; - } - - // MONO-PITCH STUFF - MonoPitch mp; - vector<float> mpOut = mp.process(m_pitchProb); - for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) - { - if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue; - f.timestamp = m_timestamp[iFrame]; - f.values.clear(); - if (m_outputUnvoiced == 1) - { - f.values.push_back(fabs(mpOut[iFrame])); - } else { - f.values.push_back(mpOut[iFrame]); - } - - fs[m_oSmoothedPitchTrack].push_back(f); - } - - // MONO-NOTE STUFF - MonoNote mn; - std::vector<std::vector<std::pair<double, double> > > smoothedPitch; - for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) { - std::vector<std::pair<double, double> > temp; - if (mpOut[iFrame] > 0) - { - double tempPitch = 12 * std::log(mpOut[iFrame]/440)/std::log(2.) + 69; - temp.push_back(std::pair<double,double>(tempPitch, .9)); - } - smoothedPitch.push_back(temp); - } - // vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb); - vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch); - - // turning feature into a note feature - f.hasTimestamp = true; - f.hasDuration = true; - f.values.clear(); - - int onsetFrame = 0; - bool isVoiced = 0; - bool oldIsVoiced = 0; - size_t nFrame = m_pitchProb.size(); - - std::vector<float> notePitchTrack; // collects pitches for one note at a time - for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) - { - isVoiced = mnOut[iFrame].noteState < 3 && smoothedPitch[iFrame].size() > 0; - if (isVoiced && iFrame != nFrame-1) - { - if (oldIsVoiced == 0) // beginning of a note - { - onsetFrame = iFrame; - notePitchTrack.clear(); - } - float pitch = smoothedPitch[iFrame][0].first; - notePitchTrack.push_back(pitch); // add to the note's pitch track - } else { // not currently voiced - if (oldIsVoiced == 1 && notePitchTrack.size() > 4) // end of the note - { - std::sort(notePitchTrack.begin(), notePitchTrack.end()); - float medianPitch = notePitchTrack[notePitchTrack.size()/2]; - float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440; - f.values.clear(); - f.values.push_back(medianFreq); - f.timestamp = m_timestamp[onsetFrame]; - f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame]; - fs[m_oNotes].push_back(f); - } - } - oldIsVoiced = isVoiced; - } - return fs; -}
--- a/PYIN.h Thu Mar 06 15:48:46 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,79 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - pYIN - A fundamental frequency estimator for monophonic audio - Centre for Digital Music, Queen Mary, University of London. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#ifndef _PYIN_H_ -#define _PYIN_H_ - -#include <vamp-sdk/Plugin.h> - -#include "Yin.h" - -class PYIN : public Vamp::Plugin -{ -public: - PYIN(float inputSampleRate); - virtual ~PYIN(); - - std::string getIdentifier() const; - std::string getName() const; - std::string getDescription() const; - std::string getMaker() const; - int getPluginVersion() const; - std::string getCopyright() const; - - InputDomain getInputDomain() const; - size_t getPreferredBlockSize() const; - size_t getPreferredStepSize() const; - size_t getMinChannelCount() const; - size_t getMaxChannelCount() const; - - ParameterList getParameterDescriptors() const; - float getParameter(std::string identifier) const; - void setParameter(std::string identifier, float value); - - ProgramList getPrograms() const; - std::string getCurrentProgram() const; - void selectProgram(std::string name); - - OutputList getOutputDescriptors() const; - - bool initialise(size_t channels, size_t stepSize, size_t blockSize); - void reset(); - - FeatureSet process(const float *const *inputBuffers, - Vamp::RealTime timestamp); - - FeatureSet getRemainingFeatures(); - -protected: - size_t m_channels; - size_t m_stepSize; - size_t m_blockSize; - float m_fmin; - float m_fmax; - Yin m_yin; - - mutable int m_oF0Candidates; - mutable int m_oF0Probs; - mutable int m_oVoicedProb; - mutable int m_oCandidateSalience; - mutable int m_oSmoothedPitchTrack; - mutable int m_oNotes; - - float m_threshDistr; - float m_outputUnvoiced; - vector<vector<pair<double, double> > > m_pitchProb; - vector<Vamp::RealTime> m_timestamp; -}; - -#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PYinVamp.cpp Thu Mar 06 15:49:36 2014 +0000 @@ -0,0 +1,511 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "PYinVamp.h" +#include "MonoNote.h" +#include "MonoPitch.h" + +#include "vamp-sdk/FFT.h" + +#include <vector> +#include <algorithm> + +#include <cstdio> +#include <cmath> +#include <complex> + +using std::string; +using std::vector; +using Vamp::RealTime; + + +PYinVamp::PYinVamp(float inputSampleRate) : + Plugin(inputSampleRate), + m_channels(0), + m_stepSize(256), + m_blockSize(2048), + m_fmin(40), + m_fmax(700), + m_yin(2048, inputSampleRate, 0.0), + m_oF0Candidates(0), + m_oF0Probs(0), + m_oVoicedProb(0), + m_oCandidateSalience(0), + m_oSmoothedPitchTrack(0), + m_oNotes(0), + m_threshDistr(2.0f), + m_outputUnvoiced(0.0f), + m_pitchProb(0), + m_timestamp(0) +{ +} + +PYinVamp::~PYinVamp() +{ +} + +string +PYinVamp::getIdentifier() const +{ + return "pyin"; +} + +string +PYinVamp::getName() const +{ + return "pYin"; +} + +string +PYinVamp::getDescription() const +{ + return "Monophonic pitch and note tracking based on a probabilistic Yin extension."; +} + +string +PYinVamp::getMaker() const +{ + return "Matthias Mauch"; +} + +int +PYinVamp::getPluginVersion() const +{ + // Increment this each time you release a version that behaves + // differently from the previous one + return 1; +} + +string +PYinVamp::getCopyright() const +{ + return "GPL"; +} + +PYinVamp::InputDomain +PYinVamp::getInputDomain() const +{ + return TimeDomain; +} + +size_t +PYinVamp::getPreferredBlockSize() const +{ + return 2048; +} + +size_t +PYinVamp::getPreferredStepSize() const +{ + return 256; +} + +size_t +PYinVamp::getMinChannelCount() const +{ + return 1; +} + +size_t +PYinVamp::getMaxChannelCount() const +{ + return 1; +} + +PYinVamp::ParameterList +PYinVamp::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor d; + + d.identifier = "threshdistr"; + d.name = "Yin threshold distribution"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 7.0f; + d.defaultValue = 2.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("Uniform"); + d.valueNames.push_back("Beta (mean 0.10)"); + d.valueNames.push_back("Beta (mean 0.15)"); + d.valueNames.push_back("Beta (mean 0.20)"); + d.valueNames.push_back("Beta (mean 0.30)"); + d.valueNames.push_back("Single Value 0.10"); + d.valueNames.push_back("Single Value 0.15"); + d.valueNames.push_back("Single Value 0.20"); + list.push_back(d); + + d.identifier = "outputunvoiced"; + d.valueNames.clear(); + d.name = "Output estimates classified as unvoiced?"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 2.0f; + d.defaultValue = 0.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("No"); + d.valueNames.push_back("Yes"); + d.valueNames.push_back("Yes, as negative frequencies"); + list.push_back(d); + + return list; +} + +float +PYinVamp::getParameter(string identifier) const +{ + if (identifier == "threshdistr") { + return m_threshDistr; + } + if (identifier == "outputunvoiced") { + return m_outputUnvoiced; + } + return 0.f; +} + +void +PYinVamp::setParameter(string identifier, float value) +{ + if (identifier == "threshdistr") + { + m_threshDistr = value; + } + if (identifier == "outputunvoiced") + { + m_outputUnvoiced = value; + } + +} + +PYinVamp::ProgramList +PYinVamp::getPrograms() const +{ + ProgramList list; + return list; +} + +string +PYinVamp::getCurrentProgram() const +{ + return ""; // no programs +} + +void +PYinVamp::selectProgram(string name) +{ +} + +PYinVamp::OutputList +PYinVamp::getOutputDescriptors() const +{ + OutputList outputs; + + OutputDescriptor d; + + int outputNumber = 0; + + d.identifier = "f0candidates"; + d.name = "F0 Candidates"; + d.description = "Estimated fundamental frequency candidates."; + d.unit = "Hz"; + d.hasFixedBinCount = false; + // d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = m_fmin; + d.maxValue = 500; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oF0Candidates = outputNumber++; + + d.identifier = "f0probs"; + d.name = "Candidate Probabilities"; + d.description = "Probabilities of estimated fundamental frequency candidates."; + d.unit = ""; + d.hasFixedBinCount = false; + // d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oF0Probs = outputNumber++; + + d.identifier = "voicedprob"; + d.name = "Voiced Probability"; + d.description = "Probability that the signal is voiced according to Probabilistic Yin."; + d.unit = ""; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oVoicedProb = outputNumber++; + + d.identifier = "candidatesalience"; + d.name = "Candidate Salience"; + d.description = "Candidate Salience"; + d.hasFixedBinCount = true; + d.binCount = m_blockSize / 2; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oCandidateSalience = outputNumber++; + + d.identifier = "smoothedpitchtrack"; + d.name = "Smoothed Pitch Track"; + d.description = "."; + d.unit = "Hz"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = false; + // d.minValue = 0; + // d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oSmoothedPitchTrack = outputNumber++; + + d.identifier = "notes"; + d.name = "Notes"; + d.description = "Derived fixed-pitch note frequencies"; + // d.unit = "MIDI unit"; + d.unit = "Hz"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = false; + d.isQuantized = false; + d.sampleType = OutputDescriptor::VariableSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = true; + outputs.push_back(d); + m_oNotes = outputNumber++; + + return outputs; +} + +bool +PYinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + +/* + std::cerr << "PYinVamp::initialise: channels = " << channels + << ", stepSize = " << stepSize << ", blockSize = " << blockSize + << std::endl; +*/ + m_channels = channels; + m_stepSize = stepSize; + m_blockSize = blockSize; + + reset(); + + return true; +} + +void +PYinVamp::reset() +{ + m_yin.setThresholdDistr(m_threshDistr); + m_yin.setFrameSize(m_blockSize); + + m_pitchProb.clear(); + m_timestamp.clear(); +/* + std::cerr << "PYinVamp::reset" + << ", blockSize = " << m_blockSize + << std::endl; +*/ +} + +PYinVamp::FeatureSet +PYinVamp::process(const float *const *inputBuffers, RealTime timestamp) +{ + timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate)); + FeatureSet fs; + + float rms = 0; + + double *dInputBuffers = new double[m_blockSize]; + for (size_t i = 0; i < m_blockSize; ++i) { + dInputBuffers[i] = inputBuffers[0][i]; + rms += inputBuffers[0][i] * inputBuffers[0][i]; + } + rms /= m_blockSize; + rms = sqrt(rms); + float lowAmp = 0.01; + bool isLowAmplitude = (rms < lowAmp); + + Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers); + delete [] dInputBuffers; + + // First, get the things out of the way that we don't want to output + // immediately, but instead save for later. + vector<pair<double, double> > tempPitchProb; + for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate) + { + double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69; + if (!isLowAmplitude) + tempPitchProb.push_back(pair<double, double> + (tempPitch, yo.freqProb[iCandidate].second)); + else + tempPitchProb.push_back(pair<double, double> + (tempPitch, yo.freqProb[iCandidate].second*((rms+lowAmp)/(2*lowAmp)))); + } + m_pitchProb.push_back(tempPitchProb); + m_timestamp.push_back(timestamp); + + // F0 CANDIDATES + Feature f; + f.hasTimestamp = true; + f.timestamp = timestamp; + for (size_t i = 0; i < yo.freqProb.size(); ++i) + { + f.values.push_back(yo.freqProb[i].first); + } + fs[m_oF0Candidates].push_back(f); + + // VOICEDPROB + f.values.clear(); + float voicedProb = 0; + for (size_t i = 0; i < yo.freqProb.size(); ++i) + { + f.values.push_back(yo.freqProb[i].second); + voicedProb += yo.freqProb[i].second; + } + fs[m_oF0Probs].push_back(f); + + f.values.push_back(voicedProb); + fs[m_oVoicedProb].push_back(f); + + // SALIENCE -- maybe this should eventually disappear + f.values.clear(); + float salienceSum = 0; + for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin) + { + f.values.push_back(yo.salience[iBin]); + salienceSum += yo.salience[iBin]; + } + fs[m_oCandidateSalience].push_back(f); + + return fs; +} + +PYinVamp::FeatureSet +PYinVamp::getRemainingFeatures() +{ + FeatureSet fs; + Feature f; + f.hasTimestamp = true; + f.hasDuration = false; + + if (m_pitchProb.empty()) { + return fs; + } + + // MONO-PITCH STUFF + MonoPitch mp; + vector<float> mpOut = mp.process(m_pitchProb); + for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) + { + if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue; + f.timestamp = m_timestamp[iFrame]; + f.values.clear(); + if (m_outputUnvoiced == 1) + { + f.values.push_back(fabs(mpOut[iFrame])); + } else { + f.values.push_back(mpOut[iFrame]); + } + + fs[m_oSmoothedPitchTrack].push_back(f); + } + + // MONO-NOTE STUFF + MonoNote mn; + std::vector<std::vector<std::pair<double, double> > > smoothedPitch; + for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) { + std::vector<std::pair<double, double> > temp; + if (mpOut[iFrame] > 0) + { + double tempPitch = 12 * std::log(mpOut[iFrame]/440)/std::log(2.) + 69; + temp.push_back(std::pair<double,double>(tempPitch, .9)); + } + smoothedPitch.push_back(temp); + } + // vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb); + vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch); + + // turning feature into a note feature + f.hasTimestamp = true; + f.hasDuration = true; + f.values.clear(); + + int onsetFrame = 0; + bool isVoiced = 0; + bool oldIsVoiced = 0; + size_t nFrame = m_pitchProb.size(); + + std::vector<float> notePitchTrack; // collects pitches for one note at a time + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) + { + isVoiced = mnOut[iFrame].noteState < 3 && smoothedPitch[iFrame].size() > 0; + if (isVoiced && iFrame != nFrame-1) + { + if (oldIsVoiced == 0) // beginning of a note + { + onsetFrame = iFrame; + notePitchTrack.clear(); + } + float pitch = smoothedPitch[iFrame][0].first; + notePitchTrack.push_back(pitch); // add to the note's pitch track + } else { // not currently voiced + if (oldIsVoiced == 1 && notePitchTrack.size() > 4) // end of the note + { + std::sort(notePitchTrack.begin(), notePitchTrack.end()); + float medianPitch = notePitchTrack[notePitchTrack.size()/2]; + float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440; + f.values.clear(); + f.values.push_back(medianFreq); + f.timestamp = m_timestamp[onsetFrame]; + f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame]; + fs[m_oNotes].push_back(f); + } + } + oldIsVoiced = isVoiced; + } + return fs; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PYinVamp.h Thu Mar 06 15:49:36 2014 +0000 @@ -0,0 +1,79 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _PYINVAMP_H_ +#define _PYINVAMP_H_ + +#include <vamp-sdk/Plugin.h> + +#include "Yin.h" + +class PYinVamp : public Vamp::Plugin +{ +public: + PYinVamp(float inputSampleRate); + virtual ~PYinVamp(); + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + InputDomain getInputDomain() const; + size_t getPreferredBlockSize() const; + size_t getPreferredStepSize() const; + size_t getMinChannelCount() const; + size_t getMaxChannelCount() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string identifier) const; + void setParameter(std::string identifier, float value); + + ProgramList getPrograms() const; + std::string getCurrentProgram() const; + void selectProgram(std::string name); + + OutputList getOutputDescriptors() const; + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + size_t m_channels; + size_t m_stepSize; + size_t m_blockSize; + float m_fmin; + float m_fmax; + Yin m_yin; + + mutable int m_oF0Candidates; + mutable int m_oF0Probs; + mutable int m_oVoicedProb; + mutable int m_oCandidateSalience; + mutable int m_oSmoothedPitchTrack; + mutable int m_oNotes; + + float m_threshDistr; + float m_outputUnvoiced; + vector<vector<pair<double, double> > > m_pitchProb; + vector<Vamp::RealTime> m_timestamp; +}; + +#endif
--- a/SparseHMM.cpp Thu Mar 06 15:48:46 2014 +0000 +++ b/SparseHMM.cpp Thu Mar 06 15:49:36 2014 +0000 @@ -30,6 +30,10 @@ SparseHMM::decodeViterbi(std::vector<vector<double> > obsProb, vector<double> *scale) { + if (obsProb.size() < 1) { + return vector<int>(); + } + size_t nState = init.size(); size_t nFrame = obsProb.size();
--- a/VampYin.cpp Thu Mar 06 15:48:46 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,379 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - pYIN - A fundamental frequency estimator for monophonic audio - Centre for Digital Music, Queen Mary, University of London. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#include "VampYin.h" -#include "MonoNote.h" - -#include "vamp-sdk/FFT.h" - -#include <vector> -#include <algorithm> - -#include <cstdio> -#include <cmath> -#include <complex> - -using std::string; -using std::vector; -using Vamp::RealTime; - - -VampYin::VampYin(float inputSampleRate) : - Plugin(inputSampleRate), - m_channels(0), - m_stepSize(256), - m_blockSize(2048), - m_fmin(40), - m_fmax(1000), - m_yin(2048, inputSampleRate, 0.0), - m_outNoF0(0), - m_outNoPeriodicity(0), - m_outNoRms(0), - m_outNoSalience(0), - m_yinParameter(0.15f), - m_outputUnvoiced(2.0f) -{ -} - -VampYin::~VampYin() -{ -} - -string -VampYin::getIdentifier() const -{ - return "yin"; -} - -string -VampYin::getName() const -{ - return "Yin"; -} - -string -VampYin::getDescription() const -{ - return "A vamp implementation of the Yin algorithm for monophonic frequency estimation."; -} - -string -VampYin::getMaker() const -{ - return "Matthias Mauch"; -} - -int -VampYin::getPluginVersion() const -{ - // Increment this each time you release a version that behaves - // differently from the previous one - return 1; -} - -string -VampYin::getCopyright() const -{ - return "GPL"; -} - -VampYin::InputDomain -VampYin::getInputDomain() const -{ - return TimeDomain; -} - -size_t -VampYin::getPreferredBlockSize() const -{ - return 2048; -} - -size_t -VampYin::getPreferredStepSize() const -{ - return 256; -} - -size_t -VampYin::getMinChannelCount() const -{ - return 1; -} - -size_t -VampYin::getMaxChannelCount() const -{ - return 1; -} - -VampYin::ParameterList -VampYin::getParameterDescriptors() const -{ - ParameterList list; - - ParameterDescriptor d; - d.identifier = "yinThreshold"; - d.name = "Yin threshold"; - d.description = "The greedy Yin search for a low value difference function is done once a dip lower than this threshold is reached."; - d.unit = ""; - d.minValue = 0.025f; - d.maxValue = 1.0f; - d.defaultValue = 0.15f; - d.isQuantized = true; - d.quantizeStep = 0.025f; - - list.push_back(d); - - // d.identifier = "removeunvoiced"; - // d.name = "Remove pitches classified as unvoiced."; - // d.description = "If ticked, then the pitch estimator will return the most likely pitch, even if it 'thinks' there isn't any."; - // d.unit = ""; - // d.minValue = 0.0f; - // d.maxValue = 1.0f; - // d.defaultValue = 0.0f; - // d.isQuantized = true; - // d.quantizeStep = 1.0f; - // d.valueNames.clear(); - // list.push_back(d); - - d.identifier = "outputunvoiced"; - d.valueNames.clear(); - d.name = "Output estimates classified as unvoiced?"; - d.description = "."; - d.unit = ""; - d.minValue = 0.0f; - d.maxValue = 2.0f; - d.defaultValue = 2.0f; - d.isQuantized = true; - d.quantizeStep = 1.0f; - d.valueNames.push_back("No"); - d.valueNames.push_back("Yes"); - d.valueNames.push_back("Yes, as negative frequencies"); - list.push_back(d); - - return list; -} - -float -VampYin::getParameter(string identifier) const -{ - if (identifier == "yinThreshold") { - return m_yinParameter; - } - if (identifier == "outputunvoiced") { - return m_outputUnvoiced; - } - return 0.f; -} - -void -VampYin::setParameter(string identifier, float value) -{ - if (identifier == "yinThreshold") - { - m_yinParameter = value; - } - if (identifier == "outputunvoiced") - { - m_outputUnvoiced = value; - } -} - -VampYin::ProgramList -VampYin::getPrograms() const -{ - ProgramList list; - return list; -} - -string -VampYin::getCurrentProgram() const -{ - return ""; // no programs -} - -void -VampYin::selectProgram(string name) -{ -} - -VampYin::OutputList -VampYin::getOutputDescriptors() const -{ - OutputList outputs; - - OutputDescriptor d; - - int outputNumber = 0; - - d.identifier = "f0"; - d.name = "Estimated f0"; - d.description = "Estimated fundamental frequency"; - d.unit = "Hz"; - d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = m_fmin; - d.maxValue = 500; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_outNoF0 = outputNumber++; - - d.identifier = "periodicity"; - d.name = "Periodicity"; - d.description = "by-product of Yin f0 estimation"; - d.unit = ""; - d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = 0; - d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_outNoPeriodicity = outputNumber++; - - d.identifier = "rms"; - d.name = "Root mean square"; - d.description = "Root mean square of the waveform."; - d.unit = ""; - d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = 0; - d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_outNoRms = outputNumber++; - - d.identifier = "salience"; - d.name = "Salience"; - d.description = "Yin Salience"; - d.hasFixedBinCount = true; - d.binCount = m_blockSize / 2; - d.hasKnownExtents = true; - d.minValue = 0; - d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_outNoSalience = outputNumber++; - - return outputs; -} - -bool -VampYin::initialise(size_t channels, size_t stepSize, size_t blockSize) -{ - if (channels < getMinChannelCount() || - channels > getMaxChannelCount()) return false; - -/* - std::cerr << "VampYin::initialise: channels = " << channels - << ", stepSize = " << stepSize << ", blockSize = " << blockSize - << std::endl; -*/ - m_channels = channels; - m_stepSize = stepSize; - m_blockSize = blockSize; - - reset(); - - return true; -} - -void -VampYin::reset() -{ - m_yin.setThreshold(m_yinParameter); - m_yin.setFrameSize(m_blockSize); -/* - std::cerr << "VampYin::reset: yin threshold set to " << (m_yinParameter) - << ", blockSize = " << m_blockSize - << std::endl; -*/ -} - -VampYin::FeatureSet -VampYin::process(const float *const *inputBuffers, RealTime timestamp) -{ - timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate)); - FeatureSet fs; - - double *dInputBuffers = new double[m_blockSize]; - for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i]; - - Yin::YinOutput yo = m_yin.process(dInputBuffers); - // std::cerr << "f0 in VampYin: " << yo.f0 << std::endl; - Feature f; - f.hasTimestamp = true; - f.timestamp = timestamp; - if (m_outputUnvoiced == 0.0f) - { - // std::cerr << "f0 in VampYin: " << yo.f0 << std::endl; - if (yo.f0 > 0 && yo.f0 < m_fmax && yo.f0 > m_fmin) { - f.values.push_back(yo.f0); - fs[m_outNoF0].push_back(f); - } - } else if (m_outputUnvoiced == 1.0f) - { - if (fabs(yo.f0) < m_fmax && fabs(yo.f0) > m_fmin) { - f.values.push_back(fabs(yo.f0)); - fs[m_outNoF0].push_back(f); - } - } else - { - if (fabs(yo.f0) < m_fmax && fabs(yo.f0) > m_fmin) { - f.values.push_back(yo.f0); - fs[m_outNoF0].push_back(f); - } - } - - f.values.clear(); - f.values.push_back(yo.rms); - fs[m_outNoRms].push_back(f); - - f.values.clear(); - for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin) - { - f.values.push_back(yo.salience[iBin]); - } - fs[m_outNoSalience].push_back(f); - - f.values.clear(); - // f.values[0] = yo.periodicity; - f.values.push_back(yo.periodicity); - fs[m_outNoPeriodicity].push_back(f); - - delete [] dInputBuffers; - - return fs; -} - -VampYin::FeatureSet -VampYin::getRemainingFeatures() -{ - FeatureSet fs; - return fs; -}
--- a/VampYin.h Thu Mar 06 15:48:46 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - pYIN - A fundamental frequency estimator for monophonic audio - Centre for Digital Music, Queen Mary, University of London. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#ifndef _VAMPYIN_H_ -#define _VAMPYIN_H_ - -#include <vamp-sdk/Plugin.h> - -#include "Yin.h" - -class VampYin : public Vamp::Plugin -{ -public: - VampYin(float inputSampleRate); - virtual ~VampYin(); - - std::string getIdentifier() const; - std::string getName() const; - std::string getDescription() const; - std::string getMaker() const; - int getPluginVersion() const; - std::string getCopyright() const; - - InputDomain getInputDomain() const; - size_t getPreferredBlockSize() const; - size_t getPreferredStepSize() const; - size_t getMinChannelCount() const; - size_t getMaxChannelCount() const; - - ParameterList getParameterDescriptors() const; - float getParameter(std::string identifier) const; - void setParameter(std::string identifier, float value); - - ProgramList getPrograms() const; - std::string getCurrentProgram() const; - void selectProgram(std::string name); - - OutputList getOutputDescriptors() const; - - bool initialise(size_t channels, size_t stepSize, size_t blockSize); - void reset(); - - FeatureSet process(const float *const *inputBuffers, - Vamp::RealTime timestamp); - - FeatureSet getRemainingFeatures(); - -protected: - size_t m_channels; - size_t m_stepSize; - size_t m_blockSize; - float m_fmin; - float m_fmax; - Yin m_yin; - - mutable int m_outNoF0; - mutable int m_outNoPeriodicity; - mutable int m_outNoRms; - mutable int m_outNoSalience; - - float m_yinParameter; - float m_outputUnvoiced; -}; - -#endif
--- a/Yin.cpp Thu Mar 06 15:48:46 2014 +0000 +++ b/Yin.cpp Thu Mar 06 15:49:36 2014 +0000 @@ -57,7 +57,7 @@ double aperiodicity; double f0; - if (tau!=0) + if (tau!=0 && tau!=m_yinBufferSize-1) { interpolatedTau = YinUtil::parabolicInterpolation(yinBuffer, abs(tau), m_yinBufferSize); f0 = m_inputSampleRate * (1.0 / interpolatedTau); @@ -82,7 +82,7 @@ Yin::YinOutput Yin::processProbabilisticYin(const double *in) const { - + double* yinBuffer = new double[m_yinBufferSize]; // calculate aperiodicity function for all periods @@ -90,18 +90,11 @@ YinUtil::cumulativeDifference(yinBuffer, m_yinBufferSize); vector<double> peakProbability = YinUtil::yinProb(yinBuffer, m_threshDistr, m_yinBufferSize); - - // calculate overall "probability" from peak probability - double probSum = 0; - for (size_t iBin = 0; iBin < m_yinBufferSize; ++iBin) + + // basic yin output + Yin::YinOutput yo(0,0,0); + for (size_t iBuf = 1; iBuf < m_yinBufferSize-1; ++iBuf) { - probSum += peakProbability[iBin]; - } - - Yin::YinOutput yo(0,0,0); - for (size_t iBuf = 0; iBuf < m_yinBufferSize; ++iBuf) - { - yo.salience.push_back(peakProbability[iBuf]); if (peakProbability[iBuf] > 0) { double currentF0 = @@ -111,6 +104,11 @@ } } + // add salience + for (size_t iBuf = 0; iBuf < m_yinBufferSize; ++iBuf) { + yo.salience.push_back(peakProbability[iBuf]); + } + // std::cerr << yo.freqProb.size() << std::endl; delete [] yinBuffer; @@ -146,3 +144,38 @@ // m_removeUnvoiced = parameter; // return 0; // } + +float +Yin::constrainedMinPick(const double *in, const float minFreq, const int maxFreq) const { + + double* yinBuffer = new double[m_yinBufferSize]; + + // calculate aperiodicity function for all periods + YinUtil::fastDifference(in, yinBuffer, m_yinBufferSize); + YinUtil::cumulativeDifference(yinBuffer, m_yinBufferSize); + + int minPeriod = m_inputSampleRate / maxFreq; + int maxPeriod = m_inputSampleRate / minFreq; + + if (minPeriod < 0 || maxPeriod > m_yinBufferSize || minPeriod > maxPeriod) { + delete [] yinBuffer; + return 0.f; + } + + float bestVal = 1000; + int bestTau = 0; + for (int tau = minPeriod; tau <= maxPeriod; ++tau) + { + if (yinBuffer[tau] < bestVal) + { + bestVal = yinBuffer[tau]; + bestTau = tau; + } + } + + float interpolatedTau = + YinUtil::parabolicInterpolation(yinBuffer, bestTau, m_yinBufferSize); + + delete [] yinBuffer; + return m_inputSampleRate * (1.0 / interpolatedTau); +} \ No newline at end of file
--- a/Yin.h Thu Mar 06 15:48:46 2014 +0000 +++ b/Yin.h Thu Mar 06 15:49:36 2014 +0000 @@ -56,6 +56,7 @@ // int setRemoveUnvoiced(bool frameSize); YinOutput process(const double *in) const; YinOutput processProbabilisticYin(const double *in) const; + float constrainedMinPick(const double *in, const float minFreq, const int maxFreq) const; private: mutable size_t m_frameSize;
--- a/YinUtil.cpp Thu Mar 06 15:48:46 2014 +0000 +++ b/YinUtil.cpp Thu Mar 06 15:49:36 2014 +0000 @@ -164,10 +164,16 @@ return 0; } +std::vector<double> +YinUtil::yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize, const size_t minTau0, const size_t maxTau0) +{ + size_t minTau = 2; + size_t maxTau = yinBufferSize; -std::vector<double> -YinUtil::yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize) -{ + // adapt period range, if necessary + if (minTau0 > 0 && minTau0 < maxTau0) minTau = minTau0; + if (maxTau0 > 0 && maxTau0 < yinBufferSize && maxTau0 > minTau) maxTau = maxTau0; + double minWeight = 0.01; size_t tau; std::vector<float> thresholds; @@ -220,28 +226,46 @@ thresholds.push_back(0.01 + i*0.01); } - // double minYin = 2936; - // for (size_t i = 2; i < yinBufferSize; ++i) - // { - // if (yinBuffer[i] < minYin) - // { - // minYin = yinBuffer[i]; - // } - // } - // if (minYin < 0.01) std::cerr << "min Yin buffer element: " << minYin << std::endl; - int currThreshInd = nThreshold-1; - tau = 2; + tau = minTau; // double factor = 1.0 / (0.25 * (nThresholdInt+1) * (nThresholdInt + 1)); // factor to scale down triangular weight size_t minInd = 0; float minVal = 42.f; - while (currThreshInd != -1 && tau < yinBufferSize) + // while (currThreshInd != -1 && tau < maxTau) + // { + // if (yinBuffer[tau] < thresholds[currThreshInd]) + // { + // while (tau + 1 < maxTau && yinBuffer[tau+1] < yinBuffer[tau]) + // { + // tau++; + // } + // // tau is now local minimum + // // std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl; + // if (yinBuffer[tau] < minVal && tau > 2){ + // minVal = yinBuffer[tau]; + // minInd = tau; + // } + // peakProb[tau] += distribution[currThreshInd]; + // currThreshInd--; + // } else { + // tau++; + // } + // } + // double nonPeakProb = 1; + // for (size_t i = minTau; i < maxTau; ++i) + // { + // nonPeakProb -= peakProb[i]; + // } + // + // std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl; + float sumProb = 0; + while (tau < maxTau) { - if (yinBuffer[tau] < thresholds[currThreshInd]) + if (yinBuffer[tau] < thresholds[thresholds.size()-1] && yinBuffer[tau+1] < yinBuffer[tau]) { - while (tau + 1 < yinBufferSize && yinBuffer[tau+1] < yinBuffer[tau]) + while (tau + 1 < maxTau && yinBuffer[tau+1] < yinBuffer[tau]) { tau++; } @@ -251,16 +275,27 @@ minVal = yinBuffer[tau]; minInd = tau; } - peakProb[tau] += distribution[currThreshInd]; - currThreshInd--; + currThreshInd = nThresholdInt-1; + // while (thresholds[currThreshInd] > yinBuffer[tau] && currThreshInd > -1) { + // // std::cerr << distribution[currThreshInd] << std::endl; + // peakProb[tau] += distribution[currThreshInd]; + // currThreshInd--; + // } + peakProb[tau] = 1 - yinBuffer[tau]; + sumProb += peakProb[tau]; + tau++; } else { tau++; } } + double nonPeakProb = 1; - for (size_t i = 0; i < yinBufferSize; ++i) - { - nonPeakProb -= peakProb[i]; + if (sumProb > 0) { + for (size_t i = minTau; i < maxTau; ++i) + { + peakProb[i] = peakProb[i] / sumProb * peakProb[minInd]; + nonPeakProb -= peakProb[i]; + } } // std::cerr << nonPeakProb << std::endl; if (minInd > 0) @@ -282,55 +317,61 @@ } double betterTau = 0.0; - size_t x0; - size_t x2; + // size_t x0; + // size_t x2; - if (tau < 1) - { - x0 = tau; - } else { - x0 = tau - 1; - } - - if (tau + 1 < yinBufferSize) - { - x2 = tau + 1; - } else { - x2 = tau; - } - - if (x0 == tau) - { - if (yinBuffer[tau] <= yinBuffer[x2]) - { - betterTau = tau; - } else { - betterTau = x2; - } - } - else if (x2 == tau) - { - if (yinBuffer[tau] <= yinBuffer[x0]) - { - betterTau = tau; - } - else - { - betterTau = x0; - } - } - else - { + // if (tau < 1) + // { + // x0 = tau; + // } else { + // x0 = tau - 1; + // } + // + // if (tau + 1 < yinBufferSize) + // { + // x2 = tau + 1; + // } else { + // x2 = tau; + // } + // + // if (x0 == tau) + // { + // if (yinBuffer[tau] <= yinBuffer[x2]) + // { + // betterTau = tau; + // } else { + // betterTau = x2; + // } + // } + // else if (x2 == tau) + // { + // if (yinBuffer[tau] <= yinBuffer[x0]) + // { + // betterTau = tau; + // } + // else + // { + // betterTau = x0; + // } + // } + // else + // { + if (tau > 0 && tau < yinBufferSize-1) { float s0, s1, s2; - s0 = yinBuffer[x0]; + s0 = yinBuffer[tau-1]; s1 = yinBuffer[tau]; - s2 = yinBuffer[x2]; + s2 = yinBuffer[tau+1]; // fixed AUBIO implementation, thanks to Karl Helgason: // (2.0f * s1 - s2 - s0) was incorrectly multiplied with -1 - betterTau = tau + (s2 - s0) / (2 * (2 * s1 - s2 - s0)); - // std::cerr << tau << " --> " << betterTau << std::endl; + double adjustment = (s2 - s0) / (2 * (2 * s1 - s2 - s0)); + if (abs(adjustment)>1) adjustment = 0; + + betterTau = tau + adjustment; + } else { + std::cerr << "WARNING: can't do interpolation at the edge (tau = " << tau << "), will return un-interpolated value.\n"; + betterTau = tau; } return betterTau; }
--- a/YinUtil.h Thu Mar 06 15:48:46 2014 +0000 +++ b/YinUtil.h Thu Mar 06 15:49:36 2014 +0000 @@ -33,7 +33,7 @@ static void fastDifference(const double *in, double *yinBuffer, const size_t yinBufferSize); static void cumulativeDifference(double *yinBuffer, const size_t yinBufferSize); static int absoluteThreshold(const double *yinBuffer, const size_t yinBufferSize, const double thresh); - static vector<double> yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize); + static vector<double> yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize, size_t minTau = 0, size_t maxTau = 0); static double parabolicInterpolation(const double *yinBuffer, const size_t tau, const size_t yinBufferSize); };
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/YinVamp.cpp Thu Mar 06 15:49:36 2014 +0000 @@ -0,0 +1,367 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "YinVamp.h" +#include "MonoNote.h" + +#include "vamp-sdk/FFT.h" + +#include <vector> +#include <algorithm> + +#include <cstdio> +#include <cmath> +#include <complex> + +using std::string; +using std::vector; +using Vamp::RealTime; + + +YinVamp::YinVamp(float inputSampleRate) : + Plugin(inputSampleRate), + m_channels(0), + m_stepSize(256), + m_blockSize(2048), + m_fmin(40), + m_fmax(1000), + m_yin(2048, inputSampleRate, 0.0), + m_outNoF0(0), + m_outNoPeriodicity(0), + m_outNoRms(0), + m_outNoSalience(0), + m_yinParameter(0.15f), + m_outputUnvoiced(2.0f) +{ +} + +YinVamp::~YinVamp() +{ +} + +string +YinVamp::getIdentifier() const +{ + return "yin"; +} + +string +YinVamp::getName() const +{ + return "Yin"; +} + +string +YinVamp::getDescription() const +{ + return "A vamp implementation of the Yin algorithm for monophonic frequency estimation."; +} + +string +YinVamp::getMaker() const +{ + return "Matthias Mauch"; +} + +int +YinVamp::getPluginVersion() const +{ + // Increment this each time you release a version that behaves + // differently from the previous one + return 1; +} + +string +YinVamp::getCopyright() const +{ + return "GPL"; +} + +YinVamp::InputDomain +YinVamp::getInputDomain() const +{ + return TimeDomain; +} + +size_t +YinVamp::getPreferredBlockSize() const +{ + return 2048; +} + +size_t +YinVamp::getPreferredStepSize() const +{ + return 256; +} + +size_t +YinVamp::getMinChannelCount() const +{ + return 1; +} + +size_t +YinVamp::getMaxChannelCount() const +{ + return 1; +} + +YinVamp::ParameterList +YinVamp::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor d; + d.identifier = "yinThreshold"; + d.name = "Yin threshold"; + d.description = "The greedy Yin search for a low value difference function is done once a dip lower than this threshold is reached."; + d.unit = ""; + d.minValue = 0.025f; + d.maxValue = 1.0f; + d.defaultValue = 0.15f; + d.isQuantized = true; + d.quantizeStep = 0.025f; + + list.push_back(d); + + d.identifier = "outputunvoiced"; + d.valueNames.clear(); + d.name = "Output estimates classified as unvoiced?"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 2.0f; + d.defaultValue = 2.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("No"); + d.valueNames.push_back("Yes"); + d.valueNames.push_back("Yes, as negative frequencies"); + list.push_back(d); + + return list; +} + +float +YinVamp::getParameter(string identifier) const +{ + if (identifier == "yinThreshold") { + return m_yinParameter; + } + if (identifier == "outputunvoiced") { + return m_outputUnvoiced; + } + return 0.f; +} + +void +YinVamp::setParameter(string identifier, float value) +{ + if (identifier == "yinThreshold") + { + m_yinParameter = value; + } + if (identifier == "outputunvoiced") + { + m_outputUnvoiced = value; + } +} + +YinVamp::ProgramList +YinVamp::getPrograms() const +{ + ProgramList list; + return list; +} + +string +YinVamp::getCurrentProgram() const +{ + return ""; // no programs +} + +void +YinVamp::selectProgram(string name) +{ +} + +YinVamp::OutputList +YinVamp::getOutputDescriptors() const +{ + OutputList outputs; + + OutputDescriptor d; + + int outputNumber = 0; + + d.identifier = "f0"; + d.name = "Estimated f0"; + d.description = "Estimated fundamental frequency"; + d.unit = "Hz"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = m_fmin; + d.maxValue = 500; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_outNoF0 = outputNumber++; + + d.identifier = "periodicity"; + d.name = "Periodicity"; + d.description = "by-product of Yin f0 estimation"; + d.unit = ""; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_outNoPeriodicity = outputNumber++; + + d.identifier = "rms"; + d.name = "Root mean square"; + d.description = "Root mean square of the waveform."; + d.unit = ""; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_outNoRms = outputNumber++; + + d.identifier = "salience"; + d.name = "Salience"; + d.description = "Yin Salience"; + d.hasFixedBinCount = true; + d.binCount = m_blockSize / 2; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_outNoSalience = outputNumber++; + + return outputs; +} + +bool +YinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + +/* + std::cerr << "YinVamp::initialise: channels = " << channels + << ", stepSize = " << stepSize << ", blockSize = " << blockSize + << std::endl; +*/ + m_channels = channels; + m_stepSize = stepSize; + m_blockSize = blockSize; + + reset(); + + return true; +} + +void +YinVamp::reset() +{ + m_yin.setThreshold(m_yinParameter); + m_yin.setFrameSize(m_blockSize); +/* + std::cerr << "YinVamp::reset: yin threshold set to " << (m_yinParameter) + << ", blockSize = " << m_blockSize + << std::endl; +*/ +} + +YinVamp::FeatureSet +YinVamp::process(const float *const *inputBuffers, RealTime timestamp) +{ + timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate)); + FeatureSet fs; + + double *dInputBuffers = new double[m_blockSize]; + for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i]; + + Yin::YinOutput yo = m_yin.process(dInputBuffers); + // std::cerr << "f0 in YinVamp: " << yo.f0 << std::endl; + Feature f; + f.hasTimestamp = true; + f.timestamp = timestamp; + if (m_outputUnvoiced == 0.0f) + { + // std::cerr << "f0 in YinVamp: " << yo.f0 << std::endl; + if (yo.f0 > 0 && yo.f0 < m_fmax && yo.f0 > m_fmin) { + f.values.push_back(yo.f0); + fs[m_outNoF0].push_back(f); + } + } else if (m_outputUnvoiced == 1.0f) + { + if (fabs(yo.f0) < m_fmax && fabs(yo.f0) > m_fmin) { + f.values.push_back(fabs(yo.f0)); + fs[m_outNoF0].push_back(f); + } + } else + { + if (fabs(yo.f0) < m_fmax && fabs(yo.f0) > m_fmin) { + f.values.push_back(yo.f0); + fs[m_outNoF0].push_back(f); + } + } + + f.values.clear(); + f.values.push_back(yo.rms); + fs[m_outNoRms].push_back(f); + + f.values.clear(); + for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin) + { + f.values.push_back(yo.salience[iBin]); + } + fs[m_outNoSalience].push_back(f); + + f.values.clear(); + // f.values[0] = yo.periodicity; + f.values.push_back(yo.periodicity); + fs[m_outNoPeriodicity].push_back(f); + + delete [] dInputBuffers; + + return fs; +} + +YinVamp::FeatureSet +YinVamp::getRemainingFeatures() +{ + FeatureSet fs; + return fs; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/YinVamp.h Thu Mar 06 15:49:36 2014 +0000 @@ -0,0 +1,75 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _YINVAMP_H_ +#define _YINVAMP_H_ + +#include <vamp-sdk/Plugin.h> + +#include "Yin.h" + +class YinVamp : public Vamp::Plugin +{ +public: + YinVamp(float inputSampleRate); + virtual ~YinVamp(); + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + InputDomain getInputDomain() const; + size_t getPreferredBlockSize() const; + size_t getPreferredStepSize() const; + size_t getMinChannelCount() const; + size_t getMaxChannelCount() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string identifier) const; + void setParameter(std::string identifier, float value); + + ProgramList getPrograms() const; + std::string getCurrentProgram() const; + void selectProgram(std::string name); + + OutputList getOutputDescriptors() const; + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + size_t m_channels; + size_t m_stepSize; + size_t m_blockSize; + float m_fmin; + float m_fmax; + Yin m_yin; + + mutable int m_outNoF0; + mutable int m_outNoPeriodicity; + mutable int m_outNoRms; + mutable int m_outNoSalience; + + float m_yinParameter; + float m_outputUnvoiced; +}; + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/YinVampFreqConstrained.cpp Thu Mar 06 15:49:36 2014 +0000 @@ -0,0 +1,269 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "YinVampFreqConstrained.h" +#include "MonoNote.h" + +#include "vamp-sdk/FFT.h" + +#include <vector> +#include <algorithm> + +#include <cstdio> +#include <cmath> +#include <complex> + +using std::string; +using std::vector; +using Vamp::RealTime; + + +YinVampFreqConstrained::YinVampFreqConstrained(float inputSampleRate) : + Plugin(inputSampleRate), + m_channels(0), + m_stepSize(256), + m_blockSize(2048), + m_fmin(40), + m_fmax(1000), + m_yin(2048, inputSampleRate, 0.0), + m_yinFmin(100.f), + m_yinFmax(400.f) +{ +} + +YinVampFreqConstrained::~YinVampFreqConstrained() +{ +} + +string +YinVampFreqConstrained::getIdentifier() const +{ + return "yinfc"; +} + +string +YinVampFreqConstrained::getName() const +{ + return "Frequency-constrained Yin"; +} + +string +YinVampFreqConstrained::getDescription() const +{ + return "A vamp implementation of the Yin algorithm for monophonic frequency estimation with frequency constraints."; +} + +string +YinVampFreqConstrained::getMaker() const +{ + return "Matthias Mauch"; +} + +int +YinVampFreqConstrained::getPluginVersion() const +{ + // Increment this each time you release a version that behaves + // differently from the previous one + return 1; +} + +string +YinVampFreqConstrained::getCopyright() const +{ + return "GPL"; +} + +YinVampFreqConstrained::InputDomain +YinVampFreqConstrained::getInputDomain() const +{ + return TimeDomain; +} + +size_t +YinVampFreqConstrained::getPreferredBlockSize() const +{ + return 2048; +} + +size_t +YinVampFreqConstrained::getPreferredStepSize() const +{ + return 256; +} + +size_t +YinVampFreqConstrained::getMinChannelCount() const +{ + return 1; +} + +size_t +YinVampFreqConstrained::getMaxChannelCount() const +{ + return 1; +} + +YinVampFreqConstrained::ParameterList +YinVampFreqConstrained::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor d; + + d.identifier = "minfreq"; + d.name = "Minimum frequency"; + d.description = "Minimum frequency used when searching for difference function minimum."; + d.unit = "Hz"; + d.minValue = 40.f; + d.maxValue = 1000.0f; + d.defaultValue = 100.f; + d.isQuantized = false; + d.valueNames.clear(); + list.push_back(d); + + d.identifier = "maxfreq"; + d.name = "Maximum frequency"; + d.description = "Maximum frequency used when searching for difference function minimum."; + d.unit = "Hz"; + d.minValue = 40.f; + d.maxValue = 1000.0f; + d.defaultValue = 400.f; + d.isQuantized = false; + d.valueNames.clear(); + list.push_back(d); + + return list; +} + +float +YinVampFreqConstrained::getParameter(string identifier) const +{ + if (identifier == "minfreq") { + return m_yinFmin; + } + if (identifier == "maxfreq") { + return m_yinFmax; + } + return 0.f; +} + +void +YinVampFreqConstrained::setParameter(string identifier, float value) +{ + if (identifier == "minfreq") + { + m_yinFmin = value; + } + if (identifier == "maxfreq") + { + m_yinFmax = value; + } +} + +YinVampFreqConstrained::ProgramList +YinVampFreqConstrained::getPrograms() const +{ + ProgramList list; + return list; +} + +string +YinVampFreqConstrained::getCurrentProgram() const +{ + return ""; // no programs +} + +void +YinVampFreqConstrained::selectProgram(string name) +{ +} + +YinVampFreqConstrained::OutputList +YinVampFreqConstrained::getOutputDescriptors() const +{ + OutputList outputs; + + OutputDescriptor d; + + int outputNumber = 0; + + d.identifier = "f0"; + d.name = "Estimated f0"; + d.description = "Estimated fundamental frequency"; + d.unit = "Hz"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = m_fmin; + d.maxValue = 500; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + + return outputs; +} + +bool +YinVampFreqConstrained::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + +/* + std::cerr << "YinVampFreqConstrained::initialise: channels = " << channels + << ", stepSize = " << stepSize << ", blockSize = " << blockSize + << std::endl; +*/ + m_channels = channels; + m_stepSize = stepSize; + m_blockSize = blockSize; + + reset(); + + return true; +} + +void +YinVampFreqConstrained::reset() +{ + m_yin.setFrameSize(m_blockSize); +} + +YinVampFreqConstrained::FeatureSet +YinVampFreqConstrained::process(const float *const *inputBuffers, RealTime timestamp) +{ + timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate)); + FeatureSet fs; + + double *dInputBuffers = new double[m_blockSize]; + for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i]; + + // std::cerr << "f0 in YinVampFreqConstrained: " << yo.f0 << std::endl; + Feature f; + f.hasTimestamp = true; + f.timestamp = timestamp; + f.values.push_back(m_yin.constrainedMinPick(dInputBuffers, m_yinFmin, m_yinFmax)); + fs[0].push_back(f); + + delete [] dInputBuffers; + return fs; +} + +YinVampFreqConstrained::FeatureSet +YinVampFreqConstrained::getRemainingFeatures() +{ + FeatureSet fs; + return fs; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/YinVampFreqConstrained.h Thu Mar 06 15:49:36 2014 +0000 @@ -0,0 +1,70 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _YINVAMPFREQCONSTRAINED_H_ +#define _YINVAMPFREQCONSTRAINED_H_ + +#include <vamp-sdk/Plugin.h> + +#include "Yin.h" + +class YinVampFreqConstrained : public Vamp::Plugin +{ +public: + YinVampFreqConstrained(float inputSampleRate); + virtual ~YinVampFreqConstrained(); + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + InputDomain getInputDomain() const; + size_t getPreferredBlockSize() const; + size_t getPreferredStepSize() const; + size_t getMinChannelCount() const; + size_t getMaxChannelCount() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string identifier) const; + void setParameter(std::string identifier, float value); + + ProgramList getPrograms() const; + std::string getCurrentProgram() const; + void selectProgram(std::string name); + + OutputList getOutputDescriptors() const; + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + size_t m_channels; + size_t m_stepSize; + size_t m_blockSize; + float m_fmin; + float m_fmax; + Yin m_yin; + + float m_yinFmin; + float m_yinFmax; +}; + +#endif
--- a/libmain.cpp Thu Mar 06 15:48:46 2014 +0000 +++ b/libmain.cpp Thu Mar 06 15:49:36 2014 +0000 @@ -14,11 +14,15 @@ #include <vamp/vamp.h> #include <vamp-sdk/PluginAdapter.h> -#include "PYIN.h" -#include "VampYin.h" +#include "PYinVamp.h" +#include "YinVamp.h" +#include "LocalCandidatePYIN.h" +#include "YinVampFreqConstrained.h" -static Vamp::PluginAdapter<PYIN> pyinPluginAdapter; -static Vamp::PluginAdapter<VampYin> vampyinPluginAdapter; +static Vamp::PluginAdapter<PYinVamp> pyinvampPluginAdapter; +static Vamp::PluginAdapter<YinVamp> yinvampPluginAdapter; +static Vamp::PluginAdapter<LocalCandidatePYIN> localCandidatePYINPluginAdapter; +static Vamp::PluginAdapter<YinVampFreqConstrained> yinVampFreqConstrainedPluginAdapter; const VampPluginDescriptor * vampGetPluginDescriptor(unsigned int version, unsigned int index) @@ -26,8 +30,10 @@ if (version < 1) return 0; switch (index) { - case 0: return pyinPluginAdapter.getDescriptor(); - case 1: return vampyinPluginAdapter.getDescriptor(); + case 0: return pyinvampPluginAdapter.getDescriptor(); + case 1: return yinvampPluginAdapter.getDescriptor(); + case 2: return localCandidatePYINPluginAdapter.getDescriptor(); + case 3: return yinVampFreqConstrainedPluginAdapter.getDescriptor(); default: return 0; } }