# HG changeset patch # User Matthias Mauch # Date 1427811007 -3600 # Node ID 9d6595563c11c0afb4872c099ed85b7279da7980 # Parent effea38d5a6dd566f34e721a316e3b422348725d# Parent 1629209f5bf21fc926bb6abd5ab786e78c5d7677 Merge from branch "tony" diff -r effea38d5a6d -r 9d6595563c11 .hgignore --- a/.hgignore Mon Jan 26 17:47:29 2015 +0000 +++ b/.hgignore Tue Mar 31 15:10:07 2015 +0100 @@ -1,5 +1,6 @@ syntax: glob +*.o +*.so *~ -*.o *.dll -*.so + diff -r effea38d5a6d -r 9d6595563c11 .hgtags --- a/.hgtags Mon Jan 26 17:47:29 2015 +0000 +++ b/.hgtags Tue Mar 31 15:10:07 2015 +0100 @@ -1,2 +1,3 @@ b3acb3a6de12023a8f9bb00dc88fda7e5853cdb7 v1.0 -585fdda4d7f92c249b2f15b1ec21d50d9b4644aa 20150126-default +e291f3657872db892f6ee525b36e98472a5ccd26 tony_v0.5 +062f0e49187789937b6162d2904b1af20872ed5e tony_v0.6 diff -r effea38d5a6d -r 9d6595563c11 LocalCandidatePYIN.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LocalCandidatePYIN.cpp Tue Mar 31 15:10:07 2015 +0100 @@ -0,0 +1,499 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COLocalCandidatePYING included with this distribution for more information. 
+*/ + +#include "LocalCandidatePYIN.h" +#include "MonoPitch.h" +#include "YinUtil.h" + +#include "vamp-sdk/FFT.h" + +#include +#include + +#include +#include +// #include +#include +#include +#include + +#include + +using std::string; +using std::vector; +using std::map; +using Vamp::RealTime; + + +LocalCandidatePYIN::LocalCandidatePYIN(float inputSampleRate) : + Plugin(inputSampleRate), + m_channels(0), + m_stepSize(256), + m_blockSize(2048), + m_fmin(40), + m_fmax(700), + m_oPitchTrackCandidates(0), + m_threshDistr(2.0f), + m_outputUnvoiced(0.0f), + m_preciseTime(0.0f), + m_pitchProb(0), + m_timestamp(0), + m_nCandidate(13) +{ +} + +LocalCandidatePYIN::~LocalCandidatePYIN() +{ +} + +string +LocalCandidatePYIN::getIdentifier() const +{ + return "localcandidatepyin"; +} + +string +LocalCandidatePYIN::getName() const +{ + return "Local Candidate PYIN"; +} + +string +LocalCandidatePYIN::getDescription() const +{ + return "Monophonic pitch and note tracking based on a probabilistic Yin extension."; +} + +string +LocalCandidatePYIN::getMaker() const +{ + return "Matthias Mauch"; +} + +int +LocalCandidatePYIN::getPluginVersion() const +{ + // Increment this each time you release a version that behaves + // differently from the previous one + return 1; +} + +string +LocalCandidatePYIN::getCopyright() const +{ + return "GPL"; +} + +LocalCandidatePYIN::InputDomain +LocalCandidatePYIN::getInputDomain() const +{ + return TimeDomain; +} + +size_t +LocalCandidatePYIN::getPreferredBlockSize() const +{ + return 2048; +} + +size_t +LocalCandidatePYIN::getPreferredStepSize() const +{ + return 256; +} + +size_t +LocalCandidatePYIN::getMinChannelCount() const +{ + return 1; +} + +size_t +LocalCandidatePYIN::getMaxChannelCount() const +{ + return 1; +} + +LocalCandidatePYIN::ParameterList +LocalCandidatePYIN::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor d; + + d.identifier = "threshdistr"; + d.name = "Yin threshold distribution"; + d.description = 
"."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 7.0f; + d.defaultValue = 2.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("Uniform"); + d.valueNames.push_back("Beta (mean 0.10)"); + d.valueNames.push_back("Beta (mean 0.15)"); + d.valueNames.push_back("Beta (mean 0.20)"); + d.valueNames.push_back("Beta (mean 0.30)"); + d.valueNames.push_back("Single Value 0.10"); + d.valueNames.push_back("Single Value 0.15"); + d.valueNames.push_back("Single Value 0.20"); + list.push_back(d); + + d.identifier = "outputunvoiced"; + d.valueNames.clear(); + d.name = "Output estimates classified as unvoiced?"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 2.0f; + d.defaultValue = 0.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("No"); + d.valueNames.push_back("Yes"); + d.valueNames.push_back("Yes, as negative frequencies"); + list.push_back(d); + + d.identifier = "precisetime"; + d.valueNames.clear(); + d.name = "Use non-standard precise YIN timing (slow)."; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 1.0f; + d.defaultValue = 0.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + list.push_back(d); + + return list; +} + +float +LocalCandidatePYIN::getParameter(string identifier) const +{ + if (identifier == "threshdistr") { + return m_threshDistr; + } + if (identifier == "outputunvoiced") { + return m_outputUnvoiced; + } + if (identifier == "precisetime") { + return m_preciseTime; + } + return 0.f; +} + +void +LocalCandidatePYIN::setParameter(string identifier, float value) +{ + if (identifier == "threshdistr") + { + m_threshDistr = value; + } + if (identifier == "outputunvoiced") + { + m_outputUnvoiced = value; + } + if (identifier == "precisetime") + { + m_preciseTime = value; + } +} + +LocalCandidatePYIN::ProgramList +LocalCandidatePYIN::getPrograms() const +{ + ProgramList list; + return list; +} + +string +LocalCandidatePYIN::getCurrentProgram() const +{ 
+ return ""; // no programs +} + +void +LocalCandidatePYIN::selectProgram(string name) +{ +} + +LocalCandidatePYIN::OutputList +LocalCandidatePYIN::getOutputDescriptors() const +{ + OutputList outputs; + + OutputDescriptor d; + + d.identifier = "pitchtrackcandidates"; + d.name = "Pitch track candidates"; + d.description = "Multiple candidate pitch tracks."; + d.unit = "Hz"; + d.hasFixedBinCount = false; + d.hasKnownExtents = true; + d.minValue = m_fmin; + d.maxValue = 500; //!!!??? + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + + return outputs; +} + +bool +LocalCandidatePYIN::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + +/* + std::cerr << "LocalCandidatePYIN::initialise: channels = " << channels + << ", stepSize = " << stepSize << ", blockSize = " << blockSize + << std::endl; +*/ + m_channels = channels; + m_stepSize = stepSize; + m_blockSize = blockSize; + + reset(); + + return true; +} + +void +LocalCandidatePYIN::reset() +{ + m_pitchProb.clear(); + m_timestamp.clear(); +/* + std::cerr << "LocalCandidatePYIN::reset" + << ", blockSize = " << m_blockSize + << std::endl; +*/ +} + +LocalCandidatePYIN::FeatureSet +LocalCandidatePYIN::process(const float *const *inputBuffers, RealTime timestamp) +{ + int offset = m_preciseTime == 1.0 ? 
m_blockSize/2 : m_blockSize/4; + timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate)); + + double *dInputBuffers = new double[m_blockSize]; + for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i]; + + size_t yinBufferSize = m_blockSize/2; + double* yinBuffer = new double[yinBufferSize]; + if (!m_preciseTime) YinUtil::fastDifference(dInputBuffers, yinBuffer, yinBufferSize); + else YinUtil::slowDifference(dInputBuffers, yinBuffer, yinBufferSize); + + delete [] dInputBuffers; + + YinUtil::cumulativeDifference(yinBuffer, yinBufferSize); + + float minFrequency = 60; + float maxFrequency = 900; + vector peakProbability = YinUtil::yinProb(yinBuffer, + m_threshDistr, + yinBufferSize, + m_inputSampleRate/maxFrequency, + m_inputSampleRate/minFrequency); + + vector > tempPitchProb; + for (size_t iBuf = 0; iBuf < yinBufferSize; ++iBuf) + { + if (peakProbability[iBuf] > 0) + { + double currentF0 = + m_inputSampleRate * (1.0 / + YinUtil::parabolicInterpolation(yinBuffer, iBuf, yinBufferSize)); + double tempPitch = 12 * std::log(currentF0/440)/std::log(2.) 
+ 69; + tempPitchProb.push_back(pair(tempPitch, peakProbability[iBuf])); + } + } + m_pitchProb.push_back(tempPitchProb); + m_timestamp.push_back(timestamp); + + delete[] yinBuffer; + + return FeatureSet(); +} + +LocalCandidatePYIN::FeatureSet +LocalCandidatePYIN::getRemainingFeatures() +{ + // timestamp -> candidate number -> value + map > featureValues; + + // std::cerr << "in remaining features" << std::endl; + + if (m_pitchProb.empty()) { + return FeatureSet(); + } + + // MONO-PITCH STUFF + MonoPitch mp; + size_t nFrame = m_timestamp.size(); + vector > pitchTracks; + vector freqSum = vector(m_nCandidate); + vector freqNumber = vector(m_nCandidate); + vector freqMean = vector(m_nCandidate); + + boost::math::normal normalDist(0, 8); // semitones sd + float maxNormalDist = boost::math::pdf(normalDist, 0); + + // Viterbi-decode multiple times with different frequencies emphasised + for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) + { + pitchTracks.push_back(vector(nFrame)); + vector > > tempPitchProb; + float centrePitch = 45 + 3 * iCandidate; + + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) { + tempPitchProb.push_back(vector >()); + float sumProb = 0; + float pitch = 0; + float prob = 0; + for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb) + { + pitch = m_pitchProb[iFrame][iProb].first; + prob = m_pitchProb[iFrame][iProb].second * + boost::math::pdf(normalDist, pitch-centrePitch) / + maxNormalDist * 2; + sumProb += prob; + tempPitchProb[iFrame].push_back( + pair(pitch,prob)); + } + for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb) + { + tempPitchProb[iFrame][iProb].second /= sumProb; + } + } + + vector mpOut = mp.process(tempPitchProb); + float prevFreq = 0; + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) + { + if (mpOut[iFrame] > 0) { + + pitchTracks[iCandidate][iFrame] = mpOut[iFrame]; + freqSum[iCandidate] += mpOut[iFrame]; + freqNumber[iCandidate]++; + prevFreq = mpOut[iFrame]; + + } + } + 
freqMean[iCandidate] = freqSum[iCandidate]*1.0/freqNumber[iCandidate]; + } + + // find near duplicate pitch tracks + vector duplicates; + for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) { + for (size_t jCandidate = iCandidate+1; jCandidate < m_nCandidate; ++jCandidate) { + size_t countEqual = 0; + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) + { + if ((pitchTracks[jCandidate][iFrame] == 0 && pitchTracks[iCandidate][iFrame] == 0) || + fabs(pitchTracks[iCandidate][iFrame]/pitchTracks[jCandidate][iFrame]-1)<0.01) + countEqual++; + } + // std::cerr << "proportion equal: " << (countEqual * 1.0 / nFrame) << std::endl; + if (countEqual * 1.0 / nFrame > 0.8) { + if (freqNumber[iCandidate] > freqNumber[jCandidate]) { + duplicates.push_back(jCandidate); + } else if (iCandidate < jCandidate) { + duplicates.push_back(iCandidate); + } + } + } + } + + // now find non-duplicate pitch tracks + map candidateActuals; + map candidateLabels; + + vector > outputFrequencies; + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) outputFrequencies.push_back(vector()); + + int actualCandidateNumber = 0; + for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) + { + bool isDuplicate = false; + for (size_t i = 0; i < duplicates.size(); ++i) { + + if (duplicates[i] == iCandidate) { + isDuplicate = true; + break; + } + } + if (!isDuplicate && freqNumber[iCandidate] > 0.5*nFrame) + { + std::ostringstream convert; + convert << actualCandidateNumber++; + candidateLabels[iCandidate] = convert.str(); + candidateActuals[iCandidate] = actualCandidateNumber; + // std::cerr << iCandidate << " " << actualCandidateNumber << " " << freqNumber[iCandidate] << " " << freqMean[iCandidate] << std::endl; + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) + { + if (pitchTracks[iCandidate][iFrame] > 0) + { + // featureValues[m_timestamp[iFrame]][iCandidate] = + // pitchTracks[iCandidate][iFrame]; + outputFrequencies[iFrame].push_back(pitchTracks[iCandidate][iFrame]); + } 
else { + outputFrequencies[iFrame].push_back(0); + } + } + } + // fs[m_oPitchTrackCandidates].push_back(f); + } + + // adapt our features so as to return a stack of candidate values + // per frame + + FeatureSet fs; + + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame){ + Feature f; + f.hasTimestamp = true; + f.timestamp = m_timestamp[iFrame]; + f.values = outputFrequencies[iFrame]; + fs[0].push_back(f); + } + + // I stopped using Chris's map stuff below because I couldn't get my head around it + // + // for (map >::const_iterator i = + // featureValues.begin(); i != featureValues.end(); ++i) { + // Feature f; + // f.hasTimestamp = true; + // f.timestamp = i->first; + // int nextCandidate = candidateActuals.begin()->second; + // for (map::const_iterator j = + // i->second.begin(); j != i->second.end(); ++j) { + // while (candidateActuals[j->first] > nextCandidate) { + // f.values.push_back(0); + // ++nextCandidate; + // } + // f.values.push_back(j->second); + // nextCandidate = j->first + 1; + // } + // //!!! can't use labels? + // fs[0].push_back(f); + // } + + return fs; +} diff -r effea38d5a6d -r 9d6595563c11 LocalCandidatePYIN.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LocalCandidatePYIN.h Tue Mar 31 15:10:07 2015 +0100 @@ -0,0 +1,75 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COLocalCandidatePYING included with this distribution for more information. 
+*/ + +#ifndef _LOCALCANDIDATEPYIN_H_ +#define _LOCALCANDIDATEPYIN_H_ + +#include + +#include "Yin.h" + +class LocalCandidatePYIN : public Vamp::Plugin +{ +public: + LocalCandidatePYIN(float inputSampleRate); + virtual ~LocalCandidatePYIN(); + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + InputDomain getInputDomain() const; + size_t getPreferredBlockSize() const; + size_t getPreferredStepSize() const; + size_t getMinChannelCount() const; + size_t getMaxChannelCount() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string identifier) const; + void setParameter(std::string identifier, float value); + + ProgramList getPrograms() const; + std::string getCurrentProgram() const; + void selectProgram(std::string name); + + OutputList getOutputDescriptors() const; + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + size_t m_channels; + size_t m_stepSize; + size_t m_blockSize; + float m_fmin; + float m_fmax; + + mutable int m_oPitchTrackCandidates; + + float m_threshDistr; + float m_outputUnvoiced; + float m_preciseTime; + vector > > m_pitchProb; + vector m_timestamp; + size_t m_nCandidate; +}; + +#endif diff -r effea38d5a6d -r 9d6595563c11 Makefile.inc --- a/Makefile.inc Mon Jan 26 17:47:29 2015 +0000 +++ b/Makefile.inc Tue Mar 31 15:10:07 2015 +0100 @@ -9,8 +9,9 @@ PLUGIN := pyin$(PLUGIN_EXT) -SOURCES := PYIN.cpp \ - VampYin.cpp \ +SOURCES := PYinVamp.cpp \ + YinVamp.cpp \ + LocalCandidatePYIN.cpp \ Yin.cpp \ YinUtil.cpp \ MonoNote.cpp \ @@ -63,16 +64,36 @@ # DO NOT DELETE -PYIN.o: PYIN.h -VampYin.o: VampYin.h -Yin.o: Yin.h +libmain.o: PYinVamp.h Yin.h MeanFilter.h YinVamp.h LocalCandidatePYIN.h 
+LocalCandidatePYIN.o: LocalCandidatePYIN.h Yin.h MeanFilter.h MonoPitch.h +LocalCandidatePYIN.o: MonoPitchHMM.h SparseHMM.h YinUtil.h +MonoNote.o: MonoNote.h MonoNoteHMM.h MonoNoteParameters.h SparseHMM.h +MonoNoteHMM.o: MonoNoteHMM.h MonoNoteParameters.h SparseHMM.h MonoNoteParameters.o: MonoNoteParameters.h -MonoNote.o: MonoNote.h -MonoPitch.o: MonoPitch.h -MonoPitchHMM.o: MonoPitchHMM.h +MonoPitch.o: MonoPitch.h MonoPitchHMM.h SparseHMM.h +MonoPitchHMM.o: MonoPitchHMM.h SparseHMM.h +PYinVamp.o: PYinVamp.h Yin.h MeanFilter.h MonoNote.h MonoNoteHMM.h +PYinVamp.o: MonoNoteParameters.h SparseHMM.h MonoPitch.h MonoPitchHMM.h SparseHMM.o: SparseHMM.h -MonoNoteHMM.o: MonoNoteHMM.h -libmain.o: PYIN.h VampYin.h +Yin.o: Yin.h MeanFilter.h YinUtil.h +YinUtil.o: YinUtil.h MeanFilter.h +YinVamp.o: YinVamp.h Yin.h MeanFilter.h MonoNote.h MonoNoteHMM.h +YinVamp.o: MonoNoteParameters.h SparseHMM.h +YinVampFreqConstrained.o: YinVampFreqConstrained.h Yin.h MeanFilter.h +YinVampFreqConstrained.o: MonoNote.h MonoNoteHMM.h MonoNoteParameters.h +YinVampFreqConstrained.o: SparseHMM.h test/TestMeanFilter.o: MeanFilter.h -test/TestYin.o: Yin.h - +test/TestMonoNote.o: MonoNote.h MonoNoteHMM.h MonoNoteParameters.h +test/TestMonoNote.o: SparseHMM.h +test/TestYin.o: Yin.h MeanFilter.h +test/TestYinUtil.o: YinUtil.h MeanFilter.h +LocalCandidatePYIN.o: Yin.h MeanFilter.h +MonoNote.o: MonoNoteHMM.h MonoNoteParameters.h SparseHMM.h +MonoNoteHMM.o: MonoNoteParameters.h SparseHMM.h +MonoPitch.o: MonoPitchHMM.h SparseHMM.h +MonoPitchHMM.o: SparseHMM.h +PYinVamp.o: Yin.h MeanFilter.h +Yin.o: MeanFilter.h +YinUtil.o: MeanFilter.h +YinVampFreqConstrained.o: Yin.h MeanFilter.h +YinVamp.o: Yin.h MeanFilter.h diff -r effea38d5a6d -r 9d6595563c11 Makefile.linux64 --- a/Makefile.linux64 Mon Jan 26 17:47:29 2015 +0000 +++ b/Makefile.linux64 Tue Mar 31 15:10:07 2015 +0100 @@ -1,11 +1,10 @@ -CFLAGS := $(CFLAGS) -Wall -O3 -fPIC -I../vamp-plugin-sdk/ -#CFLAGS := -g -fPIC -I../vamp-plugin-sdk +CFLAGS := 
$(CFLAGS) -Wall -O3 -fPIC -I../vamp-plugin-sdk/ -I../../vamp-plugin-sdk/ CXXFLAGS := $(CXXFLAGS) $(CFLAGS) -PLUGIN_LDFLAGS := $(LDFLAGS) -shared -Wl,-Bstatic -L../vamp-plugin-sdk -lvamp-sdk -Wl,-Bdynamic -Wl,--version-script=vamp-plugin.map -TEST_LDFLAGS := $(LDFLAGS) -Wl,-Bstatic -L../vamp-plugin-sdk -lvamp-sdk -Wl,-Bdynamic -lboost_unit_test_framework +PLUGIN_LDFLAGS := $(LDFLAGS) -shared -Wl,-Bstatic -L../vamp-plugin-sdk -L../../vamp-plugin-sdk -lvamp-sdk -Wl,-Bdynamic -Wl,--version-script=vamp-plugin.map +TEST_LDFLAGS := -Wl,-Bstatic -L../vamp-plugin-sdk -L../../vamp-plugin-sdk -lvamp-sdk -Wl,-Bdynamic -lboost_unit_test_framework PLUGIN_EXT := .so diff -r effea38d5a6d -r 9d6595563c11 Makefile.osx --- a/Makefile.osx Mon Jan 26 17:47:29 2015 +0000 +++ b/Makefile.osx Tue Mar 31 15:10:07 2015 +0100 @@ -1,8 +1,8 @@ ARCHFLAGS := -arch x86_64 -mmacosx-version-min=10.7 -CFLAGS := $(ARCHFLAGS) -O3 -I../vamp-plugin-sdk -I/usr/local/boost -Wall -fPIC +CFLAGS := $(ARCHFLAGS) -O3 -I../vamp-plugin-sdk -I../../vamp-plugin-sdk -I/usr/local/boost -Wall -fPIC CXXFLAGS := $(CFLAGS) -LDFLAGS := -L../vamp-plugin-sdk -lvamp-sdk $(ARCHFLAGS) +LDFLAGS := -L../vamp-plugin-sdk -L../../vamp-plugin-sdk -lvamp-sdk $(ARCHFLAGS) PLUGIN_LDFLAGS := -dynamiclib $(LDFLAGS) -exported_symbols_list vamp-plugin.list TEST_LDFLAGS := $(LDFLAGS) -lboost_unit_test_framework PLUGIN_EXT := .dylib diff -r effea38d5a6d -r 9d6595563c11 MonoNoteHMM.cpp --- a/MonoNoteHMM.cpp Mon Jan 26 17:47:29 2015 +0000 +++ b/MonoNoteHMM.cpp Tue Mar 31 15:10:07 2015 +0100 @@ -49,7 +49,7 @@ double tempProbSum = 0; for (size_t i = 0; i < par.n; ++i) { - if (i % 4 != 2) + if (i % par.nSPP != 2) { // std::cerr << getMidiPitch(i) << std::endl; double tempProb = 0; @@ -68,7 +68,9 @@ minDistCandidate = iCandidate; } } - tempProb = std::pow(minDistProb, par.yinTrust) * boost::math::pdf(pitchDistr[i], pitchProb[minDistCandidate].first); + tempProb = std::pow(minDistProb, par.yinTrust) * + boost::math::pdf(pitchDistr[i], + 
pitchProb[minDistCandidate].first); } else { tempProb = 1; } @@ -79,7 +81,7 @@ for (size_t i = 0; i < par.n; ++i) { - if (i % 4 != 2) + if (i % par.nSPP != 2) { if (tempProbSum > 0) { @@ -101,16 +103,15 @@ // 0. attack state // 1. stable state // 2. silent state - // 3. inter state - // 4-6. second-lowest pitch - // 4. attack state + // 3-5. second-lowest pitch + // 3. attack state // ... // observation distributions for (size_t iState = 0; iState < par.n; ++iState) { pitchDistr.push_back(boost::math::normal(0,1)); - if (iState % 4 == 2) + if (iState % par.nSPP == 2) { // silent state starts tracking init.push_back(1.0/(par.nS * par.nPPS)); @@ -126,7 +127,6 @@ pitchDistr[index] = boost::math::normal(mu, par.sigmaYinPitchAttack); pitchDistr[index+1] = boost::math::normal(mu, par.sigmaYinPitchStable); pitchDistr[index+2] = boost::math::normal(mu, 1.0); // dummy - pitchDistr[index+3] = boost::math::normal(mu, par.sigmaYinPitchInter); } boost::math::normal noteDistanceDistr(0, par.sigma2Note); @@ -154,54 +154,43 @@ to.push_back(index+2); // to silent transProb.push_back(par.pStable2Silent); - from.push_back(index+1); - to.push_back(index+3); // to inter-note - transProb.push_back(1-par.pStableSelftrans-par.pStable2Silent); - // the "easy" transitions from silent state from.push_back(index+2); to.push_back(index+2); transProb.push_back(par.pSilentSelftrans); - // the "easy" inter state transition - from.push_back(index+3); - to.push_back(index+3); - transProb.push_back(par.pInterSelftrans); - // the more complicated transitions from the silent and inter state + // the more complicated transitions from the silent double probSumSilent = 0; - double probSumInter = 0; - vector tempTransProbInter; + vector tempTransProbSilent; for (size_t jPitch = 0; jPitch < (par.nS * par.nPPS); ++jPitch) { int fromPitch = iPitch; int toPitch = jPitch; - double semitoneDistance = std::abs(fromPitch - toPitch) * 1.0 / par.nPPS; + double semitoneDistance = + std::abs(fromPitch - toPitch) * 
1.0 / par.nPPS; // if (std::fmod(semitoneDistance, 1) == 0 && semitoneDistance > par.minSemitoneDistance) - if (semitoneDistance == 0 || (semitoneDistance > par.minSemitoneDistance && semitoneDistance < par.maxJump)) + if (semitoneDistance == 0 || + (semitoneDistance > par.minSemitoneDistance + && semitoneDistance < par.maxJump)) { size_t toIndex = jPitch * par.nSPP; // note attack index - double tempWeightSilent = boost::math::pdf(noteDistanceDistr, semitoneDistance); - double tempWeightInter = semitoneDistance == 0 ? 0 : tempWeightSilent; + double tempWeightSilent = boost::math::pdf(noteDistanceDistr, + semitoneDistance); probSumSilent += tempWeightSilent; - probSumInter += tempWeightInter; tempTransProbSilent.push_back(tempWeightSilent); - tempTransProbInter.push_back(tempWeightInter); from.push_back(index+2); to.push_back(toIndex); - from.push_back(index+3); - to.push_back(toIndex); } } for (size_t i = 0; i < tempTransProbSilent.size(); ++i) { transProb.push_back((1-par.pSilentSelftrans) * tempTransProbSilent[i]/probSumSilent); - transProb.push_back((1-par.pInterSelftrans) * tempTransProbInter[i]/probSumInter); } } } diff -r effea38d5a6d -r 9d6595563c11 MonoNoteParameters.cpp --- a/MonoNoteParameters.cpp Mon Jan 26 17:47:29 2015 +0000 +++ b/MonoNoteParameters.cpp Tue Mar 31 15:10:07 2015 +0100 @@ -14,25 +14,25 @@ #include "MonoNoteParameters.h" MonoNoteParameters::MonoNoteParameters() : - minPitch(36), + minPitch(35), nPPS(3), - nS(43), - nSPP(4), // states per pitch + nS(69), + nSPP(3), // states per pitch n(0), initPi(0), - pAttackSelftrans(0.5), - pStableSelftrans(0.999), - pStable2Silent(0.005), - pSilentSelftrans(0.5), + pAttackSelftrans(0.9), + pStableSelftrans(0.99), + pStable2Silent(0.01), + pSilentSelftrans(0.9999), sigma2Note(0.7), maxJump(13), - pInterSelftrans(0.99), + pInterSelftrans(0.0), priorPitchedProb(.7), priorWeight(0.5), minSemitoneDistance(.5), sigmaYinPitchAttack(5), - sigmaYinPitchStable(0.9), - sigmaYinPitchInter(5), + 
sigmaYinPitchStable(0.8), + sigmaYinPitchInter(.1), yinTrust(0.1) { // just in case someone put in a silly value for pRelease2Unvoiced diff -r effea38d5a6d -r 9d6595563c11 MonoPitch.cpp --- a/MonoPitch.cpp Mon Jan 26 17:47:29 2015 +0000 +++ b/MonoPitch.cpp Tue Mar 31 15:10:07 2015 +0100 @@ -41,7 +41,7 @@ obsProb.push_back(hmm.calculateObsProb(pitchProb[iFrame])); } - vector *scale = new vector(pitchProb.size()); + vector *scale = new vector(0); vector out; diff -r effea38d5a6d -r 9d6595563c11 MonoPitchHMM.cpp --- a/MonoPitchHMM.cpp Mon Jan 26 17:47:29 2015 +0000 +++ b/MonoPitchHMM.cpp Tue Mar 31 15:10:07 2015 +0100 @@ -22,7 +22,7 @@ using std::pair; MonoPitchHMM::MonoPitchHMM() : -m_minFreq(55), +m_minFreq(61.735), m_nBPS(5), m_nPitch(0), m_transitionWidth(0), @@ -31,7 +31,7 @@ m_freqs(0) { m_transitionWidth = 5*(m_nBPS/2) + 1; - m_nPitch = 48 * m_nBPS; + m_nPitch = 69 * m_nBPS; m_freqs = vector(2*m_nPitch); for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch) { @@ -68,6 +68,8 @@ } double probReallyPitched = m_yinTrust * probYinPitched; + // std::cerr << probReallyPitched << " " << probYinPitched << std::endl; + // damn, I forget what this is all about... for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch) { if (probYinPitched > 0) out[iPitch] *= (probReallyPitched/probYinPitched) ; diff -r effea38d5a6d -r 9d6595563c11 PYIN.cpp --- a/PYIN.cpp Mon Jan 26 17:47:29 2015 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,499 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - pYIN - A fundamental frequency estimator for monophonic audio - Centre for Digital Music, Queen Mary, University of London. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. 
-*/ - -#include "PYIN.h" -#include "MonoNote.h" -#include "MonoPitch.h" - -#include "vamp-sdk/FFT.h" - -#include -#include - -#include -#include -#include - -using std::string; -using std::vector; -using Vamp::RealTime; - - -PYIN::PYIN(float inputSampleRate) : - Plugin(inputSampleRate), - m_channels(0), - m_stepSize(256), - m_blockSize(2048), - m_fmin(40), - m_fmax(700), - m_yin(2048, inputSampleRate, 0.0), - m_oF0Candidates(0), - m_oF0Probs(0), - m_oVoicedProb(0), - m_oCandidateSalience(0), - m_oSmoothedPitchTrack(0), - m_oNotes(0), - m_threshDistr(2.0f), - m_outputUnvoiced(0.0f), - m_pitchProb(0), - m_timestamp(0) -{ -} - -PYIN::~PYIN() -{ -} - -string -PYIN::getIdentifier() const -{ - return "pyin"; -} - -string -PYIN::getName() const -{ - return "pYin"; -} - -string -PYIN::getDescription() const -{ - return "Monophonic pitch and note tracking based on a probabilistic Yin extension."; -} - -string -PYIN::getMaker() const -{ - return "Matthias Mauch"; -} - -int -PYIN::getPluginVersion() const -{ - // Increment this each time you release a version that behaves - // differently from the previous one - return 1; -} - -string -PYIN::getCopyright() const -{ - return "GPL"; -} - -PYIN::InputDomain -PYIN::getInputDomain() const -{ - return TimeDomain; -} - -size_t -PYIN::getPreferredBlockSize() const -{ - return 2048; -} - -size_t -PYIN::getPreferredStepSize() const -{ - return 256; -} - -size_t -PYIN::getMinChannelCount() const -{ - return 1; -} - -size_t -PYIN::getMaxChannelCount() const -{ - return 1; -} - -PYIN::ParameterList -PYIN::getParameterDescriptors() const -{ - ParameterList list; - - ParameterDescriptor d; - - d.identifier = "threshdistr"; - d.name = "Yin threshold distribution"; - d.description = "."; - d.unit = ""; - d.minValue = 0.0f; - d.maxValue = 7.0f; - d.defaultValue = 2.0f; - d.isQuantized = true; - d.quantizeStep = 1.0f; - d.valueNames.push_back("Uniform"); - d.valueNames.push_back("Beta (mean 0.10)"); - d.valueNames.push_back("Beta (mean 0.15)"); 
- d.valueNames.push_back("Beta (mean 0.20)"); - d.valueNames.push_back("Beta (mean 0.30)"); - d.valueNames.push_back("Single Value 0.10"); - d.valueNames.push_back("Single Value 0.15"); - d.valueNames.push_back("Single Value 0.20"); - list.push_back(d); - - d.identifier = "outputunvoiced"; - d.valueNames.clear(); - d.name = "Output estimates classified as unvoiced?"; - d.description = "."; - d.unit = ""; - d.minValue = 0.0f; - d.maxValue = 2.0f; - d.defaultValue = 0.0f; - d.isQuantized = true; - d.quantizeStep = 1.0f; - d.valueNames.push_back("No"); - d.valueNames.push_back("Yes"); - d.valueNames.push_back("Yes, as negative frequencies"); - list.push_back(d); - - return list; -} - -float -PYIN::getParameter(string identifier) const -{ - if (identifier == "threshdistr") { - return m_threshDistr; - } - if (identifier == "outputunvoiced") { - return m_outputUnvoiced; - } - return 0.f; -} - -void -PYIN::setParameter(string identifier, float value) -{ - if (identifier == "threshdistr") - { - m_threshDistr = value; - } - if (identifier == "outputunvoiced") - { - m_outputUnvoiced = value; - } - -} - -PYIN::ProgramList -PYIN::getPrograms() const -{ - ProgramList list; - return list; -} - -string -PYIN::getCurrentProgram() const -{ - return ""; // no programs -} - -void -PYIN::selectProgram(string name) -{ -} - -PYIN::OutputList -PYIN::getOutputDescriptors() const -{ - OutputList outputs; - - OutputDescriptor d; - - int outputNumber = 0; - - d.identifier = "f0candidates"; - d.name = "F0 Candidates"; - d.description = "Estimated fundamental frequency candidates."; - d.unit = "Hz"; - d.hasFixedBinCount = false; - // d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = m_fmin; - d.maxValue = 500; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_oF0Candidates = outputNumber++; - - d.identifier = "f0probs"; - d.name = "Candidate 
Probabilities"; - d.description = "Probabilities of estimated fundamental frequency candidates."; - d.unit = ""; - d.hasFixedBinCount = false; - // d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = 0; - d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_oF0Probs = outputNumber++; - - d.identifier = "voicedprob"; - d.name = "Voiced Probability"; - d.description = "Probability that the signal is voiced according to Probabilistic Yin."; - d.unit = ""; - d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = 0; - d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_oVoicedProb = outputNumber++; - - d.identifier = "candidatesalience"; - d.name = "Candidate Salience"; - d.description = "Candidate Salience"; - d.hasFixedBinCount = true; - d.binCount = m_blockSize / 2; - d.hasKnownExtents = true; - d.minValue = 0; - d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_oCandidateSalience = outputNumber++; - - d.identifier = "smoothedpitchtrack"; - d.name = "Smoothed Pitch Track"; - d.description = "."; - d.unit = "Hz"; - d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = false; - // d.minValue = 0; - // d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_oSmoothedPitchTrack = outputNumber++; - - d.identifier = "notes"; - d.name = "Notes"; - d.description = "Derived fixed-pitch note frequencies"; - // d.unit = "MIDI unit"; - d.unit = "Hz"; - 
d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = false; - d.isQuantized = false; - d.sampleType = OutputDescriptor::VariableSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = true; - outputs.push_back(d); - m_oNotes = outputNumber++; - - return outputs; -} - -bool -PYIN::initialise(size_t channels, size_t stepSize, size_t blockSize) -{ - if (channels < getMinChannelCount() || - channels > getMaxChannelCount()) return false; - -/* - std::cerr << "PYIN::initialise: channels = " << channels - << ", stepSize = " << stepSize << ", blockSize = " << blockSize - << std::endl; -*/ - m_channels = channels; - m_stepSize = stepSize; - m_blockSize = blockSize; - - reset(); - - return true; -} - -void -PYIN::reset() -{ - m_yin.setThresholdDistr(m_threshDistr); - m_yin.setFrameSize(m_blockSize); - - m_pitchProb.clear(); - m_timestamp.clear(); -/* - std::cerr << "PYIN::reset" - << ", blockSize = " << m_blockSize - << std::endl; -*/ -} - -PYIN::FeatureSet -PYIN::process(const float *const *inputBuffers, RealTime timestamp) -{ - timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate)); - FeatureSet fs; - - double *dInputBuffers = new double[m_blockSize]; - for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i]; - - Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers); - delete [] dInputBuffers; - - // First, get the things out of the way that we don't want to output - // immediately, but instead save for later. - vector > tempPitchProb; - for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate) - { - double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) 
+ 69; - tempPitchProb.push_back(pair - (tempPitch, yo.freqProb[iCandidate].second)); - } - m_pitchProb.push_back(tempPitchProb); - m_timestamp.push_back(timestamp); - - // F0 CANDIDATES - Feature f; - f.hasTimestamp = true; - f.timestamp = timestamp; - for (size_t i = 0; i < yo.freqProb.size(); ++i) - { - f.values.push_back(yo.freqProb[i].first); - } - fs[m_oF0Candidates].push_back(f); - - // VOICEDPROB - f.values.clear(); - float voicedProb = 0; - for (size_t i = 0; i < yo.freqProb.size(); ++i) - { - f.values.push_back(yo.freqProb[i].second); - voicedProb += yo.freqProb[i].second; - } - fs[m_oF0Probs].push_back(f); - - f.values.clear(); - f.values.push_back(voicedProb); - fs[m_oVoicedProb].push_back(f); - - // SALIENCE -- maybe this should eventually disappear - f.values.clear(); - float salienceSum = 0; - for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin) - { - f.values.push_back(yo.salience[iBin]); - salienceSum += yo.salience[iBin]; - } - fs[m_oCandidateSalience].push_back(f); - - return fs; -} - -PYIN::FeatureSet -PYIN::getRemainingFeatures() -{ - FeatureSet fs; - Feature f; - f.hasTimestamp = true; - f.hasDuration = false; - - if (m_pitchProb.empty()) { - return fs; - } - - // MONO-PITCH STUFF - MonoPitch mp; - vector mpOut = mp.process(m_pitchProb); - for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) - { - if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue; - f.timestamp = m_timestamp[iFrame]; - f.values.clear(); - if (m_outputUnvoiced == 1) - { - f.values.push_back(fabs(mpOut[iFrame])); - } else { - f.values.push_back(mpOut[iFrame]); - } - - fs[m_oSmoothedPitchTrack].push_back(f); - } - - // MONO-NOTE STUFF - MonoNote mn; - std::vector > > smoothedPitch; - for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) { - std::vector > temp; - if (mpOut[iFrame] > 0) - { - double tempPitch = 12 * std::log(mpOut[iFrame]/440)/std::log(2.) 
+ 69; - temp.push_back(std::pair(tempPitch, .9)); - } - smoothedPitch.push_back(temp); - } - // vector mnOut = mn.process(m_pitchProb); - vector mnOut = mn.process(smoothedPitch); - - // turning feature into a note feature - f.hasTimestamp = true; - f.hasDuration = true; - f.values.clear(); - - int onsetFrame = 0; - bool isVoiced = 0; - bool oldIsVoiced = 0; - size_t nFrame = m_pitchProb.size(); - - std::vector notePitchTrack; // collects pitches for one note at a time - for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) - { - isVoiced = mnOut[iFrame].noteState < 3 && smoothedPitch[iFrame].size() > 0; - if (isVoiced && iFrame != nFrame-1) - { - if (oldIsVoiced == 0) // beginning of a note - { - onsetFrame = iFrame; - notePitchTrack.clear(); - } - float pitch = smoothedPitch[iFrame][0].first; - notePitchTrack.push_back(pitch); // add to the note's pitch track - } else { // not currently voiced - if (oldIsVoiced == 1 && notePitchTrack.size() > 4) // end of the note - { - std::sort(notePitchTrack.begin(), notePitchTrack.end()); - float medianPitch = notePitchTrack[notePitchTrack.size()/2]; - float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440; - f.values.clear(); - f.values.push_back(medianFreq); - f.timestamp = m_timestamp[onsetFrame]; - f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame]; - fs[m_oNotes].push_back(f); - } - } - oldIsVoiced = isVoiced; - } - return fs; -} diff -r effea38d5a6d -r 9d6595563c11 PYIN.h --- a/PYIN.h Mon Jan 26 17:47:29 2015 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,79 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - pYIN - A fundamental frequency estimator for monophonic audio - Centre for Digital Music, Queen Mary, University of London. 
- - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#ifndef _PYIN_H_ -#define _PYIN_H_ - -#include - -#include "Yin.h" - -class PYIN : public Vamp::Plugin -{ -public: - PYIN(float inputSampleRate); - virtual ~PYIN(); - - std::string getIdentifier() const; - std::string getName() const; - std::string getDescription() const; - std::string getMaker() const; - int getPluginVersion() const; - std::string getCopyright() const; - - InputDomain getInputDomain() const; - size_t getPreferredBlockSize() const; - size_t getPreferredStepSize() const; - size_t getMinChannelCount() const; - size_t getMaxChannelCount() const; - - ParameterList getParameterDescriptors() const; - float getParameter(std::string identifier) const; - void setParameter(std::string identifier, float value); - - ProgramList getPrograms() const; - std::string getCurrentProgram() const; - void selectProgram(std::string name); - - OutputList getOutputDescriptors() const; - - bool initialise(size_t channels, size_t stepSize, size_t blockSize); - void reset(); - - FeatureSet process(const float *const *inputBuffers, - Vamp::RealTime timestamp); - - FeatureSet getRemainingFeatures(); - -protected: - size_t m_channels; - size_t m_stepSize; - size_t m_blockSize; - float m_fmin; - float m_fmax; - Yin m_yin; - - mutable int m_oF0Candidates; - mutable int m_oF0Probs; - mutable int m_oVoicedProb; - mutable int m_oCandidateSalience; - mutable int m_oSmoothedPitchTrack; - mutable int m_oNotes; - - float m_threshDistr; - float m_outputUnvoiced; - vector > > m_pitchProb; - vector m_timestamp; -}; - -#endif diff -r effea38d5a6d -r 9d6595563c11 PYinVamp.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PYinVamp.cpp Tue Mar 31 15:10:07 2015 
+0100 @@ -0,0 +1,608 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "PYinVamp.h" +#include "MonoNote.h" +#include "MonoPitch.h" + +#include "vamp-sdk/FFT.h" + +#include +#include + +#include +#include +#include + +using std::string; +using std::vector; +using Vamp::RealTime; + + +PYinVamp::PYinVamp(float inputSampleRate) : + Plugin(inputSampleRate), + m_channels(0), + m_stepSize(256), + m_blockSize(2048), + m_fmin(40), + m_fmax(1600), + m_yin(2048, inputSampleRate, 0.0), + m_oF0Candidates(0), + m_oF0Probs(0), + m_oVoicedProb(0), + m_oCandidateSalience(0), + m_oSmoothedPitchTrack(0), + m_oNotes(0), + m_threshDistr(2.0f), + m_outputUnvoiced(0.0f), + m_preciseTime(0.0f), + m_lowAmp(0.1f), + m_onsetSensitivity(0.7f), + m_pruneThresh(0.1f), + m_pitchProb(0), + m_timestamp(0), + m_level(0) +{ +} + +PYinVamp::~PYinVamp() +{ +} + +string +PYinVamp::getIdentifier() const +{ + return "pyin"; +} + +string +PYinVamp::getName() const +{ + return "pYin"; +} + +string +PYinVamp::getDescription() const +{ + return "Monophonic pitch and note tracking based on a probabilistic Yin extension."; +} + +string +PYinVamp::getMaker() const +{ + return "Matthias Mauch"; +} + +int +PYinVamp::getPluginVersion() const +{ + // Increment this each time you release a version that behaves + // differently from the previous one + return 1; +} + +string +PYinVamp::getCopyright() const +{ + return "GPL"; +} + +PYinVamp::InputDomain +PYinVamp::getInputDomain() const +{ + return TimeDomain; +} + 
+size_t +PYinVamp::getPreferredBlockSize() const +{ + return 2048; +} + +size_t +PYinVamp::getPreferredStepSize() const +{ + return 256; +} + +size_t +PYinVamp::getMinChannelCount() const +{ + return 1; +} + +size_t +PYinVamp::getMaxChannelCount() const +{ + return 1; +} + +PYinVamp::ParameterList +PYinVamp::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor d; + + d.identifier = "threshdistr"; + d.name = "Yin threshold distribution"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 7.0f; + d.defaultValue = 2.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("Uniform"); + d.valueNames.push_back("Beta (mean 0.10)"); + d.valueNames.push_back("Beta (mean 0.15)"); + d.valueNames.push_back("Beta (mean 0.20)"); + d.valueNames.push_back("Beta (mean 0.30)"); + d.valueNames.push_back("Single Value 0.10"); + d.valueNames.push_back("Single Value 0.15"); + d.valueNames.push_back("Single Value 0.20"); + list.push_back(d); + + d.identifier = "outputunvoiced"; + d.valueNames.clear(); + d.name = "Output estimates classified as unvoiced?"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 2.0f; + d.defaultValue = 0.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("No"); + d.valueNames.push_back("Yes"); + d.valueNames.push_back("Yes, as negative frequencies"); + list.push_back(d); + + d.identifier = "precisetime"; + d.valueNames.clear(); + d.name = "Use non-standard precise YIN timing (slow)."; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 1.0f; + d.defaultValue = 0.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + list.push_back(d); + + d.identifier = "lowampsuppression"; + d.valueNames.clear(); + d.name = "Suppress low amplitude pitch estimates."; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 1.0f; + d.defaultValue = 0.1f; + d.isQuantized = false; + list.push_back(d); + + d.identifier = 
"onsetsensitivity"; + d.valueNames.clear(); + d.name = "Onset sensitivity"; + d.description = "Adds additional note onsets when RMS increases."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 1.0f; + d.defaultValue = 0.7f; + d.isQuantized = false; + list.push_back(d); + + d.identifier = "prunethresh"; + d.valueNames.clear(); + d.name = "Duration pruning threshold."; + d.description = "Prune notes that are shorter than this value."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 0.2f; + d.defaultValue = 0.1f; + d.isQuantized = false; + list.push_back(d); + + return list; +} + +float +PYinVamp::getParameter(string identifier) const +{ + if (identifier == "threshdistr") { + return m_threshDistr; + } + if (identifier == "outputunvoiced") { + return m_outputUnvoiced; + } + if (identifier == "precisetime") { + return m_preciseTime; + } + if (identifier == "lowampsuppression") { + return m_lowAmp; + } + if (identifier == "onsetsensitivity") { + return m_onsetSensitivity; + } + if (identifier == "prunethresh") { + return m_pruneThresh; + } + return 0.f; +} + +void +PYinVamp::setParameter(string identifier, float value) +{ + if (identifier == "threshdistr") + { + m_threshDistr = value; + } + if (identifier == "outputunvoiced") + { + m_outputUnvoiced = value; + } + if (identifier == "precisetime") + { + m_preciseTime = value; + } + if (identifier == "lowampsuppression") + { + m_lowAmp = value; + } + if (identifier == "onsetsensitivity") + { + m_onsetSensitivity = value; + } + if (identifier == "prunethresh") + { + m_pruneThresh = value; + } +} + +PYinVamp::ProgramList +PYinVamp::getPrograms() const +{ + ProgramList list; + return list; +} + +string +PYinVamp::getCurrentProgram() const +{ + return ""; // no programs +} + +void +PYinVamp::selectProgram(string name) +{ +} + +PYinVamp::OutputList +PYinVamp::getOutputDescriptors() const +{ + OutputList outputs; + + OutputDescriptor d; + + int outputNumber = 0; + + d.identifier = "f0candidates"; + d.name = "F0 Candidates"; 
+ d.description = "Estimated fundamental frequency candidates."; + d.unit = "Hz"; + d.hasFixedBinCount = false; + // d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = m_fmin; + d.maxValue = 500; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oF0Candidates = outputNumber++; + + d.identifier = "f0probs"; + d.name = "Candidate Probabilities"; + d.description = "Probabilities of estimated fundamental frequency candidates."; + d.unit = ""; + d.hasFixedBinCount = false; + // d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oF0Probs = outputNumber++; + + d.identifier = "voicedprob"; + d.name = "Voiced Probability"; + d.description = "Probability that the signal is voiced according to Probabilistic Yin."; + d.unit = ""; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oVoicedProb = outputNumber++; + + d.identifier = "candidatesalience"; + d.name = "Candidate Salience"; + d.description = "Candidate Salience"; + d.hasFixedBinCount = true; + d.binCount = m_blockSize / 2; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oCandidateSalience = outputNumber++; + + d.identifier = "smoothedpitchtrack"; + d.name = "Smoothed Pitch Track"; + d.description = "."; + d.unit = "Hz"; + d.hasFixedBinCount = true; 
+ d.binCount = 1; + d.hasKnownExtents = false; + // d.minValue = 0; + // d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oSmoothedPitchTrack = outputNumber++; + + d.identifier = "notes"; + d.name = "Notes"; + d.description = "Derived fixed-pitch note frequencies"; + // d.unit = "MIDI unit"; + d.unit = "Hz"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = false; + d.isQuantized = false; + d.sampleType = OutputDescriptor::VariableSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = true; + outputs.push_back(d); + m_oNotes = outputNumber++; + + return outputs; +} + +bool +PYinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + +/* + std::cerr << "PYinVamp::initialise: channels = " << channels + << ", stepSize = " << stepSize << ", blockSize = " << blockSize + << std::endl; +*/ + m_channels = channels; + m_stepSize = stepSize; + m_blockSize = blockSize; + + reset(); + + return true; +} + +void +PYinVamp::reset() +{ + m_yin.setThresholdDistr(m_threshDistr); + m_yin.setFrameSize(m_blockSize); + m_yin.setFast(!m_preciseTime); + + m_pitchProb.clear(); + m_timestamp.clear(); + m_level.clear(); +/* + std::cerr << "PYinVamp::reset" + << ", blockSize = " << m_blockSize + << std::endl; +*/ +} + +PYinVamp::FeatureSet +PYinVamp::process(const float *const *inputBuffers, RealTime timestamp) +{ + int offset = m_preciseTime == 1.0 ? 
m_blockSize/2 : m_blockSize/4; + timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate)); + + FeatureSet fs; + + float rms = 0; + + double *dInputBuffers = new double[m_blockSize]; + for (size_t i = 0; i < m_blockSize; ++i) { + dInputBuffers[i] = inputBuffers[0][i]; + rms += inputBuffers[0][i] * inputBuffers[0][i]; + } + rms /= m_blockSize; + rms = sqrt(rms); + + bool isLowAmplitude = (rms < m_lowAmp); + + Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers); + delete [] dInputBuffers; + + m_level.push_back(yo.rms); + + // First, get the things out of the way that we don't want to output + // immediately, but instead save for later. + vector > tempPitchProb; + for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate) + { + double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69; + if (!isLowAmplitude) + { + tempPitchProb.push_back(pair + (tempPitch, yo.freqProb[iCandidate].second)); + } else { + float factor = ((rms+0.01*m_lowAmp)/(1.01*m_lowAmp)); + tempPitchProb.push_back(pair + (tempPitch, yo.freqProb[iCandidate].second*factor)); + } + } + m_pitchProb.push_back(tempPitchProb); + m_timestamp.push_back(timestamp); + + // F0 CANDIDATES + Feature f; + f.hasTimestamp = true; + f.timestamp = timestamp; + for (size_t i = 0; i < yo.freqProb.size(); ++i) + { + f.values.push_back(yo.freqProb[i].first); + } + fs[m_oF0Candidates].push_back(f); + + // VOICEDPROB + f.values.clear(); + float voicedProb = 0; + for (size_t i = 0; i < yo.freqProb.size(); ++i) + { + f.values.push_back(yo.freqProb[i].second); + voicedProb += yo.freqProb[i].second; + } + fs[m_oF0Probs].push_back(f); + + f.values.push_back(voicedProb); + fs[m_oVoicedProb].push_back(f); + + // SALIENCE -- maybe this should eventually disappear + f.values.clear(); + float salienceSum = 0; + for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin) + { + f.values.push_back(yo.salience[iBin]); + salienceSum += yo.salience[iBin]; + 
} + fs[m_oCandidateSalience].push_back(f); + + return fs; +} + +PYinVamp::FeatureSet +PYinVamp::getRemainingFeatures() +{ + FeatureSet fs; + Feature f; + f.hasTimestamp = true; + f.hasDuration = false; + + if (m_pitchProb.empty()) { + return fs; + } + + // MONO-PITCH STUFF + MonoPitch mp; + vector mpOut = mp.process(m_pitchProb); + for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) + { + if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue; + f.timestamp = m_timestamp[iFrame]; + f.values.clear(); + if (m_outputUnvoiced == 1) + { + f.values.push_back(fabs(mpOut[iFrame])); + } else { + f.values.push_back(mpOut[iFrame]); + } + + fs[m_oSmoothedPitchTrack].push_back(f); + } + + // MONO-NOTE STUFF + std::cerr << "Mono Note Stuff" << std::endl; + MonoNote mn; + std::vector > > smoothedPitch; + for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) { + std::vector > temp; + if (mpOut[iFrame] > 0) + { + double tempPitch = 12 * std::log(mpOut[iFrame]/440)/std::log(2.) + 69; + temp.push_back(std::pair(tempPitch, .9)); + } + smoothedPitch.push_back(temp); + } + // vector mnOut = mn.process(m_pitchProb); + vector mnOut = mn.process(smoothedPitch); + + // turning feature into a note feature + f.hasTimestamp = true; + f.hasDuration = true; + f.values.clear(); + + int onsetFrame = 0; + bool isVoiced = 0; + bool oldIsVoiced = 0; + size_t nFrame = m_pitchProb.size(); + + float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize; + + std::vector notePitchTrack; // collects pitches for one note at a time + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) + { + isVoiced = mnOut[iFrame].noteState < 3 + && smoothedPitch[iFrame].size() > 0 + && (iFrame >= nFrame-2 + || ((m_level[iFrame]/m_level[iFrame+2]) > m_onsetSensitivity)); + // std::cerr << m_level[iFrame]/m_level[iFrame-1] << " " << isVoiced << std::endl; + if (isVoiced && iFrame != nFrame-1) + { + if (oldIsVoiced == 0) // beginning of a note + { + onsetFrame = iFrame; + } + float pitch = 
smoothedPitch[iFrame][0].first; + notePitchTrack.push_back(pitch); // add to the note's pitch track + } else { // not currently voiced + if (oldIsVoiced == 1) // end of note + { + // std::cerr << notePitchTrack.size() << " " << minNoteFrames << std::endl; + if (notePitchTrack.size() >= minNoteFrames) + { + std::sort(notePitchTrack.begin(), notePitchTrack.end()); + float medianPitch = notePitchTrack[notePitchTrack.size()/2]; + float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440; + f.values.clear(); + f.values.push_back(medianFreq); + f.timestamp = m_timestamp[onsetFrame]; + f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame]; + fs[m_oNotes].push_back(f); + } + notePitchTrack.clear(); + } + } + oldIsVoiced = isVoiced; + } + return fs; +} diff -r effea38d5a6d -r 9d6595563c11 PYinVamp.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PYinVamp.h Tue Mar 31 15:10:07 2015 +0100 @@ -0,0 +1,84 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. 
+*/ + +#ifndef _PYINVAMP_H_ +#define _PYINVAMP_H_ + +#include + +#include "Yin.h" + +class PYinVamp : public Vamp::Plugin +{ +public: + PYinVamp(float inputSampleRate); + virtual ~PYinVamp(); + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + InputDomain getInputDomain() const; + size_t getPreferredBlockSize() const; + size_t getPreferredStepSize() const; + size_t getMinChannelCount() const; + size_t getMaxChannelCount() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string identifier) const; + void setParameter(std::string identifier, float value); + + ProgramList getPrograms() const; + std::string getCurrentProgram() const; + void selectProgram(std::string name); + + OutputList getOutputDescriptors() const; + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + size_t m_channels; + size_t m_stepSize; + size_t m_blockSize; + float m_fmin; + float m_fmax; + Yin m_yin; + + mutable int m_oF0Candidates; + mutable int m_oF0Probs; + mutable int m_oVoicedProb; + mutable int m_oCandidateSalience; + mutable int m_oSmoothedPitchTrack; + mutable int m_oNotes; + + float m_threshDistr; + float m_outputUnvoiced; + float m_preciseTime; + float m_lowAmp; + float m_onsetSensitivity; + float m_pruneThresh; + vector > > m_pitchProb; + vector m_timestamp; + vector m_level; +}; + +#endif diff -r effea38d5a6d -r 9d6595563c11 SparseHMM.cpp --- a/SparseHMM.cpp Mon Jan 26 17:47:29 2015 +0000 +++ b/SparseHMM.cpp Tue Mar 31 15:10:07 2015 +0100 @@ -30,6 +30,10 @@ SparseHMM::decodeViterbi(std::vector > obsProb, vector *scale) { + if (obsProb.size() < 1) { + return vector(); + } + size_t nState = init.size(); size_t nFrame = 
obsProb.size(); diff -r effea38d5a6d -r 9d6595563c11 VampYin.cpp --- a/VampYin.cpp Mon Jan 26 17:47:29 2015 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,379 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - pYIN - A fundamental frequency estimator for monophonic audio - Centre for Digital Music, Queen Mary, University of London. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#include "VampYin.h" -#include "MonoNote.h" - -#include "vamp-sdk/FFT.h" - -#include -#include - -#include -#include -#include - -using std::string; -using std::vector; -using Vamp::RealTime; - - -VampYin::VampYin(float inputSampleRate) : - Plugin(inputSampleRate), - m_channels(0), - m_stepSize(256), - m_blockSize(2048), - m_fmin(40), - m_fmax(1000), - m_yin(2048, inputSampleRate, 0.0), - m_outNoF0(0), - m_outNoPeriodicity(0), - m_outNoRms(0), - m_outNoSalience(0), - m_yinParameter(0.15f), - m_outputUnvoiced(2.0f) -{ -} - -VampYin::~VampYin() -{ -} - -string -VampYin::getIdentifier() const -{ - return "yin"; -} - -string -VampYin::getName() const -{ - return "Yin"; -} - -string -VampYin::getDescription() const -{ - return "A vamp implementation of the Yin algorithm for monophonic frequency estimation."; -} - -string -VampYin::getMaker() const -{ - return "Matthias Mauch"; -} - -int -VampYin::getPluginVersion() const -{ - // Increment this each time you release a version that behaves - // differently from the previous one - return 1; -} - -string -VampYin::getCopyright() const -{ - return "GPL"; -} - -VampYin::InputDomain -VampYin::getInputDomain() const -{ - return TimeDomain; -} - -size_t -VampYin::getPreferredBlockSize() const -{ - return 2048; -} - 
-size_t -VampYin::getPreferredStepSize() const -{ - return 256; -} - -size_t -VampYin::getMinChannelCount() const -{ - return 1; -} - -size_t -VampYin::getMaxChannelCount() const -{ - return 1; -} - -VampYin::ParameterList -VampYin::getParameterDescriptors() const -{ - ParameterList list; - - ParameterDescriptor d; - d.identifier = "yinThreshold"; - d.name = "Yin threshold"; - d.description = "The greedy Yin search for a low value difference function is done once a dip lower than this threshold is reached."; - d.unit = ""; - d.minValue = 0.025f; - d.maxValue = 1.0f; - d.defaultValue = 0.15f; - d.isQuantized = true; - d.quantizeStep = 0.025f; - - list.push_back(d); - - // d.identifier = "removeunvoiced"; - // d.name = "Remove pitches classified as unvoiced."; - // d.description = "If ticked, then the pitch estimator will return the most likely pitch, even if it 'thinks' there isn't any."; - // d.unit = ""; - // d.minValue = 0.0f; - // d.maxValue = 1.0f; - // d.defaultValue = 0.0f; - // d.isQuantized = true; - // d.quantizeStep = 1.0f; - // d.valueNames.clear(); - // list.push_back(d); - - d.identifier = "outputunvoiced"; - d.valueNames.clear(); - d.name = "Output estimates classified as unvoiced?"; - d.description = "."; - d.unit = ""; - d.minValue = 0.0f; - d.maxValue = 2.0f; - d.defaultValue = 2.0f; - d.isQuantized = true; - d.quantizeStep = 1.0f; - d.valueNames.push_back("No"); - d.valueNames.push_back("Yes"); - d.valueNames.push_back("Yes, as negative frequencies"); - list.push_back(d); - - return list; -} - -float -VampYin::getParameter(string identifier) const -{ - if (identifier == "yinThreshold") { - return m_yinParameter; - } - if (identifier == "outputunvoiced") { - return m_outputUnvoiced; - } - return 0.f; -} - -void -VampYin::setParameter(string identifier, float value) -{ - if (identifier == "yinThreshold") - { - m_yinParameter = value; - } - if (identifier == "outputunvoiced") - { - m_outputUnvoiced = value; - } -} - -VampYin::ProgramList 
-VampYin::getPrograms() const -{ - ProgramList list; - return list; -} - -string -VampYin::getCurrentProgram() const -{ - return ""; // no programs -} - -void -VampYin::selectProgram(string name) -{ -} - -VampYin::OutputList -VampYin::getOutputDescriptors() const -{ - OutputList outputs; - - OutputDescriptor d; - - int outputNumber = 0; - - d.identifier = "f0"; - d.name = "Estimated f0"; - d.description = "Estimated fundamental frequency"; - d.unit = "Hz"; - d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = m_fmin; - d.maxValue = 500; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_outNoF0 = outputNumber++; - - d.identifier = "periodicity"; - d.name = "Periodicity"; - d.description = "by-product of Yin f0 estimation"; - d.unit = ""; - d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = 0; - d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_outNoPeriodicity = outputNumber++; - - d.identifier = "rms"; - d.name = "Root mean square"; - d.description = "Root mean square of the waveform."; - d.unit = ""; - d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = 0; - d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_outNoRms = outputNumber++; - - d.identifier = "salience"; - d.name = "Salience"; - d.description = "Yin Salience"; - d.hasFixedBinCount = true; - d.binCount = m_blockSize / 2; - d.hasKnownExtents = true; - d.minValue = 0; - d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = 
(m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_outNoSalience = outputNumber++; - - return outputs; -} - -bool -VampYin::initialise(size_t channels, size_t stepSize, size_t blockSize) -{ - if (channels < getMinChannelCount() || - channels > getMaxChannelCount()) return false; - -/* - std::cerr << "VampYin::initialise: channels = " << channels - << ", stepSize = " << stepSize << ", blockSize = " << blockSize - << std::endl; -*/ - m_channels = channels; - m_stepSize = stepSize; - m_blockSize = blockSize; - - reset(); - - return true; -} - -void -VampYin::reset() -{ - m_yin.setThreshold(m_yinParameter); - m_yin.setFrameSize(m_blockSize); -/* - std::cerr << "VampYin::reset: yin threshold set to " << (m_yinParameter) - << ", blockSize = " << m_blockSize - << std::endl; -*/ -} - -VampYin::FeatureSet -VampYin::process(const float *const *inputBuffers, RealTime timestamp) -{ - timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate)); - FeatureSet fs; - - double *dInputBuffers = new double[m_blockSize]; - for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i]; - - Yin::YinOutput yo = m_yin.process(dInputBuffers); - // std::cerr << "f0 in VampYin: " << yo.f0 << std::endl; - Feature f; - f.hasTimestamp = true; - f.timestamp = timestamp; - if (m_outputUnvoiced == 0.0f) - { - // std::cerr << "f0 in VampYin: " << yo.f0 << std::endl; - if (yo.f0 > 0 && yo.f0 < m_fmax && yo.f0 > m_fmin) { - f.values.push_back(yo.f0); - fs[m_outNoF0].push_back(f); - } - } else if (m_outputUnvoiced == 1.0f) - { - if (fabs(yo.f0) < m_fmax && fabs(yo.f0) > m_fmin) { - f.values.push_back(fabs(yo.f0)); - fs[m_outNoF0].push_back(f); - } - } else - { - if (fabs(yo.f0) < m_fmax && fabs(yo.f0) > m_fmin) { - f.values.push_back(yo.f0); - fs[m_outNoF0].push_back(f); - } - } - - f.values.clear(); - f.values.push_back(yo.rms); - fs[m_outNoRms].push_back(f); - - f.values.clear(); - for (size_t iBin = 0; iBin < 
yo.salience.size(); ++iBin) - { - f.values.push_back(yo.salience[iBin]); - } - fs[m_outNoSalience].push_back(f); - - f.values.clear(); - // f.values[0] = yo.periodicity; - f.values.push_back(yo.periodicity); - fs[m_outNoPeriodicity].push_back(f); - - delete [] dInputBuffers; - - return fs; -} - -VampYin::FeatureSet -VampYin::getRemainingFeatures() -{ - FeatureSet fs; - return fs; -} diff -r effea38d5a6d -r 9d6595563c11 VampYin.h --- a/VampYin.h Mon Jan 26 17:47:29 2015 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - pYIN - A fundamental frequency estimator for monophonic audio - Centre for Digital Music, Queen Mary, University of London. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. 
-*/ - -#ifndef _VAMPYIN_H_ -#define _VAMPYIN_H_ - -#include - -#include "Yin.h" - -class VampYin : public Vamp::Plugin -{ -public: - VampYin(float inputSampleRate); - virtual ~VampYin(); - - std::string getIdentifier() const; - std::string getName() const; - std::string getDescription() const; - std::string getMaker() const; - int getPluginVersion() const; - std::string getCopyright() const; - - InputDomain getInputDomain() const; - size_t getPreferredBlockSize() const; - size_t getPreferredStepSize() const; - size_t getMinChannelCount() const; - size_t getMaxChannelCount() const; - - ParameterList getParameterDescriptors() const; - float getParameter(std::string identifier) const; - void setParameter(std::string identifier, float value); - - ProgramList getPrograms() const; - std::string getCurrentProgram() const; - void selectProgram(std::string name); - - OutputList getOutputDescriptors() const; - - bool initialise(size_t channels, size_t stepSize, size_t blockSize); - void reset(); - - FeatureSet process(const float *const *inputBuffers, - Vamp::RealTime timestamp); - - FeatureSet getRemainingFeatures(); - -protected: - size_t m_channels; - size_t m_stepSize; - size_t m_blockSize; - float m_fmin; - float m_fmax; - Yin m_yin; - - mutable int m_outNoF0; - mutable int m_outNoPeriodicity; - mutable int m_outNoRms; - mutable int m_outNoSalience; - - float m_yinParameter; - float m_outputUnvoiced; -}; - -#endif diff -r effea38d5a6d -r 9d6595563c11 Yin.cpp --- a/Yin.cpp Mon Jan 26 17:47:29 2015 +0000 +++ b/Yin.cpp Tue Mar 31 15:10:07 2015 +0100 @@ -25,12 +25,13 @@ using std::vector; -Yin::Yin(size_t frameSize, size_t inputSampleRate, double thresh) : +Yin::Yin(size_t frameSize, size_t inputSampleRate, double thresh, bool fast) : m_frameSize(frameSize), m_inputSampleRate(inputSampleRate), m_thresh(thresh), m_threshDistr(2), - m_yinBufferSize(frameSize/2) + m_yinBufferSize(frameSize/2), + m_fast(fast) { if (frameSize & (frameSize-1)) { // throw "N must be a power of 
two"; @@ -47,7 +48,9 @@ double* yinBuffer = new double[m_yinBufferSize]; // calculate aperiodicity function for all periods - YinUtil::fastDifference(in, yinBuffer, m_yinBufferSize); + if (m_fast) YinUtil::fastDifference(in, yinBuffer, m_yinBufferSize); + else YinUtil::slowDifference(in, yinBuffer, m_yinBufferSize); + YinUtil::cumulativeDifference(yinBuffer, m_yinBufferSize); int tau = 0; @@ -86,7 +89,9 @@ double* yinBuffer = new double[m_yinBufferSize]; // calculate aperiodicity function for all periods - YinUtil::fastDifference(in, yinBuffer, m_yinBufferSize); + if (m_fast) YinUtil::fastDifference(in, yinBuffer, m_yinBufferSize); + else YinUtil::slowDifference(in, yinBuffer, m_yinBufferSize); + YinUtil::cumulativeDifference(yinBuffer, m_yinBufferSize); vector peakProbability = YinUtil::yinProb(yinBuffer, m_threshDistr, m_yinBufferSize); @@ -97,8 +102,8 @@ { probSum += peakProbability[iBin]; } - - Yin::YinOutput yo(0,0,0); + double rms = std::sqrt(YinUtil::sumSquare(in, 0, m_yinBufferSize)/m_yinBufferSize); + Yin::YinOutput yo(0,0,rms); for (size_t iBuf = 0; iBuf < m_yinBufferSize; ++iBuf) { yo.salience.push_back(peakProbability[iBuf]); @@ -140,9 +145,9 @@ return 0; } -// int -// Yin::setRemoveUnvoiced(bool parameter) -// { -// m_removeUnvoiced = parameter; -// return 0; -// } +int +Yin::setFast(bool parameter) +{ + m_fast = parameter; + return 0; +} diff -r effea38d5a6d -r 9d6595563c11 Yin.h --- a/Yin.h Mon Jan 26 17:47:29 2015 +0000 +++ b/Yin.h Tue Mar 31 15:10:07 2015 +0100 @@ -31,7 +31,7 @@ class Yin { public: - Yin(size_t frameSize, size_t inputSampleRate, double thresh = 0.2); + Yin(size_t frameSize, size_t inputSampleRate, double thresh = 0.2, bool fast = true); virtual ~Yin(); struct YinOutput { @@ -53,6 +53,7 @@ int setThreshold(double parameter); int setThresholdDistr(float parameter); int setFrameSize(size_t frameSize); + int setFast(bool fast); // int setRemoveUnvoiced(bool frameSize); YinOutput process(const double *in) const; YinOutput 
processProbabilisticYin(const double *in) const; @@ -63,6 +64,7 @@ mutable double m_thresh; mutable size_t m_threshDistr; mutable size_t m_yinBufferSize; + mutable bool m_fast; // mutable bool m_removeUnvoiced; }; diff -r effea38d5a6d -r 9d6595563c11 YinUtil.cpp --- a/YinUtil.cpp Mon Jan 26 17:47:29 2015 +0000 +++ b/YinUtil.cpp Tue Mar 31 15:10:07 2015 +0100 @@ -22,6 +22,24 @@ #include void +YinUtil::slowDifference(const double *in, double *yinBuffer, const size_t yinBufferSize) +{ + yinBuffer[0] = 0; + double delta ; + int startPoint = 0; + int endPoint = 0; + for (int i = 1; i < yinBufferSize; ++i) { + yinBuffer[i] = 0; + startPoint = yinBufferSize/2 - i/2; + endPoint = startPoint + yinBufferSize; + for (int j = startPoint; j < endPoint; ++j) { + delta = in[i+j] - in[j]; + yinBuffer[i] += delta * delta; + } + } +} + +void YinUtil::fastDifference(const double *in, double *yinBuffer, const size_t yinBufferSize) { @@ -31,11 +49,6 @@ size_t frameSize = 2 * yinBufferSize; - for (size_t j = 0; j < yinBufferSize; ++j) - { - yinBuffer[j] = 0.; - } - double *audioTransformedReal = new double[frameSize]; double *audioTransformedImag = new double[frameSize]; double *nullImag = new double[frameSize]; @@ -48,7 +61,8 @@ for (size_t j = 0; j < yinBufferSize; ++j) { - powerTerms[j] = 0.; + yinBuffer[j] = 0.; // set to zero + powerTerms[j] = 0.; // set to zero } for (size_t j = 0; j < frameSize; ++j) @@ -82,7 +96,6 @@ // 2. 
half of the data, disguised as a convolution kernel for (size_t j = 0; j < yinBufferSize; ++j) { kernel[j] = in[yinBufferSize-1-j]; - kernel[j+yinBufferSize] = 0; } Vamp::FFT::forward(frameSize, kernel, nullImag, kernelTransformedReal, kernelTransformedImag); @@ -164,25 +177,30 @@ return 0; } +static float uniformDist[100] = {0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000}; +static float betaDist1[100] = 
{0.028911,0.048656,0.061306,0.068539,0.071703,0.071877,0.069915,0.066489,0.062117,0.057199,0.052034,0.046844,0.041786,0.036971,0.032470,0.028323,0.024549,0.021153,0.018124,0.015446,0.013096,0.011048,0.009275,0.007750,0.006445,0.005336,0.004397,0.003606,0.002945,0.002394,0.001937,0.001560,0.001250,0.000998,0.000792,0.000626,0.000492,0.000385,0.000300,0.000232,0.000179,0.000137,0.000104,0.000079,0.000060,0.000045,0.000033,0.000024,0.000018,0.000013,0.000009,0.000007,0.000005,0.000003,0.000002,0.000002,0.000001,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000}; +static float betaDist2[100] = {0.012614,0.022715,0.030646,0.036712,0.041184,0.044301,0.046277,0.047298,0.047528,0.047110,0.046171,0.044817,0.043144,0.041231,0.039147,0.036950,0.034690,0.032406,0.030133,0.027898,0.025722,0.023624,0.021614,0.019704,0.017900,0.016205,0.014621,0.013148,0.011785,0.010530,0.009377,0.008324,0.007366,0.006497,0.005712,0.005005,0.004372,0.003806,0.003302,0.002855,0.002460,0.002112,0.001806,0.001539,0.001307,0.001105,0.000931,0.000781,0.000652,0.000542,0.000449,0.000370,0.000303,0.000247,0.000201,0.000162,0.000130,0.000104,0.000082,0.000065,0.000051,0.000039,0.000030,0.000023,0.000018,0.000013,0.000010,0.000007,0.000005,0.000004,0.000003,0.000002,0.000001,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000}; +static float betaDist3[100] = 
{0.006715,0.012509,0.017463,0.021655,0.025155,0.028031,0.030344,0.032151,0.033506,0.034458,0.035052,0.035331,0.035332,0.035092,0.034643,0.034015,0.033234,0.032327,0.031314,0.030217,0.029054,0.027841,0.026592,0.025322,0.024042,0.022761,0.021489,0.020234,0.019002,0.017799,0.016630,0.015499,0.014409,0.013362,0.012361,0.011407,0.010500,0.009641,0.008830,0.008067,0.007351,0.006681,0.006056,0.005475,0.004936,0.004437,0.003978,0.003555,0.003168,0.002814,0.002492,0.002199,0.001934,0.001695,0.001481,0.001288,0.001116,0.000963,0.000828,0.000708,0.000603,0.000511,0.000431,0.000361,0.000301,0.000250,0.000206,0.000168,0.000137,0.000110,0.000088,0.000070,0.000055,0.000043,0.000033,0.000025,0.000019,0.000014,0.000010,0.000007,0.000005,0.000004,0.000002,0.000002,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000}; +static float betaDist4[100] = {0.003996,0.007596,0.010824,0.013703,0.016255,0.018501,0.020460,0.022153,0.023597,0.024809,0.025807,0.026607,0.027223,0.027671,0.027963,0.028114,0.028135,0.028038,0.027834,0.027535,0.027149,0.026687,0.026157,0.025567,0.024926,0.024240,0.023517,0.022763,0.021983,0.021184,0.020371,0.019548,0.018719,0.017890,0.017062,0.016241,0.015428,0.014627,0.013839,0.013068,0.012315,0.011582,0.010870,0.010181,0.009515,0.008874,0.008258,0.007668,0.007103,0.006565,0.006053,0.005567,0.005107,0.004673,0.004264,0.003880,0.003521,0.003185,0.002872,0.002581,0.002312,0.002064,0.001835,0.001626,0.001434,0.001260,0.001102,0.000959,0.000830,0.000715,0.000612,0.000521,0.000440,0.000369,0.000308,0.000254,0.000208,0.000169,0.000136,0.000108,0.000084,0.000065,0.000050,0.000037,0.000027,0.000019,0.000014,0.000009,0.000006,0.000004,0.000002,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000}; +static float single10[100] = 
{0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; +static float single15[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; +static float single20[100] = 
{0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; std::vector -YinUtil::yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize) +YinUtil::yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize, const size_t minTau0, const size_t maxTau0) { + size_t minTau = 2; + size_t maxTau = yinBufferSize; + + // adapt period range, if necessary + if (minTau0 > 0 && minTau0 < maxTau0) minTau = minTau0; + if (maxTau0 > 0 && maxTau0 < yinBufferSize && maxTau0 > minTau) maxTau = maxTau0; + double minWeight = 0.01; size_t tau; std::vector thresholds; std::vector distribution; std::vector peakProb = std::vector(yinBufferSize); - // TODO: make the distributions below part of a class, so they don't have to - // be allocated every time. 
- float uniformDist[100] = {0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000}; - float betaDist1[100] = {0.028911,0.048656,0.061306,0.068539,0.071703,0.071877,0.069915,0.066489,0.062117,0.057199,0.052034,0.046844,0.041786,0.036971,0.032470,0.028323,0.024549,0.021153,0.018124,0.015446,0.013096,0.011048,0.009275,0.007750,0.006445,0.005336,0.004397,0.003606,0.002945,0.002394,0.001937,0.001560,0.001250,0.000998,0.000792,0.000626,0.000492,0.000385,0.000300,0.000232,0.000179,0.000137,0.000104,0.000079,0.000060,0.000045,0.000033,0.000024,0.000018,0.000013,0.000009,0.000007,0.000005,0.000003,0.000002,0.000002,0.000001,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000}; - float betaDist2[100] = 
{0.012614,0.022715,0.030646,0.036712,0.041184,0.044301,0.046277,0.047298,0.047528,0.047110,0.046171,0.044817,0.043144,0.041231,0.039147,0.036950,0.034690,0.032406,0.030133,0.027898,0.025722,0.023624,0.021614,0.019704,0.017900,0.016205,0.014621,0.013148,0.011785,0.010530,0.009377,0.008324,0.007366,0.006497,0.005712,0.005005,0.004372,0.003806,0.003302,0.002855,0.002460,0.002112,0.001806,0.001539,0.001307,0.001105,0.000931,0.000781,0.000652,0.000542,0.000449,0.000370,0.000303,0.000247,0.000201,0.000162,0.000130,0.000104,0.000082,0.000065,0.000051,0.000039,0.000030,0.000023,0.000018,0.000013,0.000010,0.000007,0.000005,0.000004,0.000003,0.000002,0.000001,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000}; - float betaDist3[100] = {0.006715,0.012509,0.017463,0.021655,0.025155,0.028031,0.030344,0.032151,0.033506,0.034458,0.035052,0.035331,0.035332,0.035092,0.034643,0.034015,0.033234,0.032327,0.031314,0.030217,0.029054,0.027841,0.026592,0.025322,0.024042,0.022761,0.021489,0.020234,0.019002,0.017799,0.016630,0.015499,0.014409,0.013362,0.012361,0.011407,0.010500,0.009641,0.008830,0.008067,0.007351,0.006681,0.006056,0.005475,0.004936,0.004437,0.003978,0.003555,0.003168,0.002814,0.002492,0.002199,0.001934,0.001695,0.001481,0.001288,0.001116,0.000963,0.000828,0.000708,0.000603,0.000511,0.000431,0.000361,0.000301,0.000250,0.000206,0.000168,0.000137,0.000110,0.000088,0.000070,0.000055,0.000043,0.000033,0.000025,0.000019,0.000014,0.000010,0.000007,0.000005,0.000004,0.000002,0.000002,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000}; - float betaDist4[100] = 
{0.003996,0.007596,0.010824,0.013703,0.016255,0.018501,0.020460,0.022153,0.023597,0.024809,0.025807,0.026607,0.027223,0.027671,0.027963,0.028114,0.028135,0.028038,0.027834,0.027535,0.027149,0.026687,0.026157,0.025567,0.024926,0.024240,0.023517,0.022763,0.021983,0.021184,0.020371,0.019548,0.018719,0.017890,0.017062,0.016241,0.015428,0.014627,0.013839,0.013068,0.012315,0.011582,0.010870,0.010181,0.009515,0.008874,0.008258,0.007668,0.007103,0.006565,0.006053,0.005567,0.005107,0.004673,0.004264,0.003880,0.003521,0.003185,0.002872,0.002581,0.002312,0.002064,0.001835,0.001626,0.001434,0.001260,0.001102,0.000959,0.000830,0.000715,0.000612,0.000521,0.000440,0.000369,0.000308,0.000254,0.000208,0.000169,0.000136,0.000108,0.000084,0.000065,0.000050,0.000037,0.000027,0.000019,0.000014,0.000009,0.000006,0.000004,0.000002,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000}; - float single10[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; - float single15[100] = 
{0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; - float single20[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; size_t nThreshold = 100; int nThresholdInt = nThreshold; @@ -220,28 +238,46 @@ thresholds.push_back(0.01 + i*0.01); } - // double minYin = 2936; - // for (size_t i = 2; i < yinBufferSize; ++i) - // { - // if (yinBuffer[i] < minYin) - // { - // minYin = yinBuffer[i]; - // } - // } - // if (minYin < 0.01) std::cerr << "min Yin buffer element: " << minYin << std::endl; 
- int currThreshInd = nThreshold-1; - tau = 2; + tau = minTau; // double factor = 1.0 / (0.25 * (nThresholdInt+1) * (nThresholdInt + 1)); // factor to scale down triangular weight size_t minInd = 0; float minVal = 42.f; - while (currThreshInd != -1 && tau < yinBufferSize) + // while (currThreshInd != -1 && tau < maxTau) + // { + // if (yinBuffer[tau] < thresholds[currThreshInd]) + // { + // while (tau + 1 < maxTau && yinBuffer[tau+1] < yinBuffer[tau]) + // { + // tau++; + // } + // // tau is now local minimum + // // std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl; + // if (yinBuffer[tau] < minVal && tau > 2){ + // minVal = yinBuffer[tau]; + // minInd = tau; + // } + // peakProb[tau] += distribution[currThreshInd]; + // currThreshInd--; + // } else { + // tau++; + // } + // } + // double nonPeakProb = 1; + // for (size_t i = minTau; i < maxTau; ++i) + // { + // nonPeakProb -= peakProb[i]; + // } + // + // std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl; + float sumProb = 0; + while (tau+1 < maxTau) { - if (yinBuffer[tau] < thresholds[currThreshInd]) + if (yinBuffer[tau] < thresholds[thresholds.size()-1] && yinBuffer[tau+1] < yinBuffer[tau]) { - while (tau + 1 < yinBufferSize && yinBuffer[tau+1] < yinBuffer[tau]) + while (tau + 1 < maxTau && yinBuffer[tau+1] < yinBuffer[tau]) { tau++; } @@ -251,18 +287,33 @@ minVal = yinBuffer[tau]; minInd = tau; } - peakProb[tau] += distribution[currThreshInd]; - currThreshInd--; + currThreshInd = nThresholdInt-1; + while (thresholds[currThreshInd] > yinBuffer[tau] && currThreshInd > -1) { + // std::cerr << distribution[currThreshInd] << std::endl; + peakProb[tau] += distribution[currThreshInd]; + currThreshInd--; + } + // peakProb[tau] = 1 - yinBuffer[tau]; + sumProb += peakProb[tau]; + tau++; } else { tau++; } } + + if (peakProb[minInd] > 1) { + std::cerr << "WARNING: yin 
has prob > 1 ??? I'm returning all zeros instead." << std::endl; + return(std::vector(yinBufferSize)); + } + double nonPeakProb = 1; - for (size_t i = 0; i < yinBufferSize; ++i) - { - nonPeakProb -= peakProb[i]; + if (sumProb > 0) { + for (size_t i = minTau; i < maxTau; ++i) + { + peakProb[i] = peakProb[i] / sumProb * peakProb[minInd]; + nonPeakProb -= peakProb[i]; + } } - // std::cerr << nonPeakProb << std::endl; if (minInd > 0) { // std::cerr << "min set " << minVal << " " << minInd << " " << nonPeakProb << std::endl; @@ -282,55 +333,20 @@ } double betterTau = 0.0; - size_t x0; - size_t x2; - - if (tau < 1) - { - x0 = tau; + if (tau > 0 && tau < yinBufferSize-1) { + float s0, s1, s2; + s0 = yinBuffer[tau-1]; + s1 = yinBuffer[tau]; + s2 = yinBuffer[tau+1]; + + double adjustment = (s2 - s0) / (2 * (2 * s1 - s2 - s0)); + + if (abs(adjustment)>1) adjustment = 0; + + betterTau = tau + adjustment; } else { - x0 = tau - 1; - } - - if (tau + 1 < yinBufferSize) - { - x2 = tau + 1; - } else { - x2 = tau; - } - - if (x0 == tau) - { - if (yinBuffer[tau] <= yinBuffer[x2]) - { - betterTau = tau; - } else { - betterTau = x2; - } - } - else if (x2 == tau) - { - if (yinBuffer[tau] <= yinBuffer[x0]) - { - betterTau = tau; - } - else - { - betterTau = x0; - } - } - else - { - float s0, s1, s2; - s0 = yinBuffer[x0]; - s1 = yinBuffer[tau]; - s2 = yinBuffer[x2]; - // fixed AUBIO implementation, thanks to Karl Helgason: - // (2.0f * s1 - s2 - s0) was incorrectly multiplied with -1 - betterTau = tau + (s2 - s0) / (2 * (2 * s1 - s2 - s0)); - - // std::cerr << tau << " --> " << betterTau << std::endl; - + // std::cerr << "WARNING: can't do interpolation at the edge (tau = " << tau << "), will return un-interpolated value.\n"; + betterTau = tau; } return betterTau; } diff -r effea38d5a6d -r 9d6595563c11 YinUtil.h --- a/YinUtil.h Mon Jan 26 17:47:29 2015 +0000 +++ b/YinUtil.h Tue Mar 31 15:10:07 2015 +0100 @@ -31,9 +31,10 @@ static double sumSquare(const double *in, const size_t startInd, 
const size_t endInd); static void difference(const double *in, double *yinBuffer, const size_t yinBufferSize); static void fastDifference(const double *in, double *yinBuffer, const size_t yinBufferSize); + static void slowDifference(const double *in, double *yinBuffer, const size_t yinBufferSize); static void cumulativeDifference(double *yinBuffer, const size_t yinBufferSize); static int absoluteThreshold(const double *yinBuffer, const size_t yinBufferSize, const double thresh); - static vector yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize); + static vector yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize, size_t minTau = 0, size_t maxTau = 0); static double parabolicInterpolation(const double *yinBuffer, const size_t tau, const size_t yinBufferSize); }; diff -r effea38d5a6d -r 9d6595563c11 YinVamp.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/YinVamp.cpp Tue Mar 31 15:10:07 2015 +0100 @@ -0,0 +1,367 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. 
+*/ + +#include "YinVamp.h" +#include "MonoNote.h" + +#include "vamp-sdk/FFT.h" + +#include +#include + +#include +#include +#include + +using std::string; +using std::vector; +using Vamp::RealTime; + + +YinVamp::YinVamp(float inputSampleRate) : + Plugin(inputSampleRate), + m_channels(0), + m_stepSize(256), + m_blockSize(2048), + m_fmin(40), + m_fmax(1600), + m_yin(2048, inputSampleRate, 0.0), + m_outNoF0(0), + m_outNoPeriodicity(0), + m_outNoRms(0), + m_outNoSalience(0), + m_yinParameter(0.15f), + m_outputUnvoiced(2.0f) +{ +} + +YinVamp::~YinVamp() +{ +} + +string +YinVamp::getIdentifier() const +{ + return "yin"; +} + +string +YinVamp::getName() const +{ + return "Yin"; +} + +string +YinVamp::getDescription() const +{ + return "A vamp implementation of the Yin algorithm for monophonic frequency estimation."; +} + +string +YinVamp::getMaker() const +{ + return "Matthias Mauch"; +} + +int +YinVamp::getPluginVersion() const +{ + // Increment this each time you release a version that behaves + // differently from the previous one + return 1; +} + +string +YinVamp::getCopyright() const +{ + return "GPL"; +} + +YinVamp::InputDomain +YinVamp::getInputDomain() const +{ + return TimeDomain; +} + +size_t +YinVamp::getPreferredBlockSize() const +{ + return 2048; +} + +size_t +YinVamp::getPreferredStepSize() const +{ + return 256; +} + +size_t +YinVamp::getMinChannelCount() const +{ + return 1; +} + +size_t +YinVamp::getMaxChannelCount() const +{ + return 1; +} + +YinVamp::ParameterList +YinVamp::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor d; + d.identifier = "yinThreshold"; + d.name = "Yin threshold"; + d.description = "The greedy Yin search for a low value difference function is done once a dip lower than this threshold is reached."; + d.unit = ""; + d.minValue = 0.025f; + d.maxValue = 1.0f; + d.defaultValue = 0.15f; + d.isQuantized = true; + d.quantizeStep = 0.025f; + + list.push_back(d); + + d.identifier = "outputunvoiced"; + 
d.valueNames.clear(); + d.name = "Output estimates classified as unvoiced?"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 2.0f; + d.defaultValue = 2.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("No"); + d.valueNames.push_back("Yes"); + d.valueNames.push_back("Yes, as negative frequencies"); + list.push_back(d); + + return list; +} + +float +YinVamp::getParameter(string identifier) const +{ + if (identifier == "yinThreshold") { + return m_yinParameter; + } + if (identifier == "outputunvoiced") { + return m_outputUnvoiced; + } + return 0.f; +} + +void +YinVamp::setParameter(string identifier, float value) +{ + if (identifier == "yinThreshold") + { + m_yinParameter = value; + } + if (identifier == "outputunvoiced") + { + m_outputUnvoiced = value; + } +} + +YinVamp::ProgramList +YinVamp::getPrograms() const +{ + ProgramList list; + return list; +} + +string +YinVamp::getCurrentProgram() const +{ + return ""; // no programs +} + +void +YinVamp::selectProgram(string name) +{ +} + +YinVamp::OutputList +YinVamp::getOutputDescriptors() const +{ + OutputList outputs; + + OutputDescriptor d; + + int outputNumber = 0; + + d.identifier = "f0"; + d.name = "Estimated f0"; + d.description = "Estimated fundamental frequency"; + d.unit = "Hz"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = m_fmin; + d.maxValue = 500; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_outNoF0 = outputNumber++; + + d.identifier = "periodicity"; + d.name = "Periodicity"; + d.description = "by-product of Yin f0 estimation"; + d.unit = ""; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); 
+ d.hasDuration = false; + outputs.push_back(d); + m_outNoPeriodicity = outputNumber++; + + d.identifier = "rms"; + d.name = "Root mean square"; + d.description = "Root mean square of the waveform."; + d.unit = ""; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_outNoRms = outputNumber++; + + d.identifier = "salience"; + d.name = "Salience"; + d.description = "Yin Salience"; + d.hasFixedBinCount = true; + d.binCount = m_blockSize / 2; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_outNoSalience = outputNumber++; + + return outputs; +} + +bool +YinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + +/* + std::cerr << "YinVamp::initialise: channels = " << channels + << ", stepSize = " << stepSize << ", blockSize = " << blockSize + << std::endl; +*/ + m_channels = channels; + m_stepSize = stepSize; + m_blockSize = blockSize; + + reset(); + + return true; +} + +void +YinVamp::reset() +{ + m_yin.setThreshold(m_yinParameter); + m_yin.setFrameSize(m_blockSize); +/* + std::cerr << "YinVamp::reset: yin threshold set to " << (m_yinParameter) + << ", blockSize = " << m_blockSize + << std::endl; +*/ +} + +YinVamp::FeatureSet +YinVamp::process(const float *const *inputBuffers, RealTime timestamp) +{ + timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/2, lrintf(m_inputSampleRate)); + FeatureSet fs; + + double *dInputBuffers = new double[m_blockSize]; + for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = 
inputBuffers[0][i]; + + Yin::YinOutput yo = m_yin.process(dInputBuffers); + // std::cerr << "f0 in YinVamp: " << yo.f0 << std::endl; + Feature f; + f.hasTimestamp = true; + f.timestamp = timestamp; + if (m_outputUnvoiced == 0.0f) + { + // std::cerr << "f0 in YinVamp: " << yo.f0 << std::endl; + if (yo.f0 > 0 && yo.f0 < m_fmax && yo.f0 > m_fmin) { + f.values.push_back(yo.f0); + fs[m_outNoF0].push_back(f); + } + } else if (m_outputUnvoiced == 1.0f) + { + if (fabs(yo.f0) < m_fmax && fabs(yo.f0) > m_fmin) { + f.values.push_back(fabs(yo.f0)); + fs[m_outNoF0].push_back(f); + } + } else + { + if (fabs(yo.f0) < m_fmax && fabs(yo.f0) > m_fmin) { + f.values.push_back(yo.f0); + fs[m_outNoF0].push_back(f); + } + } + + f.values.clear(); + f.values.push_back(yo.rms); + fs[m_outNoRms].push_back(f); + + f.values.clear(); + for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin) + { + f.values.push_back(yo.salience[iBin]); + } + fs[m_outNoSalience].push_back(f); + + f.values.clear(); + // f.values[0] = yo.periodicity; + f.values.push_back(yo.periodicity); + fs[m_outNoPeriodicity].push_back(f); + + delete [] dInputBuffers; + + return fs; +} + +YinVamp::FeatureSet +YinVamp::getRemainingFeatures() +{ + FeatureSet fs; + return fs; +} diff -r effea38d5a6d -r 9d6595563c11 YinVamp.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/YinVamp.h Tue Mar 31 15:10:07 2015 +0100 @@ -0,0 +1,75 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. 
+*/ + +#ifndef _YINVAMP_H_ +#define _YINVAMP_H_ + +#include + +#include "Yin.h" + +class YinVamp : public Vamp::Plugin +{ +public: + YinVamp(float inputSampleRate); + virtual ~YinVamp(); + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + InputDomain getInputDomain() const; + size_t getPreferredBlockSize() const; + size_t getPreferredStepSize() const; + size_t getMinChannelCount() const; + size_t getMaxChannelCount() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string identifier) const; + void setParameter(std::string identifier, float value); + + ProgramList getPrograms() const; + std::string getCurrentProgram() const; + void selectProgram(std::string name); + + OutputList getOutputDescriptors() const; + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + size_t m_channels; + size_t m_stepSize; + size_t m_blockSize; + float m_fmin; + float m_fmax; + Yin m_yin; + + mutable int m_outNoF0; + mutable int m_outNoPeriodicity; + mutable int m_outNoRms; + mutable int m_outNoSalience; + + float m_yinParameter; + float m_outputUnvoiced; +}; + +#endif diff -r effea38d5a6d -r 9d6595563c11 libmain.cpp --- a/libmain.cpp Mon Jan 26 17:47:29 2015 +0000 +++ b/libmain.cpp Tue Mar 31 15:10:07 2015 +0100 @@ -14,11 +14,13 @@ #include #include -#include "PYIN.h" -#include "VampYin.h" +#include "PYinVamp.h" +#include "YinVamp.h" +#include "LocalCandidatePYIN.h" -static Vamp::PluginAdapter pyinPluginAdapter; -static Vamp::PluginAdapter vampyinPluginAdapter; +static Vamp::PluginAdapter pyinvampPluginAdapter; +static Vamp::PluginAdapter yinvampPluginAdapter; +static Vamp::PluginAdapter localCandidatePYINPluginAdapter; 
const VampPluginDescriptor * vampGetPluginDescriptor(unsigned int version, unsigned int index) @@ -26,8 +28,9 @@ if (version < 1) return 0; switch (index) { - case 0: return pyinPluginAdapter.getDescriptor(); - case 1: return vampyinPluginAdapter.getDescriptor(); + case 0: return pyinvampPluginAdapter.getDescriptor(); + case 1: return yinvampPluginAdapter.getDescriptor(); + case 2: return localCandidatePYINPluginAdapter.getDescriptor(); default: return 0; } }
diff -r effea38d5a6d -r 9d6595563c11 misc/pitchgenerator.m
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/misc/pitchgenerator.m Tue Mar 31 15:10:07 2015 +0100
@@ -0,0 +1,17 @@
+f = 50:800; % fundamental frequencies to synthesise, one output file per value
+fs = 44100; % sample rate used for the time axis and passed to wavwrite
+t = (0:(2*fs))/fs; % time axis: 2*fs+1 samples covering 0 s to 2 s inclusive
+nFreq = length(f);
+
+for iFreq = 1:nFreq
+    f(iFreq) % no semicolon: echoes the current frequency as progress output
+    x = zeros(size(t));
+    for iAdd = 1:100 % sum at most 100 harmonics of f(iFreq)
+        if f(iFreq)*iAdd > fs/2 % stop before adding a partial above fs/2, which would alias
+            break
+        end
+        x = x + cos(2*pi*f(iFreq)*t*iAdd)*0.6^(iAdd-1); % harmonic iAdd, amplitude scaled by 0.6 per harmonic
+    end
+    x = x / max(abs(x)); % normalise to a peak magnitude of 1
+    wavwrite(x, fs, sprintf('/Users/matthiasm/data/pyin/simplewavs/%iHz.wav', f(iFreq))); % file name carries the frequency
+end
\ No newline at end of file
diff -r effea38d5a6d -r 9d6595563c11 testdata/DontTellMeExcerpt.wav Binary file testdata/DontTellMeExcerpt.wav has changed diff -r effea38d5a6d -r 9d6595563c11 testdata/bob_02.wav Binary file testdata/bob_02.wav has changed diff -r effea38d5a6d -r 9d6595563c11 testdata/sine_a_440.ogg Binary file testdata/sine_a_440.ogg has changed diff -r effea38d5a6d -r 9d6595563c11 win32-build/pyin.pro --- a/win32-build/pyin.pro Mon Jan 26 17:47:29 2015 +0000 +++ b/win32-build/pyin.pro Tue Mar 31 15:10:07 2015 +0100 @@ -1,7 +1,7 @@ TEMPLATE = lib -INCLUDEPATH += ../../vamp-plugin-sdk/include ../../boost_1_54_0 -LIBS += ../../vamp-plugin-sdk/lib/libvamp-sdk.a -Wl,--version-script=../win32-build/vamp-plugin.map +INCLUDEPATH += ../../sv-dependency-builds/win32-mingw/include ../../../boost_1_55_0 +LIBS += ../../sv-dependency-builds/win32-mingw/lib/libvamp-sdk.a -Wl,--version-script=../win32-build/vamp-plugin.map CONFIG -= qt CONFIG += plugin release warn_on @@ -11,26 +11,30 @@
SOURCES += \ ../YinUtil.cpp \ ../Yin.cpp \ - ../VampYin.cpp \ ../SparseHMM.cpp \ - ../PYIN.cpp \ ../MonoPitchHMM.cpp \ ../MonoPitch.cpp \ ../MonoNoteParameters.cpp \ ../MonoNoteHMM.cpp \ ../MonoNote.cpp \ - ../libmain.cpp + ../libmain.cpp \ + ../YinVampFreqConstrained.cpp \ + ../YinVamp.cpp \ + ../PYinVamp.cpp \ + ../LocalCandidatePYIN.cpp HEADERS += \ ../YinUtil.h \ ../Yin.h \ - ../VampYin.h \ ../SparseHMM.h \ - ../PYIN.h \ ../MonoPitchHMM.h \ ../MonoPitch.h \ ../MonoNoteParameters.h \ ../MonoNoteHMM.h \ ../MonoNote.h \ - ../MeanFilter.h + ../MeanFilter.h \ + ../YinVampFreqConstrained.h \ + ../YinVamp.h \ + ../PYinVamp.h \ + ../LocalCandidatePYIN.h