Mercurial > hg > pyin
changeset 36:0aa451c5b2d9 tony
renames
author | matthiasm |
---|---|
date | Mon, 27 Jan 2014 09:34:17 +0000 |
parents | 8e50e88417e6 |
children | 34820224da74 ce38afe240a1 |
files | Makefile.inc PYIN.cpp PYIN.h PYinVamp.cpp PYinVamp.h libmain.cpp |
diffstat | 6 files changed, 583 insertions(+), 583 deletions(-) [+] |
line wrap: on
line diff
--- a/Makefile.inc Sun Jan 26 14:24:49 2014 +0000 +++ b/Makefile.inc Mon Jan 27 09:34:17 2014 +0000 @@ -9,7 +9,7 @@ PLUGIN := pyin$(PLUGIN_EXT) -SOURCES := PYIN.cpp \ +SOURCES := PYinVamp.cpp \ YinVamp.cpp \ LocalCandidatePYIN.cpp \ Yin.cpp \ @@ -64,7 +64,7 @@ # DO NOT DELETE -PYIN.o: PYIN.h +PYinVamp.o: PYinVamp.h YinVamp.o: YinVamp.h LocalCandidatePYIN.o: LocalCandidatePYIN.h Yin.o: Yin.h @@ -74,7 +74,7 @@ MonoPitchHMM.o: MonoPitchHMM.h SparseHMM.o: SparseHMM.h MonoNoteHMM.o: MonoNoteHMM.h -libmain.o: PYIN.h YinVamp.h LocalCandidatePYIN.h +libmain.o: PYinVamp.h YinVamp.h LocalCandidatePYIN.h test/TestMeanFilter.o: MeanFilter.h test/TestYin.o: Yin.h
--- a/PYIN.cpp Sun Jan 26 14:24:49 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,498 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - pYIN - A fundamental frequency estimator for monophonic audio - Centre for Digital Music, Queen Mary, University of London. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#include "PYIN.h" -#include "MonoNote.h" -#include "MonoPitch.h" - -#include "vamp-sdk/FFT.h" - -#include <vector> -#include <algorithm> - -#include <cstdio> -#include <cmath> -#include <complex> - -using std::string; -using std::vector; -using Vamp::RealTime; - - -PYIN::PYIN(float inputSampleRate) : - Plugin(inputSampleRate), - m_channels(0), - m_stepSize(256), - m_blockSize(2048), - m_fmin(40), - m_fmax(700), - m_yin(2048, inputSampleRate, 0.0), - m_oF0Candidates(0), - m_oF0Probs(0), - m_oVoicedProb(0), - m_oCandidateSalience(0), - m_oSmoothedPitchTrack(0), - m_oNotes(0), - m_threshDistr(2.0f), - m_outputUnvoiced(0.0f), - m_pitchProb(0), - m_timestamp(0) -{ -} - -PYIN::~PYIN() -{ -} - -string -PYIN::getIdentifier() const -{ - return "pyin"; -} - -string -PYIN::getName() const -{ - return "pYin"; -} - -string -PYIN::getDescription() const -{ - return "Monophonic pitch and note tracking based on a probabilistic Yin extension."; -} - -string -PYIN::getMaker() const -{ - return "Matthias Mauch"; -} - -int -PYIN::getPluginVersion() const -{ - // Increment this each time you release a version that behaves - // differently from the previous one - return 1; -} - -string -PYIN::getCopyright() const -{ - return "GPL"; -} - -PYIN::InputDomain -PYIN::getInputDomain() const -{ - return TimeDomain; -} - -size_t -PYIN::getPreferredBlockSize() const -{ - return 2048; -} - -size_t -PYIN::getPreferredStepSize() const -{ - return 256; -} - -size_t -PYIN::getMinChannelCount() const -{ - return 1; -} - -size_t -PYIN::getMaxChannelCount() const -{ - return 1; -} - -PYIN::ParameterList -PYIN::getParameterDescriptors() const -{ - ParameterList list; - - ParameterDescriptor d; - - d.identifier = "threshdistr"; - d.name = "Yin threshold distribution"; - d.description = "."; - d.unit = ""; - d.minValue = 0.0f; - d.maxValue = 7.0f; - d.defaultValue = 2.0f; - d.isQuantized = true; - d.quantizeStep = 1.0f; - d.valueNames.push_back("Uniform"); - d.valueNames.push_back("Beta (mean 0.10)"); - d.valueNames.push_back("Beta (mean 0.15)"); - d.valueNames.push_back("Beta (mean 0.20)"); - d.valueNames.push_back("Beta (mean 0.30)"); - d.valueNames.push_back("Single Value 0.10"); - d.valueNames.push_back("Single Value 0.15"); - d.valueNames.push_back("Single Value 0.20"); - list.push_back(d); - - d.identifier = "outputunvoiced"; - d.valueNames.clear(); - d.name = "Output estimates classified as unvoiced?"; - d.description = "."; - d.unit = ""; - d.minValue = 0.0f; - d.maxValue = 2.0f; - d.defaultValue = 0.0f; - d.isQuantized = true; - d.quantizeStep = 1.0f; - d.valueNames.push_back("No"); - d.valueNames.push_back("Yes"); - d.valueNames.push_back("Yes, as negative frequencies"); - list.push_back(d); - - return list; -} - -float -PYIN::getParameter(string identifier) const -{ - if (identifier == "threshdistr") { - return m_threshDistr; - } - if (identifier == "outputunvoiced") { - return m_outputUnvoiced; - } - return 0.f; -} - -void -PYIN::setParameter(string identifier, float value) -{ - if (identifier == "threshdistr") - { - m_threshDistr = value; - } - if (identifier == "outputunvoiced") - { - m_outputUnvoiced = value; - } - -} - -PYIN::ProgramList -PYIN::getPrograms() const -{ - ProgramList list; - return list; -} - -string -PYIN::getCurrentProgram() const -{ - return ""; // no programs -} - -void -PYIN::selectProgram(string name) -{ -} - -PYIN::OutputList -PYIN::getOutputDescriptors() const -{ - OutputList outputs; - - OutputDescriptor d; - - int outputNumber = 0; - - d.identifier = "f0candidates"; - d.name = "F0 Candidates"; - d.description = "Estimated fundamental frequency candidates."; - d.unit = "Hz"; - d.hasFixedBinCount = false; - // d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = m_fmin; - d.maxValue = 500; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_oF0Candidates = outputNumber++; - - d.identifier = "f0probs"; - d.name = "Candidate Probabilities"; - d.description = "Probabilities of estimated fundamental frequency candidates."; - d.unit = ""; - d.hasFixedBinCount = false; - // d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = 0; - d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_oF0Probs = outputNumber++; - - d.identifier = "voicedprob"; - d.name = "Voiced Probability"; - d.description = "Probability that the signal is voiced according to Probabilistic Yin."; - d.unit = ""; - d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = 0; - d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_oVoicedProb = outputNumber++; - - d.identifier = "candidatesalience"; - d.name = "Candidate Salience"; - d.description = "Candidate Salience"; - d.hasFixedBinCount = true; - d.binCount = m_blockSize / 2; - d.hasKnownExtents = true; - d.minValue = 0; - d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_oCandidateSalience = outputNumber++; - - d.identifier = "smoothedpitchtrack"; - d.name = "Smoothed Pitch Track"; - d.description = "."; - d.unit = "Hz"; - d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = false; - // d.minValue = 0; - // d.maxValue = 1; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - m_oSmoothedPitchTrack = outputNumber++; - - d.identifier = "notes"; - d.name = "Notes"; - d.description = "Derived fixed-pitch note frequencies"; - // d.unit = "MIDI unit"; - d.unit = "Hz"; - d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = false; - d.isQuantized = false; - d.sampleType = OutputDescriptor::VariableSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = true; - outputs.push_back(d); - m_oNotes = outputNumber++; - - return outputs; -} - -bool -PYIN::initialise(size_t channels, size_t stepSize, size_t blockSize) -{ - if (channels < getMinChannelCount() || - channels > getMaxChannelCount()) return false; - -/* - std::cerr << "PYIN::initialise: channels = " << channels - << ", stepSize = " << stepSize << ", blockSize = " << blockSize - << std::endl; -*/ - m_channels = channels; - m_stepSize = stepSize; - m_blockSize = blockSize; - - reset(); - - return true; -} - -void -PYIN::reset() -{ - m_yin.setThresholdDistr(m_threshDistr); - m_yin.setFrameSize(m_blockSize); - - m_pitchProb.clear(); - m_timestamp.clear(); -/* - std::cerr << "PYIN::reset" - << ", blockSize = " << m_blockSize - << std::endl; -*/ -} - -PYIN::FeatureSet -PYIN::process(const float *const *inputBuffers, RealTime timestamp) -{ - timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate)); - FeatureSet fs; - - double *dInputBuffers = new double[m_blockSize]; - for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i]; - - Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers); - delete [] dInputBuffers; - - // First, get the things out of the way that we don't want to output - // immediately, but instead save for later. - vector<pair<double, double> > tempPitchProb; - for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate) - { - double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69; - tempPitchProb.push_back(pair<double, double> - (tempPitch, yo.freqProb[iCandidate].second)); - } - m_pitchProb.push_back(tempPitchProb); - m_timestamp.push_back(timestamp); - - // F0 CANDIDATES - Feature f; - f.hasTimestamp = true; - f.timestamp = timestamp; - for (size_t i = 0; i < yo.freqProb.size(); ++i) - { - f.values.push_back(yo.freqProb[i].first); - } - fs[m_oF0Candidates].push_back(f); - - // VOICEDPROB - f.values.clear(); - float voicedProb = 0; - for (size_t i = 0; i < yo.freqProb.size(); ++i) - { - f.values.push_back(yo.freqProb[i].second); - voicedProb += yo.freqProb[i].second; - } - fs[m_oF0Probs].push_back(f); - - f.values.push_back(voicedProb); - fs[m_oVoicedProb].push_back(f); - - // SALIENCE -- maybe this should eventually disappear - f.values.clear(); - float salienceSum = 0; - for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin) - { - f.values.push_back(yo.salience[iBin]); - salienceSum += yo.salience[iBin]; - } - fs[m_oCandidateSalience].push_back(f); - - return fs; -} - -PYIN::FeatureSet -PYIN::getRemainingFeatures() -{ - FeatureSet fs; - Feature f; - f.hasTimestamp = true; - f.hasDuration = false; - - if (m_pitchProb.empty()) { - return fs; - } - - // MONO-PITCH STUFF - MonoPitch mp; - vector<float> mpOut = mp.process(m_pitchProb); - for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) - { - if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue; - f.timestamp = m_timestamp[iFrame]; - f.values.clear(); - if (m_outputUnvoiced == 1) - { - f.values.push_back(fabs(mpOut[iFrame])); - } else { - f.values.push_back(mpOut[iFrame]); - } - - fs[m_oSmoothedPitchTrack].push_back(f); - } - - // MONO-NOTE STUFF - MonoNote mn; - std::vector<std::vector<std::pair<double, double> > > smoothedPitch; - for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) { - std::vector<std::pair<double, double> > temp; - if (mpOut[iFrame] > 0) - { - double tempPitch = 12 * std::log(mpOut[iFrame]/440)/std::log(2.) + 69; - temp.push_back(std::pair<double,double>(tempPitch, .9)); - } - smoothedPitch.push_back(temp); - } - // vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb); - vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch); - - // turning feature into a note feature - f.hasTimestamp = true; - f.hasDuration = true; - f.values.clear(); - - int onsetFrame = 0; - bool isVoiced = 0; - bool oldIsVoiced = 0; - size_t nFrame = m_pitchProb.size(); - - std::vector<float> notePitchTrack; // collects pitches for one note at a time - for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) - { - isVoiced = mnOut[iFrame].noteState < 3 && smoothedPitch[iFrame].size() > 0; - if (isVoiced && iFrame != nFrame-1) - { - if (oldIsVoiced == 0) // beginning of a note - { - onsetFrame = iFrame; - notePitchTrack.clear(); - } - float pitch = smoothedPitch[iFrame][0].first; - notePitchTrack.push_back(pitch); // add to the note's pitch track - } else { // not currently voiced - if (oldIsVoiced == 1 && notePitchTrack.size() > 4) // end of the note - { - std::sort(notePitchTrack.begin(), notePitchTrack.end()); - float medianPitch = notePitchTrack[notePitchTrack.size()/2]; - float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440; - f.values.clear(); - f.values.push_back(medianFreq); - f.timestamp = m_timestamp[onsetFrame]; - f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame]; - fs[m_oNotes].push_back(f); - } - } - oldIsVoiced = isVoiced; - } - return fs; -}
--- a/PYIN.h Sun Jan 26 14:24:49 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,79 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - pYIN - A fundamental frequency estimator for monophonic audio - Centre for Digital Music, Queen Mary, University of London. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#ifndef _PYIN_H_ -#define _PYIN_H_ - -#include <vamp-sdk/Plugin.h> - -#include "Yin.h" - -class PYIN : public Vamp::Plugin -{ -public: - PYIN(float inputSampleRate); - virtual ~PYIN(); - - std::string getIdentifier() const; - std::string getName() const; - std::string getDescription() const; - std::string getMaker() const; - int getPluginVersion() const; - std::string getCopyright() const; - - InputDomain getInputDomain() const; - size_t getPreferredBlockSize() const; - size_t getPreferredStepSize() const; - size_t getMinChannelCount() const; - size_t getMaxChannelCount() const; - - ParameterList getParameterDescriptors() const; - float getParameter(std::string identifier) const; - void setParameter(std::string identifier, float value); - - ProgramList getPrograms() const; - std::string getCurrentProgram() const; - void selectProgram(std::string name); - - OutputList getOutputDescriptors() const; - - bool initialise(size_t channels, size_t stepSize, size_t blockSize); - void reset(); - - FeatureSet process(const float *const *inputBuffers, - Vamp::RealTime timestamp); - - FeatureSet getRemainingFeatures(); - -protected: - size_t m_channels; - size_t m_stepSize; - size_t m_blockSize; - float m_fmin; - float m_fmax; - Yin m_yin; - - mutable int m_oF0Candidates; - mutable int m_oF0Probs; - mutable int m_oVoicedProb; - mutable int m_oCandidateSalience; - mutable int m_oSmoothedPitchTrack; - mutable int m_oNotes; - - float m_threshDistr; - float m_outputUnvoiced; - vector<vector<pair<double, double> > > m_pitchProb; - vector<Vamp::RealTime> m_timestamp; -}; - -#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PYinVamp.cpp Mon Jan 27 09:34:17 2014 +0000 @@ -0,0 +1,498 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "PYinVamp.h" +#include "MonoNote.h" +#include "MonoPitch.h" + +#include "vamp-sdk/FFT.h" + +#include <vector> +#include <algorithm> + +#include <cstdio> +#include <cmath> +#include <complex> + +using std::string; +using std::vector; +using Vamp::RealTime; + + +PYinVamp::PYinVamp(float inputSampleRate) : + Plugin(inputSampleRate), + m_channels(0), + m_stepSize(256), + m_blockSize(2048), + m_fmin(40), + m_fmax(700), + m_yin(2048, inputSampleRate, 0.0), + m_oF0Candidates(0), + m_oF0Probs(0), + m_oVoicedProb(0), + m_oCandidateSalience(0), + m_oSmoothedPitchTrack(0), + m_oNotes(0), + m_threshDistr(2.0f), + m_outputUnvoiced(0.0f), + m_pitchProb(0), + m_timestamp(0) +{ +} + +PYinVamp::~PYinVamp() +{ +} + +string +PYinVamp::getIdentifier() const +{ + return "pyin"; +} + +string +PYinVamp::getName() const +{ + return "pYin"; +} + +string +PYinVamp::getDescription() const +{ + return "Monophonic pitch and note tracking based on a probabilistic Yin extension."; +} + +string +PYinVamp::getMaker() const +{ + return "Matthias Mauch"; +} + +int +PYinVamp::getPluginVersion() const +{ + // Increment this each time you release a version that behaves + // differently from the previous one + return 1; +} + +string +PYinVamp::getCopyright() const +{ + return "GPL"; +} + +PYinVamp::InputDomain +PYinVamp::getInputDomain() const +{ + return TimeDomain; +} + +size_t +PYinVamp::getPreferredBlockSize() const +{ + return 2048; +} + +size_t +PYinVamp::getPreferredStepSize() const +{ + return 256; +} + +size_t +PYinVamp::getMinChannelCount() const +{ + return 1; +} + +size_t +PYinVamp::getMaxChannelCount() const +{ + return 1; +} + +PYinVamp::ParameterList +PYinVamp::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor d; + + d.identifier = "threshdistr"; + d.name = "Yin threshold distribution"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 7.0f; + d.defaultValue = 2.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("Uniform"); + d.valueNames.push_back("Beta (mean 0.10)"); + d.valueNames.push_back("Beta (mean 0.15)"); + d.valueNames.push_back("Beta (mean 0.20)"); + d.valueNames.push_back("Beta (mean 0.30)"); + d.valueNames.push_back("Single Value 0.10"); + d.valueNames.push_back("Single Value 0.15"); + d.valueNames.push_back("Single Value 0.20"); + list.push_back(d); + + d.identifier = "outputunvoiced"; + d.valueNames.clear(); + d.name = "Output estimates classified as unvoiced?"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 2.0f; + d.defaultValue = 0.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("No"); + d.valueNames.push_back("Yes"); + d.valueNames.push_back("Yes, as negative frequencies"); + list.push_back(d); + + return list; +} + +float +PYinVamp::getParameter(string identifier) const +{ + if (identifier == "threshdistr") { + return m_threshDistr; + } + if (identifier == "outputunvoiced") { + return m_outputUnvoiced; + } + return 0.f; +} + +void +PYinVamp::setParameter(string identifier, float value) +{ + if (identifier == "threshdistr") + { + m_threshDistr = value; + } + if (identifier == "outputunvoiced") + { + m_outputUnvoiced = value; + } + +} + +PYinVamp::ProgramList +PYinVamp::getPrograms() const +{ + ProgramList list; + return list; +} + +string +PYinVamp::getCurrentProgram() const +{ + return ""; // no programs +} + +void +PYinVamp::selectProgram(string name) +{ +} + +PYinVamp::OutputList +PYinVamp::getOutputDescriptors() const +{ + OutputList outputs; + + OutputDescriptor d; + + int outputNumber = 0; + + d.identifier = "f0candidates"; + d.name = "F0 Candidates"; + d.description = "Estimated fundamental frequency candidates."; + d.unit = "Hz"; + d.hasFixedBinCount = false; + // d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = m_fmin; + d.maxValue = 500; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oF0Candidates = outputNumber++; + + d.identifier = "f0probs"; + d.name = "Candidate Probabilities"; + d.description = "Probabilities of estimated fundamental frequency candidates."; + d.unit = ""; + d.hasFixedBinCount = false; + // d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oF0Probs = outputNumber++; + + d.identifier = "voicedprob"; + d.name = "Voiced Probability"; + d.description = "Probability that the signal is voiced according to Probabilistic Yin."; + d.unit = ""; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oVoicedProb = outputNumber++; + + d.identifier = "candidatesalience"; + d.name = "Candidate Salience"; + d.description = "Candidate Salience"; + d.hasFixedBinCount = true; + d.binCount = m_blockSize / 2; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oCandidateSalience = outputNumber++; + + d.identifier = "smoothedpitchtrack"; + d.name = "Smoothed Pitch Track"; + d.description = "."; + d.unit = "Hz"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = false; + // d.minValue = 0; + // d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oSmoothedPitchTrack = outputNumber++; + + d.identifier = "notes"; + d.name = "Notes"; + d.description = "Derived fixed-pitch note frequencies"; + // d.unit = "MIDI unit"; + d.unit = "Hz"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = false; + d.isQuantized = false; + d.sampleType = OutputDescriptor::VariableSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = true; + outputs.push_back(d); + m_oNotes = outputNumber++; + + return outputs; +} + +bool +PYinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + +/* + std::cerr << "PYinVamp::initialise: channels = " << channels + << ", stepSize = " << stepSize << ", blockSize = " << blockSize + << std::endl; +*/ + m_channels = channels; + m_stepSize = stepSize; + m_blockSize = blockSize; + + reset(); + + return true; +} + +void +PYinVamp::reset() +{ + m_yin.setThresholdDistr(m_threshDistr); + m_yin.setFrameSize(m_blockSize); + + m_pitchProb.clear(); + m_timestamp.clear(); +/* + std::cerr << "PYinVamp::reset" + << ", blockSize = " << m_blockSize + << std::endl; +*/ +} + +PYinVamp::FeatureSet +PYinVamp::process(const float *const *inputBuffers, RealTime timestamp) +{ + timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate)); + FeatureSet fs; + + double *dInputBuffers = new double[m_blockSize]; + for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i]; + + Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers); + delete [] dInputBuffers; + + // First, get the things out of the way that we don't want to output + // immediately, but instead save for later. + vector<pair<double, double> > tempPitchProb; + for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate) + { + double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69; + tempPitchProb.push_back(pair<double, double> + (tempPitch, yo.freqProb[iCandidate].second)); + } + m_pitchProb.push_back(tempPitchProb); + m_timestamp.push_back(timestamp); + + // F0 CANDIDATES + Feature f; + f.hasTimestamp = true; + f.timestamp = timestamp; + for (size_t i = 0; i < yo.freqProb.size(); ++i) + { + f.values.push_back(yo.freqProb[i].first); + } + fs[m_oF0Candidates].push_back(f); + + // VOICEDPROB + f.values.clear(); + float voicedProb = 0; + for (size_t i = 0; i < yo.freqProb.size(); ++i) + { + f.values.push_back(yo.freqProb[i].second); + voicedProb += yo.freqProb[i].second; + } + fs[m_oF0Probs].push_back(f); + + f.values.push_back(voicedProb); + fs[m_oVoicedProb].push_back(f); + + // SALIENCE -- maybe this should eventually disappear + f.values.clear(); + float salienceSum = 0; + for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin) + { + f.values.push_back(yo.salience[iBin]); + salienceSum += yo.salience[iBin]; + } + fs[m_oCandidateSalience].push_back(f); + + return fs; +} + +PYinVamp::FeatureSet +PYinVamp::getRemainingFeatures() +{ + FeatureSet fs; + Feature f; + f.hasTimestamp = true; + f.hasDuration = false; + + if (m_pitchProb.empty()) { + return fs; + } + + // MONO-PITCH STUFF + MonoPitch mp; + vector<float> mpOut = mp.process(m_pitchProb); + for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) + { + if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue; + f.timestamp = m_timestamp[iFrame]; + f.values.clear(); + if (m_outputUnvoiced == 1) + { + f.values.push_back(fabs(mpOut[iFrame])); + } else { + f.values.push_back(mpOut[iFrame]); + } + + fs[m_oSmoothedPitchTrack].push_back(f); + } + + // MONO-NOTE STUFF + MonoNote mn; + std::vector<std::vector<std::pair<double, double> > > smoothedPitch; + for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) { + std::vector<std::pair<double, double> > temp; + if (mpOut[iFrame] > 0) + { + double tempPitch = 12 * std::log(mpOut[iFrame]/440)/std::log(2.) + 69; + temp.push_back(std::pair<double,double>(tempPitch, .9)); + } + smoothedPitch.push_back(temp); + } + // vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb); + vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch); + + // turning feature into a note feature + f.hasTimestamp = true; + f.hasDuration = true; + f.values.clear(); + + int onsetFrame = 0; + bool isVoiced = 0; + bool oldIsVoiced = 0; + size_t nFrame = m_pitchProb.size(); + + std::vector<float> notePitchTrack; // collects pitches for one note at a time + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) + { + isVoiced = mnOut[iFrame].noteState < 3 && smoothedPitch[iFrame].size() > 0; + if (isVoiced && iFrame != nFrame-1) + { + if (oldIsVoiced == 0) // beginning of a note + { + onsetFrame = iFrame; + notePitchTrack.clear(); + } + float pitch = smoothedPitch[iFrame][0].first; + notePitchTrack.push_back(pitch); // add to the note's pitch track + } else { // not currently voiced + if (oldIsVoiced == 1 && notePitchTrack.size() > 4) // end of the note + { + std::sort(notePitchTrack.begin(), notePitchTrack.end()); + float medianPitch = notePitchTrack[notePitchTrack.size()/2]; + float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440; + f.values.clear(); + f.values.push_back(medianFreq); + f.timestamp = m_timestamp[onsetFrame]; + f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame]; + fs[m_oNotes].push_back(f); + } + } + oldIsVoiced = isVoiced; + } + return fs; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PYinVamp.h Mon Jan 27 09:34:17 2014 +0000 @@ -0,0 +1,79 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _PYINVAMP_H_ +#define _PYINVAMP_H_ + +#include <vamp-sdk/Plugin.h> + +#include "Yin.h" + +class PYinVamp : public Vamp::Plugin +{ +public: + PYinVamp(float inputSampleRate); + virtual ~PYinVamp(); + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + InputDomain getInputDomain() const; + size_t getPreferredBlockSize() const; + size_t getPreferredStepSize() const; + size_t getMinChannelCount() const; + size_t getMaxChannelCount() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string identifier) const; + void setParameter(std::string identifier, float value); + + ProgramList getPrograms() const; + std::string getCurrentProgram() const; + void selectProgram(std::string name); + + OutputList getOutputDescriptors() const; + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + size_t m_channels; + size_t m_stepSize; + size_t m_blockSize; + float m_fmin; + float m_fmax; + Yin m_yin; + + mutable int m_oF0Candidates; + mutable int m_oF0Probs; + mutable int m_oVoicedProb; + mutable int m_oCandidateSalience; + mutable int m_oSmoothedPitchTrack; + mutable int m_oNotes; + + float m_threshDistr; + float m_outputUnvoiced; + vector<vector<pair<double, double> > > m_pitchProb; + vector<Vamp::RealTime> m_timestamp; +}; + +#endif
--- a/libmain.cpp Sun Jan 26 14:24:49 2014 +0000 +++ b/libmain.cpp Mon Jan 27 09:34:17 2014 +0000 @@ -14,11 +14,11 @@ #include <vamp/vamp.h> #include <vamp-sdk/PluginAdapter.h> -#include "PYIN.h" +#include "PYinVamp.h" #include "YinVamp.h" #include "LocalCandidatePYIN.h" -static Vamp::PluginAdapter<PYIN> pyinPluginAdapter; +static Vamp::PluginAdapter<PYinVamp> pyinvampPluginAdapter; static Vamp::PluginAdapter<YinVamp> yinvampPluginAdapter; static Vamp::PluginAdapter<LocalCandidatePYIN> localCandidatePYINPluginAdapter; @@ -28,7 +28,7 @@ if (version < 1) return 0; switch (index) { - case 0: return pyinPluginAdapter.getDescriptor(); + case 0: return pyinvampPluginAdapter.getDescriptor(); case 1: return yinvampPluginAdapter.getDescriptor(); case 2: return localCandidatePYINPluginAdapter.getDescriptor(); default: return 0;