Mercurial > hg > pyin
changeset 32:cd63e578a0a2 tony
it's only now that I've actually added the new plugin
author | matthiasm |
---|---|
date | Fri, 24 Jan 2014 12:18:11 +0000 |
parents | c0763eed48f0 |
children | b8cc6a9720a0 |
files | LocalCandidatePYIN.cpp LocalCandidatePYIN.h |
diffstat | 2 files changed, 458 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LocalCandidatePYIN.cpp Fri Jan 24 12:18:11 2014 +0000 @@ -0,0 +1,383 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COLocalCandidatePYING included with this distribution for more information. +*/ + +#include "LocalCandidatePYIN.h" +#include "MonoPitch.h" +#include "YinUtil.h" + +#include "vamp-sdk/FFT.h" + +#include <vector> +#include <algorithm> + +#include <cstdio> +#include <sstream> +// #include <iostream> +#include <cmath> +#include <complex> + +using std::string; +using std::vector; +using Vamp::RealTime; + + +LocalCandidatePYIN::LocalCandidatePYIN(float inputSampleRate) : + Plugin(inputSampleRate), + m_channels(0), + m_stepSize(256), + m_blockSize(2048), + m_fmin(40), + m_fmax(700), + m_yin(2048, inputSampleRate, 0.0), + m_oPitchTrackCandidates(0), + m_threshDistr(2.0f), + m_outputUnvoiced(0.0f), + m_pitchProb(0), + m_timestamp(0), + m_nCandidate(20) +{ +} + +LocalCandidatePYIN::~LocalCandidatePYIN() +{ +} + +string +LocalCandidatePYIN::getIdentifier() const +{ + return "localcandidatepyin"; +} + +string +LocalCandidatePYIN::getName() const +{ + return "Local Candidate PYIN"; +} + +string +LocalCandidatePYIN::getDescription() const +{ + return "Monophonic pitch and note tracking based on a probabilistic Yin extension."; +} + +string +LocalCandidatePYIN::getMaker() const +{ + return "Matthias Mauch"; +} + +int +LocalCandidatePYIN::getPluginVersion() const +{ + // Increment this each time you release a version that behaves + // differently from the previous one + return 1; +} + +string +LocalCandidatePYIN::getCopyright() const +{ + return "GPL"; +} + +LocalCandidatePYIN::InputDomain +LocalCandidatePYIN::getInputDomain() const +{ + return TimeDomain; +} + +size_t +LocalCandidatePYIN::getPreferredBlockSize() const +{ + return 2048; +} + +size_t +LocalCandidatePYIN::getPreferredStepSize() const +{ + return 256; +} + +size_t +LocalCandidatePYIN::getMinChannelCount() const +{ + return 1; +} + +size_t +LocalCandidatePYIN::getMaxChannelCount() const +{ + return 1; +} + +LocalCandidatePYIN::ParameterList +LocalCandidatePYIN::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor d; + + d.identifier = "threshdistr"; + d.name = "Yin threshold distribution"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 7.0f; + d.defaultValue = 2.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("Uniform"); + d.valueNames.push_back("Beta (mean 0.10)"); + d.valueNames.push_back("Beta (mean 0.15)"); + d.valueNames.push_back("Beta (mean 0.20)"); + d.valueNames.push_back("Beta (mean 0.30)"); + d.valueNames.push_back("Single Value 0.10"); + d.valueNames.push_back("Single Value 0.15"); + d.valueNames.push_back("Single Value 0.20"); + list.push_back(d); + + d.identifier = "outputunvoiced"; + d.valueNames.clear(); + d.name = "Output estimates classified as unvoiced?"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 2.0f; + d.defaultValue = 0.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("No"); + d.valueNames.push_back("Yes"); + d.valueNames.push_back("Yes, as negative frequencies"); + list.push_back(d); + + return list; +} + +float +LocalCandidatePYIN::getParameter(string identifier) const +{ + if (identifier == "threshdistr") { + return m_threshDistr; + } + if (identifier == "outputunvoiced") { + return m_outputUnvoiced; + } + return 0.f; +} + +void +LocalCandidatePYIN::setParameter(string identifier, float value) +{ + if (identifier == "threshdistr") + { + m_threshDistr = value; + } + if (identifier == "outputunvoiced") + { + m_outputUnvoiced = value; + } + +} + +LocalCandidatePYIN::ProgramList +LocalCandidatePYIN::getPrograms() const +{ + ProgramList list; + return list; +} + +string +LocalCandidatePYIN::getCurrentProgram() const +{ + return ""; // no programs +} + +void +LocalCandidatePYIN::selectProgram(string name) +{ +} + +LocalCandidatePYIN::OutputList +LocalCandidatePYIN::getOutputDescriptors() const +{ + OutputList outputs; + + OutputDescriptor d; + + int outputNumber = 0; + + d.identifier = "pitchtrackcandidates"; + d.name = "Pitch track candidates"; + d.description = "Multiple candidate pitch tracks."; + d.unit = "Hz"; + d.hasFixedBinCount = false; + // d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = m_fmin; + d.maxValue = 500; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + // m_oPitchTrackCandidates = outputNumber++; + + return outputs; +} + +bool +LocalCandidatePYIN::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + +/* + std::cerr << "LocalCandidatePYIN::initialise: channels = " << channels + << ", stepSize = " << stepSize << ", blockSize = " << blockSize + << std::endl; +*/ + m_channels = channels; + m_stepSize = stepSize; + m_blockSize = blockSize; + + reset(); + + return true; +} + +void +LocalCandidatePYIN::reset() +{ + m_yin.setThresholdDistr(m_threshDistr); + m_yin.setFrameSize(m_blockSize); + + m_pitchProb.clear(); + for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) + { + m_pitchProb.push_back(vector<vector<pair<double, double> > >()); + } + m_timestamp.clear(); +/* + std::cerr << "LocalCandidatePYIN::reset" + << ", blockSize = " << m_blockSize + << std::endl; +*/ +} + +LocalCandidatePYIN::FeatureSet +LocalCandidatePYIN::process(const float *const *inputBuffers, RealTime timestamp) +{ + timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate)); + FeatureSet fs; + + double *dInputBuffers = new double[m_blockSize]; + for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i]; + + size_t yinBufferSize = m_blockSize/2; + double* yinBuffer = new double[yinBufferSize]; + YinUtil::fastDifference(dInputBuffers, yinBuffer, yinBufferSize); + + delete [] dInputBuffers; + + YinUtil::cumulativeDifference(yinBuffer, yinBufferSize); + + for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) + { + float minFrequency = m_fmin * std::pow(2,(3.0*iCandidate)/12); + float maxFrequency = m_fmin * std::pow(2,(3.0*iCandidate+9)/12); + vector<double> peakProbability = YinUtil::yinProb(yinBuffer, m_threshDistr, yinBufferSize, m_inputSampleRate/maxFrequency, m_inputSampleRate/minFrequency); + + vector<pair<double, double> > tempPitchProb; + for (size_t iBuf = 0; iBuf < yinBufferSize; ++iBuf) + { + if (peakProbability[iBuf] > 0) + { + double currentF0 = + m_inputSampleRate * (1.0 / + YinUtil::parabolicInterpolation(yinBuffer, iBuf, yinBufferSize)); + double tempPitch = 12 * std::log(currentF0/440)/std::log(2.) + 69; + tempPitchProb.push_back(pair<double, double>(tempPitch, peakProbability[iBuf])); + } + } + m_pitchProb[iCandidate].push_back(tempPitchProb); + } + m_timestamp.push_back(timestamp); + + return fs; +} + +LocalCandidatePYIN::FeatureSet +LocalCandidatePYIN::getRemainingFeatures() +{ + FeatureSet fs; + Feature f; + f.hasTimestamp = true; + f.hasDuration = false; + f.values.push_back(0); + + std::cerr << "in remaining features" << std::endl; + + if (m_pitchProb.empty()) { + return fs; + } + + // MONO-PITCH STUFF + MonoPitch mp; + size_t nFrame = m_timestamp.size(); + vector<vector<float> > pitchTracks; + vector<float> freqSum = vector<float>(m_nCandidate); + vector<float> freqNumber = vector<float>(m_nCandidate); + vector<float> freqMean = vector<float>(m_nCandidate); + + for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) + { + pitchTracks.push_back(vector<float>(nFrame)); + vector<float> mpOut = mp.process(m_pitchProb[iCandidate]); + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) + { + if (mpOut[iFrame] > 0) { + pitchTracks[iCandidate][iFrame] = mpOut[iFrame]; + freqSum[iCandidate] += mpOut[iFrame]; + freqNumber[iCandidate]++; + } + } + freqMean[iCandidate] = freqSum[iCandidate]*1.0/freqNumber[iCandidate]; + } + + int actualCandidateNumber = 0; + for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) { + if ((freqNumber[iCandidate] > 0.8 * nFrame) && (iCandidate == 0 || fabs(freqMean[iCandidate]/freqMean[iCandidate-1]-1)<0.01)) + { + std::ostringstream convert; + convert << actualCandidateNumber++; + // f.label = sprintf (buffer, "%i", iCandidate); + f.label = convert.str(); + std::cerr << freqNumber[iCandidate] << " " << freqMean[iCandidate] << std::endl; + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) + { + if (pitchTracks[iCandidate][iFrame] > 0) + { + f.values[0] = pitchTracks[iCandidate][iFrame]; + f.timestamp = m_timestamp[iFrame]; + fs[m_oPitchTrackCandidates].push_back(f); + } + } + } + // std::cerr << freqNumber[iCandidate] << " " << (freqSum[iCandidate]*1.0/freqNumber[iCandidate]) << std::endl; + } + + // only retain those that are close to their means + + return fs; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LocalCandidatePYIN.h Fri Jan 24 12:18:11 2014 +0000 @@ -0,0 +1,75 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COLocalCandidatePYING included with this distribution for more information. +*/ + +#ifndef _LOCALCANDIDATEPYIN_H_ +#define _LOCALCANDIDATEPYIN_H_ + +#include <vamp-sdk/Plugin.h> + +#include "Yin.h" + +class LocalCandidatePYIN : public Vamp::Plugin +{ +public: + LocalCandidatePYIN(float inputSampleRate); + virtual ~LocalCandidatePYIN(); + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + InputDomain getInputDomain() const; + size_t getPreferredBlockSize() const; + size_t getPreferredStepSize() const; + size_t getMinChannelCount() const; + size_t getMaxChannelCount() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string identifier) const; + void setParameter(std::string identifier, float value); + + ProgramList getPrograms() const; + std::string getCurrentProgram() const; + void selectProgram(std::string name); + + OutputList getOutputDescriptors() const; + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + size_t m_channels; + size_t m_stepSize; + size_t m_blockSize; + float m_fmin; + float m_fmax; + Yin m_yin; + + mutable int m_oPitchTrackCandidates; + + float m_threshDistr; + float m_outputUnvoiced; + vector<vector<vector<pair<double, double> > > > m_pitchProb; + vector<Vamp::RealTime> m_timestamp; + size_t m_nCandidate; +}; + +#endif