Mercurial > hg > cepstral-pitchtracker
changeset 31:2c175adf8736
Pull out pitch tracker from vamp-simple-cepstrum to its own project
author | Chris Cannam |
---|---|
date | Thu, 19 Jul 2012 13:13:23 +0100 |
parents | 2554aab152a5 |
children | c88a9972975b |
files | CepstralPitchTracker.cpp CepstralPitchTracker.h CepstrumPitchTracker.cpp CepstrumPitchTracker.h Makefile.inc libmain.cpp |
diffstat | 6 files changed, 806 insertions(+), 785 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CepstralPitchTracker.cpp Thu Jul 19 13:13:23 2012 +0100 @@ -0,0 +1,646 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ +/* + This file is Copyright (c) 2012 Chris Cannam + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#include "CepstralPitchTracker.h" + +#include "vamp-sdk/FFT.h" + +#include <vector> +#include <algorithm> + +#include <cstdio> +#include <cmath> +#include <complex> + +using std::string; +using std::vector; +using Vamp::RealTime; + +CepstralPitchTracker::Hypothesis::Hypothesis() +{ + m_state = New; +} + +CepstralPitchTracker::Hypothesis::~Hypothesis() +{ +} + +bool +CepstralPitchTracker::Hypothesis::isWithinTolerance(Estimate s) const +{ + if (m_pending.empty()) { + return true; + } + + // check we are within a relatively close tolerance of the last + // candidate + Estimate last = m_pending[m_pending.size()-1]; + double r = s.freq / last.freq; + int cents = lrint(1200.0 * (log(r) / log(2.0))); + if (cents < -60 || cents > 60) return false; + + // and within a slightly bigger tolerance of the current mean + double meanFreq = getMeanFrequency(); + r = s.freq / meanFreq; + cents = lrint(1200.0 * (log(r) / log(2.0))); + if (cents < -80 || cents > 80) return false; + + return true; +} + +bool +CepstralPitchTracker::Hypothesis::isOutOfDateFor(Estimate s) const +{ + if (m_pending.empty()) return false; + + return ((s.time - m_pending[m_pending.size()-1].time) > + RealTime::fromMilliseconds(40)); +} + +bool +CepstralPitchTracker::Hypothesis::isSatisfied() const +{ + if (m_pending.empty()) return false; + + double meanConfidence = 0.0; + for (int i = 0; i < m_pending.size(); ++i) { + meanConfidence += m_pending[i].confidence; + } + meanConfidence /= m_pending.size(); + + int lengthRequired = 10000; + if (meanConfidence > 0.0) { + lengthRequired = int(2.0 / meanConfidence + 0.5); + } + + return (m_pending.size() > lengthRequired); +} + +bool +CepstralPitchTracker::Hypothesis::accept(Estimate s) +{ + bool accept = false; + + switch (m_state) { + + case New: + m_state = Provisional; + accept = true; + break; + + case Provisional: + if (isOutOfDateFor(s)) { + m_state = Rejected; + } else if (isWithinTolerance(s)) { + accept = true; + } + break; + + case Satisfied: + if (isOutOfDateFor(s)) { + m_state = Expired; + } else if (isWithinTolerance(s)) { + accept = true; + } + break; + + case Rejected: + break; + + case Expired: + break; + } + + if (accept) { + m_pending.push_back(s); + if (m_state == Provisional && isSatisfied()) { + m_state = Satisfied; + } + } + + return accept; +} + +CepstralPitchTracker::Hypothesis::State +CepstralPitchTracker::Hypothesis::getState() const +{ + return m_state; +} + +CepstralPitchTracker::Hypothesis::Estimates +CepstralPitchTracker::Hypothesis::getAcceptedEstimates() const +{ + if (m_state == Satisfied || m_state == Expired) { + return m_pending; + } else { + return Estimates(); + } +} + +double +CepstralPitchTracker::Hypothesis::getMeanFrequency() const +{ + double acc = 0.0; + for (int i = 0; i < m_pending.size(); ++i) { + acc += m_pending[i].freq; + } + acc /= m_pending.size(); + return acc; +} + +CepstralPitchTracker::Hypothesis::Note +CepstralPitchTracker::Hypothesis::getAveragedNote() const +{ + Note n; + + if (!(m_state == Satisfied || m_state == Expired)) { + n.freq = 0.0; + n.time = RealTime::zeroTime; + n.duration = RealTime::zeroTime; + return n; + } + + n.time = m_pending.begin()->time; + + Estimates::const_iterator i = m_pending.end(); + --i; + n.duration = i->time - n.time; + + // just mean frequency for now, but this isn't at all right perceptually + n.freq = getMeanFrequency(); + + return n; +} + +CepstralPitchTracker::CepstralPitchTracker(float inputSampleRate) : + Plugin(inputSampleRate), + m_channels(0), + m_stepSize(256), + m_blockSize(1024), + m_fmin(50), + m_fmax(900), + m_vflen(1), + m_binFrom(0), + m_binTo(0), + m_bins(0) +{ +} + +CepstralPitchTracker::~CepstralPitchTracker() +{ +} + +string +CepstralPitchTracker::getIdentifier() const +{ + return "cepstrum-pitch"; +} + +string +CepstralPitchTracker::getName() const +{ + return "Cepstrum Pitch Tracker"; +} + +string +CepstralPitchTracker::getDescription() const +{ + return "Estimate f0 of monophonic material using a cepstrum method."; +} + +string +CepstralPitchTracker::getMaker() const +{ + return "Chris Cannam"; +} + +int +CepstralPitchTracker::getPluginVersion() const +{ + // Increment this each time you release a version that behaves + // differently from the previous one + return 1; +} + +string +CepstralPitchTracker::getCopyright() const +{ + return "Freely redistributable (BSD license)"; +} + +CepstralPitchTracker::InputDomain +CepstralPitchTracker::getInputDomain() const +{ + return FrequencyDomain; +} + +size_t +CepstralPitchTracker::getPreferredBlockSize() const +{ + return 1024; +} + +size_t +CepstralPitchTracker::getPreferredStepSize() const +{ + return 256; +} + +size_t +CepstralPitchTracker::getMinChannelCount() const +{ + return 1; +} + +size_t +CepstralPitchTracker::getMaxChannelCount() const +{ + return 1; +} + +CepstralPitchTracker::ParameterList +CepstralPitchTracker::getParameterDescriptors() const +{ + ParameterList list; + return list; +} + +float +CepstralPitchTracker::getParameter(string identifier) const +{ + return 0.f; +} + +void +CepstralPitchTracker::setParameter(string identifier, float value) +{ +} + +CepstralPitchTracker::ProgramList +CepstralPitchTracker::getPrograms() const +{ + ProgramList list; + return list; +} + +string +CepstralPitchTracker::getCurrentProgram() const +{ + return ""; // no programs +} + +void +CepstralPitchTracker::selectProgram(string name) +{ +} + +CepstralPitchTracker::OutputList +CepstralPitchTracker::getOutputDescriptors() const +{ + OutputList outputs; + + int n = 0; + + OutputDescriptor d; + + d.identifier = "f0"; + d.name = "Estimated f0"; + d.description = "Estimated fundamental frequency"; + d.unit = "Hz"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = m_fmin; + d.maxValue = m_fmax; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + + d.identifier = "notes"; + d.name = "Notes"; + d.description = "Derived fixed-pitch note frequencies"; + d.unit = "Hz"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = m_fmin; + d.maxValue = m_fmax; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = true; + outputs.push_back(d); + + return outputs; +} + +bool +CepstralPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + +// std::cerr << "CepstralPitchTracker::initialise: channels = " << channels +// << ", stepSize = " << stepSize << ", blockSize = " << blockSize +// << std::endl; + + m_channels = channels; + m_stepSize = stepSize; + m_blockSize = blockSize; + + m_binFrom = int(m_inputSampleRate / m_fmax); + m_binTo = int(m_inputSampleRate / m_fmin); + + if (m_binTo >= (int)m_blockSize / 2) { + m_binTo = m_blockSize / 2 - 1; + } + + m_bins = (m_binTo - m_binFrom) + 1; + + reset(); + + return true; +} + +void +CepstralPitchTracker::reset() +{ +} + +void +CepstralPitchTracker::addFeaturesFrom(Hypothesis h, FeatureSet &fs) +{ + Hypothesis::Estimates es = h.getAcceptedEstimates(); + + for (int i = 0; i < es.size(); ++i) { + Feature f; + f.hasTimestamp = true; + f.timestamp = es[i].time; + f.values.push_back(es[i].freq); + fs[0].push_back(f); + } + + Feature nf; + nf.hasTimestamp = true; + nf.hasDuration = true; + Hypothesis::Note n = h.getAveragedNote(); + nf.timestamp = n.time; + nf.duration = n.duration; + nf.values.push_back(n.freq); + fs[1].push_back(nf); +} + +void +CepstralPitchTracker::filter(const double *cep, double *data) +{ + for (int i = 0; i < m_bins; ++i) { + double v = 0; + int n = 0; + // average according to the vertical filter length + for (int j = -m_vflen/2; j <= m_vflen/2; ++j) { + int ix = i + m_binFrom + j; + if (ix >= 0 && ix < m_blockSize) { + v += cep[ix]; + ++n; + } + } + data[i] = v / n; + } +} + +double +CepstralPitchTracker::cubicInterpolate(const double y[4], double x) +{ + double a0 = y[3] - y[2] - y[0] + y[1]; + double a1 = y[0] - y[1] - a0; + double a2 = y[2] - y[0]; + double a3 = y[1]; + return + a0 * x * x * x + + a1 * x * x + + a2 * x + + a3; +} + +double +CepstralPitchTracker::findInterpolatedPeak(const double *in, int maxbin) +{ + if (maxbin < 2 || maxbin > m_bins - 3) { + return maxbin; + } + + double maxval = 0.0; + double maxidx = maxbin; + + const int divisions = 10; + double y[4]; + + y[0] = in[maxbin-1]; + y[1] = in[maxbin]; + y[2] = in[maxbin+1]; + y[3] = in[maxbin+2]; + for (int i = 0; i < divisions; ++i) { + double probe = double(i) / double(divisions); + double value = cubicInterpolate(y, probe); + if (value > maxval) { + maxval = value; + maxidx = maxbin + probe; + } + } + + y[3] = y[2]; + y[2] = y[1]; + y[1] = y[0]; + y[0] = in[maxbin-2]; + for (int i = 0; i < divisions; ++i) { + double probe = double(i) / double(divisions); + double value = cubicInterpolate(y, probe); + if (value > maxval) { + maxval = value; + maxidx = maxbin - 1 + probe; + } + } + +/* + std::cerr << "centre = " << maxbin << ": [" + << in[maxbin-2] << "," + << in[maxbin-1] << "," + << in[maxbin] << "," + << in[maxbin+1] << "," + << in[maxbin+2] << "] -> " << maxidx << std::endl; +*/ + + return maxidx; +} + +CepstralPitchTracker::FeatureSet +CepstralPitchTracker::process(const float *const *inputBuffers, RealTime timestamp) +{ + FeatureSet fs; + + int bs = m_blockSize; + int hs = m_blockSize/2 + 1; + + double *rawcep = new double[bs]; + double *io = new double[bs]; + double *logmag = new double[bs]; + + // The "inverse symmetric" method. Seems to be the most reliable + + double magmean = 0.0; + + for (int i = 0; i < hs; ++i) { + + double power = + inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] + + inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1]; + double mag = sqrt(power); + + magmean += mag; + + double lm = log(mag + 0.00000001); + + logmag[i] = lm; + if (i > 0) logmag[bs - i] = lm; + } + + magmean /= hs; + double threshold = 0.1; // for magmean + + Vamp::FFT::inverse(bs, logmag, 0, rawcep, io); + + delete[] logmag; + delete[] io; + + int n = m_bins; + double *data = new double[n]; + filter(rawcep, data); + delete[] rawcep; + + double maxval = 0.0; + int maxbin = -1; + + for (int i = 0; i < n; ++i) { + if (data[i] > maxval) { + maxval = data[i]; + maxbin = i; + } + } + + if (maxbin < 0) { + delete[] data; + return fs; + } + + double nextPeakVal = 0.0; + for (int i = 1; i+1 < n; ++i) { + if (data[i] > data[i-1] && + data[i] > data[i+1] && + i != maxbin && + data[i] > nextPeakVal) { + nextPeakVal = data[i]; + } + } + + double cimax = findInterpolatedPeak(data, maxbin); + double peakfreq = m_inputSampleRate / (cimax + m_binFrom); + + double confidence = 0.0; + if (nextPeakVal != 0.0) { + confidence = (maxval - nextPeakVal) * 10.0; + if (magmean < threshold) confidence = 0.0; + std::cerr << "magmean = " << magmean << ", confidence = " << confidence << std::endl; + } + + Hypothesis::Estimate e; + e.freq = peakfreq; + e.time = timestamp; + e.confidence = confidence; + +// m_good.advanceTime(); + for (int i = 0; i < m_possible.size(); ++i) { +// m_possible[i].advanceTime(); + } + + if (!m_good.accept(e)) { + + int candidate = -1; + bool accepted = false; + + for (int i = 0; i < m_possible.size(); ++i) { + if (m_possible[i].accept(e)) { + if (m_possible[i].getState() == Hypothesis::Satisfied) { + accepted = true; + candidate = i; + } + break; + } + } + + if (!accepted) { + Hypothesis h; + h.accept(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this + m_possible.push_back(h); + } + + if (m_good.getState() == Hypothesis::Expired) { + addFeaturesFrom(m_good, fs); + } + + if (m_good.getState() == Hypothesis::Expired || + m_good.getState() == Hypothesis::Rejected) { + if (candidate >= 0) { + m_good = m_possible[candidate]; + } else { + m_good = Hypothesis(); + } + } + + // reap rejected/expired hypotheses from possible list + Hypotheses toReap = m_possible; + m_possible.clear(); + for (int i = 0; i < toReap.size(); ++i) { + Hypothesis h = toReap[i]; + if (h.getState() != Hypothesis::Rejected && + h.getState() != Hypothesis::Expired) { + m_possible.push_back(h); + } + } + } + + delete[] data; + return fs; +} + +CepstralPitchTracker::FeatureSet +CepstralPitchTracker::getRemainingFeatures() +{ + FeatureSet fs; + if (m_good.getState() == Hypothesis::Satisfied) { + addFeaturesFrom(m_good, fs); + } + return fs; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CepstralPitchTracker.h Thu Jul 19 13:13:23 2012 +0100 @@ -0,0 +1,133 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ +/* + This file is Copyright (c) 2012 Chris Cannam + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifndef _CEPSTRUM_PITCH_H_ +#define _CEPSTRUM_PITCH_H_ + +#include <vamp-sdk/Plugin.h> + +class CepstralPitchTracker : public Vamp::Plugin +{ +public: + CepstralPitchTracker(float inputSampleRate); + virtual ~CepstralPitchTracker(); + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + InputDomain getInputDomain() const; + size_t getPreferredBlockSize() const; + size_t getPreferredStepSize() const; + size_t getMinChannelCount() const; + size_t getMaxChannelCount() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string identifier) const; + void setParameter(std::string identifier, float value); + + ProgramList getPrograms() const; + std::string getCurrentProgram() const; + void selectProgram(std::string name); + + OutputList getOutputDescriptors() const; + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + size_t m_channels; + size_t m_stepSize; + size_t m_blockSize; + float m_fmin; + float m_fmax; + int m_vflen; + + int m_binFrom; + int m_binTo; + int m_bins; // count of "interesting" bins, those returned in m_cepOutput + + class Hypothesis { + + public: + struct Estimate { + double freq; + Vamp::RealTime time; + double confidence; + }; + typedef std::vector<Estimate> Estimates; + + struct Note { + double freq; + Vamp::RealTime time; + Vamp::RealTime duration; + }; + + Hypothesis(); + ~Hypothesis(); + + enum State { + New, + Provisional, + Satisfied, + Rejected, + Expired + }; + + bool accept(Estimate); + + State getState() const; + Estimates getAcceptedEstimates() const; + Note getAveragedNote() const; + + private: + bool isWithinTolerance(Estimate) const; + bool isOutOfDateFor(Estimate) const; + bool isSatisfied() const; + double getMeanFrequency() const; + + State m_state; + Estimates m_pending; + }; + + typedef std::vector<Hypothesis> Hypotheses; + Hypotheses m_possible; + Hypothesis m_good; + + void addFeaturesFrom(Hypothesis h, FeatureSet &fs); + + void filter(const double *in, double *out); + double cubicInterpolate(const double[4], double); + double findInterpolatedPeak(const double *in, int maxbin); +}; + +#endif
--- a/CepstrumPitchTracker.cpp Fri Jul 13 21:35:29 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,644 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ -/* - Permission is hereby granted, free of charge, to any person - obtaining a copy of this software and associated documentation - files (the "Software"), to deal in the Software without - restriction, including without limitation the rights to use, copy, - modify, merge, publish, distribute, sublicense, and/or sell copies - of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR - ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF - CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include "CepstrumPitchTracker.h" - -#include "vamp-sdk/FFT.h" - -#include <vector> -#include <algorithm> - -#include <cstdio> -#include <cmath> -#include <complex> - -using std::string; -using std::vector; -using Vamp::RealTime; - -CepstrumPitchTracker::Hypothesis::Hypothesis() -{ - m_state = New; -} - -CepstrumPitchTracker::Hypothesis::~Hypothesis() -{ -} - -bool -CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s) const -{ - if (m_pending.empty()) { - return true; - } - - // check we are within a relatively close tolerance of the last - // candidate - Estimate last = m_pending[m_pending.size()-1]; - double r = s.freq / last.freq; - int cents = lrint(1200.0 * (log(r) / log(2.0))); - if (cents < -60 || cents > 60) return false; - - // and within a slightly bigger tolerance of the current mean - double meanFreq = getMeanFrequency(); - r = s.freq / meanFreq; - cents = lrint(1200.0 * (log(r) / log(2.0))); - if (cents < -80 || cents > 80) return false; - - return true; -} - -bool -CepstrumPitchTracker::Hypothesis::isOutOfDateFor(Estimate s) const -{ - if (m_pending.empty()) return false; - - return ((s.time - m_pending[m_pending.size()-1].time) > - RealTime::fromMilliseconds(40)); -} - -bool -CepstrumPitchTracker::Hypothesis::isSatisfied() const -{ - if (m_pending.empty()) return false; - - double meanConfidence = 0.0; - for (int i = 0; i < m_pending.size(); ++i) { - meanConfidence += m_pending[i].confidence; - } - meanConfidence /= m_pending.size(); - - int lengthRequired = 10000; - if (meanConfidence > 0.0) { - lengthRequired = int(2.0 / meanConfidence + 0.5); - } - - return (m_pending.size() > lengthRequired); -} - -bool -CepstrumPitchTracker::Hypothesis::accept(Estimate s) -{ - bool accept = false; - - switch (m_state) { - - case New: - m_state = Provisional; - accept = true; - break; - - case Provisional: - if (isOutOfDateFor(s)) { - m_state = Rejected; - } else if (isWithinTolerance(s)) { - accept = true; - } - break; - - case Satisfied: - if (isOutOfDateFor(s)) { - m_state = Expired; - } else if (isWithinTolerance(s)) { - accept = true; - } - break; - - case Rejected: - break; - - case Expired: - break; - } - - if (accept) { - m_pending.push_back(s); - if (m_state == Provisional && isSatisfied()) { - m_state = Satisfied; - } - } - - return accept; -} - -CepstrumPitchTracker::Hypothesis::State -CepstrumPitchTracker::Hypothesis::getState() const -{ - return m_state; -} - -CepstrumPitchTracker::Hypothesis::Estimates -CepstrumPitchTracker::Hypothesis::getAcceptedEstimates() const -{ - if (m_state == Satisfied || m_state == Expired) { - return m_pending; - } else { - return Estimates(); - } -} - -double -CepstrumPitchTracker::Hypothesis::getMeanFrequency() const -{ - double acc = 0.0; - for (int i = 0; i < m_pending.size(); ++i) { - acc += m_pending[i].freq; - } - acc /= m_pending.size(); - return acc; -} - -CepstrumPitchTracker::Hypothesis::Note -CepstrumPitchTracker::Hypothesis::getAveragedNote() const -{ - Note n; - - if (!(m_state == Satisfied || m_state == Expired)) { - n.freq = 0.0; - n.time = RealTime::zeroTime; - n.duration = RealTime::zeroTime; - return n; - } - - n.time = m_pending.begin()->time; - - Estimates::const_iterator i = m_pending.end(); - --i; - n.duration = i->time - n.time; - - // just mean frequency for now, but this isn't at all right perceptually - n.freq = getMeanFrequency(); - - return n; -} - -CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) : - Plugin(inputSampleRate), - m_channels(0), - m_stepSize(256), - m_blockSize(1024), - m_fmin(50), - m_fmax(900), - m_vflen(1), - m_binFrom(0), - m_binTo(0), - m_bins(0) -{ -} - -CepstrumPitchTracker::~CepstrumPitchTracker() -{ -} - -string -CepstrumPitchTracker::getIdentifier() const -{ - return "cepstrum-pitch"; -} - -string -CepstrumPitchTracker::getName() const -{ - return "Cepstrum Pitch Tracker"; -} - -string -CepstrumPitchTracker::getDescription() const -{ - return "Estimate f0 of monophonic material using a cepstrum method."; -} - -string -CepstrumPitchTracker::getMaker() const -{ - return "Chris Cannam"; -} - -int -CepstrumPitchTracker::getPluginVersion() const -{ - // Increment this each time you release a version that behaves - // differently from the previous one - return 1; -} - -string -CepstrumPitchTracker::getCopyright() const -{ - return "Freely redistributable (BSD license)"; -} - -CepstrumPitchTracker::InputDomain -CepstrumPitchTracker::getInputDomain() const -{ - return FrequencyDomain; -} - -size_t -CepstrumPitchTracker::getPreferredBlockSize() const -{ - return 1024; -} - -size_t -CepstrumPitchTracker::getPreferredStepSize() const -{ - return 256; -} - -size_t -CepstrumPitchTracker::getMinChannelCount() const -{ - return 1; -} - -size_t -CepstrumPitchTracker::getMaxChannelCount() const -{ - return 1; -} - -CepstrumPitchTracker::ParameterList -CepstrumPitchTracker::getParameterDescriptors() const -{ - ParameterList list; - return list; -} - -float -CepstrumPitchTracker::getParameter(string identifier) const -{ - return 0.f; -} - -void -CepstrumPitchTracker::setParameter(string identifier, float value) -{ -} - -CepstrumPitchTracker::ProgramList -CepstrumPitchTracker::getPrograms() const -{ - ProgramList list; - return list; -} - -string -CepstrumPitchTracker::getCurrentProgram() const -{ - return ""; // no programs -} - -void -CepstrumPitchTracker::selectProgram(string name) -{ -} - -CepstrumPitchTracker::OutputList -CepstrumPitchTracker::getOutputDescriptors() const -{ - OutputList outputs; - - int n = 0; - - OutputDescriptor d; - - d.identifier = "f0"; - d.name = "Estimated f0"; - d.description = "Estimated fundamental frequency"; - d.unit = "Hz"; - d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = m_fmin; - d.maxValue = m_fmax; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = false; - outputs.push_back(d); - - d.identifier = "notes"; - d.name = "Notes"; - d.description = "Derived fixed-pitch note frequencies"; - d.unit = "Hz"; - d.hasFixedBinCount = true; - d.binCount = 1; - d.hasKnownExtents = true; - d.minValue = m_fmin; - d.maxValue = m_fmax; - d.isQuantized = false; - d.sampleType = OutputDescriptor::FixedSampleRate; - d.sampleRate = (m_inputSampleRate / m_stepSize); - d.hasDuration = true; - outputs.push_back(d); - - return outputs; -} - -bool -CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize) -{ - if (channels < getMinChannelCount() || - channels > getMaxChannelCount()) return false; - -// std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels -// << ", stepSize = " << stepSize << ", blockSize = " << blockSize -// << std::endl; - - m_channels = channels; - m_stepSize = stepSize; - m_blockSize = blockSize; - - m_binFrom = int(m_inputSampleRate / m_fmax); - m_binTo = int(m_inputSampleRate / m_fmin); - - if (m_binTo >= (int)m_blockSize / 2) { - m_binTo = m_blockSize / 2 - 1; - } - - m_bins = (m_binTo - m_binFrom) + 1; - - reset(); - - return true; -} - -void -CepstrumPitchTracker::reset() -{ -} - -void -CepstrumPitchTracker::addFeaturesFrom(Hypothesis h, FeatureSet &fs) -{ - Hypothesis::Estimates es = h.getAcceptedEstimates(); - - for (int i = 0; i < es.size(); ++i) { - Feature f; - f.hasTimestamp = true; - f.timestamp = es[i].time; - f.values.push_back(es[i].freq); - fs[0].push_back(f); - } - - Feature nf; - nf.hasTimestamp = true; - nf.hasDuration = true; - Hypothesis::Note n = h.getAveragedNote(); - nf.timestamp = n.time; - nf.duration = n.duration; - nf.values.push_back(n.freq); - fs[1].push_back(nf); -} - -void -CepstrumPitchTracker::filter(const double *cep, double *data) -{ - for (int i = 0; i < m_bins; ++i) { - double v = 0; - int n = 0; - // average according to the vertical filter length - for (int j = -m_vflen/2; j <= m_vflen/2; ++j) { - int ix = i + m_binFrom + j; - if (ix >= 0 && ix < m_blockSize) { - v += cep[ix]; - ++n; - } - } - data[i] = v / n; - } -} - -double -CepstrumPitchTracker::cubicInterpolate(const double y[4], double x) -{ - double a0 = y[3] - y[2] - y[0] + y[1]; - double a1 = y[0] - y[1] - a0; - double a2 = y[2] - y[0]; - double a3 = y[1]; - return - a0 * x * x * x + - a1 * x * x + - a2 * x + - a3; -} - -double -CepstrumPitchTracker::findInterpolatedPeak(const double *in, int maxbin) -{ - if (maxbin < 2 || maxbin > m_bins - 3) { - return maxbin; - } - - double maxval = 0.0; - double maxidx = maxbin; - - const int divisions = 10; - double y[4]; - - y[0] = in[maxbin-1]; - y[1] = in[maxbin]; - y[2] = in[maxbin+1]; - y[3] = in[maxbin+2]; - for (int i = 0; i < divisions; ++i) { - double probe = double(i) / double(divisions); - double value = cubicInterpolate(y, probe); - if (value > maxval) { - maxval = value; - maxidx = maxbin + probe; - } - } - - y[3] = y[2]; - y[2] = y[1]; - y[1] = y[0]; - y[0] = in[maxbin-2]; - for (int i = 0; i < divisions; ++i) { - double probe = double(i) / double(divisions); - double value = cubicInterpolate(y, probe); - if (value > maxval) { - maxval = value; - maxidx = maxbin - 1 + probe; - } - } - -/* - std::cerr << "centre = " << maxbin << ": [" - << in[maxbin-2] << "," - << in[maxbin-1] << "," - << in[maxbin] << "," - << in[maxbin+1] << "," - << in[maxbin+2] << "] -> " << maxidx << std::endl; -*/ - - return maxidx; -} - -CepstrumPitchTracker::FeatureSet -CepstrumPitchTracker::process(const float *const *inputBuffers, RealTime timestamp) -{ - FeatureSet fs; - - int bs = m_blockSize; - int hs = m_blockSize/2 + 1; - - double *rawcep = new double[bs]; - double *io = new double[bs]; - double *logmag = new double[bs]; - - // The "inverse symmetric" method. Seems to be the most reliable - - double magmean = 0.0; - - for (int i = 0; i < hs; ++i) { - - double power = - inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] + - inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1]; - double mag = sqrt(power); - - magmean += mag; - - double lm = log(mag + 0.00000001); - - logmag[i] = lm; - if (i > 0) logmag[bs - i] = lm; - } - - magmean /= hs; - double threshold = 0.1; // for magmean - - Vamp::FFT::inverse(bs, logmag, 0, rawcep, io); - - delete[] logmag; - delete[] io; - - int n = m_bins; - double *data = new double[n]; - filter(rawcep, data); - delete[] rawcep; - - double maxval = 0.0; - int maxbin = -1; - - for (int i = 0; i < n; ++i) { - if (data[i] > maxval) { - maxval = data[i]; - maxbin = i; - } - } - - if (maxbin < 0) { - delete[] data; - return fs; - } - - double nextPeakVal = 0.0; - for (int i = 1; i+1 < n; ++i) { - if (data[i] > data[i-1] && - data[i] > data[i+1] && - i != maxbin && - data[i] > nextPeakVal) { - nextPeakVal = data[i]; - } - } - - double cimax = findInterpolatedPeak(data, maxbin); - double peakfreq = m_inputSampleRate / (cimax + m_binFrom); - - double confidence = 0.0; - if (nextPeakVal != 0.0) { - confidence = (maxval - nextPeakVal) * 10.0; - if (magmean < threshold) confidence = 0.0; - std::cerr << "magmean = " << magmean << ", confidence = " << confidence << std::endl; - } - - Hypothesis::Estimate e; - e.freq = peakfreq; - e.time = timestamp; - e.confidence = confidence; - -// m_good.advanceTime(); - for (int i = 0; i < m_possible.size(); ++i) { -// m_possible[i].advanceTime(); - } - - if (!m_good.accept(e)) { - - int candidate = -1; - bool accepted = false; - - for (int i = 0; i < m_possible.size(); ++i) { - if (m_possible[i].accept(e)) { - if (m_possible[i].getState() == Hypothesis::Satisfied) { - accepted = true; - candidate = i; - } - break; - } - } - - if (!accepted) { - Hypothesis h; - h.accept(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this - m_possible.push_back(h); - } - - if (m_good.getState() == Hypothesis::Expired) { - addFeaturesFrom(m_good, fs); - } - - if (m_good.getState() == Hypothesis::Expired || - m_good.getState() == Hypothesis::Rejected) { - if (candidate >= 0) { - m_good = m_possible[candidate]; - } else { - m_good = Hypothesis(); - } - } - - // reap rejected/expired hypotheses from possible list - Hypotheses toReap = m_possible; - m_possible.clear(); - for (int i = 0; i < toReap.size(); ++i) { - Hypothesis h = toReap[i]; - if (h.getState() != Hypothesis::Rejected && - h.getState() != Hypothesis::Expired) { - m_possible.push_back(h); - } - } - } - - delete[] data; - return fs; -} - -CepstrumPitchTracker::FeatureSet -CepstrumPitchTracker::getRemainingFeatures() -{ - FeatureSet fs; - if (m_good.getState() == Hypothesis::Satisfied) { - addFeaturesFrom(m_good, fs); - } - return fs; -}
--- a/CepstrumPitchTracker.h Fri Jul 13 21:35:29 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,131 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ -/* - Permission is hereby granted, free of charge, to any person - obtaining a copy of this software and associated documentation - files (the "Software"), to deal in the Software without - restriction, including without limitation the rights to use, copy, - modify, merge, publish, distribute, sublicense, and/or sell copies - of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR - ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF - CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef _CEPSTRUM_PITCH_H_ -#define _CEPSTRUM_PITCH_H_ - -#include <vamp-sdk/Plugin.h> - -class CepstrumPitchTracker : public Vamp::Plugin -{ -public: - CepstrumPitchTracker(float inputSampleRate); - virtual ~CepstrumPitchTracker(); - - std::string getIdentifier() const; - std::string getName() const; - std::string getDescription() const; - std::string getMaker() const; - int getPluginVersion() const; - std::string getCopyright() const; - - InputDomain getInputDomain() const; - size_t getPreferredBlockSize() const; - size_t getPreferredStepSize() const; - size_t getMinChannelCount() const; - size_t getMaxChannelCount() const; - - ParameterList getParameterDescriptors() const; - float getParameter(std::string identifier) const; - void setParameter(std::string identifier, float value); - - ProgramList getPrograms() const; - std::string getCurrentProgram() const; - void selectProgram(std::string name); - - OutputList getOutputDescriptors() const; - - bool initialise(size_t channels, size_t stepSize, size_t blockSize); - void reset(); - - FeatureSet process(const float *const *inputBuffers, - Vamp::RealTime timestamp); - - FeatureSet getRemainingFeatures(); - -protected: - size_t m_channels; - size_t m_stepSize; - size_t m_blockSize; - float m_fmin; - float m_fmax; - int m_vflen; - - int m_binFrom; - int m_binTo; - int m_bins; // count of "interesting" bins, those returned in m_cepOutput - - class Hypothesis { - - public: - struct Estimate { - double freq; - Vamp::RealTime time; - double confidence; - }; - typedef std::vector<Estimate> Estimates; - - struct Note { - double freq; - Vamp::RealTime time; - Vamp::RealTime duration; - }; - - Hypothesis(); - ~Hypothesis(); - - enum State { - New, - Provisional, - Satisfied, - Rejected, - Expired - }; - - bool accept(Estimate); - - State getState() const; - Estimates getAcceptedEstimates() const; - Note getAveragedNote() const; - - private: - bool isWithinTolerance(Estimate) const; - bool isOutOfDateFor(Estimate) const; - bool isSatisfied() const; - double getMeanFrequency() const; - - State m_state; - Estimates m_pending; - }; - - typedef std::vector<Hypothesis> Hypotheses; - Hypotheses m_possible; - Hypothesis m_good; - - void addFeaturesFrom(Hypothesis h, FeatureSet &fs); - - void filter(const double *in, double *out); - double cubicInterpolate(const double[4], double); - double findInterpolatedPeak(const double *in, int maxbin); -}; - -#endif
--- a/Makefile.inc Fri Jul 13 21:35:29 2012 +0100 +++ b/Makefile.inc Thu Jul 19 13:13:23 2012 +0100 @@ -1,6 +1,6 @@ PLUGIN_EXT ?= .so -PLUGIN ?= simple-cepstrum$(PLUGIN_EXT) +PLUGIN ?= cepstral-pitchtracker$(PLUGIN_EXT) CXX ?= g++ CC ?= gcc @@ -8,11 +8,9 @@ CXXFLAGS := $(CXXFLAGS) LDFLAGS := $(LDFLAGS) -HEADERS := SimpleCepstrum.h \ - CepstrumPitchTracker.h +HEADERS := CepstralPitchTracker.h -SOURCES := SimpleCepstrum.cpp \ - CepstrumPitchTracker.cpp \ +SOURCES := CepstralPitchTracker.cpp \ libmain.cpp OBJECTS := $(SOURCES:.cpp=.o)
--- a/libmain.cpp Fri Jul 13 21:35:29 2012 +0100 +++ b/libmain.cpp Thu Jul 19 13:13:23 2012 +0100 @@ -1,13 +1,33 @@ /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ +/* + This file is Copyright (c) 2012 Chris Cannam + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ #include <vamp/vamp.h> #include <vamp-sdk/PluginAdapter.h> -#include "SimpleCepstrum.h" -#include "CepstrumPitchTracker.h" +#include "CepstralPitchTracker.h" -static Vamp::PluginAdapter<SimpleCepstrum> cepPluginAdapter; -static Vamp::PluginAdapter<CepstrumPitchTracker> cepitchPluginAdapter; +static Vamp::PluginAdapter<CepstralPitchTracker> cepitchPluginAdapter; const VampPluginDescriptor * vampGetPluginDescriptor(unsigned int version, unsigned int index) @@ -15,7 +35,6 @@ if (version < 1) return 0; switch (index) { - case 0: return cepPluginAdapter.getDescriptor(); case 1: return cepitchPluginAdapter.getDescriptor(); default: return 0; }