Mercurial > hg > pyin

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LocalCandidatePYIN.cpp	Fri Jan 24 12:18:11 2014 +0000
@@ -0,0 +1,383 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    pYIN - A fundamental frequency estimator for monophonic audio
+    Centre for Digital Music, Queen Mary, University of London.
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COLocalCandidatePYING included with this distribution for more information.
+*/
+
+#include "LocalCandidatePYIN.h"
+#include "MonoPitch.h"
+#include "YinUtil.h"
+
+#include "vamp-sdk/FFT.h"
+
+#include <vector>
+#include <algorithm>
+
+#include <cstdio>
+#include <sstream>
+// #include <iostream>
+#include <cmath>
+#include <complex>
+
+using std::string;
+using std::vector;
+using Vamp::RealTime;
+
+
+LocalCandidatePYIN::LocalCandidatePYIN(float inputSampleRate) :
+    Plugin(inputSampleRate),
+    m_channels(0),
+    m_stepSize(256),
+    m_blockSize(2048),
+    m_fmin(40),
+    m_fmax(700),
+    m_yin(2048, inputSampleRate, 0.0),
+    m_oPitchTrackCandidates(0),
+    m_threshDistr(2.0f),
+    m_outputUnvoiced(0.0f),
+    m_pitchProb(0),
+    m_timestamp(0),
+    m_nCandidate(20)
+{
+}
+
+LocalCandidatePYIN::~LocalCandidatePYIN()
+{
+}
+
+string
+LocalCandidatePYIN::getIdentifier() const
+{
+    return "localcandidatepyin";
+}
+
+string
+LocalCandidatePYIN::getName() const
+{
+    return "Local Candidate PYIN";
+}
+
+string
+LocalCandidatePYIN::getDescription() const
+{
+    return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
+}
+
+string
+LocalCandidatePYIN::getMaker() const
+{
+    return "Matthias Mauch";
+}
+
+int
+LocalCandidatePYIN::getPluginVersion() const
+{
+    // Increment this each time you release a version that behaves
+    // differently from the previous one
+    return 1;
+}
+
+string
+LocalCandidatePYIN::getCopyright() const
+{
+    return "GPL";
+}
+
+LocalCandidatePYIN::InputDomain
+LocalCandidatePYIN::getInputDomain() const
+{
+    return TimeDomain;
+}
+
+size_t
+LocalCandidatePYIN::getPreferredBlockSize() const
+{
+    return 2048;
+}
+
+size_t
+LocalCandidatePYIN::getPreferredStepSize() const
+{
+    return 256;
+}
+
+size_t
+LocalCandidatePYIN::getMinChannelCount() const
+{
+    return 1;
+}
+
+size_t
+LocalCandidatePYIN::getMaxChannelCount() const
+{
+    return 1;
+}
+
+LocalCandidatePYIN::ParameterList
+LocalCandidatePYIN::getParameterDescriptors() const
+{
+    ParameterList list;
+
+    ParameterDescriptor d;
+
+    d.identifier = "threshdistr";
+    d.name = "Yin threshold distribution";
+    d.description = ".";
+    d.unit = "";
+    d.minValue = 0.0f;
+    d.maxValue = 7.0f;
+    d.defaultValue = 2.0f;
+    d.isQuantized = true;
+    d.quantizeStep = 1.0f;
+    d.valueNames.push_back("Uniform");
+    d.valueNames.push_back("Beta (mean 0.10)");
+    d.valueNames.push_back("Beta (mean 0.15)");
+    d.valueNames.push_back("Beta (mean 0.20)");
+    d.valueNames.push_back("Beta (mean 0.30)");
+    d.valueNames.push_back("Single Value 0.10");
+    d.valueNames.push_back("Single Value 0.15");
+    d.valueNames.push_back("Single Value 0.20");
+    list.push_back(d);
+
+    d.identifier = "outputunvoiced";
+    d.valueNames.clear();
+    d.name = "Output estimates classified as unvoiced?";
+    d.description = ".";
+    d.unit = "";
+    d.minValue = 0.0f;
+    d.maxValue = 2.0f;
+    d.defaultValue = 0.0f;
+    d.isQuantized = true;
+    d.quantizeStep = 1.0f;
+    d.valueNames.push_back("No");
+    d.valueNames.push_back("Yes");
+    d.valueNames.push_back("Yes, as negative frequencies");
+    list.push_back(d);
+
+    return list;
+}
+
+float
+LocalCandidatePYIN::getParameter(string identifier) const
+{
+    if (identifier == "threshdistr") {
+            return m_threshDistr;
+    }
+    if (identifier == "outputunvoiced") {
+            return m_outputUnvoiced;
+    }
+    return 0.f;
+}
+
+void
+LocalCandidatePYIN::setParameter(string identifier, float value)
+{
+    if (identifier == "threshdistr")
+    {
+        m_threshDistr = value;
+    }
+    if (identifier == "outputunvoiced")
+    {
+        m_outputUnvoiced = value;
+    }
+
+}
+
+LocalCandidatePYIN::ProgramList
+LocalCandidatePYIN::getPrograms() const
+{
+    ProgramList list;
+    return list;
+}
+
+string
+LocalCandidatePYIN::getCurrentProgram() const
+{
+    return ""; // no programs
+}
+
+void
+LocalCandidatePYIN::selectProgram(string name)
+{
+}
+
+LocalCandidatePYIN::OutputList
+LocalCandidatePYIN::getOutputDescriptors() const
+{
+    OutputList outputs;
+
+    OutputDescriptor d;
+
+    int outputNumber = 0;
+
+    d.identifier = "pitchtrackcandidates";
+    d.name = "Pitch track candidates";
+    d.description = "Multiple candidate pitch tracks.";
+    d.unit = "Hz";
+    d.hasFixedBinCount = false;
+    // d.binCount = 1;
+    d.hasKnownExtents = true;
+    d.minValue = m_fmin;
+    d.maxValue = 500;
+    d.isQuantized = false;
+    d.sampleType = OutputDescriptor::FixedSampleRate;
+    d.sampleRate = (m_inputSampleRate / m_stepSize);
+    d.hasDuration = false;
+    outputs.push_back(d);
+    // m_oPitchTrackCandidates = outputNumber++;
+
+    return outputs;
+}
+
+bool
+LocalCandidatePYIN::initialise(size_t channels, size_t stepSize, size_t blockSize)
+{
+    if (channels < getMinChannelCount() ||
+	channels > getMaxChannelCount()) return false;
+
+/*
+    std::cerr << "LocalCandidatePYIN::initialise: channels = " << channels
+          << ", stepSize = " << stepSize << ", blockSize = " << blockSize
+          << std::endl;
+*/
+    m_channels = channels;
+    m_stepSize = stepSize;
+    m_blockSize = blockSize;
+
+    reset();
+
+    return true;
+}
+
+void
+LocalCandidatePYIN::reset()
+{
+    m_yin.setThresholdDistr(m_threshDistr);
+    m_yin.setFrameSize(m_blockSize);
+
+    m_pitchProb.clear();
+    for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
+    {
+        m_pitchProb.push_back(vector<vector<pair<double, double> > >());
+    }
+    m_timestamp.clear();
+/*
+    std::cerr << "LocalCandidatePYIN::reset"
+          << ", blockSize = " << m_blockSize
+          << std::endl;
+*/
+}
+
+LocalCandidatePYIN::FeatureSet
+LocalCandidatePYIN::process(const float *const *inputBuffers, RealTime timestamp)
+{
+    timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate));
+    FeatureSet fs;
+
+    double *dInputBuffers = new double[m_blockSize];
+    for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
+
+    size_t yinBufferSize = m_blockSize/2;
+    double* yinBuffer = new double[yinBufferSize];
+    YinUtil::fastDifference(dInputBuffers, yinBuffer, yinBufferSize);
+
+    delete [] dInputBuffers;
+
+    YinUtil::cumulativeDifference(yinBuffer, yinBufferSize);
+
+    for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
+    {
+        float minFrequency = m_fmin * std::pow(2,(3.0*iCandidate)/12);
+        float maxFrequency = m_fmin * std::pow(2,(3.0*iCandidate+9)/12);
+        vector<double> peakProbability = YinUtil::yinProb(yinBuffer, m_threshDistr, yinBufferSize, m_inputSampleRate/maxFrequency, m_inputSampleRate/minFrequency);
+
+        vector<pair<double, double> > tempPitchProb;
+        for (size_t iBuf = 0; iBuf < yinBufferSize; ++iBuf)
+        {
+            if (peakProbability[iBuf] > 0)
+            {
+                double currentF0 =
+                    m_inputSampleRate * (1.0 /
+                    YinUtil::parabolicInterpolation(yinBuffer, iBuf, yinBufferSize));
+                double tempPitch = 12 * std::log(currentF0/440)/std::log(2.) + 69;
+                tempPitchProb.push_back(pair<double, double>(tempPitch, peakProbability[iBuf]));
+            }
+        }
+        m_pitchProb[iCandidate].push_back(tempPitchProb);
+    }
+    m_timestamp.push_back(timestamp);
+
+    return fs;
+}
+
+LocalCandidatePYIN::FeatureSet
+LocalCandidatePYIN::getRemainingFeatures()
+{
+    FeatureSet fs;
+    Feature f;
+    f.hasTimestamp = true;
+    f.hasDuration = false;
+    f.values.push_back(0);
+
+    std::cerr << "in remaining features" << std::endl;
+
+    if (m_pitchProb.empty()) {
+        return fs;
+    }
+
+    // MONO-PITCH STUFF
+    MonoPitch mp;
+    size_t nFrame = m_timestamp.size();
+    vector<vector<float> > pitchTracks;
+    vector<float> freqSum = vector<float>(m_nCandidate);
+    vector<float> freqNumber = vector<float>(m_nCandidate);
+    vector<float> freqMean = vector<float>(m_nCandidate);
+
+    for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
+    {
+        pitchTracks.push_back(vector<float>(nFrame));
+        vector<float> mpOut = mp.process(m_pitchProb[iCandidate]);
+        for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
+        {
+            if (mpOut[iFrame] > 0) {
+                pitchTracks[iCandidate][iFrame] = mpOut[iFrame];
+                freqSum[iCandidate] += mpOut[iFrame];
+                freqNumber[iCandidate]++;
+            }
+        }
+        freqMean[iCandidate] = freqSum[iCandidate]*1.0/freqNumber[iCandidate];
+    }
+
+    int actualCandidateNumber = 0;
+    for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) {
+        if ((freqNumber[iCandidate] > 0.8 * nFrame) && (iCandidate == 0 || fabs(freqMean[iCandidate]/freqMean[iCandidate-1]-1)<0.01))
+        {
+            std::ostringstream convert;
+            convert << actualCandidateNumber++;
+            // f.label = sprintf (buffer, "%i", iCandidate);
+            f.label = convert.str();
+            std::cerr << freqNumber[iCandidate] << " " << freqMean[iCandidate] << std::endl;
+            for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
+            {
+                if (pitchTracks[iCandidate][iFrame] > 0)
+                {
+                    f.values[0] = pitchTracks[iCandidate][iFrame];
+                    f.timestamp = m_timestamp[iFrame];
+                    fs[m_oPitchTrackCandidates].push_back(f);
+                }
+            }
+        }
+        // std::cerr << freqNumber[iCandidate] << " " << (freqSum[iCandidate]*1.0/freqNumber[iCandidate]) << std::endl;
+    }
+
+    // only retain those that are close to their means
+
+    return fs;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LocalCandidatePYIN.h	Fri Jan 24 12:18:11 2014 +0000
@@ -0,0 +1,75 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    pYIN - A fundamental frequency estimator for monophonic audio
+    Centre for Digital Music, Queen Mary, University of London.
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COLocalCandidatePYING included with this distribution for more information.
+*/
+
+#ifndef _LOCALCANDIDATEPYIN_H_
+#define _LOCALCANDIDATEPYIN_H_
+
+#include <vamp-sdk/Plugin.h>
+
+#include "Yin.h"
+
+class LocalCandidatePYIN : public Vamp::Plugin
+{
+public:
+    LocalCandidatePYIN(float inputSampleRate);
+    virtual ~LocalCandidatePYIN();
+
+    std::string getIdentifier() const;
+    std::string getName() const;
+    std::string getDescription() const;
+    std::string getMaker() const;
+    int getPluginVersion() const;
+    std::string getCopyright() const;
+
+    InputDomain getInputDomain() const;
+    size_t getPreferredBlockSize() const;
+    size_t getPreferredStepSize() const;
+    size_t getMinChannelCount() const;
+    size_t getMaxChannelCount() const;
+
+    ParameterList getParameterDescriptors() const;
+    float getParameter(std::string identifier) const;
+    void setParameter(std::string identifier, float value);
+
+    ProgramList getPrograms() const;
+    std::string getCurrentProgram() const;
+    void selectProgram(std::string name);
+
+    OutputList getOutputDescriptors() const;
+
+    bool initialise(size_t channels, size_t stepSize, size_t blockSize);
+    void reset();
+
+    FeatureSet process(const float *const *inputBuffers,
+                       Vamp::RealTime timestamp);
+
+    FeatureSet getRemainingFeatures();
+
+protected:
+    size_t m_channels;
+    size_t m_stepSize;
+    size_t m_blockSize;
+    float m_fmin;
+    float m_fmax;
+    Yin m_yin;
+
+    mutable int m_oPitchTrackCandidates;
+
+    float m_threshDistr;
+    float m_outputUnvoiced;
+    vector<vector<vector<pair<double, double> > > > m_pitchProb;
+    vector<Vamp::RealTime> m_timestamp;
+    size_t m_nCandidate;
+};
+
+#endif