matthiasm@0: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ matthiasm@0: Chris@9: /* Chris@9: pYIN - A fundamental frequency estimator for monophonic audio Chris@9: Centre for Digital Music, Queen Mary, University of London. Chris@9: Chris@9: This program is free software; you can redistribute it and/or Chris@9: modify it under the terms of the GNU General Public License as Chris@9: published by the Free Software Foundation; either version 2 of the Chris@9: License, or (at your option) any later version. See the file Chris@9: COPYING included with this distribution for more information. Chris@9: */ Chris@9: matthiasm@35: #include "YinVamp.h" matthiasm@0: #include "MonoNote.h" matthiasm@0: matthiasm@0: #include "vamp-sdk/FFT.h" matthiasm@0: matthiasm@0: #include matthiasm@0: #include matthiasm@0: matthiasm@0: #include matthiasm@0: #include matthiasm@0: #include matthiasm@0: matthiasm@0: using std::string; matthiasm@0: using std::vector; matthiasm@0: using Vamp::RealTime; matthiasm@0: matthiasm@0: matthiasm@35: YinVamp::YinVamp(float inputSampleRate) : matthiasm@0: Plugin(inputSampleRate), matthiasm@0: m_channels(0), matthiasm@0: m_stepSize(256), matthiasm@0: m_blockSize(2048), matthiasm@0: m_fmin(40), matthiasm@58: m_fmax(1600), matthiasm@0: m_yin(2048, inputSampleRate, 0.0), matthiasm@0: m_outNoF0(0), matthiasm@0: m_outNoPeriodicity(0), matthiasm@0: m_outNoRms(0), matthiasm@0: m_outNoSalience(0), matthiasm@0: m_yinParameter(0.15f), Chris@4: m_outputUnvoiced(2.0f) matthiasm@0: { matthiasm@0: } matthiasm@0: matthiasm@35: YinVamp::~YinVamp() matthiasm@0: { matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@35: YinVamp::getIdentifier() const matthiasm@0: { matthiasm@0: return "yin"; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@35: YinVamp::getName() const matthiasm@0: { matthiasm@0: return "Yin"; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@35: YinVamp::getDescription() const matthiasm@0: { matthiasm@0: return "A vamp implementation of the Yin algorithm for monophonic frequency estimation."; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@35: YinVamp::getMaker() const matthiasm@0: { matthiasm@0: return "Matthias Mauch"; matthiasm@0: } matthiasm@0: matthiasm@0: int matthiasm@35: YinVamp::getPluginVersion() const matthiasm@0: { matthiasm@0: // Increment this each time you release a version that behaves matthiasm@0: // differently from the previous one Chris@143: return 3; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@35: YinVamp::getCopyright() const matthiasm@0: { matthiasm@0: return "GPL"; matthiasm@0: } matthiasm@0: matthiasm@35: YinVamp::InputDomain matthiasm@35: YinVamp::getInputDomain() const matthiasm@0: { matthiasm@0: return TimeDomain; matthiasm@0: } matthiasm@0: matthiasm@0: size_t matthiasm@35: YinVamp::getPreferredBlockSize() const matthiasm@0: { matthiasm@0: return 2048; matthiasm@0: } matthiasm@0: matthiasm@0: size_t matthiasm@35: YinVamp::getPreferredStepSize() const matthiasm@0: { matthiasm@0: return 256; matthiasm@0: } matthiasm@0: matthiasm@0: size_t matthiasm@35: YinVamp::getMinChannelCount() const matthiasm@0: { matthiasm@0: return 1; matthiasm@0: } matthiasm@0: matthiasm@0: size_t matthiasm@35: YinVamp::getMaxChannelCount() const matthiasm@0: { matthiasm@0: return 1; matthiasm@0: } matthiasm@0: matthiasm@35: YinVamp::ParameterList matthiasm@35: YinVamp::getParameterDescriptors() const matthiasm@0: { matthiasm@0: ParameterList list; matthiasm@0: matthiasm@0: ParameterDescriptor d; matthiasm@0: d.identifier = "yinThreshold"; matthiasm@0: d.name = "Yin threshold"; matthiasm@0: d.description = "The greedy Yin search for a low value difference function is done once a dip lower than this threshold is reached."; matthiasm@0: d.unit = ""; matthiasm@0: d.minValue = 0.025f; matthiasm@0: d.maxValue = 1.0f; matthiasm@0: d.defaultValue = 0.15f; matthiasm@0: d.isQuantized = true; matthiasm@0: d.quantizeStep = 0.025f; matthiasm@0: matthiasm@0: list.push_back(d); matthiasm@0: matthiasm@0: d.identifier = "outputunvoiced"; matthiasm@0: d.valueNames.clear(); matthiasm@0: d.name = "Output estimates classified as unvoiced?"; matthiasm@0: d.description = "."; matthiasm@0: d.unit = ""; matthiasm@0: d.minValue = 0.0f; matthiasm@0: d.maxValue = 2.0f; matthiasm@0: d.defaultValue = 2.0f; matthiasm@0: d.isQuantized = true; matthiasm@0: d.quantizeStep = 1.0f; matthiasm@0: d.valueNames.push_back("No"); matthiasm@0: d.valueNames.push_back("Yes"); matthiasm@0: d.valueNames.push_back("Yes, as negative frequencies"); matthiasm@0: list.push_back(d); matthiasm@0: matthiasm@0: return list; matthiasm@0: } matthiasm@0: matthiasm@0: float matthiasm@35: YinVamp::getParameter(string identifier) const matthiasm@0: { matthiasm@0: if (identifier == "yinThreshold") { matthiasm@0: return m_yinParameter; matthiasm@0: } matthiasm@0: if (identifier == "outputunvoiced") { matthiasm@0: return m_outputUnvoiced; matthiasm@0: } matthiasm@0: return 0.f; matthiasm@0: } matthiasm@0: matthiasm@0: void matthiasm@35: YinVamp::setParameter(string identifier, float value) matthiasm@0: { matthiasm@0: if (identifier == "yinThreshold") matthiasm@0: { matthiasm@0: m_yinParameter = value; matthiasm@0: } matthiasm@0: if (identifier == "outputunvoiced") matthiasm@0: { matthiasm@0: m_outputUnvoiced = value; matthiasm@0: } matthiasm@0: } matthiasm@0: matthiasm@35: YinVamp::ProgramList matthiasm@35: YinVamp::getPrograms() const matthiasm@0: { matthiasm@0: ProgramList list; matthiasm@0: return list; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@35: YinVamp::getCurrentProgram() const matthiasm@0: { matthiasm@0: return ""; // no programs matthiasm@0: } matthiasm@0: matthiasm@0: void Chris@138: YinVamp::selectProgram(string) matthiasm@0: { matthiasm@0: } matthiasm@0: matthiasm@35: YinVamp::OutputList matthiasm@35: YinVamp::getOutputDescriptors() const matthiasm@0: { matthiasm@0: OutputList outputs; matthiasm@0: matthiasm@0: OutputDescriptor d; matthiasm@0: matthiasm@0: int outputNumber = 0; matthiasm@0: matthiasm@0: d.identifier = "f0"; matthiasm@0: d.name = "Estimated f0"; matthiasm@0: d.description = "Estimated fundamental frequency"; matthiasm@0: d.unit = "Hz"; matthiasm@0: d.hasFixedBinCount = true; matthiasm@0: d.binCount = 1; matthiasm@0: d.hasKnownExtents = true; matthiasm@0: d.minValue = m_fmin; matthiasm@0: d.maxValue = 500; matthiasm@0: d.isQuantized = false; matthiasm@0: d.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@0: d.sampleRate = (m_inputSampleRate / m_stepSize); matthiasm@0: d.hasDuration = false; matthiasm@0: outputs.push_back(d); matthiasm@0: m_outNoF0 = outputNumber++; matthiasm@0: matthiasm@0: d.identifier = "periodicity"; matthiasm@0: d.name = "Periodicity"; matthiasm@0: d.description = "by-product of Yin f0 estimation"; matthiasm@0: d.unit = ""; matthiasm@0: d.hasFixedBinCount = true; matthiasm@0: d.binCount = 1; matthiasm@0: d.hasKnownExtents = true; matthiasm@0: d.minValue = 0; matthiasm@0: d.maxValue = 1; matthiasm@0: d.isQuantized = false; matthiasm@0: d.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@0: d.sampleRate = (m_inputSampleRate / m_stepSize); matthiasm@0: d.hasDuration = false; matthiasm@0: outputs.push_back(d); matthiasm@0: m_outNoPeriodicity = outputNumber++; matthiasm@0: matthiasm@0: d.identifier = "rms"; Chris@15: d.name = "Root mean square"; matthiasm@0: d.description = "Root mean square of the waveform."; matthiasm@0: d.unit = ""; matthiasm@0: d.hasFixedBinCount = true; matthiasm@0: d.binCount = 1; matthiasm@0: d.hasKnownExtents = true; matthiasm@0: d.minValue = 0; matthiasm@0: d.maxValue = 1; matthiasm@0: d.isQuantized = false; matthiasm@0: d.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@0: d.sampleRate = (m_inputSampleRate / m_stepSize); matthiasm@0: d.hasDuration = false; matthiasm@0: outputs.push_back(d); matthiasm@0: m_outNoRms = outputNumber++; matthiasm@0: matthiasm@0: d.identifier = "salience"; matthiasm@0: d.name = "Salience"; matthiasm@0: d.description = "Yin Salience"; matthiasm@0: d.hasFixedBinCount = true; matthiasm@0: d.binCount = m_blockSize / 2; matthiasm@0: d.hasKnownExtents = true; matthiasm@0: d.minValue = 0; matthiasm@0: d.maxValue = 1; matthiasm@0: d.isQuantized = false; matthiasm@0: d.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@0: d.sampleRate = (m_inputSampleRate / m_stepSize); matthiasm@0: d.hasDuration = false; matthiasm@0: outputs.push_back(d); matthiasm@0: m_outNoSalience = outputNumber++; matthiasm@0: matthiasm@0: return outputs; matthiasm@0: } matthiasm@0: matthiasm@0: bool matthiasm@35: YinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize) matthiasm@0: { matthiasm@0: if (channels < getMinChannelCount() || matthiasm@0: channels > getMaxChannelCount()) return false; matthiasm@0: Chris@9: /* matthiasm@35: std::cerr << "YinVamp::initialise: channels = " << channels matthiasm@0: << ", stepSize = " << stepSize << ", blockSize = " << blockSize matthiasm@0: << std::endl; Chris@9: */ matthiasm@0: m_channels = channels; matthiasm@0: m_stepSize = stepSize; matthiasm@0: m_blockSize = blockSize; matthiasm@0: matthiasm@0: reset(); matthiasm@0: matthiasm@0: return true; matthiasm@0: } matthiasm@0: matthiasm@0: void matthiasm@35: YinVamp::reset() matthiasm@0: { matthiasm@0: m_yin.setThreshold(m_yinParameter); matthiasm@0: m_yin.setFrameSize(m_blockSize); Chris@9: /* matthiasm@35: std::cerr << "YinVamp::reset: yin threshold set to " << (m_yinParameter) matthiasm@0: << ", blockSize = " << m_blockSize matthiasm@0: << std::endl; Chris@9: */ matthiasm@0: } matthiasm@0: matthiasm@35: YinVamp::FeatureSet matthiasm@35: YinVamp::process(const float *const *inputBuffers, RealTime timestamp) matthiasm@0: { matthiasm@60: timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/2, lrintf(m_inputSampleRate)); matthiasm@0: FeatureSet fs; matthiasm@0: matthiasm@0: double *dInputBuffers = new double[m_blockSize]; matthiasm@0: for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i]; matthiasm@0: matthiasm@0: Yin::YinOutput yo = m_yin.process(dInputBuffers); matthiasm@35: // std::cerr << "f0 in YinVamp: " << yo.f0 << std::endl; matthiasm@0: Feature f; matthiasm@0: f.hasTimestamp = true; matthiasm@0: f.timestamp = timestamp; matthiasm@0: if (m_outputUnvoiced == 0.0f) matthiasm@0: { matthiasm@35: // std::cerr << "f0 in YinVamp: " << yo.f0 << std::endl; matthiasm@0: if (yo.f0 > 0 && yo.f0 < m_fmax && yo.f0 > m_fmin) { matthiasm@0: f.values.push_back(yo.f0); matthiasm@0: fs[m_outNoF0].push_back(f); matthiasm@0: } matthiasm@0: } else if (m_outputUnvoiced == 1.0f) matthiasm@0: { matthiasm@29: if (fabs(yo.f0) < m_fmax && fabs(yo.f0) > m_fmin) { matthiasm@29: f.values.push_back(fabs(yo.f0)); matthiasm@0: fs[m_outNoF0].push_back(f); matthiasm@0: } matthiasm@0: } else matthiasm@0: { matthiasm@29: if (fabs(yo.f0) < m_fmax && fabs(yo.f0) > m_fmin) { matthiasm@0: f.values.push_back(yo.f0); matthiasm@0: fs[m_outNoF0].push_back(f); matthiasm@0: } matthiasm@0: } matthiasm@0: matthiasm@0: f.values.clear(); matthiasm@0: f.values.push_back(yo.rms); matthiasm@0: fs[m_outNoRms].push_back(f); matthiasm@0: matthiasm@0: f.values.clear(); matthiasm@0: for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin) matthiasm@0: { matthiasm@0: f.values.push_back(yo.salience[iBin]); matthiasm@0: } matthiasm@0: fs[m_outNoSalience].push_back(f); matthiasm@0: matthiasm@0: f.values.clear(); matthiasm@0: // f.values[0] = yo.periodicity; matthiasm@0: f.values.push_back(yo.periodicity); matthiasm@0: fs[m_outNoPeriodicity].push_back(f); matthiasm@0: matthiasm@0: delete [] dInputBuffers; matthiasm@0: matthiasm@0: return fs; matthiasm@0: } matthiasm@0: matthiasm@35: YinVamp::FeatureSet matthiasm@35: YinVamp::getRemainingFeatures() matthiasm@0: { matthiasm@0: FeatureSet fs; matthiasm@0: return fs; matthiasm@0: }