Chris@3: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@3: /* Chris@31: This file is Copyright (c) 2012 Chris Cannam Chris@31: Chris@3: Permission is hereby granted, free of charge, to any person Chris@3: obtaining a copy of this software and associated documentation Chris@3: files (the "Software"), to deal in the Software without Chris@3: restriction, including without limitation the rights to use, copy, Chris@3: modify, merge, publish, distribute, sublicense, and/or sell copies Chris@3: of the Software, and to permit persons to whom the Software is Chris@3: furnished to do so, subject to the following conditions: Chris@3: Chris@3: The above copyright notice and this permission notice shall be Chris@3: included in all copies or substantial portions of the Software. Chris@3: Chris@3: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, Chris@3: EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF Chris@3: MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND Chris@3: NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR Chris@3: ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF Chris@3: CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION Chris@3: WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Chris@3: */ Chris@3: Chris@31: #include "CepstralPitchTracker.h" Chris@47: #include "MeanFilter.h" Chris@3: Chris@26: #include "vamp-sdk/FFT.h" Chris@26: Chris@3: #include Chris@3: #include Chris@3: Chris@3: #include Chris@3: #include Chris@3: #include Chris@3: Chris@3: using std::string; Chris@7: using std::vector; Chris@16: using Vamp::RealTime; Chris@7: Chris@16: Chris@31: CepstralPitchTracker::CepstralPitchTracker(float inputSampleRate) : Chris@3: Plugin(inputSampleRate), Chris@3: m_channels(0), Chris@3: m_stepSize(256), Chris@3: m_blockSize(1024), Chris@3: m_fmin(50), Chris@25: m_fmax(900), Chris@18: m_vflen(1), Chris@3: m_binFrom(0), Chris@3: m_binTo(0), Chris@15: m_bins(0) Chris@3: { Chris@3: } Chris@3: Chris@31: CepstralPitchTracker::~CepstralPitchTracker() Chris@3: { Chris@3: } Chris@3: Chris@3: string Chris@31: CepstralPitchTracker::getIdentifier() const Chris@3: { Chris@39: return "cepstral-pitchtracker"; Chris@3: } Chris@3: Chris@3: string Chris@31: CepstralPitchTracker::getName() const Chris@3: { Chris@39: return "Cepstral Pitch Tracker"; Chris@3: } Chris@3: Chris@3: string Chris@31: CepstralPitchTracker::getDescription() const Chris@3: { Chris@3: return "Estimate f0 of monophonic material using a cepstrum method."; Chris@3: } Chris@3: Chris@3: string Chris@31: CepstralPitchTracker::getMaker() const Chris@3: { Chris@3: return "Chris Cannam"; Chris@3: } Chris@3: Chris@3: int Chris@31: CepstralPitchTracker::getPluginVersion() const Chris@3: { Chris@3: // Increment this each time you release a version that behaves Chris@3: // differently from the previous one Chris@3: return 1; Chris@3: } Chris@3: Chris@3: string Chris@31: CepstralPitchTracker::getCopyright() const Chris@3: { Chris@3: return "Freely redistributable (BSD license)"; Chris@3: } Chris@3: Chris@31: CepstralPitchTracker::InputDomain Chris@31: CepstralPitchTracker::getInputDomain() const Chris@3: { Chris@3: return FrequencyDomain; Chris@3: } Chris@3: Chris@3: size_t Chris@31: CepstralPitchTracker::getPreferredBlockSize() const Chris@3: { Chris@3: return 1024; Chris@3: } Chris@3: Chris@3: size_t Chris@31: CepstralPitchTracker::getPreferredStepSize() const Chris@3: { Chris@3: return 256; Chris@3: } Chris@3: Chris@3: size_t Chris@31: CepstralPitchTracker::getMinChannelCount() const Chris@3: { Chris@3: return 1; Chris@3: } Chris@3: Chris@3: size_t Chris@31: CepstralPitchTracker::getMaxChannelCount() const Chris@3: { Chris@3: return 1; Chris@3: } Chris@3: Chris@31: CepstralPitchTracker::ParameterList Chris@31: CepstralPitchTracker::getParameterDescriptors() const Chris@3: { Chris@3: ParameterList list; Chris@3: return list; Chris@3: } Chris@3: Chris@3: float Chris@31: CepstralPitchTracker::getParameter(string identifier) const Chris@3: { Chris@3: return 0.f; Chris@3: } Chris@3: Chris@3: void Chris@31: CepstralPitchTracker::setParameter(string identifier, float value) Chris@3: { Chris@3: } Chris@3: Chris@31: CepstralPitchTracker::ProgramList Chris@31: CepstralPitchTracker::getPrograms() const Chris@3: { Chris@3: ProgramList list; Chris@3: return list; Chris@3: } Chris@3: Chris@3: string Chris@31: CepstralPitchTracker::getCurrentProgram() const Chris@3: { Chris@3: return ""; // no programs Chris@3: } Chris@3: Chris@3: void Chris@31: CepstralPitchTracker::selectProgram(string name) Chris@3: { Chris@3: } Chris@3: Chris@31: CepstralPitchTracker::OutputList Chris@31: CepstralPitchTracker::getOutputDescriptors() const Chris@3: { Chris@3: OutputList outputs; Chris@3: Chris@3: OutputDescriptor d; Chris@3: Chris@3: d.identifier = "f0"; Chris@3: d.name = "Estimated f0"; Chris@3: d.description = "Estimated fundamental frequency"; Chris@3: d.unit = "Hz"; Chris@3: d.hasFixedBinCount = true; Chris@3: d.binCount = 1; Chris@3: d.hasKnownExtents = true; Chris@3: d.minValue = m_fmin; Chris@3: d.maxValue = m_fmax; Chris@3: d.isQuantized = false; Chris@3: d.sampleType = OutputDescriptor::FixedSampleRate; Chris@3: d.sampleRate = (m_inputSampleRate / m_stepSize); Chris@3: d.hasDuration = false; Chris@3: outputs.push_back(d); Chris@3: Chris@16: d.identifier = "notes"; Chris@16: d.name = "Notes"; Chris@16: d.description = "Derived fixed-pitch note frequencies"; Chris@16: d.unit = "Hz"; Chris@16: d.hasFixedBinCount = true; Chris@16: d.binCount = 1; Chris@16: d.hasKnownExtents = true; Chris@16: d.minValue = m_fmin; Chris@16: d.maxValue = m_fmax; Chris@16: d.isQuantized = false; Chris@16: d.sampleType = OutputDescriptor::FixedSampleRate; Chris@16: d.sampleRate = (m_inputSampleRate / m_stepSize); Chris@16: d.hasDuration = true; Chris@16: outputs.push_back(d); Chris@16: Chris@3: return outputs; Chris@3: } Chris@3: Chris@3: bool Chris@31: CepstralPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize) Chris@3: { Chris@3: if (channels < getMinChannelCount() || Chris@3: channels > getMaxChannelCount()) return false; Chris@3: Chris@31: // std::cerr << "CepstralPitchTracker::initialise: channels = " << channels Chris@3: // << ", stepSize = " << stepSize << ", blockSize = " << blockSize Chris@3: // << std::endl; Chris@3: Chris@3: m_channels = channels; Chris@3: m_stepSize = stepSize; Chris@3: m_blockSize = blockSize; Chris@3: Chris@3: m_binFrom = int(m_inputSampleRate / m_fmax); Chris@3: m_binTo = int(m_inputSampleRate / m_fmin); Chris@3: Chris@3: if (m_binTo >= (int)m_blockSize / 2) { Chris@3: m_binTo = m_blockSize / 2 - 1; Chris@3: } Chris@3: Chris@3: m_bins = (m_binTo - m_binFrom) + 1; Chris@3: Chris@3: reset(); Chris@3: Chris@3: return true; Chris@3: } Chris@3: Chris@3: void Chris@31: CepstralPitchTracker::reset() Chris@3: { Chris@3: } Chris@3: Chris@3: void Chris@35: CepstralPitchTracker::addFeaturesFrom(NoteHypothesis h, FeatureSet &fs) Chris@30: { Chris@35: NoteHypothesis::Estimates es = h.getAcceptedEstimates(); Chris@30: Chris@35: for (int i = 0; i < (int)es.size(); ++i) { Chris@30: Feature f; Chris@30: f.hasTimestamp = true; Chris@30: f.timestamp = es[i].time; Chris@30: f.values.push_back(es[i].freq); Chris@30: fs[0].push_back(f); Chris@30: } Chris@30: Chris@30: Feature nf; Chris@30: nf.hasTimestamp = true; Chris@30: nf.hasDuration = true; Chris@35: NoteHypothesis::Note n = h.getAveragedNote(); Chris@30: nf.timestamp = n.time; Chris@30: nf.duration = n.duration; Chris@30: nf.values.push_back(n.freq); Chris@30: fs[1].push_back(nf); Chris@30: } Chris@30: Chris@18: double Chris@31: CepstralPitchTracker::cubicInterpolate(const double y[4], double x) Chris@18: { Chris@18: double a0 = y[3] - y[2] - y[0] + y[1]; Chris@18: double a1 = y[0] - y[1] - a0; Chris@18: double a2 = y[2] - y[0]; Chris@18: double a3 = y[1]; Chris@18: return Chris@18: a0 * x * x * x + Chris@18: a1 * x * x + Chris@18: a2 * x + Chris@18: a3; Chris@18: } Chris@18: Chris@18: double Chris@31: CepstralPitchTracker::findInterpolatedPeak(const double *in, int maxbin) Chris@18: { Chris@18: if (maxbin < 2 || maxbin > m_bins - 3) { Chris@18: return maxbin; Chris@18: } Chris@18: Chris@18: double maxval = 0.0; Chris@18: double maxidx = maxbin; Chris@18: Chris@18: const int divisions = 10; Chris@18: double y[4]; Chris@18: Chris@18: y[0] = in[maxbin-1]; Chris@18: y[1] = in[maxbin]; Chris@18: y[2] = in[maxbin+1]; Chris@18: y[3] = in[maxbin+2]; Chris@18: for (int i = 0; i < divisions; ++i) { Chris@18: double probe = double(i) / double(divisions); Chris@18: double value = cubicInterpolate(y, probe); Chris@18: if (value > maxval) { Chris@18: maxval = value; Chris@18: maxidx = maxbin + probe; Chris@18: } Chris@18: } Chris@18: Chris@18: y[3] = y[2]; Chris@18: y[2] = y[1]; Chris@18: y[1] = y[0]; Chris@18: y[0] = in[maxbin-2]; Chris@18: for (int i = 0; i < divisions; ++i) { Chris@18: double probe = double(i) / double(divisions); Chris@18: double value = cubicInterpolate(y, probe); Chris@18: if (value > maxval) { Chris@18: maxval = value; Chris@18: maxidx = maxbin - 1 + probe; Chris@18: } Chris@18: } Chris@18: Chris@18: /* Chris@18: std::cerr << "centre = " << maxbin << ": [" Chris@18: << in[maxbin-2] << "," Chris@18: << in[maxbin-1] << "," Chris@18: << in[maxbin] << "," Chris@18: << in[maxbin+1] << "," Chris@18: << in[maxbin+2] << "] -> " << maxidx << std::endl; Chris@18: */ Chris@18: Chris@18: return maxidx; Chris@18: } Chris@18: Chris@31: CepstralPitchTracker::FeatureSet Chris@31: CepstralPitchTracker::process(const float *const *inputBuffers, RealTime timestamp) Chris@3: { Chris@3: FeatureSet fs; Chris@3: Chris@3: int bs = m_blockSize; Chris@3: int hs = m_blockSize/2 + 1; Chris@3: Chris@3: double *rawcep = new double[bs]; Chris@3: double *io = new double[bs]; Chris@3: double *logmag = new double[bs]; Chris@3: Chris@4: // The "inverse symmetric" method. Seems to be the most reliable Chris@3: Chris@25: double magmean = 0.0; Chris@25: Chris@3: for (int i = 0; i < hs; ++i) { Chris@3: Chris@3: double power = Chris@3: inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] + Chris@3: inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1]; Chris@3: double mag = sqrt(power); Chris@25: Chris@25: magmean += mag; Chris@25: Chris@3: double lm = log(mag + 0.00000001); Chris@3: Chris@4: logmag[i] = lm; Chris@4: if (i > 0) logmag[bs - i] = lm; Chris@3: } Chris@3: Chris@25: magmean /= hs; Chris@25: double threshold = 0.1; // for magmean Chris@25: Chris@26: Vamp::FFT::inverse(bs, logmag, 0, rawcep, io); Chris@3: Chris@3: delete[] logmag; Chris@3: delete[] io; Chris@3: Chris@3: int n = m_bins; Chris@3: double *data = new double[n]; Chris@47: MeanFilter(m_vflen).filterSubsequence(rawcep, data, m_blockSize, n, m_binFrom); Chris@3: delete[] rawcep; Chris@3: Chris@3: double maxval = 0.0; Chris@6: int maxbin = -1; Chris@3: Chris@3: for (int i = 0; i < n; ++i) { Chris@3: if (data[i] > maxval) { Chris@3: maxval = data[i]; Chris@3: maxbin = i; Chris@3: } Chris@3: } Chris@3: Chris@15: if (maxbin < 0) { Chris@15: delete[] data; Chris@15: return fs; Chris@15: } Chris@15: Chris@15: double nextPeakVal = 0.0; Chris@15: for (int i = 1; i+1 < n; ++i) { Chris@15: if (data[i] > data[i-1] && Chris@15: data[i] > data[i+1] && Chris@15: i != maxbin && Chris@15: data[i] > nextPeakVal) { Chris@15: nextPeakVal = data[i]; Chris@15: } Chris@15: } Chris@8: Chris@18: double cimax = findInterpolatedPeak(data, maxbin); Chris@18: double peakfreq = m_inputSampleRate / (cimax + m_binFrom); Chris@15: Chris@15: double confidence = 0.0; Chris@15: if (nextPeakVal != 0.0) { Chris@27: confidence = (maxval - nextPeakVal) * 10.0; Chris@25: if (magmean < threshold) confidence = 0.0; Chris@39: // std::cerr << "magmean = " << magmean << ", confidence = " << confidence << std::endl; Chris@15: } Chris@15: Chris@35: NoteHypothesis::Estimate e; Chris@8: e.freq = peakfreq; Chris@8: e.time = timestamp; Chris@15: e.confidence = confidence; Chris@8: Chris@28: if (!m_good.accept(e)) { Chris@13: Chris@11: int candidate = -1; Chris@13: bool accepted = false; Chris@13: Chris@35: for (int i = 0; i < (int)m_possible.size(); ++i) { Chris@28: if (m_possible[i].accept(e)) { Chris@35: if (m_possible[i].getState() == NoteHypothesis::Satisfied) { Chris@28: accepted = true; Chris@11: candidate = i; Chris@11: } Chris@11: break; Chris@11: } Chris@11: } Chris@12: Chris@13: if (!accepted) { Chris@35: NoteHypothesis h; Chris@28: h.accept(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this Chris@13: m_possible.push_back(h); Chris@13: } Chris@13: Chris@35: if (m_good.getState() == NoteHypothesis::Expired) { Chris@30: addFeaturesFrom(m_good, fs); Chris@12: } Chris@12: Chris@35: if (m_good.getState() == NoteHypothesis::Expired || Chris@35: m_good.getState() == NoteHypothesis::Rejected) { Chris@11: if (candidate >= 0) { Chris@28: m_good = m_possible[candidate]; Chris@11: } else { Chris@35: m_good = NoteHypothesis(); Chris@11: } Chris@11: } Chris@8: Chris@14: // reap rejected/expired hypotheses from possible list Chris@14: Hypotheses toReap = m_possible; Chris@14: m_possible.clear(); Chris@35: for (int i = 0; i < (int)toReap.size(); ++i) { Chris@35: NoteHypothesis h = toReap[i]; Chris@35: if (h.getState() != NoteHypothesis::Rejected && Chris@35: h.getState() != NoteHypothesis::Expired) { Chris@14: m_possible.push_back(h); Chris@14: } Chris@14: } Chris@14: } Chris@14: Chris@3: delete[] data; Chris@3: return fs; Chris@3: } Chris@3: Chris@31: CepstralPitchTracker::FeatureSet Chris@31: CepstralPitchTracker::getRemainingFeatures() Chris@3: { Chris@3: FeatureSet fs; Chris@35: if (m_good.getState() == NoteHypothesis::Satisfied) { Chris@30: addFeaturesFrom(m_good, fs); Chris@11: } Chris@3: return fs; Chris@3: }