Mercurial > hg > vamp-plugin-sdk
diff examples/FixedTempoEstimator.cpp @ 227:6b30e064cab7 distinct-libraries
* more moving
author | cannam |
---|---|
date | Thu, 06 Nov 2008 14:13:12 +0000 |
parents | src/FixedTempoEstimator.cpp@14029eb08472 |
children | 3cf5bd155e5b |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/FixedTempoEstimator.cpp Thu Nov 06 14:13:12 2008 +0000 @@ -0,0 +1,534 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Vamp + + An API for audio analysis and feature extraction plugins. + + Centre for Digital Music, Queen Mary, University of London. + Copyright 2006-2008 Chris Cannam and QMUL. + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + Except as contained in this notice, the names of the Centre for + Digital Music; Queen Mary, University of London; and Chris Cannam + shall not be used in advertising or otherwise to promote the sale, + use or other dealings in this Software without prior written + authorization. +*/ + +#include "FixedTempoEstimator.h" + +using std::string; +using std::vector; +using std::cerr; +using std::endl; + +using Vamp::RealTime; + +#include <cmath> + + +FixedTempoEstimator::FixedTempoEstimator(float inputSampleRate) : + Plugin(inputSampleRate), + m_stepSize(0), + m_blockSize(0), + m_priorMagnitudes(0), + m_df(0), + m_r(0), + m_fr(0), + m_t(0), + m_n(0) +{ +} + +FixedTempoEstimator::~FixedTempoEstimator() +{ + delete[] m_priorMagnitudes; + delete[] m_df; + delete[] m_r; + delete[] m_fr; + delete[] m_t; +} + +string +FixedTempoEstimator::getIdentifier() const +{ + return "fixedtempo"; +} + +string +FixedTempoEstimator::getName() const +{ + return "Simple Fixed Tempo Estimator"; +} + +string +FixedTempoEstimator::getDescription() const +{ + return "Study a short section of audio and estimate its tempo, assuming the tempo is constant"; +} + +string +FixedTempoEstimator::getMaker() const +{ + return "Vamp SDK Example Plugins"; +} + +int +FixedTempoEstimator::getPluginVersion() const +{ + return 1; +} + +string +FixedTempoEstimator::getCopyright() const +{ + return "Code copyright 2008 Queen Mary, University of London. Freely redistributable (BSD license)"; +} + +size_t +FixedTempoEstimator::getPreferredStepSize() const +{ + return 64; +} + +size_t +FixedTempoEstimator::getPreferredBlockSize() const +{ + return 256; +} + +bool +FixedTempoEstimator::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + + m_stepSize = stepSize; + m_blockSize = blockSize; + + float dfLengthSecs = 10.f; + m_dfsize = (dfLengthSecs * m_inputSampleRate) / m_stepSize; + + m_priorMagnitudes = new float[m_blockSize/2]; + m_df = new float[m_dfsize]; + + for (size_t i = 0; i < m_blockSize/2; ++i) { + m_priorMagnitudes[i] = 0.f; + } + for (size_t i = 0; i < m_dfsize; ++i) { + m_df[i] = 0.f; + } + + m_n = 0; + + return true; +} + +void +FixedTempoEstimator::reset() +{ + cerr << "FixedTempoEstimator: reset called" << endl; + + if (!m_priorMagnitudes) return; + + cerr << "FixedTempoEstimator: resetting" << endl; + + for (size_t i = 0; i < m_blockSize/2; ++i) { + m_priorMagnitudes[i] = 0.f; + } + for (size_t i = 0; i < m_dfsize; ++i) { + m_df[i] = 0.f; + } + + delete[] m_r; + m_r = 0; + + delete[] m_fr; + m_fr = 0; + + delete[] m_t; + m_t = 0; + + m_n = 0; + + m_start = RealTime::zeroTime; + m_lasttime = RealTime::zeroTime; +} + +FixedTempoEstimator::ParameterList +FixedTempoEstimator::getParameterDescriptors() const +{ + ParameterList list; + return list; +} + +float +FixedTempoEstimator::getParameter(std::string id) const +{ + return 0.f; +} + +void +FixedTempoEstimator::setParameter(std::string id, float value) +{ +} + +static int TempoOutput = 0; +static int CandidatesOutput = 1; +static int DFOutput = 2; +static int ACFOutput = 3; +static int FilteredACFOutput = 4; + +FixedTempoEstimator::OutputList +FixedTempoEstimator::getOutputDescriptors() const +{ + OutputList list; + + OutputDescriptor d; + d.identifier = "tempo"; + d.name = "Tempo"; + d.description = "Estimated tempo"; + d.unit = "bpm"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = false; + d.isQuantized = false; + d.sampleType = OutputDescriptor::VariableSampleRate; + d.sampleRate = m_inputSampleRate; + d.hasDuration = true; // our returned tempo spans a certain range + list.push_back(d); + + d.identifier = "candidates"; + d.name = "Tempo candidates"; + d.description = "Possible tempo estimates, one per bin with the most likely in the first bin"; + d.unit = "bpm"; + d.hasFixedBinCount = false; + list.push_back(d); + + d.identifier = "detectionfunction"; + d.name = "Detection Function"; + d.description = "Onset detection function"; + d.unit = ""; + d.hasFixedBinCount = 1; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = 0.0; + d.maxValue = 1.0; + d.isQuantized = false; + d.quantizeStep = 0.0; + d.sampleType = OutputDescriptor::FixedSampleRate; + if (m_stepSize) { + d.sampleRate = m_inputSampleRate / m_stepSize; + } else { + d.sampleRate = m_inputSampleRate / (getPreferredBlockSize()/2); + } + d.hasDuration = false; + list.push_back(d); + + d.identifier = "acf"; + d.name = "Autocorrelation Function"; + d.description = "Autocorrelation of onset detection function"; + d.hasKnownExtents = false; + d.unit = "r"; + list.push_back(d); + + d.identifier = "filtered_acf"; + d.name = "Filtered Autocorrelation"; + d.description = "Filtered autocorrelation of onset detection function"; + d.unit = "r"; + list.push_back(d); + + return list; +} + +FixedTempoEstimator::FeatureSet +FixedTempoEstimator::process(const float *const *inputBuffers, RealTime ts) +{ + FeatureSet fs; + + if (m_stepSize == 0) { + cerr << "ERROR: FixedTempoEstimator::process: " + << "FixedTempoEstimator has not been initialised" + << endl; + return fs; + } + +// if (m_n < m_dfsize) cerr << "m_n = " << m_n << endl; + + if (m_n == 0) m_start = ts; + m_lasttime = ts; + + if (m_n == m_dfsize) { + calculate(); + fs = assembleFeatures(); + ++m_n; + return fs; + } + + if (m_n > m_dfsize) return FeatureSet(); + + float value = 0.f; + + for (size_t i = 1; i < m_blockSize/2; ++i) { + + float real = inputBuffers[0][i*2]; + float imag = inputBuffers[0][i*2 + 1]; + + float sqrmag = real * real + imag * imag; + value += fabsf(sqrmag - m_priorMagnitudes[i]); + + m_priorMagnitudes[i] = sqrmag; + } + + m_df[m_n] = value; + + ++m_n; + return fs; +} + +FixedTempoEstimator::FeatureSet +FixedTempoEstimator::getRemainingFeatures() +{ + FeatureSet fs; + if (m_n > m_dfsize) return fs; + calculate(); + fs = assembleFeatures(); + ++m_n; + return fs; +} + +float +FixedTempoEstimator::lag2tempo(int lag) +{ + return 60.f / ((lag * m_stepSize) / m_inputSampleRate); +} + +int +FixedTempoEstimator::tempo2lag(float tempo) +{ + return ((60.f / tempo) * m_inputSampleRate) / m_stepSize; +} + +void +FixedTempoEstimator::calculate() +{ + cerr << "FixedTempoEstimator::calculate: m_n = " << m_n << endl; + + if (m_r) { + cerr << "FixedTempoEstimator::calculate: calculation already happened?" << endl; + return; + } + + if (m_n < m_dfsize / 9) { + cerr << "FixedTempoEstimator::calculate: Not enough data to go on (have " << m_n << ", want at least " << m_dfsize/4 << ")" << endl; + return; // not enough data (perhaps we should return the duration of the input as the "estimated" beat length?) + } + + int n = m_n; + + m_r = new float[n/2]; + m_fr = new float[n/2]; + m_t = new float[n/2]; + + for (int i = 0; i < n/2; ++i) { + m_r[i] = 0.f; + m_fr[i] = 0.f; + m_t[i] = lag2tempo(i); + } + + for (int i = 0; i < n/2; ++i) { + + for (int j = i; j < n-1; ++j) { + m_r[i] += m_df[j] * m_df[j - i]; + } + + m_r[i] /= n - i - 1; + } + + float related[] = { 0.5, 2, 3, 4 }; + + for (int i = 1; i < n/2-1; ++i) { + + float weight = 1.f - fabsf(128.f - lag2tempo(i)) * 0.005; + if (weight < 0.f) weight = 0.f; + weight = weight * weight * weight; + + m_fr[i] = m_r[i]; + + int div = 1; + + for (int j = 0; j < int(sizeof(related)/sizeof(related[0])); ++j) { + + int k0 = int(i * related[j] + 0.5); + + if (k0 >= 0 && k0 < int(n/2)) { + + int kmax = 0, kmin = 0; + float kvmax = 0, kvmin = 0; + bool have = false; + + for (int k = k0 - 1; k <= k0 + 1; ++k) { + + if (k < 0 || k >= n/2) continue; + + if (!have || (m_r[k] > kvmax)) { kmax = k; kvmax = m_r[k]; } + if (!have || (m_r[k] < kvmin)) { kmin = k; kvmin = m_r[k]; } + + have = true; + } + + m_fr[i] += m_r[kmax] / 5; + + if ((kmax == 0 || m_r[kmax] > m_r[kmax-1]) && + (kmax == n/2-1 || m_r[kmax] > m_r[kmax+1]) && + kvmax > kvmin * 1.05) { + + m_t[i] = m_t[i] + lag2tempo(kmax) * related[j]; + ++div; + } + } + } + + m_t[i] /= div; + +// if (div > 1) { +// cerr << "adjusting tempo from " << lag2tempo(i) << " to " +// << m_t[i] << " for fr = " << m_fr[i] << " (div = " << div << ")" << endl; +// } + + m_fr[i] += m_fr[i] * (weight / 3); + } +} + + +FixedTempoEstimator::FeatureSet +FixedTempoEstimator::assembleFeatures() +{ + FeatureSet fs; + if (!m_r) return fs; // No results + + Feature feature; + feature.hasTimestamp = true; + feature.hasDuration = false; + feature.label = ""; + feature.values.clear(); + feature.values.push_back(0.f); + + char buffer[40]; + + int n = m_n; + + for (int i = 0; i < n; ++i) { + feature.timestamp = m_start + + RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate); + feature.values[0] = m_df[i]; + feature.label = ""; + fs[DFOutput].push_back(feature); + } + + for (int i = 1; i < n/2; ++i) { + feature.timestamp = m_start + + RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate); + feature.values[0] = m_r[i]; + sprintf(buffer, "%.1f bpm", lag2tempo(i)); + if (i == n/2-1) feature.label = ""; + else feature.label = buffer; + fs[ACFOutput].push_back(feature); + } + + float t0 = 50.f; // our minimum detected tempo (could be a parameter) + float t1 = 190.f; // our maximum detected tempo + + //!!! need some way for the host (or at least, the user) to know + //!!! that it should only pass a certain amount of + //!!! input... e.g. by making the amount configurable + + int p0 = tempo2lag(t1); + int p1 = tempo2lag(t0); + + std::map<float, int> candidates; + + for (int i = p0; i <= p1 && i < n/2-1; ++i) { + + if (m_fr[i] > m_fr[i-1] && + m_fr[i] > m_fr[i+1]) { + candidates[m_fr[i]] = i; + } + + feature.timestamp = m_start + + RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate); + feature.values[0] = m_fr[i]; + sprintf(buffer, "%.1f bpm", lag2tempo(i)); + if (i == p1 || i == n/2-2) feature.label = ""; + else feature.label = buffer; + fs[FilteredACFOutput].push_back(feature); + } + +// cerr << "maxpi = " << maxpi << " for tempo " << lag2tempo(maxpi) << " (value = " << maxp << ")" << endl; + + if (candidates.empty()) { + cerr << "No tempo candidates!" << endl; + return fs; + } + + feature.hasTimestamp = true; + feature.timestamp = m_start; + + feature.hasDuration = true; + feature.duration = m_lasttime - m_start; + + std::map<float, int>::const_iterator ci = candidates.end(); + --ci; + int maxpi = ci->second; + + if (m_t[maxpi] > 0) { + cerr << "*** Using adjusted tempo " << m_t[maxpi] << " instead of lag tempo " << lag2tempo(maxpi) << endl; + feature.values[0] = m_t[maxpi]; + } else { + // shouldn't happen -- it would imply that this high value was not a peak! + feature.values[0] = lag2tempo(maxpi); + cerr << "WARNING: No stored tempo for index " << maxpi << endl; + } + + sprintf(buffer, "%.1f bpm", feature.values[0]); + feature.label = buffer; + + fs[TempoOutput].push_back(feature); + + feature.values.clear(); + feature.label = ""; + + while (feature.values.size() < 8) { +// cerr << "adding tempo value from lag " << ci->second << endl; + if (m_t[ci->second] > 0) { + feature.values.push_back(m_t[ci->second]); + } else { + feature.values.push_back(lag2tempo(ci->second)); + } + if (ci == candidates.begin()) break; + --ci; + } + + fs[CandidatesOutput].push_back(feature); + + return fs; +}