cannam@227: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ cannam@227: cannam@227: /* cannam@227: Vamp cannam@227: cannam@227: An API for audio analysis and feature extraction plugins. cannam@227: cannam@227: Centre for Digital Music, Queen Mary, University of London. cannam@227: Copyright 2006-2007 Chris Cannam and QMUL. cannam@227: cannam@227: This file is based in part on Don Cross's public domain FFT cannam@227: implementation. cannam@227: cannam@227: Permission is hereby granted, free of charge, to any person cannam@227: obtaining a copy of this software and associated documentation cannam@227: files (the "Software"), to deal in the Software without cannam@227: restriction, including without limitation the rights to use, copy, cannam@227: modify, merge, publish, distribute, sublicense, and/or sell copies cannam@227: of the Software, and to permit persons to whom the Software is cannam@227: furnished to do so, subject to the following conditions: cannam@227: cannam@227: The above copyright notice and this permission notice shall be cannam@227: included in all copies or substantial portions of the Software. cannam@227: cannam@227: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, cannam@227: EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF cannam@227: MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND cannam@227: NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR cannam@227: ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF cannam@227: CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION cannam@227: WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. cannam@227: cannam@227: Except as contained in this notice, the names of the Centre for cannam@227: Digital Music; Queen Mary, University of London; and Chris Cannam cannam@227: shall not be used in advertising or otherwise to promote the sale, cannam@227: use or other dealings in this Software without prior written cannam@227: authorization. cannam@227: */ cannam@227: cannam@227: #include "PluginInputDomainAdapter.h" cannam@227: cannam@227: #include cannam@227: cannam@227: cannam@227: /** cannam@227: * If you want to compile using FFTW instead of the built-in FFT cannam@227: * implementation for the PluginInputDomainAdapter, define HAVE_FFTW3 cannam@227: * in the Makefile. cannam@227: * cannam@227: * Be aware that FFTW is licensed under the GPL -- unlike this SDK, cannam@227: * which is provided under a more liberal BSD license in order to cannam@227: * permit use in closed source applications. The use of FFTW would cannam@227: * mean that your code would need to be licensed under the GPL as cannam@227: * well. Do not define this symbol unless you understand and accept cannam@227: * the implications of this. cannam@227: * cannam@227: * Parties such as Linux distribution packagers who redistribute this cannam@227: * SDK for use in other programs should _not_ define this symbol, as cannam@227: * it would change the effective licensing terms under which the SDK cannam@227: * was available to third party developers. cannam@227: * cannam@227: * The default is not to use FFTW, and to use the built-in FFT instead. cannam@227: * cannam@227: * Note: The FFTW code uses FFTW_MEASURE, and so will perform badly on cannam@227: * its first invocation unless the host has saved and restored FFTW cannam@227: * wisdom (see the FFTW documentation). cannam@227: */ cannam@227: #ifdef HAVE_FFTW3 cannam@227: #include cannam@227: #endif cannam@227: cannam@227: cannam@227: namespace Vamp { cannam@227: cannam@227: namespace HostExt { cannam@227: cannam@227: class PluginInputDomainAdapter::Impl cannam@227: { cannam@227: public: cannam@227: Impl(Plugin *plugin, float inputSampleRate); cannam@227: ~Impl(); cannam@227: cannam@227: bool initialise(size_t channels, size_t stepSize, size_t blockSize); cannam@227: cannam@227: size_t getPreferredStepSize() const; cannam@227: size_t getPreferredBlockSize() const; cannam@227: cannam@227: FeatureSet process(const float *const *inputBuffers, RealTime timestamp); cannam@227: cannam@227: RealTime getTimestampAdjustment() const; cannam@227: cannam@227: protected: cannam@227: Plugin *m_plugin; cannam@227: float m_inputSampleRate; cannam@227: int m_channels; cannam@227: int m_blockSize; cannam@227: float **m_freqbuf; cannam@227: cannam@227: double *m_ri; cannam@227: double *m_window; cannam@227: cannam@227: #ifdef HAVE_FFTW3 cannam@227: fftw_plan m_plan; cannam@227: fftw_complex *m_cbuf; cannam@227: #else cannam@227: double *m_ro; cannam@227: double *m_io; cannam@227: void fft(unsigned int n, bool inverse, cannam@227: double *ri, double *ii, double *ro, double *io); cannam@227: #endif cannam@227: cannam@227: size_t makeBlockSizeAcceptable(size_t) const; cannam@227: }; cannam@227: cannam@227: PluginInputDomainAdapter::PluginInputDomainAdapter(Plugin *plugin) : cannam@227: PluginWrapper(plugin) cannam@227: { cannam@227: m_impl = new Impl(plugin, m_inputSampleRate); cannam@227: } cannam@227: cannam@227: PluginInputDomainAdapter::~PluginInputDomainAdapter() cannam@227: { cannam@227: delete m_impl; cannam@227: } cannam@227: cannam@227: bool cannam@227: PluginInputDomainAdapter::initialise(size_t channels, size_t stepSize, size_t blockSize) cannam@227: { cannam@227: return m_impl->initialise(channels, stepSize, blockSize); cannam@227: } cannam@227: cannam@227: Plugin::InputDomain cannam@227: PluginInputDomainAdapter::getInputDomain() const cannam@227: { cannam@227: return TimeDomain; cannam@227: } cannam@227: cannam@227: size_t cannam@227: PluginInputDomainAdapter::getPreferredStepSize() const cannam@227: { cannam@227: return m_impl->getPreferredStepSize(); cannam@227: } cannam@227: cannam@227: size_t cannam@227: PluginInputDomainAdapter::getPreferredBlockSize() const cannam@227: { cannam@227: return m_impl->getPreferredBlockSize(); cannam@227: } cannam@227: cannam@227: Plugin::FeatureSet cannam@227: PluginInputDomainAdapter::process(const float *const *inputBuffers, RealTime timestamp) cannam@227: { cannam@227: return m_impl->process(inputBuffers, timestamp); cannam@227: } cannam@227: cannam@227: RealTime cannam@227: PluginInputDomainAdapter::getTimestampAdjustment() const cannam@227: { cannam@227: return m_impl->getTimestampAdjustment(); cannam@227: } cannam@227: cannam@227: cannam@227: PluginInputDomainAdapter::Impl::Impl(Plugin *plugin, float inputSampleRate) : cannam@227: m_plugin(plugin), cannam@227: m_inputSampleRate(inputSampleRate), cannam@227: m_channels(0), cannam@227: m_blockSize(0), cannam@227: m_freqbuf(0), cannam@227: m_ri(0), cannam@227: m_window(0), cannam@227: #ifdef HAVE_FFTW3 cannam@227: m_plan(0), cannam@227: m_cbuf(0) cannam@227: #else cannam@227: m_ro(0), cannam@227: m_io(0) cannam@227: #endif cannam@227: { cannam@227: } cannam@227: cannam@227: PluginInputDomainAdapter::Impl::~Impl() cannam@227: { cannam@227: // the adapter will delete the plugin cannam@227: cannam@227: if (m_channels > 0) { cannam@227: for (int c = 0; c < m_channels; ++c) { cannam@227: delete[] m_freqbuf[c]; cannam@227: } cannam@227: delete[] m_freqbuf; cannam@227: #ifdef HAVE_FFTW3 cannam@227: if (m_plan) { cannam@227: fftw_destroy_plan(m_plan); cannam@227: fftw_free(m_ri); cannam@227: fftw_free(m_cbuf); cannam@227: m_plan = 0; cannam@227: } cannam@227: #else cannam@227: delete[] m_ri; cannam@227: delete[] m_ro; cannam@227: delete[] m_io; cannam@227: #endif cannam@227: delete[] m_window; cannam@227: } cannam@227: } cannam@227: cannam@227: // for some visual studii apparently cannam@227: #ifndef M_PI cannam@227: #define M_PI 3.14159265358979232846 cannam@227: #endif cannam@227: cannam@227: bool cannam@227: PluginInputDomainAdapter::Impl::initialise(size_t channels, size_t stepSize, size_t blockSize) cannam@227: { cannam@227: if (m_plugin->getInputDomain() == TimeDomain) { cannam@227: cannam@227: m_blockSize = int(blockSize); cannam@227: m_channels = int(channels); cannam@227: cannam@227: return m_plugin->initialise(channels, stepSize, blockSize); cannam@227: } cannam@227: cannam@227: if (blockSize < 2) { cannam@227: std::cerr << "ERROR: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: blocksize < 2 not supported" << std::endl; cannam@227: return false; cannam@227: } cannam@227: cannam@227: if (blockSize & (blockSize-1)) { cannam@227: std::cerr << "ERROR: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: non-power-of-two\nblocksize " << blockSize << " not supported" << std::endl; cannam@227: return false; cannam@227: } cannam@227: cannam@227: if (m_channels > 0) { cannam@227: for (int c = 0; c < m_channels; ++c) { cannam@227: delete[] m_freqbuf[c]; cannam@227: } cannam@227: delete[] m_freqbuf; cannam@227: #ifdef HAVE_FFTW3 cannam@227: if (m_plan) { cannam@227: fftw_destroy_plan(m_plan); cannam@227: fftw_free(m_ri); cannam@227: fftw_free(m_cbuf); cannam@227: m_plan = 0; cannam@227: } cannam@227: #else cannam@227: delete[] m_ri; cannam@227: delete[] m_ro; cannam@227: delete[] m_io; cannam@227: #endif cannam@227: delete[] m_window; cannam@227: } cannam@227: cannam@227: m_blockSize = int(blockSize); cannam@227: m_channels = int(channels); cannam@227: cannam@227: m_freqbuf = new float *[m_channels]; cannam@227: for (int c = 0; c < m_channels; ++c) { cannam@227: m_freqbuf[c] = new float[m_blockSize + 2]; cannam@227: } cannam@227: m_window = new double[m_blockSize]; cannam@227: cannam@227: for (int i = 0; i < m_blockSize; ++i) { cannam@227: // Hanning window cannam@227: m_window[i] = (0.50 - 0.50 * cos((2.0 * M_PI * i) / m_blockSize)); cannam@227: } cannam@227: cannam@227: #ifdef HAVE_FFTW3 cannam@227: m_ri = (double *)fftw_malloc(blockSize * sizeof(double)); cannam@227: m_cbuf = (fftw_complex *)fftw_malloc((blockSize/2 + 1) * sizeof(fftw_complex)); cannam@227: m_plan = fftw_plan_dft_r2c_1d(blockSize, m_ri, m_cbuf, FFTW_MEASURE); cannam@227: #else cannam@227: m_ri = new double[m_blockSize]; cannam@227: m_ro = new double[m_blockSize]; cannam@227: m_io = new double[m_blockSize]; cannam@227: #endif cannam@227: cannam@227: return m_plugin->initialise(channels, stepSize, blockSize); cannam@227: } cannam@227: cannam@227: size_t cannam@227: PluginInputDomainAdapter::Impl::getPreferredStepSize() const cannam@227: { cannam@227: size_t step = m_plugin->getPreferredStepSize(); cannam@227: cannam@227: if (step == 0 && (m_plugin->getInputDomain() == FrequencyDomain)) { cannam@227: step = getPreferredBlockSize() / 2; cannam@227: } cannam@227: cannam@227: return step; cannam@227: } cannam@227: cannam@227: size_t cannam@227: PluginInputDomainAdapter::Impl::getPreferredBlockSize() const cannam@227: { cannam@227: size_t block = m_plugin->getPreferredBlockSize(); cannam@227: cannam@227: if (m_plugin->getInputDomain() == FrequencyDomain) { cannam@227: if (block == 0) { cannam@227: block = 1024; cannam@227: } else { cannam@227: block = makeBlockSizeAcceptable(block); cannam@227: } cannam@227: } cannam@227: cannam@227: return block; cannam@227: } cannam@227: cannam@227: size_t cannam@227: PluginInputDomainAdapter::Impl::makeBlockSizeAcceptable(size_t blockSize) const cannam@227: { cannam@227: if (blockSize < 2) { cannam@227: cannam@227: std::cerr << "WARNING: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: blocksize < 2 not" << std::endl cannam@227: << "supported, increasing from " << blockSize << " to 2" << std::endl; cannam@227: blockSize = 2; cannam@227: cannam@227: } else if (blockSize & (blockSize-1)) { cannam@227: cannam@227: #ifdef HAVE_FFTW3 cannam@227: // not an issue with FFTW cannam@227: #else cannam@227: cannam@227: // not a power of two, can't handle that with our built-in FFT cannam@227: // implementation cannam@227: cannam@227: size_t nearest = blockSize; cannam@227: size_t power = 0; cannam@227: while (nearest > 1) { cannam@227: nearest >>= 1; cannam@227: ++power; cannam@227: } cannam@227: nearest = 1; cannam@227: while (power) { cannam@227: nearest <<= 1; cannam@227: --power; cannam@227: } cannam@227: cannam@227: if (blockSize - nearest > (nearest*2) - blockSize) { cannam@227: nearest = nearest*2; cannam@227: } cannam@227: cannam@227: std::cerr << "WARNING: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: non-power-of-two\nblocksize " << blockSize << " not supported, using blocksize " << nearest << " instead" << std::endl; cannam@227: blockSize = nearest; cannam@227: cannam@227: #endif cannam@227: } cannam@227: cannam@227: return blockSize; cannam@227: } cannam@227: cannam@227: RealTime cannam@227: PluginInputDomainAdapter::Impl::getTimestampAdjustment() const cannam@227: { cannam@227: if (m_plugin->getInputDomain() == TimeDomain) { cannam@227: return RealTime::zeroTime; cannam@227: } else { cannam@227: return RealTime::frame2RealTime cannam@227: (m_blockSize/2, int(m_inputSampleRate + 0.5)); cannam@227: } cannam@227: } cannam@227: cannam@227: Plugin::FeatureSet cannam@227: PluginInputDomainAdapter::Impl::process(const float *const *inputBuffers, cannam@227: RealTime timestamp) cannam@227: { cannam@227: if (m_plugin->getInputDomain() == TimeDomain) { cannam@227: return m_plugin->process(inputBuffers, timestamp); cannam@227: } cannam@227: cannam@227: // The timestamp supplied should be (according to the Vamp::Plugin cannam@227: // spec) the time of the start of the time-domain input block. cannam@227: // However, we want to pass to the plugin an FFT output calculated cannam@227: // from the block of samples _centred_ on that timestamp. cannam@227: // cannam@227: // We have two options: cannam@227: // cannam@227: // 1. Buffer the input, calculating the fft of the values at the cannam@227: // passed-in block minus blockSize/2 rather than starting at the cannam@227: // passed-in block. So each time we call process on the plugin, cannam@227: // we are passing in the same timestamp as was passed to our own cannam@227: // process plugin, but not (the frequency domain representation cannam@227: // of) the same set of samples. Advantages: avoids confusion in cannam@227: // the host by ensuring the returned values have timestamps cannam@227: // comparable with that passed in to this function (in fact this cannam@227: // is pretty much essential for one-value-per-block outputs); cannam@227: // consistent with hosts such as SV that deal with the cannam@227: // frequency-domain transform themselves. Disadvantages: means cannam@227: // making the not necessarily correct assumption that the samples cannam@227: // preceding the first official block are all zero (or some other cannam@227: // known value). cannam@227: // cannam@227: // 2. Increase the passed-in timestamps by half the blocksize. So cannam@227: // when we call process, we are passing in the frequency domain cannam@227: // representation of the same set of samples as passed to us, but cannam@227: // with a different timestamp. Advantages: simplicity; avoids cannam@227: // iffy assumption mentioned above. Disadvantages: inconsistency cannam@227: // with SV in cases where stepSize != blockSize/2; potential cannam@227: // confusion arising from returned timestamps being calculated cannam@227: // from the adjusted input timestamps rather than the original cannam@227: // ones (and inaccuracy where the returned timestamp is implied, cannam@227: // as in one-value-per-block). cannam@227: // cannam@227: // Neither way is ideal, but I don't think either is strictly cannam@227: // incorrect either. I think this is just a case where the same cannam@227: // plugin can legitimately produce differing results from the same cannam@227: // input data, depending on how that data is packaged. cannam@227: // cannam@227: // We'll go for option 2, adjusting the timestamps. Note in cannam@227: // particular that this means some results can differ from those cannam@227: // produced by SV. cannam@227: cannam@227: // std::cerr << "PluginInputDomainAdapter: sampleRate " << m_inputSampleRate << ", blocksize " << m_blockSize << ", adjusting time from " << timestamp; cannam@227: cannam@227: timestamp = timestamp + getTimestampAdjustment(); cannam@227: cannam@227: // std::cerr << " to " << timestamp << std::endl; cannam@227: cannam@227: for (int c = 0; c < m_channels; ++c) { cannam@227: cannam@227: for (int i = 0; i < m_blockSize; ++i) { cannam@227: m_ri[i] = double(inputBuffers[c][i]) * m_window[i]; cannam@227: } cannam@227: cannam@227: for (int i = 0; i < m_blockSize/2; ++i) { cannam@227: // FFT shift cannam@227: double value = m_ri[i]; cannam@227: m_ri[i] = m_ri[i + m_blockSize/2]; cannam@227: m_ri[i + m_blockSize/2] = value; cannam@227: } cannam@227: cannam@227: #ifdef HAVE_FFTW3 cannam@227: cannam@227: fftw_execute(m_plan); cannam@227: cannam@227: for (int i = 0; i <= m_blockSize/2; ++i) { cannam@227: m_freqbuf[c][i * 2] = float(m_cbuf[i][0]); cannam@227: m_freqbuf[c][i * 2 + 1] = float(m_cbuf[i][1]); cannam@227: } cannam@227: cannam@227: #else cannam@227: cannam@227: fft(m_blockSize, false, m_ri, 0, m_ro, m_io); cannam@227: cannam@227: for (int i = 0; i <= m_blockSize/2; ++i) { cannam@227: m_freqbuf[c][i * 2] = float(m_ro[i]); cannam@227: m_freqbuf[c][i * 2 + 1] = float(m_io[i]); cannam@227: } cannam@227: cannam@227: #endif cannam@227: } cannam@227: cannam@227: return m_plugin->process(m_freqbuf, timestamp); cannam@227: } cannam@227: cannam@227: #ifndef HAVE_FFTW3 cannam@227: cannam@227: void cannam@227: PluginInputDomainAdapter::Impl::fft(unsigned int n, bool inverse, cannam@227: double *ri, double *ii, double *ro, double *io) cannam@227: { cannam@227: if (!ri || !ro || !io) return; cannam@227: cannam@227: unsigned int bits; cannam@227: unsigned int i, j, k, m; cannam@227: unsigned int blockSize, blockEnd; cannam@227: cannam@227: double tr, ti; cannam@227: cannam@227: if (n < 2) return; cannam@227: if (n & (n-1)) return; cannam@227: cannam@227: double angle = 2.0 * M_PI; cannam@227: if (inverse) angle = -angle; cannam@227: cannam@227: for (i = 0; ; ++i) { cannam@227: if (n & (1 << i)) { cannam@227: bits = i; cannam@227: break; cannam@227: } cannam@227: } cannam@227: cannam@227: static unsigned int tableSize = 0; cannam@227: static int *table = 0; cannam@227: cannam@227: if (tableSize != n) { cannam@227: cannam@227: delete[] table; cannam@227: cannam@227: table = new int[n]; cannam@227: cannam@227: for (i = 0; i < n; ++i) { cannam@227: cannam@227: m = i; cannam@227: cannam@227: for (j = k = 0; j < bits; ++j) { cannam@227: k = (k << 1) | (m & 1); cannam@227: m >>= 1; cannam@227: } cannam@227: cannam@227: table[i] = k; cannam@227: } cannam@227: cannam@227: tableSize = n; cannam@227: } cannam@227: cannam@227: if (ii) { cannam@227: for (i = 0; i < n; ++i) { cannam@227: ro[table[i]] = ri[i]; cannam@227: io[table[i]] = ii[i]; cannam@227: } cannam@227: } else { cannam@227: for (i = 0; i < n; ++i) { cannam@227: ro[table[i]] = ri[i]; cannam@227: io[table[i]] = 0.0; cannam@227: } cannam@227: } cannam@227: cannam@227: blockEnd = 1; cannam@227: cannam@227: for (blockSize = 2; blockSize <= n; blockSize <<= 1) { cannam@227: cannam@227: double delta = angle / (double)blockSize; cannam@227: double sm2 = -sin(-2 * delta); cannam@227: double sm1 = -sin(-delta); cannam@227: double cm2 = cos(-2 * delta); cannam@227: double cm1 = cos(-delta); cannam@227: double w = 2 * cm1; cannam@227: double ar[3], ai[3]; cannam@227: cannam@227: for (i = 0; i < n; i += blockSize) { cannam@227: cannam@227: ar[2] = cm2; cannam@227: ar[1] = cm1; cannam@227: cannam@227: ai[2] = sm2; cannam@227: ai[1] = sm1; cannam@227: cannam@227: for (j = i, m = 0; m < blockEnd; j++, m++) { cannam@227: cannam@227: ar[0] = w * ar[1] - ar[2]; cannam@227: ar[2] = ar[1]; cannam@227: ar[1] = ar[0]; cannam@227: cannam@227: ai[0] = w * ai[1] - ai[2]; cannam@227: ai[2] = ai[1]; cannam@227: ai[1] = ai[0]; cannam@227: cannam@227: k = j + blockEnd; cannam@227: tr = ar[0] * ro[k] - ai[0] * io[k]; cannam@227: ti = ar[0] * io[k] + ai[0] * ro[k]; cannam@227: cannam@227: ro[k] = ro[j] - tr; cannam@227: io[k] = io[j] - ti; cannam@227: cannam@227: ro[j] += tr; cannam@227: io[j] += ti; cannam@227: } cannam@227: } cannam@227: cannam@227: blockEnd = blockSize; cannam@227: } cannam@227: cannam@227: if (inverse) { cannam@227: cannam@227: double denom = (double)n; cannam@227: cannam@227: for (i = 0; i < n; i++) { cannam@227: ro[i] /= denom; cannam@227: io[i] /= denom; cannam@227: } cannam@227: } cannam@227: } cannam@227: cannam@227: #endif cannam@227: cannam@227: } cannam@227: cannam@227: } cannam@227: