Mercurial > hg > lowfreq
view LowFreq.cpp @ 11:ccb271dd0b84 tip
More sensible defaults
author | Chris Cannam |
---|---|
date | Wed, 12 Mar 2014 13:23:29 +0000 |
parents | 0a56b08b373f |
children |
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ /* Low-frequency spectrogram Copyright (c) 2014 Queen Mary, University of London Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Except as contained in this notice, the names of the Centre for Digital Music; Queen Mary, University of London; and Chris Cannam shall not be used in advertising or otherwise to promote the sale, use or other dealings in this Software without prior written authorization. */ #include "LowFreq.h" #include "dsp/rateconversion/Resampler.h" #include "dsp/transforms/FFT.h" #include <cmath> #include <cstdio> using std::cerr; using std::cout; using std::endl; using std::vector; static float minFmin = 0; static float maxFmin = 500; static float defaultFmin = 0.5; static float minFmax = 0.1; static float maxFmax = 500; static float defaultFmax = 40; static int minN = 1; static int maxN = 2048; static int defaultN = 79; LowFreq::LowFreq(float inputSampleRate) : Plugin(inputSampleRate), m_fmin(defaultFmin), m_fmax(defaultFmax), m_n(defaultN), m_blockSize(0), m_roundedInputRate(round(inputSampleRate)), m_drop(0), m_pad(0), m_resampler(0), m_fft(0), m_window(0) { m_nonIntegralInputRate = false; if (fabs(double(m_inputSampleRate) - round(m_inputSampleRate)) > 1e-6) { m_nonIntegralInputRate = true; } if (inputSampleRate / 4 < maxFmax) { maxFmax = inputSampleRate / 4; } } LowFreq::~LowFreq() { delete m_resampler; delete m_fft; delete m_window; } string LowFreq::getIdentifier() const { return "lowfreq"; } string LowFreq::getName() const { return "Low-frequency Spectrogram"; } string LowFreq::getDescription() const { //!!! Return something helpful here! return ""; } string LowFreq::getMaker() const { return "Queen Mary, University of London"; } int LowFreq::getPluginVersion() const { return 1; } string LowFreq::getCopyright() const { return "GPL"; } LowFreq::InputDomain LowFreq::getInputDomain() const { return TimeDomain; } size_t LowFreq::getPreferredBlockSize() const { return 1024; } size_t LowFreq::getPreferredStepSize() const { return 1024; } size_t LowFreq::getMinChannelCount() const { return 1; } size_t LowFreq::getMaxChannelCount() const { return 1; } LowFreq::ParameterList LowFreq::getParameterDescriptors() const { ParameterList list; ParameterDescriptor d; d.identifier = "fmin"; d.name = "Minimum frequency"; d.description = "Low bound for frequencies to include in the spectrogram. This will be rounded up to the next highest bin frequency based on the highest frequency and number of bins."; d.unit = "Hz"; d.minValue = minFmin; d.maxValue = maxFmin; d.defaultValue = defaultFmin; d.isQuantized = false; list.push_back(d); d.identifier = "fmax"; d.name = "Maximum frequency"; d.description = "Highest frequency component to include in the spectrogram."; d.unit = "Hz"; d.minValue = minFmax; d.maxValue = maxFmax; d.defaultValue = defaultFmax; d.isQuantized = false; list.push_back(d); d.identifier = "n"; d.name = "Number of bins"; d.description = "Number of spectrogram bins to return."; d.unit = ""; d.minValue = minN; d.maxValue = maxN; d.defaultValue = defaultN; d.isQuantized = true; d.quantizeStep = 1; list.push_back(d); return list; } float LowFreq::getParameter(string identifier) const { if (identifier == "fmin") { return m_fmin; } else if (identifier == "fmax") { return m_fmax; } else if (identifier == "n") { return m_n; } return 0; } void LowFreq::setParameter(string identifier, float value) { if (identifier == "fmin") { m_fmin = value; } else if (identifier == "fmax") { m_fmax = value; } else if (identifier == "n") { m_n = int(value + 0.01); } } LowFreq::ProgramList LowFreq::getPrograms() const { ProgramList list; return list; } string LowFreq::getCurrentProgram() const { return ""; // no programs } void LowFreq::selectProgram(string name) { } LowFreq::OutputList LowFreq::getOutputDescriptors() const { OutputList list; OutputDescriptor d; d.identifier = "spectrogram"; d.name = "Spectrogram"; d.description = ""; d.unit = ""; d.hasFixedBinCount = true; d.binCount = m_n; d.hasKnownExtents = false; d.isQuantized = false; d.sampleType = OutputDescriptor::FixedSampleRate; d.sampleRate = getOutputSampleRate(); // cerr << "output descriptor effective sample rate = " << d.sampleRate << endl; // cerr << "input sample rate = " << m_inputSampleRate << endl; // cerr << "input rate / output rate = " << m_inputSampleRate / d.sampleRate << endl; char namebuf[50]; for (int i = 0; i < m_n; ++i) { sprintf(namebuf, "%.3gHz", double(getOutputBinFrequency(i))); d.binNames.push_back(namebuf); } d.hasDuration = false; list.push_back(d); return list; } bool LowFreq::initialise(size_t channels, size_t stepSize, size_t blockSize) { if (channels < getMinChannelCount() || channels > getMaxChannelCount()) { cerr << "LowFreq::initialise: ERROR: channels " << channels << " out of acceptable range " << getMinChannelCount() << " -> " << getMaxChannelCount() << endl; return false; } if (stepSize != blockSize) { // We don't actually care what the block size is, but there // must be no overlap cerr << "LowFreq::initialise: ERROR: step size and block size must be " << "equal (" << stepSize << " != " << blockSize << ")" << endl; return false; } if (m_n < minN || m_n > maxN) { cerr << "LowFreq::initialise: ERROR: bin count " << m_n << " out of acceptable range " << minN << " -> " << maxN << endl; return false; } if (m_fmin < minFmin || m_fmin > maxFmin) { cerr << "LowFreq::initialise: ERROR: lowest frequency " << m_fmin << " out of acceptable range " << minFmin << " -> " << maxFmin << endl; return false; } if (m_fmax < minFmax || m_fmax > maxFmax) { cerr << "LowFreq::initialise: ERROR: highest frequency " << m_fmax << " out of acceptable range " << minFmax << " -> " << maxFmax << endl; return false; } if (m_nonIntegralInputRate) { cerr << "LowFreq::initialise: WARNING: input sample rate " << m_inputSampleRate << " is non-integral, output frequencies " << "will be skewed by rounding it to nearest integer" << endl; } m_blockSize = blockSize; reset(); return true; } void LowFreq::reset() { delete m_resampler; delete m_fft; delete m_window; // We want to return a number of bins n between frequencies fmin // and fmax. // The bin frequency for bin i of an n-point fft is fs/n*i (where // fs is the sample rate), with the top half aliasing the bottom // half. To get a max bin frequency of fmax, in theory we can // downsample to a sample rate of fmax*2 and then run an m-point // FFT, where m is sufficient to give us n bins remaining between // fmin and fmax. We get m by calculating fmax / ((fmax-fmin)/n). // However, our resampler introduces artifacts close to its cutoff // frequency within the filter transition band so we probably // shouldn't aim to resample to fmax*2. Instead we resample to // fmax*4 and carry out an FFT of twice the length. (We could // resample closer to fmax, but the sums make my brain hurt.) m_resampler = new Resampler(m_roundedInputRate, getTargetSampleRate()); m_fft = new FFT(getFFTSize()); m_window = new Window<double>(HanningWindow, getFFTSize()); m_buffer = std::vector<double>(); /* cerr << "LowFreq::reset: freq range " << m_fmin << " to " << m_fmax << endl; cerr << "LowFreq::reset: block size " << m_blockSize << ", input sample rate " << m_inputSampleRate << endl; cerr << "LowFreq::reset: target rate " << getTargetSampleRate() << ", target step " << getTargetStepSize() << ", resampler latency " << m_resampler->getLatency() << ", fft size " << getFFTSize() << endl; */ // Resampler's declared latency is its output latency. We need to // drop that number of samples from the start of its output... m_drop = m_resampler->getLatency(); // ...and add enough padding at the end of its input to ensure at // least that number of samples come back. m_pad = int(ceil((double(m_drop) * m_roundedInputRate) / getTargetSampleRate())); // We also need to make sure that the FFT frames are properly // aligned. That is, the first returned column is expected to show // an FFT frame centred on time zero. So we need to pre-pad the // buffer with half the FFT length. for (int i = 0; i < getFFTSize()/2; ++i) { m_buffer.push_back(0.0); } } int LowFreq::getTargetSampleRate() const { int tfs = int(ceil(m_fmax)) * 4; // cerr << "LowFreq::getTargetSampleRate: for range " << m_fmin << " -> " << m_fmax // << " target rate is " << tfs << endl; return tfs; } float LowFreq::getOutputSampleRate() const { return float(getTargetSampleRate()) / float(getTargetStepSize()); } static int gcd(int a, int b) { int c = a % b; if (c == 0) { return b; } else { return gcd(b, c); } } int LowFreq::getTargetStepSize() const { // We need to make sure that m_inputSampleRate / output sample // rate is an integer, otherwise hosts like SV will end up with // rounding error when counting columns with non-integral width in // terms of the input file sample rate. // // This is tricky if we allow the user to configure the step size // (e.g. by specifying a frame overlap for the fft) because the // output sample rate is target rate / target step size, and we // can't easily mess with the target rate, so we need some control // over the step size. // // The simplest way to do this is always use the smallest possible // step size that allows for an integral number of samples (at the // input sample rate) per output frame. That is, targetRate / g, // where g is the gcd of m_inputSampleRate and targetRate. int g = gcd(m_roundedInputRate, getTargetSampleRate()); int step = getTargetSampleRate() / g; if (step < 1) step = 1; return step; } int LowFreq::getFFTSize() const { // See note in reset() above int fftSize = 4 * int(ceil(m_fmax / ((m_fmax - m_fmin) / m_n))); // cerr << "LowFreq::getFFTSize: for range " << m_fmin << " -> " << m_fmax // << " and n = " << m_n << ", fft size is " << fftSize << endl; return fftSize; } int LowFreq::getFirstOutputBin() const { // Frequency of fft bin i is fs/n*i. We want the first bin whose // frequency is at least m_fmin. int first = int(ceil((m_fmin * getFFTSize()) / getTargetSampleRate())); // cerr << "LowFreq::getFirstOutputBin: for range " << m_fmin << " -> " << m_fmax // << " and n = " << m_n << ", bin is " << first << endl; return first; } float LowFreq::getOutputBinFrequency(int i) const { // That is, frequency of the i'th output bin that we actually // return -- the (i + getFirstOutputBin())'th bin in the fft return (float(getTargetSampleRate()) / getFFTSize()) * (i + getFirstOutputBin()); } LowFreq::FeatureSet LowFreq::process(const float *const *inputBuffers, Vamp::RealTime timestamp) { double *data = new double[m_blockSize]; for (int i = 0; i < m_blockSize; ++i) { data[i] = inputBuffers[0][i]; } vector<double> resampled = m_resampler->process(data, m_blockSize); m_buffer.insert(m_buffer.end(), resampled.begin(), resampled.end()); if (m_drop > 0) { int dropHere = m_drop; if (dropHere > int(m_buffer.size())) { dropHere = int(m_buffer.size()); } advanceBy(dropHere); m_drop -= dropHere; } delete[] data; FeatureSet fs; while (int(m_buffer.size()) >= getFFTSize()) { Feature f = processColumn(); fs[0].push_back(f); advance(); } return fs; } LowFreq::FeatureSet LowFreq::getRemainingFeatures() { FeatureSet fs; double *padding = new double[m_pad]; for (int i = 0; i < m_pad; ++i) { padding[i] = 0.0; } vector<double> lastBit = m_resampler->process(padding, m_pad); m_buffer.insert(m_buffer.end(), lastBit.begin(), lastBit.end()); for (int i = 0; i < getFFTSize() * 2 - getTargetStepSize(); ++i) { m_buffer.push_back(0.0); } while (int(m_buffer.size()) >= getFFTSize()) { Feature f = processColumn(); fs[0].push_back(f); advance(); } return fs; } LowFreq::Feature LowFreq::processColumn() { Feature f; int sz = getFFTSize(); double *realOut = new double[sz]; double *imagOut = new double[sz]; double *windowed = new double[sz]; m_window->cut(m_buffer.data(), windowed); m_fft->process(false, windowed, 0, realOut, imagOut); int base = getFirstOutputBin(); for (int i = 0; i < m_n; ++i) { int ix = base + i; float mag = (realOut[ix] * realOut[ix] + imagOut[ix] * imagOut[ix]); f.values.push_back(mag); } return f; } void LowFreq::advance() { advanceBy(getTargetStepSize()); } void LowFreq::advanceBy(int n) { std::vector<double> advanced(m_buffer.data() + n, m_buffer.data() + m_buffer.size()); m_buffer = advanced; }