Chris@152: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@152: Chris@152: /* Chris@152: Sonic Visualiser Chris@152: An audio file viewer and annotation editor. Chris@152: Centre for Digital Music, Queen Mary, University of London. Chris@152: This file copyright 2006 Chris Cannam. Chris@152: Chris@152: This program is free software; you can redistribute it and/or Chris@152: modify it under the terms of the GNU General Public License as Chris@152: published by the Free Software Foundation; either version 2 of the Chris@152: License, or (at your option) any later version. See the file Chris@152: COPYING included with this distribution for more information. Chris@152: */ Chris@152: Chris@152: #include "FFTModel.h" Chris@152: #include "DenseTimeValueModel.h" Chris@297: #include "AggregateWaveModel.h" Chris@152: Chris@183: #include "base/Profiler.h" Chris@275: #include "base/Pitch.h" Chris@183: Chris@152: #include Chris@152: Chris@152: FFTModel::FFTModel(const DenseTimeValueModel *model, Chris@152: int channel, Chris@152: WindowType windowType, Chris@152: size_t windowSize, Chris@152: size_t windowIncrement, Chris@152: size_t fftSize, Chris@152: bool polar, Chris@334: StorageAdviser::Criteria criteria, Chris@152: size_t fillFromColumn) : Chris@152: //!!! ZoomConstraint! Chris@152: m_server(0), Chris@152: m_xshift(0), Chris@152: m_yshift(0) Chris@152: { Chris@297: setSourceModel(const_cast(model)); //!!! hmm. Chris@297: Chris@297: m_server = getServer(model, Chris@297: channel, Chris@297: windowType, Chris@297: windowSize, Chris@297: windowIncrement, Chris@297: fftSize, Chris@297: polar, Chris@334: criteria, Chris@297: fillFromColumn); Chris@152: Chris@200: if (!m_server) return; // caller should check isOK() Chris@200: Chris@152: size_t xratio = windowIncrement / m_server->getWindowIncrement(); Chris@152: size_t yratio = m_server->getFFTSize() / fftSize; Chris@152: Chris@152: while (xratio > 1) { Chris@152: if (xratio & 0x1) { Chris@152: std::cerr << "ERROR: FFTModel: Window increment ratio " Chris@152: << windowIncrement << " / " Chris@152: << m_server->getWindowIncrement() Chris@152: << " must be a power of two" << std::endl; Chris@152: assert(!(xratio & 0x1)); Chris@152: } Chris@152: ++m_xshift; Chris@152: xratio >>= 1; Chris@152: } Chris@152: Chris@152: while (yratio > 1) { Chris@152: if (yratio & 0x1) { Chris@152: std::cerr << "ERROR: FFTModel: FFT size ratio " Chris@152: << m_server->getFFTSize() << " / " << fftSize Chris@152: << " must be a power of two" << std::endl; Chris@152: assert(!(yratio & 0x1)); Chris@152: } Chris@152: ++m_yshift; Chris@152: yratio >>= 1; Chris@152: } Chris@152: } Chris@152: Chris@152: FFTModel::~FFTModel() Chris@152: { Chris@200: if (m_server) FFTDataServer::releaseInstance(m_server); Chris@152: } Chris@152: Chris@297: FFTDataServer * Chris@297: FFTModel::getServer(const DenseTimeValueModel *model, Chris@297: int channel, Chris@297: WindowType windowType, Chris@297: size_t windowSize, Chris@297: size_t windowIncrement, Chris@297: size_t fftSize, Chris@297: bool polar, Chris@334: StorageAdviser::Criteria criteria, Chris@297: size_t fillFromColumn) Chris@297: { Chris@297: // Obviously, an FFT model of channel C (where C != -1) of an Chris@297: // aggregate model is the same as the FFT model of the appropriate Chris@297: // channel of whichever model that aggregate channel is drawn Chris@297: // from. We should use that model here, in case we already have Chris@297: // the data for it or will be wanting the same data again later. Chris@297: Chris@297: // If the channel is -1 (i.e. mixture of all channels), then we Chris@297: // can't do this shortcut unless the aggregate model only has one Chris@297: // channel or contains exactly all of the channels of a single Chris@297: // other model. That isn't very likely -- if it were the case, Chris@297: // why would we be using an aggregate model? Chris@297: Chris@297: if (channel >= 0) { Chris@297: Chris@297: const AggregateWaveModel *aggregate = Chris@297: dynamic_cast(model); Chris@297: Chris@297: if (aggregate && channel < aggregate->getComponentCount()) { Chris@297: Chris@297: AggregateWaveModel::ModelChannelSpec spec = Chris@297: aggregate->getComponent(channel); Chris@297: Chris@297: return getServer(spec.model, Chris@297: spec.channel, Chris@297: windowType, Chris@297: windowSize, Chris@297: windowIncrement, Chris@297: fftSize, Chris@297: polar, Chris@334: criteria, Chris@297: fillFromColumn); Chris@297: } Chris@297: } Chris@297: Chris@297: // The normal case Chris@297: Chris@297: return FFTDataServer::getFuzzyInstance(model, Chris@297: channel, Chris@297: windowType, Chris@297: windowSize, Chris@297: windowIncrement, Chris@297: fftSize, Chris@297: polar, Chris@334: criteria, Chris@297: fillFromColumn); Chris@297: } Chris@297: Chris@152: size_t Chris@152: FFTModel::getSampleRate() const Chris@152: { Chris@152: return isOK() ? m_server->getModel()->getSampleRate() : 0; Chris@152: } Chris@152: Chris@152: void Chris@182: FFTModel::getColumn(size_t x, Column &result) const Chris@152: { Chris@183: Profiler profiler("FFTModel::getColumn", false); Chris@183: Chris@152: result.clear(); Chris@152: size_t height(getHeight()); Chris@152: for (size_t y = 0; y < height; ++y) { Chris@152: result.push_back(const_cast(this)->getMagnitudeAt(x, y)); Chris@152: } Chris@152: } Chris@152: Chris@152: QString Chris@152: FFTModel::getBinName(size_t n) const Chris@152: { Chris@152: size_t sr = getSampleRate(); Chris@152: if (!sr) return ""; Chris@204: QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2)); Chris@152: return name; Chris@152: } Chris@152: Chris@275: bool Chris@275: FFTModel::estimateStableFrequency(size_t x, size_t y, float &frequency) Chris@275: { Chris@275: if (!isOK()) return false; Chris@275: Chris@275: size_t sampleRate = m_server->getModel()->getSampleRate(); Chris@275: Chris@275: size_t fftSize = m_server->getFFTSize() >> m_yshift; Chris@275: frequency = (float(y) * sampleRate) / fftSize; Chris@275: Chris@275: if (x+1 >= getWidth()) return false; Chris@275: Chris@275: // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec. Chris@275: // At hopsize h and sample rate sr, one hop happens in h/sr sec. Chris@275: // At window size w, for bin b, f is b*sr/w. Chris@275: // thus 2pi phase shift happens in w/(b*sr) sec. Chris@275: // We need to know what phase shift we expect from h/sr sec. Chris@275: // -> 2pi * ((h/sr) / (w/(b*sr))) Chris@275: // = 2pi * ((h * b * sr) / (w * sr)) Chris@275: // = 2pi * (h * b) / w. Chris@275: Chris@275: float oldPhase = getPhaseAt(x, y); Chris@275: float newPhase = getPhaseAt(x+1, y); Chris@275: Chris@275: size_t incr = getResolution(); Chris@275: Chris@275: float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize; Chris@275: Chris@275: float phaseError = princargf(newPhase - expectedPhase); Chris@275: Chris@275: // bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize)); Chris@275: Chris@275: // The new frequency estimate based on the phase error resulting Chris@275: // from assuming the "native" frequency of this bin Chris@275: Chris@275: frequency = Chris@275: (sampleRate * (expectedPhase + phaseError - oldPhase)) / Chris@275: (2 * M_PI * incr); Chris@275: Chris@275: return true; Chris@275: } Chris@275: Chris@275: FFTModel::PeakLocationSet Chris@275: FFTModel::getPeaks(PeakPickType type, size_t x, size_t ymin, size_t ymax) Chris@275: { Chris@275: FFTModel::PeakLocationSet peaks; Chris@275: if (!isOK()) return peaks; Chris@275: Chris@275: if (ymax == 0 || ymax > getHeight() - 1) { Chris@275: ymax = getHeight() - 1; Chris@275: } Chris@275: Chris@275: Column values; Chris@275: Chris@275: if (type == AllPeaks) { Chris@275: for (size_t y = ymin; y <= ymax; ++y) { Chris@275: values.push_back(getMagnitudeAt(x, y)); Chris@275: } Chris@275: size_t i = 0; Chris@275: for (size_t bin = ymin; bin <= ymax; ++bin) { Chris@275: if ((i == 0 || values[i] > values[i-1]) && Chris@275: (i == values.size()-1 || values[i] >= values[i+1])) { Chris@275: peaks.insert(bin); Chris@275: } Chris@275: ++i; Chris@275: } Chris@275: return peaks; Chris@275: } Chris@275: Chris@275: getColumn(x, values); Chris@275: Chris@275: // For peak picking we use a moving median window, picking the Chris@275: // highest value within each continuous region of values that Chris@275: // exceed the median. For pitch adaptivity, we adjust the window Chris@275: // size to a roughly constant pitch range (about four tones). Chris@275: Chris@275: size_t sampleRate = getSampleRate(); Chris@275: Chris@275: std::deque window; Chris@275: std::vector inrange; Chris@280: float dist = 0.5; Chris@280: size_t medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist); Chris@275: size_t halfWin = medianWinSize/2; Chris@275: Chris@275: size_t binmin; Chris@275: if (ymin > halfWin) binmin = ymin - halfWin; Chris@275: else binmin = 0; Chris@275: Chris@275: size_t binmax; Chris@275: if (ymax + halfWin < values.size()) binmax = ymax + halfWin; Chris@275: else binmax = values.size()-1; Chris@275: Chris@275: for (size_t bin = binmin; bin <= binmax; ++bin) { Chris@275: Chris@275: float value = values[bin]; Chris@275: Chris@275: window.push_back(value); Chris@275: Chris@280: // so-called median will actually be the dist*100'th percentile Chris@280: medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist); Chris@275: halfWin = medianWinSize/2; Chris@275: Chris@275: while (window.size() > medianWinSize) window.pop_front(); Chris@275: Chris@275: if (type == MajorPitchAdaptivePeaks) { Chris@275: if (ymax + halfWin < values.size()) binmax = ymax + halfWin; Chris@275: else binmax = values.size()-1; Chris@275: } Chris@275: Chris@275: std::deque sorted(window); Chris@275: std::sort(sorted.begin(), sorted.end()); Chris@280: float median = sorted[int(sorted.size() * dist)]; Chris@275: Chris@275: if (value > median) { Chris@275: inrange.push_back(bin); Chris@275: } Chris@275: Chris@275: if (value <= median || bin+1 == values.size()) { Chris@275: size_t peakbin = 0; Chris@275: float peakval = 0.f; Chris@275: if (!inrange.empty()) { Chris@275: for (size_t i = 0; i < inrange.size(); ++i) { Chris@275: if (i == 0 || values[inrange[i]] > peakval) { Chris@275: peakval = values[inrange[i]]; Chris@275: peakbin = inrange[i]; Chris@275: } Chris@275: } Chris@275: inrange.clear(); Chris@275: if (peakbin >= ymin && peakbin <= ymax) { Chris@275: peaks.insert(peakbin); Chris@275: } Chris@275: } Chris@275: } Chris@275: } Chris@275: Chris@275: return peaks; Chris@275: } Chris@275: Chris@275: size_t Chris@280: FFTModel::getPeakPickWindowSize(PeakPickType type, size_t sampleRate, Chris@280: size_t bin, float &percentile) const Chris@275: { Chris@280: percentile = 0.5; Chris@275: if (type == MajorPeaks) return 10; Chris@275: if (bin == 0) return 3; Chris@280: Chris@275: size_t fftSize = m_server->getFFTSize() >> m_yshift; Chris@275: float binfreq = (sampleRate * bin) / fftSize; Chris@275: float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq); Chris@280: Chris@275: int hibin = lrintf((hifreq * fftSize) / sampleRate); Chris@275: int medianWinSize = hibin - bin; Chris@275: if (medianWinSize < 3) medianWinSize = 3; Chris@280: Chris@280: percentile = 0.5 + (binfreq / sampleRate); Chris@280: Chris@275: return medianWinSize; Chris@275: } Chris@275: Chris@275: FFTModel::PeakSet Chris@275: FFTModel::getPeakFrequencies(PeakPickType type, size_t x, Chris@275: size_t ymin, size_t ymax) Chris@275: { Chris@275: PeakSet peaks; Chris@275: if (!isOK()) return peaks; Chris@275: PeakLocationSet locations = getPeaks(type, x, ymin, ymax); Chris@275: Chris@275: size_t sampleRate = getSampleRate(); Chris@275: size_t fftSize = m_server->getFFTSize() >> m_yshift; Chris@275: size_t incr = getResolution(); Chris@275: Chris@275: // This duplicates some of the work of estimateStableFrequency to Chris@275: // allow us to retrieve the phases in two separate vertical Chris@275: // columns, instead of jumping back and forth between columns x and Chris@275: // x+1, which may be significantly slower if re-seeking is needed Chris@275: Chris@275: std::vector phases; Chris@275: for (PeakLocationSet::iterator i = locations.begin(); Chris@275: i != locations.end(); ++i) { Chris@275: phases.push_back(getPhaseAt(x, *i)); Chris@275: } Chris@275: Chris@275: size_t phaseIndex = 0; Chris@275: for (PeakLocationSet::iterator i = locations.begin(); Chris@275: i != locations.end(); ++i) { Chris@275: float oldPhase = phases[phaseIndex]; Chris@275: float newPhase = getPhaseAt(x+1, *i); Chris@275: float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize; Chris@275: float phaseError = princargf(newPhase - expectedPhase); Chris@275: float frequency = Chris@275: (sampleRate * (expectedPhase + phaseError - oldPhase)) Chris@275: / (2 * M_PI * incr); Chris@275: // bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize)); Chris@275: // if (stable) Chris@275: peaks[*i] = frequency; Chris@275: ++phaseIndex; Chris@275: } Chris@275: Chris@275: return peaks; Chris@275: } Chris@275: Chris@152: Model * Chris@152: FFTModel::clone() const Chris@152: { Chris@152: return new FFTModel(*this); Chris@152: } Chris@152: Chris@152: FFTModel::FFTModel(const FFTModel &model) : Chris@152: DenseThreeDimensionalModel(), Chris@152: m_server(model.m_server), Chris@152: m_xshift(model.m_xshift), Chris@152: m_yshift(model.m_yshift) Chris@152: { Chris@152: FFTDataServer::claimInstance(m_server); Chris@152: } Chris@152: