Mercurial > hg > svcore
view data/model/FFTModel.cpp @ 275:522f82311e4e
* Pull peak-picker out of SpectrumLayer and into FFTModel; use combined
peak-picker and frequency estimator for SpectrogramLayer (makes the
peak frequency spectrogram a bit quicker)
* Add more information to spectrum and spectrogram crosshairs
author | Chris Cannam |
---|---|
date | Wed, 04 Jul 2007 15:29:16 +0000 |
parents | 29b70bdaacdc |
children | daf89d31f45c |
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ /* Sonic Visualiser An audio file viewer and annotation editor. Centre for Digital Music, Queen Mary, University of London. This file copyright 2006 Chris Cannam. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. See the file COPYING included with this distribution for more information. */ #include "FFTModel.h" #include "DenseTimeValueModel.h" #include "base/Profiler.h" #include "base/Pitch.h" #include <cassert> FFTModel::FFTModel(const DenseTimeValueModel *model, int channel, WindowType windowType, size_t windowSize, size_t windowIncrement, size_t fftSize, bool polar, size_t fillFromColumn) : //!!! ZoomConstraint! m_server(0), m_xshift(0), m_yshift(0) { m_server = FFTDataServer::getFuzzyInstance(model, channel, windowType, windowSize, windowIncrement, fftSize, polar, fillFromColumn); if (!m_server) return; // caller should check isOK() size_t xratio = windowIncrement / m_server->getWindowIncrement(); size_t yratio = m_server->getFFTSize() / fftSize; while (xratio > 1) { if (xratio & 0x1) { std::cerr << "ERROR: FFTModel: Window increment ratio " << windowIncrement << " / " << m_server->getWindowIncrement() << " must be a power of two" << std::endl; assert(!(xratio & 0x1)); } ++m_xshift; xratio >>= 1; } while (yratio > 1) { if (yratio & 0x1) { std::cerr << "ERROR: FFTModel: FFT size ratio " << m_server->getFFTSize() << " / " << fftSize << " must be a power of two" << std::endl; assert(!(yratio & 0x1)); } ++m_yshift; yratio >>= 1; } } FFTModel::~FFTModel() { if (m_server) FFTDataServer::releaseInstance(m_server); } size_t FFTModel::getSampleRate() const { return isOK() ? m_server->getModel()->getSampleRate() : 0; } void FFTModel::getColumn(size_t x, Column &result) const { Profiler profiler("FFTModel::getColumn", false); result.clear(); size_t height(getHeight()); for (size_t y = 0; y < height; ++y) { result.push_back(const_cast<FFTModel *>(this)->getMagnitudeAt(x, y)); } } QString FFTModel::getBinName(size_t n) const { size_t sr = getSampleRate(); if (!sr) return ""; QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2)); return name; } bool FFTModel::estimateStableFrequency(size_t x, size_t y, float &frequency) { if (!isOK()) return false; size_t sampleRate = m_server->getModel()->getSampleRate(); size_t fftSize = m_server->getFFTSize() >> m_yshift; frequency = (float(y) * sampleRate) / fftSize; if (x+1 >= getWidth()) return false; // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec. // At hopsize h and sample rate sr, one hop happens in h/sr sec. // At window size w, for bin b, f is b*sr/w. // thus 2pi phase shift happens in w/(b*sr) sec. // We need to know what phase shift we expect from h/sr sec. // -> 2pi * ((h/sr) / (w/(b*sr))) // = 2pi * ((h * b * sr) / (w * sr)) // = 2pi * (h * b) / w. float oldPhase = getPhaseAt(x, y); float newPhase = getPhaseAt(x+1, y); size_t incr = getResolution(); float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize; float phaseError = princargf(newPhase - expectedPhase); // bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize)); // The new frequency estimate based on the phase error resulting // from assuming the "native" frequency of this bin frequency = (sampleRate * (expectedPhase + phaseError - oldPhase)) / (2 * M_PI * incr); return true; } FFTModel::PeakLocationSet FFTModel::getPeaks(PeakPickType type, size_t x, size_t ymin, size_t ymax) { FFTModel::PeakLocationSet peaks; if (!isOK()) return peaks; if (ymax == 0 || ymax > getHeight() - 1) { ymax = getHeight() - 1; } Column values; if (type == AllPeaks) { for (size_t y = ymin; y <= ymax; ++y) { values.push_back(getMagnitudeAt(x, y)); } size_t i = 0; for (size_t bin = ymin; bin <= ymax; ++bin) { if ((i == 0 || values[i] > values[i-1]) && (i == values.size()-1 || values[i] >= values[i+1])) { peaks.insert(bin); } ++i; } return peaks; } getColumn(x, values); // For peak picking we use a moving median window, picking the // highest value within each continuous region of values that // exceed the median. For pitch adaptivity, we adjust the window // size to a roughly constant pitch range (about four tones). size_t sampleRate = getSampleRate(); std::deque<float> window; std::vector<size_t> inrange; size_t medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin); size_t halfWin = medianWinSize/2; size_t binmin; if (ymin > halfWin) binmin = ymin - halfWin; else binmin = 0; size_t binmax; if (ymax + halfWin < values.size()) binmax = ymax + halfWin; else binmax = values.size()-1; for (size_t bin = binmin; bin <= binmax; ++bin) { float value = values[bin]; window.push_back(value); medianWinSize = getPeakPickWindowSize(type, sampleRate, bin); halfWin = medianWinSize/2; while (window.size() > medianWinSize) window.pop_front(); if (type == MajorPitchAdaptivePeaks) { if (ymax + halfWin < values.size()) binmax = ymax + halfWin; else binmax = values.size()-1; } std::deque<float> sorted(window); std::sort(sorted.begin(), sorted.end()); float median = sorted[sorted.size()/2]; if (value > median) { inrange.push_back(bin); } if (value <= median || bin+1 == values.size()) { size_t peakbin = 0; float peakval = 0.f; if (!inrange.empty()) { for (size_t i = 0; i < inrange.size(); ++i) { if (i == 0 || values[inrange[i]] > peakval) { peakval = values[inrange[i]]; peakbin = inrange[i]; } } inrange.clear(); if (peakbin >= ymin && peakbin <= ymax) { peaks.insert(peakbin); } } } } return peaks; } size_t FFTModel::getPeakPickWindowSize(PeakPickType type, size_t sampleRate, size_t bin) const { if (type == MajorPeaks) return 10; if (bin == 0) return 3; size_t fftSize = m_server->getFFTSize() >> m_yshift; float binfreq = (sampleRate * bin) / fftSize; float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq); int hibin = lrintf((hifreq * fftSize) / sampleRate); int medianWinSize = hibin - bin; if (medianWinSize < 3) medianWinSize = 3; return medianWinSize; } FFTModel::PeakSet FFTModel::getPeakFrequencies(PeakPickType type, size_t x, size_t ymin, size_t ymax) { PeakSet peaks; if (!isOK()) return peaks; PeakLocationSet locations = getPeaks(type, x, ymin, ymax); size_t sampleRate = getSampleRate(); size_t fftSize = m_server->getFFTSize() >> m_yshift; size_t incr = getResolution(); // This duplicates some of the work of estimateStableFrequency to // allow us to retrieve the phases in two separate vertical // columns, instead of jumping back and forth between columns x and // x+1, which may be significantly slower if re-seeking is needed std::vector<float> phases; for (PeakLocationSet::iterator i = locations.begin(); i != locations.end(); ++i) { phases.push_back(getPhaseAt(x, *i)); } size_t phaseIndex = 0; for (PeakLocationSet::iterator i = locations.begin(); i != locations.end(); ++i) { float oldPhase = phases[phaseIndex]; float newPhase = getPhaseAt(x+1, *i); float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize; float phaseError = princargf(newPhase - expectedPhase); float frequency = (sampleRate * (expectedPhase + phaseError - oldPhase)) / (2 * M_PI * incr); // bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize)); // if (stable) peaks[*i] = frequency; ++phaseIndex; } return peaks; } Model * FFTModel::clone() const { return new FFTModel(*this); } FFTModel::FFTModel(const FFTModel &model) : DenseThreeDimensionalModel(), m_server(model.m_server), m_xshift(model.m_xshift), m_yshift(model.m_yshift) { FFTDataServer::claimInstance(m_server); }