Mercurial > hg > svcore
changeset 275:522f82311e4e
* Pull peak-picker out of SpectrumLayer and into FFTModel; use combined
peak-picker and frequency estimator for SpectrogramLayer (makes the
peak frequency spectrogram a bit quicker)
* Add more information to spectrum and spectrogram crosshairs
author | Chris Cannam |
---|---|
date | Wed, 04 Jul 2007 15:29:16 +0000 (2007-07-04) |
parents | e412f65884ee |
children | 657825878970 |
files | data/fft/FFTDataServer.cpp data/fft/FFTDataServer.h data/model/FFTModel.cpp data/model/FFTModel.h |
diffstat | 4 files changed, 227 insertions(+), 47 deletions(-) [+] |
line wrap: on
line diff
--- a/data/fft/FFTDataServer.cpp Tue Jul 03 18:47:39 2007 +0000 +++ b/data/fft/FFTDataServer.cpp Wed Jul 04 15:29:16 2007 +0000 @@ -1099,43 +1099,6 @@ } } -bool -FFTDataServer::estimateStableFrequency(size_t x, size_t y, - float sampleRate, float &frequency) -{ - frequency = (float(y) * sampleRate) / m_fftSize; - - if (x+1 >= m_width || y >= m_height) return false; - - // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec. - // At hopsize h and sample rate sr, one hop happens in h/sr sec. - // At window size w, for bin b, f is b*sr/w. - // thus 2pi phase shift happens in w/(b*sr) sec. - // We need to know what phase shift we expect from h/sr sec. - // -> 2pi * ((h/sr) / (w/(b*sr))) - // = 2pi * ((h * b * sr) / (w * sr)) - // = 2pi * (h * b) / w. - - float oldPhase = getPhaseAt(x, y); - float newPhase = getPhaseAt(x+1, y); - - float expectedPhase = - oldPhase + (2.0 * M_PI * y * m_windowIncrement) / m_fftSize; - - float phaseError = princargf(newPhase - expectedPhase); - -// bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize)); - - // The new frequency estimate based on the phase error resulting - // from assuming the "native" frequency of this bin - - frequency = - (sampleRate * (expectedPhase + phaseError - oldPhase)) / - (2 * M_PI * m_windowIncrement); - - return true; -} - size_t FFTDataServer::getFillCompletion() const {
--- a/data/fft/FFTDataServer.h Tue Jul 03 18:47:39 2007 +0000 +++ b/data/fft/FFTDataServer.h Wed Jul 04 15:29:16 2007 +0000 @@ -92,12 +92,6 @@ return getMagnitudeAt(x, y) > threshold; } - // Calculate an estimated frequency for a stable signal in this - // bin, using phase unwrapping. This will be completely wrong if - // the signal is not stable here. - bool estimateStableFrequency(size_t x, size_t y, - float sampleRate, float &frequency); - size_t getFillCompletion() const; size_t getFillExtent() const;
--- a/data/model/FFTModel.cpp Tue Jul 03 18:47:39 2007 +0000 +++ b/data/model/FFTModel.cpp Wed Jul 04 15:29:16 2007 +0000 @@ -17,6 +17,7 @@ #include "DenseTimeValueModel.h" #include "base/Profiler.h" +#include "base/Pitch.h" #include <cassert> @@ -103,6 +104,198 @@ return name; } +bool +FFTModel::estimateStableFrequency(size_t x, size_t y, float &frequency) +{ + if (!isOK()) return false; + + size_t sampleRate = m_server->getModel()->getSampleRate(); + + size_t fftSize = m_server->getFFTSize() >> m_yshift; + frequency = (float(y) * sampleRate) / fftSize; + + if (x+1 >= getWidth()) return false; + + // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec. + // At hopsize h and sample rate sr, one hop happens in h/sr sec. + // At window size w, for bin b, f is b*sr/w. + // thus 2pi phase shift happens in w/(b*sr) sec. + // We need to know what phase shift we expect from h/sr sec. + // -> 2pi * ((h/sr) / (w/(b*sr))) + // = 2pi * ((h * b * sr) / (w * sr)) + // = 2pi * (h * b) / w. 
+ + float oldPhase = getPhaseAt(x, y); + float newPhase = getPhaseAt(x+1, y); + + size_t incr = getResolution(); + + float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize; + + float phaseError = princargf(newPhase - expectedPhase); + +// bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize)); + + // The new frequency estimate based on the phase error resulting + // from assuming the "native" frequency of this bin + + frequency = + (sampleRate * (expectedPhase + phaseError - oldPhase)) / + (2 * M_PI * incr); + + return true; +} + +FFTModel::PeakLocationSet +FFTModel::getPeaks(PeakPickType type, size_t x, size_t ymin, size_t ymax) +{ + FFTModel::PeakLocationSet peaks; + if (!isOK()) return peaks; + + if (ymax == 0 || ymax > getHeight() - 1) { + ymax = getHeight() - 1; + } + + Column values; + + if (type == AllPeaks) { + for (size_t y = ymin; y <= ymax; ++y) { + values.push_back(getMagnitudeAt(x, y)); + } + size_t i = 0; + for (size_t bin = ymin; bin <= ymax; ++bin) { + if ((i == 0 || values[i] > values[i-1]) && + (i == values.size()-1 || values[i] >= values[i+1])) { + peaks.insert(bin); + } + ++i; + } + return peaks; + } + + getColumn(x, values); + + // For peak picking we use a moving median window, picking the + // highest value within each continuous region of values that + // exceed the median. For pitch adaptivity, we adjust the window + // size to a roughly constant pitch range (about four semitones). 
+ + size_t sampleRate = getSampleRate(); + + std::deque<float> window; + std::vector<size_t> inrange; + size_t medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin); + size_t halfWin = medianWinSize/2; + + size_t binmin; + if (ymin > halfWin) binmin = ymin - halfWin; + else binmin = 0; + + size_t binmax; + if (ymax + halfWin < values.size()) binmax = ymax + halfWin; + else binmax = values.size()-1; + + for (size_t bin = binmin; bin <= binmax; ++bin) { + + float value = values[bin]; + + window.push_back(value); + + medianWinSize = getPeakPickWindowSize(type, sampleRate, bin); + halfWin = medianWinSize/2; + + while (window.size() > medianWinSize) window.pop_front(); + + if (type == MajorPitchAdaptivePeaks) { + if (ymax + halfWin < values.size()) binmax = ymax + halfWin; + else binmax = values.size()-1; + } + + std::deque<float> sorted(window); + std::sort(sorted.begin(), sorted.end()); + float median = sorted[sorted.size()/2]; + + if (value > median) { + inrange.push_back(bin); + } + + if (value <= median || bin+1 == values.size()) { + size_t peakbin = 0; + float peakval = 0.f; + if (!inrange.empty()) { + for (size_t i = 0; i < inrange.size(); ++i) { + if (i == 0 || values[inrange[i]] > peakval) { + peakval = values[inrange[i]]; + peakbin = inrange[i]; + } + } + inrange.clear(); + if (peakbin >= ymin && peakbin <= ymax) { + peaks.insert(peakbin); + } + } + } + } + + return peaks; +} + +size_t +FFTModel::getPeakPickWindowSize(PeakPickType type, size_t sampleRate, size_t bin) const +{ + if (type == MajorPeaks) return 10; + if (bin == 0) return 3; + size_t fftSize = m_server->getFFTSize() >> m_yshift; + float binfreq = (sampleRate * bin) / fftSize; + float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq); + int hibin = lrintf((hifreq * fftSize) / sampleRate); + int medianWinSize = hibin - bin; + if (medianWinSize < 3) medianWinSize = 3; + return medianWinSize; +} + +FFTModel::PeakSet +FFTModel::getPeakFrequencies(PeakPickType type, size_t x, + size_t ymin, 
size_t ymax) +{ + PeakSet peaks; + if (!isOK()) return peaks; + PeakLocationSet locations = getPeaks(type, x, ymin, ymax); + + size_t sampleRate = getSampleRate(); + size_t fftSize = m_server->getFFTSize() >> m_yshift; + size_t incr = getResolution(); + + // This duplicates some of the work of estimateStableFrequency to + // allow us to retrieve the phases in two separate vertical + // columns, instead of jumping back and forth between columns x and + // x+1, which may be significantly slower if re-seeking is needed + + std::vector<float> phases; + for (PeakLocationSet::iterator i = locations.begin(); + i != locations.end(); ++i) { + phases.push_back(getPhaseAt(x, *i)); + } + + size_t phaseIndex = 0; + for (PeakLocationSet::iterator i = locations.begin(); + i != locations.end(); ++i) { + float oldPhase = phases[phaseIndex]; + float newPhase = getPhaseAt(x+1, *i); + float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize; + float phaseError = princargf(newPhase - expectedPhase); + float frequency = + (sampleRate * (expectedPhase + phaseError - oldPhase)) + / (2 * M_PI * incr); +// bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize)); +// if (stable) + peaks[*i] = frequency; + ++phaseIndex; + } + + return peaks; +} + Model * FFTModel::clone() const {
--- a/data/model/FFTModel.h Tue Jul 03 18:47:39 2007 +0000 +++ b/data/model/FFTModel.h Wed Jul 04 15:29:16 2007 +0000 @@ -19,6 +19,9 @@ #include "data/fft/FFTDataServer.h" #include "DenseThreeDimensionalModel.h" +#include <set> +#include <map> + /** * An implementation of DenseThreeDimensionalModel that makes FFT data * derived from a DenseTimeValueModel available as a generic data grid. @@ -123,10 +126,35 @@ virtual void getColumn(size_t x, Column &result) const; virtual QString getBinName(size_t n) const; - virtual bool estimateStableFrequency(size_t x, size_t y, float &frequency) { - return m_server->estimateStableFrequency(x << m_xshift, y << m_yshift, - getSampleRate(), frequency); - } + /** + * Calculate an estimated frequency for a stable signal in this + * bin, using phase unwrapping. This will be completely wrong if + * the signal is not stable here. + */ + virtual bool estimateStableFrequency(size_t x, size_t y, float &frequency); + + enum PeakPickType + { + AllPeaks, /// Any bin exceeding its immediate neighbours + MajorPeaks, /// Peaks picked using sliding median window + MajorPitchAdaptivePeaks /// Bigger window for higher frequencies + }; + + typedef std::set<size_t> PeakLocationSet; + typedef std::map<size_t, float> PeakSet; + + /** + * Return locations of peak bins in the range [ymin,ymax]. If + * ymax is zero, getHeight()-1 will be used. + */ + virtual PeakLocationSet getPeaks(PeakPickType type, size_t x, + size_t ymin = 0, size_t ymax = 0); + + /** + * Return locations and estimated stable frequencies of peak bins. + */ + virtual PeakSet getPeakFrequencies(PeakPickType type, size_t x, + size_t ymin = 0, size_t ymax = 0); virtual int getCompletion() const { return m_server->getFillCompletion(); } @@ -143,6 +171,8 @@ FFTDataServer *m_server; int m_xshift; int m_yshift; + + size_t getPeakPickWindowSize(PeakPickType type, size_t sampleRate, size_t bin) const; }; #endif