Mercurial > hg > svcore

diff data/model/FFTModel.cpp @ 275:522f82311e4e
* Pull peak-picker out of SpectrumLayer and into FFTModel; use combined peak-picker and frequency estimator for SpectrogramLayer (makes the peak frequency spectrogram a bit quicker) * Add more information to spectrum and spectrogram crosshairs
author: Chris Cannam
date: Wed, 04 Jul 2007 15:29:16 +0000
parents: 29b70bdaacdc
children: daf89d31f45c
--- a/data/model/FFTModel.cpp	Tue Jul 03 18:47:39 2007 +0000
+++ b/data/model/FFTModel.cpp	Wed Jul 04 15:29:16 2007 +0000
@@ -17,6 +17,7 @@
 #include "DenseTimeValueModel.h"
 
 #include "base/Profiler.h"
+#include "base/Pitch.h"
 
 #include <cassert>
 
@@ -103,6 +104,198 @@
     return name;
 }
 
+bool
+FFTModel::estimateStableFrequency(size_t x, size_t y, float &frequency)
+{
+    if (!isOK()) return false;
+
+    size_t sampleRate = m_server->getModel()->getSampleRate();
+
+    size_t fftSize = m_server->getFFTSize() >> m_yshift;
+    frequency = (float(y) * sampleRate) / fftSize;
+
+    if (x+1 >= getWidth()) return false;
+
+    // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
+    // At hopsize h and sample rate sr, one hop happens in h/sr sec.
+    // At window size w, for bin b, f is b*sr/w.
+    // thus 2pi phase shift happens in w/(b*sr) sec.
+    // We need to know what phase shift we expect from h/sr sec.
+    // -> 2pi * ((h/sr) / (w/(b*sr)))
+    //  = 2pi * ((h * b * sr) / (w * sr))
+    //  = 2pi * (h * b) / w.
+
+    float oldPhase = getPhaseAt(x, y);
+    float newPhase = getPhaseAt(x+1, y);
+
+    size_t incr = getResolution();
+
+    float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize;
+
+    float phaseError = princargf(newPhase - expectedPhase);
+
+//    bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize));
+
+    // The new frequency estimate based on the phase error resulting
+    // from assuming the "native" frequency of this bin
+
+    frequency =
+        (sampleRate * (expectedPhase + phaseError - oldPhase)) /
+        (2 * M_PI * incr);
+
+    return true;
+}
+
+FFTModel::PeakLocationSet
+FFTModel::getPeaks(PeakPickType type, size_t x, size_t ymin, size_t ymax)
+{
+    FFTModel::PeakLocationSet peaks;
+    if (!isOK()) return peaks;
+
+    if (ymax == 0 || ymax > getHeight() - 1) {
+        ymax = getHeight() - 1;
+    }
+
+    Column values;
+
+    if (type == AllPeaks) {
+        for (size_t y = ymin; y <= ymax; ++y) {
+            values.push_back(getMagnitudeAt(x, y));
+        }
+        size_t i = 0;
+        for (size_t bin = ymin; bin <= ymax; ++bin) {
+            if ((i == 0 || values[i] > values[i-1]) &&
+                (i == values.size()-1 || values[i] >= values[i+1])) {
+                peaks.insert(bin);
+            }
+            ++i;
+        }
+        return peaks;
+    }
+
+    getColumn(x, values);
+
+    // For peak picking we use a moving median window, picking the
+    // highest value within each continuous region of values that
+    // exceed the median.  For pitch adaptivity, we adjust the window
+    // size to a roughly constant pitch range (about four tones).
+
+    size_t sampleRate = getSampleRate();
+
+    std::deque<float> window;
+    std::vector<size_t> inrange;
+    size_t medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin);
+    size_t halfWin = medianWinSize/2;
+
+    size_t binmin;
+    if (ymin > halfWin) binmin = ymin - halfWin;
+    else binmin = 0;
+
+    size_t binmax;
+    if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
+    else binmax = values.size()-1;
+
+    for (size_t bin = binmin; bin <= binmax; ++bin) {
+
+        float value = values[bin];
+
+        window.push_back(value);
+
+        medianWinSize = getPeakPickWindowSize(type, sampleRate, bin);
+        halfWin = medianWinSize/2;
+
+        while (window.size() > medianWinSize) window.pop_front();
+
+        if (type == MajorPitchAdaptivePeaks) {
+            if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
+            else binmax = values.size()-1;
+        }
+
+        std::deque<float> sorted(window);
+        std::sort(sorted.begin(), sorted.end());
+        float median = sorted[sorted.size()/2];
+
+        if (value > median) {
+            inrange.push_back(bin);
+        }
+
+        if (value <= median || bin+1 == values.size()) {
+            size_t peakbin = 0;
+            float peakval = 0.f;
+            if (!inrange.empty()) {
+                for (size_t i = 0; i < inrange.size(); ++i) {
+                    if (i == 0 || values[inrange[i]] > peakval) {
+                        peakval = values[inrange[i]];
+                        peakbin = inrange[i];
+                    }
+                }
+                inrange.clear();
+                if (peakbin >= ymin && peakbin <= ymax) {
+                    peaks.insert(peakbin);
+                }
+            }
+        }
+    }
+
+    return peaks;
+}
+
+size_t
+FFTModel::getPeakPickWindowSize(PeakPickType type, size_t sampleRate, size_t bin) const
+{
+    if (type == MajorPeaks) return 10;
+    if (bin == 0) return 3;
+    size_t fftSize = m_server->getFFTSize() >> m_yshift;
+    float binfreq = (sampleRate * bin) / fftSize;
+    float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
+    int hibin = lrintf((hifreq * fftSize) / sampleRate);
+    int medianWinSize = hibin - bin;
+    if (medianWinSize < 3) medianWinSize = 3;
+    return medianWinSize;
+}
+
+FFTModel::PeakSet
+FFTModel::getPeakFrequencies(PeakPickType type, size_t x,
+                             size_t ymin, size_t ymax)
+{
+    PeakSet peaks;
+    if (!isOK()) return peaks;
+    PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
+
+    size_t sampleRate = getSampleRate();
+    size_t fftSize = m_server->getFFTSize() >> m_yshift;
+    size_t incr = getResolution();
+
+    // This duplicates some of the work of estimateStableFrequency to
+    // allow us to retrieve the phases in two separate vertical
+    // columns, instead of jumping back and forth between columns x and
+    // x+1, which may be significantly slower if re-seeking is needed
+
+    std::vector<float> phases;
+    for (PeakLocationSet::iterator i = locations.begin();
+         i != locations.end(); ++i) {
+        phases.push_back(getPhaseAt(x, *i));
+    }
+
+    size_t phaseIndex = 0;
+    for (PeakLocationSet::iterator i = locations.begin();
+         i != locations.end(); ++i) {
+        float oldPhase = phases[phaseIndex];
+        float newPhase = getPhaseAt(x+1, *i);
+        float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize;
+        float phaseError = princargf(newPhase - expectedPhase);
+        float frequency =
+            (sampleRate * (expectedPhase + phaseError - oldPhase))
+            / (2 * M_PI * incr);
+//        bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize));
+//        if (stable)
+        peaks[*i] = frequency;
+        ++phaseIndex;
+    }
+
+    return peaks;
+}
+
 Model *
 FFTModel::clone() const
 {
author	Chris Cannam
date	Wed, 04 Jul 2007 15:29:16 +0000
parents	29b70bdaacdc
children	daf89d31f45c