changeset 275:522f82311e4e

* Pull peak-picker out of SpectrumLayer and into FFTModel; use combined peak-picker and frequency estimator for SpectrogramLayer (makes the peak frequency spectrogram a bit quicker) * Add more information to spectrum and spectrogram crosshairs
author Chris Cannam
date Wed, 04 Jul 2007 15:29:16 +0000 (2007-07-04)
parents e412f65884ee
children 657825878970
files data/fft/FFTDataServer.cpp data/fft/FFTDataServer.h data/model/FFTModel.cpp data/model/FFTModel.h
diffstat 4 files changed, 227 insertions(+), 47 deletions(-) [+]
line wrap: on
line diff
--- a/data/fft/FFTDataServer.cpp	Tue Jul 03 18:47:39 2007 +0000
+++ b/data/fft/FFTDataServer.cpp	Wed Jul 04 15:29:16 2007 +0000
@@ -1099,43 +1099,6 @@
     }
 }    
 
-bool
-FFTDataServer::estimateStableFrequency(size_t x, size_t y,
-                                       float sampleRate, float &frequency)
-{
-    frequency = (float(y) * sampleRate) / m_fftSize;
-
-    if (x+1 >= m_width || y >= m_height) return false;
-
-    // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
-    // At hopsize h and sample rate sr, one hop happens in h/sr sec.
-    // At window size w, for bin b, f is b*sr/w.
-    // thus 2pi phase shift happens in w/(b*sr) sec.
-    // We need to know what phase shift we expect from h/sr sec.
-    // -> 2pi * ((h/sr) / (w/(b*sr)))
-    //  = 2pi * ((h * b * sr) / (w * sr))
-    //  = 2pi * (h * b) / w.
-
-    float oldPhase = getPhaseAt(x, y);
-    float newPhase = getPhaseAt(x+1, y);
-
-    float expectedPhase =
-	oldPhase + (2.0 * M_PI * y * m_windowIncrement) / m_fftSize;
-
-    float phaseError = princargf(newPhase - expectedPhase);
-
-//    bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize));
-
-    // The new frequency estimate based on the phase error resulting
-    // from assuming the "native" frequency of this bin
-
-    frequency =
-        (sampleRate * (expectedPhase + phaseError - oldPhase)) /
-        (2 * M_PI * m_windowIncrement);
-
-    return true;
-}
-
 size_t
 FFTDataServer::getFillCompletion() const 
 {
--- a/data/fft/FFTDataServer.h	Tue Jul 03 18:47:39 2007 +0000
+++ b/data/fft/FFTDataServer.h	Wed Jul 04 15:29:16 2007 +0000
@@ -92,12 +92,6 @@
         return getMagnitudeAt(x, y) > threshold;
     }
 
-    // Calculate an estimated frequency for a stable signal in this
-    // bin, using phase unwrapping.  This will be completely wrong if
-    // the signal is not stable here.
-    bool estimateStableFrequency(size_t x, size_t y,
-                                 float sampleRate, float &frequency);
-
     size_t getFillCompletion() const;
     size_t getFillExtent() const;
 
--- a/data/model/FFTModel.cpp	Tue Jul 03 18:47:39 2007 +0000
+++ b/data/model/FFTModel.cpp	Wed Jul 04 15:29:16 2007 +0000
@@ -17,6 +17,7 @@
 #include "DenseTimeValueModel.h"
 
 #include "base/Profiler.h"
+#include "base/Pitch.h"
 
 #include <cassert>
 
@@ -103,6 +104,198 @@
     return name;
 }
 
+bool
+FFTModel::estimateStableFrequency(size_t x, size_t y, float &frequency)
+{
+    if (!isOK()) return false;
+
+    size_t sampleRate = m_server->getModel()->getSampleRate();
+
+    size_t fftSize = m_server->getFFTSize() >> m_yshift;
+    frequency = (float(y) * sampleRate) / fftSize;
+
+    if (x+1 >= getWidth()) return false;
+
+    // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
+    // At hopsize h and sample rate sr, one hop happens in h/sr sec.
+    // At window size w, for bin b, f is b*sr/w.
+    // thus 2pi phase shift happens in w/(b*sr) sec.
+    // We need to know what phase shift we expect from h/sr sec.
+    // -> 2pi * ((h/sr) / (w/(b*sr)))
+    //  = 2pi * ((h * b * sr) / (w * sr))
+    //  = 2pi * (h * b) / w.
+
+    float oldPhase = getPhaseAt(x, y);
+    float newPhase = getPhaseAt(x+1, y);
+
+    size_t incr = getResolution();
+
+    float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize;
+
+    float phaseError = princargf(newPhase - expectedPhase);
+
+//    bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize));
+
+    // The new frequency estimate based on the phase error resulting
+    // from assuming the "native" frequency of this bin
+
+    frequency =
+        (sampleRate * (expectedPhase + phaseError - oldPhase)) /
+        (2 * M_PI * incr);
+
+    return true;
+}
+
+FFTModel::PeakLocationSet
+FFTModel::getPeaks(PeakPickType type, size_t x, size_t ymin, size_t ymax)
+{
+    FFTModel::PeakLocationSet peaks;
+    if (!isOK()) return peaks;
+
+    if (ymax == 0 || ymax > getHeight() - 1) {
+        ymax = getHeight() - 1;
+    }
+
+    Column values;
+
+    if (type == AllPeaks) {
+        for (size_t y = ymin; y <= ymax; ++y) {
+            values.push_back(getMagnitudeAt(x, y));
+        }
+        size_t i = 0;
+        for (size_t bin = ymin; bin <= ymax; ++bin) {
+            if ((i == 0 || values[i] > values[i-1]) &&
+                (i == values.size()-1 || values[i] >= values[i+1])) {
+                peaks.insert(bin);
+            }
+            ++i;
+        }
+        return peaks;
+    }
+
+    getColumn(x, values);
+
+    // For peak picking we use a moving median window, picking the
+    // highest value within each continuous region of values that
+    // exceed the median.  For pitch adaptivity, we adjust the window
+    // size to a roughly constant pitch range (about four tones).
+
+    size_t sampleRate = getSampleRate();
+
+    std::deque<float> window;
+    std::vector<size_t> inrange;
+    size_t medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin);
+    size_t halfWin = medianWinSize/2;
+
+    size_t binmin;
+    if (ymin > halfWin) binmin = ymin - halfWin;
+    else binmin = 0;
+
+    size_t binmax;
+    if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
+    else binmax = values.size()-1;
+
+    for (size_t bin = binmin; bin <= binmax; ++bin) {
+
+        float value = values[bin];
+
+        window.push_back(value);
+
+        medianWinSize = getPeakPickWindowSize(type, sampleRate, bin);
+        halfWin = medianWinSize/2;
+
+        while (window.size() > medianWinSize) window.pop_front();
+
+        if (type == MajorPitchAdaptivePeaks) {
+            if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
+            else binmax = values.size()-1;
+        }
+
+        std::deque<float> sorted(window);
+        std::sort(sorted.begin(), sorted.end());
+        float median = sorted[sorted.size()/2];
+
+        if (value > median) {
+            inrange.push_back(bin);
+        }
+
+        if (value <= median || bin+1 == values.size()) {
+            size_t peakbin = 0;
+            float peakval = 0.f;
+            if (!inrange.empty()) {
+                for (size_t i = 0; i < inrange.size(); ++i) {
+                    if (i == 0 || values[inrange[i]] > peakval) {
+                        peakval = values[inrange[i]];
+                        peakbin = inrange[i];
+                    }
+                }
+                inrange.clear();
+                if (peakbin >= ymin && peakbin <= ymax) {
+                    peaks.insert(peakbin);
+                }
+            }
+        }
+    }
+
+    return peaks;
+}
+
+size_t
+FFTModel::getPeakPickWindowSize(PeakPickType type, size_t sampleRate, size_t bin) const
+{
+    if (type == MajorPeaks) return 10;
+    if (bin == 0) return 3;
+    size_t fftSize = m_server->getFFTSize() >> m_yshift;
+    float binfreq = (sampleRate * bin) / fftSize;
+    float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
+    int hibin = lrintf((hifreq * fftSize) / sampleRate);
+    int medianWinSize = hibin - bin;
+    if (medianWinSize < 3) medianWinSize = 3;
+    return medianWinSize;
+}
+
+FFTModel::PeakSet
+FFTModel::getPeakFrequencies(PeakPickType type, size_t x,
+                             size_t ymin, size_t ymax)
+{
+    PeakSet peaks;
+    if (!isOK()) return peaks;
+    PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
+
+    size_t sampleRate = getSampleRate();
+    size_t fftSize = m_server->getFFTSize() >> m_yshift;
+    size_t incr = getResolution();
+
+    // This duplicates some of the work of estimateStableFrequency to
+    // allow us to retrieve the phases in two separate vertical
+    // columns, instead of jumping back and forth between columns x and
+    // x+1, which may be significantly slower if re-seeking is needed
+
+    std::vector<float> phases;
+    for (PeakLocationSet::iterator i = locations.begin();
+         i != locations.end(); ++i) {
+        phases.push_back(getPhaseAt(x, *i));
+    }
+
+    size_t phaseIndex = 0;
+    for (PeakLocationSet::iterator i = locations.begin();
+         i != locations.end(); ++i) {
+        float oldPhase = phases[phaseIndex];
+        float newPhase = getPhaseAt(x+1, *i);
+        float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize;
+        float phaseError = princargf(newPhase - expectedPhase);
+        float frequency =
+            (sampleRate * (expectedPhase + phaseError - oldPhase))
+            / (2 * M_PI * incr);
+//        bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize));
+//        if (stable)
+        peaks[*i] = frequency;
+        ++phaseIndex;
+    }
+
+    return peaks;
+}
+
 Model *
 FFTModel::clone() const
 {
--- a/data/model/FFTModel.h	Tue Jul 03 18:47:39 2007 +0000
+++ b/data/model/FFTModel.h	Wed Jul 04 15:29:16 2007 +0000
@@ -19,6 +19,9 @@
 #include "data/fft/FFTDataServer.h"
 #include "DenseThreeDimensionalModel.h"
 
+#include <set>
+#include <map>
+
 /**
  * An implementation of DenseThreeDimensionalModel that makes FFT data
  * derived from a DenseTimeValueModel available as a generic data grid.
@@ -123,10 +126,35 @@
     virtual void getColumn(size_t x, Column &result) const;
     virtual QString getBinName(size_t n) const;
 
-    virtual bool estimateStableFrequency(size_t x, size_t y, float &frequency) {
-        return m_server->estimateStableFrequency(x << m_xshift, y << m_yshift,
-                                                 getSampleRate(), frequency);
-    }
+    /**
+     * Calculate an estimated frequency for a stable signal in this
+     * bin, using phase unwrapping.  This will be completely wrong if
+     * the signal is not stable here.
+     */
+    virtual bool estimateStableFrequency(size_t x, size_t y, float &frequency);
+
+    enum PeakPickType
+    {
+        AllPeaks,                /// Any bin exceeding its immediate neighbours
+        MajorPeaks,              /// Peaks picked using sliding median window
+        MajorPitchAdaptivePeaks  /// Bigger window for higher frequencies
+    };
+
+    typedef std::set<size_t> PeakLocationSet;
+    typedef std::map<size_t, float> PeakSet;
+
+    /**
+     * Return locations of peak bins in the range [ymin,ymax].  If
+     * ymax is zero, getHeight()-1 will be used.
+     */
+    virtual PeakLocationSet getPeaks(PeakPickType type, size_t x,
+                                     size_t ymin = 0, size_t ymax = 0);
+
+    /**
+     * Return locations and estimated stable frequencies of peak bins.
+     */
+    virtual PeakSet getPeakFrequencies(PeakPickType type, size_t x,
+                                       size_t ymin = 0, size_t ymax = 0);
 
     virtual int getCompletion() const { return m_server->getFillCompletion(); }
 
@@ -143,6 +171,8 @@
     FFTDataServer *m_server;
     int m_xshift;
     int m_yshift;
+
+    size_t getPeakPickWindowSize(PeakPickType type, size_t sampleRate, size_t bin) const;
 };
 
 #endif