annotate data/model/FFTModel.cpp @ 1090:420fc961c0c4 simple-fft-model

Gut the old code, but don't replace it yet (so nothing will link yet)
author Chris Cannam
date Fri, 12 Jun 2015 14:51:46 +0100
parents 0fd3661bcfff
children bdebff3265ae
rev   line source
Chris@152 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@152 2
Chris@152 3 /*
Chris@152 4 Sonic Visualiser
Chris@152 5 An audio file viewer and annotation editor.
Chris@152 6 Centre for Digital Music, Queen Mary, University of London.
Chris@152 7 This file copyright 2006 Chris Cannam.
Chris@152 8
Chris@152 9 This program is free software; you can redistribute it and/or
Chris@152 10 modify it under the terms of the GNU General Public License as
Chris@152 11 published by the Free Software Foundation; either version 2 of the
Chris@152 12 License, or (at your option) any later version. See the file
Chris@152 13 COPYING included with this distribution for more information.
Chris@152 14 */
Chris@152 15
Chris@152 16 #include "FFTModel.h"
Chris@152 17 #include "DenseTimeValueModel.h"
Chris@152 18
Chris@183 19 #include "base/Profiler.h"
Chris@275 20 #include "base/Pitch.h"
Chris@183 21
Chris@402 22 #include <algorithm>
Chris@402 23
Chris@152 24 #include <cassert>
Chris@1090 25 #include <deque>
Chris@152 26
Chris@608 27 #ifndef __GNUC__
Chris@608 28 #include <alloca.h>
Chris@608 29 #endif
Chris@608 30
Chris@1090 31 using namespace std;
Chris@1090 32
Chris@152 33 FFTModel::FFTModel(const DenseTimeValueModel *model,
Chris@152 34 int channel,
Chris@152 35 WindowType windowType,
Chris@929 36 int windowSize,
Chris@929 37 int windowIncrement,
Chris@1090 38 int fftSize) :
Chris@1090 39 m_model(model),
Chris@1090 40 m_channel(channel),
Chris@1090 41 m_windowType(windowType),
Chris@1090 42 m_windowSize(windowSize),
Chris@1090 43 m_windowIncrement(windowIncrement),
Chris@1090 44 m_fftSize(fftSize),
Chris@1090 45 m_windower(windowType, windowSize)
Chris@152 46 {
Chris@152 47 }
Chris@152 48
Chris@152 49 FFTModel::~FFTModel()
Chris@152 50 {
Chris@152 51 }
Chris@152 52
Chris@360 53 void
Chris@360 54 FFTModel::sourceModelAboutToBeDeleted()
Chris@360 55 {
Chris@1090 56 if (m_model) {
Chris@1090 57 cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_model << ")" << endl;
Chris@1090 58 m_model = 0;
Chris@360 59 }
Chris@360 60 }
Chris@360 61
Chris@152 62 QString
Chris@929 63 FFTModel::getBinName(int n) const
Chris@152 64 {
Chris@1040 65 sv_samplerate_t sr = getSampleRate();
Chris@152 66 if (!sr) return "";
Chris@204 67 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
Chris@152 68 return name;
Chris@152 69 }
Chris@152 70
Chris@275 71 bool
Chris@1045 72 FFTModel::estimateStableFrequency(int x, int y, double &frequency)
Chris@275 73 {
Chris@275 74 if (!isOK()) return false;
Chris@275 75
Chris@1090 76 frequency = double(y * getSampleRate()) / m_fftSize;
Chris@275 77
Chris@275 78 if (x+1 >= getWidth()) return false;
Chris@275 79
Chris@275 80 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
Chris@275 81 // At hopsize h and sample rate sr, one hop happens in h/sr sec.
Chris@275 82 // At window size w, for bin b, f is b*sr/w.
Chris@275 83 // thus 2pi phase shift happens in w/(b*sr) sec.
Chris@275 84 // We need to know what phase shift we expect from h/sr sec.
Chris@275 85 // -> 2pi * ((h/sr) / (w/(b*sr)))
Chris@275 86 // = 2pi * ((h * b * sr) / (w * sr))
Chris@275 87 // = 2pi * (h * b) / w.
Chris@275 88
Chris@1038 89 double oldPhase = getPhaseAt(x, y);
Chris@1038 90 double newPhase = getPhaseAt(x+1, y);
Chris@275 91
Chris@929 92 int incr = getResolution();
Chris@275 93
Chris@1090 94 double expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / m_fftSize;
Chris@275 95
Chris@1038 96 double phaseError = princarg(newPhase - expectedPhase);
Chris@275 97
Chris@275 98 // The new frequency estimate based on the phase error resulting
Chris@275 99 // from assuming the "native" frequency of this bin
Chris@275 100
Chris@275 101 frequency =
Chris@1090 102 (getSampleRate() * (expectedPhase + phaseError - oldPhase)) /
Chris@1045 103 (2.0 * M_PI * incr);
Chris@275 104
Chris@275 105 return true;
Chris@275 106 }
Chris@275 107
Chris@275 108 FFTModel::PeakLocationSet
Chris@929 109 FFTModel::getPeaks(PeakPickType type, int x, int ymin, int ymax)
Chris@275 110 {
Chris@551 111 Profiler profiler("FFTModel::getPeaks");
Chris@551 112
Chris@275 113 FFTModel::PeakLocationSet peaks;
Chris@275 114 if (!isOK()) return peaks;
Chris@275 115
Chris@275 116 if (ymax == 0 || ymax > getHeight() - 1) {
Chris@275 117 ymax = getHeight() - 1;
Chris@275 118 }
Chris@275 119
Chris@275 120 if (type == AllPeaks) {
Chris@551 121 int minbin = ymin;
Chris@551 122 if (minbin > 0) minbin = minbin - 1;
Chris@551 123 int maxbin = ymax;
Chris@551 124 if (maxbin < getHeight() - 1) maxbin = maxbin + 1;
Chris@551 125 const int n = maxbin - minbin + 1;
Chris@608 126 #ifdef __GNUC__
Chris@551 127 float values[n];
Chris@608 128 #else
Chris@608 129 float *values = (float *)alloca(n * sizeof(float));
Chris@608 130 #endif
Chris@551 131 getMagnitudesAt(x, values, minbin, maxbin - minbin + 1);
Chris@929 132 for (int bin = ymin; bin <= ymax; ++bin) {
Chris@551 133 if (bin == minbin || bin == maxbin) continue;
Chris@551 134 if (values[bin - minbin] > values[bin - minbin - 1] &&
Chris@551 135 values[bin - minbin] > values[bin - minbin + 1]) {
Chris@275 136 peaks.insert(bin);
Chris@275 137 }
Chris@275 138 }
Chris@275 139 return peaks;
Chris@275 140 }
Chris@275 141
Chris@551 142 Column values = getColumn(x);
Chris@275 143
Chris@500 144 float mean = 0.f;
Chris@551 145 for (int i = 0; i < values.size(); ++i) mean += values[i];
Chris@1038 146 if (values.size() > 0) mean = mean / float(values.size());
Chris@1038 147
Chris@275 148 // For peak picking we use a moving median window, picking the
Chris@275 149 // highest value within each continuous region of values that
Chris@275 150 // exceed the median. For pitch adaptivity, we adjust the window
Chris@275 151 // size to a roughly constant pitch range (about four tones).
Chris@275 152
Chris@1040 153 sv_samplerate_t sampleRate = getSampleRate();
Chris@275 154
Chris@1090 155 deque<float> window;
Chris@1090 156 vector<int> inrange;
Chris@280 157 float dist = 0.5;
Chris@500 158
Chris@929 159 int medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
Chris@929 160 int halfWin = medianWinSize/2;
Chris@275 161
Chris@929 162 int binmin;
Chris@275 163 if (ymin > halfWin) binmin = ymin - halfWin;
Chris@275 164 else binmin = 0;
Chris@275 165
Chris@929 166 int binmax;
Chris@275 167 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 168 else binmax = values.size()-1;
Chris@275 169
Chris@929 170 int prevcentre = 0;
Chris@500 171
Chris@929 172 for (int bin = binmin; bin <= binmax; ++bin) {
Chris@275 173
Chris@275 174 float value = values[bin];
Chris@275 175
Chris@275 176 window.push_back(value);
Chris@275 177
Chris@280 178 // so-called median will actually be the dist*100'th percentile
Chris@280 179 medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
Chris@275 180 halfWin = medianWinSize/2;
Chris@275 181
Chris@929 182 while ((int)window.size() > medianWinSize) {
Chris@500 183 window.pop_front();
Chris@500 184 }
Chris@500 185
Chris@1038 186 int actualSize = int(window.size());
Chris@275 187
Chris@275 188 if (type == MajorPitchAdaptivePeaks) {
Chris@275 189 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 190 else binmax = values.size()-1;
Chris@275 191 }
Chris@275 192
Chris@1090 193 deque<float> sorted(window);
Chris@1090 194 sort(sorted.begin(), sorted.end());
Chris@1038 195 float median = sorted[int(float(sorted.size()) * dist)];
Chris@275 196
Chris@929 197 int centrebin = 0;
Chris@500 198 if (bin > actualSize/2) centrebin = bin - actualSize/2;
Chris@500 199
Chris@500 200 while (centrebin > prevcentre || bin == binmin) {
Chris@275 201
Chris@500 202 if (centrebin > prevcentre) ++prevcentre;
Chris@500 203
Chris@500 204 float centre = values[prevcentre];
Chris@500 205
Chris@500 206 if (centre > median) {
Chris@500 207 inrange.push_back(centrebin);
Chris@500 208 }
Chris@500 209
Chris@500 210 if (centre <= median || centrebin+1 == values.size()) {
Chris@500 211 if (!inrange.empty()) {
Chris@929 212 int peakbin = 0;
Chris@500 213 float peakval = 0.f;
Chris@929 214 for (int i = 0; i < (int)inrange.size(); ++i) {
Chris@500 215 if (i == 0 || values[inrange[i]] > peakval) {
Chris@500 216 peakval = values[inrange[i]];
Chris@500 217 peakbin = inrange[i];
Chris@500 218 }
Chris@500 219 }
Chris@500 220 inrange.clear();
Chris@500 221 if (peakbin >= ymin && peakbin <= ymax) {
Chris@500 222 peaks.insert(peakbin);
Chris@275 223 }
Chris@275 224 }
Chris@275 225 }
Chris@500 226
Chris@500 227 if (bin == binmin) break;
Chris@275 228 }
Chris@275 229 }
Chris@275 230
Chris@275 231 return peaks;
Chris@275 232 }
Chris@275 233
Chris@929 234 int
Chris@1040 235 FFTModel::getPeakPickWindowSize(PeakPickType type, sv_samplerate_t sampleRate,
Chris@929 236 int bin, float &percentile) const
Chris@275 237 {
Chris@280 238 percentile = 0.5;
Chris@275 239 if (type == MajorPeaks) return 10;
Chris@275 240 if (bin == 0) return 3;
Chris@280 241
Chris@1090 242 double binfreq = (getSampleRate() * bin) / m_fftSize;
Chris@1038 243 double hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
Chris@280 244
Chris@1090 245 int hibin = int(lrint((hifreq * m_fftSize) / getSampleRate()));
Chris@275 246 int medianWinSize = hibin - bin;
Chris@275 247 if (medianWinSize < 3) medianWinSize = 3;
Chris@280 248
Chris@1090 249 percentile = 0.5f + float(binfreq / getSampleRate());
Chris@280 250
Chris@275 251 return medianWinSize;
Chris@275 252 }
Chris@275 253
Chris@275 254 FFTModel::PeakSet
Chris@929 255 FFTModel::getPeakFrequencies(PeakPickType type, int x,
Chris@929 256 int ymin, int ymax)
Chris@275 257 {
Chris@551 258 Profiler profiler("FFTModel::getPeakFrequencies");
Chris@551 259
Chris@275 260 PeakSet peaks;
Chris@275 261 if (!isOK()) return peaks;
Chris@275 262 PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
Chris@275 263
Chris@1040 264 sv_samplerate_t sampleRate = getSampleRate();
Chris@929 265 int incr = getResolution();
Chris@275 266
Chris@275 267 // This duplicates some of the work of estimateStableFrequency to
Chris@275 268 // allow us to retrieve the phases in two separate vertical
Chris@275 269 // columns, instead of jumping back and forth between columns x and
Chris@275 270 // x+1, which may be significantly slower if re-seeking is needed
Chris@275 271
Chris@1090 272 vector<float> phases;
Chris@275 273 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 274 i != locations.end(); ++i) {
Chris@275 275 phases.push_back(getPhaseAt(x, *i));
Chris@275 276 }
Chris@275 277
Chris@929 278 int phaseIndex = 0;
Chris@275 279 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 280 i != locations.end(); ++i) {
Chris@1038 281 double oldPhase = phases[phaseIndex];
Chris@1038 282 double newPhase = getPhaseAt(x+1, *i);
Chris@1090 283 double expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / m_fftSize;
Chris@1038 284 double phaseError = princarg(newPhase - expectedPhase);
Chris@1038 285 double frequency =
Chris@275 286 (sampleRate * (expectedPhase + phaseError - oldPhase))
Chris@275 287 / (2 * M_PI * incr);
Chris@1045 288 peaks[*i] = frequency;
Chris@275 289 ++phaseIndex;
Chris@275 290 }
Chris@275 291
Chris@275 292 return peaks;
Chris@275 293 }
Chris@275 294