annotate data/model/FFTModel.cpp @ 1093:44b079427b36 simple-fft-model

Make a small cache of recently-used columns
author Chris Cannam
date Fri, 12 Jun 2015 18:50:52 +0100
parents 70f18770b72d
children b386363ff6c8
rev   line source
Chris@152 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@152 2
Chris@152 3 /*
Chris@152 4 Sonic Visualiser
Chris@152 5 An audio file viewer and annotation editor.
Chris@152 6 Centre for Digital Music, Queen Mary, University of London.
Chris@152 7 This file copyright 2006 Chris Cannam.
Chris@152 8
Chris@152 9 This program is free software; you can redistribute it and/or
Chris@152 10 modify it under the terms of the GNU General Public License as
Chris@152 11 published by the Free Software Foundation; either version 2 of the
Chris@152 12 License, or (at your option) any later version. See the file
Chris@152 13 COPYING included with this distribution for more information.
Chris@152 14 */
Chris@152 15
Chris@152 16 #include "FFTModel.h"
Chris@152 17 #include "DenseTimeValueModel.h"
Chris@152 18
Chris@183 19 #include "base/Profiler.h"
Chris@275 20 #include "base/Pitch.h"
Chris@183 21
Chris@402 22 #include <algorithm>
Chris@402 23
Chris@152 24 #include <cassert>
Chris@1090 25 #include <deque>
Chris@152 26
Chris@608 27 #ifndef __GNUC__
Chris@608 28 #include <alloca.h>
Chris@608 29 #endif
Chris@608 30
Chris@1090 31 using namespace std;
Chris@1090 32
Chris@152 33 FFTModel::FFTModel(const DenseTimeValueModel *model,
Chris@152 34 int channel,
Chris@152 35 WindowType windowType,
Chris@929 36 int windowSize,
Chris@929 37 int windowIncrement,
Chris@1090 38 int fftSize) :
Chris@1090 39 m_model(model),
Chris@1090 40 m_channel(channel),
Chris@1090 41 m_windowType(windowType),
Chris@1090 42 m_windowSize(windowSize),
Chris@1090 43 m_windowIncrement(windowIncrement),
Chris@1090 44 m_fftSize(fftSize),
Chris@1091 45 m_windower(windowType, windowSize),
Chris@1093 46 m_fft(fftSize),
Chris@1093 47 m_cacheSize(3)
Chris@152 48 {
Chris@1091 49 if (m_windowSize > m_fftSize) {
Chris@1091 50 cerr << "ERROR: FFTModel::FFTModel: window size (" << m_windowSize
Chris@1091 51 << ") must be at least FFT size (" << m_fftSize << ")" << endl;
Chris@1091 52 throw invalid_argument("FFTModel window size must be at least FFT size");
Chris@1091 53 }
Chris@152 54 }
Chris@152 55
Chris@152 56 FFTModel::~FFTModel()
Chris@152 57 {
Chris@152 58 }
Chris@152 59
Chris@360 60 void
Chris@360 61 FFTModel::sourceModelAboutToBeDeleted()
Chris@360 62 {
Chris@1090 63 if (m_model) {
Chris@1090 64 cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_model << ")" << endl;
Chris@1090 65 m_model = 0;
Chris@360 66 }
Chris@360 67 }
Chris@360 68
Chris@1091 69 int
Chris@1091 70 FFTModel::getWidth() const
Chris@1091 71 {
Chris@1091 72 if (!m_model) return 0;
Chris@1091 73 return int((m_model->getEndFrame() - m_model->getStartFrame())
Chris@1091 74 / m_windowIncrement) + 1;
Chris@1091 75 }
Chris@1091 76
Chris@1091 77 int
Chris@1091 78 FFTModel::getHeight() const
Chris@1091 79 {
Chris@1091 80 return m_fftSize / 2 + 1;
Chris@1091 81 }
Chris@1091 82
Chris@152 83 QString
Chris@929 84 FFTModel::getBinName(int n) const
Chris@152 85 {
Chris@1040 86 sv_samplerate_t sr = getSampleRate();
Chris@152 87 if (!sr) return "";
Chris@204 88 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
Chris@152 89 return name;
Chris@152 90 }
Chris@152 91
Chris@1091 92 FFTModel::Column
Chris@1091 93 FFTModel::getColumn(int x) const
Chris@1091 94 {
Chris@1091 95 auto cplx = getFFTColumn(x);
Chris@1091 96 Column col;
Chris@1091 97 col.reserve(int(cplx.size()));
Chris@1091 98 for (auto c: cplx) col.push_back(abs(c));
Chris@1091 99 return col;
Chris@1091 100 }
Chris@1091 101
Chris@1091 102 float
Chris@1091 103 FFTModel::getMagnitudeAt(int x, int y) const
Chris@1091 104 {
Chris@1093 105 if (x < 0 || x >= getWidth() || y < 0 || y >= getHeight()) return 0.f;
Chris@1093 106 auto col = getFFTColumn(x);
Chris@1093 107 return abs(col[y]);
Chris@1091 108 }
Chris@1091 109
Chris@1091 110 float
Chris@1091 111 FFTModel::getMaximumMagnitudeAt(int x) const
Chris@1091 112 {
Chris@1091 113 Column col(getColumn(x));
Chris@1092 114 float max = 0.f;
Chris@1092 115 for (int i = 0; i < col.size(); ++i) {
Chris@1092 116 if (col[i] > max) max = col[i];
Chris@1092 117 }
Chris@1092 118 return max;
Chris@1091 119 }
Chris@1091 120
Chris@1091 121 float
Chris@1091 122 FFTModel::getPhaseAt(int x, int y) const
Chris@1091 123 {
Chris@1093 124 if (x < 0 || x >= getWidth() || y < 0 || y >= getHeight()) return 0.f;
Chris@1091 125 return arg(getFFTColumn(x)[y]);
Chris@1091 126 }
Chris@1091 127
Chris@1091 128 void
Chris@1091 129 FFTModel::getValuesAt(int x, int y, float &re, float &im) const
Chris@1091 130 {
Chris@1091 131 auto col = getFFTColumn(x);
Chris@1091 132 re = col[y].real();
Chris@1091 133 im = col[y].imag();
Chris@1091 134 }
Chris@1091 135
Chris@1091 136 bool
Chris@1093 137 FFTModel::isColumnAvailable(int) const
Chris@1091 138 {
Chris@1091 139 //!!!
Chris@1091 140 return true;
Chris@1091 141 }
Chris@1091 142
Chris@1091 143 bool
Chris@1091 144 FFTModel::getMagnitudesAt(int x, float *values, int minbin, int count) const
Chris@1091 145 {
Chris@1091 146 if (count == 0) count = getHeight();
Chris@1091 147 auto col = getFFTColumn(x);
Chris@1091 148 for (int i = 0; i < count; ++i) {
Chris@1091 149 values[i] = abs(col[minbin + i]);
Chris@1091 150 }
Chris@1091 151 return true;
Chris@1091 152 }
Chris@1091 153
Chris@1091 154 bool
Chris@1091 155 FFTModel::getNormalizedMagnitudesAt(int x, float *values, int minbin, int count) const
Chris@1091 156 {
Chris@1092 157 if (!getMagnitudesAt(x, values, minbin, count)) return false;
Chris@1092 158 if (count == 0) count = getHeight();
Chris@1092 159 float max = 0.f;
Chris@1092 160 for (int i = 0; i < count; ++i) {
Chris@1092 161 if (values[i] > max) max = values[i];
Chris@1092 162 }
Chris@1092 163 if (max > 0.f) {
Chris@1092 164 for (int i = 0; i < count; ++i) {
Chris@1092 165 values[i] /= max;
Chris@1092 166 }
Chris@1092 167 }
Chris@1092 168 return true;
Chris@1091 169 }
Chris@1091 170
Chris@1091 171 bool
Chris@1091 172 FFTModel::getPhasesAt(int x, float *values, int minbin, int count) const
Chris@1091 173 {
Chris@1091 174 if (count == 0) count = getHeight();
Chris@1091 175 auto col = getFFTColumn(x);
Chris@1091 176 for (int i = 0; i < count; ++i) {
Chris@1091 177 values[i] = arg(col[minbin + i]);
Chris@1091 178 }
Chris@1091 179 return true;
Chris@1091 180 }
Chris@1091 181
Chris@1091 182 bool
Chris@1091 183 FFTModel::getValuesAt(int x, float *reals, float *imags, int minbin, int count) const
Chris@1091 184 {
Chris@1091 185 if (count == 0) count = getHeight();
Chris@1091 186 auto col = getFFTColumn(x);
Chris@1091 187 for (int i = 0; i < count; ++i) {
Chris@1091 188 reals[i] = col[minbin + i].real();
Chris@1091 189 }
Chris@1091 190 for (int i = 0; i < count; ++i) {
Chris@1091 191 imags[i] = col[minbin + i].imag();
Chris@1091 192 }
Chris@1091 193 return true;
Chris@1091 194 }
Chris@1091 195
Chris@1091 196 vector<float>
Chris@1091 197 FFTModel::getSourceSamples(int column) const
Chris@1091 198 {
Chris@1091 199 auto range = getSourceSampleRange(column);
Chris@1091 200 vector<float> samples(m_fftSize, 0.f);
Chris@1091 201 int off = (m_fftSize - m_windowSize) / 2;
Chris@1091 202 decltype(range.first) pfx = 0;
Chris@1091 203 if (range.first < 0) {
Chris@1091 204 pfx = -range.first;
Chris@1091 205 range = { 0, range.second };
Chris@1091 206 }
Chris@1091 207 (void) m_model->getData(m_channel,
Chris@1091 208 range.first,
Chris@1091 209 range.second - range.first,
Chris@1091 210 &samples[off + pfx]);
Chris@1091 211 if (m_channel == -1) {
Chris@1091 212 int channels = m_model->getChannelCount();
Chris@1091 213 if (channels > 1) {
Chris@1091 214 for (int i = 0; i < range.second - range.first; ++i) {
Chris@1091 215 samples[off + pfx + i] /= float(channels);
Chris@1091 216 }
Chris@1091 217 }
Chris@1091 218 }
Chris@1091 219 return samples;
Chris@1091 220 }
Chris@1091 221
Chris@1091 222 vector<complex<float>>
Chris@1093 223 FFTModel::getFFTColumn(int n) const
Chris@1091 224 {
Chris@1093 225 for (auto &incache : m_cached) {
Chris@1093 226 if (incache.n == n) {
Chris@1093 227 return incache.col;
Chris@1093 228 }
Chris@1093 229 }
Chris@1093 230
Chris@1093 231 auto samples = getSourceSamples(n);
Chris@1091 232 m_windower.cut(&samples[0]);
Chris@1093 233 auto col = m_fft.process(samples);
Chris@1093 234
Chris@1093 235 SavedColumn sc { n, col };
Chris@1093 236 if (m_cached.size() >= m_cacheSize) {
Chris@1093 237 m_cached.pop_front();
Chris@1093 238 }
Chris@1093 239 m_cached.push_back(sc);
Chris@1093 240
Chris@1093 241 return col;
Chris@1091 242 }
Chris@1091 243
Chris@275 244 bool
Chris@1045 245 FFTModel::estimateStableFrequency(int x, int y, double &frequency)
Chris@275 246 {
Chris@275 247 if (!isOK()) return false;
Chris@275 248
Chris@1090 249 frequency = double(y * getSampleRate()) / m_fftSize;
Chris@275 250
Chris@275 251 if (x+1 >= getWidth()) return false;
Chris@275 252
Chris@275 253 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
Chris@275 254 // At hopsize h and sample rate sr, one hop happens in h/sr sec.
Chris@275 255 // At window size w, for bin b, f is b*sr/w.
Chris@275 256 // thus 2pi phase shift happens in w/(b*sr) sec.
Chris@275 257 // We need to know what phase shift we expect from h/sr sec.
Chris@275 258 // -> 2pi * ((h/sr) / (w/(b*sr)))
Chris@275 259 // = 2pi * ((h * b * sr) / (w * sr))
Chris@275 260 // = 2pi * (h * b) / w.
Chris@275 261
Chris@1038 262 double oldPhase = getPhaseAt(x, y);
Chris@1038 263 double newPhase = getPhaseAt(x+1, y);
Chris@275 264
Chris@929 265 int incr = getResolution();
Chris@275 266
Chris@1090 267 double expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / m_fftSize;
Chris@275 268
Chris@1038 269 double phaseError = princarg(newPhase - expectedPhase);
Chris@275 270
Chris@275 271 // The new frequency estimate based on the phase error resulting
Chris@275 272 // from assuming the "native" frequency of this bin
Chris@275 273
Chris@275 274 frequency =
Chris@1090 275 (getSampleRate() * (expectedPhase + phaseError - oldPhase)) /
Chris@1045 276 (2.0 * M_PI * incr);
Chris@275 277
Chris@275 278 return true;
Chris@275 279 }
Chris@275 280
Chris@275 281 FFTModel::PeakLocationSet
Chris@929 282 FFTModel::getPeaks(PeakPickType type, int x, int ymin, int ymax)
Chris@275 283 {
Chris@551 284 Profiler profiler("FFTModel::getPeaks");
Chris@551 285
Chris@275 286 FFTModel::PeakLocationSet peaks;
Chris@275 287 if (!isOK()) return peaks;
Chris@275 288
Chris@275 289 if (ymax == 0 || ymax > getHeight() - 1) {
Chris@275 290 ymax = getHeight() - 1;
Chris@275 291 }
Chris@275 292
Chris@275 293 if (type == AllPeaks) {
Chris@551 294 int minbin = ymin;
Chris@551 295 if (minbin > 0) minbin = minbin - 1;
Chris@551 296 int maxbin = ymax;
Chris@551 297 if (maxbin < getHeight() - 1) maxbin = maxbin + 1;
Chris@551 298 const int n = maxbin - minbin + 1;
Chris@608 299 #ifdef __GNUC__
Chris@551 300 float values[n];
Chris@608 301 #else
Chris@608 302 float *values = (float *)alloca(n * sizeof(float));
Chris@608 303 #endif
Chris@551 304 getMagnitudesAt(x, values, minbin, maxbin - minbin + 1);
Chris@929 305 for (int bin = ymin; bin <= ymax; ++bin) {
Chris@551 306 if (bin == minbin || bin == maxbin) continue;
Chris@551 307 if (values[bin - minbin] > values[bin - minbin - 1] &&
Chris@551 308 values[bin - minbin] > values[bin - minbin + 1]) {
Chris@275 309 peaks.insert(bin);
Chris@275 310 }
Chris@275 311 }
Chris@275 312 return peaks;
Chris@275 313 }
Chris@275 314
Chris@551 315 Column values = getColumn(x);
Chris@275 316
Chris@500 317 float mean = 0.f;
Chris@551 318 for (int i = 0; i < values.size(); ++i) mean += values[i];
Chris@1038 319 if (values.size() > 0) mean = mean / float(values.size());
Chris@1038 320
Chris@275 321 // For peak picking we use a moving median window, picking the
Chris@275 322 // highest value within each continuous region of values that
Chris@275 323 // exceed the median. For pitch adaptivity, we adjust the window
Chris@275 324 // size to a roughly constant pitch range (about four tones).
Chris@275 325
Chris@1040 326 sv_samplerate_t sampleRate = getSampleRate();
Chris@275 327
Chris@1090 328 deque<float> window;
Chris@1090 329 vector<int> inrange;
Chris@280 330 float dist = 0.5;
Chris@500 331
Chris@929 332 int medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
Chris@929 333 int halfWin = medianWinSize/2;
Chris@275 334
Chris@929 335 int binmin;
Chris@275 336 if (ymin > halfWin) binmin = ymin - halfWin;
Chris@275 337 else binmin = 0;
Chris@275 338
Chris@929 339 int binmax;
Chris@275 340 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 341 else binmax = values.size()-1;
Chris@275 342
Chris@929 343 int prevcentre = 0;
Chris@500 344
Chris@929 345 for (int bin = binmin; bin <= binmax; ++bin) {
Chris@275 346
Chris@275 347 float value = values[bin];
Chris@275 348
Chris@275 349 window.push_back(value);
Chris@275 350
Chris@280 351 // so-called median will actually be the dist*100'th percentile
Chris@280 352 medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
Chris@275 353 halfWin = medianWinSize/2;
Chris@275 354
Chris@929 355 while ((int)window.size() > medianWinSize) {
Chris@500 356 window.pop_front();
Chris@500 357 }
Chris@500 358
Chris@1038 359 int actualSize = int(window.size());
Chris@275 360
Chris@275 361 if (type == MajorPitchAdaptivePeaks) {
Chris@275 362 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 363 else binmax = values.size()-1;
Chris@275 364 }
Chris@275 365
Chris@1090 366 deque<float> sorted(window);
Chris@1090 367 sort(sorted.begin(), sorted.end());
Chris@1038 368 float median = sorted[int(float(sorted.size()) * dist)];
Chris@275 369
Chris@929 370 int centrebin = 0;
Chris@500 371 if (bin > actualSize/2) centrebin = bin - actualSize/2;
Chris@500 372
Chris@500 373 while (centrebin > prevcentre || bin == binmin) {
Chris@275 374
Chris@500 375 if (centrebin > prevcentre) ++prevcentre;
Chris@500 376
Chris@500 377 float centre = values[prevcentre];
Chris@500 378
Chris@500 379 if (centre > median) {
Chris@500 380 inrange.push_back(centrebin);
Chris@500 381 }
Chris@500 382
Chris@500 383 if (centre <= median || centrebin+1 == values.size()) {
Chris@500 384 if (!inrange.empty()) {
Chris@929 385 int peakbin = 0;
Chris@500 386 float peakval = 0.f;
Chris@929 387 for (int i = 0; i < (int)inrange.size(); ++i) {
Chris@500 388 if (i == 0 || values[inrange[i]] > peakval) {
Chris@500 389 peakval = values[inrange[i]];
Chris@500 390 peakbin = inrange[i];
Chris@500 391 }
Chris@500 392 }
Chris@500 393 inrange.clear();
Chris@500 394 if (peakbin >= ymin && peakbin <= ymax) {
Chris@500 395 peaks.insert(peakbin);
Chris@275 396 }
Chris@275 397 }
Chris@275 398 }
Chris@500 399
Chris@500 400 if (bin == binmin) break;
Chris@275 401 }
Chris@275 402 }
Chris@275 403
Chris@275 404 return peaks;
Chris@275 405 }
Chris@275 406
Chris@929 407 int
Chris@1040 408 FFTModel::getPeakPickWindowSize(PeakPickType type, sv_samplerate_t sampleRate,
Chris@929 409 int bin, float &percentile) const
Chris@275 410 {
Chris@280 411 percentile = 0.5;
Chris@275 412 if (type == MajorPeaks) return 10;
Chris@275 413 if (bin == 0) return 3;
Chris@280 414
Chris@1091 415 double binfreq = (sampleRate * bin) / m_fftSize;
Chris@1038 416 double hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
Chris@280 417
Chris@1091 418 int hibin = int(lrint((hifreq * m_fftSize) / sampleRate));
Chris@275 419 int medianWinSize = hibin - bin;
Chris@275 420 if (medianWinSize < 3) medianWinSize = 3;
Chris@280 421
Chris@1091 422 percentile = 0.5f + float(binfreq / sampleRate);
Chris@280 423
Chris@275 424 return medianWinSize;
Chris@275 425 }
Chris@275 426
Chris@275 427 FFTModel::PeakSet
Chris@929 428 FFTModel::getPeakFrequencies(PeakPickType type, int x,
Chris@929 429 int ymin, int ymax)
Chris@275 430 {
Chris@551 431 Profiler profiler("FFTModel::getPeakFrequencies");
Chris@551 432
Chris@275 433 PeakSet peaks;
Chris@275 434 if (!isOK()) return peaks;
Chris@275 435 PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
Chris@275 436
Chris@1040 437 sv_samplerate_t sampleRate = getSampleRate();
Chris@929 438 int incr = getResolution();
Chris@275 439
Chris@275 440 // This duplicates some of the work of estimateStableFrequency to
Chris@275 441 // allow us to retrieve the phases in two separate vertical
Chris@275 442 // columns, instead of jumping back and forth between columns x and
Chris@275 443 // x+1, which may be significantly slower if re-seeking is needed
Chris@275 444
Chris@1090 445 vector<float> phases;
Chris@275 446 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 447 i != locations.end(); ++i) {
Chris@275 448 phases.push_back(getPhaseAt(x, *i));
Chris@275 449 }
Chris@275 450
Chris@929 451 int phaseIndex = 0;
Chris@275 452 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 453 i != locations.end(); ++i) {
Chris@1038 454 double oldPhase = phases[phaseIndex];
Chris@1038 455 double newPhase = getPhaseAt(x+1, *i);
Chris@1090 456 double expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / m_fftSize;
Chris@1038 457 double phaseError = princarg(newPhase - expectedPhase);
Chris@1038 458 double frequency =
Chris@275 459 (sampleRate * (expectedPhase + phaseError - oldPhase))
Chris@275 460 / (2 * M_PI * incr);
Chris@1045 461 peaks[*i] = frequency;
Chris@275 462 ++phaseIndex;
Chris@275 463 }
Chris@275 464
Chris@275 465 return peaks;
Chris@275 466 }
Chris@275 467