annotate data/model/FFTModel.cpp @ 1094:b386363ff6c8 simple-fft-model

A small cache for source samples
author Chris Cannam
date Fri, 12 Jun 2015 19:18:56 +0100
parents 44b079427b36
children b66734b5f806
rev   line source
Chris@152 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@152 2
Chris@152 3 /*
Chris@152 4 Sonic Visualiser
Chris@152 5 An audio file viewer and annotation editor.
Chris@152 6 Centre for Digital Music, Queen Mary, University of London.
Chris@152 7 This file copyright 2006 Chris Cannam.
Chris@152 8
Chris@152 9 This program is free software; you can redistribute it and/or
Chris@152 10 modify it under the terms of the GNU General Public License as
Chris@152 11 published by the Free Software Foundation; either version 2 of the
Chris@152 12 License, or (at your option) any later version. See the file
Chris@152 13 COPYING included with this distribution for more information.
Chris@152 14 */
Chris@152 15
Chris@152 16 #include "FFTModel.h"
Chris@152 17 #include "DenseTimeValueModel.h"
Chris@152 18
Chris@183 19 #include "base/Profiler.h"
Chris@275 20 #include "base/Pitch.h"
Chris@183 21
Chris@402 22 #include <algorithm>
Chris@402 23
Chris@152 24 #include <cassert>
Chris@1090 25 #include <deque>
Chris@152 26
Chris@608 27 #ifndef __GNUC__
Chris@608 28 #include <alloca.h>
Chris@608 29 #endif
Chris@608 30
Chris@1090 31 using namespace std;
Chris@1090 32
Chris@152 33 FFTModel::FFTModel(const DenseTimeValueModel *model,
Chris@152 34 int channel,
Chris@152 35 WindowType windowType,
Chris@929 36 int windowSize,
Chris@929 37 int windowIncrement,
Chris@1090 38 int fftSize) :
Chris@1090 39 m_model(model),
Chris@1090 40 m_channel(channel),
Chris@1090 41 m_windowType(windowType),
Chris@1090 42 m_windowSize(windowSize),
Chris@1090 43 m_windowIncrement(windowIncrement),
Chris@1090 44 m_fftSize(fftSize),
Chris@1091 45 m_windower(windowType, windowSize),
Chris@1093 46 m_fft(fftSize),
Chris@1093 47 m_cacheSize(3)
Chris@152 48 {
Chris@1091 49 if (m_windowSize > m_fftSize) {
Chris@1091 50 cerr << "ERROR: FFTModel::FFTModel: window size (" << m_windowSize
Chris@1091 51 << ") must be at least FFT size (" << m_fftSize << ")" << endl;
Chris@1091 52 throw invalid_argument("FFTModel window size must be at least FFT size");
Chris@1091 53 }
Chris@152 54 }
Chris@152 55
Chris@152 56 FFTModel::~FFTModel()
Chris@152 57 {
Chris@152 58 }
Chris@152 59
Chris@360 60 void
Chris@360 61 FFTModel::sourceModelAboutToBeDeleted()
Chris@360 62 {
Chris@1090 63 if (m_model) {
Chris@1090 64 cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_model << ")" << endl;
Chris@1090 65 m_model = 0;
Chris@360 66 }
Chris@360 67 }
Chris@360 68
Chris@1091 69 int
Chris@1091 70 FFTModel::getWidth() const
Chris@1091 71 {
Chris@1091 72 if (!m_model) return 0;
Chris@1091 73 return int((m_model->getEndFrame() - m_model->getStartFrame())
Chris@1091 74 / m_windowIncrement) + 1;
Chris@1091 75 }
Chris@1091 76
Chris@1091 77 int
Chris@1091 78 FFTModel::getHeight() const
Chris@1091 79 {
Chris@1091 80 return m_fftSize / 2 + 1;
Chris@1091 81 }
Chris@1091 82
Chris@152 83 QString
Chris@929 84 FFTModel::getBinName(int n) const
Chris@152 85 {
Chris@1040 86 sv_samplerate_t sr = getSampleRate();
Chris@152 87 if (!sr) return "";
Chris@204 88 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
Chris@152 89 return name;
Chris@152 90 }
Chris@152 91
Chris@1091 92 FFTModel::Column
Chris@1091 93 FFTModel::getColumn(int x) const
Chris@1091 94 {
Chris@1091 95 auto cplx = getFFTColumn(x);
Chris@1091 96 Column col;
Chris@1091 97 col.reserve(int(cplx.size()));
Chris@1091 98 for (auto c: cplx) col.push_back(abs(c));
Chris@1091 99 return col;
Chris@1091 100 }
Chris@1091 101
Chris@1091 102 float
Chris@1091 103 FFTModel::getMagnitudeAt(int x, int y) const
Chris@1091 104 {
Chris@1093 105 if (x < 0 || x >= getWidth() || y < 0 || y >= getHeight()) return 0.f;
Chris@1093 106 auto col = getFFTColumn(x);
Chris@1093 107 return abs(col[y]);
Chris@1091 108 }
Chris@1091 109
Chris@1091 110 float
Chris@1091 111 FFTModel::getMaximumMagnitudeAt(int x) const
Chris@1091 112 {
Chris@1091 113 Column col(getColumn(x));
Chris@1092 114 float max = 0.f;
Chris@1092 115 for (int i = 0; i < col.size(); ++i) {
Chris@1092 116 if (col[i] > max) max = col[i];
Chris@1092 117 }
Chris@1092 118 return max;
Chris@1091 119 }
Chris@1091 120
Chris@1091 121 float
Chris@1091 122 FFTModel::getPhaseAt(int x, int y) const
Chris@1091 123 {
Chris@1093 124 if (x < 0 || x >= getWidth() || y < 0 || y >= getHeight()) return 0.f;
Chris@1091 125 return arg(getFFTColumn(x)[y]);
Chris@1091 126 }
Chris@1091 127
Chris@1091 128 void
Chris@1091 129 FFTModel::getValuesAt(int x, int y, float &re, float &im) const
Chris@1091 130 {
Chris@1091 131 auto col = getFFTColumn(x);
Chris@1091 132 re = col[y].real();
Chris@1091 133 im = col[y].imag();
Chris@1091 134 }
Chris@1091 135
Chris@1091 136 bool
Chris@1093 137 FFTModel::isColumnAvailable(int) const
Chris@1091 138 {
Chris@1091 139 //!!!
Chris@1091 140 return true;
Chris@1091 141 }
Chris@1091 142
Chris@1091 143 bool
Chris@1091 144 FFTModel::getMagnitudesAt(int x, float *values, int minbin, int count) const
Chris@1091 145 {
Chris@1091 146 if (count == 0) count = getHeight();
Chris@1091 147 auto col = getFFTColumn(x);
Chris@1091 148 for (int i = 0; i < count; ++i) {
Chris@1091 149 values[i] = abs(col[minbin + i]);
Chris@1091 150 }
Chris@1091 151 return true;
Chris@1091 152 }
Chris@1091 153
Chris@1091 154 bool
Chris@1091 155 FFTModel::getNormalizedMagnitudesAt(int x, float *values, int minbin, int count) const
Chris@1091 156 {
Chris@1092 157 if (!getMagnitudesAt(x, values, minbin, count)) return false;
Chris@1092 158 if (count == 0) count = getHeight();
Chris@1092 159 float max = 0.f;
Chris@1092 160 for (int i = 0; i < count; ++i) {
Chris@1092 161 if (values[i] > max) max = values[i];
Chris@1092 162 }
Chris@1092 163 if (max > 0.f) {
Chris@1092 164 for (int i = 0; i < count; ++i) {
Chris@1092 165 values[i] /= max;
Chris@1092 166 }
Chris@1092 167 }
Chris@1092 168 return true;
Chris@1091 169 }
Chris@1091 170
Chris@1091 171 bool
Chris@1091 172 FFTModel::getPhasesAt(int x, float *values, int minbin, int count) const
Chris@1091 173 {
Chris@1091 174 if (count == 0) count = getHeight();
Chris@1091 175 auto col = getFFTColumn(x);
Chris@1091 176 for (int i = 0; i < count; ++i) {
Chris@1091 177 values[i] = arg(col[minbin + i]);
Chris@1091 178 }
Chris@1091 179 return true;
Chris@1091 180 }
Chris@1091 181
Chris@1091 182 bool
Chris@1091 183 FFTModel::getValuesAt(int x, float *reals, float *imags, int minbin, int count) const
Chris@1091 184 {
Chris@1091 185 if (count == 0) count = getHeight();
Chris@1091 186 auto col = getFFTColumn(x);
Chris@1091 187 for (int i = 0; i < count; ++i) {
Chris@1091 188 reals[i] = col[minbin + i].real();
Chris@1091 189 }
Chris@1091 190 for (int i = 0; i < count; ++i) {
Chris@1091 191 imags[i] = col[minbin + i].imag();
Chris@1091 192 }
Chris@1091 193 return true;
Chris@1091 194 }
Chris@1091 195
Chris@1091 196 vector<float>
Chris@1091 197 FFTModel::getSourceSamples(int column) const
Chris@1091 198 {
Chris@1094 199 // m_fftSize may be greater than m_windowSize, but not the reverse
Chris@1094 200
Chris@1094 201 // cerr << "getSourceSamples(" << column << ")" << endl;
Chris@1094 202
Chris@1091 203 auto range = getSourceSampleRange(column);
Chris@1094 204 auto data = getSourceData(range);
Chris@1094 205
Chris@1091 206 int off = (m_fftSize - m_windowSize) / 2;
Chris@1094 207
Chris@1094 208 if (off == 0) {
Chris@1094 209 return data;
Chris@1094 210 } else {
Chris@1094 211 vector<float> pad(off, 0.f);
Chris@1094 212 vector<float> padded;
Chris@1094 213 padded.reserve(m_fftSize);
Chris@1094 214 padded.insert(padded.end(), pad.begin(), pad.end());
Chris@1094 215 padded.insert(padded.end(), data.begin(), data.end());
Chris@1094 216 padded.insert(padded.end(), pad.begin(), pad.end());
Chris@1094 217 return padded;
Chris@1094 218 }
Chris@1094 219 }
Chris@1094 220
Chris@1094 221 vector<float>
Chris@1094 222 FFTModel::getSourceData(pair<sv_frame_t, sv_frame_t> range) const
Chris@1094 223 {
Chris@1094 224 // cerr << "getSourceData(" << range.first << "," << range.second
Chris@1094 225 // << "): saved range is (" << m_savedData.range.first
Chris@1094 226 // << "," << m_savedData.range.second << ")" << endl;
Chris@1094 227
Chris@1094 228 if (m_savedData.range == range) return m_savedData.data;
Chris@1094 229
Chris@1094 230 if (range.first < m_savedData.range.second &&
Chris@1094 231 range.first >= m_savedData.range.first &&
Chris@1094 232 range.second > m_savedData.range.second) {
Chris@1094 233
Chris@1094 234 //!!! Need FFTModel overlap tests to exercise this
Chris@1094 235
Chris@1094 236 sv_frame_t off = range.first - m_savedData.range.first;
Chris@1094 237
Chris@1094 238 vector<float> partial(m_savedData.data.begin() + off, m_savedData.data.end());
Chris@1094 239
Chris@1094 240 vector<float> rest = getSourceData({ m_savedData.range.second, range.second });
Chris@1094 241
Chris@1094 242 partial.insert(partial.end(), rest.begin(), rest.end());
Chris@1094 243 return partial;
Chris@1094 244 }
Chris@1094 245
Chris@1094 246 vector<float> data(range.second - range.first, 0.f);
Chris@1091 247 decltype(range.first) pfx = 0;
Chris@1091 248 if (range.first < 0) {
Chris@1091 249 pfx = -range.first;
Chris@1091 250 range = { 0, range.second };
Chris@1091 251 }
Chris@1094 252 // cerr << "requesting " << range.second - range.first << " from file" << endl;
Chris@1091 253 (void) m_model->getData(m_channel,
Chris@1091 254 range.first,
Chris@1091 255 range.second - range.first,
Chris@1094 256 &data[pfx]);
Chris@1091 257 if (m_channel == -1) {
Chris@1091 258 int channels = m_model->getChannelCount();
Chris@1091 259 if (channels > 1) {
Chris@1094 260 for (int i = 0; in_range_for(data, i); ++i) {
Chris@1094 261 data[i] /= float(channels);
Chris@1091 262 }
Chris@1091 263 }
Chris@1091 264 }
Chris@1094 265
Chris@1094 266 m_savedData = { range, data };
Chris@1094 267
Chris@1094 268 return data;
Chris@1091 269 }
Chris@1091 270
Chris@1091 271 vector<complex<float>>
Chris@1093 272 FFTModel::getFFTColumn(int n) const
Chris@1091 273 {
Chris@1093 274 for (auto &incache : m_cached) {
Chris@1093 275 if (incache.n == n) {
Chris@1093 276 return incache.col;
Chris@1093 277 }
Chris@1093 278 }
Chris@1093 279
Chris@1093 280 auto samples = getSourceSamples(n);
Chris@1091 281 m_windower.cut(&samples[0]);
Chris@1093 282 auto col = m_fft.process(samples);
Chris@1093 283
Chris@1093 284 SavedColumn sc { n, col };
Chris@1093 285 if (m_cached.size() >= m_cacheSize) {
Chris@1093 286 m_cached.pop_front();
Chris@1093 287 }
Chris@1093 288 m_cached.push_back(sc);
Chris@1093 289
Chris@1093 290 return col;
Chris@1091 291 }
Chris@1091 292
Chris@275 293 bool
Chris@1045 294 FFTModel::estimateStableFrequency(int x, int y, double &frequency)
Chris@275 295 {
Chris@275 296 if (!isOK()) return false;
Chris@275 297
Chris@1090 298 frequency = double(y * getSampleRate()) / m_fftSize;
Chris@275 299
Chris@275 300 if (x+1 >= getWidth()) return false;
Chris@275 301
Chris@275 302 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
Chris@275 303 // At hopsize h and sample rate sr, one hop happens in h/sr sec.
Chris@275 304 // At window size w, for bin b, f is b*sr/w.
Chris@275 305 // thus 2pi phase shift happens in w/(b*sr) sec.
Chris@275 306 // We need to know what phase shift we expect from h/sr sec.
Chris@275 307 // -> 2pi * ((h/sr) / (w/(b*sr)))
Chris@275 308 // = 2pi * ((h * b * sr) / (w * sr))
Chris@275 309 // = 2pi * (h * b) / w.
Chris@275 310
Chris@1038 311 double oldPhase = getPhaseAt(x, y);
Chris@1038 312 double newPhase = getPhaseAt(x+1, y);
Chris@275 313
Chris@929 314 int incr = getResolution();
Chris@275 315
Chris@1090 316 double expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / m_fftSize;
Chris@275 317
Chris@1038 318 double phaseError = princarg(newPhase - expectedPhase);
Chris@275 319
Chris@275 320 // The new frequency estimate based on the phase error resulting
Chris@275 321 // from assuming the "native" frequency of this bin
Chris@275 322
Chris@275 323 frequency =
Chris@1090 324 (getSampleRate() * (expectedPhase + phaseError - oldPhase)) /
Chris@1045 325 (2.0 * M_PI * incr);
Chris@275 326
Chris@275 327 return true;
Chris@275 328 }
Chris@275 329
Chris@275 330 FFTModel::PeakLocationSet
Chris@929 331 FFTModel::getPeaks(PeakPickType type, int x, int ymin, int ymax)
Chris@275 332 {
Chris@551 333 Profiler profiler("FFTModel::getPeaks");
Chris@551 334
Chris@275 335 FFTModel::PeakLocationSet peaks;
Chris@275 336 if (!isOK()) return peaks;
Chris@275 337
Chris@275 338 if (ymax == 0 || ymax > getHeight() - 1) {
Chris@275 339 ymax = getHeight() - 1;
Chris@275 340 }
Chris@275 341
Chris@275 342 if (type == AllPeaks) {
Chris@551 343 int minbin = ymin;
Chris@551 344 if (minbin > 0) minbin = minbin - 1;
Chris@551 345 int maxbin = ymax;
Chris@551 346 if (maxbin < getHeight() - 1) maxbin = maxbin + 1;
Chris@551 347 const int n = maxbin - minbin + 1;
Chris@608 348 #ifdef __GNUC__
Chris@551 349 float values[n];
Chris@608 350 #else
Chris@608 351 float *values = (float *)alloca(n * sizeof(float));
Chris@608 352 #endif
Chris@551 353 getMagnitudesAt(x, values, minbin, maxbin - minbin + 1);
Chris@929 354 for (int bin = ymin; bin <= ymax; ++bin) {
Chris@551 355 if (bin == minbin || bin == maxbin) continue;
Chris@551 356 if (values[bin - minbin] > values[bin - minbin - 1] &&
Chris@551 357 values[bin - minbin] > values[bin - minbin + 1]) {
Chris@275 358 peaks.insert(bin);
Chris@275 359 }
Chris@275 360 }
Chris@275 361 return peaks;
Chris@275 362 }
Chris@275 363
Chris@551 364 Column values = getColumn(x);
Chris@275 365
Chris@500 366 float mean = 0.f;
Chris@551 367 for (int i = 0; i < values.size(); ++i) mean += values[i];
Chris@1038 368 if (values.size() > 0) mean = mean / float(values.size());
Chris@1038 369
Chris@275 370 // For peak picking we use a moving median window, picking the
Chris@275 371 // highest value within each continuous region of values that
Chris@275 372 // exceed the median. For pitch adaptivity, we adjust the window
Chris@275 373 // size to a roughly constant pitch range (about four tones).
Chris@275 374
Chris@1040 375 sv_samplerate_t sampleRate = getSampleRate();
Chris@275 376
Chris@1090 377 deque<float> window;
Chris@1090 378 vector<int> inrange;
Chris@280 379 float dist = 0.5;
Chris@500 380
Chris@929 381 int medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
Chris@929 382 int halfWin = medianWinSize/2;
Chris@275 383
Chris@929 384 int binmin;
Chris@275 385 if (ymin > halfWin) binmin = ymin - halfWin;
Chris@275 386 else binmin = 0;
Chris@275 387
Chris@929 388 int binmax;
Chris@275 389 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 390 else binmax = values.size()-1;
Chris@275 391
Chris@929 392 int prevcentre = 0;
Chris@500 393
Chris@929 394 for (int bin = binmin; bin <= binmax; ++bin) {
Chris@275 395
Chris@275 396 float value = values[bin];
Chris@275 397
Chris@275 398 window.push_back(value);
Chris@275 399
Chris@280 400 // so-called median will actually be the dist*100'th percentile
Chris@280 401 medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
Chris@275 402 halfWin = medianWinSize/2;
Chris@275 403
Chris@929 404 while ((int)window.size() > medianWinSize) {
Chris@500 405 window.pop_front();
Chris@500 406 }
Chris@500 407
Chris@1038 408 int actualSize = int(window.size());
Chris@275 409
Chris@275 410 if (type == MajorPitchAdaptivePeaks) {
Chris@275 411 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 412 else binmax = values.size()-1;
Chris@275 413 }
Chris@275 414
Chris@1090 415 deque<float> sorted(window);
Chris@1090 416 sort(sorted.begin(), sorted.end());
Chris@1038 417 float median = sorted[int(float(sorted.size()) * dist)];
Chris@275 418
Chris@929 419 int centrebin = 0;
Chris@500 420 if (bin > actualSize/2) centrebin = bin - actualSize/2;
Chris@500 421
Chris@500 422 while (centrebin > prevcentre || bin == binmin) {
Chris@275 423
Chris@500 424 if (centrebin > prevcentre) ++prevcentre;
Chris@500 425
Chris@500 426 float centre = values[prevcentre];
Chris@500 427
Chris@500 428 if (centre > median) {
Chris@500 429 inrange.push_back(centrebin);
Chris@500 430 }
Chris@500 431
Chris@500 432 if (centre <= median || centrebin+1 == values.size()) {
Chris@500 433 if (!inrange.empty()) {
Chris@929 434 int peakbin = 0;
Chris@500 435 float peakval = 0.f;
Chris@929 436 for (int i = 0; i < (int)inrange.size(); ++i) {
Chris@500 437 if (i == 0 || values[inrange[i]] > peakval) {
Chris@500 438 peakval = values[inrange[i]];
Chris@500 439 peakbin = inrange[i];
Chris@500 440 }
Chris@500 441 }
Chris@500 442 inrange.clear();
Chris@500 443 if (peakbin >= ymin && peakbin <= ymax) {
Chris@500 444 peaks.insert(peakbin);
Chris@275 445 }
Chris@275 446 }
Chris@275 447 }
Chris@500 448
Chris@500 449 if (bin == binmin) break;
Chris@275 450 }
Chris@275 451 }
Chris@275 452
Chris@275 453 return peaks;
Chris@275 454 }
Chris@275 455
Chris@929 456 int
Chris@1040 457 FFTModel::getPeakPickWindowSize(PeakPickType type, sv_samplerate_t sampleRate,
Chris@929 458 int bin, float &percentile) const
Chris@275 459 {
Chris@280 460 percentile = 0.5;
Chris@275 461 if (type == MajorPeaks) return 10;
Chris@275 462 if (bin == 0) return 3;
Chris@280 463
Chris@1091 464 double binfreq = (sampleRate * bin) / m_fftSize;
Chris@1038 465 double hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
Chris@280 466
Chris@1091 467 int hibin = int(lrint((hifreq * m_fftSize) / sampleRate));
Chris@275 468 int medianWinSize = hibin - bin;
Chris@275 469 if (medianWinSize < 3) medianWinSize = 3;
Chris@280 470
Chris@1091 471 percentile = 0.5f + float(binfreq / sampleRate);
Chris@280 472
Chris@275 473 return medianWinSize;
Chris@275 474 }
Chris@275 475
Chris@275 476 FFTModel::PeakSet
Chris@929 477 FFTModel::getPeakFrequencies(PeakPickType type, int x,
Chris@929 478 int ymin, int ymax)
Chris@275 479 {
Chris@551 480 Profiler profiler("FFTModel::getPeakFrequencies");
Chris@551 481
Chris@275 482 PeakSet peaks;
Chris@275 483 if (!isOK()) return peaks;
Chris@275 484 PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
Chris@275 485
Chris@1040 486 sv_samplerate_t sampleRate = getSampleRate();
Chris@929 487 int incr = getResolution();
Chris@275 488
Chris@275 489 // This duplicates some of the work of estimateStableFrequency to
Chris@275 490 // allow us to retrieve the phases in two separate vertical
Chris@275 491 // columns, instead of jumping back and forth between columns x and
Chris@275 492 // x+1, which may be significantly slower if re-seeking is needed
Chris@275 493
Chris@1090 494 vector<float> phases;
Chris@275 495 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 496 i != locations.end(); ++i) {
Chris@275 497 phases.push_back(getPhaseAt(x, *i));
Chris@275 498 }
Chris@275 499
Chris@929 500 int phaseIndex = 0;
Chris@275 501 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 502 i != locations.end(); ++i) {
Chris@1038 503 double oldPhase = phases[phaseIndex];
Chris@1038 504 double newPhase = getPhaseAt(x+1, *i);
Chris@1090 505 double expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / m_fftSize;
Chris@1038 506 double phaseError = princarg(newPhase - expectedPhase);
Chris@1038 507 double frequency =
Chris@275 508 (sampleRate * (expectedPhase + phaseError - oldPhase))
Chris@275 509 / (2 * M_PI * incr);
Chris@1045 510 peaks[*i] = frequency;
Chris@275 511 ++phaseIndex;
Chris@275 512 }
Chris@275 513
Chris@275 514 return peaks;
Chris@275 515 }
Chris@275 516