annotate data/model/FFTModel.cpp @ 1091:bdebff3265ae simple-fft-model

Simplest naive FFTModel implementation (+ fill in tests)
author Chris Cannam
date Fri, 12 Jun 2015 18:08:57 +0100
parents 420fc961c0c4
children 70f18770b72d
rev   line source
Chris@152 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@152 2
Chris@152 3 /*
Chris@152 4 Sonic Visualiser
Chris@152 5 An audio file viewer and annotation editor.
Chris@152 6 Centre for Digital Music, Queen Mary, University of London.
Chris@152 7 This file copyright 2006 Chris Cannam.
Chris@152 8
Chris@152 9 This program is free software; you can redistribute it and/or
Chris@152 10 modify it under the terms of the GNU General Public License as
Chris@152 11 published by the Free Software Foundation; either version 2 of the
Chris@152 12 License, or (at your option) any later version. See the file
Chris@152 13 COPYING included with this distribution for more information.
Chris@152 14 */
Chris@152 15
Chris@152 16 #include "FFTModel.h"
Chris@152 17 #include "DenseTimeValueModel.h"
Chris@152 18
Chris@183 19 #include "base/Profiler.h"
Chris@275 20 #include "base/Pitch.h"
Chris@183 21
Chris@402 22 #include <algorithm>
Chris@402 23
Chris@152 24 #include <cassert>
Chris@1090 25 #include <deque>
Chris@152 26
Chris@608 27 #ifndef __GNUC__
Chris@608 28 #include <alloca.h>
Chris@608 29 #endif
Chris@608 30
Chris@1090 31 using namespace std;
Chris@1090 32
Chris@152 33 FFTModel::FFTModel(const DenseTimeValueModel *model,
Chris@152 34 int channel,
Chris@152 35 WindowType windowType,
Chris@929 36 int windowSize,
Chris@929 37 int windowIncrement,
Chris@1090 38 int fftSize) :
Chris@1090 39 m_model(model),
Chris@1090 40 m_channel(channel),
Chris@1090 41 m_windowType(windowType),
Chris@1090 42 m_windowSize(windowSize),
Chris@1090 43 m_windowIncrement(windowIncrement),
Chris@1090 44 m_fftSize(fftSize),
Chris@1091 45 m_windower(windowType, windowSize),
Chris@1091 46 m_fft(fftSize)
Chris@152 47 {
Chris@1091 48 if (m_windowSize > m_fftSize) {
Chris@1091 49 cerr << "ERROR: FFTModel::FFTModel: window size (" << m_windowSize
Chris@1091 50 << ") must be at least FFT size (" << m_fftSize << ")" << endl;
Chris@1091 51 throw invalid_argument("FFTModel window size must be at least FFT size");
Chris@1091 52 }
Chris@152 53 }
Chris@152 54
Chris@152 55 FFTModel::~FFTModel()
Chris@152 56 {
Chris@152 57 }
Chris@152 58
Chris@360 59 void
Chris@360 60 FFTModel::sourceModelAboutToBeDeleted()
Chris@360 61 {
Chris@1090 62 if (m_model) {
Chris@1090 63 cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_model << ")" << endl;
Chris@1090 64 m_model = 0;
Chris@360 65 }
Chris@360 66 }
Chris@360 67
Chris@1091 68 int
Chris@1091 69 FFTModel::getWidth() const
Chris@1091 70 {
Chris@1091 71 if (!m_model) return 0;
Chris@1091 72 return int((m_model->getEndFrame() - m_model->getStartFrame())
Chris@1091 73 / m_windowIncrement) + 1;
Chris@1091 74 }
Chris@1091 75
Chris@1091 76 int
Chris@1091 77 FFTModel::getHeight() const
Chris@1091 78 {
Chris@1091 79 return m_fftSize / 2 + 1;
Chris@1091 80 }
Chris@1091 81
Chris@152 82 QString
Chris@929 83 FFTModel::getBinName(int n) const
Chris@152 84 {
Chris@1040 85 sv_samplerate_t sr = getSampleRate();
Chris@152 86 if (!sr) return "";
Chris@204 87 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
Chris@152 88 return name;
Chris@152 89 }
Chris@152 90
Chris@1091 91 FFTModel::Column
Chris@1091 92 FFTModel::getColumn(int x) const
Chris@1091 93 {
Chris@1091 94 auto cplx = getFFTColumn(x);
Chris@1091 95 Column col;
Chris@1091 96 col.reserve(int(cplx.size()));
Chris@1091 97 for (auto c: cplx) col.push_back(abs(c));
Chris@1091 98 return col;
Chris@1091 99 }
Chris@1091 100
Chris@1091 101 float
Chris@1091 102 FFTModel::getMagnitudeAt(int x, int y) const
Chris@1091 103 {
Chris@1091 104 //!!!
Chris@1091 105 return abs(getFFTColumn(x)[y]);
Chris@1091 106 }
Chris@1091 107
Chris@1091 108 float
Chris@1091 109 FFTModel::getMaximumMagnitudeAt(int x) const
Chris@1091 110 {
Chris@1091 111 Column col(getColumn(x));
Chris@1091 112 auto itr = max_element(col.begin(), col.end());
Chris@1091 113 if (itr == col.end()) return 0.f;
Chris@1091 114 else return *itr;
Chris@1091 115 }
Chris@1091 116
Chris@1091 117 float
Chris@1091 118 FFTModel::getPhaseAt(int x, int y) const
Chris@1091 119 {
Chris@1091 120 //!!!
Chris@1091 121 return arg(getFFTColumn(x)[y]);
Chris@1091 122 }
Chris@1091 123
Chris@1091 124 void
Chris@1091 125 FFTModel::getValuesAt(int x, int y, float &re, float &im) const
Chris@1091 126 {
Chris@1091 127 auto col = getFFTColumn(x);
Chris@1091 128 re = col[y].real();
Chris@1091 129 im = col[y].imag();
Chris@1091 130 }
Chris@1091 131
Chris@1091 132 bool
Chris@1091 133 FFTModel::isColumnAvailable(int ) const
Chris@1091 134 {
Chris@1091 135 //!!!
Chris@1091 136 return true;
Chris@1091 137 }
Chris@1091 138
Chris@1091 139 bool
Chris@1091 140 FFTModel::getMagnitudesAt(int x, float *values, int minbin, int count) const
Chris@1091 141 {
Chris@1091 142 if (count == 0) count = getHeight();
Chris@1091 143 auto col = getFFTColumn(x);
Chris@1091 144 for (int i = 0; i < count; ++i) {
Chris@1091 145 values[i] = abs(col[minbin + i]);
Chris@1091 146 }
Chris@1091 147 return true;
Chris@1091 148 }
Chris@1091 149
Chris@1091 150 bool
Chris@1091 151 FFTModel::getNormalizedMagnitudesAt(int x, float *values, int minbin, int count) const
Chris@1091 152 {
Chris@1091 153 //!!! WRONG
Chris@1091 154 return getMagnitudesAt(x, values, minbin, count);
Chris@1091 155 }
Chris@1091 156
Chris@1091 157 bool
Chris@1091 158 FFTModel::getPhasesAt(int x, float *values, int minbin, int count) const
Chris@1091 159 {
Chris@1091 160 if (count == 0) count = getHeight();
Chris@1091 161 auto col = getFFTColumn(x);
Chris@1091 162 for (int i = 0; i < count; ++i) {
Chris@1091 163 values[i] = arg(col[minbin + i]);
Chris@1091 164 }
Chris@1091 165 return true;
Chris@1091 166 }
Chris@1091 167
Chris@1091 168 bool
Chris@1091 169 FFTModel::getValuesAt(int x, float *reals, float *imags, int minbin, int count) const
Chris@1091 170 {
Chris@1091 171 if (count == 0) count = getHeight();
Chris@1091 172 auto col = getFFTColumn(x);
Chris@1091 173 for (int i = 0; i < count; ++i) {
Chris@1091 174 reals[i] = col[minbin + i].real();
Chris@1091 175 }
Chris@1091 176 for (int i = 0; i < count; ++i) {
Chris@1091 177 imags[i] = col[minbin + i].imag();
Chris@1091 178 }
Chris@1091 179 return true;
Chris@1091 180 }
Chris@1091 181
Chris@1091 182 vector<float>
Chris@1091 183 FFTModel::getSourceSamples(int column) const
Chris@1091 184 {
Chris@1091 185 auto range = getSourceSampleRange(column);
Chris@1091 186 vector<float> samples(m_fftSize, 0.f);
Chris@1091 187 int off = (m_fftSize - m_windowSize) / 2;
Chris@1091 188 decltype(range.first) pfx = 0;
Chris@1091 189 if (range.first < 0) {
Chris@1091 190 pfx = -range.first;
Chris@1091 191 range = { 0, range.second };
Chris@1091 192 }
Chris@1091 193 (void) m_model->getData(m_channel,
Chris@1091 194 range.first,
Chris@1091 195 range.second - range.first,
Chris@1091 196 &samples[off + pfx]);
Chris@1091 197 if (m_channel == -1) {
Chris@1091 198 int channels = m_model->getChannelCount();
Chris@1091 199 if (channels > 1) {
Chris@1091 200 for (int i = 0; i < range.second - range.first; ++i) {
Chris@1091 201 samples[off + pfx + i] /= float(channels);
Chris@1091 202 }
Chris@1091 203 }
Chris@1091 204 }
Chris@1091 205 return samples;
Chris@1091 206 }
Chris@1091 207
Chris@1091 208 vector<complex<float>>
Chris@1091 209 FFTModel::getFFTColumn(int column) const
Chris@1091 210 {
Chris@1091 211 auto samples = getSourceSamples(column);
Chris@1091 212 m_windower.cut(&samples[0]);
Chris@1091 213 return m_fft.process(samples);
Chris@1091 214 }
Chris@1091 215
Chris@275 216 bool
Chris@1045 217 FFTModel::estimateStableFrequency(int x, int y, double &frequency)
Chris@275 218 {
Chris@275 219 if (!isOK()) return false;
Chris@275 220
Chris@1090 221 frequency = double(y * getSampleRate()) / m_fftSize;
Chris@275 222
Chris@275 223 if (x+1 >= getWidth()) return false;
Chris@275 224
Chris@275 225 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
Chris@275 226 // At hopsize h and sample rate sr, one hop happens in h/sr sec.
Chris@275 227 // At window size w, for bin b, f is b*sr/w.
Chris@275 228 // thus 2pi phase shift happens in w/(b*sr) sec.
Chris@275 229 // We need to know what phase shift we expect from h/sr sec.
Chris@275 230 // -> 2pi * ((h/sr) / (w/(b*sr)))
Chris@275 231 // = 2pi * ((h * b * sr) / (w * sr))
Chris@275 232 // = 2pi * (h * b) / w.
Chris@275 233
Chris@1038 234 double oldPhase = getPhaseAt(x, y);
Chris@1038 235 double newPhase = getPhaseAt(x+1, y);
Chris@275 236
Chris@929 237 int incr = getResolution();
Chris@275 238
Chris@1090 239 double expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / m_fftSize;
Chris@275 240
Chris@1038 241 double phaseError = princarg(newPhase - expectedPhase);
Chris@275 242
Chris@275 243 // The new frequency estimate based on the phase error resulting
Chris@275 244 // from assuming the "native" frequency of this bin
Chris@275 245
Chris@275 246 frequency =
Chris@1090 247 (getSampleRate() * (expectedPhase + phaseError - oldPhase)) /
Chris@1045 248 (2.0 * M_PI * incr);
Chris@275 249
Chris@275 250 return true;
Chris@275 251 }
Chris@275 252
Chris@275 253 FFTModel::PeakLocationSet
Chris@929 254 FFTModel::getPeaks(PeakPickType type, int x, int ymin, int ymax)
Chris@275 255 {
Chris@551 256 Profiler profiler("FFTModel::getPeaks");
Chris@551 257
Chris@275 258 FFTModel::PeakLocationSet peaks;
Chris@275 259 if (!isOK()) return peaks;
Chris@275 260
Chris@275 261 if (ymax == 0 || ymax > getHeight() - 1) {
Chris@275 262 ymax = getHeight() - 1;
Chris@275 263 }
Chris@275 264
Chris@275 265 if (type == AllPeaks) {
Chris@551 266 int minbin = ymin;
Chris@551 267 if (minbin > 0) minbin = minbin - 1;
Chris@551 268 int maxbin = ymax;
Chris@551 269 if (maxbin < getHeight() - 1) maxbin = maxbin + 1;
Chris@551 270 const int n = maxbin - minbin + 1;
Chris@608 271 #ifdef __GNUC__
Chris@551 272 float values[n];
Chris@608 273 #else
Chris@608 274 float *values = (float *)alloca(n * sizeof(float));
Chris@608 275 #endif
Chris@551 276 getMagnitudesAt(x, values, minbin, maxbin - minbin + 1);
Chris@929 277 for (int bin = ymin; bin <= ymax; ++bin) {
Chris@551 278 if (bin == minbin || bin == maxbin) continue;
Chris@551 279 if (values[bin - minbin] > values[bin - minbin - 1] &&
Chris@551 280 values[bin - minbin] > values[bin - minbin + 1]) {
Chris@275 281 peaks.insert(bin);
Chris@275 282 }
Chris@275 283 }
Chris@275 284 return peaks;
Chris@275 285 }
Chris@275 286
Chris@551 287 Column values = getColumn(x);
Chris@275 288
Chris@500 289 float mean = 0.f;
Chris@551 290 for (int i = 0; i < values.size(); ++i) mean += values[i];
Chris@1038 291 if (values.size() > 0) mean = mean / float(values.size());
Chris@1038 292
Chris@275 293 // For peak picking we use a moving median window, picking the
Chris@275 294 // highest value within each continuous region of values that
Chris@275 295 // exceed the median. For pitch adaptivity, we adjust the window
Chris@275 296 // size to a roughly constant pitch range (about four tones).
Chris@275 297
Chris@1040 298 sv_samplerate_t sampleRate = getSampleRate();
Chris@275 299
Chris@1090 300 deque<float> window;
Chris@1090 301 vector<int> inrange;
Chris@280 302 float dist = 0.5;
Chris@500 303
Chris@929 304 int medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
Chris@929 305 int halfWin = medianWinSize/2;
Chris@275 306
Chris@929 307 int binmin;
Chris@275 308 if (ymin > halfWin) binmin = ymin - halfWin;
Chris@275 309 else binmin = 0;
Chris@275 310
Chris@929 311 int binmax;
Chris@275 312 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 313 else binmax = values.size()-1;
Chris@275 314
Chris@929 315 int prevcentre = 0;
Chris@500 316
Chris@929 317 for (int bin = binmin; bin <= binmax; ++bin) {
Chris@275 318
Chris@275 319 float value = values[bin];
Chris@275 320
Chris@275 321 window.push_back(value);
Chris@275 322
Chris@280 323 // so-called median will actually be the dist*100'th percentile
Chris@280 324 medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
Chris@275 325 halfWin = medianWinSize/2;
Chris@275 326
Chris@929 327 while ((int)window.size() > medianWinSize) {
Chris@500 328 window.pop_front();
Chris@500 329 }
Chris@500 330
Chris@1038 331 int actualSize = int(window.size());
Chris@275 332
Chris@275 333 if (type == MajorPitchAdaptivePeaks) {
Chris@275 334 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 335 else binmax = values.size()-1;
Chris@275 336 }
Chris@275 337
Chris@1090 338 deque<float> sorted(window);
Chris@1090 339 sort(sorted.begin(), sorted.end());
Chris@1038 340 float median = sorted[int(float(sorted.size()) * dist)];
Chris@275 341
Chris@929 342 int centrebin = 0;
Chris@500 343 if (bin > actualSize/2) centrebin = bin - actualSize/2;
Chris@500 344
Chris@500 345 while (centrebin > prevcentre || bin == binmin) {
Chris@275 346
Chris@500 347 if (centrebin > prevcentre) ++prevcentre;
Chris@500 348
Chris@500 349 float centre = values[prevcentre];
Chris@500 350
Chris@500 351 if (centre > median) {
Chris@500 352 inrange.push_back(centrebin);
Chris@500 353 }
Chris@500 354
Chris@500 355 if (centre <= median || centrebin+1 == values.size()) {
Chris@500 356 if (!inrange.empty()) {
Chris@929 357 int peakbin = 0;
Chris@500 358 float peakval = 0.f;
Chris@929 359 for (int i = 0; i < (int)inrange.size(); ++i) {
Chris@500 360 if (i == 0 || values[inrange[i]] > peakval) {
Chris@500 361 peakval = values[inrange[i]];
Chris@500 362 peakbin = inrange[i];
Chris@500 363 }
Chris@500 364 }
Chris@500 365 inrange.clear();
Chris@500 366 if (peakbin >= ymin && peakbin <= ymax) {
Chris@500 367 peaks.insert(peakbin);
Chris@275 368 }
Chris@275 369 }
Chris@275 370 }
Chris@500 371
Chris@500 372 if (bin == binmin) break;
Chris@275 373 }
Chris@275 374 }
Chris@275 375
Chris@275 376 return peaks;
Chris@275 377 }
Chris@275 378
Chris@929 379 int
Chris@1040 380 FFTModel::getPeakPickWindowSize(PeakPickType type, sv_samplerate_t sampleRate,
Chris@929 381 int bin, float &percentile) const
Chris@275 382 {
Chris@280 383 percentile = 0.5;
Chris@275 384 if (type == MajorPeaks) return 10;
Chris@275 385 if (bin == 0) return 3;
Chris@280 386
Chris@1091 387 double binfreq = (sampleRate * bin) / m_fftSize;
Chris@1038 388 double hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
Chris@280 389
Chris@1091 390 int hibin = int(lrint((hifreq * m_fftSize) / sampleRate));
Chris@275 391 int medianWinSize = hibin - bin;
Chris@275 392 if (medianWinSize < 3) medianWinSize = 3;
Chris@280 393
Chris@1091 394 percentile = 0.5f + float(binfreq / sampleRate);
Chris@280 395
Chris@275 396 return medianWinSize;
Chris@275 397 }
Chris@275 398
Chris@275 399 FFTModel::PeakSet
Chris@929 400 FFTModel::getPeakFrequencies(PeakPickType type, int x,
Chris@929 401 int ymin, int ymax)
Chris@275 402 {
Chris@551 403 Profiler profiler("FFTModel::getPeakFrequencies");
Chris@551 404
Chris@275 405 PeakSet peaks;
Chris@275 406 if (!isOK()) return peaks;
Chris@275 407 PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
Chris@275 408
Chris@1040 409 sv_samplerate_t sampleRate = getSampleRate();
Chris@929 410 int incr = getResolution();
Chris@275 411
Chris@275 412 // This duplicates some of the work of estimateStableFrequency to
Chris@275 413 // allow us to retrieve the phases in two separate vertical
Chris@275 414 // columns, instead of jumping back and forth between columns x and
Chris@275 415 // x+1, which may be significantly slower if re-seeking is needed
Chris@275 416
Chris@1090 417 vector<float> phases;
Chris@275 418 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 419 i != locations.end(); ++i) {
Chris@275 420 phases.push_back(getPhaseAt(x, *i));
Chris@275 421 }
Chris@275 422
Chris@929 423 int phaseIndex = 0;
Chris@275 424 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 425 i != locations.end(); ++i) {
Chris@1038 426 double oldPhase = phases[phaseIndex];
Chris@1038 427 double newPhase = getPhaseAt(x+1, *i);
Chris@1090 428 double expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / m_fftSize;
Chris@1038 429 double phaseError = princarg(newPhase - expectedPhase);
Chris@1038 430 double frequency =
Chris@275 431 (sampleRate * (expectedPhase + phaseError - oldPhase))
Chris@275 432 / (2 * M_PI * incr);
Chris@1045 433 peaks[*i] = frequency;
Chris@275 434 ++phaseIndex;
Chris@275 435 }
Chris@275 436
Chris@275 437 return peaks;
Chris@275 438 }
Chris@275 439