annotate data/model/FFTModel.cpp @ 1092:70f18770b72d simple-fft-model

Normalization function
author Chris Cannam
date Fri, 12 Jun 2015 18:20:09 +0100
parents bdebff3265ae
children 44b079427b36
rev   line source
Chris@152 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@152 2
Chris@152 3 /*
Chris@152 4 Sonic Visualiser
Chris@152 5 An audio file viewer and annotation editor.
Chris@152 6 Centre for Digital Music, Queen Mary, University of London.
Chris@152 7 This file copyright 2006 Chris Cannam.
Chris@152 8
Chris@152 9 This program is free software; you can redistribute it and/or
Chris@152 10 modify it under the terms of the GNU General Public License as
Chris@152 11 published by the Free Software Foundation; either version 2 of the
Chris@152 12 License, or (at your option) any later version. See the file
Chris@152 13 COPYING included with this distribution for more information.
Chris@152 14 */
Chris@152 15
Chris@152 16 #include "FFTModel.h"
Chris@152 17 #include "DenseTimeValueModel.h"
Chris@152 18
Chris@183 19 #include "base/Profiler.h"
Chris@275 20 #include "base/Pitch.h"
Chris@183 21
Chris@402 22 #include <algorithm>
Chris@402 23
Chris@152 24 #include <cassert>
Chris@1090 25 #include <deque>
Chris@152 26
Chris@608 27 #ifndef __GNUC__
Chris@608 28 #include <alloca.h>
Chris@608 29 #endif
Chris@608 30
Chris@1090 31 using namespace std;
Chris@1090 32
Chris@152 33 FFTModel::FFTModel(const DenseTimeValueModel *model,
Chris@152 34 int channel,
Chris@152 35 WindowType windowType,
Chris@929 36 int windowSize,
Chris@929 37 int windowIncrement,
Chris@1090 38 int fftSize) :
Chris@1090 39 m_model(model),
Chris@1090 40 m_channel(channel),
Chris@1090 41 m_windowType(windowType),
Chris@1090 42 m_windowSize(windowSize),
Chris@1090 43 m_windowIncrement(windowIncrement),
Chris@1090 44 m_fftSize(fftSize),
Chris@1091 45 m_windower(windowType, windowSize),
Chris@1091 46 m_fft(fftSize)
Chris@152 47 {
Chris@1091 48 if (m_windowSize > m_fftSize) {
Chris@1091 49 cerr << "ERROR: FFTModel::FFTModel: window size (" << m_windowSize
Chris@1091 50 << ") must be at least FFT size (" << m_fftSize << ")" << endl;
Chris@1091 51 throw invalid_argument("FFTModel window size must be at least FFT size");
Chris@1091 52 }
Chris@152 53 }
Chris@152 54
Chris@152 55 FFTModel::~FFTModel()
Chris@152 56 {
Chris@152 57 }
Chris@152 58
Chris@360 59 void
Chris@360 60 FFTModel::sourceModelAboutToBeDeleted()
Chris@360 61 {
Chris@1090 62 if (m_model) {
Chris@1090 63 cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_model << ")" << endl;
Chris@1090 64 m_model = 0;
Chris@360 65 }
Chris@360 66 }
Chris@360 67
Chris@1091 68 int
Chris@1091 69 FFTModel::getWidth() const
Chris@1091 70 {
Chris@1091 71 if (!m_model) return 0;
Chris@1091 72 return int((m_model->getEndFrame() - m_model->getStartFrame())
Chris@1091 73 / m_windowIncrement) + 1;
Chris@1091 74 }
Chris@1091 75
Chris@1091 76 int
Chris@1091 77 FFTModel::getHeight() const
Chris@1091 78 {
Chris@1091 79 return m_fftSize / 2 + 1;
Chris@1091 80 }
Chris@1091 81
Chris@152 82 QString
Chris@929 83 FFTModel::getBinName(int n) const
Chris@152 84 {
Chris@1040 85 sv_samplerate_t sr = getSampleRate();
Chris@152 86 if (!sr) return "";
Chris@204 87 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
Chris@152 88 return name;
Chris@152 89 }
Chris@152 90
Chris@1091 91 FFTModel::Column
Chris@1091 92 FFTModel::getColumn(int x) const
Chris@1091 93 {
Chris@1091 94 auto cplx = getFFTColumn(x);
Chris@1091 95 Column col;
Chris@1091 96 col.reserve(int(cplx.size()));
Chris@1091 97 for (auto c: cplx) col.push_back(abs(c));
Chris@1091 98 return col;
Chris@1091 99 }
Chris@1091 100
Chris@1091 101 float
Chris@1091 102 FFTModel::getMagnitudeAt(int x, int y) const
Chris@1091 103 {
Chris@1091 104 //!!!
Chris@1091 105 return abs(getFFTColumn(x)[y]);
Chris@1091 106 }
Chris@1091 107
Chris@1091 108 float
Chris@1091 109 FFTModel::getMaximumMagnitudeAt(int x) const
Chris@1091 110 {
Chris@1091 111 Column col(getColumn(x));
Chris@1092 112 float max = 0.f;
Chris@1092 113 for (int i = 0; i < col.size(); ++i) {
Chris@1092 114 if (col[i] > max) max = col[i];
Chris@1092 115 }
Chris@1092 116 return max;
Chris@1091 117 }
Chris@1091 118
Chris@1091 119 float
Chris@1091 120 FFTModel::getPhaseAt(int x, int y) const
Chris@1091 121 {
Chris@1091 122 //!!!
Chris@1091 123 return arg(getFFTColumn(x)[y]);
Chris@1091 124 }
Chris@1091 125
Chris@1091 126 void
Chris@1091 127 FFTModel::getValuesAt(int x, int y, float &re, float &im) const
Chris@1091 128 {
Chris@1091 129 auto col = getFFTColumn(x);
Chris@1091 130 re = col[y].real();
Chris@1091 131 im = col[y].imag();
Chris@1091 132 }
Chris@1091 133
Chris@1091 134 bool
Chris@1091 135 FFTModel::isColumnAvailable(int ) const
Chris@1091 136 {
Chris@1091 137 //!!!
Chris@1091 138 return true;
Chris@1091 139 }
Chris@1091 140
Chris@1091 141 bool
Chris@1091 142 FFTModel::getMagnitudesAt(int x, float *values, int minbin, int count) const
Chris@1091 143 {
Chris@1091 144 if (count == 0) count = getHeight();
Chris@1091 145 auto col = getFFTColumn(x);
Chris@1091 146 for (int i = 0; i < count; ++i) {
Chris@1091 147 values[i] = abs(col[minbin + i]);
Chris@1091 148 }
Chris@1091 149 return true;
Chris@1091 150 }
Chris@1091 151
Chris@1091 152 bool
Chris@1091 153 FFTModel::getNormalizedMagnitudesAt(int x, float *values, int minbin, int count) const
Chris@1091 154 {
Chris@1092 155 if (!getMagnitudesAt(x, values, minbin, count)) return false;
Chris@1092 156 if (count == 0) count = getHeight();
Chris@1092 157 float max = 0.f;
Chris@1092 158 for (int i = 0; i < count; ++i) {
Chris@1092 159 if (values[i] > max) max = values[i];
Chris@1092 160 }
Chris@1092 161 if (max > 0.f) {
Chris@1092 162 for (int i = 0; i < count; ++i) {
Chris@1092 163 values[i] /= max;
Chris@1092 164 }
Chris@1092 165 }
Chris@1092 166 return true;
Chris@1091 167 }
Chris@1091 168
Chris@1091 169 bool
Chris@1091 170 FFTModel::getPhasesAt(int x, float *values, int minbin, int count) const
Chris@1091 171 {
Chris@1091 172 if (count == 0) count = getHeight();
Chris@1091 173 auto col = getFFTColumn(x);
Chris@1091 174 for (int i = 0; i < count; ++i) {
Chris@1091 175 values[i] = arg(col[minbin + i]);
Chris@1091 176 }
Chris@1091 177 return true;
Chris@1091 178 }
Chris@1091 179
Chris@1091 180 bool
Chris@1091 181 FFTModel::getValuesAt(int x, float *reals, float *imags, int minbin, int count) const
Chris@1091 182 {
Chris@1091 183 if (count == 0) count = getHeight();
Chris@1091 184 auto col = getFFTColumn(x);
Chris@1091 185 for (int i = 0; i < count; ++i) {
Chris@1091 186 reals[i] = col[minbin + i].real();
Chris@1091 187 }
Chris@1091 188 for (int i = 0; i < count; ++i) {
Chris@1091 189 imags[i] = col[minbin + i].imag();
Chris@1091 190 }
Chris@1091 191 return true;
Chris@1091 192 }
Chris@1091 193
Chris@1091 194 vector<float>
Chris@1091 195 FFTModel::getSourceSamples(int column) const
Chris@1091 196 {
Chris@1091 197 auto range = getSourceSampleRange(column);
Chris@1091 198 vector<float> samples(m_fftSize, 0.f);
Chris@1091 199 int off = (m_fftSize - m_windowSize) / 2;
Chris@1091 200 decltype(range.first) pfx = 0;
Chris@1091 201 if (range.first < 0) {
Chris@1091 202 pfx = -range.first;
Chris@1091 203 range = { 0, range.second };
Chris@1091 204 }
Chris@1091 205 (void) m_model->getData(m_channel,
Chris@1091 206 range.first,
Chris@1091 207 range.second - range.first,
Chris@1091 208 &samples[off + pfx]);
Chris@1091 209 if (m_channel == -1) {
Chris@1091 210 int channels = m_model->getChannelCount();
Chris@1091 211 if (channels > 1) {
Chris@1091 212 for (int i = 0; i < range.second - range.first; ++i) {
Chris@1091 213 samples[off + pfx + i] /= float(channels);
Chris@1091 214 }
Chris@1091 215 }
Chris@1091 216 }
Chris@1091 217 return samples;
Chris@1091 218 }
Chris@1091 219
Chris@1091 220 vector<complex<float>>
Chris@1091 221 FFTModel::getFFTColumn(int column) const
Chris@1091 222 {
Chris@1091 223 auto samples = getSourceSamples(column);
Chris@1091 224 m_windower.cut(&samples[0]);
Chris@1091 225 return m_fft.process(samples);
Chris@1091 226 }
Chris@1091 227
Chris@275 228 bool
Chris@1045 229 FFTModel::estimateStableFrequency(int x, int y, double &frequency)
Chris@275 230 {
Chris@275 231 if (!isOK()) return false;
Chris@275 232
Chris@1090 233 frequency = double(y * getSampleRate()) / m_fftSize;
Chris@275 234
Chris@275 235 if (x+1 >= getWidth()) return false;
Chris@275 236
Chris@275 237 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
Chris@275 238 // At hopsize h and sample rate sr, one hop happens in h/sr sec.
Chris@275 239 // At window size w, for bin b, f is b*sr/w.
Chris@275 240 // thus 2pi phase shift happens in w/(b*sr) sec.
Chris@275 241 // We need to know what phase shift we expect from h/sr sec.
Chris@275 242 // -> 2pi * ((h/sr) / (w/(b*sr)))
Chris@275 243 // = 2pi * ((h * b * sr) / (w * sr))
Chris@275 244 // = 2pi * (h * b) / w.
Chris@275 245
Chris@1038 246 double oldPhase = getPhaseAt(x, y);
Chris@1038 247 double newPhase = getPhaseAt(x+1, y);
Chris@275 248
Chris@929 249 int incr = getResolution();
Chris@275 250
Chris@1090 251 double expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / m_fftSize;
Chris@275 252
Chris@1038 253 double phaseError = princarg(newPhase - expectedPhase);
Chris@275 254
Chris@275 255 // The new frequency estimate based on the phase error resulting
Chris@275 256 // from assuming the "native" frequency of this bin
Chris@275 257
Chris@275 258 frequency =
Chris@1090 259 (getSampleRate() * (expectedPhase + phaseError - oldPhase)) /
Chris@1045 260 (2.0 * M_PI * incr);
Chris@275 261
Chris@275 262 return true;
Chris@275 263 }
Chris@275 264
Chris@275 265 FFTModel::PeakLocationSet
Chris@929 266 FFTModel::getPeaks(PeakPickType type, int x, int ymin, int ymax)
Chris@275 267 {
Chris@551 268 Profiler profiler("FFTModel::getPeaks");
Chris@551 269
Chris@275 270 FFTModel::PeakLocationSet peaks;
Chris@275 271 if (!isOK()) return peaks;
Chris@275 272
Chris@275 273 if (ymax == 0 || ymax > getHeight() - 1) {
Chris@275 274 ymax = getHeight() - 1;
Chris@275 275 }
Chris@275 276
Chris@275 277 if (type == AllPeaks) {
Chris@551 278 int minbin = ymin;
Chris@551 279 if (minbin > 0) minbin = minbin - 1;
Chris@551 280 int maxbin = ymax;
Chris@551 281 if (maxbin < getHeight() - 1) maxbin = maxbin + 1;
Chris@551 282 const int n = maxbin - minbin + 1;
Chris@608 283 #ifdef __GNUC__
Chris@551 284 float values[n];
Chris@608 285 #else
Chris@608 286 float *values = (float *)alloca(n * sizeof(float));
Chris@608 287 #endif
Chris@551 288 getMagnitudesAt(x, values, minbin, maxbin - minbin + 1);
Chris@929 289 for (int bin = ymin; bin <= ymax; ++bin) {
Chris@551 290 if (bin == minbin || bin == maxbin) continue;
Chris@551 291 if (values[bin - minbin] > values[bin - minbin - 1] &&
Chris@551 292 values[bin - minbin] > values[bin - minbin + 1]) {
Chris@275 293 peaks.insert(bin);
Chris@275 294 }
Chris@275 295 }
Chris@275 296 return peaks;
Chris@275 297 }
Chris@275 298
Chris@551 299 Column values = getColumn(x);
Chris@275 300
Chris@500 301 float mean = 0.f;
Chris@551 302 for (int i = 0; i < values.size(); ++i) mean += values[i];
Chris@1038 303 if (values.size() > 0) mean = mean / float(values.size());
Chris@1038 304
Chris@275 305 // For peak picking we use a moving median window, picking the
Chris@275 306 // highest value within each continuous region of values that
Chris@275 307 // exceed the median. For pitch adaptivity, we adjust the window
Chris@275 308 // size to a roughly constant pitch range (about four tones).
Chris@275 309
Chris@1040 310 sv_samplerate_t sampleRate = getSampleRate();
Chris@275 311
Chris@1090 312 deque<float> window;
Chris@1090 313 vector<int> inrange;
Chris@280 314 float dist = 0.5;
Chris@500 315
Chris@929 316 int medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
Chris@929 317 int halfWin = medianWinSize/2;
Chris@275 318
Chris@929 319 int binmin;
Chris@275 320 if (ymin > halfWin) binmin = ymin - halfWin;
Chris@275 321 else binmin = 0;
Chris@275 322
Chris@929 323 int binmax;
Chris@275 324 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 325 else binmax = values.size()-1;
Chris@275 326
Chris@929 327 int prevcentre = 0;
Chris@500 328
Chris@929 329 for (int bin = binmin; bin <= binmax; ++bin) {
Chris@275 330
Chris@275 331 float value = values[bin];
Chris@275 332
Chris@275 333 window.push_back(value);
Chris@275 334
Chris@280 335 // so-called median will actually be the dist*100'th percentile
Chris@280 336 medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
Chris@275 337 halfWin = medianWinSize/2;
Chris@275 338
Chris@929 339 while ((int)window.size() > medianWinSize) {
Chris@500 340 window.pop_front();
Chris@500 341 }
Chris@500 342
Chris@1038 343 int actualSize = int(window.size());
Chris@275 344
Chris@275 345 if (type == MajorPitchAdaptivePeaks) {
Chris@275 346 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 347 else binmax = values.size()-1;
Chris@275 348 }
Chris@275 349
Chris@1090 350 deque<float> sorted(window);
Chris@1090 351 sort(sorted.begin(), sorted.end());
Chris@1038 352 float median = sorted[int(float(sorted.size()) * dist)];
Chris@275 353
Chris@929 354 int centrebin = 0;
Chris@500 355 if (bin > actualSize/2) centrebin = bin - actualSize/2;
Chris@500 356
Chris@500 357 while (centrebin > prevcentre || bin == binmin) {
Chris@275 358
Chris@500 359 if (centrebin > prevcentre) ++prevcentre;
Chris@500 360
Chris@500 361 float centre = values[prevcentre];
Chris@500 362
Chris@500 363 if (centre > median) {
Chris@500 364 inrange.push_back(centrebin);
Chris@500 365 }
Chris@500 366
Chris@500 367 if (centre <= median || centrebin+1 == values.size()) {
Chris@500 368 if (!inrange.empty()) {
Chris@929 369 int peakbin = 0;
Chris@500 370 float peakval = 0.f;
Chris@929 371 for (int i = 0; i < (int)inrange.size(); ++i) {
Chris@500 372 if (i == 0 || values[inrange[i]] > peakval) {
Chris@500 373 peakval = values[inrange[i]];
Chris@500 374 peakbin = inrange[i];
Chris@500 375 }
Chris@500 376 }
Chris@500 377 inrange.clear();
Chris@500 378 if (peakbin >= ymin && peakbin <= ymax) {
Chris@500 379 peaks.insert(peakbin);
Chris@275 380 }
Chris@275 381 }
Chris@275 382 }
Chris@500 383
Chris@500 384 if (bin == binmin) break;
Chris@275 385 }
Chris@275 386 }
Chris@275 387
Chris@275 388 return peaks;
Chris@275 389 }
Chris@275 390
Chris@929 391 int
Chris@1040 392 FFTModel::getPeakPickWindowSize(PeakPickType type, sv_samplerate_t sampleRate,
Chris@929 393 int bin, float &percentile) const
Chris@275 394 {
Chris@280 395 percentile = 0.5;
Chris@275 396 if (type == MajorPeaks) return 10;
Chris@275 397 if (bin == 0) return 3;
Chris@280 398
Chris@1091 399 double binfreq = (sampleRate * bin) / m_fftSize;
Chris@1038 400 double hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
Chris@280 401
Chris@1091 402 int hibin = int(lrint((hifreq * m_fftSize) / sampleRate));
Chris@275 403 int medianWinSize = hibin - bin;
Chris@275 404 if (medianWinSize < 3) medianWinSize = 3;
Chris@280 405
Chris@1091 406 percentile = 0.5f + float(binfreq / sampleRate);
Chris@280 407
Chris@275 408 return medianWinSize;
Chris@275 409 }
Chris@275 410
Chris@275 411 FFTModel::PeakSet
Chris@929 412 FFTModel::getPeakFrequencies(PeakPickType type, int x,
Chris@929 413 int ymin, int ymax)
Chris@275 414 {
Chris@551 415 Profiler profiler("FFTModel::getPeakFrequencies");
Chris@551 416
Chris@275 417 PeakSet peaks;
Chris@275 418 if (!isOK()) return peaks;
Chris@275 419 PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
Chris@275 420
Chris@1040 421 sv_samplerate_t sampleRate = getSampleRate();
Chris@929 422 int incr = getResolution();
Chris@275 423
Chris@275 424 // This duplicates some of the work of estimateStableFrequency to
Chris@275 425 // allow us to retrieve the phases in two separate vertical
Chris@275 426 // columns, instead of jumping back and forth between columns x and
Chris@275 427 // x+1, which may be significantly slower if re-seeking is needed
Chris@275 428
Chris@1090 429 vector<float> phases;
Chris@275 430 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 431 i != locations.end(); ++i) {
Chris@275 432 phases.push_back(getPhaseAt(x, *i));
Chris@275 433 }
Chris@275 434
Chris@929 435 int phaseIndex = 0;
Chris@275 436 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 437 i != locations.end(); ++i) {
Chris@1038 438 double oldPhase = phases[phaseIndex];
Chris@1038 439 double newPhase = getPhaseAt(x+1, *i);
Chris@1090 440 double expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / m_fftSize;
Chris@1038 441 double phaseError = princarg(newPhase - expectedPhase);
Chris@1038 442 double frequency =
Chris@275 443 (sampleRate * (expectedPhase + phaseError - oldPhase))
Chris@275 444 / (2 * M_PI * incr);
Chris@1045 445 peaks[*i] = frequency;
Chris@275 446 ++phaseIndex;
Chris@275 447 }
Chris@275 448
Chris@275 449 return peaks;
Chris@275 450 }
Chris@275 451