annotate data/model/FFTModel.cpp @ 823:f0558e69a074

Rename Resampling- to DecodingWavFileReader, and use it whenever we have an audio file that is not quickly seekable using libsndfile. Avoids very slow performance when analysing ogg files.
author Chris Cannam
date Wed, 17 Jul 2013 15:40:01 +0100
parents d7f3dfe6f9a4
children e802e550a1f2
rev   line source
Chris@152 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@152 2
Chris@152 3 /*
Chris@152 4 Sonic Visualiser
Chris@152 5 An audio file viewer and annotation editor.
Chris@152 6 Centre for Digital Music, Queen Mary, University of London.
Chris@152 7 This file copyright 2006 Chris Cannam.
Chris@152 8
Chris@152 9 This program is free software; you can redistribute it and/or
Chris@152 10 modify it under the terms of the GNU General Public License as
Chris@152 11 published by the Free Software Foundation; either version 2 of the
Chris@152 12 License, or (at your option) any later version. See the file
Chris@152 13 COPYING included with this distribution for more information.
Chris@152 14 */
Chris@152 15
Chris@152 16 #include "FFTModel.h"
Chris@152 17 #include "DenseTimeValueModel.h"
Chris@297 18 #include "AggregateWaveModel.h"
Chris@152 19
Chris@183 20 #include "base/Profiler.h"
Chris@275 21 #include "base/Pitch.h"
Chris@183 22
Chris@402 23 #include <algorithm>
Chris@402 24
Chris@152 25 #include <cassert>
Chris@152 26
Chris@608 27 #ifndef __GNUC__
Chris@608 28 #include <alloca.h>
Chris@608 29 #endif
Chris@608 30
Chris@152 31 FFTModel::FFTModel(const DenseTimeValueModel *model,
Chris@152 32 int channel,
Chris@152 33 WindowType windowType,
Chris@152 34 size_t windowSize,
Chris@152 35 size_t windowIncrement,
Chris@152 36 size_t fftSize,
Chris@152 37 bool polar,
Chris@334 38 StorageAdviser::Criteria criteria,
Chris@152 39 size_t fillFromColumn) :
Chris@152 40 //!!! ZoomConstraint!
Chris@152 41 m_server(0),
Chris@152 42 m_xshift(0),
Chris@152 43 m_yshift(0)
Chris@152 44 {
Chris@297 45 setSourceModel(const_cast<DenseTimeValueModel *>(model)); //!!! hmm.
Chris@297 46
Chris@297 47 m_server = getServer(model,
Chris@297 48 channel,
Chris@297 49 windowType,
Chris@297 50 windowSize,
Chris@297 51 windowIncrement,
Chris@297 52 fftSize,
Chris@297 53 polar,
Chris@334 54 criteria,
Chris@297 55 fillFromColumn);
Chris@152 56
Chris@200 57 if (!m_server) return; // caller should check isOK()
Chris@200 58
Chris@152 59 size_t xratio = windowIncrement / m_server->getWindowIncrement();
Chris@152 60 size_t yratio = m_server->getFFTSize() / fftSize;
Chris@152 61
Chris@152 62 while (xratio > 1) {
Chris@152 63 if (xratio & 0x1) {
Chris@152 64 std::cerr << "ERROR: FFTModel: Window increment ratio "
Chris@152 65 << windowIncrement << " / "
Chris@152 66 << m_server->getWindowIncrement()
Chris@152 67 << " must be a power of two" << std::endl;
Chris@152 68 assert(!(xratio & 0x1));
Chris@152 69 }
Chris@152 70 ++m_xshift;
Chris@152 71 xratio >>= 1;
Chris@152 72 }
Chris@152 73
Chris@152 74 while (yratio > 1) {
Chris@152 75 if (yratio & 0x1) {
Chris@152 76 std::cerr << "ERROR: FFTModel: FFT size ratio "
Chris@152 77 << m_server->getFFTSize() << " / " << fftSize
Chris@152 78 << " must be a power of two" << std::endl;
Chris@152 79 assert(!(yratio & 0x1));
Chris@152 80 }
Chris@152 81 ++m_yshift;
Chris@152 82 yratio >>= 1;
Chris@152 83 }
Chris@152 84 }
Chris@152 85
Chris@152 86 FFTModel::~FFTModel()
Chris@152 87 {
Chris@200 88 if (m_server) FFTDataServer::releaseInstance(m_server);
Chris@152 89 }
Chris@152 90
Chris@360 91 void
Chris@360 92 FFTModel::sourceModelAboutToBeDeleted()
Chris@360 93 {
Chris@360 94 if (m_sourceModel) {
Chris@362 95 std::cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_sourceModel << ")" << std::endl;
Chris@362 96 if (m_server) {
Chris@362 97 FFTDataServer::releaseInstance(m_server);
Chris@362 98 m_server = 0;
Chris@362 99 }
Chris@360 100 FFTDataServer::modelAboutToBeDeleted(m_sourceModel);
Chris@360 101 }
Chris@360 102 }
Chris@360 103
Chris@297 104 FFTDataServer *
Chris@297 105 FFTModel::getServer(const DenseTimeValueModel *model,
Chris@297 106 int channel,
Chris@297 107 WindowType windowType,
Chris@297 108 size_t windowSize,
Chris@297 109 size_t windowIncrement,
Chris@297 110 size_t fftSize,
Chris@297 111 bool polar,
Chris@334 112 StorageAdviser::Criteria criteria,
Chris@297 113 size_t fillFromColumn)
Chris@297 114 {
Chris@297 115 // Obviously, an FFT model of channel C (where C != -1) of an
Chris@297 116 // aggregate model is the same as the FFT model of the appropriate
Chris@297 117 // channel of whichever model that aggregate channel is drawn
Chris@297 118 // from. We should use that model here, in case we already have
Chris@297 119 // the data for it or will be wanting the same data again later.
Chris@297 120
Chris@297 121 // If the channel is -1 (i.e. mixture of all channels), then we
Chris@297 122 // can't do this shortcut unless the aggregate model only has one
Chris@297 123 // channel or contains exactly all of the channels of a single
Chris@297 124 // other model. That isn't very likely -- if it were the case,
Chris@297 125 // why would we be using an aggregate model?
Chris@297 126
Chris@297 127 if (channel >= 0) {
Chris@297 128
Chris@297 129 const AggregateWaveModel *aggregate =
Chris@297 130 dynamic_cast<const AggregateWaveModel *>(model);
Chris@297 131
Chris@297 132 if (aggregate && channel < aggregate->getComponentCount()) {
Chris@297 133
Chris@297 134 AggregateWaveModel::ModelChannelSpec spec =
Chris@297 135 aggregate->getComponent(channel);
Chris@297 136
Chris@297 137 return getServer(spec.model,
Chris@297 138 spec.channel,
Chris@297 139 windowType,
Chris@297 140 windowSize,
Chris@297 141 windowIncrement,
Chris@297 142 fftSize,
Chris@297 143 polar,
Chris@334 144 criteria,
Chris@297 145 fillFromColumn);
Chris@297 146 }
Chris@297 147 }
Chris@297 148
Chris@297 149 // The normal case
Chris@297 150
Chris@297 151 return FFTDataServer::getFuzzyInstance(model,
Chris@297 152 channel,
Chris@297 153 windowType,
Chris@297 154 windowSize,
Chris@297 155 windowIncrement,
Chris@297 156 fftSize,
Chris@297 157 polar,
Chris@334 158 criteria,
Chris@297 159 fillFromColumn);
Chris@297 160 }
Chris@297 161
Chris@152 162 size_t
Chris@152 163 FFTModel::getSampleRate() const
Chris@152 164 {
Chris@152 165 return isOK() ? m_server->getModel()->getSampleRate() : 0;
Chris@152 166 }
Chris@152 167
Chris@533 168 FFTModel::Column
Chris@533 169 FFTModel::getColumn(size_t x) const
Chris@152 170 {
Chris@183 171 Profiler profiler("FFTModel::getColumn", false);
Chris@183 172
Chris@533 173 Column result;
Chris@533 174
Chris@152 175 result.clear();
Chris@408 176 size_t h = getHeight();
Chris@509 177 result.reserve(h);
Chris@408 178
Chris@608 179 #ifdef __GNUC__
Chris@408 180 float magnitudes[h];
Chris@608 181 #else
Chris@608 182 float *magnitudes = (float *)alloca(h * sizeof(float));
Chris@608 183 #endif
Chris@500 184
Chris@408 185 if (m_server->getMagnitudesAt(x << m_xshift, magnitudes)) {
Chris@500 186
Chris@408 187 for (size_t y = 0; y < h; ++y) {
Chris@500 188 result.push_back(magnitudes[y]);
Chris@408 189 }
Chris@500 190
Chris@408 191 } else {
Chris@408 192 for (size_t i = 0; i < h; ++i) result.push_back(0.f);
Chris@152 193 }
Chris@533 194
Chris@533 195 return result;
Chris@152 196 }
Chris@152 197
Chris@152 198 QString
Chris@152 199 FFTModel::getBinName(size_t n) const
Chris@152 200 {
Chris@152 201 size_t sr = getSampleRate();
Chris@152 202 if (!sr) return "";
Chris@204 203 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
Chris@152 204 return name;
Chris@152 205 }
Chris@152 206
Chris@275 207 bool
Chris@275 208 FFTModel::estimateStableFrequency(size_t x, size_t y, float &frequency)
Chris@275 209 {
Chris@275 210 if (!isOK()) return false;
Chris@275 211
Chris@275 212 size_t sampleRate = m_server->getModel()->getSampleRate();
Chris@275 213
Chris@275 214 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 215 frequency = (float(y) * sampleRate) / fftSize;
Chris@275 216
Chris@275 217 if (x+1 >= getWidth()) return false;
Chris@275 218
Chris@275 219 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
Chris@275 220 // At hopsize h and sample rate sr, one hop happens in h/sr sec.
Chris@275 221 // At window size w, for bin b, f is b*sr/w.
Chris@275 222 // thus 2pi phase shift happens in w/(b*sr) sec.
Chris@275 223 // We need to know what phase shift we expect from h/sr sec.
Chris@275 224 // -> 2pi * ((h/sr) / (w/(b*sr)))
Chris@275 225 // = 2pi * ((h * b * sr) / (w * sr))
Chris@275 226 // = 2pi * (h * b) / w.
Chris@275 227
Chris@275 228 float oldPhase = getPhaseAt(x, y);
Chris@275 229 float newPhase = getPhaseAt(x+1, y);
Chris@275 230
Chris@275 231 size_t incr = getResolution();
Chris@275 232
Chris@275 233 float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize;
Chris@275 234
Chris@275 235 float phaseError = princargf(newPhase - expectedPhase);
Chris@275 236
Chris@275 237 // bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize));
Chris@275 238
Chris@275 239 // The new frequency estimate based on the phase error resulting
Chris@275 240 // from assuming the "native" frequency of this bin
Chris@275 241
Chris@275 242 frequency =
Chris@275 243 (sampleRate * (expectedPhase + phaseError - oldPhase)) /
Chris@275 244 (2 * M_PI * incr);
Chris@275 245
Chris@275 246 return true;
Chris@275 247 }
Chris@275 248
Chris@275 249 FFTModel::PeakLocationSet
Chris@275 250 FFTModel::getPeaks(PeakPickType type, size_t x, size_t ymin, size_t ymax)
Chris@275 251 {
Chris@551 252 Profiler profiler("FFTModel::getPeaks");
Chris@551 253
Chris@275 254 FFTModel::PeakLocationSet peaks;
Chris@275 255 if (!isOK()) return peaks;
Chris@275 256
Chris@275 257 if (ymax == 0 || ymax > getHeight() - 1) {
Chris@275 258 ymax = getHeight() - 1;
Chris@275 259 }
Chris@275 260
Chris@275 261 if (type == AllPeaks) {
Chris@551 262 int minbin = ymin;
Chris@551 263 if (minbin > 0) minbin = minbin - 1;
Chris@551 264 int maxbin = ymax;
Chris@551 265 if (maxbin < getHeight() - 1) maxbin = maxbin + 1;
Chris@551 266 const int n = maxbin - minbin + 1;
Chris@608 267 #ifdef __GNUC__
Chris@551 268 float values[n];
Chris@608 269 #else
Chris@608 270 float *values = (float *)alloca(n * sizeof(float));
Chris@608 271 #endif
Chris@551 272 getMagnitudesAt(x, values, minbin, maxbin - minbin + 1);
Chris@275 273 for (size_t bin = ymin; bin <= ymax; ++bin) {
Chris@551 274 if (bin == minbin || bin == maxbin) continue;
Chris@551 275 if (values[bin - minbin] > values[bin - minbin - 1] &&
Chris@551 276 values[bin - minbin] > values[bin - minbin + 1]) {
Chris@275 277 peaks.insert(bin);
Chris@275 278 }
Chris@275 279 }
Chris@275 280 return peaks;
Chris@275 281 }
Chris@275 282
Chris@551 283 Column values = getColumn(x);
Chris@275 284
Chris@500 285 float mean = 0.f;
Chris@551 286 for (int i = 0; i < values.size(); ++i) mean += values[i];
Chris@500 287 if (values.size() >0) mean /= values.size();
Chris@500 288
Chris@275 289 // For peak picking we use a moving median window, picking the
Chris@275 290 // highest value within each continuous region of values that
Chris@275 291 // exceed the median. For pitch adaptivity, we adjust the window
Chris@275 292 // size to a roughly constant pitch range (about four tones).
Chris@275 293
Chris@275 294 size_t sampleRate = getSampleRate();
Chris@275 295
Chris@275 296 std::deque<float> window;
Chris@275 297 std::vector<size_t> inrange;
Chris@280 298 float dist = 0.5;
Chris@500 299
Chris@280 300 size_t medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
Chris@275 301 size_t halfWin = medianWinSize/2;
Chris@275 302
Chris@275 303 size_t binmin;
Chris@275 304 if (ymin > halfWin) binmin = ymin - halfWin;
Chris@275 305 else binmin = 0;
Chris@275 306
Chris@275 307 size_t binmax;
Chris@275 308 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 309 else binmax = values.size()-1;
Chris@275 310
Chris@500 311 size_t prevcentre = 0;
Chris@500 312
Chris@275 313 for (size_t bin = binmin; bin <= binmax; ++bin) {
Chris@275 314
Chris@275 315 float value = values[bin];
Chris@275 316
Chris@275 317 window.push_back(value);
Chris@275 318
Chris@280 319 // so-called median will actually be the dist*100'th percentile
Chris@280 320 medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
Chris@275 321 halfWin = medianWinSize/2;
Chris@275 322
Chris@500 323 while (window.size() > medianWinSize) {
Chris@500 324 window.pop_front();
Chris@500 325 }
Chris@500 326
Chris@500 327 size_t actualSize = window.size();
Chris@275 328
Chris@275 329 if (type == MajorPitchAdaptivePeaks) {
Chris@275 330 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 331 else binmax = values.size()-1;
Chris@275 332 }
Chris@275 333
Chris@275 334 std::deque<float> sorted(window);
Chris@275 335 std::sort(sorted.begin(), sorted.end());
Chris@280 336 float median = sorted[int(sorted.size() * dist)];
Chris@275 337
Chris@500 338 size_t centrebin = 0;
Chris@500 339 if (bin > actualSize/2) centrebin = bin - actualSize/2;
Chris@500 340
Chris@500 341 while (centrebin > prevcentre || bin == binmin) {
Chris@275 342
Chris@500 343 if (centrebin > prevcentre) ++prevcentre;
Chris@500 344
Chris@500 345 float centre = values[prevcentre];
Chris@500 346
Chris@500 347 if (centre > median) {
Chris@500 348 inrange.push_back(centrebin);
Chris@500 349 }
Chris@500 350
Chris@500 351 if (centre <= median || centrebin+1 == values.size()) {
Chris@500 352 if (!inrange.empty()) {
Chris@500 353 size_t peakbin = 0;
Chris@500 354 float peakval = 0.f;
Chris@500 355 for (size_t i = 0; i < inrange.size(); ++i) {
Chris@500 356 if (i == 0 || values[inrange[i]] > peakval) {
Chris@500 357 peakval = values[inrange[i]];
Chris@500 358 peakbin = inrange[i];
Chris@500 359 }
Chris@500 360 }
Chris@500 361 inrange.clear();
Chris@500 362 if (peakbin >= ymin && peakbin <= ymax) {
Chris@500 363 peaks.insert(peakbin);
Chris@275 364 }
Chris@275 365 }
Chris@275 366 }
Chris@500 367
Chris@500 368 if (bin == binmin) break;
Chris@275 369 }
Chris@275 370 }
Chris@275 371
Chris@275 372 return peaks;
Chris@275 373 }
Chris@275 374
Chris@275 375 size_t
Chris@280 376 FFTModel::getPeakPickWindowSize(PeakPickType type, size_t sampleRate,
Chris@280 377 size_t bin, float &percentile) const
Chris@275 378 {
Chris@280 379 percentile = 0.5;
Chris@275 380 if (type == MajorPeaks) return 10;
Chris@275 381 if (bin == 0) return 3;
Chris@280 382
Chris@275 383 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 384 float binfreq = (sampleRate * bin) / fftSize;
Chris@275 385 float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
Chris@280 386
Chris@275 387 int hibin = lrintf((hifreq * fftSize) / sampleRate);
Chris@275 388 int medianWinSize = hibin - bin;
Chris@275 389 if (medianWinSize < 3) medianWinSize = 3;
Chris@280 390
Chris@280 391 percentile = 0.5 + (binfreq / sampleRate);
Chris@280 392
Chris@275 393 return medianWinSize;
Chris@275 394 }
Chris@275 395
Chris@275 396 FFTModel::PeakSet
Chris@275 397 FFTModel::getPeakFrequencies(PeakPickType type, size_t x,
Chris@275 398 size_t ymin, size_t ymax)
Chris@275 399 {
Chris@551 400 Profiler profiler("FFTModel::getPeakFrequencies");
Chris@551 401
Chris@275 402 PeakSet peaks;
Chris@275 403 if (!isOK()) return peaks;
Chris@275 404 PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
Chris@275 405
Chris@275 406 size_t sampleRate = getSampleRate();
Chris@275 407 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 408 size_t incr = getResolution();
Chris@275 409
Chris@275 410 // This duplicates some of the work of estimateStableFrequency to
Chris@275 411 // allow us to retrieve the phases in two separate vertical
Chris@275 412 // columns, instead of jumping back and forth between columns x and
Chris@275 413 // x+1, which may be significantly slower if re-seeking is needed
Chris@275 414
Chris@275 415 std::vector<float> phases;
Chris@275 416 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 417 i != locations.end(); ++i) {
Chris@275 418 phases.push_back(getPhaseAt(x, *i));
Chris@275 419 }
Chris@275 420
Chris@275 421 size_t phaseIndex = 0;
Chris@275 422 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 423 i != locations.end(); ++i) {
Chris@275 424 float oldPhase = phases[phaseIndex];
Chris@275 425 float newPhase = getPhaseAt(x+1, *i);
Chris@275 426 float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize;
Chris@275 427 float phaseError = princargf(newPhase - expectedPhase);
Chris@275 428 float frequency =
Chris@275 429 (sampleRate * (expectedPhase + phaseError - oldPhase))
Chris@275 430 / (2 * M_PI * incr);
Chris@275 431 // bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize));
Chris@275 432 // if (stable)
Chris@275 433 peaks[*i] = frequency;
Chris@275 434 ++phaseIndex;
Chris@275 435 }
Chris@275 436
Chris@275 437 return peaks;
Chris@275 438 }
Chris@275 439
Chris@152 440 Model *
Chris@152 441 FFTModel::clone() const
Chris@152 442 {
Chris@152 443 return new FFTModel(*this);
Chris@152 444 }
Chris@152 445
Chris@152 446 FFTModel::FFTModel(const FFTModel &model) :
Chris@152 447 DenseThreeDimensionalModel(),
Chris@152 448 m_server(model.m_server),
Chris@152 449 m_xshift(model.m_xshift),
Chris@152 450 m_yshift(model.m_yshift)
Chris@152 451 {
Chris@152 452 FFTDataServer::claimInstance(m_server);
Chris@152 453 }
Chris@152 454