annotate data/model/FFTModel.cpp @ 588:d04b8674b710

* Try to identify the properly conformant audio file structure written out by Sonic Annotator (but we still don't actually import it yet)
author Chris Cannam
date Wed, 13 May 2009 13:30:08 +0000
parents 408e56d30f58
children d7f3dfe6f9a4
rev   line source
Chris@152 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@152 2
Chris@152 3 /*
Chris@152 4 Sonic Visualiser
Chris@152 5 An audio file viewer and annotation editor.
Chris@152 6 Centre for Digital Music, Queen Mary, University of London.
Chris@152 7 This file copyright 2006 Chris Cannam.
Chris@152 8
Chris@152 9 This program is free software; you can redistribute it and/or
Chris@152 10 modify it under the terms of the GNU General Public License as
Chris@152 11 published by the Free Software Foundation; either version 2 of the
Chris@152 12 License, or (at your option) any later version. See the file
Chris@152 13 COPYING included with this distribution for more information.
Chris@152 14 */
Chris@152 15
Chris@152 16 #include "FFTModel.h"
Chris@152 17 #include "DenseTimeValueModel.h"
Chris@297 18 #include "AggregateWaveModel.h"
Chris@152 19
Chris@183 20 #include "base/Profiler.h"
Chris@275 21 #include "base/Pitch.h"
Chris@183 22
Chris@402 23 #include <algorithm>
Chris@402 24
Chris@152 25 #include <cassert>
Chris@152 26
Chris@152 27 FFTModel::FFTModel(const DenseTimeValueModel *model,
Chris@152 28 int channel,
Chris@152 29 WindowType windowType,
Chris@152 30 size_t windowSize,
Chris@152 31 size_t windowIncrement,
Chris@152 32 size_t fftSize,
Chris@152 33 bool polar,
Chris@334 34 StorageAdviser::Criteria criteria,
Chris@152 35 size_t fillFromColumn) :
Chris@152 36 //!!! ZoomConstraint!
Chris@152 37 m_server(0),
Chris@152 38 m_xshift(0),
Chris@152 39 m_yshift(0)
Chris@152 40 {
Chris@297 41 setSourceModel(const_cast<DenseTimeValueModel *>(model)); //!!! hmm.
Chris@297 42
Chris@297 43 m_server = getServer(model,
Chris@297 44 channel,
Chris@297 45 windowType,
Chris@297 46 windowSize,
Chris@297 47 windowIncrement,
Chris@297 48 fftSize,
Chris@297 49 polar,
Chris@334 50 criteria,
Chris@297 51 fillFromColumn);
Chris@152 52
Chris@200 53 if (!m_server) return; // caller should check isOK()
Chris@200 54
Chris@152 55 size_t xratio = windowIncrement / m_server->getWindowIncrement();
Chris@152 56 size_t yratio = m_server->getFFTSize() / fftSize;
Chris@152 57
Chris@152 58 while (xratio > 1) {
Chris@152 59 if (xratio & 0x1) {
Chris@152 60 std::cerr << "ERROR: FFTModel: Window increment ratio "
Chris@152 61 << windowIncrement << " / "
Chris@152 62 << m_server->getWindowIncrement()
Chris@152 63 << " must be a power of two" << std::endl;
Chris@152 64 assert(!(xratio & 0x1));
Chris@152 65 }
Chris@152 66 ++m_xshift;
Chris@152 67 xratio >>= 1;
Chris@152 68 }
Chris@152 69
Chris@152 70 while (yratio > 1) {
Chris@152 71 if (yratio & 0x1) {
Chris@152 72 std::cerr << "ERROR: FFTModel: FFT size ratio "
Chris@152 73 << m_server->getFFTSize() << " / " << fftSize
Chris@152 74 << " must be a power of two" << std::endl;
Chris@152 75 assert(!(yratio & 0x1));
Chris@152 76 }
Chris@152 77 ++m_yshift;
Chris@152 78 yratio >>= 1;
Chris@152 79 }
Chris@152 80 }
Chris@152 81
Chris@152 82 FFTModel::~FFTModel()
Chris@152 83 {
Chris@200 84 if (m_server) FFTDataServer::releaseInstance(m_server);
Chris@152 85 }
Chris@152 86
Chris@360 87 void
Chris@360 88 FFTModel::sourceModelAboutToBeDeleted()
Chris@360 89 {
Chris@360 90 if (m_sourceModel) {
Chris@362 91 std::cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_sourceModel << ")" << std::endl;
Chris@362 92 if (m_server) {
Chris@362 93 FFTDataServer::releaseInstance(m_server);
Chris@362 94 m_server = 0;
Chris@362 95 }
Chris@360 96 FFTDataServer::modelAboutToBeDeleted(m_sourceModel);
Chris@360 97 }
Chris@360 98 }
Chris@360 99
Chris@297 100 FFTDataServer *
Chris@297 101 FFTModel::getServer(const DenseTimeValueModel *model,
Chris@297 102 int channel,
Chris@297 103 WindowType windowType,
Chris@297 104 size_t windowSize,
Chris@297 105 size_t windowIncrement,
Chris@297 106 size_t fftSize,
Chris@297 107 bool polar,
Chris@334 108 StorageAdviser::Criteria criteria,
Chris@297 109 size_t fillFromColumn)
Chris@297 110 {
Chris@297 111 // Obviously, an FFT model of channel C (where C != -1) of an
Chris@297 112 // aggregate model is the same as the FFT model of the appropriate
Chris@297 113 // channel of whichever model that aggregate channel is drawn
Chris@297 114 // from. We should use that model here, in case we already have
Chris@297 115 // the data for it or will be wanting the same data again later.
Chris@297 116
Chris@297 117 // If the channel is -1 (i.e. mixture of all channels), then we
Chris@297 118 // can't do this shortcut unless the aggregate model only has one
Chris@297 119 // channel or contains exactly all of the channels of a single
Chris@297 120 // other model. That isn't very likely -- if it were the case,
Chris@297 121 // why would we be using an aggregate model?
Chris@297 122
Chris@297 123 if (channel >= 0) {
Chris@297 124
Chris@297 125 const AggregateWaveModel *aggregate =
Chris@297 126 dynamic_cast<const AggregateWaveModel *>(model);
Chris@297 127
Chris@297 128 if (aggregate && channel < aggregate->getComponentCount()) {
Chris@297 129
Chris@297 130 AggregateWaveModel::ModelChannelSpec spec =
Chris@297 131 aggregate->getComponent(channel);
Chris@297 132
Chris@297 133 return getServer(spec.model,
Chris@297 134 spec.channel,
Chris@297 135 windowType,
Chris@297 136 windowSize,
Chris@297 137 windowIncrement,
Chris@297 138 fftSize,
Chris@297 139 polar,
Chris@334 140 criteria,
Chris@297 141 fillFromColumn);
Chris@297 142 }
Chris@297 143 }
Chris@297 144
Chris@297 145 // The normal case
Chris@297 146
Chris@297 147 return FFTDataServer::getFuzzyInstance(model,
Chris@297 148 channel,
Chris@297 149 windowType,
Chris@297 150 windowSize,
Chris@297 151 windowIncrement,
Chris@297 152 fftSize,
Chris@297 153 polar,
Chris@334 154 criteria,
Chris@297 155 fillFromColumn);
Chris@297 156 }
Chris@297 157
Chris@152 158 size_t
Chris@152 159 FFTModel::getSampleRate() const
Chris@152 160 {
Chris@152 161 return isOK() ? m_server->getModel()->getSampleRate() : 0;
Chris@152 162 }
Chris@152 163
Chris@533 164 FFTModel::Column
Chris@533 165 FFTModel::getColumn(size_t x) const
Chris@152 166 {
Chris@183 167 Profiler profiler("FFTModel::getColumn", false);
Chris@183 168
Chris@533 169 Column result;
Chris@533 170
Chris@152 171 result.clear();
Chris@408 172 size_t h = getHeight();
Chris@509 173 result.reserve(h);
Chris@408 174
Chris@408 175 float magnitudes[h];
Chris@500 176
Chris@408 177 if (m_server->getMagnitudesAt(x << m_xshift, magnitudes)) {
Chris@500 178
Chris@408 179 for (size_t y = 0; y < h; ++y) {
Chris@500 180 result.push_back(magnitudes[y]);
Chris@408 181 }
Chris@500 182
Chris@408 183 } else {
Chris@408 184 for (size_t i = 0; i < h; ++i) result.push_back(0.f);
Chris@152 185 }
Chris@533 186
Chris@533 187 return result;
Chris@152 188 }
Chris@152 189
Chris@152 190 QString
Chris@152 191 FFTModel::getBinName(size_t n) const
Chris@152 192 {
Chris@152 193 size_t sr = getSampleRate();
Chris@152 194 if (!sr) return "";
Chris@204 195 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
Chris@152 196 return name;
Chris@152 197 }
Chris@152 198
Chris@275 199 bool
Chris@275 200 FFTModel::estimateStableFrequency(size_t x, size_t y, float &frequency)
Chris@275 201 {
Chris@275 202 if (!isOK()) return false;
Chris@275 203
Chris@275 204 size_t sampleRate = m_server->getModel()->getSampleRate();
Chris@275 205
Chris@275 206 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 207 frequency = (float(y) * sampleRate) / fftSize;
Chris@275 208
Chris@275 209 if (x+1 >= getWidth()) return false;
Chris@275 210
Chris@275 211 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
Chris@275 212 // At hopsize h and sample rate sr, one hop happens in h/sr sec.
Chris@275 213 // At window size w, for bin b, f is b*sr/w.
Chris@275 214 // thus 2pi phase shift happens in w/(b*sr) sec.
Chris@275 215 // We need to know what phase shift we expect from h/sr sec.
Chris@275 216 // -> 2pi * ((h/sr) / (w/(b*sr)))
Chris@275 217 // = 2pi * ((h * b * sr) / (w * sr))
Chris@275 218 // = 2pi * (h * b) / w.
Chris@275 219
Chris@275 220 float oldPhase = getPhaseAt(x, y);
Chris@275 221 float newPhase = getPhaseAt(x+1, y);
Chris@275 222
Chris@275 223 size_t incr = getResolution();
Chris@275 224
Chris@275 225 float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize;
Chris@275 226
Chris@275 227 float phaseError = princargf(newPhase - expectedPhase);
Chris@275 228
Chris@275 229 // bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize));
Chris@275 230
Chris@275 231 // The new frequency estimate based on the phase error resulting
Chris@275 232 // from assuming the "native" frequency of this bin
Chris@275 233
Chris@275 234 frequency =
Chris@275 235 (sampleRate * (expectedPhase + phaseError - oldPhase)) /
Chris@275 236 (2 * M_PI * incr);
Chris@275 237
Chris@275 238 return true;
Chris@275 239 }
Chris@275 240
Chris@275 241 FFTModel::PeakLocationSet
Chris@275 242 FFTModel::getPeaks(PeakPickType type, size_t x, size_t ymin, size_t ymax)
Chris@275 243 {
Chris@551 244 Profiler profiler("FFTModel::getPeaks");
Chris@551 245
Chris@275 246 FFTModel::PeakLocationSet peaks;
Chris@275 247 if (!isOK()) return peaks;
Chris@275 248
Chris@275 249 if (ymax == 0 || ymax > getHeight() - 1) {
Chris@275 250 ymax = getHeight() - 1;
Chris@275 251 }
Chris@275 252
Chris@275 253 if (type == AllPeaks) {
Chris@551 254 int minbin = ymin;
Chris@551 255 if (minbin > 0) minbin = minbin - 1;
Chris@551 256 int maxbin = ymax;
Chris@551 257 if (maxbin < getHeight() - 1) maxbin = maxbin + 1;
Chris@551 258 const int n = maxbin - minbin + 1;
Chris@551 259 float values[n];
Chris@551 260 getMagnitudesAt(x, values, minbin, maxbin - minbin + 1);
Chris@275 261 for (size_t bin = ymin; bin <= ymax; ++bin) {
Chris@551 262 if (bin == minbin || bin == maxbin) continue;
Chris@551 263 if (values[bin - minbin] > values[bin - minbin - 1] &&
Chris@551 264 values[bin - minbin] > values[bin - minbin + 1]) {
Chris@275 265 peaks.insert(bin);
Chris@275 266 }
Chris@275 267 }
Chris@275 268 return peaks;
Chris@275 269 }
Chris@275 270
Chris@551 271 Column values = getColumn(x);
Chris@275 272
Chris@500 273 float mean = 0.f;
Chris@551 274 for (int i = 0; i < values.size(); ++i) mean += values[i];
Chris@500 275 if (values.size() >0) mean /= values.size();
Chris@500 276
Chris@275 277 // For peak picking we use a moving median window, picking the
Chris@275 278 // highest value within each continuous region of values that
Chris@275 279 // exceed the median. For pitch adaptivity, we adjust the window
Chris@275 280 // size to a roughly constant pitch range (about four tones).
Chris@275 281
Chris@275 282 size_t sampleRate = getSampleRate();
Chris@275 283
Chris@275 284 std::deque<float> window;
Chris@275 285 std::vector<size_t> inrange;
Chris@280 286 float dist = 0.5;
Chris@500 287
Chris@280 288 size_t medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
Chris@275 289 size_t halfWin = medianWinSize/2;
Chris@275 290
Chris@275 291 size_t binmin;
Chris@275 292 if (ymin > halfWin) binmin = ymin - halfWin;
Chris@275 293 else binmin = 0;
Chris@275 294
Chris@275 295 size_t binmax;
Chris@275 296 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 297 else binmax = values.size()-1;
Chris@275 298
Chris@500 299 size_t prevcentre = 0;
Chris@500 300
Chris@275 301 for (size_t bin = binmin; bin <= binmax; ++bin) {
Chris@275 302
Chris@275 303 float value = values[bin];
Chris@275 304
Chris@275 305 window.push_back(value);
Chris@275 306
Chris@280 307 // so-called median will actually be the dist*100'th percentile
Chris@280 308 medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
Chris@275 309 halfWin = medianWinSize/2;
Chris@275 310
Chris@500 311 while (window.size() > medianWinSize) {
Chris@500 312 window.pop_front();
Chris@500 313 }
Chris@500 314
Chris@500 315 size_t actualSize = window.size();
Chris@275 316
Chris@275 317 if (type == MajorPitchAdaptivePeaks) {
Chris@275 318 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 319 else binmax = values.size()-1;
Chris@275 320 }
Chris@275 321
Chris@275 322 std::deque<float> sorted(window);
Chris@275 323 std::sort(sorted.begin(), sorted.end());
Chris@280 324 float median = sorted[int(sorted.size() * dist)];
Chris@275 325
Chris@500 326 size_t centrebin = 0;
Chris@500 327 if (bin > actualSize/2) centrebin = bin - actualSize/2;
Chris@500 328
Chris@500 329 while (centrebin > prevcentre || bin == binmin) {
Chris@275 330
Chris@500 331 if (centrebin > prevcentre) ++prevcentre;
Chris@500 332
Chris@500 333 float centre = values[prevcentre];
Chris@500 334
Chris@500 335 if (centre > median) {
Chris@500 336 inrange.push_back(centrebin);
Chris@500 337 }
Chris@500 338
Chris@500 339 if (centre <= median || centrebin+1 == values.size()) {
Chris@500 340 if (!inrange.empty()) {
Chris@500 341 size_t peakbin = 0;
Chris@500 342 float peakval = 0.f;
Chris@500 343 for (size_t i = 0; i < inrange.size(); ++i) {
Chris@500 344 if (i == 0 || values[inrange[i]] > peakval) {
Chris@500 345 peakval = values[inrange[i]];
Chris@500 346 peakbin = inrange[i];
Chris@500 347 }
Chris@500 348 }
Chris@500 349 inrange.clear();
Chris@500 350 if (peakbin >= ymin && peakbin <= ymax) {
Chris@500 351 peaks.insert(peakbin);
Chris@275 352 }
Chris@275 353 }
Chris@275 354 }
Chris@500 355
Chris@500 356 if (bin == binmin) break;
Chris@275 357 }
Chris@275 358 }
Chris@275 359
Chris@275 360 return peaks;
Chris@275 361 }
Chris@275 362
Chris@275 363 size_t
Chris@280 364 FFTModel::getPeakPickWindowSize(PeakPickType type, size_t sampleRate,
Chris@280 365 size_t bin, float &percentile) const
Chris@275 366 {
Chris@280 367 percentile = 0.5;
Chris@275 368 if (type == MajorPeaks) return 10;
Chris@275 369 if (bin == 0) return 3;
Chris@280 370
Chris@275 371 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 372 float binfreq = (sampleRate * bin) / fftSize;
Chris@275 373 float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
Chris@280 374
Chris@275 375 int hibin = lrintf((hifreq * fftSize) / sampleRate);
Chris@275 376 int medianWinSize = hibin - bin;
Chris@275 377 if (medianWinSize < 3) medianWinSize = 3;
Chris@280 378
Chris@280 379 percentile = 0.5 + (binfreq / sampleRate);
Chris@280 380
Chris@275 381 return medianWinSize;
Chris@275 382 }
Chris@275 383
Chris@275 384 FFTModel::PeakSet
Chris@275 385 FFTModel::getPeakFrequencies(PeakPickType type, size_t x,
Chris@275 386 size_t ymin, size_t ymax)
Chris@275 387 {
Chris@551 388 Profiler profiler("FFTModel::getPeakFrequencies");
Chris@551 389
Chris@275 390 PeakSet peaks;
Chris@275 391 if (!isOK()) return peaks;
Chris@275 392 PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
Chris@275 393
Chris@275 394 size_t sampleRate = getSampleRate();
Chris@275 395 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 396 size_t incr = getResolution();
Chris@275 397
Chris@275 398 // This duplicates some of the work of estimateStableFrequency to
Chris@275 399 // allow us to retrieve the phases in two separate vertical
Chris@275 400 // columns, instead of jumping back and forth between columns x and
Chris@275 401 // x+1, which may be significantly slower if re-seeking is needed
Chris@275 402
Chris@275 403 std::vector<float> phases;
Chris@275 404 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 405 i != locations.end(); ++i) {
Chris@275 406 phases.push_back(getPhaseAt(x, *i));
Chris@275 407 }
Chris@275 408
Chris@275 409 size_t phaseIndex = 0;
Chris@275 410 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 411 i != locations.end(); ++i) {
Chris@275 412 float oldPhase = phases[phaseIndex];
Chris@275 413 float newPhase = getPhaseAt(x+1, *i);
Chris@275 414 float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize;
Chris@275 415 float phaseError = princargf(newPhase - expectedPhase);
Chris@275 416 float frequency =
Chris@275 417 (sampleRate * (expectedPhase + phaseError - oldPhase))
Chris@275 418 / (2 * M_PI * incr);
Chris@275 419 // bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize));
Chris@275 420 // if (stable)
Chris@275 421 peaks[*i] = frequency;
Chris@275 422 ++phaseIndex;
Chris@275 423 }
Chris@275 424
Chris@275 425 return peaks;
Chris@275 426 }
Chris@275 427
Chris@152 428 Model *
Chris@152 429 FFTModel::clone() const
Chris@152 430 {
Chris@152 431 return new FFTModel(*this);
Chris@152 432 }
Chris@152 433
Chris@152 434 FFTModel::FFTModel(const FFTModel &model) :
Chris@152 435 DenseThreeDimensionalModel(),
Chris@152 436 m_server(model.m_server),
Chris@152 437 m_xshift(model.m_xshift),
Chris@152 438 m_yshift(model.m_yshift)
Chris@152 439 {
Chris@152 440 FFTDataServer::claimInstance(m_server);
Chris@152 441 }
Chris@152 442