annotate data/model/FFTModel.cpp @ 490:c3fb8258e34d

* Make it possible to import an entire session from an RDF document. However, at the moment the timings of events appear to be constrained by how far the audio decoder has got through its audio file at the time the event is queried -- need to investigate.
author Chris Cannam
date Fri, 21 Nov 2008 18:03:14 +0000
parents 115f60df1e4d
children 83eae5239db6
rev   line source
Chris@152 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@152 2
Chris@152 3 /*
Chris@152 4 Sonic Visualiser
Chris@152 5 An audio file viewer and annotation editor.
Chris@152 6 Centre for Digital Music, Queen Mary, University of London.
Chris@152 7 This file copyright 2006 Chris Cannam.
Chris@152 8
Chris@152 9 This program is free software; you can redistribute it and/or
Chris@152 10 modify it under the terms of the GNU General Public License as
Chris@152 11 published by the Free Software Foundation; either version 2 of the
Chris@152 12 License, or (at your option) any later version. See the file
Chris@152 13 COPYING included with this distribution for more information.
Chris@152 14 */
Chris@152 15
Chris@152 16 #include "FFTModel.h"
Chris@152 17 #include "DenseTimeValueModel.h"
Chris@297 18 #include "AggregateWaveModel.h"
Chris@152 19
Chris@183 20 #include "base/Profiler.h"
Chris@275 21 #include "base/Pitch.h"
Chris@183 22
Chris@402 23 #include <algorithm>
Chris@402 24
Chris@152 25 #include <cassert>
Chris@152 26
Chris@152 27 FFTModel::FFTModel(const DenseTimeValueModel *model,
Chris@152 28 int channel,
Chris@152 29 WindowType windowType,
Chris@152 30 size_t windowSize,
Chris@152 31 size_t windowIncrement,
Chris@152 32 size_t fftSize,
Chris@152 33 bool polar,
Chris@334 34 StorageAdviser::Criteria criteria,
Chris@152 35 size_t fillFromColumn) :
Chris@152 36 //!!! ZoomConstraint!
Chris@152 37 m_server(0),
Chris@152 38 m_xshift(0),
Chris@152 39 m_yshift(0)
Chris@152 40 {
Chris@297 41 setSourceModel(const_cast<DenseTimeValueModel *>(model)); //!!! hmm.
Chris@297 42
Chris@297 43 m_server = getServer(model,
Chris@297 44 channel,
Chris@297 45 windowType,
Chris@297 46 windowSize,
Chris@297 47 windowIncrement,
Chris@297 48 fftSize,
Chris@297 49 polar,
Chris@334 50 criteria,
Chris@297 51 fillFromColumn);
Chris@152 52
Chris@200 53 if (!m_server) return; // caller should check isOK()
Chris@200 54
Chris@152 55 size_t xratio = windowIncrement / m_server->getWindowIncrement();
Chris@152 56 size_t yratio = m_server->getFFTSize() / fftSize;
Chris@152 57
Chris@152 58 while (xratio > 1) {
Chris@152 59 if (xratio & 0x1) {
Chris@152 60 std::cerr << "ERROR: FFTModel: Window increment ratio "
Chris@152 61 << windowIncrement << " / "
Chris@152 62 << m_server->getWindowIncrement()
Chris@152 63 << " must be a power of two" << std::endl;
Chris@152 64 assert(!(xratio & 0x1));
Chris@152 65 }
Chris@152 66 ++m_xshift;
Chris@152 67 xratio >>= 1;
Chris@152 68 }
Chris@152 69
Chris@152 70 while (yratio > 1) {
Chris@152 71 if (yratio & 0x1) {
Chris@152 72 std::cerr << "ERROR: FFTModel: FFT size ratio "
Chris@152 73 << m_server->getFFTSize() << " / " << fftSize
Chris@152 74 << " must be a power of two" << std::endl;
Chris@152 75 assert(!(yratio & 0x1));
Chris@152 76 }
Chris@152 77 ++m_yshift;
Chris@152 78 yratio >>= 1;
Chris@152 79 }
Chris@152 80 }
Chris@152 81
Chris@152 82 FFTModel::~FFTModel()
Chris@152 83 {
Chris@200 84 if (m_server) FFTDataServer::releaseInstance(m_server);
Chris@152 85 }
Chris@152 86
Chris@360 87 void
Chris@360 88 FFTModel::sourceModelAboutToBeDeleted()
Chris@360 89 {
Chris@360 90 if (m_sourceModel) {
Chris@362 91 std::cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_sourceModel << ")" << std::endl;
Chris@362 92 if (m_server) {
Chris@362 93 FFTDataServer::releaseInstance(m_server);
Chris@362 94 m_server = 0;
Chris@362 95 }
Chris@360 96 FFTDataServer::modelAboutToBeDeleted(m_sourceModel);
Chris@360 97 }
Chris@360 98 }
Chris@360 99
Chris@297 100 FFTDataServer *
Chris@297 101 FFTModel::getServer(const DenseTimeValueModel *model,
Chris@297 102 int channel,
Chris@297 103 WindowType windowType,
Chris@297 104 size_t windowSize,
Chris@297 105 size_t windowIncrement,
Chris@297 106 size_t fftSize,
Chris@297 107 bool polar,
Chris@334 108 StorageAdviser::Criteria criteria,
Chris@297 109 size_t fillFromColumn)
Chris@297 110 {
Chris@297 111 // Obviously, an FFT model of channel C (where C != -1) of an
Chris@297 112 // aggregate model is the same as the FFT model of the appropriate
Chris@297 113 // channel of whichever model that aggregate channel is drawn
Chris@297 114 // from. We should use that model here, in case we already have
Chris@297 115 // the data for it or will be wanting the same data again later.
Chris@297 116
Chris@297 117 // If the channel is -1 (i.e. mixture of all channels), then we
Chris@297 118 // can't do this shortcut unless the aggregate model only has one
Chris@297 119 // channel or contains exactly all of the channels of a single
Chris@297 120 // other model. That isn't very likely -- if it were the case,
Chris@297 121 // why would we be using an aggregate model?
Chris@297 122
Chris@297 123 if (channel >= 0) {
Chris@297 124
Chris@297 125 const AggregateWaveModel *aggregate =
Chris@297 126 dynamic_cast<const AggregateWaveModel *>(model);
Chris@297 127
Chris@297 128 if (aggregate && channel < aggregate->getComponentCount()) {
Chris@297 129
Chris@297 130 AggregateWaveModel::ModelChannelSpec spec =
Chris@297 131 aggregate->getComponent(channel);
Chris@297 132
Chris@297 133 return getServer(spec.model,
Chris@297 134 spec.channel,
Chris@297 135 windowType,
Chris@297 136 windowSize,
Chris@297 137 windowIncrement,
Chris@297 138 fftSize,
Chris@297 139 polar,
Chris@334 140 criteria,
Chris@297 141 fillFromColumn);
Chris@297 142 }
Chris@297 143 }
Chris@297 144
Chris@297 145 // The normal case
Chris@297 146
Chris@297 147 return FFTDataServer::getFuzzyInstance(model,
Chris@297 148 channel,
Chris@297 149 windowType,
Chris@297 150 windowSize,
Chris@297 151 windowIncrement,
Chris@297 152 fftSize,
Chris@297 153 polar,
Chris@334 154 criteria,
Chris@297 155 fillFromColumn);
Chris@297 156 }
Chris@297 157
Chris@152 158 size_t
Chris@152 159 FFTModel::getSampleRate() const
Chris@152 160 {
Chris@152 161 return isOK() ? m_server->getModel()->getSampleRate() : 0;
Chris@152 162 }
Chris@152 163
Chris@152 164 void
Chris@182 165 FFTModel::getColumn(size_t x, Column &result) const
Chris@152 166 {
Chris@183 167 Profiler profiler("FFTModel::getColumn", false);
Chris@183 168
Chris@152 169 result.clear();
Chris@408 170 size_t h = getHeight();
Chris@408 171
Chris@408 172 float magnitudes[h];
Chris@408 173 if (m_server->getMagnitudesAt(x << m_xshift, magnitudes)) {
Chris@408 174 for (size_t y = 0; y < h; ++y) {
Chris@408 175 result.push_back(magnitudes[h]);
Chris@408 176 }
Chris@408 177 } else {
Chris@408 178 for (size_t i = 0; i < h; ++i) result.push_back(0.f);
Chris@152 179 }
Chris@152 180 }
Chris@152 181
Chris@152 182 QString
Chris@152 183 FFTModel::getBinName(size_t n) const
Chris@152 184 {
Chris@152 185 size_t sr = getSampleRate();
Chris@152 186 if (!sr) return "";
Chris@204 187 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
Chris@152 188 return name;
Chris@152 189 }
Chris@152 190
Chris@275 191 bool
Chris@275 192 FFTModel::estimateStableFrequency(size_t x, size_t y, float &frequency)
Chris@275 193 {
Chris@275 194 if (!isOK()) return false;
Chris@275 195
Chris@275 196 size_t sampleRate = m_server->getModel()->getSampleRate();
Chris@275 197
Chris@275 198 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 199 frequency = (float(y) * sampleRate) / fftSize;
Chris@275 200
Chris@275 201 if (x+1 >= getWidth()) return false;
Chris@275 202
Chris@275 203 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
Chris@275 204 // At hopsize h and sample rate sr, one hop happens in h/sr sec.
Chris@275 205 // At window size w, for bin b, f is b*sr/w.
Chris@275 206 // thus 2pi phase shift happens in w/(b*sr) sec.
Chris@275 207 // We need to know what phase shift we expect from h/sr sec.
Chris@275 208 // -> 2pi * ((h/sr) / (w/(b*sr)))
Chris@275 209 // = 2pi * ((h * b * sr) / (w * sr))
Chris@275 210 // = 2pi * (h * b) / w.
Chris@275 211
Chris@275 212 float oldPhase = getPhaseAt(x, y);
Chris@275 213 float newPhase = getPhaseAt(x+1, y);
Chris@275 214
Chris@275 215 size_t incr = getResolution();
Chris@275 216
Chris@275 217 float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize;
Chris@275 218
Chris@275 219 float phaseError = princargf(newPhase - expectedPhase);
Chris@275 220
Chris@275 221 // bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize));
Chris@275 222
Chris@275 223 // The new frequency estimate based on the phase error resulting
Chris@275 224 // from assuming the "native" frequency of this bin
Chris@275 225
Chris@275 226 frequency =
Chris@275 227 (sampleRate * (expectedPhase + phaseError - oldPhase)) /
Chris@275 228 (2 * M_PI * incr);
Chris@275 229
Chris@275 230 return true;
Chris@275 231 }
Chris@275 232
Chris@275 233 FFTModel::PeakLocationSet
Chris@275 234 FFTModel::getPeaks(PeakPickType type, size_t x, size_t ymin, size_t ymax)
Chris@275 235 {
Chris@275 236 FFTModel::PeakLocationSet peaks;
Chris@275 237 if (!isOK()) return peaks;
Chris@275 238
Chris@275 239 if (ymax == 0 || ymax > getHeight() - 1) {
Chris@275 240 ymax = getHeight() - 1;
Chris@275 241 }
Chris@275 242
Chris@275 243 Column values;
Chris@275 244
Chris@275 245 if (type == AllPeaks) {
Chris@275 246 for (size_t y = ymin; y <= ymax; ++y) {
Chris@275 247 values.push_back(getMagnitudeAt(x, y));
Chris@275 248 }
Chris@275 249 size_t i = 0;
Chris@275 250 for (size_t bin = ymin; bin <= ymax; ++bin) {
Chris@275 251 if ((i == 0 || values[i] > values[i-1]) &&
Chris@275 252 (i == values.size()-1 || values[i] >= values[i+1])) {
Chris@275 253 peaks.insert(bin);
Chris@275 254 }
Chris@275 255 ++i;
Chris@275 256 }
Chris@275 257 return peaks;
Chris@275 258 }
Chris@275 259
Chris@275 260 getColumn(x, values);
Chris@275 261
Chris@275 262 // For peak picking we use a moving median window, picking the
Chris@275 263 // highest value within each continuous region of values that
Chris@275 264 // exceed the median. For pitch adaptivity, we adjust the window
Chris@275 265 // size to a roughly constant pitch range (about four tones).
Chris@275 266
Chris@275 267 size_t sampleRate = getSampleRate();
Chris@275 268
Chris@275 269 std::deque<float> window;
Chris@275 270 std::vector<size_t> inrange;
Chris@280 271 float dist = 0.5;
Chris@280 272 size_t medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
Chris@275 273 size_t halfWin = medianWinSize/2;
Chris@275 274
Chris@275 275 size_t binmin;
Chris@275 276 if (ymin > halfWin) binmin = ymin - halfWin;
Chris@275 277 else binmin = 0;
Chris@275 278
Chris@275 279 size_t binmax;
Chris@275 280 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 281 else binmax = values.size()-1;
Chris@275 282
Chris@275 283 for (size_t bin = binmin; bin <= binmax; ++bin) {
Chris@275 284
Chris@275 285 float value = values[bin];
Chris@275 286
Chris@275 287 window.push_back(value);
Chris@275 288
Chris@280 289 // so-called median will actually be the dist*100'th percentile
Chris@280 290 medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
Chris@275 291 halfWin = medianWinSize/2;
Chris@275 292
Chris@275 293 while (window.size() > medianWinSize) window.pop_front();
Chris@275 294
Chris@275 295 if (type == MajorPitchAdaptivePeaks) {
Chris@275 296 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 297 else binmax = values.size()-1;
Chris@275 298 }
Chris@275 299
Chris@275 300 std::deque<float> sorted(window);
Chris@275 301 std::sort(sorted.begin(), sorted.end());
Chris@280 302 float median = sorted[int(sorted.size() * dist)];
Chris@275 303
Chris@275 304 if (value > median) {
Chris@275 305 inrange.push_back(bin);
Chris@275 306 }
Chris@275 307
Chris@275 308 if (value <= median || bin+1 == values.size()) {
Chris@275 309 size_t peakbin = 0;
Chris@275 310 float peakval = 0.f;
Chris@275 311 if (!inrange.empty()) {
Chris@275 312 for (size_t i = 0; i < inrange.size(); ++i) {
Chris@275 313 if (i == 0 || values[inrange[i]] > peakval) {
Chris@275 314 peakval = values[inrange[i]];
Chris@275 315 peakbin = inrange[i];
Chris@275 316 }
Chris@275 317 }
Chris@275 318 inrange.clear();
Chris@275 319 if (peakbin >= ymin && peakbin <= ymax) {
Chris@275 320 peaks.insert(peakbin);
Chris@275 321 }
Chris@275 322 }
Chris@275 323 }
Chris@275 324 }
Chris@275 325
Chris@275 326 return peaks;
Chris@275 327 }
Chris@275 328
Chris@275 329 size_t
Chris@280 330 FFTModel::getPeakPickWindowSize(PeakPickType type, size_t sampleRate,
Chris@280 331 size_t bin, float &percentile) const
Chris@275 332 {
Chris@280 333 percentile = 0.5;
Chris@275 334 if (type == MajorPeaks) return 10;
Chris@275 335 if (bin == 0) return 3;
Chris@280 336
Chris@275 337 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 338 float binfreq = (sampleRate * bin) / fftSize;
Chris@275 339 float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
Chris@280 340
Chris@275 341 int hibin = lrintf((hifreq * fftSize) / sampleRate);
Chris@275 342 int medianWinSize = hibin - bin;
Chris@275 343 if (medianWinSize < 3) medianWinSize = 3;
Chris@280 344
Chris@280 345 percentile = 0.5 + (binfreq / sampleRate);
Chris@280 346
Chris@275 347 return medianWinSize;
Chris@275 348 }
Chris@275 349
Chris@275 350 FFTModel::PeakSet
Chris@275 351 FFTModel::getPeakFrequencies(PeakPickType type, size_t x,
Chris@275 352 size_t ymin, size_t ymax)
Chris@275 353 {
Chris@275 354 PeakSet peaks;
Chris@275 355 if (!isOK()) return peaks;
Chris@275 356 PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
Chris@275 357
Chris@275 358 size_t sampleRate = getSampleRate();
Chris@275 359 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 360 size_t incr = getResolution();
Chris@275 361
Chris@275 362 // This duplicates some of the work of estimateStableFrequency to
Chris@275 363 // allow us to retrieve the phases in two separate vertical
Chris@275 364 // columns, instead of jumping back and forth between columns x and
Chris@275 365 // x+1, which may be significantly slower if re-seeking is needed
Chris@275 366
Chris@275 367 std::vector<float> phases;
Chris@275 368 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 369 i != locations.end(); ++i) {
Chris@275 370 phases.push_back(getPhaseAt(x, *i));
Chris@275 371 }
Chris@275 372
Chris@275 373 size_t phaseIndex = 0;
Chris@275 374 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 375 i != locations.end(); ++i) {
Chris@275 376 float oldPhase = phases[phaseIndex];
Chris@275 377 float newPhase = getPhaseAt(x+1, *i);
Chris@275 378 float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize;
Chris@275 379 float phaseError = princargf(newPhase - expectedPhase);
Chris@275 380 float frequency =
Chris@275 381 (sampleRate * (expectedPhase + phaseError - oldPhase))
Chris@275 382 / (2 * M_PI * incr);
Chris@275 383 // bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize));
Chris@275 384 // if (stable)
Chris@275 385 peaks[*i] = frequency;
Chris@275 386 ++phaseIndex;
Chris@275 387 }
Chris@275 388
Chris@275 389 return peaks;
Chris@275 390 }
Chris@275 391
Chris@152 392 Model *
Chris@152 393 FFTModel::clone() const
Chris@152 394 {
Chris@152 395 return new FFTModel(*this);
Chris@152 396 }
Chris@152 397
Chris@152 398 FFTModel::FFTModel(const FFTModel &model) :
Chris@152 399 DenseThreeDimensionalModel(),
Chris@152 400 m_server(model.m_server),
Chris@152 401 m_xshift(model.m_xshift),
Chris@152 402 m_yshift(model.m_yshift)
Chris@152 403 {
Chris@152 404 FFTDataServer::claimInstance(m_server);
Chris@152 405 }
Chris@152 406