annotate data/model/FFTModel.cpp @ 503:3176aade1a03

* Make RDFTransformFactory::writeTransformToRDF write out partial library and plugin descriptions adequate to identify the plugin on disk, if no RDF description for the plugin has been found
author Chris Cannam
date Fri, 05 Dec 2008 09:40:42 +0000
parents 83eae5239db6
children 6066bde1c126
rev   line source
Chris@152 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@152 2
Chris@152 3 /*
Chris@152 4 Sonic Visualiser
Chris@152 5 An audio file viewer and annotation editor.
Chris@152 6 Centre for Digital Music, Queen Mary, University of London.
Chris@152 7 This file copyright 2006 Chris Cannam.
Chris@152 8
Chris@152 9 This program is free software; you can redistribute it and/or
Chris@152 10 modify it under the terms of the GNU General Public License as
Chris@152 11 published by the Free Software Foundation; either version 2 of the
Chris@152 12 License, or (at your option) any later version. See the file
Chris@152 13 COPYING included with this distribution for more information.
Chris@152 14 */
Chris@152 15
Chris@152 16 #include "FFTModel.h"
Chris@152 17 #include "DenseTimeValueModel.h"
Chris@297 18 #include "AggregateWaveModel.h"
Chris@152 19
Chris@183 20 #include "base/Profiler.h"
Chris@275 21 #include "base/Pitch.h"
Chris@183 22
Chris@402 23 #include <algorithm>
Chris@402 24
Chris@152 25 #include <cassert>
Chris@152 26
Chris@152 27 FFTModel::FFTModel(const DenseTimeValueModel *model,
Chris@152 28 int channel,
Chris@152 29 WindowType windowType,
Chris@152 30 size_t windowSize,
Chris@152 31 size_t windowIncrement,
Chris@152 32 size_t fftSize,
Chris@152 33 bool polar,
Chris@334 34 StorageAdviser::Criteria criteria,
Chris@152 35 size_t fillFromColumn) :
Chris@152 36 //!!! ZoomConstraint!
Chris@152 37 m_server(0),
Chris@152 38 m_xshift(0),
Chris@152 39 m_yshift(0)
Chris@152 40 {
Chris@297 41 setSourceModel(const_cast<DenseTimeValueModel *>(model)); //!!! hmm.
Chris@297 42
Chris@297 43 m_server = getServer(model,
Chris@297 44 channel,
Chris@297 45 windowType,
Chris@297 46 windowSize,
Chris@297 47 windowIncrement,
Chris@297 48 fftSize,
Chris@297 49 polar,
Chris@334 50 criteria,
Chris@297 51 fillFromColumn);
Chris@152 52
Chris@200 53 if (!m_server) return; // caller should check isOK()
Chris@200 54
Chris@152 55 size_t xratio = windowIncrement / m_server->getWindowIncrement();
Chris@152 56 size_t yratio = m_server->getFFTSize() / fftSize;
Chris@152 57
Chris@152 58 while (xratio > 1) {
Chris@152 59 if (xratio & 0x1) {
Chris@152 60 std::cerr << "ERROR: FFTModel: Window increment ratio "
Chris@152 61 << windowIncrement << " / "
Chris@152 62 << m_server->getWindowIncrement()
Chris@152 63 << " must be a power of two" << std::endl;
Chris@152 64 assert(!(xratio & 0x1));
Chris@152 65 }
Chris@152 66 ++m_xshift;
Chris@152 67 xratio >>= 1;
Chris@152 68 }
Chris@152 69
Chris@152 70 while (yratio > 1) {
Chris@152 71 if (yratio & 0x1) {
Chris@152 72 std::cerr << "ERROR: FFTModel: FFT size ratio "
Chris@152 73 << m_server->getFFTSize() << " / " << fftSize
Chris@152 74 << " must be a power of two" << std::endl;
Chris@152 75 assert(!(yratio & 0x1));
Chris@152 76 }
Chris@152 77 ++m_yshift;
Chris@152 78 yratio >>= 1;
Chris@152 79 }
Chris@152 80 }
Chris@152 81
Chris@152 82 FFTModel::~FFTModel()
Chris@152 83 {
Chris@200 84 if (m_server) FFTDataServer::releaseInstance(m_server);
Chris@152 85 }
Chris@152 86
Chris@360 87 void
Chris@360 88 FFTModel::sourceModelAboutToBeDeleted()
Chris@360 89 {
Chris@360 90 if (m_sourceModel) {
Chris@362 91 std::cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_sourceModel << ")" << std::endl;
Chris@362 92 if (m_server) {
Chris@362 93 FFTDataServer::releaseInstance(m_server);
Chris@362 94 m_server = 0;
Chris@362 95 }
Chris@360 96 FFTDataServer::modelAboutToBeDeleted(m_sourceModel);
Chris@360 97 }
Chris@360 98 }
Chris@360 99
Chris@297 100 FFTDataServer *
Chris@297 101 FFTModel::getServer(const DenseTimeValueModel *model,
Chris@297 102 int channel,
Chris@297 103 WindowType windowType,
Chris@297 104 size_t windowSize,
Chris@297 105 size_t windowIncrement,
Chris@297 106 size_t fftSize,
Chris@297 107 bool polar,
Chris@334 108 StorageAdviser::Criteria criteria,
Chris@297 109 size_t fillFromColumn)
Chris@297 110 {
Chris@297 111 // Obviously, an FFT model of channel C (where C != -1) of an
Chris@297 112 // aggregate model is the same as the FFT model of the appropriate
Chris@297 113 // channel of whichever model that aggregate channel is drawn
Chris@297 114 // from. We should use that model here, in case we already have
Chris@297 115 // the data for it or will be wanting the same data again later.
Chris@297 116
Chris@297 117 // If the channel is -1 (i.e. mixture of all channels), then we
Chris@297 118 // can't do this shortcut unless the aggregate model only has one
Chris@297 119 // channel or contains exactly all of the channels of a single
Chris@297 120 // other model. That isn't very likely -- if it were the case,
Chris@297 121 // why would we be using an aggregate model?
Chris@297 122
Chris@297 123 if (channel >= 0) {
Chris@297 124
Chris@297 125 const AggregateWaveModel *aggregate =
Chris@297 126 dynamic_cast<const AggregateWaveModel *>(model);
Chris@297 127
Chris@297 128 if (aggregate && channel < aggregate->getComponentCount()) {
Chris@297 129
Chris@297 130 AggregateWaveModel::ModelChannelSpec spec =
Chris@297 131 aggregate->getComponent(channel);
Chris@297 132
Chris@297 133 return getServer(spec.model,
Chris@297 134 spec.channel,
Chris@297 135 windowType,
Chris@297 136 windowSize,
Chris@297 137 windowIncrement,
Chris@297 138 fftSize,
Chris@297 139 polar,
Chris@334 140 criteria,
Chris@297 141 fillFromColumn);
Chris@297 142 }
Chris@297 143 }
Chris@297 144
Chris@297 145 // The normal case
Chris@297 146
Chris@297 147 return FFTDataServer::getFuzzyInstance(model,
Chris@297 148 channel,
Chris@297 149 windowType,
Chris@297 150 windowSize,
Chris@297 151 windowIncrement,
Chris@297 152 fftSize,
Chris@297 153 polar,
Chris@334 154 criteria,
Chris@297 155 fillFromColumn);
Chris@297 156 }
Chris@297 157
Chris@152 158 size_t
Chris@152 159 FFTModel::getSampleRate() const
Chris@152 160 {
Chris@152 161 return isOK() ? m_server->getModel()->getSampleRate() : 0;
Chris@152 162 }
Chris@152 163
Chris@152 164 void
Chris@182 165 FFTModel::getColumn(size_t x, Column &result) const
Chris@152 166 {
Chris@183 167 Profiler profiler("FFTModel::getColumn", false);
Chris@183 168
Chris@152 169 result.clear();
Chris@408 170 size_t h = getHeight();
Chris@408 171
Chris@408 172 float magnitudes[h];
Chris@500 173
Chris@408 174 if (m_server->getMagnitudesAt(x << m_xshift, magnitudes)) {
Chris@500 175
Chris@408 176 for (size_t y = 0; y < h; ++y) {
Chris@500 177 result.push_back(magnitudes[y]);
Chris@408 178 }
Chris@500 179
Chris@408 180 } else {
Chris@408 181 for (size_t i = 0; i < h; ++i) result.push_back(0.f);
Chris@152 182 }
Chris@152 183 }
Chris@152 184
Chris@152 185 QString
Chris@152 186 FFTModel::getBinName(size_t n) const
Chris@152 187 {
Chris@152 188 size_t sr = getSampleRate();
Chris@152 189 if (!sr) return "";
Chris@204 190 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
Chris@152 191 return name;
Chris@152 192 }
Chris@152 193
Chris@275 194 bool
Chris@275 195 FFTModel::estimateStableFrequency(size_t x, size_t y, float &frequency)
Chris@275 196 {
Chris@275 197 if (!isOK()) return false;
Chris@275 198
Chris@275 199 size_t sampleRate = m_server->getModel()->getSampleRate();
Chris@275 200
Chris@275 201 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 202 frequency = (float(y) * sampleRate) / fftSize;
Chris@275 203
Chris@275 204 if (x+1 >= getWidth()) return false;
Chris@275 205
Chris@275 206 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
Chris@275 207 // At hopsize h and sample rate sr, one hop happens in h/sr sec.
Chris@275 208 // At window size w, for bin b, f is b*sr/w.
Chris@275 209 // thus 2pi phase shift happens in w/(b*sr) sec.
Chris@275 210 // We need to know what phase shift we expect from h/sr sec.
Chris@275 211 // -> 2pi * ((h/sr) / (w/(b*sr)))
Chris@275 212 // = 2pi * ((h * b * sr) / (w * sr))
Chris@275 213 // = 2pi * (h * b) / w.
Chris@275 214
Chris@275 215 float oldPhase = getPhaseAt(x, y);
Chris@275 216 float newPhase = getPhaseAt(x+1, y);
Chris@275 217
Chris@275 218 size_t incr = getResolution();
Chris@275 219
Chris@275 220 float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize;
Chris@275 221
Chris@275 222 float phaseError = princargf(newPhase - expectedPhase);
Chris@275 223
Chris@275 224 // bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize));
Chris@275 225
Chris@275 226 // The new frequency estimate based on the phase error resulting
Chris@275 227 // from assuming the "native" frequency of this bin
Chris@275 228
Chris@275 229 frequency =
Chris@275 230 (sampleRate * (expectedPhase + phaseError - oldPhase)) /
Chris@275 231 (2 * M_PI * incr);
Chris@275 232
Chris@275 233 return true;
Chris@275 234 }
Chris@275 235
Chris@275 236 FFTModel::PeakLocationSet
Chris@275 237 FFTModel::getPeaks(PeakPickType type, size_t x, size_t ymin, size_t ymax)
Chris@275 238 {
Chris@275 239 FFTModel::PeakLocationSet peaks;
Chris@275 240 if (!isOK()) return peaks;
Chris@275 241
Chris@275 242 if (ymax == 0 || ymax > getHeight() - 1) {
Chris@275 243 ymax = getHeight() - 1;
Chris@275 244 }
Chris@275 245
Chris@275 246 Column values;
Chris@275 247
Chris@275 248 if (type == AllPeaks) {
Chris@275 249 for (size_t y = ymin; y <= ymax; ++y) {
Chris@275 250 values.push_back(getMagnitudeAt(x, y));
Chris@275 251 }
Chris@275 252 size_t i = 0;
Chris@275 253 for (size_t bin = ymin; bin <= ymax; ++bin) {
Chris@275 254 if ((i == 0 || values[i] > values[i-1]) &&
Chris@275 255 (i == values.size()-1 || values[i] >= values[i+1])) {
Chris@275 256 peaks.insert(bin);
Chris@275 257 }
Chris@275 258 ++i;
Chris@275 259 }
Chris@275 260 return peaks;
Chris@275 261 }
Chris@275 262
Chris@275 263 getColumn(x, values);
Chris@275 264
Chris@500 265 float mean = 0.f;
Chris@500 266 for (int i =0; i < values.size(); ++i) mean += values[i];
Chris@500 267 if (values.size() >0) mean /= values.size();
Chris@500 268
Chris@275 269 // For peak picking we use a moving median window, picking the
Chris@275 270 // highest value within each continuous region of values that
Chris@275 271 // exceed the median. For pitch adaptivity, we adjust the window
Chris@275 272 // size to a roughly constant pitch range (about four tones).
Chris@275 273
Chris@275 274 size_t sampleRate = getSampleRate();
Chris@275 275
Chris@275 276 std::deque<float> window;
Chris@275 277 std::vector<size_t> inrange;
Chris@280 278 float dist = 0.5;
Chris@500 279
Chris@280 280 size_t medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
Chris@275 281 size_t halfWin = medianWinSize/2;
Chris@275 282
Chris@275 283 size_t binmin;
Chris@275 284 if (ymin > halfWin) binmin = ymin - halfWin;
Chris@275 285 else binmin = 0;
Chris@275 286
Chris@275 287 size_t binmax;
Chris@275 288 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 289 else binmax = values.size()-1;
Chris@275 290
Chris@500 291 size_t prevcentre = 0;
Chris@500 292
Chris@275 293 for (size_t bin = binmin; bin <= binmax; ++bin) {
Chris@275 294
Chris@275 295 float value = values[bin];
Chris@275 296
Chris@275 297 window.push_back(value);
Chris@275 298
Chris@280 299 // so-called median will actually be the dist*100'th percentile
Chris@280 300 medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
Chris@275 301 halfWin = medianWinSize/2;
Chris@275 302
Chris@500 303 while (window.size() > medianWinSize) {
Chris@500 304 window.pop_front();
Chris@500 305 }
Chris@500 306
Chris@500 307 size_t actualSize = window.size();
Chris@275 308
Chris@275 309 if (type == MajorPitchAdaptivePeaks) {
Chris@275 310 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 311 else binmax = values.size()-1;
Chris@275 312 }
Chris@275 313
Chris@275 314 std::deque<float> sorted(window);
Chris@275 315 std::sort(sorted.begin(), sorted.end());
Chris@280 316 float median = sorted[int(sorted.size() * dist)];
Chris@275 317
Chris@500 318 size_t centrebin = 0;
Chris@500 319 if (bin > actualSize/2) centrebin = bin - actualSize/2;
Chris@500 320
Chris@500 321 while (centrebin > prevcentre || bin == binmin) {
Chris@275 322
Chris@500 323 if (centrebin > prevcentre) ++prevcentre;
Chris@500 324
Chris@500 325 float centre = values[prevcentre];
Chris@500 326
Chris@500 327 if (centre > median) {
Chris@500 328 inrange.push_back(centrebin);
Chris@500 329 }
Chris@500 330
Chris@500 331 if (centre <= median || centrebin+1 == values.size()) {
Chris@500 332 if (!inrange.empty()) {
Chris@500 333 size_t peakbin = 0;
Chris@500 334 float peakval = 0.f;
Chris@500 335 for (size_t i = 0; i < inrange.size(); ++i) {
Chris@500 336 if (i == 0 || values[inrange[i]] > peakval) {
Chris@500 337 peakval = values[inrange[i]];
Chris@500 338 peakbin = inrange[i];
Chris@500 339 }
Chris@500 340 }
Chris@500 341 inrange.clear();
Chris@500 342 if (peakbin >= ymin && peakbin <= ymax) {
Chris@500 343 peaks.insert(peakbin);
Chris@275 344 }
Chris@275 345 }
Chris@275 346 }
Chris@500 347
Chris@500 348 if (bin == binmin) break;
Chris@275 349 }
Chris@275 350 }
Chris@275 351
Chris@275 352 return peaks;
Chris@275 353 }
Chris@275 354
Chris@275 355 size_t
Chris@280 356 FFTModel::getPeakPickWindowSize(PeakPickType type, size_t sampleRate,
Chris@280 357 size_t bin, float &percentile) const
Chris@275 358 {
Chris@280 359 percentile = 0.5;
Chris@275 360 if (type == MajorPeaks) return 10;
Chris@275 361 if (bin == 0) return 3;
Chris@280 362
Chris@275 363 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 364 float binfreq = (sampleRate * bin) / fftSize;
Chris@275 365 float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
Chris@280 366
Chris@275 367 int hibin = lrintf((hifreq * fftSize) / sampleRate);
Chris@275 368 int medianWinSize = hibin - bin;
Chris@275 369 if (medianWinSize < 3) medianWinSize = 3;
Chris@280 370
Chris@280 371 percentile = 0.5 + (binfreq / sampleRate);
Chris@280 372
Chris@275 373 return medianWinSize;
Chris@275 374 }
Chris@275 375
Chris@275 376 FFTModel::PeakSet
Chris@275 377 FFTModel::getPeakFrequencies(PeakPickType type, size_t x,
Chris@275 378 size_t ymin, size_t ymax)
Chris@275 379 {
Chris@275 380 PeakSet peaks;
Chris@275 381 if (!isOK()) return peaks;
Chris@275 382 PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
Chris@275 383
Chris@275 384 size_t sampleRate = getSampleRate();
Chris@275 385 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 386 size_t incr = getResolution();
Chris@275 387
Chris@275 388 // This duplicates some of the work of estimateStableFrequency to
Chris@275 389 // allow us to retrieve the phases in two separate vertical
Chris@275 390 // columns, instead of jumping back and forth between columns x and
Chris@275 391 // x+1, which may be significantly slower if re-seeking is needed
Chris@275 392
Chris@275 393 std::vector<float> phases;
Chris@275 394 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 395 i != locations.end(); ++i) {
Chris@275 396 phases.push_back(getPhaseAt(x, *i));
Chris@275 397 }
Chris@275 398
Chris@275 399 size_t phaseIndex = 0;
Chris@275 400 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 401 i != locations.end(); ++i) {
Chris@275 402 float oldPhase = phases[phaseIndex];
Chris@275 403 float newPhase = getPhaseAt(x+1, *i);
Chris@275 404 float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize;
Chris@275 405 float phaseError = princargf(newPhase - expectedPhase);
Chris@275 406 float frequency =
Chris@275 407 (sampleRate * (expectedPhase + phaseError - oldPhase))
Chris@275 408 / (2 * M_PI * incr);
Chris@275 409 // bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize));
Chris@275 410 // if (stable)
Chris@275 411 peaks[*i] = frequency;
Chris@275 412 ++phaseIndex;
Chris@275 413 }
Chris@275 414
Chris@275 415 return peaks;
Chris@275 416 }
Chris@275 417
Chris@152 418 Model *
Chris@152 419 FFTModel::clone() const
Chris@152 420 {
Chris@152 421 return new FFTModel(*this);
Chris@152 422 }
Chris@152 423
Chris@152 424 FFTModel::FFTModel(const FFTModel &model) :
Chris@152 425 DenseThreeDimensionalModel(),
Chris@152 426 m_server(model.m_server),
Chris@152 427 m_xshift(model.m_xshift),
Chris@152 428 m_yshift(model.m_yshift)
Chris@152 429 {
Chris@152 430 FFTDataServer::claimInstance(m_server);
Chris@152 431 }
Chris@152 432