annotate data/model/FFTModel.cpp @ 1008:d9e0e59a1581

When using an aggregate model to pass data to a transform, zero-pad the shorter input to the duration of the longer rather than truncating the longer. (This is better behaviour for e.g. MATCH, and in any case the code was previously truncating incorrectly and ending up with garbage data at the end.)
author Chris Cannam
date Fri, 14 Nov 2014 13:51:33 +0000
parents a8aed8a85e09
children cc27f35aa75c
rev   line source
Chris@152 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@152 2
Chris@152 3 /*
Chris@152 4 Sonic Visualiser
Chris@152 5 An audio file viewer and annotation editor.
Chris@152 6 Centre for Digital Music, Queen Mary, University of London.
Chris@152 7 This file copyright 2006 Chris Cannam.
Chris@152 8
Chris@152 9 This program is free software; you can redistribute it and/or
Chris@152 10 modify it under the terms of the GNU General Public License as
Chris@152 11 published by the Free Software Foundation; either version 2 of the
Chris@152 12 License, or (at your option) any later version. See the file
Chris@152 13 COPYING included with this distribution for more information.
Chris@152 14 */
Chris@152 15
Chris@152 16 #include "FFTModel.h"
Chris@152 17 #include "DenseTimeValueModel.h"
Chris@297 18 #include "AggregateWaveModel.h"
Chris@152 19
Chris@183 20 #include "base/Profiler.h"
Chris@275 21 #include "base/Pitch.h"
Chris@183 22
Chris@402 23 #include <algorithm>
Chris@402 24
Chris@152 25 #include <cassert>
Chris@152 26
Chris@608 27 #ifndef __GNUC__
Chris@608 28 #include <alloca.h>
Chris@608 29 #endif
Chris@608 30
Chris@152 31 FFTModel::FFTModel(const DenseTimeValueModel *model,
Chris@152 32 int channel,
Chris@152 33 WindowType windowType,
Chris@929 34 int windowSize,
Chris@929 35 int windowIncrement,
Chris@929 36 int fftSize,
Chris@152 37 bool polar,
Chris@334 38 StorageAdviser::Criteria criteria,
Chris@929 39 int fillFromColumn) :
Chris@152 40 //!!! ZoomConstraint!
Chris@152 41 m_server(0),
Chris@152 42 m_xshift(0),
Chris@152 43 m_yshift(0)
Chris@152 44 {
Chris@297 45 setSourceModel(const_cast<DenseTimeValueModel *>(model)); //!!! hmm.
Chris@297 46
Chris@297 47 m_server = getServer(model,
Chris@297 48 channel,
Chris@297 49 windowType,
Chris@297 50 windowSize,
Chris@297 51 windowIncrement,
Chris@297 52 fftSize,
Chris@297 53 polar,
Chris@334 54 criteria,
Chris@297 55 fillFromColumn);
Chris@152 56
Chris@200 57 if (!m_server) return; // caller should check isOK()
Chris@200 58
Chris@929 59 int xratio = windowIncrement / m_server->getWindowIncrement();
Chris@929 60 int yratio = m_server->getFFTSize() / fftSize;
Chris@152 61
Chris@152 62 while (xratio > 1) {
Chris@152 63 if (xratio & 0x1) {
Chris@843 64 cerr << "ERROR: FFTModel: Window increment ratio "
Chris@152 65 << windowIncrement << " / "
Chris@152 66 << m_server->getWindowIncrement()
Chris@843 67 << " must be a power of two" << endl;
Chris@152 68 assert(!(xratio & 0x1));
Chris@152 69 }
Chris@152 70 ++m_xshift;
Chris@152 71 xratio >>= 1;
Chris@152 72 }
Chris@152 73
Chris@152 74 while (yratio > 1) {
Chris@152 75 if (yratio & 0x1) {
Chris@843 76 cerr << "ERROR: FFTModel: FFT size ratio "
Chris@152 77 << m_server->getFFTSize() << " / " << fftSize
Chris@843 78 << " must be a power of two" << endl;
Chris@152 79 assert(!(yratio & 0x1));
Chris@152 80 }
Chris@152 81 ++m_yshift;
Chris@152 82 yratio >>= 1;
Chris@152 83 }
Chris@152 84 }
Chris@152 85
Chris@152 86 FFTModel::~FFTModel()
Chris@152 87 {
Chris@200 88 if (m_server) FFTDataServer::releaseInstance(m_server);
Chris@152 89 }
Chris@152 90
Chris@360 91 void
Chris@360 92 FFTModel::sourceModelAboutToBeDeleted()
Chris@360 93 {
Chris@360 94 if (m_sourceModel) {
Chris@843 95 cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_sourceModel << ")" << endl;
Chris@362 96 if (m_server) {
Chris@362 97 FFTDataServer::releaseInstance(m_server);
Chris@362 98 m_server = 0;
Chris@362 99 }
Chris@360 100 FFTDataServer::modelAboutToBeDeleted(m_sourceModel);
Chris@360 101 }
Chris@360 102 }
Chris@360 103
Chris@297 104 FFTDataServer *
Chris@297 105 FFTModel::getServer(const DenseTimeValueModel *model,
Chris@297 106 int channel,
Chris@297 107 WindowType windowType,
Chris@929 108 int windowSize,
Chris@929 109 int windowIncrement,
Chris@929 110 int fftSize,
Chris@297 111 bool polar,
Chris@334 112 StorageAdviser::Criteria criteria,
Chris@929 113 int fillFromColumn)
Chris@297 114 {
Chris@297 115 // Obviously, an FFT model of channel C (where C != -1) of an
Chris@297 116 // aggregate model is the same as the FFT model of the appropriate
Chris@297 117 // channel of whichever model that aggregate channel is drawn
Chris@297 118 // from. We should use that model here, in case we already have
Chris@297 119 // the data for it or will be wanting the same data again later.
Chris@297 120
Chris@297 121 // If the channel is -1 (i.e. mixture of all channels), then we
Chris@297 122 // can't do this shortcut unless the aggregate model only has one
Chris@297 123 // channel or contains exactly all of the channels of a single
Chris@297 124 // other model. That isn't very likely -- if it were the case,
Chris@297 125 // why would we be using an aggregate model?
Chris@297 126
Chris@297 127 if (channel >= 0) {
Chris@297 128
Chris@297 129 const AggregateWaveModel *aggregate =
Chris@297 130 dynamic_cast<const AggregateWaveModel *>(model);
Chris@297 131
Chris@297 132 if (aggregate && channel < aggregate->getComponentCount()) {
Chris@297 133
Chris@297 134 AggregateWaveModel::ModelChannelSpec spec =
Chris@297 135 aggregate->getComponent(channel);
Chris@297 136
Chris@297 137 return getServer(spec.model,
Chris@297 138 spec.channel,
Chris@297 139 windowType,
Chris@297 140 windowSize,
Chris@297 141 windowIncrement,
Chris@297 142 fftSize,
Chris@297 143 polar,
Chris@334 144 criteria,
Chris@297 145 fillFromColumn);
Chris@297 146 }
Chris@297 147 }
Chris@297 148
Chris@297 149 // The normal case
Chris@297 150
Chris@297 151 return FFTDataServer::getFuzzyInstance(model,
Chris@297 152 channel,
Chris@297 153 windowType,
Chris@297 154 windowSize,
Chris@297 155 windowIncrement,
Chris@297 156 fftSize,
Chris@297 157 polar,
Chris@334 158 criteria,
Chris@297 159 fillFromColumn);
Chris@297 160 }
Chris@297 161
Chris@929 162 int
Chris@152 163 FFTModel::getSampleRate() const
Chris@152 164 {
Chris@152 165 return isOK() ? m_server->getModel()->getSampleRate() : 0;
Chris@152 166 }
Chris@152 167
Chris@533 168 FFTModel::Column
Chris@929 169 FFTModel::getColumn(int x) const
Chris@152 170 {
Chris@183 171 Profiler profiler("FFTModel::getColumn", false);
Chris@183 172
Chris@533 173 Column result;
Chris@533 174
Chris@152 175 result.clear();
Chris@929 176 int h = getHeight();
Chris@509 177 result.reserve(h);
Chris@408 178
Chris@608 179 #ifdef __GNUC__
Chris@408 180 float magnitudes[h];
Chris@608 181 #else
Chris@608 182 float *magnitudes = (float *)alloca(h * sizeof(float));
Chris@608 183 #endif
Chris@500 184
Chris@408 185 if (m_server->getMagnitudesAt(x << m_xshift, magnitudes)) {
Chris@500 186
Chris@929 187 for (int y = 0; y < h; ++y) {
Chris@500 188 result.push_back(magnitudes[y]);
Chris@408 189 }
Chris@500 190
Chris@408 191 } else {
Chris@929 192 for (int i = 0; i < h; ++i) result.push_back(0.f);
Chris@152 193 }
Chris@533 194
Chris@533 195 return result;
Chris@152 196 }
Chris@152 197
Chris@152 198 QString
Chris@929 199 FFTModel::getBinName(int n) const
Chris@152 200 {
Chris@929 201 int sr = getSampleRate();
Chris@152 202 if (!sr) return "";
Chris@204 203 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
Chris@152 204 return name;
Chris@152 205 }
Chris@152 206
Chris@275 207 bool
Chris@929 208 FFTModel::estimateStableFrequency(int x, int y, float &frequency)
Chris@275 209 {
Chris@275 210 if (!isOK()) return false;
Chris@275 211
Chris@929 212 int sampleRate = m_server->getModel()->getSampleRate();
Chris@275 213
Chris@929 214 int fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 215 frequency = (float(y) * sampleRate) / fftSize;
Chris@275 216
Chris@275 217 if (x+1 >= getWidth()) return false;
Chris@275 218
Chris@275 219 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
Chris@275 220 // At hopsize h and sample rate sr, one hop happens in h/sr sec.
Chris@275 221 // At window size w, for bin b, f is b*sr/w.
Chris@275 222 // thus 2pi phase shift happens in w/(b*sr) sec.
Chris@275 223 // We need to know what phase shift we expect from h/sr sec.
Chris@275 224 // -> 2pi * ((h/sr) / (w/(b*sr)))
Chris@275 225 // = 2pi * ((h * b * sr) / (w * sr))
Chris@275 226 // = 2pi * (h * b) / w.
Chris@275 227
Chris@275 228 float oldPhase = getPhaseAt(x, y);
Chris@275 229 float newPhase = getPhaseAt(x+1, y);
Chris@275 230
Chris@929 231 int incr = getResolution();
Chris@275 232
Chris@275 233 float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize;
Chris@275 234
Chris@275 235 float phaseError = princargf(newPhase - expectedPhase);
Chris@275 236
Chris@275 237 // bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize));
Chris@275 238
Chris@275 239 // The new frequency estimate based on the phase error resulting
Chris@275 240 // from assuming the "native" frequency of this bin
Chris@275 241
Chris@275 242 frequency =
Chris@275 243 (sampleRate * (expectedPhase + phaseError - oldPhase)) /
Chris@275 244 (2 * M_PI * incr);
Chris@275 245
Chris@275 246 return true;
Chris@275 247 }
Chris@275 248
Chris@275 249 FFTModel::PeakLocationSet
Chris@929 250 FFTModel::getPeaks(PeakPickType type, int x, int ymin, int ymax)
Chris@275 251 {
Chris@551 252 Profiler profiler("FFTModel::getPeaks");
Chris@551 253
Chris@275 254 FFTModel::PeakLocationSet peaks;
Chris@275 255 if (!isOK()) return peaks;
Chris@275 256
Chris@275 257 if (ymax == 0 || ymax > getHeight() - 1) {
Chris@275 258 ymax = getHeight() - 1;
Chris@275 259 }
Chris@275 260
Chris@275 261 if (type == AllPeaks) {
Chris@551 262 int minbin = ymin;
Chris@551 263 if (minbin > 0) minbin = minbin - 1;
Chris@551 264 int maxbin = ymax;
Chris@551 265 if (maxbin < getHeight() - 1) maxbin = maxbin + 1;
Chris@551 266 const int n = maxbin - minbin + 1;
Chris@608 267 #ifdef __GNUC__
Chris@551 268 float values[n];
Chris@608 269 #else
Chris@608 270 float *values = (float *)alloca(n * sizeof(float));
Chris@608 271 #endif
Chris@551 272 getMagnitudesAt(x, values, minbin, maxbin - minbin + 1);
Chris@929 273 for (int bin = ymin; bin <= ymax; ++bin) {
Chris@551 274 if (bin == minbin || bin == maxbin) continue;
Chris@551 275 if (values[bin - minbin] > values[bin - minbin - 1] &&
Chris@551 276 values[bin - minbin] > values[bin - minbin + 1]) {
Chris@275 277 peaks.insert(bin);
Chris@275 278 }
Chris@275 279 }
Chris@275 280 return peaks;
Chris@275 281 }
Chris@275 282
Chris@551 283 Column values = getColumn(x);
Chris@275 284
Chris@500 285 float mean = 0.f;
Chris@551 286 for (int i = 0; i < values.size(); ++i) mean += values[i];
Chris@500 287 if (values.size() >0) mean /= values.size();
Chris@500 288
Chris@275 289 // For peak picking we use a moving median window, picking the
Chris@275 290 // highest value within each continuous region of values that
Chris@275 291 // exceed the median. For pitch adaptivity, we adjust the window
Chris@275 292 // size to a roughly constant pitch range (about four tones).
Chris@275 293
Chris@929 294 int sampleRate = getSampleRate();
Chris@275 295
Chris@275 296 std::deque<float> window;
Chris@929 297 std::vector<int> inrange;
Chris@280 298 float dist = 0.5;
Chris@500 299
Chris@929 300 int medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
Chris@929 301 int halfWin = medianWinSize/2;
Chris@275 302
Chris@929 303 int binmin;
Chris@275 304 if (ymin > halfWin) binmin = ymin - halfWin;
Chris@275 305 else binmin = 0;
Chris@275 306
Chris@929 307 int binmax;
Chris@275 308 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 309 else binmax = values.size()-1;
Chris@275 310
Chris@929 311 int prevcentre = 0;
Chris@500 312
Chris@929 313 for (int bin = binmin; bin <= binmax; ++bin) {
Chris@275 314
Chris@275 315 float value = values[bin];
Chris@275 316
Chris@275 317 window.push_back(value);
Chris@275 318
Chris@280 319 // so-called median will actually be the dist*100'th percentile
Chris@280 320 medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
Chris@275 321 halfWin = medianWinSize/2;
Chris@275 322
Chris@929 323 while ((int)window.size() > medianWinSize) {
Chris@500 324 window.pop_front();
Chris@500 325 }
Chris@500 326
Chris@929 327 int actualSize = window.size();
Chris@275 328
Chris@275 329 if (type == MajorPitchAdaptivePeaks) {
Chris@275 330 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 331 else binmax = values.size()-1;
Chris@275 332 }
Chris@275 333
Chris@275 334 std::deque<float> sorted(window);
Chris@275 335 std::sort(sorted.begin(), sorted.end());
Chris@280 336 float median = sorted[int(sorted.size() * dist)];
Chris@275 337
Chris@929 338 int centrebin = 0;
Chris@500 339 if (bin > actualSize/2) centrebin = bin - actualSize/2;
Chris@500 340
Chris@500 341 while (centrebin > prevcentre || bin == binmin) {
Chris@275 342
Chris@500 343 if (centrebin > prevcentre) ++prevcentre;
Chris@500 344
Chris@500 345 float centre = values[prevcentre];
Chris@500 346
Chris@500 347 if (centre > median) {
Chris@500 348 inrange.push_back(centrebin);
Chris@500 349 }
Chris@500 350
Chris@500 351 if (centre <= median || centrebin+1 == values.size()) {
Chris@500 352 if (!inrange.empty()) {
Chris@929 353 int peakbin = 0;
Chris@500 354 float peakval = 0.f;
Chris@929 355 for (int i = 0; i < (int)inrange.size(); ++i) {
Chris@500 356 if (i == 0 || values[inrange[i]] > peakval) {
Chris@500 357 peakval = values[inrange[i]];
Chris@500 358 peakbin = inrange[i];
Chris@500 359 }
Chris@500 360 }
Chris@500 361 inrange.clear();
Chris@500 362 if (peakbin >= ymin && peakbin <= ymax) {
Chris@500 363 peaks.insert(peakbin);
Chris@275 364 }
Chris@275 365 }
Chris@275 366 }
Chris@500 367
Chris@500 368 if (bin == binmin) break;
Chris@275 369 }
Chris@275 370 }
Chris@275 371
Chris@275 372 return peaks;
Chris@275 373 }
Chris@275 374
Chris@929 375 int
Chris@929 376 FFTModel::getPeakPickWindowSize(PeakPickType type, int sampleRate,
Chris@929 377 int bin, float &percentile) const
Chris@275 378 {
Chris@280 379 percentile = 0.5;
Chris@275 380 if (type == MajorPeaks) return 10;
Chris@275 381 if (bin == 0) return 3;
Chris@280 382
Chris@929 383 int fftSize = m_server->getFFTSize() >> m_yshift;
Chris@969 384 float binfreq = (float(sampleRate) * bin) / fftSize;
Chris@275 385 float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
Chris@280 386
Chris@275 387 int hibin = lrintf((hifreq * fftSize) / sampleRate);
Chris@275 388 int medianWinSize = hibin - bin;
Chris@275 389 if (medianWinSize < 3) medianWinSize = 3;
Chris@280 390
Chris@280 391 percentile = 0.5 + (binfreq / sampleRate);
Chris@280 392
Chris@275 393 return medianWinSize;
Chris@275 394 }
Chris@275 395
Chris@275 396 FFTModel::PeakSet
Chris@929 397 FFTModel::getPeakFrequencies(PeakPickType type, int x,
Chris@929 398 int ymin, int ymax)
Chris@275 399 {
Chris@551 400 Profiler profiler("FFTModel::getPeakFrequencies");
Chris@551 401
Chris@275 402 PeakSet peaks;
Chris@275 403 if (!isOK()) return peaks;
Chris@275 404 PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
Chris@275 405
Chris@929 406 int sampleRate = getSampleRate();
Chris@929 407 int fftSize = m_server->getFFTSize() >> m_yshift;
Chris@929 408 int incr = getResolution();
Chris@275 409
Chris@275 410 // This duplicates some of the work of estimateStableFrequency to
Chris@275 411 // allow us to retrieve the phases in two separate vertical
Chris@275 412 // columns, instead of jumping back and forth between columns x and
Chris@275 413 // x+1, which may be significantly slower if re-seeking is needed
Chris@275 414
Chris@275 415 std::vector<float> phases;
Chris@275 416 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 417 i != locations.end(); ++i) {
Chris@275 418 phases.push_back(getPhaseAt(x, *i));
Chris@275 419 }
Chris@275 420
Chris@929 421 int phaseIndex = 0;
Chris@275 422 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 423 i != locations.end(); ++i) {
Chris@275 424 float oldPhase = phases[phaseIndex];
Chris@275 425 float newPhase = getPhaseAt(x+1, *i);
Chris@275 426 float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize;
Chris@275 427 float phaseError = princargf(newPhase - expectedPhase);
Chris@275 428 float frequency =
Chris@275 429 (sampleRate * (expectedPhase + phaseError - oldPhase))
Chris@275 430 / (2 * M_PI * incr);
Chris@275 431 // bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize));
Chris@275 432 // if (stable)
Chris@275 433 peaks[*i] = frequency;
Chris@275 434 ++phaseIndex;
Chris@275 435 }
Chris@275 436
Chris@275 437 return peaks;
Chris@275 438 }
Chris@275 439
Chris@152 440 Model *
Chris@152 441 FFTModel::clone() const
Chris@152 442 {
Chris@152 443 return new FFTModel(*this);
Chris@152 444 }
Chris@152 445
Chris@152 446 FFTModel::FFTModel(const FFTModel &model) :
Chris@152 447 DenseThreeDimensionalModel(),
Chris@152 448 m_server(model.m_server),
Chris@152 449 m_xshift(model.m_xshift),
Chris@152 450 m_yshift(model.m_yshift)
Chris@152 451 {
Chris@152 452 FFTDataServer::claimInstance(m_server);
Chris@152 453 }
Chris@152 454