annotate data/model/FFTModel.cpp @ 392:183ee2a55fc7

* More work to abstract out interactive components used in the data library, so that it does not need to depend on QtGui.
author Chris Cannam
date Fri, 14 Mar 2008 17:14:21 +0000
parents cc4eb32efc6c
children 7e1b7fcb6c00
rev   line source
Chris@152 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@152 2
Chris@152 3 /*
Chris@152 4 Sonic Visualiser
Chris@152 5 An audio file viewer and annotation editor.
Chris@152 6 Centre for Digital Music, Queen Mary, University of London.
Chris@152 7 This file copyright 2006 Chris Cannam.
Chris@152 8
Chris@152 9 This program is free software; you can redistribute it and/or
Chris@152 10 modify it under the terms of the GNU General Public License as
Chris@152 11 published by the Free Software Foundation; either version 2 of the
Chris@152 12 License, or (at your option) any later version. See the file
Chris@152 13 COPYING included with this distribution for more information.
Chris@152 14 */
Chris@152 15
Chris@152 16 #include "FFTModel.h"
Chris@152 17 #include "DenseTimeValueModel.h"
Chris@297 18 #include "AggregateWaveModel.h"
Chris@152 19
Chris@183 20 #include "base/Profiler.h"
Chris@275 21 #include "base/Pitch.h"
Chris@183 22
Chris@152 23 #include <cassert>
Chris@152 24
Chris@152 25 FFTModel::FFTModel(const DenseTimeValueModel *model,
Chris@152 26 int channel,
Chris@152 27 WindowType windowType,
Chris@152 28 size_t windowSize,
Chris@152 29 size_t windowIncrement,
Chris@152 30 size_t fftSize,
Chris@152 31 bool polar,
Chris@334 32 StorageAdviser::Criteria criteria,
Chris@152 33 size_t fillFromColumn) :
Chris@152 34 //!!! ZoomConstraint!
Chris@152 35 m_server(0),
Chris@152 36 m_xshift(0),
Chris@152 37 m_yshift(0)
Chris@152 38 {
Chris@297 39 setSourceModel(const_cast<DenseTimeValueModel *>(model)); //!!! hmm.
Chris@297 40
Chris@297 41 m_server = getServer(model,
Chris@297 42 channel,
Chris@297 43 windowType,
Chris@297 44 windowSize,
Chris@297 45 windowIncrement,
Chris@297 46 fftSize,
Chris@297 47 polar,
Chris@334 48 criteria,
Chris@297 49 fillFromColumn);
Chris@152 50
Chris@200 51 if (!m_server) return; // caller should check isOK()
Chris@200 52
Chris@152 53 size_t xratio = windowIncrement / m_server->getWindowIncrement();
Chris@152 54 size_t yratio = m_server->getFFTSize() / fftSize;
Chris@152 55
Chris@152 56 while (xratio > 1) {
Chris@152 57 if (xratio & 0x1) {
Chris@152 58 std::cerr << "ERROR: FFTModel: Window increment ratio "
Chris@152 59 << windowIncrement << " / "
Chris@152 60 << m_server->getWindowIncrement()
Chris@152 61 << " must be a power of two" << std::endl;
Chris@152 62 assert(!(xratio & 0x1));
Chris@152 63 }
Chris@152 64 ++m_xshift;
Chris@152 65 xratio >>= 1;
Chris@152 66 }
Chris@152 67
Chris@152 68 while (yratio > 1) {
Chris@152 69 if (yratio & 0x1) {
Chris@152 70 std::cerr << "ERROR: FFTModel: FFT size ratio "
Chris@152 71 << m_server->getFFTSize() << " / " << fftSize
Chris@152 72 << " must be a power of two" << std::endl;
Chris@152 73 assert(!(yratio & 0x1));
Chris@152 74 }
Chris@152 75 ++m_yshift;
Chris@152 76 yratio >>= 1;
Chris@152 77 }
Chris@152 78 }
Chris@152 79
Chris@152 80 FFTModel::~FFTModel()
Chris@152 81 {
Chris@200 82 if (m_server) FFTDataServer::releaseInstance(m_server);
Chris@152 83 }
Chris@152 84
Chris@360 85 void
Chris@360 86 FFTModel::sourceModelAboutToBeDeleted()
Chris@360 87 {
Chris@360 88 if (m_sourceModel) {
Chris@362 89 std::cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_sourceModel << ")" << std::endl;
Chris@362 90 if (m_server) {
Chris@362 91 FFTDataServer::releaseInstance(m_server);
Chris@362 92 m_server = 0;
Chris@362 93 }
Chris@360 94 FFTDataServer::modelAboutToBeDeleted(m_sourceModel);
Chris@360 95 }
Chris@360 96 }
Chris@360 97
Chris@297 98 FFTDataServer *
Chris@297 99 FFTModel::getServer(const DenseTimeValueModel *model,
Chris@297 100 int channel,
Chris@297 101 WindowType windowType,
Chris@297 102 size_t windowSize,
Chris@297 103 size_t windowIncrement,
Chris@297 104 size_t fftSize,
Chris@297 105 bool polar,
Chris@334 106 StorageAdviser::Criteria criteria,
Chris@297 107 size_t fillFromColumn)
Chris@297 108 {
Chris@297 109 // Obviously, an FFT model of channel C (where C != -1) of an
Chris@297 110 // aggregate model is the same as the FFT model of the appropriate
Chris@297 111 // channel of whichever model that aggregate channel is drawn
Chris@297 112 // from. We should use that model here, in case we already have
Chris@297 113 // the data for it or will be wanting the same data again later.
Chris@297 114
Chris@297 115 // If the channel is -1 (i.e. mixture of all channels), then we
Chris@297 116 // can't do this shortcut unless the aggregate model only has one
Chris@297 117 // channel or contains exactly all of the channels of a single
Chris@297 118 // other model. That isn't very likely -- if it were the case,
Chris@297 119 // why would we be using an aggregate model?
Chris@297 120
Chris@297 121 if (channel >= 0) {
Chris@297 122
Chris@297 123 const AggregateWaveModel *aggregate =
Chris@297 124 dynamic_cast<const AggregateWaveModel *>(model);
Chris@297 125
Chris@297 126 if (aggregate && channel < aggregate->getComponentCount()) {
Chris@297 127
Chris@297 128 AggregateWaveModel::ModelChannelSpec spec =
Chris@297 129 aggregate->getComponent(channel);
Chris@297 130
Chris@297 131 return getServer(spec.model,
Chris@297 132 spec.channel,
Chris@297 133 windowType,
Chris@297 134 windowSize,
Chris@297 135 windowIncrement,
Chris@297 136 fftSize,
Chris@297 137 polar,
Chris@334 138 criteria,
Chris@297 139 fillFromColumn);
Chris@297 140 }
Chris@297 141 }
Chris@297 142
Chris@297 143 // The normal case
Chris@297 144
Chris@297 145 return FFTDataServer::getFuzzyInstance(model,
Chris@297 146 channel,
Chris@297 147 windowType,
Chris@297 148 windowSize,
Chris@297 149 windowIncrement,
Chris@297 150 fftSize,
Chris@297 151 polar,
Chris@334 152 criteria,
Chris@297 153 fillFromColumn);
Chris@297 154 }
Chris@297 155
Chris@152 156 size_t
Chris@152 157 FFTModel::getSampleRate() const
Chris@152 158 {
Chris@152 159 return isOK() ? m_server->getModel()->getSampleRate() : 0;
Chris@152 160 }
Chris@152 161
Chris@152 162 void
Chris@182 163 FFTModel::getColumn(size_t x, Column &result) const
Chris@152 164 {
Chris@183 165 Profiler profiler("FFTModel::getColumn", false);
Chris@183 166
Chris@152 167 result.clear();
Chris@152 168 size_t height(getHeight());
Chris@152 169 for (size_t y = 0; y < height; ++y) {
Chris@152 170 result.push_back(const_cast<FFTModel *>(this)->getMagnitudeAt(x, y));
Chris@152 171 }
Chris@152 172 }
Chris@152 173
Chris@152 174 QString
Chris@152 175 FFTModel::getBinName(size_t n) const
Chris@152 176 {
Chris@152 177 size_t sr = getSampleRate();
Chris@152 178 if (!sr) return "";
Chris@204 179 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
Chris@152 180 return name;
Chris@152 181 }
Chris@152 182
Chris@275 183 bool
Chris@275 184 FFTModel::estimateStableFrequency(size_t x, size_t y, float &frequency)
Chris@275 185 {
Chris@275 186 if (!isOK()) return false;
Chris@275 187
Chris@275 188 size_t sampleRate = m_server->getModel()->getSampleRate();
Chris@275 189
Chris@275 190 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 191 frequency = (float(y) * sampleRate) / fftSize;
Chris@275 192
Chris@275 193 if (x+1 >= getWidth()) return false;
Chris@275 194
Chris@275 195 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
Chris@275 196 // At hopsize h and sample rate sr, one hop happens in h/sr sec.
Chris@275 197 // At window size w, for bin b, f is b*sr/w.
Chris@275 198 // thus 2pi phase shift happens in w/(b*sr) sec.
Chris@275 199 // We need to know what phase shift we expect from h/sr sec.
Chris@275 200 // -> 2pi * ((h/sr) / (w/(b*sr)))
Chris@275 201 // = 2pi * ((h * b * sr) / (w * sr))
Chris@275 202 // = 2pi * (h * b) / w.
Chris@275 203
Chris@275 204 float oldPhase = getPhaseAt(x, y);
Chris@275 205 float newPhase = getPhaseAt(x+1, y);
Chris@275 206
Chris@275 207 size_t incr = getResolution();
Chris@275 208
Chris@275 209 float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize;
Chris@275 210
Chris@275 211 float phaseError = princargf(newPhase - expectedPhase);
Chris@275 212
Chris@275 213 // bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize));
Chris@275 214
Chris@275 215 // The new frequency estimate based on the phase error resulting
Chris@275 216 // from assuming the "native" frequency of this bin
Chris@275 217
Chris@275 218 frequency =
Chris@275 219 (sampleRate * (expectedPhase + phaseError - oldPhase)) /
Chris@275 220 (2 * M_PI * incr);
Chris@275 221
Chris@275 222 return true;
Chris@275 223 }
Chris@275 224
Chris@275 225 FFTModel::PeakLocationSet
Chris@275 226 FFTModel::getPeaks(PeakPickType type, size_t x, size_t ymin, size_t ymax)
Chris@275 227 {
Chris@275 228 FFTModel::PeakLocationSet peaks;
Chris@275 229 if (!isOK()) return peaks;
Chris@275 230
Chris@275 231 if (ymax == 0 || ymax > getHeight() - 1) {
Chris@275 232 ymax = getHeight() - 1;
Chris@275 233 }
Chris@275 234
Chris@275 235 Column values;
Chris@275 236
Chris@275 237 if (type == AllPeaks) {
Chris@275 238 for (size_t y = ymin; y <= ymax; ++y) {
Chris@275 239 values.push_back(getMagnitudeAt(x, y));
Chris@275 240 }
Chris@275 241 size_t i = 0;
Chris@275 242 for (size_t bin = ymin; bin <= ymax; ++bin) {
Chris@275 243 if ((i == 0 || values[i] > values[i-1]) &&
Chris@275 244 (i == values.size()-1 || values[i] >= values[i+1])) {
Chris@275 245 peaks.insert(bin);
Chris@275 246 }
Chris@275 247 ++i;
Chris@275 248 }
Chris@275 249 return peaks;
Chris@275 250 }
Chris@275 251
Chris@275 252 getColumn(x, values);
Chris@275 253
Chris@275 254 // For peak picking we use a moving median window, picking the
Chris@275 255 // highest value within each continuous region of values that
Chris@275 256 // exceed the median. For pitch adaptivity, we adjust the window
Chris@275 257 // size to a roughly constant pitch range (about four tones).
Chris@275 258
Chris@275 259 size_t sampleRate = getSampleRate();
Chris@275 260
Chris@275 261 std::deque<float> window;
Chris@275 262 std::vector<size_t> inrange;
Chris@280 263 float dist = 0.5;
Chris@280 264 size_t medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
Chris@275 265 size_t halfWin = medianWinSize/2;
Chris@275 266
Chris@275 267 size_t binmin;
Chris@275 268 if (ymin > halfWin) binmin = ymin - halfWin;
Chris@275 269 else binmin = 0;
Chris@275 270
Chris@275 271 size_t binmax;
Chris@275 272 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 273 else binmax = values.size()-1;
Chris@275 274
Chris@275 275 for (size_t bin = binmin; bin <= binmax; ++bin) {
Chris@275 276
Chris@275 277 float value = values[bin];
Chris@275 278
Chris@275 279 window.push_back(value);
Chris@275 280
Chris@280 281 // so-called median will actually be the dist*100'th percentile
Chris@280 282 medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
Chris@275 283 halfWin = medianWinSize/2;
Chris@275 284
Chris@275 285 while (window.size() > medianWinSize) window.pop_front();
Chris@275 286
Chris@275 287 if (type == MajorPitchAdaptivePeaks) {
Chris@275 288 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 289 else binmax = values.size()-1;
Chris@275 290 }
Chris@275 291
Chris@275 292 std::deque<float> sorted(window);
Chris@275 293 std::sort(sorted.begin(), sorted.end());
Chris@280 294 float median = sorted[int(sorted.size() * dist)];
Chris@275 295
Chris@275 296 if (value > median) {
Chris@275 297 inrange.push_back(bin);
Chris@275 298 }
Chris@275 299
Chris@275 300 if (value <= median || bin+1 == values.size()) {
Chris@275 301 size_t peakbin = 0;
Chris@275 302 float peakval = 0.f;
Chris@275 303 if (!inrange.empty()) {
Chris@275 304 for (size_t i = 0; i < inrange.size(); ++i) {
Chris@275 305 if (i == 0 || values[inrange[i]] > peakval) {
Chris@275 306 peakval = values[inrange[i]];
Chris@275 307 peakbin = inrange[i];
Chris@275 308 }
Chris@275 309 }
Chris@275 310 inrange.clear();
Chris@275 311 if (peakbin >= ymin && peakbin <= ymax) {
Chris@275 312 peaks.insert(peakbin);
Chris@275 313 }
Chris@275 314 }
Chris@275 315 }
Chris@275 316 }
Chris@275 317
Chris@275 318 return peaks;
Chris@275 319 }
Chris@275 320
Chris@275 321 size_t
Chris@280 322 FFTModel::getPeakPickWindowSize(PeakPickType type, size_t sampleRate,
Chris@280 323 size_t bin, float &percentile) const
Chris@275 324 {
Chris@280 325 percentile = 0.5;
Chris@275 326 if (type == MajorPeaks) return 10;
Chris@275 327 if (bin == 0) return 3;
Chris@280 328
Chris@275 329 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 330 float binfreq = (sampleRate * bin) / fftSize;
Chris@275 331 float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
Chris@280 332
Chris@275 333 int hibin = lrintf((hifreq * fftSize) / sampleRate);
Chris@275 334 int medianWinSize = hibin - bin;
Chris@275 335 if (medianWinSize < 3) medianWinSize = 3;
Chris@280 336
Chris@280 337 percentile = 0.5 + (binfreq / sampleRate);
Chris@280 338
Chris@275 339 return medianWinSize;
Chris@275 340 }
Chris@275 341
Chris@275 342 FFTModel::PeakSet
Chris@275 343 FFTModel::getPeakFrequencies(PeakPickType type, size_t x,
Chris@275 344 size_t ymin, size_t ymax)
Chris@275 345 {
Chris@275 346 PeakSet peaks;
Chris@275 347 if (!isOK()) return peaks;
Chris@275 348 PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
Chris@275 349
Chris@275 350 size_t sampleRate = getSampleRate();
Chris@275 351 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 352 size_t incr = getResolution();
Chris@275 353
Chris@275 354 // This duplicates some of the work of estimateStableFrequency to
Chris@275 355 // allow us to retrieve the phases in two separate vertical
Chris@275 356 // columns, instead of jumping back and forth between columns x and
Chris@275 357 // x+1, which may be significantly slower if re-seeking is needed
Chris@275 358
Chris@275 359 std::vector<float> phases;
Chris@275 360 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 361 i != locations.end(); ++i) {
Chris@275 362 phases.push_back(getPhaseAt(x, *i));
Chris@275 363 }
Chris@275 364
Chris@275 365 size_t phaseIndex = 0;
Chris@275 366 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 367 i != locations.end(); ++i) {
Chris@275 368 float oldPhase = phases[phaseIndex];
Chris@275 369 float newPhase = getPhaseAt(x+1, *i);
Chris@275 370 float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize;
Chris@275 371 float phaseError = princargf(newPhase - expectedPhase);
Chris@275 372 float frequency =
Chris@275 373 (sampleRate * (expectedPhase + phaseError - oldPhase))
Chris@275 374 / (2 * M_PI * incr);
Chris@275 375 // bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize));
Chris@275 376 // if (stable)
Chris@275 377 peaks[*i] = frequency;
Chris@275 378 ++phaseIndex;
Chris@275 379 }
Chris@275 380
Chris@275 381 return peaks;
Chris@275 382 }
Chris@275 383
Chris@152 384 Model *
Chris@152 385 FFTModel::clone() const
Chris@152 386 {
Chris@152 387 return new FFTModel(*this);
Chris@152 388 }
Chris@152 389
Chris@152 390 FFTModel::FFTModel(const FFTModel &model) :
Chris@152 391 DenseThreeDimensionalModel(),
Chris@152 392 m_server(model.m_server),
Chris@152 393 m_xshift(model.m_xshift),
Chris@152 394 m_yshift(model.m_yshift)
Chris@152 395 {
Chris@152 396 FFTDataServer::claimInstance(m_server);
Chris@152 397 }
Chris@152 398