annotate data/model/FFTModel.cpp @ 335:02d2ad95ea52 spectrogram-cache-rejig

* Get storage advice for each cache in an FFT data server. Allows us to be more confident about the actual memory situation and cut over from memory to disc part way through an FFT calculation if necessary. StorageAdviser is now a bit too optimistic though (it's too keen to allocate large numbers of small blocks in memory).
author Chris Cannam
date Tue, 13 Nov 2007 13:54:10 +0000
parents aa8dbac62024
children ac300d385ab2 94fc0591ea43
rev   line source
Chris@152 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@152 2
Chris@152 3 /*
Chris@152 4 Sonic Visualiser
Chris@152 5 An audio file viewer and annotation editor.
Chris@152 6 Centre for Digital Music, Queen Mary, University of London.
Chris@152 7 This file copyright 2006 Chris Cannam.
Chris@152 8
Chris@152 9 This program is free software; you can redistribute it and/or
Chris@152 10 modify it under the terms of the GNU General Public License as
Chris@152 11 published by the Free Software Foundation; either version 2 of the
Chris@152 12 License, or (at your option) any later version. See the file
Chris@152 13 COPYING included with this distribution for more information.
Chris@152 14 */
Chris@152 15
Chris@152 16 #include "FFTModel.h"
Chris@152 17 #include "DenseTimeValueModel.h"
Chris@297 18 #include "AggregateWaveModel.h"
Chris@152 19
Chris@183 20 #include "base/Profiler.h"
Chris@275 21 #include "base/Pitch.h"
Chris@183 22
Chris@152 23 #include <cassert>
Chris@152 24
Chris@152 25 FFTModel::FFTModel(const DenseTimeValueModel *model,
Chris@152 26 int channel,
Chris@152 27 WindowType windowType,
Chris@152 28 size_t windowSize,
Chris@152 29 size_t windowIncrement,
Chris@152 30 size_t fftSize,
Chris@152 31 bool polar,
Chris@334 32 StorageAdviser::Criteria criteria,
Chris@152 33 size_t fillFromColumn) :
Chris@152 34 //!!! ZoomConstraint!
Chris@152 35 m_server(0),
Chris@152 36 m_xshift(0),
Chris@152 37 m_yshift(0)
Chris@152 38 {
Chris@297 39 setSourceModel(const_cast<DenseTimeValueModel *>(model)); //!!! hmm.
Chris@297 40
Chris@297 41 m_server = getServer(model,
Chris@297 42 channel,
Chris@297 43 windowType,
Chris@297 44 windowSize,
Chris@297 45 windowIncrement,
Chris@297 46 fftSize,
Chris@297 47 polar,
Chris@334 48 criteria,
Chris@297 49 fillFromColumn);
Chris@152 50
Chris@200 51 if (!m_server) return; // caller should check isOK()
Chris@200 52
Chris@152 53 size_t xratio = windowIncrement / m_server->getWindowIncrement();
Chris@152 54 size_t yratio = m_server->getFFTSize() / fftSize;
Chris@152 55
Chris@152 56 while (xratio > 1) {
Chris@152 57 if (xratio & 0x1) {
Chris@152 58 std::cerr << "ERROR: FFTModel: Window increment ratio "
Chris@152 59 << windowIncrement << " / "
Chris@152 60 << m_server->getWindowIncrement()
Chris@152 61 << " must be a power of two" << std::endl;
Chris@152 62 assert(!(xratio & 0x1));
Chris@152 63 }
Chris@152 64 ++m_xshift;
Chris@152 65 xratio >>= 1;
Chris@152 66 }
Chris@152 67
Chris@152 68 while (yratio > 1) {
Chris@152 69 if (yratio & 0x1) {
Chris@152 70 std::cerr << "ERROR: FFTModel: FFT size ratio "
Chris@152 71 << m_server->getFFTSize() << " / " << fftSize
Chris@152 72 << " must be a power of two" << std::endl;
Chris@152 73 assert(!(yratio & 0x1));
Chris@152 74 }
Chris@152 75 ++m_yshift;
Chris@152 76 yratio >>= 1;
Chris@152 77 }
Chris@152 78 }
Chris@152 79
Chris@152 80 FFTModel::~FFTModel()
Chris@152 81 {
Chris@200 82 if (m_server) FFTDataServer::releaseInstance(m_server);
Chris@152 83 }
Chris@152 84
Chris@297 85 FFTDataServer *
Chris@297 86 FFTModel::getServer(const DenseTimeValueModel *model,
Chris@297 87 int channel,
Chris@297 88 WindowType windowType,
Chris@297 89 size_t windowSize,
Chris@297 90 size_t windowIncrement,
Chris@297 91 size_t fftSize,
Chris@297 92 bool polar,
Chris@334 93 StorageAdviser::Criteria criteria,
Chris@297 94 size_t fillFromColumn)
Chris@297 95 {
Chris@297 96 // Obviously, an FFT model of channel C (where C != -1) of an
Chris@297 97 // aggregate model is the same as the FFT model of the appropriate
Chris@297 98 // channel of whichever model that aggregate channel is drawn
Chris@297 99 // from. We should use that model here, in case we already have
Chris@297 100 // the data for it or will be wanting the same data again later.
Chris@297 101
Chris@297 102 // If the channel is -1 (i.e. mixture of all channels), then we
Chris@297 103 // can't do this shortcut unless the aggregate model only has one
Chris@297 104 // channel or contains exactly all of the channels of a single
Chris@297 105 // other model. That isn't very likely -- if it were the case,
Chris@297 106 // why would we be using an aggregate model?
Chris@297 107
Chris@297 108 if (channel >= 0) {
Chris@297 109
Chris@297 110 const AggregateWaveModel *aggregate =
Chris@297 111 dynamic_cast<const AggregateWaveModel *>(model);
Chris@297 112
Chris@297 113 if (aggregate && channel < aggregate->getComponentCount()) {
Chris@297 114
Chris@297 115 AggregateWaveModel::ModelChannelSpec spec =
Chris@297 116 aggregate->getComponent(channel);
Chris@297 117
Chris@297 118 return getServer(spec.model,
Chris@297 119 spec.channel,
Chris@297 120 windowType,
Chris@297 121 windowSize,
Chris@297 122 windowIncrement,
Chris@297 123 fftSize,
Chris@297 124 polar,
Chris@334 125 criteria,
Chris@297 126 fillFromColumn);
Chris@297 127 }
Chris@297 128 }
Chris@297 129
Chris@297 130 // The normal case
Chris@297 131
Chris@297 132 return FFTDataServer::getFuzzyInstance(model,
Chris@297 133 channel,
Chris@297 134 windowType,
Chris@297 135 windowSize,
Chris@297 136 windowIncrement,
Chris@297 137 fftSize,
Chris@297 138 polar,
Chris@334 139 criteria,
Chris@297 140 fillFromColumn);
Chris@297 141 }
Chris@297 142
Chris@152 143 size_t
Chris@152 144 FFTModel::getSampleRate() const
Chris@152 145 {
Chris@152 146 return isOK() ? m_server->getModel()->getSampleRate() : 0;
Chris@152 147 }
Chris@152 148
Chris@152 149 void
Chris@182 150 FFTModel::getColumn(size_t x, Column &result) const
Chris@152 151 {
Chris@183 152 Profiler profiler("FFTModel::getColumn", false);
Chris@183 153
Chris@152 154 result.clear();
Chris@152 155 size_t height(getHeight());
Chris@152 156 for (size_t y = 0; y < height; ++y) {
Chris@152 157 result.push_back(const_cast<FFTModel *>(this)->getMagnitudeAt(x, y));
Chris@152 158 }
Chris@152 159 }
Chris@152 160
Chris@152 161 QString
Chris@152 162 FFTModel::getBinName(size_t n) const
Chris@152 163 {
Chris@152 164 size_t sr = getSampleRate();
Chris@152 165 if (!sr) return "";
Chris@204 166 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
Chris@152 167 return name;
Chris@152 168 }
Chris@152 169
Chris@275 170 bool
Chris@275 171 FFTModel::estimateStableFrequency(size_t x, size_t y, float &frequency)
Chris@275 172 {
Chris@275 173 if (!isOK()) return false;
Chris@275 174
Chris@275 175 size_t sampleRate = m_server->getModel()->getSampleRate();
Chris@275 176
Chris@275 177 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 178 frequency = (float(y) * sampleRate) / fftSize;
Chris@275 179
Chris@275 180 if (x+1 >= getWidth()) return false;
Chris@275 181
Chris@275 182 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
Chris@275 183 // At hopsize h and sample rate sr, one hop happens in h/sr sec.
Chris@275 184 // At window size w, for bin b, f is b*sr/w.
Chris@275 185 // thus 2pi phase shift happens in w/(b*sr) sec.
Chris@275 186 // We need to know what phase shift we expect from h/sr sec.
Chris@275 187 // -> 2pi * ((h/sr) / (w/(b*sr)))
Chris@275 188 // = 2pi * ((h * b * sr) / (w * sr))
Chris@275 189 // = 2pi * (h * b) / w.
Chris@275 190
Chris@275 191 float oldPhase = getPhaseAt(x, y);
Chris@275 192 float newPhase = getPhaseAt(x+1, y);
Chris@275 193
Chris@275 194 size_t incr = getResolution();
Chris@275 195
Chris@275 196 float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize;
Chris@275 197
Chris@275 198 float phaseError = princargf(newPhase - expectedPhase);
Chris@275 199
Chris@275 200 // bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize));
Chris@275 201
Chris@275 202 // The new frequency estimate based on the phase error resulting
Chris@275 203 // from assuming the "native" frequency of this bin
Chris@275 204
Chris@275 205 frequency =
Chris@275 206 (sampleRate * (expectedPhase + phaseError - oldPhase)) /
Chris@275 207 (2 * M_PI * incr);
Chris@275 208
Chris@275 209 return true;
Chris@275 210 }
Chris@275 211
Chris@275 212 FFTModel::PeakLocationSet
Chris@275 213 FFTModel::getPeaks(PeakPickType type, size_t x, size_t ymin, size_t ymax)
Chris@275 214 {
Chris@275 215 FFTModel::PeakLocationSet peaks;
Chris@275 216 if (!isOK()) return peaks;
Chris@275 217
Chris@275 218 if (ymax == 0 || ymax > getHeight() - 1) {
Chris@275 219 ymax = getHeight() - 1;
Chris@275 220 }
Chris@275 221
Chris@275 222 Column values;
Chris@275 223
Chris@275 224 if (type == AllPeaks) {
Chris@275 225 for (size_t y = ymin; y <= ymax; ++y) {
Chris@275 226 values.push_back(getMagnitudeAt(x, y));
Chris@275 227 }
Chris@275 228 size_t i = 0;
Chris@275 229 for (size_t bin = ymin; bin <= ymax; ++bin) {
Chris@275 230 if ((i == 0 || values[i] > values[i-1]) &&
Chris@275 231 (i == values.size()-1 || values[i] >= values[i+1])) {
Chris@275 232 peaks.insert(bin);
Chris@275 233 }
Chris@275 234 ++i;
Chris@275 235 }
Chris@275 236 return peaks;
Chris@275 237 }
Chris@275 238
Chris@275 239 getColumn(x, values);
Chris@275 240
Chris@275 241 // For peak picking we use a moving median window, picking the
Chris@275 242 // highest value within each continuous region of values that
Chris@275 243 // exceed the median. For pitch adaptivity, we adjust the window
Chris@275 244 // size to a roughly constant pitch range (about four tones).
Chris@275 245
Chris@275 246 size_t sampleRate = getSampleRate();
Chris@275 247
Chris@275 248 std::deque<float> window;
Chris@275 249 std::vector<size_t> inrange;
Chris@280 250 float dist = 0.5;
Chris@280 251 size_t medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
Chris@275 252 size_t halfWin = medianWinSize/2;
Chris@275 253
Chris@275 254 size_t binmin;
Chris@275 255 if (ymin > halfWin) binmin = ymin - halfWin;
Chris@275 256 else binmin = 0;
Chris@275 257
Chris@275 258 size_t binmax;
Chris@275 259 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 260 else binmax = values.size()-1;
Chris@275 261
Chris@275 262 for (size_t bin = binmin; bin <= binmax; ++bin) {
Chris@275 263
Chris@275 264 float value = values[bin];
Chris@275 265
Chris@275 266 window.push_back(value);
Chris@275 267
Chris@280 268 // so-called median will actually be the dist*100'th percentile
Chris@280 269 medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
Chris@275 270 halfWin = medianWinSize/2;
Chris@275 271
Chris@275 272 while (window.size() > medianWinSize) window.pop_front();
Chris@275 273
Chris@275 274 if (type == MajorPitchAdaptivePeaks) {
Chris@275 275 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 276 else binmax = values.size()-1;
Chris@275 277 }
Chris@275 278
Chris@275 279 std::deque<float> sorted(window);
Chris@275 280 std::sort(sorted.begin(), sorted.end());
Chris@280 281 float median = sorted[int(sorted.size() * dist)];
Chris@275 282
Chris@275 283 if (value > median) {
Chris@275 284 inrange.push_back(bin);
Chris@275 285 }
Chris@275 286
Chris@275 287 if (value <= median || bin+1 == values.size()) {
Chris@275 288 size_t peakbin = 0;
Chris@275 289 float peakval = 0.f;
Chris@275 290 if (!inrange.empty()) {
Chris@275 291 for (size_t i = 0; i < inrange.size(); ++i) {
Chris@275 292 if (i == 0 || values[inrange[i]] > peakval) {
Chris@275 293 peakval = values[inrange[i]];
Chris@275 294 peakbin = inrange[i];
Chris@275 295 }
Chris@275 296 }
Chris@275 297 inrange.clear();
Chris@275 298 if (peakbin >= ymin && peakbin <= ymax) {
Chris@275 299 peaks.insert(peakbin);
Chris@275 300 }
Chris@275 301 }
Chris@275 302 }
Chris@275 303 }
Chris@275 304
Chris@275 305 return peaks;
Chris@275 306 }
Chris@275 307
Chris@275 308 size_t
Chris@280 309 FFTModel::getPeakPickWindowSize(PeakPickType type, size_t sampleRate,
Chris@280 310 size_t bin, float &percentile) const
Chris@275 311 {
Chris@280 312 percentile = 0.5;
Chris@275 313 if (type == MajorPeaks) return 10;
Chris@275 314 if (bin == 0) return 3;
Chris@280 315
Chris@275 316 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 317 float binfreq = (sampleRate * bin) / fftSize;
Chris@275 318 float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
Chris@280 319
Chris@275 320 int hibin = lrintf((hifreq * fftSize) / sampleRate);
Chris@275 321 int medianWinSize = hibin - bin;
Chris@275 322 if (medianWinSize < 3) medianWinSize = 3;
Chris@280 323
Chris@280 324 percentile = 0.5 + (binfreq / sampleRate);
Chris@280 325
Chris@275 326 return medianWinSize;
Chris@275 327 }
Chris@275 328
Chris@275 329 FFTModel::PeakSet
Chris@275 330 FFTModel::getPeakFrequencies(PeakPickType type, size_t x,
Chris@275 331 size_t ymin, size_t ymax)
Chris@275 332 {
Chris@275 333 PeakSet peaks;
Chris@275 334 if (!isOK()) return peaks;
Chris@275 335 PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
Chris@275 336
Chris@275 337 size_t sampleRate = getSampleRate();
Chris@275 338 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 339 size_t incr = getResolution();
Chris@275 340
Chris@275 341 // This duplicates some of the work of estimateStableFrequency to
Chris@275 342 // allow us to retrieve the phases in two separate vertical
Chris@275 343 // columns, instead of jumping back and forth between columns x and
Chris@275 344 // x+1, which may be significantly slower if re-seeking is needed
Chris@275 345
Chris@275 346 std::vector<float> phases;
Chris@275 347 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 348 i != locations.end(); ++i) {
Chris@275 349 phases.push_back(getPhaseAt(x, *i));
Chris@275 350 }
Chris@275 351
Chris@275 352 size_t phaseIndex = 0;
Chris@275 353 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 354 i != locations.end(); ++i) {
Chris@275 355 float oldPhase = phases[phaseIndex];
Chris@275 356 float newPhase = getPhaseAt(x+1, *i);
Chris@275 357 float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize;
Chris@275 358 float phaseError = princargf(newPhase - expectedPhase);
Chris@275 359 float frequency =
Chris@275 360 (sampleRate * (expectedPhase + phaseError - oldPhase))
Chris@275 361 / (2 * M_PI * incr);
Chris@275 362 // bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize));
Chris@275 363 // if (stable)
Chris@275 364 peaks[*i] = frequency;
Chris@275 365 ++phaseIndex;
Chris@275 366 }
Chris@275 367
Chris@275 368 return peaks;
Chris@275 369 }
Chris@275 370
Chris@152 371 Model *
Chris@152 372 FFTModel::clone() const
Chris@152 373 {
Chris@152 374 return new FFTModel(*this);
Chris@152 375 }
Chris@152 376
Chris@152 377 FFTModel::FFTModel(const FFTModel &model) :
Chris@152 378 DenseThreeDimensionalModel(),
Chris@152 379 m_server(model.m_server),
Chris@152 380 m_xshift(model.m_xshift),
Chris@152 381 m_yshift(model.m_yshift)
Chris@152 382 {
Chris@152 383 FFTDataServer::claimInstance(m_server);
Chris@152 384 }
Chris@152 385