annotate data/model/FFTModel.cpp @ 308:14e0f60435b8

* Make it possible to drop audio files, layer files, session files and images onto SV panes. Need to do a bit more work on where we expect the dropped file to go, particularly in the case of audio files -- at the moment they're always opened in new panes, but it may be better to by default replace whatever is in the target pane.
author Chris Cannam
date Wed, 10 Oct 2007 15:18:02 +0000
parents c022976d18e8
children aa8dbac62024
rev   line source
Chris@152 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@152 2
Chris@152 3 /*
Chris@152 4 Sonic Visualiser
Chris@152 5 An audio file viewer and annotation editor.
Chris@152 6 Centre for Digital Music, Queen Mary, University of London.
Chris@152 7 This file copyright 2006 Chris Cannam.
Chris@152 8
Chris@152 9 This program is free software; you can redistribute it and/or
Chris@152 10 modify it under the terms of the GNU General Public License as
Chris@152 11 published by the Free Software Foundation; either version 2 of the
Chris@152 12 License, or (at your option) any later version. See the file
Chris@152 13 COPYING included with this distribution for more information.
Chris@152 14 */
Chris@152 15
Chris@152 16 #include "FFTModel.h"
Chris@152 17 #include "DenseTimeValueModel.h"
Chris@297 18 #include "AggregateWaveModel.h"
Chris@152 19
Chris@183 20 #include "base/Profiler.h"
Chris@275 21 #include "base/Pitch.h"
Chris@183 22
Chris@152 23 #include <cassert>
Chris@152 24
Chris@152 25 FFTModel::FFTModel(const DenseTimeValueModel *model,
Chris@152 26 int channel,
Chris@152 27 WindowType windowType,
Chris@152 28 size_t windowSize,
Chris@152 29 size_t windowIncrement,
Chris@152 30 size_t fftSize,
Chris@152 31 bool polar,
Chris@152 32 size_t fillFromColumn) :
Chris@152 33 //!!! ZoomConstraint!
Chris@152 34 m_server(0),
Chris@152 35 m_xshift(0),
Chris@152 36 m_yshift(0)
Chris@152 37 {
Chris@297 38 setSourceModel(const_cast<DenseTimeValueModel *>(model)); //!!! hmm.
Chris@297 39
Chris@297 40 m_server = getServer(model,
Chris@297 41 channel,
Chris@297 42 windowType,
Chris@297 43 windowSize,
Chris@297 44 windowIncrement,
Chris@297 45 fftSize,
Chris@297 46 polar,
Chris@297 47 fillFromColumn);
Chris@152 48
Chris@200 49 if (!m_server) return; // caller should check isOK()
Chris@200 50
Chris@152 51 size_t xratio = windowIncrement / m_server->getWindowIncrement();
Chris@152 52 size_t yratio = m_server->getFFTSize() / fftSize;
Chris@152 53
Chris@152 54 while (xratio > 1) {
Chris@152 55 if (xratio & 0x1) {
Chris@152 56 std::cerr << "ERROR: FFTModel: Window increment ratio "
Chris@152 57 << windowIncrement << " / "
Chris@152 58 << m_server->getWindowIncrement()
Chris@152 59 << " must be a power of two" << std::endl;
Chris@152 60 assert(!(xratio & 0x1));
Chris@152 61 }
Chris@152 62 ++m_xshift;
Chris@152 63 xratio >>= 1;
Chris@152 64 }
Chris@152 65
Chris@152 66 while (yratio > 1) {
Chris@152 67 if (yratio & 0x1) {
Chris@152 68 std::cerr << "ERROR: FFTModel: FFT size ratio "
Chris@152 69 << m_server->getFFTSize() << " / " << fftSize
Chris@152 70 << " must be a power of two" << std::endl;
Chris@152 71 assert(!(yratio & 0x1));
Chris@152 72 }
Chris@152 73 ++m_yshift;
Chris@152 74 yratio >>= 1;
Chris@152 75 }
Chris@152 76 }
Chris@152 77
Chris@152 78 FFTModel::~FFTModel()
Chris@152 79 {
Chris@200 80 if (m_server) FFTDataServer::releaseInstance(m_server);
Chris@152 81 }
Chris@152 82
Chris@297 83 FFTDataServer *
Chris@297 84 FFTModel::getServer(const DenseTimeValueModel *model,
Chris@297 85 int channel,
Chris@297 86 WindowType windowType,
Chris@297 87 size_t windowSize,
Chris@297 88 size_t windowIncrement,
Chris@297 89 size_t fftSize,
Chris@297 90 bool polar,
Chris@297 91 size_t fillFromColumn)
Chris@297 92 {
Chris@297 93 // Obviously, an FFT model of channel C (where C != -1) of an
Chris@297 94 // aggregate model is the same as the FFT model of the appropriate
Chris@297 95 // channel of whichever model that aggregate channel is drawn
Chris@297 96 // from. We should use that model here, in case we already have
Chris@297 97 // the data for it or will be wanting the same data again later.
Chris@297 98
Chris@297 99 // If the channel is -1 (i.e. mixture of all channels), then we
Chris@297 100 // can't do this shortcut unless the aggregate model only has one
Chris@297 101 // channel or contains exactly all of the channels of a single
Chris@297 102 // other model. That isn't very likely -- if it were the case,
Chris@297 103 // why would we be using an aggregate model?
Chris@297 104
Chris@297 105 if (channel >= 0) {
Chris@297 106
Chris@297 107 const AggregateWaveModel *aggregate =
Chris@297 108 dynamic_cast<const AggregateWaveModel *>(model);
Chris@297 109
Chris@297 110 if (aggregate && channel < aggregate->getComponentCount()) {
Chris@297 111
Chris@297 112 AggregateWaveModel::ModelChannelSpec spec =
Chris@297 113 aggregate->getComponent(channel);
Chris@297 114
Chris@297 115 return getServer(spec.model,
Chris@297 116 spec.channel,
Chris@297 117 windowType,
Chris@297 118 windowSize,
Chris@297 119 windowIncrement,
Chris@297 120 fftSize,
Chris@297 121 polar,
Chris@297 122 fillFromColumn);
Chris@297 123 }
Chris@297 124 }
Chris@297 125
Chris@297 126 // The normal case
Chris@297 127
Chris@297 128 return FFTDataServer::getFuzzyInstance(model,
Chris@297 129 channel,
Chris@297 130 windowType,
Chris@297 131 windowSize,
Chris@297 132 windowIncrement,
Chris@297 133 fftSize,
Chris@297 134 polar,
Chris@297 135 fillFromColumn);
Chris@297 136 }
Chris@297 137
Chris@152 138 size_t
Chris@152 139 FFTModel::getSampleRate() const
Chris@152 140 {
Chris@152 141 return isOK() ? m_server->getModel()->getSampleRate() : 0;
Chris@152 142 }
Chris@152 143
Chris@152 144 void
Chris@182 145 FFTModel::getColumn(size_t x, Column &result) const
Chris@152 146 {
Chris@183 147 Profiler profiler("FFTModel::getColumn", false);
Chris@183 148
Chris@152 149 result.clear();
Chris@152 150 size_t height(getHeight());
Chris@152 151 for (size_t y = 0; y < height; ++y) {
Chris@152 152 result.push_back(const_cast<FFTModel *>(this)->getMagnitudeAt(x, y));
Chris@152 153 }
Chris@152 154 }
Chris@152 155
Chris@152 156 QString
Chris@152 157 FFTModel::getBinName(size_t n) const
Chris@152 158 {
Chris@152 159 size_t sr = getSampleRate();
Chris@152 160 if (!sr) return "";
Chris@204 161 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
Chris@152 162 return name;
Chris@152 163 }
Chris@152 164
Chris@275 165 bool
Chris@275 166 FFTModel::estimateStableFrequency(size_t x, size_t y, float &frequency)
Chris@275 167 {
Chris@275 168 if (!isOK()) return false;
Chris@275 169
Chris@275 170 size_t sampleRate = m_server->getModel()->getSampleRate();
Chris@275 171
Chris@275 172 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 173 frequency = (float(y) * sampleRate) / fftSize;
Chris@275 174
Chris@275 175 if (x+1 >= getWidth()) return false;
Chris@275 176
Chris@275 177 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
Chris@275 178 // At hopsize h and sample rate sr, one hop happens in h/sr sec.
Chris@275 179 // At window size w, for bin b, f is b*sr/w.
Chris@275 180 // thus 2pi phase shift happens in w/(b*sr) sec.
Chris@275 181 // We need to know what phase shift we expect from h/sr sec.
Chris@275 182 // -> 2pi * ((h/sr) / (w/(b*sr)))
Chris@275 183 // = 2pi * ((h * b * sr) / (w * sr))
Chris@275 184 // = 2pi * (h * b) / w.
Chris@275 185
Chris@275 186 float oldPhase = getPhaseAt(x, y);
Chris@275 187 float newPhase = getPhaseAt(x+1, y);
Chris@275 188
Chris@275 189 size_t incr = getResolution();
Chris@275 190
Chris@275 191 float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize;
Chris@275 192
Chris@275 193 float phaseError = princargf(newPhase - expectedPhase);
Chris@275 194
Chris@275 195 // bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize));
Chris@275 196
Chris@275 197 // The new frequency estimate based on the phase error resulting
Chris@275 198 // from assuming the "native" frequency of this bin
Chris@275 199
Chris@275 200 frequency =
Chris@275 201 (sampleRate * (expectedPhase + phaseError - oldPhase)) /
Chris@275 202 (2 * M_PI * incr);
Chris@275 203
Chris@275 204 return true;
Chris@275 205 }
Chris@275 206
Chris@275 207 FFTModel::PeakLocationSet
Chris@275 208 FFTModel::getPeaks(PeakPickType type, size_t x, size_t ymin, size_t ymax)
Chris@275 209 {
Chris@275 210 FFTModel::PeakLocationSet peaks;
Chris@275 211 if (!isOK()) return peaks;
Chris@275 212
Chris@275 213 if (ymax == 0 || ymax > getHeight() - 1) {
Chris@275 214 ymax = getHeight() - 1;
Chris@275 215 }
Chris@275 216
Chris@275 217 Column values;
Chris@275 218
Chris@275 219 if (type == AllPeaks) {
Chris@275 220 for (size_t y = ymin; y <= ymax; ++y) {
Chris@275 221 values.push_back(getMagnitudeAt(x, y));
Chris@275 222 }
Chris@275 223 size_t i = 0;
Chris@275 224 for (size_t bin = ymin; bin <= ymax; ++bin) {
Chris@275 225 if ((i == 0 || values[i] > values[i-1]) &&
Chris@275 226 (i == values.size()-1 || values[i] >= values[i+1])) {
Chris@275 227 peaks.insert(bin);
Chris@275 228 }
Chris@275 229 ++i;
Chris@275 230 }
Chris@275 231 return peaks;
Chris@275 232 }
Chris@275 233
Chris@275 234 getColumn(x, values);
Chris@275 235
Chris@275 236 // For peak picking we use a moving median window, picking the
Chris@275 237 // highest value within each continuous region of values that
Chris@275 238 // exceed the median. For pitch adaptivity, we adjust the window
Chris@275 239 // size to a roughly constant pitch range (about four tones).
Chris@275 240
Chris@275 241 size_t sampleRate = getSampleRate();
Chris@275 242
Chris@275 243 std::deque<float> window;
Chris@275 244 std::vector<size_t> inrange;
Chris@280 245 float dist = 0.5;
Chris@280 246 size_t medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
Chris@275 247 size_t halfWin = medianWinSize/2;
Chris@275 248
Chris@275 249 size_t binmin;
Chris@275 250 if (ymin > halfWin) binmin = ymin - halfWin;
Chris@275 251 else binmin = 0;
Chris@275 252
Chris@275 253 size_t binmax;
Chris@275 254 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 255 else binmax = values.size()-1;
Chris@275 256
Chris@275 257 for (size_t bin = binmin; bin <= binmax; ++bin) {
Chris@275 258
Chris@275 259 float value = values[bin];
Chris@275 260
Chris@275 261 window.push_back(value);
Chris@275 262
Chris@280 263 // so-called median will actually be the dist*100'th percentile
Chris@280 264 medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
Chris@275 265 halfWin = medianWinSize/2;
Chris@275 266
Chris@275 267 while (window.size() > medianWinSize) window.pop_front();
Chris@275 268
Chris@275 269 if (type == MajorPitchAdaptivePeaks) {
Chris@275 270 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
Chris@275 271 else binmax = values.size()-1;
Chris@275 272 }
Chris@275 273
Chris@275 274 std::deque<float> sorted(window);
Chris@275 275 std::sort(sorted.begin(), sorted.end());
Chris@280 276 float median = sorted[int(sorted.size() * dist)];
Chris@275 277
Chris@275 278 if (value > median) {
Chris@275 279 inrange.push_back(bin);
Chris@275 280 }
Chris@275 281
Chris@275 282 if (value <= median || bin+1 == values.size()) {
Chris@275 283 size_t peakbin = 0;
Chris@275 284 float peakval = 0.f;
Chris@275 285 if (!inrange.empty()) {
Chris@275 286 for (size_t i = 0; i < inrange.size(); ++i) {
Chris@275 287 if (i == 0 || values[inrange[i]] > peakval) {
Chris@275 288 peakval = values[inrange[i]];
Chris@275 289 peakbin = inrange[i];
Chris@275 290 }
Chris@275 291 }
Chris@275 292 inrange.clear();
Chris@275 293 if (peakbin >= ymin && peakbin <= ymax) {
Chris@275 294 peaks.insert(peakbin);
Chris@275 295 }
Chris@275 296 }
Chris@275 297 }
Chris@275 298 }
Chris@275 299
Chris@275 300 return peaks;
Chris@275 301 }
Chris@275 302
Chris@275 303 size_t
Chris@280 304 FFTModel::getPeakPickWindowSize(PeakPickType type, size_t sampleRate,
Chris@280 305 size_t bin, float &percentile) const
Chris@275 306 {
Chris@280 307 percentile = 0.5;
Chris@275 308 if (type == MajorPeaks) return 10;
Chris@275 309 if (bin == 0) return 3;
Chris@280 310
Chris@275 311 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 312 float binfreq = (sampleRate * bin) / fftSize;
Chris@275 313 float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
Chris@280 314
Chris@275 315 int hibin = lrintf((hifreq * fftSize) / sampleRate);
Chris@275 316 int medianWinSize = hibin - bin;
Chris@275 317 if (medianWinSize < 3) medianWinSize = 3;
Chris@280 318
Chris@280 319 percentile = 0.5 + (binfreq / sampleRate);
Chris@280 320
Chris@275 321 return medianWinSize;
Chris@275 322 }
Chris@275 323
Chris@275 324 FFTModel::PeakSet
Chris@275 325 FFTModel::getPeakFrequencies(PeakPickType type, size_t x,
Chris@275 326 size_t ymin, size_t ymax)
Chris@275 327 {
Chris@275 328 PeakSet peaks;
Chris@275 329 if (!isOK()) return peaks;
Chris@275 330 PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
Chris@275 331
Chris@275 332 size_t sampleRate = getSampleRate();
Chris@275 333 size_t fftSize = m_server->getFFTSize() >> m_yshift;
Chris@275 334 size_t incr = getResolution();
Chris@275 335
Chris@275 336 // This duplicates some of the work of estimateStableFrequency to
Chris@275 337 // allow us to retrieve the phases in two separate vertical
Chris@275 338 // columns, instead of jumping back and forth between columns x and
Chris@275 339 // x+1, which may be significantly slower if re-seeking is needed
Chris@275 340
Chris@275 341 std::vector<float> phases;
Chris@275 342 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 343 i != locations.end(); ++i) {
Chris@275 344 phases.push_back(getPhaseAt(x, *i));
Chris@275 345 }
Chris@275 346
Chris@275 347 size_t phaseIndex = 0;
Chris@275 348 for (PeakLocationSet::iterator i = locations.begin();
Chris@275 349 i != locations.end(); ++i) {
Chris@275 350 float oldPhase = phases[phaseIndex];
Chris@275 351 float newPhase = getPhaseAt(x+1, *i);
Chris@275 352 float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize;
Chris@275 353 float phaseError = princargf(newPhase - expectedPhase);
Chris@275 354 float frequency =
Chris@275 355 (sampleRate * (expectedPhase + phaseError - oldPhase))
Chris@275 356 / (2 * M_PI * incr);
Chris@275 357 // bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize));
Chris@275 358 // if (stable)
Chris@275 359 peaks[*i] = frequency;
Chris@275 360 ++phaseIndex;
Chris@275 361 }
Chris@275 362
Chris@275 363 return peaks;
Chris@275 364 }
Chris@275 365
Chris@152 366 Model *
Chris@152 367 FFTModel::clone() const
Chris@152 368 {
Chris@152 369 return new FFTModel(*this);
Chris@152 370 }
Chris@152 371
Chris@152 372 FFTModel::FFTModel(const FFTModel &model) :
Chris@152 373 DenseThreeDimensionalModel(),
Chris@152 374 m_server(model.m_server),
Chris@152 375 m_xshift(model.m_xshift),
Chris@152 376 m_yshift(model.m_yshift)
Chris@152 377 {
Chris@152 378 FFTDataServer::claimInstance(m_server);
Chris@152 379 }
Chris@152 380