comparison data/model/FFTModel.cpp @ 1126:39019ce29178 tony-2.0-integration

Merge through to branch for Tony 2.0
author Chris Cannam
date Thu, 20 Aug 2015 14:54:21 +0100
parents 5cbf71022679
children e994747fb9dd
comparison
equal deleted inserted replaced
1119:e22bfe8ca248 1126:39019ce29178
13 COPYING included with this distribution for more information. 13 COPYING included with this distribution for more information.
14 */ 14 */
15 15
16 #include "FFTModel.h" 16 #include "FFTModel.h"
17 #include "DenseTimeValueModel.h" 17 #include "DenseTimeValueModel.h"
18 #include "AggregateWaveModel.h"
19 18
20 #include "base/Profiler.h" 19 #include "base/Profiler.h"
21 #include "base/Pitch.h" 20 #include "base/Pitch.h"
22 21
23 #include <algorithm> 22 #include <algorithm>
24 23
25 #include <cassert> 24 #include <cassert>
25 #include <deque>
26 26
27 #ifndef __GNUC__ 27 #ifndef __GNUC__
28 #include <alloca.h> 28 #include <alloca.h>
29 #endif 29 #endif
30
31 using namespace std;
30 32
31 FFTModel::FFTModel(const DenseTimeValueModel *model, 33 FFTModel::FFTModel(const DenseTimeValueModel *model,
32 int channel, 34 int channel,
33 WindowType windowType, 35 WindowType windowType,
34 int windowSize, 36 int windowSize,
35 int windowIncrement, 37 int windowIncrement,
36 int fftSize, 38 int fftSize) :
37 bool polar, 39 m_model(model),
38 StorageAdviser::Criteria criteria, 40 m_channel(channel),
39 sv_frame_t fillFromFrame) : 41 m_windowType(windowType),
40 //!!! ZoomConstraint! 42 m_windowSize(windowSize),
41 m_server(0), 43 m_windowIncrement(windowIncrement),
42 m_xshift(0), 44 m_fftSize(fftSize),
43 m_yshift(0) 45 m_windower(windowType, windowSize),
44 { 46 m_fft(fftSize),
45 setSourceModel(const_cast<DenseTimeValueModel *>(model)); //!!! hmm. 47 m_cacheSize(3)
46 48 {
47 m_server = getServer(model, 49 if (m_windowSize > m_fftSize) {
48 channel, 50 cerr << "ERROR: FFTModel::FFTModel: window size (" << m_windowSize
49 windowType, 51 << ") must be at least FFT size (" << m_fftSize << ")" << endl;
50 windowSize, 52 throw invalid_argument("FFTModel window size must be at least FFT size");
51 windowIncrement,
52 fftSize,
53 polar,
54 criteria,
55 fillFromFrame);
56
57 if (!m_server) return; // caller should check isOK()
58
59 int xratio = windowIncrement / m_server->getWindowIncrement();
60 int yratio = m_server->getFFTSize() / fftSize;
61
62 while (xratio > 1) {
63 if (xratio & 0x1) {
64 cerr << "ERROR: FFTModel: Window increment ratio "
65 << windowIncrement << " / "
66 << m_server->getWindowIncrement()
67 << " must be a power of two" << endl;
68 assert(!(xratio & 0x1));
69 }
70 ++m_xshift;
71 xratio >>= 1;
72 }
73
74 while (yratio > 1) {
75 if (yratio & 0x1) {
76 cerr << "ERROR: FFTModel: FFT size ratio "
77 << m_server->getFFTSize() << " / " << fftSize
78 << " must be a power of two" << endl;
79 assert(!(yratio & 0x1));
80 }
81 ++m_yshift;
82 yratio >>= 1;
83 } 53 }
84 } 54 }
85 55
86 FFTModel::~FFTModel() 56 FFTModel::~FFTModel()
87 { 57 {
88 if (m_server) FFTDataServer::releaseInstance(m_server);
89 } 58 }
90 59
91 void 60 void
92 FFTModel::sourceModelAboutToBeDeleted() 61 FFTModel::sourceModelAboutToBeDeleted()
93 { 62 {
94 if (m_sourceModel) { 63 if (m_model) {
95 cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_sourceModel << ")" << endl; 64 cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_model << ")" << endl;
96 if (m_server) { 65 m_model = 0;
97 FFTDataServer::releaseInstance(m_server); 66 }
98 m_server = 0; 67 }
99 } 68
100 FFTDataServer::modelAboutToBeDeleted(m_sourceModel); 69 int
101 } 70 FFTModel::getWidth() const
102 } 71 {
103 72 if (!m_model) return 0;
104 FFTDataServer * 73 return int((m_model->getEndFrame() - m_model->getStartFrame())
105 FFTModel::getServer(const DenseTimeValueModel *model, 74 / m_windowIncrement) + 1;
106 int channel, 75 }
107 WindowType windowType, 76
108 int windowSize, 77 int
109 int windowIncrement, 78 FFTModel::getHeight() const
110 int fftSize, 79 {
111 bool polar, 80 return m_fftSize / 2 + 1;
112 StorageAdviser::Criteria criteria,
113 sv_frame_t fillFromFrame)
114 {
115 // Obviously, an FFT model of channel C (where C != -1) of an
116 // aggregate model is the same as the FFT model of the appropriate
117 // channel of whichever model that aggregate channel is drawn
118 // from. We should use that model here, in case we already have
119 // the data for it or will be wanting the same data again later.
120
121 // If the channel is -1 (i.e. mixture of all channels), then we
122 // can't do this shortcut unless the aggregate model only has one
123 // channel or contains exactly all of the channels of a single
124 // other model. That isn't very likely -- if it were the case,
125 // why would we be using an aggregate model?
126
127 if (channel >= 0) {
128
129 const AggregateWaveModel *aggregate =
130 dynamic_cast<const AggregateWaveModel *>(model);
131
132 if (aggregate && channel < aggregate->getComponentCount()) {
133
134 AggregateWaveModel::ModelChannelSpec spec =
135 aggregate->getComponent(channel);
136
137 return getServer(spec.model,
138 spec.channel,
139 windowType,
140 windowSize,
141 windowIncrement,
142 fftSize,
143 polar,
144 criteria,
145 fillFromFrame);
146 }
147 }
148
149 // The normal case
150
151 return FFTDataServer::getFuzzyInstance(model,
152 channel,
153 windowType,
154 windowSize,
155 windowIncrement,
156 fftSize,
157 polar,
158 criteria,
159 fillFromFrame);
160 }
161
162 sv_samplerate_t
163 FFTModel::getSampleRate() const
164 {
165 return isOK() ? m_server->getModel()->getSampleRate() : 0;
166 }
167
168 FFTModel::Column
169 FFTModel::getColumn(int x) const
170 {
171 Profiler profiler("FFTModel::getColumn", false);
172
173 Column result;
174
175 result.clear();
176 int h = getHeight();
177 result.reserve(h);
178
179 #ifdef __GNUC__
180 float magnitudes[h];
181 #else
182 float *magnitudes = (float *)alloca(h * sizeof(float));
183 #endif
184
185 if (m_server->getMagnitudesAt(x << m_xshift, magnitudes)) {
186
187 for (int y = 0; y < h; ++y) {
188 result.push_back(magnitudes[y]);
189 }
190
191 } else {
192 for (int i = 0; i < h; ++i) result.push_back(0.f);
193 }
194
195 return result;
196 } 81 }
197 82
198 QString 83 QString
199 FFTModel::getBinName(int n) const 84 FFTModel::getBinName(int n) const
200 { 85 {
202 if (!sr) return ""; 87 if (!sr) return "";
203 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2)); 88 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
204 return name; 89 return name;
205 } 90 }
206 91
92 FFTModel::Column
93 FFTModel::getColumn(int x) const
94 {
95 auto cplx = getFFTColumn(x);
96 Column col;
97 col.reserve(int(cplx.size()));
98 for (auto c: cplx) col.push_back(abs(c));
99 return col;
100 }
101
102 float
103 FFTModel::getMagnitudeAt(int x, int y) const
104 {
105 if (x < 0 || x >= getWidth() || y < 0 || y >= getHeight()) return 0.f;
106 auto col = getFFTColumn(x);
107 return abs(col[y]);
108 }
109
110 float
111 FFTModel::getMaximumMagnitudeAt(int x) const
112 {
113 Column col(getColumn(x));
114 float max = 0.f;
115 for (int i = 0; i < col.size(); ++i) {
116 if (col[i] > max) max = col[i];
117 }
118 return max;
119 }
120
121 float
122 FFTModel::getPhaseAt(int x, int y) const
123 {
124 if (x < 0 || x >= getWidth() || y < 0 || y >= getHeight()) return 0.f;
125 return arg(getFFTColumn(x)[y]);
126 }
127
128 void
129 FFTModel::getValuesAt(int x, int y, float &re, float &im) const
130 {
131 auto col = getFFTColumn(x);
132 re = col[y].real();
133 im = col[y].imag();
134 }
135
136 bool
137 FFTModel::isColumnAvailable(int) const
138 {
139 //!!!
140 return true;
141 }
142
143 bool
144 FFTModel::getMagnitudesAt(int x, float *values, int minbin, int count) const
145 {
146 if (count == 0) count = getHeight();
147 auto col = getFFTColumn(x);
148 for (int i = 0; i < count; ++i) {
149 values[i] = abs(col[minbin + i]);
150 }
151 return true;
152 }
153
154 bool
155 FFTModel::getNormalizedMagnitudesAt(int x, float *values, int minbin, int count) const
156 {
157 if (!getMagnitudesAt(x, values, minbin, count)) return false;
158 if (count == 0) count = getHeight();
159 float max = 0.f;
160 for (int i = 0; i < count; ++i) {
161 if (values[i] > max) max = values[i];
162 }
163 if (max > 0.f) {
164 for (int i = 0; i < count; ++i) {
165 values[i] /= max;
166 }
167 }
168 return true;
169 }
170
171 bool
172 FFTModel::getPhasesAt(int x, float *values, int minbin, int count) const
173 {
174 if (count == 0) count = getHeight();
175 auto col = getFFTColumn(x);
176 for (int i = 0; i < count; ++i) {
177 values[i] = arg(col[minbin + i]);
178 }
179 return true;
180 }
181
182 bool
183 FFTModel::getValuesAt(int x, float *reals, float *imags, int minbin, int count) const
184 {
185 if (count == 0) count = getHeight();
186 auto col = getFFTColumn(x);
187 for (int i = 0; i < count; ++i) {
188 reals[i] = col[minbin + i].real();
189 }
190 for (int i = 0; i < count; ++i) {
191 imags[i] = col[minbin + i].imag();
192 }
193 return true;
194 }
195
196 vector<float>
197 FFTModel::getSourceSamples(int column) const
198 {
199 // m_fftSize may be greater than m_windowSize, but not the reverse
200
201 // cerr << "getSourceSamples(" << column << ")" << endl;
202
203 auto range = getSourceSampleRange(column);
204 auto data = getSourceData(range);
205
206 int off = (m_fftSize - m_windowSize) / 2;
207
208 if (off == 0) {
209 return data;
210 } else {
211 vector<float> pad(off, 0.f);
212 vector<float> padded;
213 padded.reserve(m_fftSize);
214 padded.insert(padded.end(), pad.begin(), pad.end());
215 padded.insert(padded.end(), data.begin(), data.end());
216 padded.insert(padded.end(), pad.begin(), pad.end());
217 return padded;
218 }
219 }
220
221 vector<float>
222 FFTModel::getSourceData(pair<sv_frame_t, sv_frame_t> range) const
223 {
224 // cerr << "getSourceData(" << range.first << "," << range.second
225 // << "): saved range is (" << m_savedData.range.first
226 // << "," << m_savedData.range.second << ")" << endl;
227
228 if (m_savedData.range == range) {
229 return m_savedData.data;
230 }
231
232 if (range.first < m_savedData.range.second &&
233 range.first >= m_savedData.range.first &&
234 range.second > m_savedData.range.second) {
235
236 sv_frame_t discard = range.first - m_savedData.range.first;
237
238 vector<float> acc(m_savedData.data.begin() + discard,
239 m_savedData.data.end());
240
241 vector<float> rest =
242 getSourceDataUncached({ m_savedData.range.second, range.second });
243
244 acc.insert(acc.end(), rest.begin(), rest.end());
245
246 m_savedData = { range, acc };
247 return acc;
248
249 } else {
250
251 auto data = getSourceDataUncached(range);
252 m_savedData = { range, data };
253 return data;
254 }
255 }
256
257 vector<float>
258 FFTModel::getSourceDataUncached(pair<sv_frame_t, sv_frame_t> range) const
259 {
260 decltype(range.first) pfx = 0;
261 if (range.first < 0) {
262 pfx = -range.first;
263 range = { 0, range.second };
264 }
265
266 auto data = m_model->getData(m_channel,
267 range.first,
268 range.second - range.first);
269
270 // don't return a partial frame
271 data.resize(range.second - range.first, 0.f);
272
273 if (pfx > 0) {
274 vector<float> pad(pfx, 0.f);
275 data.insert(data.begin(), pad.begin(), pad.end());
276 }
277
278 if (m_channel == -1) {
279 int channels = m_model->getChannelCount();
280 if (channels > 1) {
281 int n = int(data.size());
282 float factor = 1.f / float(channels);
283 // use mean instead of sum for fft model input
284 for (int i = 0; i < n; ++i) {
285 data[i] *= factor;
286 }
287 }
288 }
289
290 return data;
291 }
292
293 vector<complex<float>>
294 FFTModel::getFFTColumn(int n) const
295 {
296 for (auto &incache : m_cached) {
297 if (incache.n == n) {
298 return incache.col;
299 }
300 }
301
302 auto samples = getSourceSamples(n);
303 m_windower.cut(samples.data());
304 auto col = m_fft.process(samples);
305
306 SavedColumn sc { n, col };
307 if (m_cached.size() >= m_cacheSize) {
308 m_cached.pop_front();
309 }
310 m_cached.push_back(sc);
311
312 return col;
313 }
314
207 bool 315 bool
208 FFTModel::estimateStableFrequency(int x, int y, double &frequency) 316 FFTModel::estimateStableFrequency(int x, int y, double &frequency)
209 { 317 {
210 if (!isOK()) return false; 318 if (!isOK()) return false;
211 319
212 sv_samplerate_t sampleRate = m_server->getModel()->getSampleRate(); 320 frequency = double(y * getSampleRate()) / m_fftSize;
213
214 int fftSize = m_server->getFFTSize() >> m_yshift;
215 frequency = double(y * sampleRate) / fftSize;
216 321
217 if (x+1 >= getWidth()) return false; 322 if (x+1 >= getWidth()) return false;
218 323
219 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec. 324 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
220 // At hopsize h and sample rate sr, one hop happens in h/sr sec. 325 // At hopsize h and sample rate sr, one hop happens in h/sr sec.
228 double oldPhase = getPhaseAt(x, y); 333 double oldPhase = getPhaseAt(x, y);
229 double newPhase = getPhaseAt(x+1, y); 334 double newPhase = getPhaseAt(x+1, y);
230 335
231 int incr = getResolution(); 336 int incr = getResolution();
232 337
233 double expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize; 338 double expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / m_fftSize;
234 339
235 double phaseError = princarg(newPhase - expectedPhase); 340 double phaseError = princarg(newPhase - expectedPhase);
236
237 // bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize));
238 341
239 // The new frequency estimate based on the phase error resulting 342 // The new frequency estimate based on the phase error resulting
240 // from assuming the "native" frequency of this bin 343 // from assuming the "native" frequency of this bin
241 344
242 frequency = 345 frequency =
243 (sampleRate * (expectedPhase + phaseError - oldPhase)) / 346 (getSampleRate() * (expectedPhase + phaseError - oldPhase)) /
244 (2.0 * M_PI * incr); 347 (2.0 * M_PI * incr);
245 348
246 return true; 349 return true;
247 } 350 }
248 351
291 // exceed the median. For pitch adaptivity, we adjust the window 394 // exceed the median. For pitch adaptivity, we adjust the window
292 // size to a roughly constant pitch range (about four tones). 395 // size to a roughly constant pitch range (about four tones).
293 396
294 sv_samplerate_t sampleRate = getSampleRate(); 397 sv_samplerate_t sampleRate = getSampleRate();
295 398
296 std::deque<float> window; 399 deque<float> window;
297 std::vector<int> inrange; 400 vector<int> inrange;
298 float dist = 0.5; 401 float dist = 0.5;
299 402
300 int medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist); 403 int medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
301 int halfWin = medianWinSize/2; 404 int halfWin = medianWinSize/2;
302 405
329 if (type == MajorPitchAdaptivePeaks) { 432 if (type == MajorPitchAdaptivePeaks) {
330 if (ymax + halfWin < values.size()) binmax = ymax + halfWin; 433 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
331 else binmax = values.size()-1; 434 else binmax = values.size()-1;
332 } 435 }
333 436
334 std::deque<float> sorted(window); 437 deque<float> sorted(window);
335 std::sort(sorted.begin(), sorted.end()); 438 sort(sorted.begin(), sorted.end());
336 float median = sorted[int(float(sorted.size()) * dist)]; 439 float median = sorted[int(float(sorted.size()) * dist)];
337 440
338 int centrebin = 0; 441 int centrebin = 0;
339 if (bin > actualSize/2) centrebin = bin - actualSize/2; 442 if (bin > actualSize/2) centrebin = bin - actualSize/2;
340 443
378 { 481 {
379 percentile = 0.5; 482 percentile = 0.5;
380 if (type == MajorPeaks) return 10; 483 if (type == MajorPeaks) return 10;
381 if (bin == 0) return 3; 484 if (bin == 0) return 3;
382 485
383 int fftSize = m_server->getFFTSize() >> m_yshift; 486 double binfreq = (sampleRate * bin) / m_fftSize;
384 double binfreq = (sampleRate * bin) / fftSize;
385 double hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq); 487 double hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
386 488
387 int hibin = int(lrint((hifreq * fftSize) / sampleRate)); 489 int hibin = int(lrint((hifreq * m_fftSize) / sampleRate));
388 int medianWinSize = hibin - bin; 490 int medianWinSize = hibin - bin;
389 if (medianWinSize < 3) medianWinSize = 3; 491 if (medianWinSize < 3) medianWinSize = 3;
390 492
391 percentile = 0.5f + float(binfreq / sampleRate); 493 percentile = 0.5f + float(binfreq / sampleRate);
392 494
402 PeakSet peaks; 504 PeakSet peaks;
403 if (!isOK()) return peaks; 505 if (!isOK()) return peaks;
404 PeakLocationSet locations = getPeaks(type, x, ymin, ymax); 506 PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
405 507
406 sv_samplerate_t sampleRate = getSampleRate(); 508 sv_samplerate_t sampleRate = getSampleRate();
407 int fftSize = m_server->getFFTSize() >> m_yshift;
408 int incr = getResolution(); 509 int incr = getResolution();
409 510
410 // This duplicates some of the work of estimateStableFrequency to 511 // This duplicates some of the work of estimateStableFrequency to
411 // allow us to retrieve the phases in two separate vertical 512 // allow us to retrieve the phases in two separate vertical
412 // columns, instead of jumping back and forth between columns x and 513 // columns, instead of jumping back and forth between columns x and
413 // x+1, which may be significantly slower if re-seeking is needed 514 // x+1, which may be significantly slower if re-seeking is needed
414 515
415 std::vector<float> phases; 516 vector<float> phases;
416 for (PeakLocationSet::iterator i = locations.begin(); 517 for (PeakLocationSet::iterator i = locations.begin();
417 i != locations.end(); ++i) { 518 i != locations.end(); ++i) {
418 phases.push_back(getPhaseAt(x, *i)); 519 phases.push_back(getPhaseAt(x, *i));
419 } 520 }
420 521
421 int phaseIndex = 0; 522 int phaseIndex = 0;
422 for (PeakLocationSet::iterator i = locations.begin(); 523 for (PeakLocationSet::iterator i = locations.begin();
423 i != locations.end(); ++i) { 524 i != locations.end(); ++i) {
424 double oldPhase = phases[phaseIndex]; 525 double oldPhase = phases[phaseIndex];
425 double newPhase = getPhaseAt(x+1, *i); 526 double newPhase = getPhaseAt(x+1, *i);
426 double expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize; 527 double expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / m_fftSize;
427 double phaseError = princarg(newPhase - expectedPhase); 528 double phaseError = princarg(newPhase - expectedPhase);
428 double frequency = 529 double frequency =
429 (sampleRate * (expectedPhase + phaseError - oldPhase)) 530 (sampleRate * (expectedPhase + phaseError - oldPhase))
430 / (2 * M_PI * incr); 531 / (2 * M_PI * incr);
431 // bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize));
432 // if (stable)
433 peaks[*i] = frequency; 532 peaks[*i] = frequency;
434 ++phaseIndex; 533 ++phaseIndex;
435 } 534 }
436 535
437 return peaks; 536 return peaks;
438 } 537 }
439 538
440 FFTModel::FFTModel(const FFTModel &model) :
441 DenseThreeDimensionalModel(),
442 m_server(model.m_server),
443 m_xshift(model.m_xshift),
444 m_yshift(model.m_yshift)
445 {
446 FFTDataServer::claimInstance(m_server);
447 }
448