comparison examples/FixedTempoEstimator.cpp @ 227:6b30e064cab7 distinct-libraries

* more moving
author cannam
date Thu, 06 Nov 2008 14:13:12 +0000
parents src/FixedTempoEstimator.cpp@14029eb08472
children 3cf5bd155e5b
comparison
equal deleted inserted replaced
226:14029eb08472 227:6b30e064cab7
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 Vamp
5
6 An API for audio analysis and feature extraction plugins.
7
8 Centre for Digital Music, Queen Mary, University of London.
9 Copyright 2006-2008 Chris Cannam and QMUL.
10
11 Permission is hereby granted, free of charge, to any person
12 obtaining a copy of this software and associated documentation
13 files (the "Software"), to deal in the Software without
14 restriction, including without limitation the rights to use, copy,
15 modify, merge, publish, distribute, sublicense, and/or sell copies
16 of the Software, and to permit persons to whom the Software is
17 furnished to do so, subject to the following conditions:
18
19 The above copyright notice and this permission notice shall be
20 included in all copies or substantial portions of the Software.
21
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
26 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
27 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29
30 Except as contained in this notice, the names of the Centre for
31 Digital Music; Queen Mary, University of London; and Chris Cannam
32 shall not be used in advertising or otherwise to promote the sale,
33 use or other dealings in this Software without prior written
34 authorization.
35 */
36
37 #include "FixedTempoEstimator.h"
38
39 using std::string;
40 using std::vector;
41 using std::cerr;
42 using std::endl;
43
44 using Vamp::RealTime;
45
46 #include <cmath>
47
48
49 FixedTempoEstimator::FixedTempoEstimator(float inputSampleRate) :
50 Plugin(inputSampleRate),
51 m_stepSize(0),
52 m_blockSize(0),
53 m_priorMagnitudes(0),
54 m_df(0),
55 m_r(0),
56 m_fr(0),
57 m_t(0),
58 m_n(0)
59 {
60 }
61
62 FixedTempoEstimator::~FixedTempoEstimator()
63 {
64 delete[] m_priorMagnitudes;
65 delete[] m_df;
66 delete[] m_r;
67 delete[] m_fr;
68 delete[] m_t;
69 }
70
71 string
72 FixedTempoEstimator::getIdentifier() const
73 {
74 return "fixedtempo";
75 }
76
77 string
78 FixedTempoEstimator::getName() const
79 {
80 return "Simple Fixed Tempo Estimator";
81 }
82
83 string
84 FixedTempoEstimator::getDescription() const
85 {
86 return "Study a short section of audio and estimate its tempo, assuming the tempo is constant";
87 }
88
89 string
90 FixedTempoEstimator::getMaker() const
91 {
92 return "Vamp SDK Example Plugins";
93 }
94
95 int
96 FixedTempoEstimator::getPluginVersion() const
97 {
98 return 1;
99 }
100
101 string
102 FixedTempoEstimator::getCopyright() const
103 {
104 return "Code copyright 2008 Queen Mary, University of London. Freely redistributable (BSD license)";
105 }
106
107 size_t
108 FixedTempoEstimator::getPreferredStepSize() const
109 {
110 return 64;
111 }
112
113 size_t
114 FixedTempoEstimator::getPreferredBlockSize() const
115 {
116 return 256;
117 }
118
119 bool
120 FixedTempoEstimator::initialise(size_t channels, size_t stepSize, size_t blockSize)
121 {
122 if (channels < getMinChannelCount() ||
123 channels > getMaxChannelCount()) return false;
124
125 m_stepSize = stepSize;
126 m_blockSize = blockSize;
127
128 float dfLengthSecs = 10.f;
129 m_dfsize = (dfLengthSecs * m_inputSampleRate) / m_stepSize;
130
131 m_priorMagnitudes = new float[m_blockSize/2];
132 m_df = new float[m_dfsize];
133
134 for (size_t i = 0; i < m_blockSize/2; ++i) {
135 m_priorMagnitudes[i] = 0.f;
136 }
137 for (size_t i = 0; i < m_dfsize; ++i) {
138 m_df[i] = 0.f;
139 }
140
141 m_n = 0;
142
143 return true;
144 }
145
146 void
147 FixedTempoEstimator::reset()
148 {
149 cerr << "FixedTempoEstimator: reset called" << endl;
150
151 if (!m_priorMagnitudes) return;
152
153 cerr << "FixedTempoEstimator: resetting" << endl;
154
155 for (size_t i = 0; i < m_blockSize/2; ++i) {
156 m_priorMagnitudes[i] = 0.f;
157 }
158 for (size_t i = 0; i < m_dfsize; ++i) {
159 m_df[i] = 0.f;
160 }
161
162 delete[] m_r;
163 m_r = 0;
164
165 delete[] m_fr;
166 m_fr = 0;
167
168 delete[] m_t;
169 m_t = 0;
170
171 m_n = 0;
172
173 m_start = RealTime::zeroTime;
174 m_lasttime = RealTime::zeroTime;
175 }
176
177 FixedTempoEstimator::ParameterList
178 FixedTempoEstimator::getParameterDescriptors() const
179 {
180 ParameterList list;
181 return list;
182 }
183
184 float
185 FixedTempoEstimator::getParameter(std::string id) const
186 {
187 return 0.f;
188 }
189
190 void
191 FixedTempoEstimator::setParameter(std::string id, float value)
192 {
193 }
194
// Indices of the plugin's outputs, used as keys into the FeatureSet.
// They must match the order in which getOutputDescriptors() pushes the
// corresponding descriptors.  Declared const so that they cannot be
// modified by accident at run time.
static const int TempoOutput = 0;
static const int CandidatesOutput = 1;
static const int DFOutput = 2;
static const int ACFOutput = 3;
static const int FilteredACFOutput = 4;
200
201 FixedTempoEstimator::OutputList
202 FixedTempoEstimator::getOutputDescriptors() const
203 {
204 OutputList list;
205
206 OutputDescriptor d;
207 d.identifier = "tempo";
208 d.name = "Tempo";
209 d.description = "Estimated tempo";
210 d.unit = "bpm";
211 d.hasFixedBinCount = true;
212 d.binCount = 1;
213 d.hasKnownExtents = false;
214 d.isQuantized = false;
215 d.sampleType = OutputDescriptor::VariableSampleRate;
216 d.sampleRate = m_inputSampleRate;
217 d.hasDuration = true; // our returned tempo spans a certain range
218 list.push_back(d);
219
220 d.identifier = "candidates";
221 d.name = "Tempo candidates";
222 d.description = "Possible tempo estimates, one per bin with the most likely in the first bin";
223 d.unit = "bpm";
224 d.hasFixedBinCount = false;
225 list.push_back(d);
226
227 d.identifier = "detectionfunction";
228 d.name = "Detection Function";
229 d.description = "Onset detection function";
230 d.unit = "";
231 d.hasFixedBinCount = 1;
232 d.binCount = 1;
233 d.hasKnownExtents = true;
234 d.minValue = 0.0;
235 d.maxValue = 1.0;
236 d.isQuantized = false;
237 d.quantizeStep = 0.0;
238 d.sampleType = OutputDescriptor::FixedSampleRate;
239 if (m_stepSize) {
240 d.sampleRate = m_inputSampleRate / m_stepSize;
241 } else {
242 d.sampleRate = m_inputSampleRate / (getPreferredBlockSize()/2);
243 }
244 d.hasDuration = false;
245 list.push_back(d);
246
247 d.identifier = "acf";
248 d.name = "Autocorrelation Function";
249 d.description = "Autocorrelation of onset detection function";
250 d.hasKnownExtents = false;
251 d.unit = "r";
252 list.push_back(d);
253
254 d.identifier = "filtered_acf";
255 d.name = "Filtered Autocorrelation";
256 d.description = "Filtered autocorrelation of onset detection function";
257 d.unit = "r";
258 list.push_back(d);
259
260 return list;
261 }
262
263 FixedTempoEstimator::FeatureSet
264 FixedTempoEstimator::process(const float *const *inputBuffers, RealTime ts)
265 {
266 FeatureSet fs;
267
268 if (m_stepSize == 0) {
269 cerr << "ERROR: FixedTempoEstimator::process: "
270 << "FixedTempoEstimator has not been initialised"
271 << endl;
272 return fs;
273 }
274
275 // if (m_n < m_dfsize) cerr << "m_n = " << m_n << endl;
276
277 if (m_n == 0) m_start = ts;
278 m_lasttime = ts;
279
280 if (m_n == m_dfsize) {
281 calculate();
282 fs = assembleFeatures();
283 ++m_n;
284 return fs;
285 }
286
287 if (m_n > m_dfsize) return FeatureSet();
288
289 float value = 0.f;
290
291 for (size_t i = 1; i < m_blockSize/2; ++i) {
292
293 float real = inputBuffers[0][i*2];
294 float imag = inputBuffers[0][i*2 + 1];
295
296 float sqrmag = real * real + imag * imag;
297 value += fabsf(sqrmag - m_priorMagnitudes[i]);
298
299 m_priorMagnitudes[i] = sqrmag;
300 }
301
302 m_df[m_n] = value;
303
304 ++m_n;
305 return fs;
306 }
307
308 FixedTempoEstimator::FeatureSet
309 FixedTempoEstimator::getRemainingFeatures()
310 {
311 FeatureSet fs;
312 if (m_n > m_dfsize) return fs;
313 calculate();
314 fs = assembleFeatures();
315 ++m_n;
316 return fs;
317 }
318
319 float
320 FixedTempoEstimator::lag2tempo(int lag)
321 {
322 return 60.f / ((lag * m_stepSize) / m_inputSampleRate);
323 }
324
325 int
326 FixedTempoEstimator::tempo2lag(float tempo)
327 {
328 return ((60.f / tempo) * m_inputSampleRate) / m_stepSize;
329 }
330
331 void
332 FixedTempoEstimator::calculate()
333 {
334 cerr << "FixedTempoEstimator::calculate: m_n = " << m_n << endl;
335
336 if (m_r) {
337 cerr << "FixedTempoEstimator::calculate: calculation already happened?" << endl;
338 return;
339 }
340
341 if (m_n < m_dfsize / 9) {
342 cerr << "FixedTempoEstimator::calculate: Not enough data to go on (have " << m_n << ", want at least " << m_dfsize/4 << ")" << endl;
343 return; // not enough data (perhaps we should return the duration of the input as the "estimated" beat length?)
344 }
345
346 int n = m_n;
347
348 m_r = new float[n/2];
349 m_fr = new float[n/2];
350 m_t = new float[n/2];
351
352 for (int i = 0; i < n/2; ++i) {
353 m_r[i] = 0.f;
354 m_fr[i] = 0.f;
355 m_t[i] = lag2tempo(i);
356 }
357
358 for (int i = 0; i < n/2; ++i) {
359
360 for (int j = i; j < n-1; ++j) {
361 m_r[i] += m_df[j] * m_df[j - i];
362 }
363
364 m_r[i] /= n - i - 1;
365 }
366
367 float related[] = { 0.5, 2, 3, 4 };
368
369 for (int i = 1; i < n/2-1; ++i) {
370
371 float weight = 1.f - fabsf(128.f - lag2tempo(i)) * 0.005;
372 if (weight < 0.f) weight = 0.f;
373 weight = weight * weight * weight;
374
375 m_fr[i] = m_r[i];
376
377 int div = 1;
378
379 for (int j = 0; j < int(sizeof(related)/sizeof(related[0])); ++j) {
380
381 int k0 = int(i * related[j] + 0.5);
382
383 if (k0 >= 0 && k0 < int(n/2)) {
384
385 int kmax = 0, kmin = 0;
386 float kvmax = 0, kvmin = 0;
387 bool have = false;
388
389 for (int k = k0 - 1; k <= k0 + 1; ++k) {
390
391 if (k < 0 || k >= n/2) continue;
392
393 if (!have || (m_r[k] > kvmax)) { kmax = k; kvmax = m_r[k]; }
394 if (!have || (m_r[k] < kvmin)) { kmin = k; kvmin = m_r[k]; }
395
396 have = true;
397 }
398
399 m_fr[i] += m_r[kmax] / 5;
400
401 if ((kmax == 0 || m_r[kmax] > m_r[kmax-1]) &&
402 (kmax == n/2-1 || m_r[kmax] > m_r[kmax+1]) &&
403 kvmax > kvmin * 1.05) {
404
405 m_t[i] = m_t[i] + lag2tempo(kmax) * related[j];
406 ++div;
407 }
408 }
409 }
410
411 m_t[i] /= div;
412
413 // if (div > 1) {
414 // cerr << "adjusting tempo from " << lag2tempo(i) << " to "
415 // << m_t[i] << " for fr = " << m_fr[i] << " (div = " << div << ")" << endl;
416 // }
417
418 m_fr[i] += m_fr[i] * (weight / 3);
419 }
420 }
421
422
423 FixedTempoEstimator::FeatureSet
424 FixedTempoEstimator::assembleFeatures()
425 {
426 FeatureSet fs;
427 if (!m_r) return fs; // No results
428
429 Feature feature;
430 feature.hasTimestamp = true;
431 feature.hasDuration = false;
432 feature.label = "";
433 feature.values.clear();
434 feature.values.push_back(0.f);
435
436 char buffer[40];
437
438 int n = m_n;
439
440 for (int i = 0; i < n; ++i) {
441 feature.timestamp = m_start +
442 RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
443 feature.values[0] = m_df[i];
444 feature.label = "";
445 fs[DFOutput].push_back(feature);
446 }
447
448 for (int i = 1; i < n/2; ++i) {
449 feature.timestamp = m_start +
450 RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
451 feature.values[0] = m_r[i];
452 sprintf(buffer, "%.1f bpm", lag2tempo(i));
453 if (i == n/2-1) feature.label = "";
454 else feature.label = buffer;
455 fs[ACFOutput].push_back(feature);
456 }
457
458 float t0 = 50.f; // our minimum detected tempo (could be a parameter)
459 float t1 = 190.f; // our maximum detected tempo
460
461 //!!! need some way for the host (or at least, the user) to know
462 //!!! that it should only pass a certain amount of
463 //!!! input... e.g. by making the amount configurable
464
465 int p0 = tempo2lag(t1);
466 int p1 = tempo2lag(t0);
467
468 std::map<float, int> candidates;
469
470 for (int i = p0; i <= p1 && i < n/2-1; ++i) {
471
472 if (m_fr[i] > m_fr[i-1] &&
473 m_fr[i] > m_fr[i+1]) {
474 candidates[m_fr[i]] = i;
475 }
476
477 feature.timestamp = m_start +
478 RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
479 feature.values[0] = m_fr[i];
480 sprintf(buffer, "%.1f bpm", lag2tempo(i));
481 if (i == p1 || i == n/2-2) feature.label = "";
482 else feature.label = buffer;
483 fs[FilteredACFOutput].push_back(feature);
484 }
485
486 // cerr << "maxpi = " << maxpi << " for tempo " << lag2tempo(maxpi) << " (value = " << maxp << ")" << endl;
487
488 if (candidates.empty()) {
489 cerr << "No tempo candidates!" << endl;
490 return fs;
491 }
492
493 feature.hasTimestamp = true;
494 feature.timestamp = m_start;
495
496 feature.hasDuration = true;
497 feature.duration = m_lasttime - m_start;
498
499 std::map<float, int>::const_iterator ci = candidates.end();
500 --ci;
501 int maxpi = ci->second;
502
503 if (m_t[maxpi] > 0) {
504 cerr << "*** Using adjusted tempo " << m_t[maxpi] << " instead of lag tempo " << lag2tempo(maxpi) << endl;
505 feature.values[0] = m_t[maxpi];
506 } else {
507 // shouldn't happen -- it would imply that this high value was not a peak!
508 feature.values[0] = lag2tempo(maxpi);
509 cerr << "WARNING: No stored tempo for index " << maxpi << endl;
510 }
511
512 sprintf(buffer, "%.1f bpm", feature.values[0]);
513 feature.label = buffer;
514
515 fs[TempoOutput].push_back(feature);
516
517 feature.values.clear();
518 feature.label = "";
519
520 while (feature.values.size() < 8) {
521 // cerr << "adding tempo value from lag " << ci->second << endl;
522 if (m_t[ci->second] > 0) {
523 feature.values.push_back(m_t[ci->second]);
524 } else {
525 feature.values.push_back(lag2tempo(ci->second));
526 }
527 if (ci == candidates.begin()) break;
528 --ci;
529 }
530
531 fs[CandidatesOutput].push_back(feature);
532
533 return fs;
534 }