Mercurial > hg > vamp-plugin-sdk
comparison examples/FixedTempoEstimator.cpp @ 227:6b30e064cab7 distinct-libraries
* more moving
author | cannam |
---|---|
date | Thu, 06 Nov 2008 14:13:12 +0000 |
parents | src/FixedTempoEstimator.cpp@14029eb08472 |
children | 3cf5bd155e5b |
comparison
equal
deleted
inserted
replaced
226:14029eb08472 | 227:6b30e064cab7 |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 Vamp | |
5 | |
6 An API for audio analysis and feature extraction plugins. | |
7 | |
8 Centre for Digital Music, Queen Mary, University of London. | |
9 Copyright 2006-2008 Chris Cannam and QMUL. | |
10 | |
11 Permission is hereby granted, free of charge, to any person | |
12 obtaining a copy of this software and associated documentation | |
13 files (the "Software"), to deal in the Software without | |
14 restriction, including without limitation the rights to use, copy, | |
15 modify, merge, publish, distribute, sublicense, and/or sell copies | |
16 of the Software, and to permit persons to whom the Software is | |
17 furnished to do so, subject to the following conditions: | |
18 | |
19 The above copyright notice and this permission notice shall be | |
20 included in all copies or substantial portions of the Software. | |
21 | |
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
23 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
24 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
25 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR | |
26 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF | |
27 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION | |
28 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
29 | |
30 Except as contained in this notice, the names of the Centre for | |
31 Digital Music; Queen Mary, University of London; and Chris Cannam | |
32 shall not be used in advertising or otherwise to promote the sale, | |
33 use or other dealings in this Software without prior written | |
34 authorization. | |
35 */ | |
36 | |
37 #include "FixedTempoEstimator.h" | |
38 | |
39 using std::string; | |
40 using std::vector; | |
41 using std::cerr; | |
42 using std::endl; | |
43 | |
44 using Vamp::RealTime; | |
45 | |
46 #include <cmath> | |
47 | |
48 | |
49 FixedTempoEstimator::FixedTempoEstimator(float inputSampleRate) : | |
50 Plugin(inputSampleRate), | |
51 m_stepSize(0), | |
52 m_blockSize(0), | |
53 m_priorMagnitudes(0), | |
54 m_df(0), | |
55 m_r(0), | |
56 m_fr(0), | |
57 m_t(0), | |
58 m_n(0) | |
59 { | |
60 } | |
61 | |
62 FixedTempoEstimator::~FixedTempoEstimator() | |
63 { | |
64 delete[] m_priorMagnitudes; | |
65 delete[] m_df; | |
66 delete[] m_r; | |
67 delete[] m_fr; | |
68 delete[] m_t; | |
69 } | |
70 | |
71 string | |
72 FixedTempoEstimator::getIdentifier() const | |
73 { | |
74 return "fixedtempo"; | |
75 } | |
76 | |
77 string | |
78 FixedTempoEstimator::getName() const | |
79 { | |
80 return "Simple Fixed Tempo Estimator"; | |
81 } | |
82 | |
83 string | |
84 FixedTempoEstimator::getDescription() const | |
85 { | |
86 return "Study a short section of audio and estimate its tempo, assuming the tempo is constant"; | |
87 } | |
88 | |
89 string | |
90 FixedTempoEstimator::getMaker() const | |
91 { | |
92 return "Vamp SDK Example Plugins"; | |
93 } | |
94 | |
95 int | |
96 FixedTempoEstimator::getPluginVersion() const | |
97 { | |
98 return 1; | |
99 } | |
100 | |
101 string | |
102 FixedTempoEstimator::getCopyright() const | |
103 { | |
104 return "Code copyright 2008 Queen Mary, University of London. Freely redistributable (BSD license)"; | |
105 } | |
106 | |
107 size_t | |
108 FixedTempoEstimator::getPreferredStepSize() const | |
109 { | |
110 return 64; | |
111 } | |
112 | |
113 size_t | |
114 FixedTempoEstimator::getPreferredBlockSize() const | |
115 { | |
116 return 256; | |
117 } | |
118 | |
119 bool | |
120 FixedTempoEstimator::initialise(size_t channels, size_t stepSize, size_t blockSize) | |
121 { | |
122 if (channels < getMinChannelCount() || | |
123 channels > getMaxChannelCount()) return false; | |
124 | |
125 m_stepSize = stepSize; | |
126 m_blockSize = blockSize; | |
127 | |
128 float dfLengthSecs = 10.f; | |
129 m_dfsize = (dfLengthSecs * m_inputSampleRate) / m_stepSize; | |
130 | |
131 m_priorMagnitudes = new float[m_blockSize/2]; | |
132 m_df = new float[m_dfsize]; | |
133 | |
134 for (size_t i = 0; i < m_blockSize/2; ++i) { | |
135 m_priorMagnitudes[i] = 0.f; | |
136 } | |
137 for (size_t i = 0; i < m_dfsize; ++i) { | |
138 m_df[i] = 0.f; | |
139 } | |
140 | |
141 m_n = 0; | |
142 | |
143 return true; | |
144 } | |
145 | |
146 void | |
147 FixedTempoEstimator::reset() | |
148 { | |
149 cerr << "FixedTempoEstimator: reset called" << endl; | |
150 | |
151 if (!m_priorMagnitudes) return; | |
152 | |
153 cerr << "FixedTempoEstimator: resetting" << endl; | |
154 | |
155 for (size_t i = 0; i < m_blockSize/2; ++i) { | |
156 m_priorMagnitudes[i] = 0.f; | |
157 } | |
158 for (size_t i = 0; i < m_dfsize; ++i) { | |
159 m_df[i] = 0.f; | |
160 } | |
161 | |
162 delete[] m_r; | |
163 m_r = 0; | |
164 | |
165 delete[] m_fr; | |
166 m_fr = 0; | |
167 | |
168 delete[] m_t; | |
169 m_t = 0; | |
170 | |
171 m_n = 0; | |
172 | |
173 m_start = RealTime::zeroTime; | |
174 m_lasttime = RealTime::zeroTime; | |
175 } | |
176 | |
177 FixedTempoEstimator::ParameterList | |
178 FixedTempoEstimator::getParameterDescriptors() const | |
179 { | |
180 ParameterList list; | |
181 return list; | |
182 } | |
183 | |
184 float | |
185 FixedTempoEstimator::getParameter(std::string id) const | |
186 { | |
187 return 0.f; | |
188 } | |
189 | |
190 void | |
191 FixedTempoEstimator::setParameter(std::string id, float value) | |
192 { | |
193 } | |
194 | |
// Indices used as keys into the FeatureSet returned by
// assembleFeatures().  They are fixed values, so declare them const
// rather than leaving them as writable file-scope variables.
static const int TempoOutput = 0;
static const int CandidatesOutput = 1;
static const int DFOutput = 2;
static const int ACFOutput = 3;
static const int FilteredACFOutput = 4;
200 | |
201 FixedTempoEstimator::OutputList | |
202 FixedTempoEstimator::getOutputDescriptors() const | |
203 { | |
204 OutputList list; | |
205 | |
206 OutputDescriptor d; | |
207 d.identifier = "tempo"; | |
208 d.name = "Tempo"; | |
209 d.description = "Estimated tempo"; | |
210 d.unit = "bpm"; | |
211 d.hasFixedBinCount = true; | |
212 d.binCount = 1; | |
213 d.hasKnownExtents = false; | |
214 d.isQuantized = false; | |
215 d.sampleType = OutputDescriptor::VariableSampleRate; | |
216 d.sampleRate = m_inputSampleRate; | |
217 d.hasDuration = true; // our returned tempo spans a certain range | |
218 list.push_back(d); | |
219 | |
220 d.identifier = "candidates"; | |
221 d.name = "Tempo candidates"; | |
222 d.description = "Possible tempo estimates, one per bin with the most likely in the first bin"; | |
223 d.unit = "bpm"; | |
224 d.hasFixedBinCount = false; | |
225 list.push_back(d); | |
226 | |
227 d.identifier = "detectionfunction"; | |
228 d.name = "Detection Function"; | |
229 d.description = "Onset detection function"; | |
230 d.unit = ""; | |
231 d.hasFixedBinCount = 1; | |
232 d.binCount = 1; | |
233 d.hasKnownExtents = true; | |
234 d.minValue = 0.0; | |
235 d.maxValue = 1.0; | |
236 d.isQuantized = false; | |
237 d.quantizeStep = 0.0; | |
238 d.sampleType = OutputDescriptor::FixedSampleRate; | |
239 if (m_stepSize) { | |
240 d.sampleRate = m_inputSampleRate / m_stepSize; | |
241 } else { | |
242 d.sampleRate = m_inputSampleRate / (getPreferredBlockSize()/2); | |
243 } | |
244 d.hasDuration = false; | |
245 list.push_back(d); | |
246 | |
247 d.identifier = "acf"; | |
248 d.name = "Autocorrelation Function"; | |
249 d.description = "Autocorrelation of onset detection function"; | |
250 d.hasKnownExtents = false; | |
251 d.unit = "r"; | |
252 list.push_back(d); | |
253 | |
254 d.identifier = "filtered_acf"; | |
255 d.name = "Filtered Autocorrelation"; | |
256 d.description = "Filtered autocorrelation of onset detection function"; | |
257 d.unit = "r"; | |
258 list.push_back(d); | |
259 | |
260 return list; | |
261 } | |
262 | |
263 FixedTempoEstimator::FeatureSet | |
264 FixedTempoEstimator::process(const float *const *inputBuffers, RealTime ts) | |
265 { | |
266 FeatureSet fs; | |
267 | |
268 if (m_stepSize == 0) { | |
269 cerr << "ERROR: FixedTempoEstimator::process: " | |
270 << "FixedTempoEstimator has not been initialised" | |
271 << endl; | |
272 return fs; | |
273 } | |
274 | |
275 // if (m_n < m_dfsize) cerr << "m_n = " << m_n << endl; | |
276 | |
277 if (m_n == 0) m_start = ts; | |
278 m_lasttime = ts; | |
279 | |
280 if (m_n == m_dfsize) { | |
281 calculate(); | |
282 fs = assembleFeatures(); | |
283 ++m_n; | |
284 return fs; | |
285 } | |
286 | |
287 if (m_n > m_dfsize) return FeatureSet(); | |
288 | |
289 float value = 0.f; | |
290 | |
291 for (size_t i = 1; i < m_blockSize/2; ++i) { | |
292 | |
293 float real = inputBuffers[0][i*2]; | |
294 float imag = inputBuffers[0][i*2 + 1]; | |
295 | |
296 float sqrmag = real * real + imag * imag; | |
297 value += fabsf(sqrmag - m_priorMagnitudes[i]); | |
298 | |
299 m_priorMagnitudes[i] = sqrmag; | |
300 } | |
301 | |
302 m_df[m_n] = value; | |
303 | |
304 ++m_n; | |
305 return fs; | |
306 } | |
307 | |
308 FixedTempoEstimator::FeatureSet | |
309 FixedTempoEstimator::getRemainingFeatures() | |
310 { | |
311 FeatureSet fs; | |
312 if (m_n > m_dfsize) return fs; | |
313 calculate(); | |
314 fs = assembleFeatures(); | |
315 ++m_n; | |
316 return fs; | |
317 } | |
318 | |
319 float | |
320 FixedTempoEstimator::lag2tempo(int lag) | |
321 { | |
322 return 60.f / ((lag * m_stepSize) / m_inputSampleRate); | |
323 } | |
324 | |
325 int | |
326 FixedTempoEstimator::tempo2lag(float tempo) | |
327 { | |
328 return ((60.f / tempo) * m_inputSampleRate) / m_stepSize; | |
329 } | |
330 | |
331 void | |
332 FixedTempoEstimator::calculate() | |
333 { | |
334 cerr << "FixedTempoEstimator::calculate: m_n = " << m_n << endl; | |
335 | |
336 if (m_r) { | |
337 cerr << "FixedTempoEstimator::calculate: calculation already happened?" << endl; | |
338 return; | |
339 } | |
340 | |
341 if (m_n < m_dfsize / 9) { | |
342 cerr << "FixedTempoEstimator::calculate: Not enough data to go on (have " << m_n << ", want at least " << m_dfsize/4 << ")" << endl; | |
343 return; // not enough data (perhaps we should return the duration of the input as the "estimated" beat length?) | |
344 } | |
345 | |
346 int n = m_n; | |
347 | |
348 m_r = new float[n/2]; | |
349 m_fr = new float[n/2]; | |
350 m_t = new float[n/2]; | |
351 | |
352 for (int i = 0; i < n/2; ++i) { | |
353 m_r[i] = 0.f; | |
354 m_fr[i] = 0.f; | |
355 m_t[i] = lag2tempo(i); | |
356 } | |
357 | |
358 for (int i = 0; i < n/2; ++i) { | |
359 | |
360 for (int j = i; j < n-1; ++j) { | |
361 m_r[i] += m_df[j] * m_df[j - i]; | |
362 } | |
363 | |
364 m_r[i] /= n - i - 1; | |
365 } | |
366 | |
367 float related[] = { 0.5, 2, 3, 4 }; | |
368 | |
369 for (int i = 1; i < n/2-1; ++i) { | |
370 | |
371 float weight = 1.f - fabsf(128.f - lag2tempo(i)) * 0.005; | |
372 if (weight < 0.f) weight = 0.f; | |
373 weight = weight * weight * weight; | |
374 | |
375 m_fr[i] = m_r[i]; | |
376 | |
377 int div = 1; | |
378 | |
379 for (int j = 0; j < int(sizeof(related)/sizeof(related[0])); ++j) { | |
380 | |
381 int k0 = int(i * related[j] + 0.5); | |
382 | |
383 if (k0 >= 0 && k0 < int(n/2)) { | |
384 | |
385 int kmax = 0, kmin = 0; | |
386 float kvmax = 0, kvmin = 0; | |
387 bool have = false; | |
388 | |
389 for (int k = k0 - 1; k <= k0 + 1; ++k) { | |
390 | |
391 if (k < 0 || k >= n/2) continue; | |
392 | |
393 if (!have || (m_r[k] > kvmax)) { kmax = k; kvmax = m_r[k]; } | |
394 if (!have || (m_r[k] < kvmin)) { kmin = k; kvmin = m_r[k]; } | |
395 | |
396 have = true; | |
397 } | |
398 | |
399 m_fr[i] += m_r[kmax] / 5; | |
400 | |
401 if ((kmax == 0 || m_r[kmax] > m_r[kmax-1]) && | |
402 (kmax == n/2-1 || m_r[kmax] > m_r[kmax+1]) && | |
403 kvmax > kvmin * 1.05) { | |
404 | |
405 m_t[i] = m_t[i] + lag2tempo(kmax) * related[j]; | |
406 ++div; | |
407 } | |
408 } | |
409 } | |
410 | |
411 m_t[i] /= div; | |
412 | |
413 // if (div > 1) { | |
414 // cerr << "adjusting tempo from " << lag2tempo(i) << " to " | |
415 // << m_t[i] << " for fr = " << m_fr[i] << " (div = " << div << ")" << endl; | |
416 // } | |
417 | |
418 m_fr[i] += m_fr[i] * (weight / 3); | |
419 } | |
420 } | |
421 | |
422 | |
423 FixedTempoEstimator::FeatureSet | |
424 FixedTempoEstimator::assembleFeatures() | |
425 { | |
426 FeatureSet fs; | |
427 if (!m_r) return fs; // No results | |
428 | |
429 Feature feature; | |
430 feature.hasTimestamp = true; | |
431 feature.hasDuration = false; | |
432 feature.label = ""; | |
433 feature.values.clear(); | |
434 feature.values.push_back(0.f); | |
435 | |
436 char buffer[40]; | |
437 | |
438 int n = m_n; | |
439 | |
440 for (int i = 0; i < n; ++i) { | |
441 feature.timestamp = m_start + | |
442 RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate); | |
443 feature.values[0] = m_df[i]; | |
444 feature.label = ""; | |
445 fs[DFOutput].push_back(feature); | |
446 } | |
447 | |
448 for (int i = 1; i < n/2; ++i) { | |
449 feature.timestamp = m_start + | |
450 RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate); | |
451 feature.values[0] = m_r[i]; | |
452 sprintf(buffer, "%.1f bpm", lag2tempo(i)); | |
453 if (i == n/2-1) feature.label = ""; | |
454 else feature.label = buffer; | |
455 fs[ACFOutput].push_back(feature); | |
456 } | |
457 | |
458 float t0 = 50.f; // our minimum detected tempo (could be a parameter) | |
459 float t1 = 190.f; // our maximum detected tempo | |
460 | |
461 //!!! need some way for the host (or at least, the user) to know | |
462 //!!! that it should only pass a certain amount of | |
463 //!!! input... e.g. by making the amount configurable | |
464 | |
465 int p0 = tempo2lag(t1); | |
466 int p1 = tempo2lag(t0); | |
467 | |
468 std::map<float, int> candidates; | |
469 | |
470 for (int i = p0; i <= p1 && i < n/2-1; ++i) { | |
471 | |
472 if (m_fr[i] > m_fr[i-1] && | |
473 m_fr[i] > m_fr[i+1]) { | |
474 candidates[m_fr[i]] = i; | |
475 } | |
476 | |
477 feature.timestamp = m_start + | |
478 RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate); | |
479 feature.values[0] = m_fr[i]; | |
480 sprintf(buffer, "%.1f bpm", lag2tempo(i)); | |
481 if (i == p1 || i == n/2-2) feature.label = ""; | |
482 else feature.label = buffer; | |
483 fs[FilteredACFOutput].push_back(feature); | |
484 } | |
485 | |
486 // cerr << "maxpi = " << maxpi << " for tempo " << lag2tempo(maxpi) << " (value = " << maxp << ")" << endl; | |
487 | |
488 if (candidates.empty()) { | |
489 cerr << "No tempo candidates!" << endl; | |
490 return fs; | |
491 } | |
492 | |
493 feature.hasTimestamp = true; | |
494 feature.timestamp = m_start; | |
495 | |
496 feature.hasDuration = true; | |
497 feature.duration = m_lasttime - m_start; | |
498 | |
499 std::map<float, int>::const_iterator ci = candidates.end(); | |
500 --ci; | |
501 int maxpi = ci->second; | |
502 | |
503 if (m_t[maxpi] > 0) { | |
504 cerr << "*** Using adjusted tempo " << m_t[maxpi] << " instead of lag tempo " << lag2tempo(maxpi) << endl; | |
505 feature.values[0] = m_t[maxpi]; | |
506 } else { | |
507 // shouldn't happen -- it would imply that this high value was not a peak! | |
508 feature.values[0] = lag2tempo(maxpi); | |
509 cerr << "WARNING: No stored tempo for index " << maxpi << endl; | |
510 } | |
511 | |
512 sprintf(buffer, "%.1f bpm", feature.values[0]); | |
513 feature.label = buffer; | |
514 | |
515 fs[TempoOutput].push_back(feature); | |
516 | |
517 feature.values.clear(); | |
518 feature.label = ""; | |
519 | |
520 while (feature.values.size() < 8) { | |
521 // cerr << "adding tempo value from lag " << ci->second << endl; | |
522 if (m_t[ci->second] > 0) { | |
523 feature.values.push_back(m_t[ci->second]); | |
524 } else { | |
525 feature.values.push_back(lag2tempo(ci->second)); | |
526 } | |
527 if (ci == candidates.begin()) break; | |
528 --ci; | |
529 } | |
530 | |
531 fs[CandidatesOutput].push_back(feature); | |
532 | |
533 return fs; | |
534 } |