Mercurial > hg > vamp-plugin-sdk
comparison examples/FixedTempoEstimator.cpp @ 200:a86e777bf9a6
* tweaks to fixed-tempo estimator
author | cannam |
---|---|
date | Thu, 09 Oct 2008 15:37:34 +0000 |
parents | 84c4bb209227 |
children | 8e1b1fa94e15 |
Comparison legend: equal · deleted · inserted · replaced
199:84c4bb209227 | 200:a86e777bf9a6 |
---|---|
49 FixedTempoEstimator::FixedTempoEstimator(float inputSampleRate) : | 49 FixedTempoEstimator::FixedTempoEstimator(float inputSampleRate) : |
50 Plugin(inputSampleRate), | 50 Plugin(inputSampleRate), |
51 m_stepSize(0), | 51 m_stepSize(0), |
52 m_blockSize(0), | 52 m_blockSize(0), |
53 m_priorMagnitudes(0), | 53 m_priorMagnitudes(0), |
54 m_df(0) | 54 m_df(0), |
55 m_r(0), | |
56 m_fr(0), | |
57 m_n(0) | |
55 { | 58 { |
56 } | 59 } |
57 | 60 |
58 FixedTempoEstimator::~FixedTempoEstimator() | 61 FixedTempoEstimator::~FixedTempoEstimator() |
59 { | 62 { |
60 delete[] m_priorMagnitudes; | 63 delete[] m_priorMagnitudes; |
61 delete[] m_df; | 64 delete[] m_df; |
65 delete[] m_r; | |
66 delete[] m_fr; | |
62 } | 67 } |
63 | 68 |
64 string | 69 string |
65 FixedTempoEstimator::getIdentifier() const | 70 FixedTempoEstimator::getIdentifier() const |
66 { | 71 { |
104 } | 109 } |
105 | 110 |
106 size_t | 111 size_t |
107 FixedTempoEstimator::getPreferredBlockSize() const | 112 FixedTempoEstimator::getPreferredBlockSize() const |
108 { | 113 { |
109 return 128; | 114 return 64; |
110 } | 115 } |
111 | 116 |
112 bool | 117 bool |
113 FixedTempoEstimator::initialise(size_t channels, size_t stepSize, size_t blockSize) | 118 FixedTempoEstimator::initialise(size_t channels, size_t stepSize, size_t blockSize) |
114 { | 119 { |
150 } | 155 } |
151 for (size_t i = 0; i < m_dfsize; ++i) { | 156 for (size_t i = 0; i < m_dfsize; ++i) { |
152 m_df[i] = 0.f; | 157 m_df[i] = 0.f; |
153 } | 158 } |
154 | 159 |
160 delete[] m_r; | |
161 m_r = 0; | |
162 | |
163 delete[] m_fr; | |
164 m_fr = 0; | |
165 | |
155 m_n = 0; | 166 m_n = 0; |
156 | 167 |
157 m_start = RealTime::zeroTime; | 168 m_start = RealTime::zeroTime; |
158 m_lasttime = RealTime::zeroTime; | 169 m_lasttime = RealTime::zeroTime; |
159 } | 170 } |
173 | 184 |
174 void | 185 void |
175 FixedTempoEstimator::setParameter(std::string id, float value) | 186 FixedTempoEstimator::setParameter(std::string id, float value) |
176 { | 187 { |
177 } | 188 } |
189 | |
190 static int TempoOutput = 0; | |
191 static int CandidatesOutput = 1; | |
192 static int DFOutput = 2; | |
193 static int ACFOutput = 3; | |
194 static int FilteredACFOutput = 4; | |
178 | 195 |
179 FixedTempoEstimator::OutputList | 196 FixedTempoEstimator::OutputList |
180 FixedTempoEstimator::getOutputDescriptors() const | 197 FixedTempoEstimator::getOutputDescriptors() const |
181 { | 198 { |
182 OutputList list; | 199 OutputList list; |
191 d.hasKnownExtents = false; | 208 d.hasKnownExtents = false; |
192 d.isQuantized = false; | 209 d.isQuantized = false; |
193 d.sampleType = OutputDescriptor::VariableSampleRate; | 210 d.sampleType = OutputDescriptor::VariableSampleRate; |
194 d.sampleRate = m_inputSampleRate; | 211 d.sampleRate = m_inputSampleRate; |
195 d.hasDuration = true; // our returned tempo spans a certain range | 212 d.hasDuration = true; // our returned tempo spans a certain range |
213 list.push_back(d); | |
214 | |
215 d.identifier = "candidates"; | |
216 d.name = "Tempo candidates"; | |
217 d.description = "Possible tempo estimates, one per bin with the most likely in the first bin"; | |
218 d.unit = "bpm"; | |
219 d.hasFixedBinCount = false; | |
196 list.push_back(d); | 220 list.push_back(d); |
197 | 221 |
198 d.identifier = "detectionfunction"; | 222 d.identifier = "detectionfunction"; |
199 d.name = "Detection Function"; | 223 d.name = "Detection Function"; |
200 d.description = "Onset detection function"; | 224 d.description = "Onset detection function"; |
239 << "FixedTempoEstimator has not been initialised" | 263 << "FixedTempoEstimator has not been initialised" |
240 << endl; | 264 << endl; |
241 return fs; | 265 return fs; |
242 } | 266 } |
243 | 267 |
244 if (m_n < m_dfsize) std::cerr << "m_n = " << m_n << std::endl; | 268 // if (m_n < m_dfsize) std::cerr << "m_n = " << m_n << std::endl; |
245 | 269 |
246 if (m_n == 0) m_start = ts; | 270 if (m_n == 0) m_start = ts; |
247 m_lasttime = ts; | 271 m_lasttime = ts; |
248 | 272 |
249 if (m_n == m_dfsize) { | 273 if (m_n == m_dfsize) { |
250 fs = calculateFeatures(); | 274 calculate(); |
275 fs = assembleFeatures(); | |
251 ++m_n; | 276 ++m_n; |
252 return fs; | 277 return fs; |
253 } | 278 } |
254 | 279 |
255 if (m_n > m_dfsize) return FeatureSet(); | 280 if (m_n > m_dfsize) return FeatureSet(); |
279 FixedTempoEstimator::FeatureSet | 304 FixedTempoEstimator::FeatureSet |
280 FixedTempoEstimator::getRemainingFeatures() | 305 FixedTempoEstimator::getRemainingFeatures() |
281 { | 306 { |
282 FeatureSet fs; | 307 FeatureSet fs; |
283 if (m_n > m_dfsize) return fs; | 308 if (m_n > m_dfsize) return fs; |
284 fs = calculateFeatures(); | 309 calculate(); |
310 fs = assembleFeatures(); | |
285 ++m_n; | 311 ++m_n; |
286 return fs; | 312 return fs; |
287 } | 313 } |
288 | 314 |
289 float | 315 float |
290 FixedTempoEstimator::lag2tempo(int lag) | 316 FixedTempoEstimator::lag2tempo(int lag) |
291 { | 317 { |
292 return 60.f / ((lag * m_stepSize) / m_inputSampleRate); | 318 return 60.f / ((lag * m_stepSize) / m_inputSampleRate); |
293 } | 319 } |
294 | 320 |
321 void | |
322 FixedTempoEstimator::calculate() | |
323 { | |
324 std::cerr << "FixedTempoEstimator::calculate: m_n = " << m_n << std::endl; | |
325 | |
326 if (m_r) { | |
327 std::cerr << "FixedTempoEstimator::calculate: calculation already happened?" << std::endl; | |
328 return; | |
329 } | |
330 | |
331 if (m_n < m_dfsize / 6) { | |
332 std::cerr << "FixedTempoEstimator::calculate: Not enough data to go on (have " << m_n << ", want at least " << m_dfsize/4 << ")" << std::endl; | |
333 return; // not enough data (perhaps we should return the duration of the input as the "estimated" beat length?) | |
334 } | |
335 | |
336 int n = m_n; | |
337 | |
338 m_r = new float[n/2]; | |
339 m_fr = new float[n/2]; | |
340 | |
341 for (int i = 0; i < n/2; ++i) { | |
342 m_r[i] = 0.f; | |
343 m_fr[i] = 0.f; | |
344 } | |
345 | |
346 for (int i = 0; i < n/2; ++i) { | |
347 | |
348 for (int j = i; j < n-1; ++j) { | |
349 m_r[i] += m_df[j] * m_df[j - i]; | |
350 } | |
351 | |
352 m_r[i] /= n - i - 1; | |
353 } | |
354 | |
355 for (int i = 1; i < n/2; ++i) { | |
356 | |
357 m_fr[i] = m_r[i]; | |
358 | |
359 int div = 1; | |
360 | |
361 int j = i; | |
362 | |
363 while (j < n/2) { | |
364 m_fr[i] += m_r[j]; | |
365 j *= 2; | |
366 ++div; | |
367 } | |
368 /* | |
369 for (int j = 1; j <= (n/2 - 1)/i; ++j) { | |
370 m_fr[i] += m_r[i * j]; | |
371 ++div; | |
372 } | |
373 */ | |
374 std::cerr << "i = " << i << ", (n/2 - 1)/i = " << (n/2 - 1)/i << ", sum = " << m_fr[i] << ", div = " << div << ", val = " << m_fr[i] / div << ", t = " << lag2tempo(i) << std::endl; | |
375 | |
376 | |
377 // m_fr[i] /= 1 + (n/2 - 1)/i; | |
378 m_fr[i] /= div; | |
379 } | |
380 | |
381 std::cerr << "FixedTempoEstimator::calculate done" << std::endl; | |
382 } | |
383 | |
384 | |
295 FixedTempoEstimator::FeatureSet | 385 FixedTempoEstimator::FeatureSet |
296 FixedTempoEstimator::calculateFeatures() | 386 FixedTempoEstimator::assembleFeatures() |
297 { | 387 { |
298 FeatureSet fs; | 388 FeatureSet fs; |
389 if (!m_r) return fs; // No results | |
390 | |
299 Feature feature; | 391 Feature feature; |
300 feature.hasTimestamp = true; | 392 feature.hasTimestamp = true; |
301 feature.hasDuration = false; | 393 feature.hasDuration = false; |
302 feature.label = ""; | 394 feature.label = ""; |
303 feature.values.clear(); | 395 feature.values.clear(); |
304 feature.values.push_back(0.f); | 396 feature.values.push_back(0.f); |
305 | 397 |
306 char buffer[20]; | 398 char buffer[40]; |
307 | 399 |
308 if (m_n < m_dfsize / 4) return fs; // not enough data (perhaps we should return the duration of the input as the "estimated" beat length?) | |
309 | |
310 std::cerr << "FixedTempoEstimator::calculateTempo: m_n = " << m_n << std::endl; | |
311 | |
312 int n = m_n; | 400 int n = m_n; |
313 float *f = m_df; | |
314 | 401 |
315 for (int i = 0; i < n; ++i) { | 402 for (int i = 0; i < n; ++i) { |
316 feature.timestamp = RealTime::frame2RealTime(i * m_stepSize, | 403 feature.timestamp = RealTime::frame2RealTime(i * m_stepSize, |
317 m_inputSampleRate); | 404 m_inputSampleRate); |
318 feature.values[0] = f[i]; | 405 feature.values[0] = m_df[i]; |
319 feature.label = ""; | 406 feature.label = ""; |
320 fs[1].push_back(feature); | 407 fs[DFOutput].push_back(feature); |
321 } | |
322 | |
323 float *r = new float[n/2]; | |
324 for (int i = 0; i < n/2; ++i) r[i] = 0.f; | |
325 | |
326 int minlag = 10; | |
327 | |
328 for (int i = 0; i < n/2; ++i) { | |
329 for (int j = i; j < n-1; ++j) { | |
330 r[i] += f[j] * f[j - i]; | |
331 } | |
332 r[i] /= n - i - 1; | |
333 } | 408 } |
334 | 409 |
335 for (int i = 1; i < n/2; ++i) { | 410 for (int i = 1; i < n/2; ++i) { |
336 feature.timestamp = RealTime::frame2RealTime(i * m_stepSize, | 411 feature.timestamp = RealTime::frame2RealTime(i * m_stepSize, |
337 m_inputSampleRate); | 412 m_inputSampleRate); |
338 feature.values[0] = r[i]; | 413 feature.values[0] = m_r[i]; |
339 sprintf(buffer, "%.1f bpm", lag2tempo(i)); | 414 sprintf(buffer, "%.1f bpm", lag2tempo(i)); |
340 feature.label = buffer; | 415 if (i == n/2-1) feature.label = ""; |
341 fs[2].push_back(feature); | 416 else feature.label = buffer; |
342 } | 417 fs[ACFOutput].push_back(feature); |
343 | 418 } |
344 float max = 0.f; | |
345 int maxindex = 0; | |
346 | |
347 std::cerr << "n/2 = " << n/2 << std::endl; | |
348 | |
349 for (int i = minlag; i < n/2; ++i) { | |
350 | |
351 if (i == minlag || r[i] > max) { | |
352 max = r[i]; | |
353 maxindex = i; | |
354 } | |
355 | |
356 if (i == 0 || i == n/2-1) continue; | |
357 | |
358 if (r[i] > r[i-1] && r[i] > r[i+1]) { | |
359 std::cerr << "peak at " << i << " (value=" << r[i] << ", tempo would be " << lag2tempo(i) << ")" << std::endl; | |
360 } | |
361 } | |
362 | |
363 std::cerr << "overall max at " << maxindex << " (value=" << max << ")" << std::endl; | |
364 | |
365 float tempo = lag2tempo(maxindex); | |
366 | |
367 std::cerr << "provisional tempo = " << tempo << std::endl; | |
368 | 419 |
369 float t0 = 60.f; | 420 float t0 = 60.f; |
370 float t1 = 180.f; | 421 float t1 = 180.f; |
371 | 422 |
372 int p0 = ((60.f / t1) * m_inputSampleRate) / m_stepSize; | 423 int p0 = ((60.f / t1) * m_inputSampleRate) / m_stepSize; |
373 int p1 = ((60.f / t0) * m_inputSampleRate) / m_stepSize; | 424 int p1 = ((60.f / t0) * m_inputSampleRate) / m_stepSize; |
374 | 425 |
375 std::cerr << "p0 = " << p0 << ", p1 = " << p1 << std::endl; | 426 // std::cerr << "p0 = " << p0 << ", p1 = " << p1 << std::endl; |
376 | 427 |
377 int pc = p1 - p0 + 1; | 428 int pc = p1 - p0 + 1; |
378 std::cerr << "pc = " << pc << std::endl; | 429 // std::cerr << "pc = " << pc << std::endl; |
379 // float *filtered = new float[pc]; | 430 |
380 // for (int i = 0; i < pc; ++i) filtered[i] = 0.f; | 431 // int maxpi = 0; |
381 | 432 // float maxp = 0.f; |
382 int maxpi = 0; | 433 |
383 float maxp = 0.f; | 434 std::map<float, int> candidates; |
384 | 435 |
385 for (int i = p0; i <= p1; ++i) { | 436 for (int i = p0; i <= p1 && i < n/2-1; ++i) { |
386 | 437 |
387 // int fi = i - p0; | 438 // Only candidates here are those that were peaks in the |
388 | 439 // original acf |
389 float filtered = 0.f; | 440 // if (r[i] > r[i-1] && r[i] > r[i+1]) { |
390 | 441 // candidates[filtered] = i; |
391 for (int j = 1; j <= (n/2 - 1)/i; ++j) { | 442 // } |
392 // std::cerr << "j = " << j << ", i = " << i << std::endl; | 443 |
393 filtered += r[i * j]; | 444 candidates[m_fr[i]] = i; |
394 } | |
395 filtered /= (n/2 - 1)/i; | |
396 | |
397 if (i == p0 || filtered > maxp) { | |
398 maxp = filtered; | |
399 maxpi = i; | |
400 } | |
401 | 445 |
402 feature.timestamp = RealTime::frame2RealTime(i * m_stepSize, | 446 feature.timestamp = RealTime::frame2RealTime(i * m_stepSize, |
403 m_inputSampleRate); | 447 m_inputSampleRate); |
404 feature.values[0] = filtered; | 448 feature.values[0] = m_fr[i]; |
405 sprintf(buffer, "%.1f bpm", lag2tempo(i)); | 449 sprintf(buffer, "%.1f bpm", lag2tempo(i)); |
406 feature.label = buffer; | 450 if (i == p1 || i == n/2-2) feature.label = ""; |
407 fs[3].push_back(feature); | 451 else feature.label = buffer; |
408 } | 452 fs[FilteredACFOutput].push_back(feature); |
409 | 453 } |
410 std::cerr << "maxpi = " << maxpi << " for tempo " << lag2tempo(maxpi) << " (value = " << maxp << ")" << std::endl; | 454 |
411 | 455 // std::cerr << "maxpi = " << maxpi << " for tempo " << lag2tempo(maxpi) << " (value = " << maxp << ")" << std::endl; |
412 tempo = lag2tempo(maxpi); | 456 |
413 | 457 if (candidates.empty()) { |
414 delete[] r; | 458 std::cerr << "No tempo candidates!" << std::endl; |
459 return fs; | |
460 } | |
415 | 461 |
416 feature.hasTimestamp = true; | 462 feature.hasTimestamp = true; |
417 feature.timestamp = m_start; | 463 feature.timestamp = m_start; |
418 | 464 |
419 feature.hasDuration = true; | 465 feature.hasDuration = true; |
420 feature.duration = m_lasttime - m_start; | 466 feature.duration = m_lasttime - m_start; |
421 | 467 |
422 feature.values[0] = tempo; | 468 std::map<float, int>::const_iterator ci = candidates.end(); |
423 | 469 --ci; |
424 sprintf(buffer, "%.1f bpm", tempo); | 470 int maxpi = ci->second; |
471 | |
472 feature.values[0] = lag2tempo(maxpi); | |
473 | |
474 sprintf(buffer, "%.1f bpm", lag2tempo(maxpi)); | |
425 feature.label = buffer; | 475 feature.label = buffer; |
426 | 476 |
427 fs[0].push_back(feature); | 477 fs[TempoOutput].push_back(feature); |
428 | 478 |
479 feature.values.clear(); | |
480 feature.label = ""; | |
481 | |
482 while (feature.values.size() < 8) { | |
483 feature.values.push_back(lag2tempo(ci->second)); | |
484 if (ci == candidates.begin()) break; | |
485 --ci; | |
486 } | |
487 | |
488 fs[CandidatesOutput].push_back(feature); | |
489 | |
429 return fs; | 490 return fs; |
430 } | 491 } |