comparison examples/FixedTempoEstimator.cpp @ 207:fa8afbb7221b

* big improvements to FixedTempoEstimator
author cannam
date Wed, 15 Oct 2008 13:09:08 +0000
parents 4275327f9c79
children df55003e8968
comparison
equal deleted inserted replaced
206:5ee5f0100adb 207:fa8afbb7221b
105 } 105 }
106 106
107 size_t 107 size_t
108 FixedTempoEstimator::getPreferredStepSize() const 108 FixedTempoEstimator::getPreferredStepSize() const
109 { 109 {
110 return 0; 110 return 64;
111 } 111 }
112 112
113 size_t 113 size_t
114 FixedTempoEstimator::getPreferredBlockSize() const 114 FixedTempoEstimator::getPreferredBlockSize() const
115 { 115 {
116 return 128; 116 return 256;
117 } 117 }
118 118
119 bool 119 bool
120 FixedTempoEstimator::initialise(size_t channels, size_t stepSize, size_t blockSize) 120 FixedTempoEstimator::initialise(size_t channels, size_t stepSize, size_t blockSize)
121 { 121 {
144 } 144 }
145 145
146 void 146 void
147 FixedTempoEstimator::reset() 147 FixedTempoEstimator::reset()
148 { 148 {
149 std::cerr << "FixedTempoEstimator: reset called" << std::endl; 149 cerr << "FixedTempoEstimator: reset called" << endl;
150 150
151 if (!m_priorMagnitudes) return; 151 if (!m_priorMagnitudes) return;
152 152
153 std::cerr << "FixedTempoEstimator: resetting" << std::endl; 153 cerr << "FixedTempoEstimator: resetting" << endl;
154 154
155 for (size_t i = 0; i < m_blockSize/2; ++i) { 155 for (size_t i = 0; i < m_blockSize/2; ++i) {
156 m_priorMagnitudes[i] = 0.f; 156 m_priorMagnitudes[i] = 0.f;
157 } 157 }
158 for (size_t i = 0; i < m_dfsize; ++i) { 158 for (size_t i = 0; i < m_dfsize; ++i) {
270 << "FixedTempoEstimator has not been initialised" 270 << "FixedTempoEstimator has not been initialised"
271 << endl; 271 << endl;
272 return fs; 272 return fs;
273 } 273 }
274 274
275 // if (m_n < m_dfsize) std::cerr << "m_n = " << m_n << std::endl; 275 // if (m_n < m_dfsize) cerr << "m_n = " << m_n << endl;
276 276
277 if (m_n == 0) m_start = ts; 277 if (m_n == 0) m_start = ts;
278 m_lasttime = ts; 278 m_lasttime = ts;
279 279
280 if (m_n == m_dfsize) { 280 if (m_n == m_dfsize) {
284 return fs; 284 return fs;
285 } 285 }
286 286
287 if (m_n > m_dfsize) return FeatureSet(); 287 if (m_n > m_dfsize) return FeatureSet();
288 288
289 int count = 0; 289 float value = 0.f;
290
291 bool print = (ts == RealTime::zeroTime);
290 292
291 for (size_t i = 1; i < m_blockSize/2; ++i) { 293 for (size_t i = 1; i < m_blockSize/2; ++i) {
292 294
293 float real = inputBuffers[0][i*2]; 295 float real = inputBuffers[0][i*2];
294 float imag = inputBuffers[0][i*2 + 1]; 296 float imag = inputBuffers[0][i*2 + 1];
295 297
296 float sqrmag = real * real + imag * imag; 298 float sqrmag = real * real + imag * imag;
297 299 value += fabsf(sqrmag - m_priorMagnitudes[i]);
298 if (m_priorMagnitudes[i] > 0.f) { 300
299 float diff = 10.f * log10f(sqrmag / m_priorMagnitudes[i]); 301 // if (i == 1 && ts == RealTime::zeroTime) {
300 if (diff >= 3.f) ++count; 302 // cerr << "First sqrmag: " << sqrmag << ", value = " << value << endl;
301 } 303 // }
302 304
303 m_priorMagnitudes[i] = sqrmag; 305 m_priorMagnitudes[i] = sqrmag;
304 } 306 }
305 307
306 m_df[m_n] = float(count) / float(m_blockSize/2); 308 m_df[m_n] = value;
309
307 ++m_n; 310 ++m_n;
308 return fs; 311 return fs;
309 } 312 }
310 313
311 FixedTempoEstimator::FeatureSet 314 FixedTempoEstimator::FeatureSet
323 FixedTempoEstimator::lag2tempo(int lag) 326 FixedTempoEstimator::lag2tempo(int lag)
324 { 327 {
325 return 60.f / ((lag * m_stepSize) / m_inputSampleRate); 328 return 60.f / ((lag * m_stepSize) / m_inputSampleRate);
326 } 329 }
327 330
331 int
332 FixedTempoEstimator::tempo2lag(float tempo)
333 {
334 return ((60.f / tempo) * m_inputSampleRate) / m_stepSize;
335 }
336
328 void 337 void
329 FixedTempoEstimator::calculate() 338 FixedTempoEstimator::calculate()
330 { 339 {
331 std::cerr << "FixedTempoEstimator::calculate: m_n = " << m_n << std::endl; 340 cerr << "FixedTempoEstimator::calculate: m_n = " << m_n << endl;
332 341
333 if (m_r) { 342 if (m_r) {
334 std::cerr << "FixedTempoEstimator::calculate: calculation already happened?" << std::endl; 343 cerr << "FixedTempoEstimator::calculate: calculation already happened?" << endl;
335 return; 344 return;
336 } 345 }
337 346
338 if (m_n < m_dfsize / 6) { 347 if (m_n < m_dfsize / 6) {
339 std::cerr << "FixedTempoEstimator::calculate: Not enough data to go on (have " << m_n << ", want at least " << m_dfsize/4 << ")" << std::endl; 348 cerr << "FixedTempoEstimator::calculate: Not enough data to go on (have " << m_n << ", want at least " << m_dfsize/4 << ")" << endl;
340 return; // not enough data (perhaps we should return the duration of the input as the "estimated" beat length?) 349 return; // not enough data (perhaps we should return the duration of the input as the "estimated" beat length?)
341 } 350 }
342 351
343 int n = m_n; 352 int n = m_n;
344 353
347 m_t = new float[n/2]; 356 m_t = new float[n/2];
348 357
349 for (int i = 0; i < n/2; ++i) { 358 for (int i = 0; i < n/2; ++i) {
350 m_r[i] = 0.f; 359 m_r[i] = 0.f;
351 m_fr[i] = 0.f; 360 m_fr[i] = 0.f;
352 m_t[i] = 0.f; 361 m_t[i] = lag2tempo(i);
353 } 362 }
354 363
355 for (int i = 0; i < n/2; ++i) { 364 for (int i = 0; i < n/2; ++i) {
356 365
357 for (int j = i; j < n-1; ++j) { 366 for (int j = i; j < n-1; ++j) {
364 for (int i = 1; i < n/2; ++i) { 373 for (int i = 1; i < n/2; ++i) {
365 374
366 float weight = 1.f - fabsf(128.f - lag2tempo(i)) * 0.005; 375 float weight = 1.f - fabsf(128.f - lag2tempo(i)) * 0.005;
367 if (weight < 0.f) weight = 0.f; 376 if (weight < 0.f) weight = 0.f;
368 weight = weight * weight; 377 weight = weight * weight;
369 std::cerr << "i = " << i << ": tempo = " << lag2tempo(i) << ", weight = " << weight << std::endl; 378
370 379 cerr << "i = " << i << ": tempo = " << lag2tempo(i) << ", weight = " << weight << endl;
371 // m_fr[i] = m_r[i]; 380
372 m_fr[i] = 0; 381 m_fr[i] = m_r[i] * (1 + weight / 3.f);
373 382 }
374 m_fr[i] = m_r[i] * (1 + weight/20.f); 383
375 } 384 float related[4] = { 2, 3, 4 };
376 385
377 float related[4] = { 1.5, 0.66666667, 0.5 }; 386 for (int i = 0; i < n/2; ++i) {
378 387
379 for (int i = 1; i < n/2 - 1; ++i) { 388 if (i == 0 || i == n/2 - 1 ||
380 389 !(m_fr[i] > m_fr[i-1] &&
381 if (!(m_fr[i] > m_fr[i-1] &&
382 m_fr[i] >= m_fr[i+1])) { 390 m_fr[i] >= m_fr[i+1])) {
383 continue; 391 continue;
384 } 392 }
385 393
386 m_t[i] = lag2tempo(i);
387
388 int div = 1; 394 int div = 1;
389 395
390 for (int j = 0; j < sizeof(related)/sizeof(related[0]); ++j) { 396 for (int j = 0; j < sizeof(related)/sizeof(related[0]); ++j) {
391 397
392 int k0 = i / related[j]; 398 int k0 = i * related[j];
393 399
394 if (k0 > 1 && k0 < n/2 - 2) { 400 if (k0 > 1 && k0 < n/2 - 2) {
395 401
396 for (int k = k0 - 1; k <= k0 + 1; ++k) { 402 int kmax = 0, kmin = 0;
397 403 float kvmax = 0, kvmin = 0;
398 if (m_r[k] > m_r[k-1] && 404
399 m_r[k] >= m_r[k+1]) { 405 for (int k = k0 - 2; k <= k0 + 2; ++k) {
400 406
401 std::cerr << "peak at " << i << " (val " << m_r[i] << ", tempo " << lag2tempo(i) << ") has sympathetic peak at " << k << " (val " << m_r[k] << " for relative tempo " << lag2tempo(k) / related[j] << ")" << std::endl; 407 if (k == k0 - 2 || m_r[k] > kvmax) {
402 408 kmax = k;
403 m_t[i] = m_t[i] + lag2tempo(k) / related[j]; 409 kvmax = m_r[k];
404 ++div;
405 } 410 }
411
412 if (k == k0 - 2 || m_r[k] < kvmin) {
413 kmin = k;
414 kvmin = m_r[k];
415 }
416 }
417
418 if (m_r[kmax] > m_r[kmax-1] &&
419 m_r[kmax] > m_r[kmax+1] &&
420 kvmax > kvmin * 1.05) {
421
422 // cerr << "peak at " << i << " (val " << m_r[i] << ", tempo " << lag2tempo(i) << ") has sympathetic peak at " << kmax << " (val " << m_r[kmax] << " for relative tempo " << lag2tempo(kmax) * related[j] << ")" << endl;
423
424 m_t[i] = m_t[i] + lag2tempo(kmax) * related[j];
425 ++div;
406 } 426 }
407 } 427 }
408 } 428 }
409 429
410 m_t[i] /= div; 430 m_t[i] /= div;
411 431
412 if (div > 1) { 432 // if (div > 1) {
413 std::cerr << "adjusting tempo from " << lag2tempo(i) << " to " 433 // cerr << "adjusting tempo from " << lag2tempo(i) << " to "
414 << m_t[i] << std::endl; 434 // << m_t[i] << endl;
435 // }
436 }
437
438 int e = tempo2lag(60.f);
439 int div = (n/2 - 1) / e;
440
441 // cerr << "e = " << e << ", n/2 = " << n/2 << ", div = " << div << endl;
442 if (div > 1) {
443 for (int j = 2; j <= div && j <= 8; j *= 2) {
444 for (int i = 1; i <= e; ++i) {
445 m_fr[i] += m_fr[i * j] * (1.f / j);
446 }
415 } 447 }
416 } 448 }
417 /* 449
418 for (int i = 1; i < n/2; ++i) { 450 // cerr << "i = " << i << ", (n/2 - 1)/i = " << (n/2 - 1)/i << ", sum = " << m_fr[i] << ", div = " << div << ", val = " << m_fr[i] / div << ", t = " << lag2tempo(i) << endl;
419
420 // int div = 1;
421 int j = i * 2;
422
423 while (j < n/2) {
424 m_fr[i] += m_fr[j] * 0.1;
425 j *= 2;
426 // ++div;
427 }
428
429 // m_fr[i] /= div;
430 }
431
432 // std::cerr << "i = " << i << ", (n/2 - 1)/i = " << (n/2 - 1)/i << ", sum = " << m_fr[i] << ", div = " << div << ", val = " << m_fr[i] / div << ", t = " << lag2tempo(i) << std::endl;
433 451
434 452
435 // } 453 // }
436 */ 454
437 std::cerr << "FixedTempoEstimator::calculate done" << std::endl; 455 cerr << "FixedTempoEstimator::calculate done" << endl;
438 } 456 }
439 457
440 458
441 FixedTempoEstimator::FeatureSet 459 FixedTempoEstimator::FeatureSet
442 FixedTempoEstimator::assembleFeatures() 460 FixedTempoEstimator::assembleFeatures()
474 } 492 }
475 493
476 float t0 = 60.f; 494 float t0 = 60.f;
477 float t1 = 180.f; 495 float t1 = 180.f;
478 496
479 int p0 = ((60.f / t1) * m_inputSampleRate) / m_stepSize; 497 int p0 = tempo2lag(t1);
480 int p1 = ((60.f / t0) * m_inputSampleRate) / m_stepSize; 498 int p1 = tempo2lag(t0);
481 499
482 // std::cerr << "p0 = " << p0 << ", p1 = " << p1 << std::endl; 500 cerr << "p0 = " << p0 << ", p1 = " << p1 << endl;
483 501
484 int pc = p1 - p0 + 1; 502 int pc = p1 - p0 + 1;
485 // std::cerr << "pc = " << pc << std::endl; 503 // cerr << "pc = " << pc << endl;
486 504
487 // int maxpi = 0; 505 // int maxpi = 0;
488 // float maxp = 0.f; 506 // float maxp = 0.f;
489 507
490 std::map<float, int> candidates; 508 std::map<float, int> candidates;
491 509
492 for (int i = p0; i <= p1 && i < n/2-1; ++i) { 510 for (int i = p0; i <= p1 && i < n/2-1; ++i) {
493
494 // Only candidates here are those that were peaks in the
495 // original acf
496 // if (r[i] > r[i-1] && r[i] > r[i+1]) {
497 // candidates[filtered] = i;
498 // }
499 511
500 candidates[m_fr[i]] = i; 512 candidates[m_fr[i]] = i;
501 513
502 feature.timestamp = RealTime::frame2RealTime(i * m_stepSize, 514 feature.timestamp = RealTime::frame2RealTime(i * m_stepSize,
503 m_inputSampleRate); 515 m_inputSampleRate);
506 if (i == p1 || i == n/2-2) feature.label = ""; 518 if (i == p1 || i == n/2-2) feature.label = "";
507 else feature.label = buffer; 519 else feature.label = buffer;
508 fs[FilteredACFOutput].push_back(feature); 520 fs[FilteredACFOutput].push_back(feature);
509 } 521 }
510 522
511 // std::cerr << "maxpi = " << maxpi << " for tempo " << lag2tempo(maxpi) << " (value = " << maxp << ")" << std::endl; 523 // cerr << "maxpi = " << maxpi << " for tempo " << lag2tempo(maxpi) << " (value = " << maxp << ")" << endl;
512 524
513 if (candidates.empty()) { 525 if (candidates.empty()) {
514 std::cerr << "No tempo candidates!" << std::endl; 526 cerr << "No tempo candidates!" << endl;
515 return fs; 527 return fs;
516 } 528 }
517 529
518 feature.hasTimestamp = true; 530 feature.hasTimestamp = true;
519 feature.timestamp = m_start; 531 feature.timestamp = m_start;
524 std::map<float, int>::const_iterator ci = candidates.end(); 536 std::map<float, int>::const_iterator ci = candidates.end();
525 --ci; 537 --ci;
526 int maxpi = ci->second; 538 int maxpi = ci->second;
527 539
528 if (m_t[maxpi] > 0) { 540 if (m_t[maxpi] > 0) {
541 cerr << "*** Using adjusted tempo " << m_t[maxpi] << " instead of lag tempo " << lag2tempo(maxpi) << endl;
529 feature.values[0] = m_t[maxpi]; 542 feature.values[0] = m_t[maxpi];
530 } else { 543 } else {
531 // shouldn't happen -- it would imply that this high value was not a peak! 544 // shouldn't happen -- it would imply that this high value was not a peak!
532 feature.values[0] = lag2tempo(maxpi); 545 feature.values[0] = lag2tempo(maxpi);
533 std::cerr << "WARNING: No stored tempo for index " << maxpi << std::endl; 546 cerr << "WARNING: No stored tempo for index " << maxpi << endl;
534 } 547 }
535 548
536 sprintf(buffer, "%.1f bpm", feature.values[0]); 549 sprintf(buffer, "%.1f bpm", feature.values[0]);
537 feature.label = buffer; 550 feature.label = buffer;
538 551
540 553
541 feature.values.clear(); 554 feature.values.clear();
542 feature.label = ""; 555 feature.label = "";
543 556
544 while (feature.values.size() < 8) { 557 while (feature.values.size() < 8) {
545 feature.values.push_back(lag2tempo(ci->second)); //!!!??? use m_t? 558 if (m_t[ci->second] > 0) {
559 feature.values.push_back(m_t[ci->second]);
560 } else {
561 feature.values.push_back(lag2tempo(ci->second));
562 }
546 if (ci == candidates.begin()) break; 563 if (ci == candidates.begin()) break;
547 --ci; 564 --ci;
548 } 565 }
549 566
550 fs[CandidatesOutput].push_back(feature); 567 fs[CandidatesOutput].push_back(feature);