Mercurial > hg > vamp-plugin-sdk
comparison examples/FixedTempoEstimator.cpp @ 255:88ef5ffdbe8d
* docs
author | cannam |
---|---|
date | Wed, 12 Nov 2008 14:11:01 +0000 |
parents | 5bfed156b45d |
children | 23352e424631 |
comparison
equal
deleted
inserted
replaced
254:e02c93c4de8f | 255:88ef5ffdbe8d |
---|---|
45 | 45 |
46 #include <cmath> | 46 #include <cmath> |
47 | 47 |
48 | 48 |
49 class FixedTempoEstimator::D | 49 class FixedTempoEstimator::D |
50 // this class just avoids us having to declare any data members in the header | |
50 { | 51 { |
51 public: | 52 public: |
52 D(float inputSampleRate); | 53 D(float inputSampleRate); |
53 ~D(); | 54 ~D(); |
54 | 55 |
312 | 313 |
313 if (m_n == 0) m_start = ts; | 314 if (m_n == 0) m_start = ts; |
314 m_lasttime = ts; | 315 m_lasttime = ts; |
315 | 316 |
316 if (m_n == m_dfsize) { | 317 if (m_n == m_dfsize) { |
318 // If we have seen enough input, do the estimation and return | |
317 calculate(); | 319 calculate(); |
318 fs = assembleFeatures(); | 320 fs = assembleFeatures(); |
319 ++m_n; | 321 ++m_n; |
320 return fs; | 322 return fs; |
321 } | 323 } |
322 | 324 |
325 // If we have seen more than enough, just discard and return! | |
323 if (m_n > m_dfsize) return FeatureSet(); | 326 if (m_n > m_dfsize) return FeatureSet(); |
324 | 327 |
325 float value = 0.f; | 328 float value = 0.f; |
329 | |
330 // m_df will contain an onset detection function based on the rise | |
331 // in overall power from one spectral frame to the next -- | |
332 // simplistic but reasonably effective for our purposes. | |
326 | 333 |
327 for (size_t i = 1; i < m_blockSize/2; ++i) { | 334 for (size_t i = 1; i < m_blockSize/2; ++i) { |
328 | 335 |
329 float real = inputBuffers[0][i*2]; | 336 float real = inputBuffers[0][i*2]; |
330 float imag = inputBuffers[0][i*2 + 1]; | 337 float imag = inputBuffers[0][i*2 + 1]; |
376 m_n < (1.0 * m_inputSampleRate) / m_stepSize) { // 1 second | 383 m_n < (1.0 * m_inputSampleRate) / m_stepSize) { // 1 second |
377 cerr << "FixedTempoEstimator::calculate: Input is too short" << endl; | 384 cerr << "FixedTempoEstimator::calculate: Input is too short" << endl; |
378 return; | 385 return; |
379 } | 386 } |
380 | 387 |
381 int n = m_n; | 388 // This function takes m_df (the detection function array filled |
382 | 389 // out in process()) and calculates m_r (the raw autocorrelation) |
383 m_r = new float[n/2]; | 390 // and m_fr (the filtered autocorrelation from whose peaks tempo |
384 m_fr = new float[n/2]; | 391 // estimates will be taken). |
385 m_t = new float[n/2]; | 392 |
393 int n = m_n; // length of actual df array (m_dfsize is the theoretical max) | |
394 | |
395 m_r = new float[n/2]; // raw autocorrelation | |
396 m_fr = new float[n/2]; // filtered autocorrelation | |
397 m_t = new float[n/2]; // averaged tempo estimate for each lag value | |
386 | 398 |
387 for (int i = 0; i < n/2; ++i) { | 399 for (int i = 0; i < n/2; ++i) { |
388 m_r[i] = 0.f; | 400 m_r[i] = 0.f; |
389 m_fr[i] = 0.f; | 401 m_fr[i] = 0.f; |
390 m_t[i] = lag2tempo(i); | 402 m_t[i] = lag2tempo(i); |
391 } | 403 } |
404 | |
405 // Calculate the raw autocorrelation of the detection function | |
392 | 406 |
393 for (int i = 0; i < n/2; ++i) { | 407 for (int i = 0; i < n/2; ++i) { |
394 | 408 |
395 for (int j = i; j < n-1; ++j) { | 409 for (int j = i; j < n-1; ++j) { |
396 m_r[i] += m_df[j] * m_df[j - i]; | 410 m_r[i] += m_df[j] * m_df[j - i]; |
397 } | 411 } |
398 | 412 |
399 m_r[i] /= n - i - 1; | 413 m_r[i] /= n - i - 1; |
400 } | 414 } |
401 | 415 |
416 // Filter the autocorrelation and average out the tempo estimates | |
417 | |
402 float related[] = { 0.5, 2, 4, 8 }; | 418 float related[] = { 0.5, 2, 4, 8 }; |
403 | 419 |
404 for (int i = 1; i < n/2-1; ++i) { | 420 for (int i = 1; i < n/2-1; ++i) { |
405 | 421 |
406 float weight = 1.f - fabsf(128.f - lag2tempo(i)) * 0.005; | |
407 if (weight < 0.f) weight = 0.f; | |
408 weight = weight * weight * weight; | |
409 | |
410 m_fr[i] = m_r[i]; | 422 m_fr[i] = m_r[i]; |
411 | 423 |
412 int div = 1; | 424 int div = 1; |
413 | 425 |
414 for (int j = 0; j < int(sizeof(related)/sizeof(related[0])); ++j) { | 426 for (int j = 0; j < int(sizeof(related)/sizeof(related[0])); ++j) { |
427 | |
428 // Check for an obvious peak at each metrically related lag | |
415 | 429 |
416 int k0 = int(i * related[j] + 0.5); | 430 int k0 = int(i * related[j] + 0.5); |
417 | 431 |
418 if (k0 >= 0 && k0 < int(n/2)) { | 432 if (k0 >= 0 && k0 < int(n/2)) { |
419 | 433 |
429 if (!have || (m_r[k] < kvmin)) { kmin = k; kvmin = m_r[k]; } | 443 if (!have || (m_r[k] < kvmin)) { kmin = k; kvmin = m_r[k]; } |
430 | 444 |
431 have = true; | 445 have = true; |
432 } | 446 } |
433 | 447 |
448 // Boost the original lag according to the strongest | |
449 // value found close to this related lag | |
450 | |
434 m_fr[i] += m_r[kmax] / 5; | 451 m_fr[i] += m_r[kmax] / 5; |
435 | 452 |
436 if ((kmax == 0 || m_r[kmax] > m_r[kmax-1]) && | 453 if ((kmax == 0 || m_r[kmax] > m_r[kmax-1]) && |
437 (kmax == n/2-1 || m_r[kmax] > m_r[kmax+1]) && | 454 (kmax == n/2-1 || m_r[kmax] > m_r[kmax+1]) && |
438 kvmax > kvmin * 1.05) { | 455 kvmax > kvmin * 1.05) { |
456 | |
457 // The strongest value close to the related lag is | |
458 // also a pretty good looking peak, so use it to | |
459 // improve our tempo estimate for the original lag | |
439 | 460 |
440 m_t[i] = m_t[i] + lag2tempo(kmax) * related[j]; | 461 m_t[i] = m_t[i] + lag2tempo(kmax) * related[j]; |
441 ++div; | 462 ++div; |
442 } | 463 } |
443 } | 464 } |
444 } | 465 } |
445 | 466 |
446 m_t[i] /= div; | 467 m_t[i] /= div; |
447 | 468 |
469 // Finally apply a primitive perceptual weighting (to prefer | |
470 // tempi of around 120-130) | |
471 | |
472 float weight = 1.f - fabsf(128.f - lag2tempo(i)) * 0.005; | |
473 if (weight < 0.f) weight = 0.f; | |
474 weight = weight * weight * weight; | |
475 | |
448 m_fr[i] += m_fr[i] * (weight / 3); | 476 m_fr[i] += m_fr[i] * (weight / 3); |
449 } | 477 } |
450 } | 478 } |
451 | 479 |
452 FixedTempoEstimator::FeatureSet | 480 FixedTempoEstimator::FeatureSet |
453 FixedTempoEstimator::D::assembleFeatures() | 481 FixedTempoEstimator::D::assembleFeatures() |
454 { | 482 { |
455 FeatureSet fs; | 483 FeatureSet fs; |
456 if (!m_r) return fs; // No results | 484 if (!m_r) return fs; // No autocorrelation: no results |
457 | 485 |
458 Feature feature; | 486 Feature feature; |
459 feature.hasTimestamp = true; | 487 feature.hasTimestamp = true; |
460 feature.hasDuration = false; | 488 feature.hasDuration = false; |
461 feature.label = ""; | 489 feature.label = ""; |
465 char buffer[40]; | 493 char buffer[40]; |
466 | 494 |
467 int n = m_n; | 495 int n = m_n; |
468 | 496 |
469 for (int i = 0; i < n; ++i) { | 497 for (int i = 0; i < n; ++i) { |
498 | |
499 // Return the detection function in the DF output | |
500 | |
470 feature.timestamp = m_start + | 501 feature.timestamp = m_start + |
471 RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate); | 502 RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate); |
472 feature.values[0] = m_df[i]; | 503 feature.values[0] = m_df[i]; |
473 feature.label = ""; | 504 feature.label = ""; |
474 fs[DFOutput].push_back(feature); | 505 fs[DFOutput].push_back(feature); |
475 } | 506 } |
476 | 507 |
477 for (int i = 1; i < n/2; ++i) { | 508 for (int i = 1; i < n/2; ++i) { |
509 | |
510 // Return the raw autocorrelation in the ACF output, each | |
511 // value labelled according to its corresponding tempo | |
512 | |
478 feature.timestamp = m_start + | 513 feature.timestamp = m_start + |
479 RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate); | 514 RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate); |
480 feature.values[0] = m_r[i]; | 515 feature.values[0] = m_r[i]; |
481 sprintf(buffer, "%.1f bpm", lag2tempo(i)); | 516 sprintf(buffer, "%.1f bpm", lag2tempo(i)); |
482 if (i == n/2-1) feature.label = ""; | 517 if (i == n/2-1) feature.label = ""; |
494 | 529 |
495 for (int i = p0; i <= p1 && i < n/2-1; ++i) { | 530 for (int i = p0; i <= p1 && i < n/2-1; ++i) { |
496 | 531 |
497 if (m_fr[i] > m_fr[i-1] && | 532 if (m_fr[i] > m_fr[i-1] && |
498 m_fr[i] > m_fr[i+1]) { | 533 m_fr[i] > m_fr[i+1]) { |
534 | |
535 // This is a peak in the filtered autocorrelation: stick | |
536 // it into the map from filtered autocorrelation to lag | |
537 // index -- this sorts our peaks by filtered acf value | |
538 | |
499 candidates[m_fr[i]] = i; | 539 candidates[m_fr[i]] = i; |
500 } | 540 } |
541 | |
542 // Also return the filtered autocorrelation in its own output | |
501 | 543 |
502 feature.timestamp = m_start + | 544 feature.timestamp = m_start + |
503 RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate); | 545 RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate); |
504 feature.values[0] = m_fr[i]; | 546 feature.values[0] = m_fr[i]; |
505 sprintf(buffer, "%.1f bpm", lag2tempo(i)); | 547 sprintf(buffer, "%.1f bpm", lag2tempo(i)); |
517 feature.timestamp = m_start; | 559 feature.timestamp = m_start; |
518 | 560 |
519 feature.hasDuration = true; | 561 feature.hasDuration = true; |
520 feature.duration = m_lasttime - m_start; | 562 feature.duration = m_lasttime - m_start; |
521 | 563 |
564 // The map contains only peaks and is sorted by filtered acf | |
565 // value, so the final element in it is our "best" tempo guess | |
566 | |
522 std::map<float, int>::const_iterator ci = candidates.end(); | 567 std::map<float, int>::const_iterator ci = candidates.end(); |
523 --ci; | 568 --ci; |
524 int maxpi = ci->second; | 569 int maxpi = ci->second; |
525 | 570 |
526 if (m_t[maxpi] > 0) { | 571 if (m_t[maxpi] > 0) { |
527 cerr << "*** Using adjusted tempo " << m_t[maxpi] << " instead of lag tempo " << lag2tempo(maxpi) << endl; | 572 |
573 // This lag has an adjusted tempo from the averaging process: | |
574 // use it | |
575 | |
528 feature.values[0] = m_t[maxpi]; | 576 feature.values[0] = m_t[maxpi]; |
577 | |
529 } else { | 578 } else { |
530 // shouldn't happen -- it would imply that this high value was not a peak! | 579 |
580 // shouldn't happen -- it would imply that this high value was | |
581 // not a peak! | |
582 | |
531 feature.values[0] = lag2tempo(maxpi); | 583 feature.values[0] = lag2tempo(maxpi); |
532 cerr << "WARNING: No stored tempo for index " << maxpi << endl; | 584 cerr << "WARNING: No stored tempo for index " << maxpi << endl; |
533 } | 585 } |
534 | 586 |
535 sprintf(buffer, "%.1f bpm", feature.values[0]); | 587 sprintf(buffer, "%.1f bpm", feature.values[0]); |
536 feature.label = buffer; | 588 feature.label = buffer; |
537 | 589 |
590 // Return the best tempo in the main output | |
591 | |
538 fs[TempoOutput].push_back(feature); | 592 fs[TempoOutput].push_back(feature); |
593 | |
594 // And return the other estimates (up to the arbitrarily chosen | |
595 // number of 10 of them) in the candidates output | |
539 | 596 |
540 feature.values.clear(); | 597 feature.values.clear(); |
541 feature.label = ""; | 598 feature.label = ""; |
542 | 599 |
543 while (feature.values.size() < 8) { | 600 while (feature.values.size() < 10) { |
544 if (m_t[ci->second] > 0) { | 601 if (m_t[ci->second] > 0) { |
545 feature.values.push_back(m_t[ci->second]); | 602 feature.values.push_back(m_t[ci->second]); |
546 } else { | 603 } else { |
547 feature.values.push_back(lag2tempo(ci->second)); | 604 feature.values.push_back(lag2tempo(ci->second)); |
548 } | 605 } |