annotate examples/FixedTempoEstimator.cpp @ 215:a5a54b60e82e

* More tweaks to fixed-tempo estimator
author cannam
date Fri, 24 Oct 2008 16:10:43 +0000
parents 87b131a54b0a
children 991d2ae87980
rev   line source
cannam@198 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@198 2
cannam@198 3 /*
cannam@198 4 Vamp
cannam@198 5
cannam@198 6 An API for audio analysis and feature extraction plugins.
cannam@198 7
cannam@198 8 Centre for Digital Music, Queen Mary, University of London.
cannam@198 9 Copyright 2006-2008 Chris Cannam and QMUL.
cannam@198 10
cannam@198 11 Permission is hereby granted, free of charge, to any person
cannam@198 12 obtaining a copy of this software and associated documentation
cannam@198 13 files (the "Software"), to deal in the Software without
cannam@198 14 restriction, including without limitation the rights to use, copy,
cannam@198 15 modify, merge, publish, distribute, sublicense, and/or sell copies
cannam@198 16 of the Software, and to permit persons to whom the Software is
cannam@198 17 furnished to do so, subject to the following conditions:
cannam@198 18
cannam@198 19 The above copyright notice and this permission notice shall be
cannam@198 20 included in all copies or substantial portions of the Software.
cannam@198 21
cannam@198 22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
cannam@198 23 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
cannam@198 24 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
cannam@198 25 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
cannam@198 26 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
cannam@198 27 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
cannam@198 28 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
cannam@198 29
cannam@198 30 Except as contained in this notice, the names of the Centre for
cannam@198 31 Digital Music; Queen Mary, University of London; and Chris Cannam
cannam@198 32 shall not be used in advertising or otherwise to promote the sale,
cannam@198 33 use or other dealings in this Software without prior written
cannam@198 34 authorization.
cannam@198 35 */
cannam@198 36
cannam@198 37 #include "FixedTempoEstimator.h"
cannam@198 38
cannam@198 39 using std::string;
cannam@198 40 using std::vector;
cannam@198 41 using std::cerr;
cannam@198 42 using std::endl;
cannam@198 43
cannam@198 44 using Vamp::RealTime;
cannam@198 45
cannam@198 46 #include <cmath>
cannam@198 47
cannam@198 48
cannam@198 49 FixedTempoEstimator::FixedTempoEstimator(float inputSampleRate) :
cannam@198 50 Plugin(inputSampleRate),
cannam@198 51 m_stepSize(0),
cannam@198 52 m_blockSize(0),
cannam@198 53 m_priorMagnitudes(0),
cannam@200 54 m_df(0),
cannam@200 55 m_r(0),
cannam@200 56 m_fr(0),
cannam@204 57 m_t(0),
cannam@200 58 m_n(0)
cannam@198 59 {
cannam@198 60 }
cannam@198 61
cannam@198 62 FixedTempoEstimator::~FixedTempoEstimator()
cannam@198 63 {
cannam@198 64 delete[] m_priorMagnitudes;
cannam@198 65 delete[] m_df;
cannam@200 66 delete[] m_r;
cannam@200 67 delete[] m_fr;
cannam@204 68 delete[] m_t;
cannam@198 69 }
cannam@198 70
cannam@198 71 string
cannam@198 72 FixedTempoEstimator::getIdentifier() const
cannam@198 73 {
cannam@198 74 return "fixedtempo";
cannam@198 75 }
cannam@198 76
cannam@198 77 string
cannam@198 78 FixedTempoEstimator::getName() const
cannam@198 79 {
cannam@198 80 return "Simple Fixed Tempo Estimator";
cannam@198 81 }
cannam@198 82
cannam@198 83 string
cannam@198 84 FixedTempoEstimator::getDescription() const
cannam@198 85 {
cannam@198 86 return "Study a short section of audio and estimate its tempo, assuming the tempo is constant";
cannam@198 87 }
cannam@198 88
cannam@198 89 string
cannam@198 90 FixedTempoEstimator::getMaker() const
cannam@198 91 {
cannam@198 92 return "Vamp SDK Example Plugins";
cannam@198 93 }
cannam@198 94
cannam@198 95 int
cannam@198 96 FixedTempoEstimator::getPluginVersion() const
cannam@198 97 {
cannam@198 98 return 1;
cannam@198 99 }
cannam@198 100
cannam@198 101 string
cannam@198 102 FixedTempoEstimator::getCopyright() const
cannam@198 103 {
cannam@198 104 return "Code copyright 2008 Queen Mary, University of London. Freely redistributable (BSD license)";
cannam@198 105 }
cannam@198 106
cannam@198 107 size_t
cannam@198 108 FixedTempoEstimator::getPreferredStepSize() const
cannam@198 109 {
cannam@207 110 return 64;
cannam@198 111 }
cannam@198 112
cannam@198 113 size_t
cannam@198 114 FixedTempoEstimator::getPreferredBlockSize() const
cannam@198 115 {
cannam@207 116 return 256;
cannam@198 117 }
cannam@198 118
cannam@198 119 bool
cannam@198 120 FixedTempoEstimator::initialise(size_t channels, size_t stepSize, size_t blockSize)
cannam@198 121 {
cannam@198 122 if (channels < getMinChannelCount() ||
cannam@198 123 channels > getMaxChannelCount()) return false;
cannam@198 124
cannam@198 125 m_stepSize = stepSize;
cannam@198 126 m_blockSize = blockSize;
cannam@198 127
cannam@209 128 float dfLengthSecs = 10.f;
cannam@198 129 m_dfsize = (dfLengthSecs * m_inputSampleRate) / m_stepSize;
cannam@198 130
cannam@198 131 m_priorMagnitudes = new float[m_blockSize/2];
cannam@198 132 m_df = new float[m_dfsize];
cannam@198 133
cannam@198 134 for (size_t i = 0; i < m_blockSize/2; ++i) {
cannam@198 135 m_priorMagnitudes[i] = 0.f;
cannam@198 136 }
cannam@198 137 for (size_t i = 0; i < m_dfsize; ++i) {
cannam@198 138 m_df[i] = 0.f;
cannam@198 139 }
cannam@198 140
cannam@198 141 m_n = 0;
cannam@198 142
cannam@198 143 return true;
cannam@198 144 }
cannam@198 145
cannam@198 146 void
cannam@198 147 FixedTempoEstimator::reset()
cannam@198 148 {
cannam@207 149 cerr << "FixedTempoEstimator: reset called" << endl;
cannam@198 150
cannam@198 151 if (!m_priorMagnitudes) return;
cannam@198 152
cannam@207 153 cerr << "FixedTempoEstimator: resetting" << endl;
cannam@198 154
cannam@198 155 for (size_t i = 0; i < m_blockSize/2; ++i) {
cannam@198 156 m_priorMagnitudes[i] = 0.f;
cannam@198 157 }
cannam@198 158 for (size_t i = 0; i < m_dfsize; ++i) {
cannam@198 159 m_df[i] = 0.f;
cannam@198 160 }
cannam@198 161
cannam@200 162 delete[] m_r;
cannam@200 163 m_r = 0;
cannam@200 164
cannam@200 165 delete[] m_fr;
cannam@200 166 m_fr = 0;
cannam@200 167
cannam@204 168 delete[] m_t;
cannam@204 169 m_t = 0;
cannam@204 170
cannam@198 171 m_n = 0;
cannam@198 172
cannam@198 173 m_start = RealTime::zeroTime;
cannam@198 174 m_lasttime = RealTime::zeroTime;
cannam@198 175 }
cannam@198 176
cannam@198 177 FixedTempoEstimator::ParameterList
cannam@198 178 FixedTempoEstimator::getParameterDescriptors() const
cannam@198 179 {
cannam@198 180 ParameterList list;
cannam@198 181 return list;
cannam@198 182 }
cannam@198 183
cannam@198 184 float
cannam@198 185 FixedTempoEstimator::getParameter(std::string id) const
cannam@198 186 {
cannam@198 187 return 0.f;
cannam@198 188 }
cannam@198 189
cannam@198 190 void
cannam@198 191 FixedTempoEstimator::setParameter(std::string id, float value)
cannam@198 192 {
cannam@198 193 }
cannam@198 194
// Indices into the FeatureSet for each of the plugin's outputs.  These
// follow the order in which getOutputDescriptors() pushes its
// descriptors.  They are constants, so declare them const to prevent
// accidental modification.
static const int TempoOutput = 0;
static const int CandidatesOutput = 1;
static const int DFOutput = 2;
static const int ACFOutput = 3;
static const int FilteredACFOutput = 4;
cannam@200 200
cannam@198 201 FixedTempoEstimator::OutputList
cannam@198 202 FixedTempoEstimator::getOutputDescriptors() const
cannam@198 203 {
cannam@198 204 OutputList list;
cannam@198 205
cannam@198 206 OutputDescriptor d;
cannam@198 207 d.identifier = "tempo";
cannam@198 208 d.name = "Tempo";
cannam@198 209 d.description = "Estimated tempo";
cannam@198 210 d.unit = "bpm";
cannam@198 211 d.hasFixedBinCount = true;
cannam@198 212 d.binCount = 1;
cannam@198 213 d.hasKnownExtents = false;
cannam@198 214 d.isQuantized = false;
cannam@198 215 d.sampleType = OutputDescriptor::VariableSampleRate;
cannam@198 216 d.sampleRate = m_inputSampleRate;
cannam@198 217 d.hasDuration = true; // our returned tempo spans a certain range
cannam@198 218 list.push_back(d);
cannam@198 219
cannam@200 220 d.identifier = "candidates";
cannam@200 221 d.name = "Tempo candidates";
cannam@200 222 d.description = "Possible tempo estimates, one per bin with the most likely in the first bin";
cannam@200 223 d.unit = "bpm";
cannam@200 224 d.hasFixedBinCount = false;
cannam@200 225 list.push_back(d);
cannam@200 226
cannam@198 227 d.identifier = "detectionfunction";
cannam@198 228 d.name = "Detection Function";
cannam@198 229 d.description = "Onset detection function";
cannam@198 230 d.unit = "";
cannam@198 231 d.hasFixedBinCount = 1;
cannam@198 232 d.binCount = 1;
cannam@198 233 d.hasKnownExtents = true;
cannam@198 234 d.minValue = 0.0;
cannam@198 235 d.maxValue = 1.0;
cannam@198 236 d.isQuantized = false;
cannam@198 237 d.quantizeStep = 0.0;
cannam@198 238 d.sampleType = OutputDescriptor::FixedSampleRate;
cannam@198 239 if (m_stepSize) {
cannam@198 240 d.sampleRate = m_inputSampleRate / m_stepSize;
cannam@198 241 } else {
cannam@198 242 d.sampleRate = m_inputSampleRate / (getPreferredBlockSize()/2);
cannam@198 243 }
cannam@198 244 d.hasDuration = false;
cannam@198 245 list.push_back(d);
cannam@198 246
cannam@198 247 d.identifier = "acf";
cannam@198 248 d.name = "Autocorrelation Function";
cannam@198 249 d.description = "Autocorrelation of onset detection function";
cannam@198 250 d.hasKnownExtents = false;
cannam@201 251 d.unit = "r";
cannam@198 252 list.push_back(d);
cannam@198 253
cannam@198 254 d.identifier = "filtered_acf";
cannam@198 255 d.name = "Filtered Autocorrelation";
cannam@198 256 d.description = "Filtered autocorrelation of onset detection function";
cannam@201 257 d.unit = "r";
cannam@198 258 list.push_back(d);
cannam@198 259
cannam@198 260 return list;
cannam@198 261 }
cannam@198 262
cannam@198 263 FixedTempoEstimator::FeatureSet
cannam@198 264 FixedTempoEstimator::process(const float *const *inputBuffers, RealTime ts)
cannam@198 265 {
cannam@198 266 FeatureSet fs;
cannam@198 267
cannam@198 268 if (m_stepSize == 0) {
cannam@198 269 cerr << "ERROR: FixedTempoEstimator::process: "
cannam@198 270 << "FixedTempoEstimator has not been initialised"
cannam@198 271 << endl;
cannam@198 272 return fs;
cannam@198 273 }
cannam@198 274
cannam@207 275 // if (m_n < m_dfsize) cerr << "m_n = " << m_n << endl;
cannam@198 276
cannam@198 277 if (m_n == 0) m_start = ts;
cannam@198 278 m_lasttime = ts;
cannam@198 279
cannam@198 280 if (m_n == m_dfsize) {
cannam@200 281 calculate();
cannam@200 282 fs = assembleFeatures();
cannam@198 283 ++m_n;
cannam@198 284 return fs;
cannam@198 285 }
cannam@198 286
cannam@198 287 if (m_n > m_dfsize) return FeatureSet();
cannam@198 288
cannam@207 289 float value = 0.f;
cannam@207 290
cannam@198 291 for (size_t i = 1; i < m_blockSize/2; ++i) {
cannam@198 292
cannam@198 293 float real = inputBuffers[0][i*2];
cannam@198 294 float imag = inputBuffers[0][i*2 + 1];
cannam@198 295
cannam@198 296 float sqrmag = real * real + imag * imag;
cannam@207 297 value += fabsf(sqrmag - m_priorMagnitudes[i]);
cannam@198 298
cannam@198 299 m_priorMagnitudes[i] = sqrmag;
cannam@198 300 }
cannam@198 301
cannam@207 302 m_df[m_n] = value;
cannam@207 303
cannam@198 304 ++m_n;
cannam@198 305 return fs;
cannam@198 306 }
cannam@198 307
cannam@198 308 FixedTempoEstimator::FeatureSet
cannam@198 309 FixedTempoEstimator::getRemainingFeatures()
cannam@198 310 {
cannam@198 311 FeatureSet fs;
cannam@198 312 if (m_n > m_dfsize) return fs;
cannam@200 313 calculate();
cannam@200 314 fs = assembleFeatures();
cannam@198 315 ++m_n;
cannam@198 316 return fs;
cannam@198 317 }
cannam@198 318
cannam@198 319 float
cannam@199 320 FixedTempoEstimator::lag2tempo(int lag)
cannam@199 321 {
cannam@198 322 return 60.f / ((lag * m_stepSize) / m_inputSampleRate);
cannam@198 323 }
cannam@198 324
cannam@207 325 int
cannam@207 326 FixedTempoEstimator::tempo2lag(float tempo)
cannam@207 327 {
cannam@207 328 return ((60.f / tempo) * m_inputSampleRate) / m_stepSize;
cannam@207 329 }
cannam@207 330
cannam@200 331 void
cannam@200 332 FixedTempoEstimator::calculate()
cannam@200 333 {
cannam@207 334 cerr << "FixedTempoEstimator::calculate: m_n = " << m_n << endl;
cannam@200 335
cannam@200 336 if (m_r) {
cannam@207 337 cerr << "FixedTempoEstimator::calculate: calculation already happened?" << endl;
cannam@200 338 return;
cannam@200 339 }
cannam@200 340
cannam@209 341 if (m_n < m_dfsize / 9) {
cannam@207 342 cerr << "FixedTempoEstimator::calculate: Not enough data to go on (have " << m_n << ", want at least " << m_dfsize/4 << ")" << endl;
cannam@200 343 return; // not enough data (perhaps we should return the duration of the input as the "estimated" beat length?)
cannam@200 344 }
cannam@200 345
cannam@200 346 int n = m_n;
cannam@200 347
cannam@200 348 m_r = new float[n/2];
cannam@200 349 m_fr = new float[n/2];
cannam@204 350 m_t = new float[n/2];
cannam@200 351
cannam@200 352 for (int i = 0; i < n/2; ++i) {
cannam@200 353 m_r[i] = 0.f;
cannam@200 354 m_fr[i] = 0.f;
cannam@207 355 m_t[i] = lag2tempo(i);
cannam@200 356 }
cannam@200 357
cannam@200 358 for (int i = 0; i < n/2; ++i) {
cannam@200 359
cannam@200 360 for (int j = i; j < n-1; ++j) {
cannam@200 361 m_r[i] += m_df[j] * m_df[j - i];
cannam@200 362 }
cannam@200 363
cannam@200 364 m_r[i] /= n - i - 1;
cannam@200 365 }
cannam@200 366
cannam@215 367 float related[] = { 0.5, 2, 3, 4 };
cannam@208 368
cannam@209 369 for (int i = 1; i < n/2-1; ++i) {
cannam@204 370
cannam@209 371 float weight = 1.f - fabsf(128.f - lag2tempo(i)) * 0.005;
cannam@209 372 if (weight < 0.f) weight = 0.f;
cannam@215 373 weight = weight * weight * weight;
cannam@209 374
cannam@209 375 m_fr[i] = m_r[i];
cannam@204 376
cannam@200 377 int div = 1;
cannam@200 378
cannam@215 379 for (int j = 0; j < int(sizeof(related)/sizeof(related[0])); ++j) {
cannam@204 380
cannam@215 381 int k0 = int(i * related[j] + 0.5);
cannam@209 382
cannam@215 383 if (k0 >= 0 && k0 < int(n/2)) {
cannam@204 384
cannam@207 385 int kmax = 0, kmin = 0;
cannam@207 386 float kvmax = 0, kvmin = 0;
cannam@209 387 bool have = false;
cannam@204 388
cannam@209 389 for (int k = k0 - 1; k <= k0 + 1; ++k) {
cannam@204 390
cannam@209 391 if (k < 0 || k >= n/2) continue;
cannam@209 392
cannam@215 393 if (!have || (m_r[k] > kvmax)) { kmax = k; kvmax = m_r[k]; }
cannam@215 394 if (!have || (m_r[k] < kvmin)) { kmin = k; kvmin = m_r[k]; }
cannam@209 395
cannam@209 396 have = true;
cannam@204 397 }
cannam@209 398
cannam@215 399 m_fr[i] += m_r[kmax] / 5;
cannam@209 400
cannam@209 401 if ((kmax == 0 || m_r[kmax] > m_r[kmax-1]) &&
cannam@209 402 (kmax == n/2-1 || m_r[kmax] > m_r[kmax+1]) &&
cannam@207 403 kvmax > kvmin * 1.05) {
cannam@209 404
cannam@207 405 m_t[i] = m_t[i] + lag2tempo(kmax) * related[j];
cannam@207 406 ++div;
cannam@207 407 }
cannam@204 408 }
cannam@204 409 }
cannam@209 410
cannam@204 411 m_t[i] /= div;
cannam@204 412
cannam@215 413 // if (div > 1) {
cannam@215 414 // cerr << "adjusting tempo from " << lag2tempo(i) << " to "
cannam@215 415 // << m_t[i] << " for fr = " << m_fr[i] << " (div = " << div << ")" << endl;
cannam@215 416 // }
cannam@209 417
cannam@215 418 m_fr[i] += m_fr[i] * (weight / 3);
cannam@207 419 }
cannam@200 420 }
cannam@200 421
cannam@200 422
cannam@198 423 FixedTempoEstimator::FeatureSet
cannam@200 424 FixedTempoEstimator::assembleFeatures()
cannam@198 425 {
cannam@198 426 FeatureSet fs;
cannam@200 427 if (!m_r) return fs; // No results
cannam@200 428
cannam@198 429 Feature feature;
cannam@198 430 feature.hasTimestamp = true;
cannam@198 431 feature.hasDuration = false;
cannam@198 432 feature.label = "";
cannam@198 433 feature.values.clear();
cannam@198 434 feature.values.push_back(0.f);
cannam@198 435
cannam@200 436 char buffer[40];
cannam@198 437
cannam@198 438 int n = m_n;
cannam@198 439
cannam@198 440 for (int i = 0; i < n; ++i) {
cannam@208 441 feature.timestamp = m_start +
cannam@208 442 RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
cannam@200 443 feature.values[0] = m_df[i];
cannam@198 444 feature.label = "";
cannam@200 445 fs[DFOutput].push_back(feature);
cannam@198 446 }
cannam@198 447
cannam@199 448 for (int i = 1; i < n/2; ++i) {
cannam@208 449 feature.timestamp = m_start +
cannam@208 450 RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
cannam@200 451 feature.values[0] = m_r[i];
cannam@199 452 sprintf(buffer, "%.1f bpm", lag2tempo(i));
cannam@200 453 if (i == n/2-1) feature.label = "";
cannam@200 454 else feature.label = buffer;
cannam@200 455 fs[ACFOutput].push_back(feature);
cannam@198 456 }
cannam@198 457
cannam@215 458 float t0 = 50.f; // our minimum detected tempo (could be a parameter)
cannam@215 459 float t1 = 190.f; // our maximum detected tempo
cannam@198 460
cannam@207 461 int p0 = tempo2lag(t1);
cannam@207 462 int p1 = tempo2lag(t0);
cannam@198 463
cannam@200 464 std::map<float, int> candidates;
cannam@198 465
cannam@200 466 for (int i = p0; i <= p1 && i < n/2-1; ++i) {
cannam@198 467
cannam@209 468 if (m_fr[i] > m_fr[i-1] &&
cannam@209 469 m_fr[i] > m_fr[i+1]) {
cannam@209 470 candidates[m_fr[i]] = i;
cannam@209 471 }
cannam@198 472
cannam@208 473 feature.timestamp = m_start +
cannam@208 474 RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
cannam@200 475 feature.values[0] = m_fr[i];
cannam@199 476 sprintf(buffer, "%.1f bpm", lag2tempo(i));
cannam@200 477 if (i == p1 || i == n/2-2) feature.label = "";
cannam@200 478 else feature.label = buffer;
cannam@200 479 fs[FilteredACFOutput].push_back(feature);
cannam@198 480 }
cannam@198 481
cannam@207 482 // cerr << "maxpi = " << maxpi << " for tempo " << lag2tempo(maxpi) << " (value = " << maxp << ")" << endl;
cannam@198 483
cannam@200 484 if (candidates.empty()) {
cannam@207 485 cerr << "No tempo candidates!" << endl;
cannam@200 486 return fs;
cannam@200 487 }
cannam@198 488
cannam@198 489 feature.hasTimestamp = true;
cannam@198 490 feature.timestamp = m_start;
cannam@198 491
cannam@198 492 feature.hasDuration = true;
cannam@198 493 feature.duration = m_lasttime - m_start;
cannam@198 494
cannam@200 495 std::map<float, int>::const_iterator ci = candidates.end();
cannam@200 496 --ci;
cannam@200 497 int maxpi = ci->second;
cannam@198 498
cannam@204 499 if (m_t[maxpi] > 0) {
cannam@207 500 cerr << "*** Using adjusted tempo " << m_t[maxpi] << " instead of lag tempo " << lag2tempo(maxpi) << endl;
cannam@204 501 feature.values[0] = m_t[maxpi];
cannam@204 502 } else {
cannam@204 503 // shouldn't happen -- it would imply that this high value was not a peak!
cannam@204 504 feature.values[0] = lag2tempo(maxpi);
cannam@207 505 cerr << "WARNING: No stored tempo for index " << maxpi << endl;
cannam@204 506 }
cannam@204 507
cannam@204 508 sprintf(buffer, "%.1f bpm", feature.values[0]);
cannam@199 509 feature.label = buffer;
cannam@199 510
cannam@200 511 fs[TempoOutput].push_back(feature);
cannam@198 512
cannam@200 513 feature.values.clear();
cannam@200 514 feature.label = "";
cannam@200 515
cannam@200 516 while (feature.values.size() < 8) {
cannam@213 517 // cerr << "adding tempo value from lag " << ci->second << endl;
cannam@207 518 if (m_t[ci->second] > 0) {
cannam@207 519 feature.values.push_back(m_t[ci->second]);
cannam@207 520 } else {
cannam@207 521 feature.values.push_back(lag2tempo(ci->second));
cannam@207 522 }
cannam@200 523 if (ci == candidates.begin()) break;
cannam@200 524 --ci;
cannam@200 525 }
cannam@200 526
cannam@200 527 fs[CandidatesOutput].push_back(feature);
cannam@200 528
cannam@198 529 return fs;
cannam@198 530 }