comparison examples/FixedTempoEstimator.cpp @ 200:a86e777bf9a6

* tweaks to fixed-tempo estimator
author cannam
date Thu, 09 Oct 2008 15:37:34 +0000
parents 84c4bb209227
children 8e1b1fa94e15
comparison
equal deleted inserted replaced
199:84c4bb209227 200:a86e777bf9a6
49 FixedTempoEstimator::FixedTempoEstimator(float inputSampleRate) : 49 FixedTempoEstimator::FixedTempoEstimator(float inputSampleRate) :
50 Plugin(inputSampleRate), 50 Plugin(inputSampleRate),
51 m_stepSize(0), 51 m_stepSize(0),
52 m_blockSize(0), 52 m_blockSize(0),
53 m_priorMagnitudes(0), 53 m_priorMagnitudes(0),
54 m_df(0) 54 m_df(0),
55 m_r(0),
56 m_fr(0),
57 m_n(0)
55 { 58 {
56 } 59 }
57 60
58 FixedTempoEstimator::~FixedTempoEstimator() 61 FixedTempoEstimator::~FixedTempoEstimator()
59 { 62 {
60 delete[] m_priorMagnitudes; 63 delete[] m_priorMagnitudes;
61 delete[] m_df; 64 delete[] m_df;
65 delete[] m_r;
66 delete[] m_fr;
62 } 67 }
63 68
64 string 69 string
65 FixedTempoEstimator::getIdentifier() const 70 FixedTempoEstimator::getIdentifier() const
66 { 71 {
104 } 109 }
105 110
106 size_t 111 size_t
107 FixedTempoEstimator::getPreferredBlockSize() const 112 FixedTempoEstimator::getPreferredBlockSize() const
108 { 113 {
109 return 128; 114 return 64;
110 } 115 }
111 116
112 bool 117 bool
113 FixedTempoEstimator::initialise(size_t channels, size_t stepSize, size_t blockSize) 118 FixedTempoEstimator::initialise(size_t channels, size_t stepSize, size_t blockSize)
114 { 119 {
150 } 155 }
151 for (size_t i = 0; i < m_dfsize; ++i) { 156 for (size_t i = 0; i < m_dfsize; ++i) {
152 m_df[i] = 0.f; 157 m_df[i] = 0.f;
153 } 158 }
154 159
160 delete[] m_r;
161 m_r = 0;
162
163 delete[] m_fr;
164 m_fr = 0;
165
155 m_n = 0; 166 m_n = 0;
156 167
157 m_start = RealTime::zeroTime; 168 m_start = RealTime::zeroTime;
158 m_lasttime = RealTime::zeroTime; 169 m_lasttime = RealTime::zeroTime;
159 } 170 }
173 184
174 void 185 void
175 FixedTempoEstimator::setParameter(std::string id, float value) 186 FixedTempoEstimator::setParameter(std::string id, float value)
176 { 187 {
177 } 188 }
189
190 static int TempoOutput = 0;
191 static int CandidatesOutput = 1;
192 static int DFOutput = 2;
193 static int ACFOutput = 3;
194 static int FilteredACFOutput = 4;
178 195
179 FixedTempoEstimator::OutputList 196 FixedTempoEstimator::OutputList
180 FixedTempoEstimator::getOutputDescriptors() const 197 FixedTempoEstimator::getOutputDescriptors() const
181 { 198 {
182 OutputList list; 199 OutputList list;
191 d.hasKnownExtents = false; 208 d.hasKnownExtents = false;
192 d.isQuantized = false; 209 d.isQuantized = false;
193 d.sampleType = OutputDescriptor::VariableSampleRate; 210 d.sampleType = OutputDescriptor::VariableSampleRate;
194 d.sampleRate = m_inputSampleRate; 211 d.sampleRate = m_inputSampleRate;
195 d.hasDuration = true; // our returned tempo spans a certain range 212 d.hasDuration = true; // our returned tempo spans a certain range
213 list.push_back(d);
214
215 d.identifier = "candidates";
216 d.name = "Tempo candidates";
217 d.description = "Possible tempo estimates, one per bin with the most likely in the first bin";
218 d.unit = "bpm";
219 d.hasFixedBinCount = false;
196 list.push_back(d); 220 list.push_back(d);
197 221
198 d.identifier = "detectionfunction"; 222 d.identifier = "detectionfunction";
199 d.name = "Detection Function"; 223 d.name = "Detection Function";
200 d.description = "Onset detection function"; 224 d.description = "Onset detection function";
239 << "FixedTempoEstimator has not been initialised" 263 << "FixedTempoEstimator has not been initialised"
240 << endl; 264 << endl;
241 return fs; 265 return fs;
242 } 266 }
243 267
244 if (m_n < m_dfsize) std::cerr << "m_n = " << m_n << std::endl; 268 // if (m_n < m_dfsize) std::cerr << "m_n = " << m_n << std::endl;
245 269
246 if (m_n == 0) m_start = ts; 270 if (m_n == 0) m_start = ts;
247 m_lasttime = ts; 271 m_lasttime = ts;
248 272
249 if (m_n == m_dfsize) { 273 if (m_n == m_dfsize) {
250 fs = calculateFeatures(); 274 calculate();
275 fs = assembleFeatures();
251 ++m_n; 276 ++m_n;
252 return fs; 277 return fs;
253 } 278 }
254 279
255 if (m_n > m_dfsize) return FeatureSet(); 280 if (m_n > m_dfsize) return FeatureSet();
279 FixedTempoEstimator::FeatureSet 304 FixedTempoEstimator::FeatureSet
280 FixedTempoEstimator::getRemainingFeatures() 305 FixedTempoEstimator::getRemainingFeatures()
281 { 306 {
282 FeatureSet fs; 307 FeatureSet fs;
283 if (m_n > m_dfsize) return fs; 308 if (m_n > m_dfsize) return fs;
284 fs = calculateFeatures(); 309 calculate();
310 fs = assembleFeatures();
285 ++m_n; 311 ++m_n;
286 return fs; 312 return fs;
287 } 313 }
288 314
289 float 315 float
290 FixedTempoEstimator::lag2tempo(int lag) 316 FixedTempoEstimator::lag2tempo(int lag)
291 { 317 {
292 return 60.f / ((lag * m_stepSize) / m_inputSampleRate); 318 return 60.f / ((lag * m_stepSize) / m_inputSampleRate);
293 } 319 }
294 320
321 void
322 FixedTempoEstimator::calculate()
323 {
324 std::cerr << "FixedTempoEstimator::calculate: m_n = " << m_n << std::endl;
325
326 if (m_r) {
327 std::cerr << "FixedTempoEstimator::calculate: calculation already happened?" << std::endl;
328 return;
329 }
330
331 if (m_n < m_dfsize / 6) {
332 std::cerr << "FixedTempoEstimator::calculate: Not enough data to go on (have " << m_n << ", want at least " << m_dfsize/4 << ")" << std::endl;
333 return; // not enough data (perhaps we should return the duration of the input as the "estimated" beat length?)
334 }
335
336 int n = m_n;
337
338 m_r = new float[n/2];
339 m_fr = new float[n/2];
340
341 for (int i = 0; i < n/2; ++i) {
342 m_r[i] = 0.f;
343 m_fr[i] = 0.f;
344 }
345
346 for (int i = 0; i < n/2; ++i) {
347
348 for (int j = i; j < n-1; ++j) {
349 m_r[i] += m_df[j] * m_df[j - i];
350 }
351
352 m_r[i] /= n - i - 1;
353 }
354
355 for (int i = 1; i < n/2; ++i) {
356
357 m_fr[i] = m_r[i];
358
359 int div = 1;
360
361 int j = i;
362
363 while (j < n/2) {
364 m_fr[i] += m_r[j];
365 j *= 2;
366 ++div;
367 }
368 /*
369 for (int j = 1; j <= (n/2 - 1)/i; ++j) {
370 m_fr[i] += m_r[i * j];
371 ++div;
372 }
373 */
374 std::cerr << "i = " << i << ", (n/2 - 1)/i = " << (n/2 - 1)/i << ", sum = " << m_fr[i] << ", div = " << div << ", val = " << m_fr[i] / div << ", t = " << lag2tempo(i) << std::endl;
375
376
377 // m_fr[i] /= 1 + (n/2 - 1)/i;
378 m_fr[i] /= div;
379 }
380
381 std::cerr << "FixedTempoEstimator::calculate done" << std::endl;
382 }
383
384
295 FixedTempoEstimator::FeatureSet 385 FixedTempoEstimator::FeatureSet
296 FixedTempoEstimator::calculateFeatures() 386 FixedTempoEstimator::assembleFeatures()
297 { 387 {
298 FeatureSet fs; 388 FeatureSet fs;
389 if (!m_r) return fs; // No results
390
299 Feature feature; 391 Feature feature;
300 feature.hasTimestamp = true; 392 feature.hasTimestamp = true;
301 feature.hasDuration = false; 393 feature.hasDuration = false;
302 feature.label = ""; 394 feature.label = "";
303 feature.values.clear(); 395 feature.values.clear();
304 feature.values.push_back(0.f); 396 feature.values.push_back(0.f);
305 397
306 char buffer[20]; 398 char buffer[40];
307 399
308 if (m_n < m_dfsize / 4) return fs; // not enough data (perhaps we should return the duration of the input as the "estimated" beat length?)
309
310 std::cerr << "FixedTempoEstimator::calculateTempo: m_n = " << m_n << std::endl;
311
312 int n = m_n; 400 int n = m_n;
313 float *f = m_df;
314 401
315 for (int i = 0; i < n; ++i) { 402 for (int i = 0; i < n; ++i) {
316 feature.timestamp = RealTime::frame2RealTime(i * m_stepSize, 403 feature.timestamp = RealTime::frame2RealTime(i * m_stepSize,
317 m_inputSampleRate); 404 m_inputSampleRate);
318 feature.values[0] = f[i]; 405 feature.values[0] = m_df[i];
319 feature.label = ""; 406 feature.label = "";
320 fs[1].push_back(feature); 407 fs[DFOutput].push_back(feature);
321 }
322
323 float *r = new float[n/2];
324 for (int i = 0; i < n/2; ++i) r[i] = 0.f;
325
326 int minlag = 10;
327
328 for (int i = 0; i < n/2; ++i) {
329 for (int j = i; j < n-1; ++j) {
330 r[i] += f[j] * f[j - i];
331 }
332 r[i] /= n - i - 1;
333 } 408 }
334 409
335 for (int i = 1; i < n/2; ++i) { 410 for (int i = 1; i < n/2; ++i) {
336 feature.timestamp = RealTime::frame2RealTime(i * m_stepSize, 411 feature.timestamp = RealTime::frame2RealTime(i * m_stepSize,
337 m_inputSampleRate); 412 m_inputSampleRate);
338 feature.values[0] = r[i]; 413 feature.values[0] = m_r[i];
339 sprintf(buffer, "%.1f bpm", lag2tempo(i)); 414 sprintf(buffer, "%.1f bpm", lag2tempo(i));
340 feature.label = buffer; 415 if (i == n/2-1) feature.label = "";
341 fs[2].push_back(feature); 416 else feature.label = buffer;
342 } 417 fs[ACFOutput].push_back(feature);
343 418 }
344 float max = 0.f;
345 int maxindex = 0;
346
347 std::cerr << "n/2 = " << n/2 << std::endl;
348
349 for (int i = minlag; i < n/2; ++i) {
350
351 if (i == minlag || r[i] > max) {
352 max = r[i];
353 maxindex = i;
354 }
355
356 if (i == 0 || i == n/2-1) continue;
357
358 if (r[i] > r[i-1] && r[i] > r[i+1]) {
359 std::cerr << "peak at " << i << " (value=" << r[i] << ", tempo would be " << lag2tempo(i) << ")" << std::endl;
360 }
361 }
362
363 std::cerr << "overall max at " << maxindex << " (value=" << max << ")" << std::endl;
364
365 float tempo = lag2tempo(maxindex);
366
367 std::cerr << "provisional tempo = " << tempo << std::endl;
368 419
369 float t0 = 60.f; 420 float t0 = 60.f;
370 float t1 = 180.f; 421 float t1 = 180.f;
371 422
372 int p0 = ((60.f / t1) * m_inputSampleRate) / m_stepSize; 423 int p0 = ((60.f / t1) * m_inputSampleRate) / m_stepSize;
373 int p1 = ((60.f / t0) * m_inputSampleRate) / m_stepSize; 424 int p1 = ((60.f / t0) * m_inputSampleRate) / m_stepSize;
374 425
375 std::cerr << "p0 = " << p0 << ", p1 = " << p1 << std::endl; 426 // std::cerr << "p0 = " << p0 << ", p1 = " << p1 << std::endl;
376 427
377 int pc = p1 - p0 + 1; 428 int pc = p1 - p0 + 1;
378 std::cerr << "pc = " << pc << std::endl; 429 // std::cerr << "pc = " << pc << std::endl;
379 // float *filtered = new float[pc]; 430
380 // for (int i = 0; i < pc; ++i) filtered[i] = 0.f; 431 // int maxpi = 0;
381 432 // float maxp = 0.f;
382 int maxpi = 0; 433
383 float maxp = 0.f; 434 std::map<float, int> candidates;
384 435
385 for (int i = p0; i <= p1; ++i) { 436 for (int i = p0; i <= p1 && i < n/2-1; ++i) {
386 437
387 // int fi = i - p0; 438 // Only candidates here are those that were peaks in the
388 439 // original acf
389 float filtered = 0.f; 440 // if (r[i] > r[i-1] && r[i] > r[i+1]) {
390 441 // candidates[filtered] = i;
391 for (int j = 1; j <= (n/2 - 1)/i; ++j) { 442 // }
392 // std::cerr << "j = " << j << ", i = " << i << std::endl; 443
393 filtered += r[i * j]; 444 candidates[m_fr[i]] = i;
394 }
395 filtered /= (n/2 - 1)/i;
396
397 if (i == p0 || filtered > maxp) {
398 maxp = filtered;
399 maxpi = i;
400 }
401 445
402 feature.timestamp = RealTime::frame2RealTime(i * m_stepSize, 446 feature.timestamp = RealTime::frame2RealTime(i * m_stepSize,
403 m_inputSampleRate); 447 m_inputSampleRate);
404 feature.values[0] = filtered; 448 feature.values[0] = m_fr[i];
405 sprintf(buffer, "%.1f bpm", lag2tempo(i)); 449 sprintf(buffer, "%.1f bpm", lag2tempo(i));
406 feature.label = buffer; 450 if (i == p1 || i == n/2-2) feature.label = "";
407 fs[3].push_back(feature); 451 else feature.label = buffer;
408 } 452 fs[FilteredACFOutput].push_back(feature);
409 453 }
410 std::cerr << "maxpi = " << maxpi << " for tempo " << lag2tempo(maxpi) << " (value = " << maxp << ")" << std::endl; 454
411 455 // std::cerr << "maxpi = " << maxpi << " for tempo " << lag2tempo(maxpi) << " (value = " << maxp << ")" << std::endl;
412 tempo = lag2tempo(maxpi); 456
413 457 if (candidates.empty()) {
414 delete[] r; 458 std::cerr << "No tempo candidates!" << std::endl;
459 return fs;
460 }
415 461
416 feature.hasTimestamp = true; 462 feature.hasTimestamp = true;
417 feature.timestamp = m_start; 463 feature.timestamp = m_start;
418 464
419 feature.hasDuration = true; 465 feature.hasDuration = true;
420 feature.duration = m_lasttime - m_start; 466 feature.duration = m_lasttime - m_start;
421 467
422 feature.values[0] = tempo; 468 std::map<float, int>::const_iterator ci = candidates.end();
423 469 --ci;
424 sprintf(buffer, "%.1f bpm", tempo); 470 int maxpi = ci->second;
471
472 feature.values[0] = lag2tempo(maxpi);
473
474 sprintf(buffer, "%.1f bpm", lag2tempo(maxpi));
425 feature.label = buffer; 475 feature.label = buffer;
426 476
427 fs[0].push_back(feature); 477 fs[TempoOutput].push_back(feature);
428 478
479 feature.values.clear();
480 feature.label = "";
481
482 while (feature.values.size() < 8) {
483 feature.values.push_back(lag2tempo(ci->second));
484 if (ci == candidates.begin()) break;
485 --ci;
486 }
487
488 fs[CandidatesOutput].push_back(feature);
489
429 return fs; 490 return fs;
430 } 491 }