annotate dsp/tempotracking/TempoTrackV2.cpp @ 277:09bceb0aeff6

* Add Matthew's newer beat tracking implementation
author Chris Cannam <c.cannam@qmul.ac.uk>
date Tue, 20 Jan 2009 15:01:01 +0000
parents
children 796170a9c8e4
rev   line source
c@277 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@277 2
c@277 3 /*
c@277 4 QM DSP Library
c@277 5
c@277 6 Centre for Digital Music, Queen Mary, University of London.
c@277 7 This file copyright 2008-2009 Matthew Davies and QMUL.
c@277 8 All rights reserved.
c@277 9 */
c@277 10
c@277 11 #include "TempoTrackV2.h"
c@277 12
c@277 13 #include <cmath>
c@277 14 #include <cstdlib>
c@277 15
c@277 16
c@277 17 //#define FRAMESIZE 512
c@277 18 //#define BIGFRAMESIZE 1024
c@277 19 #define TWOPI 6.283185307179586232
c@277 20 #define EPS 0.0000008 // just some arbitrary small number
c@277 21
c@277 22 TempoTrackV2::TempoTrackV2() { }
c@277 23 TempoTrackV2::~TempoTrackV2() { }
c@277 24
c@277 25 void
c@277 26 TempoTrackV2::adapt_thresh(d_vec_t &df)
c@277 27 {
c@277 28
c@277 29 d_vec_t smoothed(df.size());
c@277 30
c@277 31 int p_post = 7;
c@277 32 int p_pre = 8;
c@277 33
c@277 34 int t = std::min(static_cast<int>(df.size()),p_post); // what is smaller, p_post of df size. This is to avoid accessing outside of arrays
c@277 35
c@277 36 // find threshold for first 't' samples, where a full average cannot be computed yet
c@277 37 for (int i = 0;i <= t;i++)
c@277 38 {
c@277 39 int k = std::min((i+p_pre),static_cast<int>(df.size()));
c@277 40 smoothed[i] = mean_array(df,1,k);
c@277 41 }
c@277 42 // find threshold for bulk of samples across a moving average from [i-p_pre,i+p_post]
c@277 43 for (uint i = t+1;i < df.size()-p_post;i++)
c@277 44 {
c@277 45 smoothed[i] = mean_array(df,i-p_pre,i+p_post);
c@277 46 }
c@277 47 // for last few samples calculate threshold, again, not enough samples to do as above
c@277 48 for (uint i = df.size()-p_post;i < df.size();i++)
c@277 49 {
c@277 50 int k = std::max((static_cast<int> (i) -p_post),1);
c@277 51 smoothed[i] = mean_array(df,k,df.size());
c@277 52 }
c@277 53
c@277 54 // subtract the threshold from the detection function and check that it is not less than 0
c@277 55 for (uint i = 0;i < df.size();i++)
c@277 56 {
c@277 57 df[i] -= smoothed[i];
c@277 58 if (df[i] < 0)
c@277 59 {
c@277 60 df[i] = 0;
c@277 61 }
c@277 62 }
c@277 63 }
c@277 64
c@277 65 double
c@277 66 TempoTrackV2::mean_array(const d_vec_t &dfin,int start,int end)
c@277 67 {
c@277 68
c@277 69 double sum = 0.;
c@277 70
c@277 71 // find sum
c@277 72 for (int i = start;i < end+1;i++)
c@277 73 {
c@277 74 sum += dfin[i];
c@277 75 }
c@277 76
c@277 77 return static_cast<double> (sum / (end - start + 1) ); // average and return
c@277 78 }
c@277 79
c@277 80 void
c@277 81 TempoTrackV2::filter_df(d_vec_t &df)
c@277 82 {
c@277 83
c@277 84
c@277 85 d_vec_t a(3);
c@277 86 d_vec_t b(3);
c@277 87 d_vec_t lp_df(df.size());
c@277 88
c@277 89 //equivalent in matlab to [b,a] = butter(2,0.4);
c@277 90 a[0] = 1.0000;
c@277 91 a[1] = -0.3695;
c@277 92 a[2] = 0.1958;
c@277 93 b[0] = 0.2066;
c@277 94 b[1] = 0.4131;
c@277 95 b[2] = 0.2066;
c@277 96
c@277 97 double inp1 = 0.;
c@277 98 double inp2 = 0.;
c@277 99 double out1 = 0.;
c@277 100 double out2 = 0.;
c@277 101
c@277 102
c@277 103 // forwards filtering
c@277 104 for (uint i = 0;i < df.size();i++)
c@277 105 {
c@277 106 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
c@277 107 inp2 = inp1;
c@277 108 inp1 = df[i];
c@277 109 out2 = out1;
c@277 110 out1 = lp_df[i];
c@277 111 }
c@277 112
c@277 113
c@277 114 // copy forwards filtering to df...
c@277 115 // but, time-reversed, ready for backwards filtering
c@277 116 for (uint i = 0;i < df.size();i++)
c@277 117 {
c@277 118 df[i] = lp_df[df.size()-i];
c@277 119 }
c@277 120
c@277 121 for (uint i = 0;i < df.size();i++)
c@277 122 {
c@277 123 lp_df[i] = 0.;
c@277 124 }
c@277 125
c@277 126 inp1 = 0.; inp2 = 0.;
c@277 127 out1 = 0.; out2 = 0.;
c@277 128
c@277 129 // backwards filetering on time-reversed df
c@277 130 for (uint i = 0;i < df.size();i++)
c@277 131 {
c@277 132 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
c@277 133 inp2 = inp1;
c@277 134 inp1 = df[i];
c@277 135 out2 = out1;
c@277 136 out1 = lp_df[i];
c@277 137 }
c@277 138
c@277 139 // write the re-reversed (i.e. forward) version back to df
c@277 140 for (uint i = 0;i < df.size();i++)
c@277 141 {
c@277 142 df[i] = lp_df[df.size()-i];
c@277 143 }
c@277 144
c@277 145
c@277 146 }
c@277 147
c@277 148
c@277 149 void
c@277 150 TempoTrackV2::calculateBeatPeriod(const d_vec_t &df, d_vec_t &beat_period)
c@277 151 {
c@277 152
c@277 153 // to follow matlab.. split into 512 sample frames with a 128 hop size
c@277 154 // calculate the acf,
c@277 155 // then the rcf.. and then stick the rcfs as columns of a matrix
c@277 156 // then call viterbi decoding with weight vector and transition matrix
c@277 157 // and get best path
c@277 158
c@277 159 uint wv_len = 128;
c@277 160 double rayparam = 43.;
c@277 161
c@277 162 // make rayleigh weighting curve
c@277 163 d_vec_t wv(wv_len);
c@277 164 for (uint i=0; i<wv.size(); i++)
c@277 165 {
c@277 166 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
c@277 167 }
c@277 168
c@277 169
c@277 170 uint winlen = 512;
c@277 171 uint step = 128;
c@277 172
c@277 173 d_mat_t rcfmat;
c@277 174 int col_counter = -1;
c@277 175 // main loop for beat period calculation
c@277 176 for (uint i=0; i<(df.size()-winlen); i+=step)
c@277 177 {
c@277 178 // get dfframe
c@277 179 d_vec_t dfframe(winlen);
c@277 180 for (uint k=0; k<winlen; k++)
c@277 181 {
c@277 182 dfframe[k] = df[i+k];
c@277 183 }
c@277 184 // get rcf vector for current frame
c@277 185 d_vec_t rcf(wv_len);
c@277 186 get_rcf(dfframe,wv,rcf);
c@277 187
c@277 188 rcfmat.push_back( d_vec_t() ); // adds a new column
c@277 189 col_counter++;
c@277 190 for (uint j=0; j<rcf.size(); j++)
c@277 191 {
c@277 192 rcfmat[col_counter].push_back( rcf[j] );
c@277 193 }
c@277 194
c@277 195 }
c@277 196
c@277 197 // now call viterbi decoding function
c@277 198 viterbi_decode(rcfmat,wv,beat_period);
c@277 199
c@277 200
c@277 201
c@277 202 }
c@277 203
c@277 204
c@277 205 void
c@277 206 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
c@277 207 {
c@277 208 // calculate autocorrelation function
c@277 209 // then rcf
c@277 210 // just hard code for now... don't really need separate functions to do this
c@277 211
c@277 212 // make acf
c@277 213
c@277 214 d_vec_t dfframe(dfframe_in);
c@277 215
c@277 216 adapt_thresh(dfframe);
c@277 217
c@277 218 d_vec_t acf(dfframe.size());
c@277 219
c@277 220
c@277 221 for (uint lag=0; lag<dfframe.size(); lag++)
c@277 222 {
c@277 223
c@277 224 double sum = 0.;
c@277 225 double tmp = 0.;
c@277 226
c@277 227 for (uint n=0; n<(dfframe.size()-lag); n++)
c@277 228 {
c@277 229 tmp = dfframe[n] * dfframe[n+lag];
c@277 230 sum += tmp;
c@277 231 }
c@277 232 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
c@277 233 }
c@277 234
c@277 235
c@277 236 // for (uint i=0; i<dfframe.size(); i++)
c@277 237 // {
c@277 238 // cout << dfframe[i] << " " << acf[i] << endl;
c@277 239 // }
c@277 240
c@277 241 // cout << "~~~~~~~~~~~~~~" << endl;
c@277 242
c@277 243
c@277 244
c@277 245
c@277 246
c@277 247 // now apply comb filtering
c@277 248 int numelem = 4;
c@277 249
c@277 250 // for (uint i = 1;i < 118;i++) // max beat period
c@277 251 for (uint i = 2;i < rcf.size();i++) // max beat period
c@277 252 {
c@277 253 for (int a = 1;a <= numelem;a++) // number of comb elements
c@277 254 {
c@277 255 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
c@277 256 {
c@277 257 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
c@277 258 }
c@277 259 }
c@277 260 }
c@277 261
c@277 262 // apply adaptive threshold to rcf
c@277 263 adapt_thresh(rcf);
c@277 264
c@277 265 double rcfsum =0.;
c@277 266 for (uint i=0; i<rcf.size(); i++)
c@277 267 {
c@277 268 // rcf[i] *= acf[i];
c@277 269 rcf[i] += EPS ;
c@277 270 rcfsum += rcf[i];
c@277 271 }
c@277 272
c@277 273 // normalise rcf to sum to unity
c@277 274 for (uint i=0; i<rcf.size(); i++)
c@277 275 {
c@277 276 rcf[i] /= (rcfsum + EPS);
c@277 277 }
c@277 278
c@277 279
c@277 280
c@277 281 }
c@277 282
c@277 283 void
c@277 284 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period)
c@277 285 {
c@277 286
c@277 287 // make transition matrix
c@277 288 d_mat_t tmat;
c@277 289 for (uint i=0;i<wv.size();i++)
c@277 290 {
c@277 291 tmat.push_back ( d_vec_t() ); // adds a new column
c@277 292 for (uint j=0; j<wv.size(); j++)
c@277 293 {
c@277 294 tmat[i].push_back(0.); // fill with zeros initially
c@277 295 }
c@277 296 }
c@277 297
c@277 298 double sigma = 8.;
c@277 299 for (uint i=20;i <wv.size()-20; i++)
c@277 300 {
c@277 301 for (uint j=20; j<wv.size()-20; j++)
c@277 302 {
c@277 303 double mu = static_cast<double>(i);
c@277 304 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
c@277 305 }
c@277 306 }
c@277 307
c@277 308 d_mat_t delta;
c@277 309 i_mat_t psi;
c@277 310 for (uint i=0;i <rcfmat.size(); i++)
c@277 311 {
c@277 312 delta.push_back( d_vec_t());
c@277 313 psi.push_back( i_vec_t());
c@277 314 for (uint j=0; j<rcfmat[i].size(); j++)
c@277 315 {
c@277 316 delta[i].push_back(0.); // fill with zeros initially
c@277 317 psi[i].push_back(0); // fill with zeros initially
c@277 318 }
c@277 319 }
c@277 320
c@277 321
c@277 322 uint T = delta.size();
c@277 323 uint Q = delta[0].size();
c@277 324
c@277 325 // initialize first column of delta
c@277 326 for (uint j=0; j<Q; j++)
c@277 327 {
c@277 328 delta[0][j] = wv[j] * rcfmat[0][j];
c@277 329 psi[0][j] = 0;
c@277 330 }
c@277 331
c@277 332 double deltasum = 0.;
c@277 333 for (uint i=0; i<Q; i++)
c@277 334 {
c@277 335 deltasum += delta[0][i];
c@277 336 }
c@277 337 for (uint i=0; i<Q; i++)
c@277 338 {
c@277 339 delta[0][i] /= (deltasum + EPS);
c@277 340 }
c@277 341
c@277 342
c@277 343
c@277 344 for (uint t=1; t<T; t++)
c@277 345 {
c@277 346 d_vec_t tmp_vec(Q);
c@277 347
c@277 348 for (uint j=0; j<Q; j++)
c@277 349 {
c@277 350
c@277 351 for (uint i=0; i<Q; i++)
c@277 352 {
c@277 353 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
c@277 354 }
c@277 355
c@277 356 delta[t][j] = get_max_val(tmp_vec);
c@277 357
c@277 358 psi[t][j] = get_max_ind(tmp_vec);
c@277 359
c@277 360 delta[t][j] *= rcfmat[t][j];
c@277 361
c@277 362
c@277 363 }
c@277 364
c@277 365 double deltasum = 0.;
c@277 366 for (uint i=0; i<Q; i++)
c@277 367 {
c@277 368 deltasum += delta[t][i];
c@277 369 }
c@277 370 for (uint i=0; i<Q; i++)
c@277 371 {
c@277 372 delta[t][i] /= (deltasum + EPS);
c@277 373 }
c@277 374
c@277 375
c@277 376
c@277 377
c@277 378 }
c@277 379
c@277 380
c@277 381 // ofstream tmatfile;
c@277 382 // tmatfile.open("/home/matthewd/Desktop/tmat.txt");
c@277 383
c@277 384 // for (uint i=0;i <delta.size(); i++)
c@277 385 // {
c@277 386 // for (uint j=0; j<delta[i].size(); j++)
c@277 387 // {
c@277 388 // tmatfile << rcfmat[i][j] << endl;
c@277 389 // }
c@277 390 // }
c@277 391
c@277 392 // tmatfile.close();
c@277 393
c@277 394 i_vec_t bestpath(T);
c@277 395 d_vec_t tmp_vec(Q);
c@277 396 for (uint i=0; i<Q; i++)
c@277 397 {
c@277 398 tmp_vec[i] = delta[T-1][i];
c@277 399 }
c@277 400
c@277 401
c@277 402 bestpath[T-1] = get_max_ind(tmp_vec);
c@277 403
c@277 404 for (uint t=T-2; t>0 ;t--)
c@277 405 {
c@277 406 bestpath[t] = psi[t+1][bestpath[t+1]];
c@277 407 }
c@277 408 // very weird hack!
c@277 409 bestpath[0] = psi[1][bestpath[1]];
c@277 410
c@277 411 // for (uint i=0; i<bestpath.size(); i++)
c@277 412 // {
c@277 413 // cout << bestpath[i] << endl;
c@277 414 // }
c@277 415
c@277 416
c@277 417 uint lastind = 0;
c@277 418 for (uint i=0; i<T; i++)
c@277 419 {
c@277 420 uint step = 128;
c@277 421 // cout << bestpath[i] << " " << i << endl;
c@277 422 for (uint j=0; j<step; j++)
c@277 423 {
c@277 424 lastind = i*step+j;
c@277 425 beat_period[lastind] = bestpath[i];
c@277 426
c@277 427 }
c@277 428 }
c@277 429
c@277 430 //fill in the last values...
c@277 431 for (uint i=lastind; i<beat_period.size(); i++)
c@277 432 {
c@277 433 beat_period[i] = beat_period[lastind];
c@277 434 }
c@277 435
c@277 436
c@277 437
c@277 438 }
c@277 439
c@277 440 double
c@277 441 TempoTrackV2::get_max_val(const d_vec_t &df)
c@277 442 {
c@277 443 double maxval = 0.;
c@277 444 for (uint i=0; i<df.size(); i++)
c@277 445 {
c@277 446
c@277 447 if (maxval < df[i])
c@277 448 {
c@277 449 maxval = df[i];
c@277 450 }
c@277 451
c@277 452 }
c@277 453
c@277 454
c@277 455 return maxval;
c@277 456
c@277 457 }
c@277 458
c@277 459 int
c@277 460 TempoTrackV2::get_max_ind(const d_vec_t &df)
c@277 461 {
c@277 462
c@277 463 double maxval = 0.;
c@277 464 int ind = 0;
c@277 465 for (uint i=0; i<df.size(); i++)
c@277 466 {
c@277 467 if (maxval < df[i])
c@277 468 {
c@277 469 maxval = df[i];
c@277 470 ind = i;
c@277 471 }
c@277 472
c@277 473 }
c@277 474
c@277 475 return ind;
c@277 476
c@277 477 }
c@277 478
c@277 479 void
c@277 480 TempoTrackV2::normalise_vec(d_vec_t &df)
c@277 481 {
c@277 482 double sum = 0.;
c@277 483 for (uint i=0; i<df.size(); i++)
c@277 484 {
c@277 485 sum += df[i];
c@277 486 }
c@277 487
c@277 488 for (uint i=0; i<df.size(); i++)
c@277 489 {
c@277 490 df[i]/= (sum + EPS);
c@277 491 }
c@277 492
c@277 493
c@277 494 }
c@277 495
c@277 496 void
c@277 497 TempoTrackV2::calculateBeats(const d_vec_t &df, const d_vec_t &beat_period,
c@277 498 d_vec_t &beats)
c@277 499 {
c@277 500
c@277 501 d_vec_t cumscore(df.size());
c@277 502 i_vec_t backlink(df.size());
c@277 503 d_vec_t localscore(df.size());
c@277 504
c@277 505 // WHEN I FIGURE OUT HOW, I'LL WANT TO DO SOME FILTERING ON THIS...
c@277 506 for (uint i=0; i<df.size(); i++)
c@277 507 {
c@277 508 localscore[i] = df[i];
c@277 509 backlink[i] = -1;
c@277 510 }
c@277 511
c@277 512 double tightness = 4.;
c@277 513 double alpha = 0.9;
c@277 514
c@277 515 // main loop
c@277 516 for (uint i=3*beat_period[0]; i<localscore.size(); i++)
c@277 517 {
c@277 518 int prange_min = -2*beat_period[i];
c@277 519 int prange_max = round(-0.5*beat_period[i]);
c@277 520
c@277 521 d_vec_t txwt (prange_max - prange_min + 1);
c@277 522 d_vec_t scorecands (txwt.size());
c@277 523
c@277 524 for (uint j=0;j<txwt.size();j++)
c@277 525 {
c@277 526 double mu = static_cast<double> (beat_period[i]);
c@277 527 txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
c@277 528
c@277 529 scorecands[j] = txwt[j] * cumscore[i+prange_min+j];
c@277 530 }
c@277 531
c@277 532 double vv = get_max_val(scorecands);
c@277 533 int xx = get_max_ind(scorecands);
c@277 534
c@277 535 cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
c@277 536
c@277 537 backlink[i] = i+prange_min+xx;
c@277 538
c@277 539 }
c@277 540
c@277 541
c@277 542 d_vec_t tmp_vec;
c@277 543 for (uint i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
c@277 544 {
c@277 545 tmp_vec.push_back(cumscore[i]);
c@277 546 }
c@277 547
c@277 548 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
c@277 549
c@277 550 i_vec_t ibeats;
c@277 551 ibeats.push_back(startpoint);
c@277 552 while (backlink[ibeats.back()] > 3*beat_period[0])
c@277 553 {
c@277 554 ibeats.push_back(backlink[ibeats.back()]);
c@277 555 }
c@277 556
c@277 557
c@277 558 for (uint i=0; i<ibeats.size(); i++)
c@277 559 {
c@277 560
c@277 561 beats.push_back( static_cast<double>(ibeats[i]) );
c@277 562
c@277 563 // cout << ibeats[i] << " " << beats[i] <<endl;
c@277 564 }
c@277 565 }
c@277 566
c@277 567