annotate dsp/tempotracking/TempoTrackV2.cpp @ 52:4d1f32efcafd

* Add Matthew's newer beat tracking implementation
author cannam
date Tue, 20 Jan 2009 15:01:01 +0000
parents
children 796170a9c8e4
rev   line source
cannam@52 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@52 2
cannam@52 3 /*
cannam@52 4 QM DSP Library
cannam@52 5
cannam@52 6 Centre for Digital Music, Queen Mary, University of London.
cannam@52 7 This file copyright 2008-2009 Matthew Davies and QMUL.
cannam@52 8 All rights reserved.
cannam@52 9 */
cannam@52 10
cannam@52 11 #include "TempoTrackV2.h"
cannam@52 12
cannam@52 13 #include <cmath>
cannam@52 14 #include <cstdlib>
cannam@52 15
cannam@52 16
cannam@52 17 //#define FRAMESIZE 512
cannam@52 18 //#define BIGFRAMESIZE 1024
cannam@52 19 #define TWOPI 6.283185307179586232
cannam@52 20 #define EPS 0.0000008 // just some arbitrary small number
cannam@52 21
cannam@52 22 TempoTrackV2::TempoTrackV2() { }
cannam@52 23 TempoTrackV2::~TempoTrackV2() { }
cannam@52 24
cannam@52 25 void
cannam@52 26 TempoTrackV2::adapt_thresh(d_vec_t &df)
cannam@52 27 {
cannam@52 28
cannam@52 29 d_vec_t smoothed(df.size());
cannam@52 30
cannam@52 31 int p_post = 7;
cannam@52 32 int p_pre = 8;
cannam@52 33
cannam@52 34 int t = std::min(static_cast<int>(df.size()),p_post); // what is smaller, p_post of df size. This is to avoid accessing outside of arrays
cannam@52 35
cannam@52 36 // find threshold for first 't' samples, where a full average cannot be computed yet
cannam@52 37 for (int i = 0;i <= t;i++)
cannam@52 38 {
cannam@52 39 int k = std::min((i+p_pre),static_cast<int>(df.size()));
cannam@52 40 smoothed[i] = mean_array(df,1,k);
cannam@52 41 }
cannam@52 42 // find threshold for bulk of samples across a moving average from [i-p_pre,i+p_post]
cannam@52 43 for (uint i = t+1;i < df.size()-p_post;i++)
cannam@52 44 {
cannam@52 45 smoothed[i] = mean_array(df,i-p_pre,i+p_post);
cannam@52 46 }
cannam@52 47 // for last few samples calculate threshold, again, not enough samples to do as above
cannam@52 48 for (uint i = df.size()-p_post;i < df.size();i++)
cannam@52 49 {
cannam@52 50 int k = std::max((static_cast<int> (i) -p_post),1);
cannam@52 51 smoothed[i] = mean_array(df,k,df.size());
cannam@52 52 }
cannam@52 53
cannam@52 54 // subtract the threshold from the detection function and check that it is not less than 0
cannam@52 55 for (uint i = 0;i < df.size();i++)
cannam@52 56 {
cannam@52 57 df[i] -= smoothed[i];
cannam@52 58 if (df[i] < 0)
cannam@52 59 {
cannam@52 60 df[i] = 0;
cannam@52 61 }
cannam@52 62 }
cannam@52 63 }
cannam@52 64
cannam@52 65 double
cannam@52 66 TempoTrackV2::mean_array(const d_vec_t &dfin,int start,int end)
cannam@52 67 {
cannam@52 68
cannam@52 69 double sum = 0.;
cannam@52 70
cannam@52 71 // find sum
cannam@52 72 for (int i = start;i < end+1;i++)
cannam@52 73 {
cannam@52 74 sum += dfin[i];
cannam@52 75 }
cannam@52 76
cannam@52 77 return static_cast<double> (sum / (end - start + 1) ); // average and return
cannam@52 78 }
cannam@52 79
cannam@52 80 void
cannam@52 81 TempoTrackV2::filter_df(d_vec_t &df)
cannam@52 82 {
cannam@52 83
cannam@52 84
cannam@52 85 d_vec_t a(3);
cannam@52 86 d_vec_t b(3);
cannam@52 87 d_vec_t lp_df(df.size());
cannam@52 88
cannam@52 89 //equivalent in matlab to [b,a] = butter(2,0.4);
cannam@52 90 a[0] = 1.0000;
cannam@52 91 a[1] = -0.3695;
cannam@52 92 a[2] = 0.1958;
cannam@52 93 b[0] = 0.2066;
cannam@52 94 b[1] = 0.4131;
cannam@52 95 b[2] = 0.2066;
cannam@52 96
cannam@52 97 double inp1 = 0.;
cannam@52 98 double inp2 = 0.;
cannam@52 99 double out1 = 0.;
cannam@52 100 double out2 = 0.;
cannam@52 101
cannam@52 102
cannam@52 103 // forwards filtering
cannam@52 104 for (uint i = 0;i < df.size();i++)
cannam@52 105 {
cannam@52 106 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
cannam@52 107 inp2 = inp1;
cannam@52 108 inp1 = df[i];
cannam@52 109 out2 = out1;
cannam@52 110 out1 = lp_df[i];
cannam@52 111 }
cannam@52 112
cannam@52 113
cannam@52 114 // copy forwards filtering to df...
cannam@52 115 // but, time-reversed, ready for backwards filtering
cannam@52 116 for (uint i = 0;i < df.size();i++)
cannam@52 117 {
cannam@52 118 df[i] = lp_df[df.size()-i];
cannam@52 119 }
cannam@52 120
cannam@52 121 for (uint i = 0;i < df.size();i++)
cannam@52 122 {
cannam@52 123 lp_df[i] = 0.;
cannam@52 124 }
cannam@52 125
cannam@52 126 inp1 = 0.; inp2 = 0.;
cannam@52 127 out1 = 0.; out2 = 0.;
cannam@52 128
cannam@52 129 // backwards filetering on time-reversed df
cannam@52 130 for (uint i = 0;i < df.size();i++)
cannam@52 131 {
cannam@52 132 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
cannam@52 133 inp2 = inp1;
cannam@52 134 inp1 = df[i];
cannam@52 135 out2 = out1;
cannam@52 136 out1 = lp_df[i];
cannam@52 137 }
cannam@52 138
cannam@52 139 // write the re-reversed (i.e. forward) version back to df
cannam@52 140 for (uint i = 0;i < df.size();i++)
cannam@52 141 {
cannam@52 142 df[i] = lp_df[df.size()-i];
cannam@52 143 }
cannam@52 144
cannam@52 145
cannam@52 146 }
cannam@52 147
cannam@52 148
cannam@52 149 void
cannam@52 150 TempoTrackV2::calculateBeatPeriod(const d_vec_t &df, d_vec_t &beat_period)
cannam@52 151 {
cannam@52 152
cannam@52 153 // to follow matlab.. split into 512 sample frames with a 128 hop size
cannam@52 154 // calculate the acf,
cannam@52 155 // then the rcf.. and then stick the rcfs as columns of a matrix
cannam@52 156 // then call viterbi decoding with weight vector and transition matrix
cannam@52 157 // and get best path
cannam@52 158
cannam@52 159 uint wv_len = 128;
cannam@52 160 double rayparam = 43.;
cannam@52 161
cannam@52 162 // make rayleigh weighting curve
cannam@52 163 d_vec_t wv(wv_len);
cannam@52 164 for (uint i=0; i<wv.size(); i++)
cannam@52 165 {
cannam@52 166 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
cannam@52 167 }
cannam@52 168
cannam@52 169
cannam@52 170 uint winlen = 512;
cannam@52 171 uint step = 128;
cannam@52 172
cannam@52 173 d_mat_t rcfmat;
cannam@52 174 int col_counter = -1;
cannam@52 175 // main loop for beat period calculation
cannam@52 176 for (uint i=0; i<(df.size()-winlen); i+=step)
cannam@52 177 {
cannam@52 178 // get dfframe
cannam@52 179 d_vec_t dfframe(winlen);
cannam@52 180 for (uint k=0; k<winlen; k++)
cannam@52 181 {
cannam@52 182 dfframe[k] = df[i+k];
cannam@52 183 }
cannam@52 184 // get rcf vector for current frame
cannam@52 185 d_vec_t rcf(wv_len);
cannam@52 186 get_rcf(dfframe,wv,rcf);
cannam@52 187
cannam@52 188 rcfmat.push_back( d_vec_t() ); // adds a new column
cannam@52 189 col_counter++;
cannam@52 190 for (uint j=0; j<rcf.size(); j++)
cannam@52 191 {
cannam@52 192 rcfmat[col_counter].push_back( rcf[j] );
cannam@52 193 }
cannam@52 194
cannam@52 195 }
cannam@52 196
cannam@52 197 // now call viterbi decoding function
cannam@52 198 viterbi_decode(rcfmat,wv,beat_period);
cannam@52 199
cannam@52 200
cannam@52 201
cannam@52 202 }
cannam@52 203
cannam@52 204
cannam@52 205 void
cannam@52 206 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
cannam@52 207 {
cannam@52 208 // calculate autocorrelation function
cannam@52 209 // then rcf
cannam@52 210 // just hard code for now... don't really need separate functions to do this
cannam@52 211
cannam@52 212 // make acf
cannam@52 213
cannam@52 214 d_vec_t dfframe(dfframe_in);
cannam@52 215
cannam@52 216 adapt_thresh(dfframe);
cannam@52 217
cannam@52 218 d_vec_t acf(dfframe.size());
cannam@52 219
cannam@52 220
cannam@52 221 for (uint lag=0; lag<dfframe.size(); lag++)
cannam@52 222 {
cannam@52 223
cannam@52 224 double sum = 0.;
cannam@52 225 double tmp = 0.;
cannam@52 226
cannam@52 227 for (uint n=0; n<(dfframe.size()-lag); n++)
cannam@52 228 {
cannam@52 229 tmp = dfframe[n] * dfframe[n+lag];
cannam@52 230 sum += tmp;
cannam@52 231 }
cannam@52 232 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
cannam@52 233 }
cannam@52 234
cannam@52 235
cannam@52 236 // for (uint i=0; i<dfframe.size(); i++)
cannam@52 237 // {
cannam@52 238 // cout << dfframe[i] << " " << acf[i] << endl;
cannam@52 239 // }
cannam@52 240
cannam@52 241 // cout << "~~~~~~~~~~~~~~" << endl;
cannam@52 242
cannam@52 243
cannam@52 244
cannam@52 245
cannam@52 246
cannam@52 247 // now apply comb filtering
cannam@52 248 int numelem = 4;
cannam@52 249
cannam@52 250 // for (uint i = 1;i < 118;i++) // max beat period
cannam@52 251 for (uint i = 2;i < rcf.size();i++) // max beat period
cannam@52 252 {
cannam@52 253 for (int a = 1;a <= numelem;a++) // number of comb elements
cannam@52 254 {
cannam@52 255 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
cannam@52 256 {
cannam@52 257 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
cannam@52 258 }
cannam@52 259 }
cannam@52 260 }
cannam@52 261
cannam@52 262 // apply adaptive threshold to rcf
cannam@52 263 adapt_thresh(rcf);
cannam@52 264
cannam@52 265 double rcfsum =0.;
cannam@52 266 for (uint i=0; i<rcf.size(); i++)
cannam@52 267 {
cannam@52 268 // rcf[i] *= acf[i];
cannam@52 269 rcf[i] += EPS ;
cannam@52 270 rcfsum += rcf[i];
cannam@52 271 }
cannam@52 272
cannam@52 273 // normalise rcf to sum to unity
cannam@52 274 for (uint i=0; i<rcf.size(); i++)
cannam@52 275 {
cannam@52 276 rcf[i] /= (rcfsum + EPS);
cannam@52 277 }
cannam@52 278
cannam@52 279
cannam@52 280
cannam@52 281 }
cannam@52 282
cannam@52 283 void
cannam@52 284 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period)
cannam@52 285 {
cannam@52 286
cannam@52 287 // make transition matrix
cannam@52 288 d_mat_t tmat;
cannam@52 289 for (uint i=0;i<wv.size();i++)
cannam@52 290 {
cannam@52 291 tmat.push_back ( d_vec_t() ); // adds a new column
cannam@52 292 for (uint j=0; j<wv.size(); j++)
cannam@52 293 {
cannam@52 294 tmat[i].push_back(0.); // fill with zeros initially
cannam@52 295 }
cannam@52 296 }
cannam@52 297
cannam@52 298 double sigma = 8.;
cannam@52 299 for (uint i=20;i <wv.size()-20; i++)
cannam@52 300 {
cannam@52 301 for (uint j=20; j<wv.size()-20; j++)
cannam@52 302 {
cannam@52 303 double mu = static_cast<double>(i);
cannam@52 304 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
cannam@52 305 }
cannam@52 306 }
cannam@52 307
cannam@52 308 d_mat_t delta;
cannam@52 309 i_mat_t psi;
cannam@52 310 for (uint i=0;i <rcfmat.size(); i++)
cannam@52 311 {
cannam@52 312 delta.push_back( d_vec_t());
cannam@52 313 psi.push_back( i_vec_t());
cannam@52 314 for (uint j=0; j<rcfmat[i].size(); j++)
cannam@52 315 {
cannam@52 316 delta[i].push_back(0.); // fill with zeros initially
cannam@52 317 psi[i].push_back(0); // fill with zeros initially
cannam@52 318 }
cannam@52 319 }
cannam@52 320
cannam@52 321
cannam@52 322 uint T = delta.size();
cannam@52 323 uint Q = delta[0].size();
cannam@52 324
cannam@52 325 // initialize first column of delta
cannam@52 326 for (uint j=0; j<Q; j++)
cannam@52 327 {
cannam@52 328 delta[0][j] = wv[j] * rcfmat[0][j];
cannam@52 329 psi[0][j] = 0;
cannam@52 330 }
cannam@52 331
cannam@52 332 double deltasum = 0.;
cannam@52 333 for (uint i=0; i<Q; i++)
cannam@52 334 {
cannam@52 335 deltasum += delta[0][i];
cannam@52 336 }
cannam@52 337 for (uint i=0; i<Q; i++)
cannam@52 338 {
cannam@52 339 delta[0][i] /= (deltasum + EPS);
cannam@52 340 }
cannam@52 341
cannam@52 342
cannam@52 343
cannam@52 344 for (uint t=1; t<T; t++)
cannam@52 345 {
cannam@52 346 d_vec_t tmp_vec(Q);
cannam@52 347
cannam@52 348 for (uint j=0; j<Q; j++)
cannam@52 349 {
cannam@52 350
cannam@52 351 for (uint i=0; i<Q; i++)
cannam@52 352 {
cannam@52 353 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
cannam@52 354 }
cannam@52 355
cannam@52 356 delta[t][j] = get_max_val(tmp_vec);
cannam@52 357
cannam@52 358 psi[t][j] = get_max_ind(tmp_vec);
cannam@52 359
cannam@52 360 delta[t][j] *= rcfmat[t][j];
cannam@52 361
cannam@52 362
cannam@52 363 }
cannam@52 364
cannam@52 365 double deltasum = 0.;
cannam@52 366 for (uint i=0; i<Q; i++)
cannam@52 367 {
cannam@52 368 deltasum += delta[t][i];
cannam@52 369 }
cannam@52 370 for (uint i=0; i<Q; i++)
cannam@52 371 {
cannam@52 372 delta[t][i] /= (deltasum + EPS);
cannam@52 373 }
cannam@52 374
cannam@52 375
cannam@52 376
cannam@52 377
cannam@52 378 }
cannam@52 379
cannam@52 380
cannam@52 381 // ofstream tmatfile;
cannam@52 382 // tmatfile.open("/home/matthewd/Desktop/tmat.txt");
cannam@52 383
cannam@52 384 // for (uint i=0;i <delta.size(); i++)
cannam@52 385 // {
cannam@52 386 // for (uint j=0; j<delta[i].size(); j++)
cannam@52 387 // {
cannam@52 388 // tmatfile << rcfmat[i][j] << endl;
cannam@52 389 // }
cannam@52 390 // }
cannam@52 391
cannam@52 392 // tmatfile.close();
cannam@52 393
cannam@52 394 i_vec_t bestpath(T);
cannam@52 395 d_vec_t tmp_vec(Q);
cannam@52 396 for (uint i=0; i<Q; i++)
cannam@52 397 {
cannam@52 398 tmp_vec[i] = delta[T-1][i];
cannam@52 399 }
cannam@52 400
cannam@52 401
cannam@52 402 bestpath[T-1] = get_max_ind(tmp_vec);
cannam@52 403
cannam@52 404 for (uint t=T-2; t>0 ;t--)
cannam@52 405 {
cannam@52 406 bestpath[t] = psi[t+1][bestpath[t+1]];
cannam@52 407 }
cannam@52 408 // very weird hack!
cannam@52 409 bestpath[0] = psi[1][bestpath[1]];
cannam@52 410
cannam@52 411 // for (uint i=0; i<bestpath.size(); i++)
cannam@52 412 // {
cannam@52 413 // cout << bestpath[i] << endl;
cannam@52 414 // }
cannam@52 415
cannam@52 416
cannam@52 417 uint lastind = 0;
cannam@52 418 for (uint i=0; i<T; i++)
cannam@52 419 {
cannam@52 420 uint step = 128;
cannam@52 421 // cout << bestpath[i] << " " << i << endl;
cannam@52 422 for (uint j=0; j<step; j++)
cannam@52 423 {
cannam@52 424 lastind = i*step+j;
cannam@52 425 beat_period[lastind] = bestpath[i];
cannam@52 426
cannam@52 427 }
cannam@52 428 }
cannam@52 429
cannam@52 430 //fill in the last values...
cannam@52 431 for (uint i=lastind; i<beat_period.size(); i++)
cannam@52 432 {
cannam@52 433 beat_period[i] = beat_period[lastind];
cannam@52 434 }
cannam@52 435
cannam@52 436
cannam@52 437
cannam@52 438 }
cannam@52 439
cannam@52 440 double
cannam@52 441 TempoTrackV2::get_max_val(const d_vec_t &df)
cannam@52 442 {
cannam@52 443 double maxval = 0.;
cannam@52 444 for (uint i=0; i<df.size(); i++)
cannam@52 445 {
cannam@52 446
cannam@52 447 if (maxval < df[i])
cannam@52 448 {
cannam@52 449 maxval = df[i];
cannam@52 450 }
cannam@52 451
cannam@52 452 }
cannam@52 453
cannam@52 454
cannam@52 455 return maxval;
cannam@52 456
cannam@52 457 }
cannam@52 458
cannam@52 459 int
cannam@52 460 TempoTrackV2::get_max_ind(const d_vec_t &df)
cannam@52 461 {
cannam@52 462
cannam@52 463 double maxval = 0.;
cannam@52 464 int ind = 0;
cannam@52 465 for (uint i=0; i<df.size(); i++)
cannam@52 466 {
cannam@52 467 if (maxval < df[i])
cannam@52 468 {
cannam@52 469 maxval = df[i];
cannam@52 470 ind = i;
cannam@52 471 }
cannam@52 472
cannam@52 473 }
cannam@52 474
cannam@52 475 return ind;
cannam@52 476
cannam@52 477 }
cannam@52 478
cannam@52 479 void
cannam@52 480 TempoTrackV2::normalise_vec(d_vec_t &df)
cannam@52 481 {
cannam@52 482 double sum = 0.;
cannam@52 483 for (uint i=0; i<df.size(); i++)
cannam@52 484 {
cannam@52 485 sum += df[i];
cannam@52 486 }
cannam@52 487
cannam@52 488 for (uint i=0; i<df.size(); i++)
cannam@52 489 {
cannam@52 490 df[i]/= (sum + EPS);
cannam@52 491 }
cannam@52 492
cannam@52 493
cannam@52 494 }
cannam@52 495
cannam@52 496 void
cannam@52 497 TempoTrackV2::calculateBeats(const d_vec_t &df, const d_vec_t &beat_period,
cannam@52 498 d_vec_t &beats)
cannam@52 499 {
cannam@52 500
cannam@52 501 d_vec_t cumscore(df.size());
cannam@52 502 i_vec_t backlink(df.size());
cannam@52 503 d_vec_t localscore(df.size());
cannam@52 504
cannam@52 505 // WHEN I FIGURE OUT HOW, I'LL WANT TO DO SOME FILTERING ON THIS...
cannam@52 506 for (uint i=0; i<df.size(); i++)
cannam@52 507 {
cannam@52 508 localscore[i] = df[i];
cannam@52 509 backlink[i] = -1;
cannam@52 510 }
cannam@52 511
cannam@52 512 double tightness = 4.;
cannam@52 513 double alpha = 0.9;
cannam@52 514
cannam@52 515 // main loop
cannam@52 516 for (uint i=3*beat_period[0]; i<localscore.size(); i++)
cannam@52 517 {
cannam@52 518 int prange_min = -2*beat_period[i];
cannam@52 519 int prange_max = round(-0.5*beat_period[i]);
cannam@52 520
cannam@52 521 d_vec_t txwt (prange_max - prange_min + 1);
cannam@52 522 d_vec_t scorecands (txwt.size());
cannam@52 523
cannam@52 524 for (uint j=0;j<txwt.size();j++)
cannam@52 525 {
cannam@52 526 double mu = static_cast<double> (beat_period[i]);
cannam@52 527 txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
cannam@52 528
cannam@52 529 scorecands[j] = txwt[j] * cumscore[i+prange_min+j];
cannam@52 530 }
cannam@52 531
cannam@52 532 double vv = get_max_val(scorecands);
cannam@52 533 int xx = get_max_ind(scorecands);
cannam@52 534
cannam@52 535 cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
cannam@52 536
cannam@52 537 backlink[i] = i+prange_min+xx;
cannam@52 538
cannam@52 539 }
cannam@52 540
cannam@52 541
cannam@52 542 d_vec_t tmp_vec;
cannam@52 543 for (uint i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
cannam@52 544 {
cannam@52 545 tmp_vec.push_back(cumscore[i]);
cannam@52 546 }
cannam@52 547
cannam@52 548 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
cannam@52 549
cannam@52 550 i_vec_t ibeats;
cannam@52 551 ibeats.push_back(startpoint);
cannam@52 552 while (backlink[ibeats.back()] > 3*beat_period[0])
cannam@52 553 {
cannam@52 554 ibeats.push_back(backlink[ibeats.back()]);
cannam@52 555 }
cannam@52 556
cannam@52 557
cannam@52 558 for (uint i=0; i<ibeats.size(); i++)
cannam@52 559 {
cannam@52 560
cannam@52 561 beats.push_back( static_cast<double>(ibeats[i]) );
cannam@52 562
cannam@52 563 // cout << ibeats[i] << " " << beats[i] <<endl;
cannam@52 564 }
cannam@52 565 }
cannam@52 566
cannam@52 567