annotate dsp/tempotracking/TempoTrackV2.cpp @ 53:796170a9c8e4

* Update with fixes from Matthew's newer code
author cannam
date Mon, 09 Feb 2009 16:05:32 +0000
parents 4d1f32efcafd
children 5bec06ecc88a
rev   line source
cannam@52 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@52 2
cannam@52 3 /*
cannam@52 4 QM DSP Library
cannam@52 5
cannam@52 6 Centre for Digital Music, Queen Mary, University of London.
cannam@52 7 This file copyright 2008-2009 Matthew Davies and QMUL.
cannam@52 8 All rights reserved.
cannam@52 9 */
cannam@52 10
cannam@52 11 #include "TempoTrackV2.h"
cannam@52 12
cannam@52 13 #include <cmath>
cannam@52 14 #include <cstdlib>
cannam@53 15 #include <iostream>
cannam@52 16
cannam@52 17
cannam@52 18 //#define FRAMESIZE 512
cannam@52 19 //#define BIGFRAMESIZE 1024
cannam@52 20 #define TWOPI 6.283185307179586232
cannam@52 21 #define EPS 0.0000008 // just some arbitrary small number
cannam@52 22
cannam@52 23 TempoTrackV2::TempoTrackV2() { }
cannam@52 24 TempoTrackV2::~TempoTrackV2() { }
cannam@52 25
cannam@52 26 void
cannam@52 27 TempoTrackV2::adapt_thresh(d_vec_t &df)
cannam@52 28 {
cannam@53 29 d_vec_t smoothed(df.size());
cannam@53 30
cannam@53 31 int p_post = 7;
cannam@53 32 int p_pre = 8;
cannam@52 33
cannam@53 34 int t = std::min(static_cast<int>(df.size()),p_post); // what is smaller, p_post of df size. This is to avoid accessing outside of arrays
cannam@52 35
cannam@53 36 // find threshold for first 't' samples, where a full average cannot be computed yet
cannam@53 37 for (int i = 0;i <= t;i++)
cannam@53 38 {
cannam@53 39 int k = std::min((i+p_pre),static_cast<int>(df.size()));
cannam@53 40 smoothed[i] = mean_array(df,1,k);
cannam@53 41 }
cannam@53 42 // find threshold for bulk of samples across a moving average from [i-p_pre,i+p_post]
cannam@53 43 for (uint i = t+1;i < df.size()-p_post;i++)
cannam@53 44 {
cannam@53 45 smoothed[i] = mean_array(df,i-p_pre,i+p_post);
cannam@53 46 }
cannam@53 47 // for last few samples calculate threshold, again, not enough samples to do as above
cannam@53 48 for (uint i = df.size()-p_post;i < df.size();i++)
cannam@53 49 {
cannam@53 50 int k = std::max((static_cast<int> (i) -p_post),1);
cannam@53 51 smoothed[i] = mean_array(df,k,df.size());
cannam@53 52 }
cannam@52 53
cannam@53 54 // subtract the threshold from the detection function and check that it is not less than 0
cannam@53 55 for (uint i = 0;i < df.size();i++)
cannam@53 56 {
cannam@53 57 df[i] -= smoothed[i];
cannam@53 58 if (df[i] < 0)
cannam@53 59 {
cannam@53 60 df[i] = 0;
cannam@53 61 }
cannam@53 62 }
cannam@52 63 }
cannam@52 64
cannam@52 65 double
cannam@52 66 TempoTrackV2::mean_array(const d_vec_t &dfin,int start,int end)
cannam@52 67 {
cannam@53 68 double sum = 0.;
cannam@53 69
cannam@53 70 // find sum
cannam@53 71 for (int i = start;i < end;i++)
cannam@53 72 {
cannam@53 73 sum += dfin[i];
cannam@53 74 }
cannam@52 75
cannam@53 76 return static_cast<double> (sum / (end - start + 1) ); // average and return
cannam@52 77 }
cannam@52 78
cannam@52 79 void
cannam@52 80 TempoTrackV2::filter_df(d_vec_t &df)
cannam@52 81 {
cannam@53 82 d_vec_t a(3);
cannam@53 83 d_vec_t b(3);
cannam@53 84 d_vec_t lp_df(df.size());
cannam@52 85
cannam@53 86 //equivalent in matlab to [b,a] = butter(2,0.4);
cannam@53 87 a[0] = 1.0000;
cannam@53 88 a[1] = -0.3695;
cannam@53 89 a[2] = 0.1958;
cannam@53 90 b[0] = 0.2066;
cannam@53 91 b[1] = 0.4131;
cannam@53 92 b[2] = 0.2066;
cannam@53 93
cannam@53 94 double inp1 = 0.;
cannam@53 95 double inp2 = 0.;
cannam@53 96 double out1 = 0.;
cannam@53 97 double out2 = 0.;
cannam@52 98
cannam@52 99
cannam@53 100 // forwards filtering
cannam@53 101 for (uint i = 0;i < df.size();i++)
cannam@53 102 {
cannam@53 103 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
cannam@53 104 inp2 = inp1;
cannam@53 105 inp1 = df[i];
cannam@53 106 out2 = out1;
cannam@53 107 out1 = lp_df[i];
cannam@53 108 }
cannam@52 109
cannam@53 110 // copy forwards filtering to df...
cannam@53 111 // but, time-reversed, ready for backwards filtering
cannam@53 112 for (uint i = 0;i < df.size();i++)
cannam@53 113 {
cannam@53 114 df[i] = lp_df[df.size()-i-1];
cannam@53 115 }
cannam@52 116
cannam@53 117 for (uint i = 0;i < df.size();i++)
cannam@53 118 {
cannam@53 119 lp_df[i] = 0.;
cannam@53 120 }
cannam@52 121
cannam@53 122 inp1 = 0.; inp2 = 0.;
cannam@53 123 out1 = 0.; out2 = 0.;
cannam@52 124
cannam@52 125 // backwards filetering on time-reversed df
cannam@53 126 for (uint i = 0;i < df.size();i++)
cannam@53 127 {
cannam@53 128 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
cannam@53 129 inp2 = inp1;
cannam@53 130 inp1 = df[i];
cannam@53 131 out2 = out1;
cannam@53 132 out1 = lp_df[i];
cannam@53 133 }
cannam@52 134
cannam@52 135 // write the re-reversed (i.e. forward) version back to df
cannam@53 136 for (uint i = 0;i < df.size();i++)
cannam@53 137 {
cannam@53 138 df[i] = lp_df[df.size()-i-1];
cannam@53 139 }
cannam@52 140 }
cannam@52 141
cannam@52 142
cannam@52 143 void
cannam@53 144 TempoTrackV2::calculateBeatPeriod(const d_vec_t &df, d_vec_t &beat_period,
cannam@53 145 d_vec_t &tempi)
cannam@52 146 {
cannam@53 147 // to follow matlab.. split into 512 sample frames with a 128 hop size
cannam@53 148 // calculate the acf,
cannam@53 149 // then the rcf.. and then stick the rcfs as columns of a matrix
cannam@53 150 // then call viterbi decoding with weight vector and transition matrix
cannam@53 151 // and get best path
cannam@52 152
cannam@53 153 uint wv_len = 128;
cannam@53 154 double rayparam = 43.;
cannam@52 155
cannam@53 156 // make rayleigh weighting curve
cannam@53 157 d_vec_t wv(wv_len);
cannam@53 158 for (uint i=0; i<wv.size(); i++)
cannam@52 159 {
cannam@53 160 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
cannam@52 161 }
cannam@52 162
cannam@53 163 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
cannam@53 164 uint winlen = 512;
cannam@53 165 uint step = 128;
cannam@53 166
cannam@53 167 // matrix to store output of comb filter bank, increment column of matrix at each frame
cannam@53 168 d_mat_t rcfmat;
cannam@53 169 int col_counter = -1;
cannam@53 170
cannam@53 171 // main loop for beat period calculation
cannam@53 172 for (uint i=0; i<(df.size()-winlen); i+=step)
cannam@53 173 {
cannam@53 174 // get dfframe
cannam@53 175 d_vec_t dfframe(winlen);
cannam@53 176 for (uint k=0; k<winlen; k++)
cannam@53 177 {
cannam@53 178 dfframe[k] = df[i+k];
cannam@53 179 }
cannam@53 180 // get rcf vector for current frame
cannam@53 181 d_vec_t rcf(wv_len);
cannam@53 182 get_rcf(dfframe,wv,rcf);
cannam@52 183
cannam@53 184 rcfmat.push_back( d_vec_t() ); // adds a new column
cannam@53 185 col_counter++;
cannam@53 186 for (uint j=0; j<rcf.size(); j++)
cannam@53 187 {
cannam@53 188 rcfmat[col_counter].push_back( rcf[j] );
cannam@53 189 }
cannam@53 190 }
cannam@53 191
cannam@53 192 // now call viterbi decoding function
cannam@53 193 viterbi_decode(rcfmat,wv,beat_period,tempi);
cannam@52 194 }
cannam@52 195
cannam@52 196
cannam@52 197 void
cannam@52 198 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
cannam@52 199 {
cannam@53 200 // calculate autocorrelation function
cannam@53 201 // then rcf
cannam@53 202 // just hard code for now... don't really need separate functions to do this
cannam@52 203
cannam@53 204 // make acf
cannam@52 205
cannam@53 206 d_vec_t dfframe(dfframe_in);
cannam@52 207
cannam@53 208 adapt_thresh(dfframe);
cannam@52 209
cannam@53 210 d_vec_t acf(dfframe.size());
cannam@52 211
cannam@53 212
cannam@53 213 for (uint lag=0; lag<dfframe.size(); lag++)
cannam@53 214 {
cannam@53 215 double sum = 0.;
cannam@53 216 double tmp = 0.;
cannam@52 217
cannam@53 218 for (uint n=0; n<(dfframe.size()-lag); n++)
cannam@53 219 {
cannam@53 220 tmp = dfframe[n] * dfframe[n+lag];
cannam@53 221 sum += tmp;
cannam@53 222 }
cannam@53 223 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
cannam@53 224 }
cannam@52 225
cannam@53 226 // now apply comb filtering
cannam@53 227 int numelem = 4;
cannam@53 228
cannam@53 229 for (uint i = 2;i < rcf.size();i++) // max beat period
cannam@53 230 {
cannam@53 231 for (int a = 1;a <= numelem;a++) // number of comb elements
cannam@53 232 {
cannam@53 233 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
cannam@53 234 {
cannam@53 235 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
cannam@53 236 }
cannam@53 237 }
cannam@53 238 }
cannam@53 239
cannam@53 240 // apply adaptive threshold to rcf
cannam@53 241 adapt_thresh(rcf);
cannam@53 242
cannam@53 243 double rcfsum =0.;
cannam@53 244 for (uint i=0; i<rcf.size(); i++)
cannam@53 245 {
cannam@53 246 rcf[i] += EPS ;
cannam@53 247 rcfsum += rcf[i];
cannam@53 248 }
cannam@52 249
cannam@53 250 // normalise rcf to sum to unity
cannam@53 251 for (uint i=0; i<rcf.size(); i++)
cannam@52 252 {
cannam@53 253 rcf[i] /= (rcfsum + EPS);
cannam@52 254 }
cannam@52 255 }
cannam@52 256
cannam@52 257 void
cannam@53 258 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
cannam@52 259 {
cannam@53 260 // following Kevin Murphy's Viterbi decoding to get best path of
cannam@53 261 // beat periods through rfcmat
cannam@52 262
cannam@53 263 // make transition matrix
cannam@53 264 d_mat_t tmat;
cannam@53 265 for (uint i=0;i<wv.size();i++)
cannam@53 266 {
cannam@53 267 tmat.push_back ( d_vec_t() ); // adds a new column
cannam@53 268 for (uint j=0; j<wv.size(); j++)
cannam@53 269 {
cannam@53 270 tmat[i].push_back(0.); // fill with zeros initially
cannam@53 271 }
cannam@53 272 }
cannam@53 273
cannam@53 274 // variance of Gaussians in transition matrix
cannam@53 275 // formed of Gaussians on diagonal - implies slow tempo change
cannam@53 276 double sigma = 8.;
cannam@53 277 // don't want really short beat periods, or really long ones
cannam@53 278 for (uint i=20;i <wv.size()-20; i++)
cannam@53 279 {
cannam@53 280 for (uint j=20; j<wv.size()-20; j++)
cannam@53 281 {
cannam@53 282 double mu = static_cast<double>(i);
cannam@53 283 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
cannam@53 284 }
cannam@53 285 }
cannam@52 286
cannam@53 287 // parameters for Viterbi decoding... this part is taken from
cannam@53 288 // Murphy's matlab
cannam@52 289
cannam@53 290 d_mat_t delta;
cannam@53 291 i_mat_t psi;
cannam@53 292 for (uint i=0;i <rcfmat.size(); i++)
cannam@53 293 {
cannam@53 294 delta.push_back( d_vec_t());
cannam@53 295 psi.push_back( i_vec_t());
cannam@53 296 for (uint j=0; j<rcfmat[i].size(); j++)
cannam@53 297 {
cannam@53 298 delta[i].push_back(0.); // fill with zeros initially
cannam@53 299 psi[i].push_back(0); // fill with zeros initially
cannam@53 300 }
cannam@53 301 }
cannam@52 302
cannam@52 303
cannam@53 304 uint T = delta.size();
cannam@53 305 uint Q = delta[0].size();
cannam@52 306
cannam@53 307 // initialize first column of delta
cannam@52 308 for (uint j=0; j<Q; j++)
cannam@52 309 {
cannam@53 310 delta[0][j] = wv[j] * rcfmat[0][j];
cannam@53 311 psi[0][j] = 0;
cannam@52 312 }
cannam@53 313
cannam@52 314 double deltasum = 0.;
cannam@52 315 for (uint i=0; i<Q; i++)
cannam@52 316 {
cannam@53 317 deltasum += delta[0][i];
cannam@52 318 }
cannam@52 319 for (uint i=0; i<Q; i++)
cannam@52 320 {
cannam@53 321 delta[0][i] /= (deltasum + EPS);
cannam@52 322 }
cannam@52 323
cannam@52 324
cannam@53 325 for (uint t=1; t<T; t++)
cannam@53 326 {
cannam@53 327 d_vec_t tmp_vec(Q);
cannam@52 328
cannam@53 329 for (uint j=0; j<Q; j++)
cannam@53 330 {
cannam@53 331 for (uint i=0; i<Q; i++)
cannam@53 332 {
cannam@53 333 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
cannam@53 334 }
cannam@53 335
cannam@53 336 delta[t][j] = get_max_val(tmp_vec);
cannam@52 337
cannam@53 338 psi[t][j] = get_max_ind(tmp_vec);
cannam@53 339
cannam@53 340 delta[t][j] *= rcfmat[t][j];
cannam@53 341 }
cannam@52 342
cannam@53 343 // normalise current delta column
cannam@53 344 double deltasum = 0.;
cannam@53 345 for (uint i=0; i<Q; i++)
cannam@53 346 {
cannam@53 347 deltasum += delta[t][i];
cannam@53 348 }
cannam@53 349 for (uint i=0; i<Q; i++)
cannam@53 350 {
cannam@53 351 delta[t][i] /= (deltasum + EPS);
cannam@53 352 }
cannam@53 353 }
cannam@52 354
cannam@53 355 i_vec_t bestpath(T);
cannam@53 356 d_vec_t tmp_vec(Q);
cannam@53 357 for (uint i=0; i<Q; i++)
cannam@53 358 {
cannam@53 359 tmp_vec[i] = delta[T-1][i];
cannam@53 360 }
cannam@52 361
cannam@53 362 // find starting point - best beat period for "last" frame
cannam@53 363 bestpath[T-1] = get_max_ind(tmp_vec);
cannam@53 364
cannam@53 365 // backtrace through index of maximum values in psi
cannam@53 366 for (uint t=T-2; t>0 ;t--)
cannam@53 367 {
cannam@53 368 bestpath[t] = psi[t+1][bestpath[t+1]];
cannam@53 369 }
cannam@52 370
cannam@53 371 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
cannam@53 372 bestpath[0] = psi[1][bestpath[1]];
cannam@52 373
cannam@53 374 uint lastind = 0;
cannam@53 375 for (uint i=0; i<T; i++)
cannam@53 376 {
cannam@53 377 uint step = 128;
cannam@53 378 for (uint j=0; j<step; j++)
cannam@53 379 {
cannam@53 380 lastind = i*step+j;
cannam@53 381 beat_period[lastind] = bestpath[i];
cannam@53 382 }
cannam@53 383 }
cannam@52 384
cannam@53 385 //fill in the last values...
cannam@53 386 for (uint i=lastind; i<beat_period.size(); i++)
cannam@53 387 {
cannam@53 388 beat_period[i] = beat_period[lastind];
cannam@53 389 }
cannam@52 390
cannam@53 391 for (uint i = 0; i < beat_period.size(); i++)
cannam@52 392 {
cannam@53 393 tempi.push_back((60.*44100./512.)/beat_period[i]);
cannam@52 394 }
cannam@52 395 }
cannam@52 396
cannam@52 397 double
cannam@52 398 TempoTrackV2::get_max_val(const d_vec_t &df)
cannam@52 399 {
cannam@53 400 double maxval = 0.;
cannam@53 401 for (uint i=0; i<df.size(); i++)
cannam@52 402 {
cannam@53 403 if (maxval < df[i])
cannam@53 404 {
cannam@53 405 maxval = df[i];
cannam@53 406 }
cannam@52 407 }
cannam@52 408
cannam@53 409 return maxval;
cannam@52 410 }
cannam@52 411
cannam@52 412 int
cannam@52 413 TempoTrackV2::get_max_ind(const d_vec_t &df)
cannam@52 414 {
cannam@53 415 double maxval = 0.;
cannam@53 416 int ind = 0;
cannam@53 417 for (uint i=0; i<df.size(); i++)
cannam@52 418 {
cannam@53 419 if (maxval < df[i])
cannam@53 420 {
cannam@53 421 maxval = df[i];
cannam@53 422 ind = i;
cannam@53 423 }
cannam@52 424 }
cannam@53 425
cannam@53 426 return ind;
cannam@52 427 }
cannam@52 428
cannam@52 429 void
cannam@52 430 TempoTrackV2::normalise_vec(d_vec_t &df)
cannam@52 431 {
cannam@53 432 double sum = 0.;
cannam@53 433 for (uint i=0; i<df.size(); i++)
cannam@53 434 {
cannam@53 435 sum += df[i];
cannam@53 436 }
cannam@53 437
cannam@53 438 for (uint i=0; i<df.size(); i++)
cannam@53 439 {
cannam@53 440 df[i]/= (sum + EPS);
cannam@53 441 }
cannam@52 442 }
cannam@52 443
cannam@52 444 void
cannam@52 445 TempoTrackV2::calculateBeats(const d_vec_t &df, const d_vec_t &beat_period,
cannam@52 446 d_vec_t &beats)
cannam@52 447 {
cannam@53 448 d_vec_t cumscore(df.size()); // store cumulative score
cannam@53 449 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
cannam@53 450 d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
cannam@52 451
cannam@53 452 for (uint i=0; i<df.size(); i++)
cannam@52 453 {
cannam@53 454 localscore[i] = df[i];
cannam@53 455 backlink[i] = -1;
cannam@52 456 }
cannam@52 457
cannam@53 458 double tightness = 4.;
cannam@53 459 double alpha = 0.9;
cannam@52 460
cannam@53 461 // main loop
cannam@53 462 for (uint i=0; i<localscore.size(); i++)
cannam@53 463 {
cannam@53 464 int prange_min = -2*beat_period[i];
cannam@53 465 int prange_max = round(-0.5*beat_period[i]);
cannam@52 466
cannam@53 467 // transition range
cannam@53 468 d_vec_t txwt (prange_max - prange_min + 1);
cannam@53 469 d_vec_t scorecands (txwt.size());
cannam@52 470
cannam@53 471 for (uint j=0;j<txwt.size();j++)
cannam@53 472 {
cannam@53 473 double mu = static_cast<double> (beat_period[i]);
cannam@53 474 txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
cannam@52 475
cannam@53 476 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
cannam@53 477 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
cannam@52 478
cannam@53 479 int cscore_ind = i+prange_min+j;
cannam@53 480 if (cscore_ind >= 0)
cannam@53 481 {
cannam@53 482 scorecands[j] = txwt[j] * cumscore[cscore_ind];
cannam@53 483 }
cannam@53 484 }
cannam@52 485
cannam@53 486 // find max value and index of maximum value
cannam@53 487 double vv = get_max_val(scorecands);
cannam@53 488 int xx = get_max_ind(scorecands);
cannam@52 489
cannam@53 490 cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
cannam@53 491 backlink[i] = i+prange_min+xx;
cannam@53 492 }
cannam@53 493
cannam@53 494 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
cannam@53 495 d_vec_t tmp_vec;
cannam@53 496 for (uint i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
cannam@53 497 {
cannam@53 498 tmp_vec.push_back(cumscore[i]);
cannam@53 499 }
cannam@53 500
cannam@53 501 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
cannam@53 502
cannam@53 503 // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
cannam@53 504 // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
cannam@53 505 i_vec_t ibeats;
cannam@53 506 ibeats.push_back(startpoint);
cannam@53 507 while (backlink[ibeats.back()] > 0)
cannam@53 508 {
cannam@53 509 ibeats.push_back(backlink[ibeats.back()]);
cannam@53 510 }
cannam@52 511
cannam@53 512 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
cannam@53 513 for (uint i=0; i<ibeats.size(); i++)
cannam@53 514 {
cannam@53 515 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
cannam@53 516 }
cannam@52 517 }
cannam@52 518
cannam@52 519