annotate dsp/tempotracking/TempoTrackV2.cpp @ 79:054c384d860d

* Solaris build fixes
author cannam
date Mon, 14 Sep 2009 13:01:44 +0000
parents c3cdb404f807
children e5907ae6de17
rev   line source
cannam@52 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@52 2
cannam@52 3 /*
cannam@52 4 QM DSP Library
cannam@52 5
cannam@52 6 Centre for Digital Music, Queen Mary, University of London.
cannam@52 7 This file copyright 2008-2009 Matthew Davies and QMUL.
cannam@52 8 All rights reserved.
cannam@52 9 */
cannam@52 10
cannam@52 11 #include "TempoTrackV2.h"
cannam@52 12
cannam@52 13 #include <cmath>
cannam@52 14 #include <cstdlib>
cannam@53 15 #include <iostream>
cannam@52 16
cannam@54 17 #include "maths/MathUtilities.h"
cannam@52 18
cannam@52 19 #define EPS 0.0000008 // just some arbitrary small number
cannam@52 20
cannam@54 21 TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
cannam@54 22 m_rate(rate), m_increment(increment) { }
cannam@52 23 TempoTrackV2::~TempoTrackV2() { }
cannam@52 24
cannam@52 25 void
cannam@52 26 TempoTrackV2::filter_df(d_vec_t &df)
cannam@52 27 {
cannam@53 28 d_vec_t a(3);
cannam@53 29 d_vec_t b(3);
cannam@53 30 d_vec_t lp_df(df.size());
cannam@52 31
cannam@53 32 //equivalent in matlab to [b,a] = butter(2,0.4);
cannam@53 33 a[0] = 1.0000;
cannam@53 34 a[1] = -0.3695;
cannam@53 35 a[2] = 0.1958;
cannam@53 36 b[0] = 0.2066;
cannam@53 37 b[1] = 0.4131;
cannam@53 38 b[2] = 0.2066;
cannam@53 39
cannam@53 40 double inp1 = 0.;
cannam@53 41 double inp2 = 0.;
cannam@53 42 double out1 = 0.;
cannam@53 43 double out2 = 0.;
cannam@52 44
cannam@52 45
cannam@53 46 // forwards filtering
cannam@70 47 for (unsigned int i = 0;i < df.size();i++)
cannam@53 48 {
cannam@53 49 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
cannam@53 50 inp2 = inp1;
cannam@53 51 inp1 = df[i];
cannam@53 52 out2 = out1;
cannam@53 53 out1 = lp_df[i];
cannam@53 54 }
cannam@52 55
cannam@53 56 // copy forwards filtering to df...
cannam@53 57 // but, time-reversed, ready for backwards filtering
cannam@70 58 for (unsigned int i = 0;i < df.size();i++)
cannam@53 59 {
cannam@53 60 df[i] = lp_df[df.size()-i-1];
cannam@53 61 }
cannam@52 62
cannam@70 63 for (unsigned int i = 0;i < df.size();i++)
cannam@53 64 {
cannam@53 65 lp_df[i] = 0.;
cannam@53 66 }
cannam@52 67
cannam@53 68 inp1 = 0.; inp2 = 0.;
cannam@53 69 out1 = 0.; out2 = 0.;
cannam@52 70
cannam@52 71 // backwards filetering on time-reversed df
cannam@70 72 for (unsigned int i = 0;i < df.size();i++)
cannam@53 73 {
cannam@53 74 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
cannam@53 75 inp2 = inp1;
cannam@53 76 inp1 = df[i];
cannam@53 77 out2 = out1;
cannam@53 78 out1 = lp_df[i];
cannam@53 79 }
cannam@52 80
cannam@52 81 // write the re-reversed (i.e. forward) version back to df
cannam@70 82 for (unsigned int i = 0;i < df.size();i++)
cannam@53 83 {
cannam@53 84 df[i] = lp_df[df.size()-i-1];
cannam@53 85 }
cannam@52 86 }
cannam@52 87
cannam@52 88
cannam@52 89 void
cannam@79 90 TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
cannam@79 91 vector<double> &beat_period,
cannam@79 92 vector<double> &tempi)
cannam@52 93 {
cannam@53 94 // to follow matlab.. split into 512 sample frames with a 128 hop size
cannam@53 95 // calculate the acf,
cannam@53 96 // then the rcf.. and then stick the rcfs as columns of a matrix
cannam@53 97 // then call viterbi decoding with weight vector and transition matrix
cannam@53 98 // and get best path
cannam@52 99
cannam@70 100 unsigned int wv_len = 128;
cannam@53 101 double rayparam = 43.;
cannam@52 102
cannam@53 103 // make rayleigh weighting curve
cannam@53 104 d_vec_t wv(wv_len);
cannam@70 105 for (unsigned int i=0; i<wv.size(); i++)
cannam@52 106 {
cannam@53 107 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
cannam@52 108 }
cannam@52 109
cannam@53 110 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
cannam@70 111 unsigned int winlen = 512;
cannam@70 112 unsigned int step = 128;
cannam@53 113
cannam@53 114 // matrix to store output of comb filter bank, increment column of matrix at each frame
cannam@53 115 d_mat_t rcfmat;
cannam@53 116 int col_counter = -1;
cannam@53 117
cannam@53 118 // main loop for beat period calculation
cannam@70 119 for (unsigned int i=0; i+winlen<df.size(); i+=step)
cannam@53 120 {
cannam@53 121 // get dfframe
cannam@53 122 d_vec_t dfframe(winlen);
cannam@70 123 for (unsigned int k=0; k<winlen; k++)
cannam@53 124 {
cannam@53 125 dfframe[k] = df[i+k];
cannam@53 126 }
cannam@53 127 // get rcf vector for current frame
cannam@53 128 d_vec_t rcf(wv_len);
cannam@53 129 get_rcf(dfframe,wv,rcf);
cannam@52 130
cannam@53 131 rcfmat.push_back( d_vec_t() ); // adds a new column
cannam@53 132 col_counter++;
cannam@70 133 for (unsigned int j=0; j<rcf.size(); j++)
cannam@53 134 {
cannam@53 135 rcfmat[col_counter].push_back( rcf[j] );
cannam@53 136 }
cannam@53 137 }
cannam@53 138
cannam@53 139 // now call viterbi decoding function
cannam@53 140 viterbi_decode(rcfmat,wv,beat_period,tempi);
cannam@52 141 }
cannam@52 142
cannam@52 143
cannam@52 144 void
cannam@52 145 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
cannam@52 146 {
cannam@53 147 // calculate autocorrelation function
cannam@53 148 // then rcf
cannam@53 149 // just hard code for now... don't really need separate functions to do this
cannam@52 150
cannam@53 151 // make acf
cannam@52 152
cannam@53 153 d_vec_t dfframe(dfframe_in);
cannam@52 154
cannam@54 155 MathUtilities::adaptiveThreshold(dfframe);
cannam@52 156
cannam@53 157 d_vec_t acf(dfframe.size());
cannam@52 158
cannam@53 159
cannam@70 160 for (unsigned int lag=0; lag<dfframe.size(); lag++)
cannam@53 161 {
cannam@53 162 double sum = 0.;
cannam@53 163 double tmp = 0.;
cannam@52 164
cannam@70 165 for (unsigned int n=0; n<(dfframe.size()-lag); n++)
cannam@53 166 {
cannam@53 167 tmp = dfframe[n] * dfframe[n+lag];
cannam@53 168 sum += tmp;
cannam@53 169 }
cannam@53 170 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
cannam@53 171 }
cannam@52 172
cannam@53 173 // now apply comb filtering
cannam@53 174 int numelem = 4;
cannam@53 175
cannam@70 176 for (unsigned int i = 2;i < rcf.size();i++) // max beat period
cannam@53 177 {
cannam@53 178 for (int a = 1;a <= numelem;a++) // number of comb elements
cannam@53 179 {
cannam@53 180 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
cannam@53 181 {
cannam@53 182 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
cannam@53 183 }
cannam@53 184 }
cannam@53 185 }
cannam@53 186
cannam@53 187 // apply adaptive threshold to rcf
cannam@54 188 MathUtilities::adaptiveThreshold(rcf);
cannam@53 189
cannam@53 190 double rcfsum =0.;
cannam@70 191 for (unsigned int i=0; i<rcf.size(); i++)
cannam@53 192 {
cannam@53 193 rcf[i] += EPS ;
cannam@53 194 rcfsum += rcf[i];
cannam@53 195 }
cannam@52 196
cannam@53 197 // normalise rcf to sum to unity
cannam@70 198 for (unsigned int i=0; i<rcf.size(); i++)
cannam@52 199 {
cannam@53 200 rcf[i] /= (rcfsum + EPS);
cannam@52 201 }
cannam@52 202 }
cannam@52 203
cannam@52 204 void
cannam@53 205 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
cannam@52 206 {
cannam@53 207 // following Kevin Murphy's Viterbi decoding to get best path of
cannam@53 208 // beat periods through rfcmat
cannam@52 209
cannam@53 210 // make transition matrix
cannam@53 211 d_mat_t tmat;
cannam@70 212 for (unsigned int i=0;i<wv.size();i++)
cannam@53 213 {
cannam@53 214 tmat.push_back ( d_vec_t() ); // adds a new column
cannam@70 215 for (unsigned int j=0; j<wv.size(); j++)
cannam@53 216 {
cannam@53 217 tmat[i].push_back(0.); // fill with zeros initially
cannam@53 218 }
cannam@53 219 }
cannam@53 220
cannam@53 221 // variance of Gaussians in transition matrix
cannam@53 222 // formed of Gaussians on diagonal - implies slow tempo change
cannam@53 223 double sigma = 8.;
cannam@53 224 // don't want really short beat periods, or really long ones
cannam@70 225 for (unsigned int i=20;i <wv.size()-20; i++)
cannam@53 226 {
cannam@70 227 for (unsigned int j=20; j<wv.size()-20; j++)
cannam@53 228 {
cannam@53 229 double mu = static_cast<double>(i);
cannam@53 230 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
cannam@53 231 }
cannam@53 232 }
cannam@52 233
cannam@53 234 // parameters for Viterbi decoding... this part is taken from
cannam@53 235 // Murphy's matlab
cannam@52 236
cannam@53 237 d_mat_t delta;
cannam@53 238 i_mat_t psi;
cannam@70 239 for (unsigned int i=0;i <rcfmat.size(); i++)
cannam@53 240 {
cannam@53 241 delta.push_back( d_vec_t());
cannam@53 242 psi.push_back( i_vec_t());
cannam@70 243 for (unsigned int j=0; j<rcfmat[i].size(); j++)
cannam@53 244 {
cannam@53 245 delta[i].push_back(0.); // fill with zeros initially
cannam@53 246 psi[i].push_back(0); // fill with zeros initially
cannam@53 247 }
cannam@53 248 }
cannam@52 249
cannam@52 250
cannam@70 251 unsigned int T = delta.size();
cannam@56 252
cannam@56 253 if (T < 2) return; // can't do anything at all meaningful
cannam@56 254
cannam@70 255 unsigned int Q = delta[0].size();
cannam@52 256
cannam@53 257 // initialize first column of delta
cannam@70 258 for (unsigned int j=0; j<Q; j++)
cannam@52 259 {
cannam@53 260 delta[0][j] = wv[j] * rcfmat[0][j];
cannam@53 261 psi[0][j] = 0;
cannam@52 262 }
cannam@53 263
cannam@52 264 double deltasum = 0.;
cannam@70 265 for (unsigned int i=0; i<Q; i++)
cannam@52 266 {
cannam@53 267 deltasum += delta[0][i];
cannam@52 268 }
cannam@70 269 for (unsigned int i=0; i<Q; i++)
cannam@52 270 {
cannam@53 271 delta[0][i] /= (deltasum + EPS);
cannam@52 272 }
cannam@52 273
cannam@52 274
cannam@70 275 for (unsigned int t=1; t<T; t++)
cannam@53 276 {
cannam@53 277 d_vec_t tmp_vec(Q);
cannam@52 278
cannam@70 279 for (unsigned int j=0; j<Q; j++)
cannam@53 280 {
cannam@70 281 for (unsigned int i=0; i<Q; i++)
cannam@53 282 {
cannam@53 283 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
cannam@53 284 }
cannam@53 285
cannam@53 286 delta[t][j] = get_max_val(tmp_vec);
cannam@52 287
cannam@53 288 psi[t][j] = get_max_ind(tmp_vec);
cannam@53 289
cannam@53 290 delta[t][j] *= rcfmat[t][j];
cannam@53 291 }
cannam@52 292
cannam@53 293 // normalise current delta column
cannam@53 294 double deltasum = 0.;
cannam@70 295 for (unsigned int i=0; i<Q; i++)
cannam@53 296 {
cannam@53 297 deltasum += delta[t][i];
cannam@53 298 }
cannam@70 299 for (unsigned int i=0; i<Q; i++)
cannam@53 300 {
cannam@53 301 delta[t][i] /= (deltasum + EPS);
cannam@53 302 }
cannam@53 303 }
cannam@52 304
cannam@53 305 i_vec_t bestpath(T);
cannam@53 306 d_vec_t tmp_vec(Q);
cannam@70 307 for (unsigned int i=0; i<Q; i++)
cannam@53 308 {
cannam@53 309 tmp_vec[i] = delta[T-1][i];
cannam@53 310 }
cannam@52 311
cannam@53 312 // find starting point - best beat period for "last" frame
cannam@53 313 bestpath[T-1] = get_max_ind(tmp_vec);
cannam@53 314
cannam@53 315 // backtrace through index of maximum values in psi
cannam@70 316 for (unsigned int t=T-2; t>0 ;t--)
cannam@53 317 {
cannam@53 318 bestpath[t] = psi[t+1][bestpath[t+1]];
cannam@53 319 }
cannam@52 320
cannam@53 321 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
cannam@53 322 bestpath[0] = psi[1][bestpath[1]];
cannam@52 323
cannam@70 324 unsigned int lastind = 0;
cannam@70 325 for (unsigned int i=0; i<T; i++)
cannam@53 326 {
cannam@70 327 unsigned int step = 128;
cannam@70 328 for (unsigned int j=0; j<step; j++)
cannam@53 329 {
cannam@53 330 lastind = i*step+j;
cannam@53 331 beat_period[lastind] = bestpath[i];
cannam@53 332 }
cannam@57 333 // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl;
cannam@53 334 }
cannam@52 335
cannam@53 336 //fill in the last values...
cannam@70 337 for (unsigned int i=lastind; i<beat_period.size(); i++)
cannam@53 338 {
cannam@53 339 beat_period[i] = beat_period[lastind];
cannam@53 340 }
cannam@52 341
cannam@70 342 for (unsigned int i = 0; i < beat_period.size(); i++)
cannam@52 343 {
cannam@54 344 tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
cannam@52 345 }
cannam@52 346 }
cannam@52 347
cannam@52 348 double
cannam@52 349 TempoTrackV2::get_max_val(const d_vec_t &df)
cannam@52 350 {
cannam@53 351 double maxval = 0.;
cannam@70 352 for (unsigned int i=0; i<df.size(); i++)
cannam@52 353 {
cannam@53 354 if (maxval < df[i])
cannam@53 355 {
cannam@53 356 maxval = df[i];
cannam@53 357 }
cannam@52 358 }
cannam@52 359
cannam@53 360 return maxval;
cannam@52 361 }
cannam@52 362
cannam@52 363 int
cannam@52 364 TempoTrackV2::get_max_ind(const d_vec_t &df)
cannam@52 365 {
cannam@53 366 double maxval = 0.;
cannam@53 367 int ind = 0;
cannam@70 368 for (unsigned int i=0; i<df.size(); i++)
cannam@52 369 {
cannam@53 370 if (maxval < df[i])
cannam@53 371 {
cannam@53 372 maxval = df[i];
cannam@53 373 ind = i;
cannam@53 374 }
cannam@52 375 }
cannam@53 376
cannam@53 377 return ind;
cannam@52 378 }
cannam@52 379
cannam@52 380 void
cannam@52 381 TempoTrackV2::normalise_vec(d_vec_t &df)
cannam@52 382 {
cannam@53 383 double sum = 0.;
cannam@70 384 for (unsigned int i=0; i<df.size(); i++)
cannam@53 385 {
cannam@53 386 sum += df[i];
cannam@53 387 }
cannam@53 388
cannam@70 389 for (unsigned int i=0; i<df.size(); i++)
cannam@53 390 {
cannam@53 391 df[i]/= (sum + EPS);
cannam@53 392 }
cannam@52 393 }
cannam@52 394
cannam@52 395 void
cannam@79 396 TempoTrackV2::calculateBeats(const vector<double> &df,
cannam@79 397 const vector<double> &beat_period,
cannam@79 398 vector<double> &beats)
cannam@52 399 {
cannam@56 400 if (df.empty() || beat_period.empty()) return;
cannam@56 401
cannam@53 402 d_vec_t cumscore(df.size()); // store cumulative score
cannam@53 403 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
cannam@53 404 d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
cannam@52 405
cannam@70 406 for (unsigned int i=0; i<df.size(); i++)
cannam@52 407 {
cannam@53 408 localscore[i] = df[i];
cannam@53 409 backlink[i] = -1;
cannam@52 410 }
cannam@52 411
cannam@53 412 double tightness = 4.;
cannam@53 413 double alpha = 0.9;
cannam@52 414
cannam@53 415 // main loop
cannam@70 416 for (unsigned int i=0; i<localscore.size(); i++)
cannam@53 417 {
cannam@53 418 int prange_min = -2*beat_period[i];
cannam@53 419 int prange_max = round(-0.5*beat_period[i]);
cannam@52 420
cannam@53 421 // transition range
cannam@53 422 d_vec_t txwt (prange_max - prange_min + 1);
cannam@53 423 d_vec_t scorecands (txwt.size());
cannam@52 424
cannam@70 425 for (unsigned int j=0;j<txwt.size();j++)
cannam@53 426 {
cannam@53 427 double mu = static_cast<double> (beat_period[i]);
cannam@53 428 txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
cannam@52 429
cannam@53 430 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
cannam@53 431 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
cannam@52 432
cannam@53 433 int cscore_ind = i+prange_min+j;
cannam@53 434 if (cscore_ind >= 0)
cannam@53 435 {
cannam@53 436 scorecands[j] = txwt[j] * cumscore[cscore_ind];
cannam@53 437 }
cannam@53 438 }
cannam@52 439
cannam@53 440 // find max value and index of maximum value
cannam@53 441 double vv = get_max_val(scorecands);
cannam@53 442 int xx = get_max_ind(scorecands);
cannam@52 443
cannam@53 444 cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
cannam@53 445 backlink[i] = i+prange_min+xx;
cannam@55 446
cannam@57 447 // std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
cannam@53 448 }
cannam@53 449
cannam@53 450 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
cannam@53 451 d_vec_t tmp_vec;
cannam@70 452 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
cannam@53 453 {
cannam@53 454 tmp_vec.push_back(cumscore[i]);
cannam@53 455 }
cannam@53 456
cannam@53 457 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
cannam@53 458
cannam@56 459 // can happen if no results obtained earlier (e.g. input too short)
cannam@56 460 if (startpoint >= backlink.size()) startpoint = backlink.size()-1;
cannam@56 461
cannam@53 462 // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
cannam@53 463 // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
cannam@53 464 i_vec_t ibeats;
cannam@53 465 ibeats.push_back(startpoint);
cannam@57 466 // std::cerr << "startpoint = " << startpoint << std::endl;
cannam@53 467 while (backlink[ibeats.back()] > 0)
cannam@53 468 {
cannam@57 469 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
cannam@56 470 int b = ibeats.back();
cannam@56 471 if (backlink[b] == b) break; // shouldn't happen... haha
cannam@56 472 ibeats.push_back(backlink[b]);
cannam@53 473 }
cannam@52 474
cannam@53 475 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
cannam@70 476 for (unsigned int i=0; i<ibeats.size(); i++)
cannam@53 477 {
cannam@53 478 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
cannam@53 479 }
cannam@52 480 }
cannam@52 481
cannam@52 482