annotate dsp/tempotracking/TempoTrackV2.cpp @ 304:702ff8c08137

* Solaris build fixes
author Chris Cannam <c.cannam@qmul.ac.uk>
date Mon, 14 Sep 2009 13:01:44 +0000
parents 1c9258dd155e
children e5907ae6de17
rev   line source
c@277 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@277 2
c@277 3 /*
c@277 4 QM DSP Library
c@277 5
c@277 6 Centre for Digital Music, Queen Mary, University of London.
c@277 7 This file copyright 2008-2009 Matthew Davies and QMUL.
c@277 8 All rights reserved.
c@277 9 */
c@277 10
c@277 11 #include "TempoTrackV2.h"
c@277 12
c@277 13 #include <cmath>
c@277 14 #include <cstdlib>
c@278 15 #include <iostream>
c@277 16
c@279 17 #include "maths/MathUtilities.h"
c@277 18
c@277 19 #define EPS 0.0000008 // just some arbitrary small number
c@277 20
c@279 21 TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
c@279 22 m_rate(rate), m_increment(increment) { }
c@277 23 TempoTrackV2::~TempoTrackV2() { }
c@277 24
c@277 25 void
c@277 26 TempoTrackV2::filter_df(d_vec_t &df)
c@277 27 {
c@278 28 d_vec_t a(3);
c@278 29 d_vec_t b(3);
c@278 30 d_vec_t lp_df(df.size());
c@277 31
c@278 32 //equivalent in matlab to [b,a] = butter(2,0.4);
c@278 33 a[0] = 1.0000;
c@278 34 a[1] = -0.3695;
c@278 35 a[2] = 0.1958;
c@278 36 b[0] = 0.2066;
c@278 37 b[1] = 0.4131;
c@278 38 b[2] = 0.2066;
c@278 39
c@278 40 double inp1 = 0.;
c@278 41 double inp2 = 0.;
c@278 42 double out1 = 0.;
c@278 43 double out2 = 0.;
c@277 44
c@277 45
c@278 46 // forwards filtering
c@295 47 for (unsigned int i = 0;i < df.size();i++)
c@278 48 {
c@278 49 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
c@278 50 inp2 = inp1;
c@278 51 inp1 = df[i];
c@278 52 out2 = out1;
c@278 53 out1 = lp_df[i];
c@278 54 }
c@277 55
c@278 56 // copy forwards filtering to df...
c@278 57 // but, time-reversed, ready for backwards filtering
c@295 58 for (unsigned int i = 0;i < df.size();i++)
c@278 59 {
c@278 60 df[i] = lp_df[df.size()-i-1];
c@278 61 }
c@277 62
c@295 63 for (unsigned int i = 0;i < df.size();i++)
c@278 64 {
c@278 65 lp_df[i] = 0.;
c@278 66 }
c@277 67
c@278 68 inp1 = 0.; inp2 = 0.;
c@278 69 out1 = 0.; out2 = 0.;
c@277 70
c@277 71 // backwards filetering on time-reversed df
c@295 72 for (unsigned int i = 0;i < df.size();i++)
c@278 73 {
c@278 74 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
c@278 75 inp2 = inp1;
c@278 76 inp1 = df[i];
c@278 77 out2 = out1;
c@278 78 out1 = lp_df[i];
c@278 79 }
c@277 80
c@277 81 // write the re-reversed (i.e. forward) version back to df
c@295 82 for (unsigned int i = 0;i < df.size();i++)
c@278 83 {
c@278 84 df[i] = lp_df[df.size()-i-1];
c@278 85 }
c@277 86 }
c@277 87
c@277 88
c@277 89 void
c@304 90 TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
c@304 91 vector<double> &beat_period,
c@304 92 vector<double> &tempi)
c@277 93 {
c@278 94 // to follow matlab.. split into 512 sample frames with a 128 hop size
c@278 95 // calculate the acf,
c@278 96 // then the rcf.. and then stick the rcfs as columns of a matrix
c@278 97 // then call viterbi decoding with weight vector and transition matrix
c@278 98 // and get best path
c@277 99
c@295 100 unsigned int wv_len = 128;
c@278 101 double rayparam = 43.;
c@277 102
c@278 103 // make rayleigh weighting curve
c@278 104 d_vec_t wv(wv_len);
c@295 105 for (unsigned int i=0; i<wv.size(); i++)
c@277 106 {
c@278 107 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
c@277 108 }
c@277 109
c@278 110 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
c@295 111 unsigned int winlen = 512;
c@295 112 unsigned int step = 128;
c@278 113
c@278 114 // matrix to store output of comb filter bank, increment column of matrix at each frame
c@278 115 d_mat_t rcfmat;
c@278 116 int col_counter = -1;
c@278 117
c@278 118 // main loop for beat period calculation
c@295 119 for (unsigned int i=0; i+winlen<df.size(); i+=step)
c@278 120 {
c@278 121 // get dfframe
c@278 122 d_vec_t dfframe(winlen);
c@295 123 for (unsigned int k=0; k<winlen; k++)
c@278 124 {
c@278 125 dfframe[k] = df[i+k];
c@278 126 }
c@278 127 // get rcf vector for current frame
c@278 128 d_vec_t rcf(wv_len);
c@278 129 get_rcf(dfframe,wv,rcf);
c@277 130
c@278 131 rcfmat.push_back( d_vec_t() ); // adds a new column
c@278 132 col_counter++;
c@295 133 for (unsigned int j=0; j<rcf.size(); j++)
c@278 134 {
c@278 135 rcfmat[col_counter].push_back( rcf[j] );
c@278 136 }
c@278 137 }
c@278 138
c@278 139 // now call viterbi decoding function
c@278 140 viterbi_decode(rcfmat,wv,beat_period,tempi);
c@277 141 }
c@277 142
c@277 143
c@277 144 void
c@277 145 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
c@277 146 {
c@278 147 // calculate autocorrelation function
c@278 148 // then rcf
c@278 149 // just hard code for now... don't really need separate functions to do this
c@277 150
c@278 151 // make acf
c@277 152
c@278 153 d_vec_t dfframe(dfframe_in);
c@277 154
c@279 155 MathUtilities::adaptiveThreshold(dfframe);
c@277 156
c@278 157 d_vec_t acf(dfframe.size());
c@277 158
c@278 159
c@295 160 for (unsigned int lag=0; lag<dfframe.size(); lag++)
c@278 161 {
c@278 162 double sum = 0.;
c@278 163 double tmp = 0.;
c@277 164
c@295 165 for (unsigned int n=0; n<(dfframe.size()-lag); n++)
c@278 166 {
c@278 167 tmp = dfframe[n] * dfframe[n+lag];
c@278 168 sum += tmp;
c@278 169 }
c@278 170 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
c@278 171 }
c@277 172
c@278 173 // now apply comb filtering
c@278 174 int numelem = 4;
c@278 175
c@295 176 for (unsigned int i = 2;i < rcf.size();i++) // max beat period
c@278 177 {
c@278 178 for (int a = 1;a <= numelem;a++) // number of comb elements
c@278 179 {
c@278 180 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
c@278 181 {
c@278 182 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
c@278 183 }
c@278 184 }
c@278 185 }
c@278 186
c@278 187 // apply adaptive threshold to rcf
c@279 188 MathUtilities::adaptiveThreshold(rcf);
c@278 189
c@278 190 double rcfsum =0.;
c@295 191 for (unsigned int i=0; i<rcf.size(); i++)
c@278 192 {
c@278 193 rcf[i] += EPS ;
c@278 194 rcfsum += rcf[i];
c@278 195 }
c@277 196
c@278 197 // normalise rcf to sum to unity
c@295 198 for (unsigned int i=0; i<rcf.size(); i++)
c@277 199 {
c@278 200 rcf[i] /= (rcfsum + EPS);
c@277 201 }
c@277 202 }
c@277 203
c@277 204 void
c@278 205 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
c@277 206 {
c@278 207 // following Kevin Murphy's Viterbi decoding to get best path of
c@278 208 // beat periods through rfcmat
c@277 209
c@278 210 // make transition matrix
c@278 211 d_mat_t tmat;
c@295 212 for (unsigned int i=0;i<wv.size();i++)
c@278 213 {
c@278 214 tmat.push_back ( d_vec_t() ); // adds a new column
c@295 215 for (unsigned int j=0; j<wv.size(); j++)
c@278 216 {
c@278 217 tmat[i].push_back(0.); // fill with zeros initially
c@278 218 }
c@278 219 }
c@278 220
c@278 221 // variance of Gaussians in transition matrix
c@278 222 // formed of Gaussians on diagonal - implies slow tempo change
c@278 223 double sigma = 8.;
c@278 224 // don't want really short beat periods, or really long ones
c@295 225 for (unsigned int i=20;i <wv.size()-20; i++)
c@278 226 {
c@295 227 for (unsigned int j=20; j<wv.size()-20; j++)
c@278 228 {
c@278 229 double mu = static_cast<double>(i);
c@278 230 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
c@278 231 }
c@278 232 }
c@277 233
c@278 234 // parameters for Viterbi decoding... this part is taken from
c@278 235 // Murphy's matlab
c@277 236
c@278 237 d_mat_t delta;
c@278 238 i_mat_t psi;
c@295 239 for (unsigned int i=0;i <rcfmat.size(); i++)
c@278 240 {
c@278 241 delta.push_back( d_vec_t());
c@278 242 psi.push_back( i_vec_t());
c@295 243 for (unsigned int j=0; j<rcfmat[i].size(); j++)
c@278 244 {
c@278 245 delta[i].push_back(0.); // fill with zeros initially
c@278 246 psi[i].push_back(0); // fill with zeros initially
c@278 247 }
c@278 248 }
c@277 249
c@277 250
c@295 251 unsigned int T = delta.size();
c@281 252
c@281 253 if (T < 2) return; // can't do anything at all meaningful
c@281 254
c@295 255 unsigned int Q = delta[0].size();
c@277 256
c@278 257 // initialize first column of delta
c@295 258 for (unsigned int j=0; j<Q; j++)
c@277 259 {
c@278 260 delta[0][j] = wv[j] * rcfmat[0][j];
c@278 261 psi[0][j] = 0;
c@277 262 }
c@278 263
c@277 264 double deltasum = 0.;
c@295 265 for (unsigned int i=0; i<Q; i++)
c@277 266 {
c@278 267 deltasum += delta[0][i];
c@277 268 }
c@295 269 for (unsigned int i=0; i<Q; i++)
c@277 270 {
c@278 271 delta[0][i] /= (deltasum + EPS);
c@277 272 }
c@277 273
c@277 274
c@295 275 for (unsigned int t=1; t<T; t++)
c@278 276 {
c@278 277 d_vec_t tmp_vec(Q);
c@277 278
c@295 279 for (unsigned int j=0; j<Q; j++)
c@278 280 {
c@295 281 for (unsigned int i=0; i<Q; i++)
c@278 282 {
c@278 283 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
c@278 284 }
c@278 285
c@278 286 delta[t][j] = get_max_val(tmp_vec);
c@277 287
c@278 288 psi[t][j] = get_max_ind(tmp_vec);
c@278 289
c@278 290 delta[t][j] *= rcfmat[t][j];
c@278 291 }
c@277 292
c@278 293 // normalise current delta column
c@278 294 double deltasum = 0.;
c@295 295 for (unsigned int i=0; i<Q; i++)
c@278 296 {
c@278 297 deltasum += delta[t][i];
c@278 298 }
c@295 299 for (unsigned int i=0; i<Q; i++)
c@278 300 {
c@278 301 delta[t][i] /= (deltasum + EPS);
c@278 302 }
c@278 303 }
c@277 304
c@278 305 i_vec_t bestpath(T);
c@278 306 d_vec_t tmp_vec(Q);
c@295 307 for (unsigned int i=0; i<Q; i++)
c@278 308 {
c@278 309 tmp_vec[i] = delta[T-1][i];
c@278 310 }
c@277 311
c@278 312 // find starting point - best beat period for "last" frame
c@278 313 bestpath[T-1] = get_max_ind(tmp_vec);
c@278 314
c@278 315 // backtrace through index of maximum values in psi
c@295 316 for (unsigned int t=T-2; t>0 ;t--)
c@278 317 {
c@278 318 bestpath[t] = psi[t+1][bestpath[t+1]];
c@278 319 }
c@277 320
c@278 321 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
c@278 322 bestpath[0] = psi[1][bestpath[1]];
c@277 323
c@295 324 unsigned int lastind = 0;
c@295 325 for (unsigned int i=0; i<T; i++)
c@278 326 {
c@295 327 unsigned int step = 128;
c@295 328 for (unsigned int j=0; j<step; j++)
c@278 329 {
c@278 330 lastind = i*step+j;
c@278 331 beat_period[lastind] = bestpath[i];
c@278 332 }
c@282 333 // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl;
c@278 334 }
c@277 335
c@278 336 //fill in the last values...
c@295 337 for (unsigned int i=lastind; i<beat_period.size(); i++)
c@278 338 {
c@278 339 beat_period[i] = beat_period[lastind];
c@278 340 }
c@277 341
c@295 342 for (unsigned int i = 0; i < beat_period.size(); i++)
c@277 343 {
c@279 344 tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
c@277 345 }
c@277 346 }
c@277 347
c@277 348 double
c@277 349 TempoTrackV2::get_max_val(const d_vec_t &df)
c@277 350 {
c@278 351 double maxval = 0.;
c@295 352 for (unsigned int i=0; i<df.size(); i++)
c@277 353 {
c@278 354 if (maxval < df[i])
c@278 355 {
c@278 356 maxval = df[i];
c@278 357 }
c@277 358 }
c@277 359
c@278 360 return maxval;
c@277 361 }
c@277 362
c@277 363 int
c@277 364 TempoTrackV2::get_max_ind(const d_vec_t &df)
c@277 365 {
c@278 366 double maxval = 0.;
c@278 367 int ind = 0;
c@295 368 for (unsigned int i=0; i<df.size(); i++)
c@277 369 {
c@278 370 if (maxval < df[i])
c@278 371 {
c@278 372 maxval = df[i];
c@278 373 ind = i;
c@278 374 }
c@277 375 }
c@278 376
c@278 377 return ind;
c@277 378 }
c@277 379
c@277 380 void
c@277 381 TempoTrackV2::normalise_vec(d_vec_t &df)
c@277 382 {
c@278 383 double sum = 0.;
c@295 384 for (unsigned int i=0; i<df.size(); i++)
c@278 385 {
c@278 386 sum += df[i];
c@278 387 }
c@278 388
c@295 389 for (unsigned int i=0; i<df.size(); i++)
c@278 390 {
c@278 391 df[i]/= (sum + EPS);
c@278 392 }
c@277 393 }
c@277 394
c@277 395 void
c@304 396 TempoTrackV2::calculateBeats(const vector<double> &df,
c@304 397 const vector<double> &beat_period,
c@304 398 vector<double> &beats)
c@277 399 {
c@281 400 if (df.empty() || beat_period.empty()) return;
c@281 401
c@278 402 d_vec_t cumscore(df.size()); // store cumulative score
c@278 403 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
c@278 404 d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
c@277 405
c@295 406 for (unsigned int i=0; i<df.size(); i++)
c@277 407 {
c@278 408 localscore[i] = df[i];
c@278 409 backlink[i] = -1;
c@277 410 }
c@277 411
c@278 412 double tightness = 4.;
c@278 413 double alpha = 0.9;
c@277 414
c@278 415 // main loop
c@295 416 for (unsigned int i=0; i<localscore.size(); i++)
c@278 417 {
c@278 418 int prange_min = -2*beat_period[i];
c@278 419 int prange_max = round(-0.5*beat_period[i]);
c@277 420
c@278 421 // transition range
c@278 422 d_vec_t txwt (prange_max - prange_min + 1);
c@278 423 d_vec_t scorecands (txwt.size());
c@277 424
c@295 425 for (unsigned int j=0;j<txwt.size();j++)
c@278 426 {
c@278 427 double mu = static_cast<double> (beat_period[i]);
c@278 428 txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
c@277 429
c@278 430 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
c@278 431 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
c@277 432
c@278 433 int cscore_ind = i+prange_min+j;
c@278 434 if (cscore_ind >= 0)
c@278 435 {
c@278 436 scorecands[j] = txwt[j] * cumscore[cscore_ind];
c@278 437 }
c@278 438 }
c@277 439
c@278 440 // find max value and index of maximum value
c@278 441 double vv = get_max_val(scorecands);
c@278 442 int xx = get_max_ind(scorecands);
c@277 443
c@278 444 cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
c@278 445 backlink[i] = i+prange_min+xx;
c@280 446
c@282 447 // std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
c@278 448 }
c@278 449
c@278 450 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
c@278 451 d_vec_t tmp_vec;
c@295 452 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
c@278 453 {
c@278 454 tmp_vec.push_back(cumscore[i]);
c@278 455 }
c@278 456
c@278 457 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
c@278 458
c@281 459 // can happen if no results obtained earlier (e.g. input too short)
c@281 460 if (startpoint >= backlink.size()) startpoint = backlink.size()-1;
c@281 461
c@278 462 // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
c@278 463 // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
c@278 464 i_vec_t ibeats;
c@278 465 ibeats.push_back(startpoint);
c@282 466 // std::cerr << "startpoint = " << startpoint << std::endl;
c@278 467 while (backlink[ibeats.back()] > 0)
c@278 468 {
c@282 469 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
c@281 470 int b = ibeats.back();
c@281 471 if (backlink[b] == b) break; // shouldn't happen... haha
c@281 472 ibeats.push_back(backlink[b]);
c@278 473 }
c@277 474
c@278 475 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
c@295 476 for (unsigned int i=0; i<ibeats.size(); i++)
c@278 477 {
c@278 478 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
c@278 479 }
c@277 480 }
c@277 481
c@277 482