annotate dsp/tempotracking/TempoTrackV2.cpp @ 73:dcb555b90924

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.
author cannam
date Fri, 05 Jun 2009 15:12:39 +0000
parents c3cdb404f807
children 054c384d860d
rev   line source
cannam@52 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@52 2
cannam@52 3 /*
cannam@52 4 QM DSP Library
cannam@52 5
cannam@52 6 Centre for Digital Music, Queen Mary, University of London.
cannam@52 7 This file copyright 2008-2009 Matthew Davies and QMUL.
cannam@52 8 All rights reserved.
cannam@52 9 */
cannam@52 10
cannam@52 11 #include "TempoTrackV2.h"
cannam@52 12
cannam@52 13 #include <cmath>
cannam@52 14 #include <cstdlib>
cannam@53 15 #include <iostream>
cannam@52 16
cannam@54 17 #include "maths/MathUtilities.h"
cannam@52 18
cannam@52 19 #define EPS 0.0000008 // just some arbitrary small number
cannam@52 20
cannam@54 21 TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
cannam@54 22 m_rate(rate), m_increment(increment) { }
cannam@52 23 TempoTrackV2::~TempoTrackV2() { }
cannam@52 24
cannam@52 25 void
cannam@52 26 TempoTrackV2::filter_df(d_vec_t &df)
cannam@52 27 {
cannam@53 28 d_vec_t a(3);
cannam@53 29 d_vec_t b(3);
cannam@53 30 d_vec_t lp_df(df.size());
cannam@52 31
cannam@53 32 //equivalent in matlab to [b,a] = butter(2,0.4);
cannam@53 33 a[0] = 1.0000;
cannam@53 34 a[1] = -0.3695;
cannam@53 35 a[2] = 0.1958;
cannam@53 36 b[0] = 0.2066;
cannam@53 37 b[1] = 0.4131;
cannam@53 38 b[2] = 0.2066;
cannam@53 39
cannam@53 40 double inp1 = 0.;
cannam@53 41 double inp2 = 0.;
cannam@53 42 double out1 = 0.;
cannam@53 43 double out2 = 0.;
cannam@52 44
cannam@52 45
cannam@53 46 // forwards filtering
cannam@70 47 for (unsigned int i = 0;i < df.size();i++)
cannam@53 48 {
cannam@53 49 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
cannam@53 50 inp2 = inp1;
cannam@53 51 inp1 = df[i];
cannam@53 52 out2 = out1;
cannam@53 53 out1 = lp_df[i];
cannam@53 54 }
cannam@52 55
cannam@53 56 // copy forwards filtering to df...
cannam@53 57 // but, time-reversed, ready for backwards filtering
cannam@70 58 for (unsigned int i = 0;i < df.size();i++)
cannam@53 59 {
cannam@53 60 df[i] = lp_df[df.size()-i-1];
cannam@53 61 }
cannam@52 62
cannam@70 63 for (unsigned int i = 0;i < df.size();i++)
cannam@53 64 {
cannam@53 65 lp_df[i] = 0.;
cannam@53 66 }
cannam@52 67
cannam@53 68 inp1 = 0.; inp2 = 0.;
cannam@53 69 out1 = 0.; out2 = 0.;
cannam@52 70
cannam@52 71 // backwards filetering on time-reversed df
cannam@70 72 for (unsigned int i = 0;i < df.size();i++)
cannam@53 73 {
cannam@53 74 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
cannam@53 75 inp2 = inp1;
cannam@53 76 inp1 = df[i];
cannam@53 77 out2 = out1;
cannam@53 78 out1 = lp_df[i];
cannam@53 79 }
cannam@52 80
cannam@52 81 // write the re-reversed (i.e. forward) version back to df
cannam@70 82 for (unsigned int i = 0;i < df.size();i++)
cannam@53 83 {
cannam@53 84 df[i] = lp_df[df.size()-i-1];
cannam@53 85 }
cannam@52 86 }
cannam@52 87
cannam@52 88
cannam@52 89 void
cannam@53 90 TempoTrackV2::calculateBeatPeriod(const d_vec_t &df, d_vec_t &beat_period,
cannam@53 91 d_vec_t &tempi)
cannam@52 92 {
cannam@53 93 // to follow matlab.. split into 512 sample frames with a 128 hop size
cannam@53 94 // calculate the acf,
cannam@53 95 // then the rcf.. and then stick the rcfs as columns of a matrix
cannam@53 96 // then call viterbi decoding with weight vector and transition matrix
cannam@53 97 // and get best path
cannam@52 98
cannam@70 99 unsigned int wv_len = 128;
cannam@53 100 double rayparam = 43.;
cannam@52 101
cannam@53 102 // make rayleigh weighting curve
cannam@53 103 d_vec_t wv(wv_len);
cannam@70 104 for (unsigned int i=0; i<wv.size(); i++)
cannam@52 105 {
cannam@53 106 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
cannam@52 107 }
cannam@52 108
cannam@53 109 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
cannam@70 110 unsigned int winlen = 512;
cannam@70 111 unsigned int step = 128;
cannam@53 112
cannam@53 113 // matrix to store output of comb filter bank, increment column of matrix at each frame
cannam@53 114 d_mat_t rcfmat;
cannam@53 115 int col_counter = -1;
cannam@53 116
cannam@53 117 // main loop for beat period calculation
cannam@70 118 for (unsigned int i=0; i+winlen<df.size(); i+=step)
cannam@53 119 {
cannam@53 120 // get dfframe
cannam@53 121 d_vec_t dfframe(winlen);
cannam@70 122 for (unsigned int k=0; k<winlen; k++)
cannam@53 123 {
cannam@53 124 dfframe[k] = df[i+k];
cannam@53 125 }
cannam@53 126 // get rcf vector for current frame
cannam@53 127 d_vec_t rcf(wv_len);
cannam@53 128 get_rcf(dfframe,wv,rcf);
cannam@52 129
cannam@53 130 rcfmat.push_back( d_vec_t() ); // adds a new column
cannam@53 131 col_counter++;
cannam@70 132 for (unsigned int j=0; j<rcf.size(); j++)
cannam@53 133 {
cannam@53 134 rcfmat[col_counter].push_back( rcf[j] );
cannam@53 135 }
cannam@53 136 }
cannam@53 137
cannam@53 138 // now call viterbi decoding function
cannam@53 139 viterbi_decode(rcfmat,wv,beat_period,tempi);
cannam@52 140 }
cannam@52 141
cannam@52 142
cannam@52 143 void
cannam@52 144 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
cannam@52 145 {
cannam@53 146 // calculate autocorrelation function
cannam@53 147 // then rcf
cannam@53 148 // just hard code for now... don't really need separate functions to do this
cannam@52 149
cannam@53 150 // make acf
cannam@52 151
cannam@53 152 d_vec_t dfframe(dfframe_in);
cannam@52 153
cannam@54 154 MathUtilities::adaptiveThreshold(dfframe);
cannam@52 155
cannam@53 156 d_vec_t acf(dfframe.size());
cannam@52 157
cannam@53 158
cannam@70 159 for (unsigned int lag=0; lag<dfframe.size(); lag++)
cannam@53 160 {
cannam@53 161 double sum = 0.;
cannam@53 162 double tmp = 0.;
cannam@52 163
cannam@70 164 for (unsigned int n=0; n<(dfframe.size()-lag); n++)
cannam@53 165 {
cannam@53 166 tmp = dfframe[n] * dfframe[n+lag];
cannam@53 167 sum += tmp;
cannam@53 168 }
cannam@53 169 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
cannam@53 170 }
cannam@52 171
cannam@53 172 // now apply comb filtering
cannam@53 173 int numelem = 4;
cannam@53 174
cannam@70 175 for (unsigned int i = 2;i < rcf.size();i++) // max beat period
cannam@53 176 {
cannam@53 177 for (int a = 1;a <= numelem;a++) // number of comb elements
cannam@53 178 {
cannam@53 179 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
cannam@53 180 {
cannam@53 181 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
cannam@53 182 }
cannam@53 183 }
cannam@53 184 }
cannam@53 185
cannam@53 186 // apply adaptive threshold to rcf
cannam@54 187 MathUtilities::adaptiveThreshold(rcf);
cannam@53 188
cannam@53 189 double rcfsum =0.;
cannam@70 190 for (unsigned int i=0; i<rcf.size(); i++)
cannam@53 191 {
cannam@53 192 rcf[i] += EPS ;
cannam@53 193 rcfsum += rcf[i];
cannam@53 194 }
cannam@52 195
cannam@53 196 // normalise rcf to sum to unity
cannam@70 197 for (unsigned int i=0; i<rcf.size(); i++)
cannam@52 198 {
cannam@53 199 rcf[i] /= (rcfsum + EPS);
cannam@52 200 }
cannam@52 201 }
cannam@52 202
cannam@52 203 void
cannam@53 204 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
cannam@52 205 {
cannam@53 206 // following Kevin Murphy's Viterbi decoding to get best path of
cannam@53 207 // beat periods through rfcmat
cannam@52 208
cannam@53 209 // make transition matrix
cannam@53 210 d_mat_t tmat;
cannam@70 211 for (unsigned int i=0;i<wv.size();i++)
cannam@53 212 {
cannam@53 213 tmat.push_back ( d_vec_t() ); // adds a new column
cannam@70 214 for (unsigned int j=0; j<wv.size(); j++)
cannam@53 215 {
cannam@53 216 tmat[i].push_back(0.); // fill with zeros initially
cannam@53 217 }
cannam@53 218 }
cannam@53 219
cannam@53 220 // variance of Gaussians in transition matrix
cannam@53 221 // formed of Gaussians on diagonal - implies slow tempo change
cannam@53 222 double sigma = 8.;
cannam@53 223 // don't want really short beat periods, or really long ones
cannam@70 224 for (unsigned int i=20;i <wv.size()-20; i++)
cannam@53 225 {
cannam@70 226 for (unsigned int j=20; j<wv.size()-20; j++)
cannam@53 227 {
cannam@53 228 double mu = static_cast<double>(i);
cannam@53 229 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
cannam@53 230 }
cannam@53 231 }
cannam@52 232
cannam@53 233 // parameters for Viterbi decoding... this part is taken from
cannam@53 234 // Murphy's matlab
cannam@52 235
cannam@53 236 d_mat_t delta;
cannam@53 237 i_mat_t psi;
cannam@70 238 for (unsigned int i=0;i <rcfmat.size(); i++)
cannam@53 239 {
cannam@53 240 delta.push_back( d_vec_t());
cannam@53 241 psi.push_back( i_vec_t());
cannam@70 242 for (unsigned int j=0; j<rcfmat[i].size(); j++)
cannam@53 243 {
cannam@53 244 delta[i].push_back(0.); // fill with zeros initially
cannam@53 245 psi[i].push_back(0); // fill with zeros initially
cannam@53 246 }
cannam@53 247 }
cannam@52 248
cannam@52 249
cannam@70 250 unsigned int T = delta.size();
cannam@56 251
cannam@56 252 if (T < 2) return; // can't do anything at all meaningful
cannam@56 253
cannam@70 254 unsigned int Q = delta[0].size();
cannam@52 255
cannam@53 256 // initialize first column of delta
cannam@70 257 for (unsigned int j=0; j<Q; j++)
cannam@52 258 {
cannam@53 259 delta[0][j] = wv[j] * rcfmat[0][j];
cannam@53 260 psi[0][j] = 0;
cannam@52 261 }
cannam@53 262
cannam@52 263 double deltasum = 0.;
cannam@70 264 for (unsigned int i=0; i<Q; i++)
cannam@52 265 {
cannam@53 266 deltasum += delta[0][i];
cannam@52 267 }
cannam@70 268 for (unsigned int i=0; i<Q; i++)
cannam@52 269 {
cannam@53 270 delta[0][i] /= (deltasum + EPS);
cannam@52 271 }
cannam@52 272
cannam@52 273
cannam@70 274 for (unsigned int t=1; t<T; t++)
cannam@53 275 {
cannam@53 276 d_vec_t tmp_vec(Q);
cannam@52 277
cannam@70 278 for (unsigned int j=0; j<Q; j++)
cannam@53 279 {
cannam@70 280 for (unsigned int i=0; i<Q; i++)
cannam@53 281 {
cannam@53 282 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
cannam@53 283 }
cannam@53 284
cannam@53 285 delta[t][j] = get_max_val(tmp_vec);
cannam@52 286
cannam@53 287 psi[t][j] = get_max_ind(tmp_vec);
cannam@53 288
cannam@53 289 delta[t][j] *= rcfmat[t][j];
cannam@53 290 }
cannam@52 291
cannam@53 292 // normalise current delta column
cannam@53 293 double deltasum = 0.;
cannam@70 294 for (unsigned int i=0; i<Q; i++)
cannam@53 295 {
cannam@53 296 deltasum += delta[t][i];
cannam@53 297 }
cannam@70 298 for (unsigned int i=0; i<Q; i++)
cannam@53 299 {
cannam@53 300 delta[t][i] /= (deltasum + EPS);
cannam@53 301 }
cannam@53 302 }
cannam@52 303
cannam@53 304 i_vec_t bestpath(T);
cannam@53 305 d_vec_t tmp_vec(Q);
cannam@70 306 for (unsigned int i=0; i<Q; i++)
cannam@53 307 {
cannam@53 308 tmp_vec[i] = delta[T-1][i];
cannam@53 309 }
cannam@52 310
cannam@53 311 // find starting point - best beat period for "last" frame
cannam@53 312 bestpath[T-1] = get_max_ind(tmp_vec);
cannam@53 313
cannam@53 314 // backtrace through index of maximum values in psi
cannam@70 315 for (unsigned int t=T-2; t>0 ;t--)
cannam@53 316 {
cannam@53 317 bestpath[t] = psi[t+1][bestpath[t+1]];
cannam@53 318 }
cannam@52 319
cannam@53 320 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
cannam@53 321 bestpath[0] = psi[1][bestpath[1]];
cannam@52 322
cannam@70 323 unsigned int lastind = 0;
cannam@70 324 for (unsigned int i=0; i<T; i++)
cannam@53 325 {
cannam@70 326 unsigned int step = 128;
cannam@70 327 for (unsigned int j=0; j<step; j++)
cannam@53 328 {
cannam@53 329 lastind = i*step+j;
cannam@53 330 beat_period[lastind] = bestpath[i];
cannam@53 331 }
cannam@57 332 // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl;
cannam@53 333 }
cannam@52 334
cannam@53 335 //fill in the last values...
cannam@70 336 for (unsigned int i=lastind; i<beat_period.size(); i++)
cannam@53 337 {
cannam@53 338 beat_period[i] = beat_period[lastind];
cannam@53 339 }
cannam@52 340
cannam@70 341 for (unsigned int i = 0; i < beat_period.size(); i++)
cannam@52 342 {
cannam@54 343 tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
cannam@52 344 }
cannam@52 345 }
cannam@52 346
cannam@52 347 double
cannam@52 348 TempoTrackV2::get_max_val(const d_vec_t &df)
cannam@52 349 {
cannam@53 350 double maxval = 0.;
cannam@70 351 for (unsigned int i=0; i<df.size(); i++)
cannam@52 352 {
cannam@53 353 if (maxval < df[i])
cannam@53 354 {
cannam@53 355 maxval = df[i];
cannam@53 356 }
cannam@52 357 }
cannam@52 358
cannam@53 359 return maxval;
cannam@52 360 }
cannam@52 361
cannam@52 362 int
cannam@52 363 TempoTrackV2::get_max_ind(const d_vec_t &df)
cannam@52 364 {
cannam@53 365 double maxval = 0.;
cannam@53 366 int ind = 0;
cannam@70 367 for (unsigned int i=0; i<df.size(); i++)
cannam@52 368 {
cannam@53 369 if (maxval < df[i])
cannam@53 370 {
cannam@53 371 maxval = df[i];
cannam@53 372 ind = i;
cannam@53 373 }
cannam@52 374 }
cannam@53 375
cannam@53 376 return ind;
cannam@52 377 }
cannam@52 378
cannam@52 379 void
cannam@52 380 TempoTrackV2::normalise_vec(d_vec_t &df)
cannam@52 381 {
cannam@53 382 double sum = 0.;
cannam@70 383 for (unsigned int i=0; i<df.size(); i++)
cannam@53 384 {
cannam@53 385 sum += df[i];
cannam@53 386 }
cannam@53 387
cannam@70 388 for (unsigned int i=0; i<df.size(); i++)
cannam@53 389 {
cannam@53 390 df[i]/= (sum + EPS);
cannam@53 391 }
cannam@52 392 }
cannam@52 393
cannam@52 394 void
cannam@52 395 TempoTrackV2::calculateBeats(const d_vec_t &df, const d_vec_t &beat_period,
cannam@52 396 d_vec_t &beats)
cannam@52 397 {
cannam@56 398 if (df.empty() || beat_period.empty()) return;
cannam@56 399
cannam@53 400 d_vec_t cumscore(df.size()); // store cumulative score
cannam@53 401 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
cannam@53 402 d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
cannam@52 403
cannam@70 404 for (unsigned int i=0; i<df.size(); i++)
cannam@52 405 {
cannam@53 406 localscore[i] = df[i];
cannam@53 407 backlink[i] = -1;
cannam@52 408 }
cannam@52 409
cannam@53 410 double tightness = 4.;
cannam@53 411 double alpha = 0.9;
cannam@52 412
cannam@53 413 // main loop
cannam@70 414 for (unsigned int i=0; i<localscore.size(); i++)
cannam@53 415 {
cannam@53 416 int prange_min = -2*beat_period[i];
cannam@53 417 int prange_max = round(-0.5*beat_period[i]);
cannam@52 418
cannam@53 419 // transition range
cannam@53 420 d_vec_t txwt (prange_max - prange_min + 1);
cannam@53 421 d_vec_t scorecands (txwt.size());
cannam@52 422
cannam@70 423 for (unsigned int j=0;j<txwt.size();j++)
cannam@53 424 {
cannam@53 425 double mu = static_cast<double> (beat_period[i]);
cannam@53 426 txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
cannam@52 427
cannam@53 428 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
cannam@53 429 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
cannam@52 430
cannam@53 431 int cscore_ind = i+prange_min+j;
cannam@53 432 if (cscore_ind >= 0)
cannam@53 433 {
cannam@53 434 scorecands[j] = txwt[j] * cumscore[cscore_ind];
cannam@53 435 }
cannam@53 436 }
cannam@52 437
cannam@53 438 // find max value and index of maximum value
cannam@53 439 double vv = get_max_val(scorecands);
cannam@53 440 int xx = get_max_ind(scorecands);
cannam@52 441
cannam@53 442 cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
cannam@53 443 backlink[i] = i+prange_min+xx;
cannam@55 444
cannam@57 445 // std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
cannam@53 446 }
cannam@53 447
cannam@53 448 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
cannam@53 449 d_vec_t tmp_vec;
cannam@70 450 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
cannam@53 451 {
cannam@53 452 tmp_vec.push_back(cumscore[i]);
cannam@53 453 }
cannam@53 454
cannam@53 455 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
cannam@53 456
cannam@56 457 // can happen if no results obtained earlier (e.g. input too short)
cannam@56 458 if (startpoint >= backlink.size()) startpoint = backlink.size()-1;
cannam@56 459
cannam@53 460 // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
cannam@53 461 // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
cannam@53 462 i_vec_t ibeats;
cannam@53 463 ibeats.push_back(startpoint);
cannam@57 464 // std::cerr << "startpoint = " << startpoint << std::endl;
cannam@53 465 while (backlink[ibeats.back()] > 0)
cannam@53 466 {
cannam@57 467 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
cannam@56 468 int b = ibeats.back();
cannam@56 469 if (backlink[b] == b) break; // shouldn't happen... haha
cannam@56 470 ibeats.push_back(backlink[b]);
cannam@53 471 }
cannam@52 472
cannam@53 473 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
cannam@70 474 for (unsigned int i=0; i<ibeats.size(); i++)
cannam@53 475 {
cannam@53 476 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
cannam@53 477 }
cannam@52 478 }
cannam@52 479
cannam@52 480