annotate dsp/tempotracking/TempoTrackV2.cpp @ 298:255e431ae3d4

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 05 Jun 2009 15:12:39 +0000
parents 1c9258dd155e
children 054c384d860d
rev   line source
c@277 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@277 2
c@277 3 /*
c@277 4 QM DSP Library
c@277 5
c@277 6 Centre for Digital Music, Queen Mary, University of London.
c@277 7 This file copyright 2008-2009 Matthew Davies and QMUL.
c@277 8 All rights reserved.
c@277 9 */
c@277 10
c@277 11 #include "TempoTrackV2.h"
c@277 12
c@277 13 #include <cmath>
c@277 14 #include <cstdlib>
c@278 15 #include <iostream>
c@277 16
c@279 17 #include "maths/MathUtilities.h"
c@277 18
c@277 19 #define EPS 0.0000008 // just some arbitrary small number
c@277 20
c@279 21 TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
c@279 22 m_rate(rate), m_increment(increment) { }
c@277 23 TempoTrackV2::~TempoTrackV2() { }
c@277 24
c@277 25 void
c@277 26 TempoTrackV2::filter_df(d_vec_t &df)
c@277 27 {
c@278 28 d_vec_t a(3);
c@278 29 d_vec_t b(3);
c@278 30 d_vec_t lp_df(df.size());
c@277 31
c@278 32 //equivalent in matlab to [b,a] = butter(2,0.4);
c@278 33 a[0] = 1.0000;
c@278 34 a[1] = -0.3695;
c@278 35 a[2] = 0.1958;
c@278 36 b[0] = 0.2066;
c@278 37 b[1] = 0.4131;
c@278 38 b[2] = 0.2066;
c@278 39
c@278 40 double inp1 = 0.;
c@278 41 double inp2 = 0.;
c@278 42 double out1 = 0.;
c@278 43 double out2 = 0.;
c@277 44
c@277 45
c@278 46 // forwards filtering
c@295 47 for (unsigned int i = 0;i < df.size();i++)
c@278 48 {
c@278 49 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
c@278 50 inp2 = inp1;
c@278 51 inp1 = df[i];
c@278 52 out2 = out1;
c@278 53 out1 = lp_df[i];
c@278 54 }
c@277 55
c@278 56 // copy forwards filtering to df...
c@278 57 // but, time-reversed, ready for backwards filtering
c@295 58 for (unsigned int i = 0;i < df.size();i++)
c@278 59 {
c@278 60 df[i] = lp_df[df.size()-i-1];
c@278 61 }
c@277 62
c@295 63 for (unsigned int i = 0;i < df.size();i++)
c@278 64 {
c@278 65 lp_df[i] = 0.;
c@278 66 }
c@277 67
c@278 68 inp1 = 0.; inp2 = 0.;
c@278 69 out1 = 0.; out2 = 0.;
c@277 70
c@277 71 // backwards filetering on time-reversed df
c@295 72 for (unsigned int i = 0;i < df.size();i++)
c@278 73 {
c@278 74 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
c@278 75 inp2 = inp1;
c@278 76 inp1 = df[i];
c@278 77 out2 = out1;
c@278 78 out1 = lp_df[i];
c@278 79 }
c@277 80
c@277 81 // write the re-reversed (i.e. forward) version back to df
c@295 82 for (unsigned int i = 0;i < df.size();i++)
c@278 83 {
c@278 84 df[i] = lp_df[df.size()-i-1];
c@278 85 }
c@277 86 }
c@277 87
c@277 88
c@277 89 void
c@278 90 TempoTrackV2::calculateBeatPeriod(const d_vec_t &df, d_vec_t &beat_period,
c@278 91 d_vec_t &tempi)
c@277 92 {
c@278 93 // to follow matlab.. split into 512 sample frames with a 128 hop size
c@278 94 // calculate the acf,
c@278 95 // then the rcf.. and then stick the rcfs as columns of a matrix
c@278 96 // then call viterbi decoding with weight vector and transition matrix
c@278 97 // and get best path
c@277 98
c@295 99 unsigned int wv_len = 128;
c@278 100 double rayparam = 43.;
c@277 101
c@278 102 // make rayleigh weighting curve
c@278 103 d_vec_t wv(wv_len);
c@295 104 for (unsigned int i=0; i<wv.size(); i++)
c@277 105 {
c@278 106 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
c@277 107 }
c@277 108
c@278 109 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
c@295 110 unsigned int winlen = 512;
c@295 111 unsigned int step = 128;
c@278 112
c@278 113 // matrix to store output of comb filter bank, increment column of matrix at each frame
c@278 114 d_mat_t rcfmat;
c@278 115 int col_counter = -1;
c@278 116
c@278 117 // main loop for beat period calculation
c@295 118 for (unsigned int i=0; i+winlen<df.size(); i+=step)
c@278 119 {
c@278 120 // get dfframe
c@278 121 d_vec_t dfframe(winlen);
c@295 122 for (unsigned int k=0; k<winlen; k++)
c@278 123 {
c@278 124 dfframe[k] = df[i+k];
c@278 125 }
c@278 126 // get rcf vector for current frame
c@278 127 d_vec_t rcf(wv_len);
c@278 128 get_rcf(dfframe,wv,rcf);
c@277 129
c@278 130 rcfmat.push_back( d_vec_t() ); // adds a new column
c@278 131 col_counter++;
c@295 132 for (unsigned int j=0; j<rcf.size(); j++)
c@278 133 {
c@278 134 rcfmat[col_counter].push_back( rcf[j] );
c@278 135 }
c@278 136 }
c@278 137
c@278 138 // now call viterbi decoding function
c@278 139 viterbi_decode(rcfmat,wv,beat_period,tempi);
c@277 140 }
c@277 141
c@277 142
c@277 143 void
c@277 144 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
c@277 145 {
c@278 146 // calculate autocorrelation function
c@278 147 // then rcf
c@278 148 // just hard code for now... don't really need separate functions to do this
c@277 149
c@278 150 // make acf
c@277 151
c@278 152 d_vec_t dfframe(dfframe_in);
c@277 153
c@279 154 MathUtilities::adaptiveThreshold(dfframe);
c@277 155
c@278 156 d_vec_t acf(dfframe.size());
c@277 157
c@278 158
c@295 159 for (unsigned int lag=0; lag<dfframe.size(); lag++)
c@278 160 {
c@278 161 double sum = 0.;
c@278 162 double tmp = 0.;
c@277 163
c@295 164 for (unsigned int n=0; n<(dfframe.size()-lag); n++)
c@278 165 {
c@278 166 tmp = dfframe[n] * dfframe[n+lag];
c@278 167 sum += tmp;
c@278 168 }
c@278 169 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
c@278 170 }
c@277 171
c@278 172 // now apply comb filtering
c@278 173 int numelem = 4;
c@278 174
c@295 175 for (unsigned int i = 2;i < rcf.size();i++) // max beat period
c@278 176 {
c@278 177 for (int a = 1;a <= numelem;a++) // number of comb elements
c@278 178 {
c@278 179 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
c@278 180 {
c@278 181 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
c@278 182 }
c@278 183 }
c@278 184 }
c@278 185
c@278 186 // apply adaptive threshold to rcf
c@279 187 MathUtilities::adaptiveThreshold(rcf);
c@278 188
c@278 189 double rcfsum =0.;
c@295 190 for (unsigned int i=0; i<rcf.size(); i++)
c@278 191 {
c@278 192 rcf[i] += EPS ;
c@278 193 rcfsum += rcf[i];
c@278 194 }
c@277 195
c@278 196 // normalise rcf to sum to unity
c@295 197 for (unsigned int i=0; i<rcf.size(); i++)
c@277 198 {
c@278 199 rcf[i] /= (rcfsum + EPS);
c@277 200 }
c@277 201 }
c@277 202
c@277 203 void
c@278 204 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
c@277 205 {
c@278 206 // following Kevin Murphy's Viterbi decoding to get best path of
c@278 207 // beat periods through rfcmat
c@277 208
c@278 209 // make transition matrix
c@278 210 d_mat_t tmat;
c@295 211 for (unsigned int i=0;i<wv.size();i++)
c@278 212 {
c@278 213 tmat.push_back ( d_vec_t() ); // adds a new column
c@295 214 for (unsigned int j=0; j<wv.size(); j++)
c@278 215 {
c@278 216 tmat[i].push_back(0.); // fill with zeros initially
c@278 217 }
c@278 218 }
c@278 219
c@278 220 // variance of Gaussians in transition matrix
c@278 221 // formed of Gaussians on diagonal - implies slow tempo change
c@278 222 double sigma = 8.;
c@278 223 // don't want really short beat periods, or really long ones
c@295 224 for (unsigned int i=20;i <wv.size()-20; i++)
c@278 225 {
c@295 226 for (unsigned int j=20; j<wv.size()-20; j++)
c@278 227 {
c@278 228 double mu = static_cast<double>(i);
c@278 229 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
c@278 230 }
c@278 231 }
c@277 232
c@278 233 // parameters for Viterbi decoding... this part is taken from
c@278 234 // Murphy's matlab
c@277 235
c@278 236 d_mat_t delta;
c@278 237 i_mat_t psi;
c@295 238 for (unsigned int i=0;i <rcfmat.size(); i++)
c@278 239 {
c@278 240 delta.push_back( d_vec_t());
c@278 241 psi.push_back( i_vec_t());
c@295 242 for (unsigned int j=0; j<rcfmat[i].size(); j++)
c@278 243 {
c@278 244 delta[i].push_back(0.); // fill with zeros initially
c@278 245 psi[i].push_back(0); // fill with zeros initially
c@278 246 }
c@278 247 }
c@277 248
c@277 249
c@295 250 unsigned int T = delta.size();
c@281 251
c@281 252 if (T < 2) return; // can't do anything at all meaningful
c@281 253
c@295 254 unsigned int Q = delta[0].size();
c@277 255
c@278 256 // initialize first column of delta
c@295 257 for (unsigned int j=0; j<Q; j++)
c@277 258 {
c@278 259 delta[0][j] = wv[j] * rcfmat[0][j];
c@278 260 psi[0][j] = 0;
c@277 261 }
c@278 262
c@277 263 double deltasum = 0.;
c@295 264 for (unsigned int i=0; i<Q; i++)
c@277 265 {
c@278 266 deltasum += delta[0][i];
c@277 267 }
c@295 268 for (unsigned int i=0; i<Q; i++)
c@277 269 {
c@278 270 delta[0][i] /= (deltasum + EPS);
c@277 271 }
c@277 272
c@277 273
c@295 274 for (unsigned int t=1; t<T; t++)
c@278 275 {
c@278 276 d_vec_t tmp_vec(Q);
c@277 277
c@295 278 for (unsigned int j=0; j<Q; j++)
c@278 279 {
c@295 280 for (unsigned int i=0; i<Q; i++)
c@278 281 {
c@278 282 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
c@278 283 }
c@278 284
c@278 285 delta[t][j] = get_max_val(tmp_vec);
c@277 286
c@278 287 psi[t][j] = get_max_ind(tmp_vec);
c@278 288
c@278 289 delta[t][j] *= rcfmat[t][j];
c@278 290 }
c@277 291
c@278 292 // normalise current delta column
c@278 293 double deltasum = 0.;
c@295 294 for (unsigned int i=0; i<Q; i++)
c@278 295 {
c@278 296 deltasum += delta[t][i];
c@278 297 }
c@295 298 for (unsigned int i=0; i<Q; i++)
c@278 299 {
c@278 300 delta[t][i] /= (deltasum + EPS);
c@278 301 }
c@278 302 }
c@277 303
c@278 304 i_vec_t bestpath(T);
c@278 305 d_vec_t tmp_vec(Q);
c@295 306 for (unsigned int i=0; i<Q; i++)
c@278 307 {
c@278 308 tmp_vec[i] = delta[T-1][i];
c@278 309 }
c@277 310
c@278 311 // find starting point - best beat period for "last" frame
c@278 312 bestpath[T-1] = get_max_ind(tmp_vec);
c@278 313
c@278 314 // backtrace through index of maximum values in psi
c@295 315 for (unsigned int t=T-2; t>0 ;t--)
c@278 316 {
c@278 317 bestpath[t] = psi[t+1][bestpath[t+1]];
c@278 318 }
c@277 319
c@278 320 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
c@278 321 bestpath[0] = psi[1][bestpath[1]];
c@277 322
c@295 323 unsigned int lastind = 0;
c@295 324 for (unsigned int i=0; i<T; i++)
c@278 325 {
c@295 326 unsigned int step = 128;
c@295 327 for (unsigned int j=0; j<step; j++)
c@278 328 {
c@278 329 lastind = i*step+j;
c@278 330 beat_period[lastind] = bestpath[i];
c@278 331 }
c@282 332 // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl;
c@278 333 }
c@277 334
c@278 335 //fill in the last values...
c@295 336 for (unsigned int i=lastind; i<beat_period.size(); i++)
c@278 337 {
c@278 338 beat_period[i] = beat_period[lastind];
c@278 339 }
c@277 340
c@295 341 for (unsigned int i = 0; i < beat_period.size(); i++)
c@277 342 {
c@279 343 tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
c@277 344 }
c@277 345 }
c@277 346
c@277 347 double
c@277 348 TempoTrackV2::get_max_val(const d_vec_t &df)
c@277 349 {
c@278 350 double maxval = 0.;
c@295 351 for (unsigned int i=0; i<df.size(); i++)
c@277 352 {
c@278 353 if (maxval < df[i])
c@278 354 {
c@278 355 maxval = df[i];
c@278 356 }
c@277 357 }
c@277 358
c@278 359 return maxval;
c@277 360 }
c@277 361
c@277 362 int
c@277 363 TempoTrackV2::get_max_ind(const d_vec_t &df)
c@277 364 {
c@278 365 double maxval = 0.;
c@278 366 int ind = 0;
c@295 367 for (unsigned int i=0; i<df.size(); i++)
c@277 368 {
c@278 369 if (maxval < df[i])
c@278 370 {
c@278 371 maxval = df[i];
c@278 372 ind = i;
c@278 373 }
c@277 374 }
c@278 375
c@278 376 return ind;
c@277 377 }
c@277 378
c@277 379 void
c@277 380 TempoTrackV2::normalise_vec(d_vec_t &df)
c@277 381 {
c@278 382 double sum = 0.;
c@295 383 for (unsigned int i=0; i<df.size(); i++)
c@278 384 {
c@278 385 sum += df[i];
c@278 386 }
c@278 387
c@295 388 for (unsigned int i=0; i<df.size(); i++)
c@278 389 {
c@278 390 df[i]/= (sum + EPS);
c@278 391 }
c@277 392 }
c@277 393
c@277 394 void
c@277 395 TempoTrackV2::calculateBeats(const d_vec_t &df, const d_vec_t &beat_period,
c@277 396 d_vec_t &beats)
c@277 397 {
c@281 398 if (df.empty() || beat_period.empty()) return;
c@281 399
c@278 400 d_vec_t cumscore(df.size()); // store cumulative score
c@278 401 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
c@278 402 d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
c@277 403
c@295 404 for (unsigned int i=0; i<df.size(); i++)
c@277 405 {
c@278 406 localscore[i] = df[i];
c@278 407 backlink[i] = -1;
c@277 408 }
c@277 409
c@278 410 double tightness = 4.;
c@278 411 double alpha = 0.9;
c@277 412
c@278 413 // main loop
c@295 414 for (unsigned int i=0; i<localscore.size(); i++)
c@278 415 {
c@278 416 int prange_min = -2*beat_period[i];
c@278 417 int prange_max = round(-0.5*beat_period[i]);
c@277 418
c@278 419 // transition range
c@278 420 d_vec_t txwt (prange_max - prange_min + 1);
c@278 421 d_vec_t scorecands (txwt.size());
c@277 422
c@295 423 for (unsigned int j=0;j<txwt.size();j++)
c@278 424 {
c@278 425 double mu = static_cast<double> (beat_period[i]);
c@278 426 txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
c@277 427
c@278 428 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
c@278 429 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
c@277 430
c@278 431 int cscore_ind = i+prange_min+j;
c@278 432 if (cscore_ind >= 0)
c@278 433 {
c@278 434 scorecands[j] = txwt[j] * cumscore[cscore_ind];
c@278 435 }
c@278 436 }
c@277 437
c@278 438 // find max value and index of maximum value
c@278 439 double vv = get_max_val(scorecands);
c@278 440 int xx = get_max_ind(scorecands);
c@277 441
c@278 442 cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
c@278 443 backlink[i] = i+prange_min+xx;
c@280 444
c@282 445 // std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
c@278 446 }
c@278 447
c@278 448 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
c@278 449 d_vec_t tmp_vec;
c@295 450 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
c@278 451 {
c@278 452 tmp_vec.push_back(cumscore[i]);
c@278 453 }
c@278 454
c@278 455 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
c@278 456
c@281 457 // can happen if no results obtained earlier (e.g. input too short)
c@281 458 if (startpoint >= backlink.size()) startpoint = backlink.size()-1;
c@281 459
c@278 460 // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
c@278 461 // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
c@278 462 i_vec_t ibeats;
c@278 463 ibeats.push_back(startpoint);
c@282 464 // std::cerr << "startpoint = " << startpoint << std::endl;
c@278 465 while (backlink[ibeats.back()] > 0)
c@278 466 {
c@282 467 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
c@281 468 int b = ibeats.back();
c@281 469 if (backlink[b] == b) break; // shouldn't happen... haha
c@281 470 ibeats.push_back(backlink[b]);
c@278 471 }
c@277 472
c@278 473 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
c@295 474 for (unsigned int i=0; i<ibeats.size(); i++)
c@278 475 {
c@278 476 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
c@278 477 }
c@277 478 }
c@277 479
c@277 480