annotate dsp/tempotracking/TempoTrackV2.cpp @ 479:7e52c034cf62

Untabify, indent, tidy
author Chris Cannam <cannam@all-day-breakfast.com>
date Fri, 31 May 2019 10:35:08 +0100
parents 3f7b4b282df1
children bb78ca3fe7de
rev   line source
c@277 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@277 2
c@277 3 /*
c@277 4 QM DSP Library
c@277 5
c@277 6 Centre for Digital Music, Queen Mary, University of London.
c@277 7 This file copyright 2008-2009 Matthew Davies and QMUL.
c@309 8
c@309 9 This program is free software; you can redistribute it and/or
c@309 10 modify it under the terms of the GNU General Public License as
c@309 11 published by the Free Software Foundation; either version 2 of the
c@309 12 License, or (at your option) any later version. See the file
c@309 13 COPYING included with this distribution for more information.
c@277 14 */
c@277 15
c@277 16 #include "TempoTrackV2.h"
c@277 17
c@277 18 #include <cmath>
c@277 19 #include <cstdlib>
c@278 20 #include <iostream>
c@277 21
c@279 22 #include "maths/MathUtilities.h"
c@277 23
c@277 24 #define EPS 0.0000008 // just some arbitrary small number
c@277 25
c@279 26 TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
c@279 27 m_rate(rate), m_increment(increment) { }
cannam@479 28
c@277 29 TempoTrackV2::~TempoTrackV2() { }
c@277 30
c@277 31 void
c@277 32 TempoTrackV2::filter_df(d_vec_t &df)
c@277 33 {
c@278 34 d_vec_t a(3);
c@278 35 d_vec_t b(3);
cannam@479 36 d_vec_t lp_df(df.size());
c@277 37
c@278 38 //equivalent in matlab to [b,a] = butter(2,0.4);
c@278 39 a[0] = 1.0000;
c@278 40 a[1] = -0.3695;
c@278 41 a[2] = 0.1958;
c@278 42 b[0] = 0.2066;
c@278 43 b[1] = 0.4131;
c@278 44 b[2] = 0.2066;
luis@327 45
c@278 46 double inp1 = 0.;
c@278 47 double inp2 = 0.;
c@278 48 double out1 = 0.;
c@278 49 double out2 = 0.;
c@277 50
c@277 51
c@278 52 // forwards filtering
cannam@479 53 for (unsigned int i = 0;i < df.size();i++) {
c@278 54 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
c@278 55 inp2 = inp1;
c@278 56 inp1 = df[i];
c@278 57 out2 = out1;
c@278 58 out1 = lp_df[i];
c@278 59 }
c@277 60
c@278 61 // copy forwards filtering to df...
c@278 62 // but, time-reversed, ready for backwards filtering
cannam@479 63 for (unsigned int i = 0;i < df.size();i++) {
c@278 64 df[i] = lp_df[df.size()-i-1];
c@278 65 }
c@277 66
cannam@479 67 for (unsigned int i = 0;i < df.size();i++) {
luis@327 68 lp_df[i] = 0.;
c@278 69 }
c@277 70
c@278 71 inp1 = 0.; inp2 = 0.;
c@278 72 out1 = 0.; out2 = 0.;
c@277 73
cannam@479 74 // backwards filetering on time-reversed df
cannam@479 75 for (unsigned int i = 0;i < df.size();i++) {
c@278 76 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
c@278 77 inp2 = inp1;
c@278 78 inp1 = df[i];
c@278 79 out2 = out1;
c@278 80 out1 = lp_df[i];
c@278 81 }
c@277 82
cannam@479 83 // write the re-reversed (i.e. forward) version back to df
cannam@479 84 for (unsigned int i = 0;i < df.size();i++) {
c@278 85 df[i] = lp_df[df.size()-i-1];
c@278 86 }
c@277 87 }
c@277 88
c@277 89
luis@327 90 // MEPD 28/11/12
luis@327 91 // This function now allows for a user to specify an inputtempo (in BPM)
luis@327 92 // and a flag "constraintempo" which replaces the general rayleigh weighting for periodicities
luis@327 93 // with a gaussian which is centered around the input tempo
luis@327 94 // Note, if inputtempo = 120 and constraintempo = false, then functionality is
luis@327 95 // as it was before
c@277 96 void
c@304 97 TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
c@304 98 vector<double> &beat_period,
luis@327 99 vector<double> &tempi,
luis@327 100 double inputtempo, bool constraintempo)
c@277 101 {
c@278 102 // to follow matlab.. split into 512 sample frames with a 128 hop size
c@278 103 // calculate the acf,
c@278 104 // then the rcf.. and then stick the rcfs as columns of a matrix
c@278 105 // then call viterbi decoding with weight vector and transition matrix
c@278 106 // and get best path
c@277 107
c@295 108 unsigned int wv_len = 128;
luis@327 109
luis@327 110 // MEPD 28/11/12
luis@327 111 // the default value of inputtempo in the beat tracking plugin is 120
luis@327 112 // so if the user specifies a different inputtempo, the rayparam will be updated
luis@327 113 // accordingly.
luis@327 114 // note: 60*44100/512 is a magic number
luis@327 115 // this might (will?) break if a user specifies a different frame rate for the onset detection function
luis@327 116 double rayparam = (60*44100/512)/inputtempo;
luis@327 117
c@278 118 // make rayleigh weighting curve
c@278 119 d_vec_t wv(wv_len);
luis@327 120
luis@327 121 // check whether or not to use rayleigh weighting (if constraintempo is false)
luis@327 122 // or use gaussian weighting it (constraintempo is true)
cannam@479 123 if (constraintempo) {
cannam@479 124 for (unsigned int i=0; i<wv.size(); i++) {
luis@327 125 // MEPD 28/11/12
luis@327 126 // do a gaussian weighting instead of rayleigh
luis@327 127 wv[i] = exp( (-1.*pow((static_cast<double> (i)-rayparam),2.)) / (2.*pow(rayparam/4.,2.)) );
luis@327 128 }
cannam@479 129 } else {
cannam@479 130 for (unsigned int i=0; i<wv.size(); i++) {
luis@327 131 // MEPD 28/11/12
luis@327 132 // standard rayleigh weighting over periodicities
luis@327 133 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
luis@327 134 }
c@277 135 }
c@277 136
c@278 137 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
c@295 138 unsigned int winlen = 512;
c@295 139 unsigned int step = 128;
c@278 140
c@278 141 // matrix to store output of comb filter bank, increment column of matrix at each frame
c@278 142 d_mat_t rcfmat;
c@278 143 int col_counter = -1;
c@278 144
c@278 145 // main loop for beat period calculation
cannam@479 146 for (unsigned int i=0; i+winlen<df.size(); i+=step) {
cannam@479 147
c@278 148 // get dfframe
c@278 149 d_vec_t dfframe(winlen);
cannam@479 150 for (unsigned int k=0; k<winlen; k++) {
c@278 151 dfframe[k] = df[i+k];
c@278 152 }
c@278 153 // get rcf vector for current frame
luis@327 154 d_vec_t rcf(wv_len);
c@278 155 get_rcf(dfframe,wv,rcf);
luis@327 156
c@278 157 rcfmat.push_back( d_vec_t() ); // adds a new column
c@278 158 col_counter++;
cannam@479 159 for (unsigned int j=0; j<rcf.size(); j++) {
c@278 160 rcfmat[col_counter].push_back( rcf[j] );
c@278 161 }
c@278 162 }
luis@327 163
c@278 164 // now call viterbi decoding function
c@278 165 viterbi_decode(rcfmat,wv,beat_period,tempi);
c@277 166 }
c@277 167
c@277 168
c@277 169 void
c@277 170 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
c@277 171 {
c@278 172 // calculate autocorrelation function
c@278 173 // then rcf
c@278 174 // just hard code for now... don't really need separate functions to do this
c@277 175
c@278 176 // make acf
c@277 177
c@278 178 d_vec_t dfframe(dfframe_in);
c@277 179
c@279 180 MathUtilities::adaptiveThreshold(dfframe);
c@277 181
c@278 182 d_vec_t acf(dfframe.size());
c@277 183
cannam@479 184 for (unsigned int lag=0; lag<dfframe.size(); lag++) {
c@278 185 double sum = 0.;
c@278 186 double tmp = 0.;
c@277 187
cannam@479 188 for (unsigned int n=0; n<(dfframe.size()-lag); n++) {
luis@327 189 tmp = dfframe[n] * dfframe[n+lag];
c@278 190 sum += tmp;
c@278 191 }
c@278 192 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
c@278 193 }
c@277 194
c@278 195 // now apply comb filtering
c@278 196 int numelem = 4;
luis@327 197
cannam@479 198 for (unsigned int i = 2;i < rcf.size();i++) { // max beat period
cannam@479 199 for (int a = 1;a <= numelem;a++) { // number of comb elements
cannam@479 200 for (int b = 1-a;b <= a-1;b++) { // general state using normalisation of comb elements
cannam@479 201 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
c@278 202 }
c@278 203 }
c@278 204 }
luis@327 205
c@278 206 // apply adaptive threshold to rcf
c@279 207 MathUtilities::adaptiveThreshold(rcf);
luis@327 208
c@278 209 double rcfsum =0.;
cannam@479 210 for (unsigned int i=0; i<rcf.size(); i++) {
c@278 211 rcf[i] += EPS ;
c@278 212 rcfsum += rcf[i];
c@278 213 }
c@277 214
c@278 215 // normalise rcf to sum to unity
cannam@479 216 for (unsigned int i=0; i<rcf.size(); i++) {
c@278 217 rcf[i] /= (rcfsum + EPS);
c@277 218 }
c@277 219 }
c@277 220
c@277 221 void
c@278 222 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
c@277 223 {
c@278 224 // following Kevin Murphy's Viterbi decoding to get best path of
c@278 225 // beat periods through rfcmat
c@277 226
c@278 227 // make transition matrix
c@278 228 d_mat_t tmat;
cannam@479 229 for (unsigned int i=0;i<wv.size();i++) {
c@278 230 tmat.push_back ( d_vec_t() ); // adds a new column
cannam@479 231 for (unsigned int j=0; j<wv.size(); j++) {
c@278 232 tmat[i].push_back(0.); // fill with zeros initially
c@278 233 }
c@278 234 }
luis@327 235
c@278 236 // variance of Gaussians in transition matrix
c@278 237 // formed of Gaussians on diagonal - implies slow tempo change
c@278 238 double sigma = 8.;
c@278 239 // don't want really short beat periods, or really long ones
cannam@479 240 for (unsigned int i=20;i <wv.size()-20; i++) {
cannam@479 241 for (unsigned int j=20; j<wv.size()-20; j++) {
c@278 242 double mu = static_cast<double>(i);
c@278 243 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
c@278 244 }
c@278 245 }
c@277 246
c@278 247 // parameters for Viterbi decoding... this part is taken from
c@278 248 // Murphy's matlab
c@277 249
c@278 250 d_mat_t delta;
c@278 251 i_mat_t psi;
cannam@479 252 for (unsigned int i=0;i <rcfmat.size(); i++) {
c@278 253 delta.push_back( d_vec_t());
c@278 254 psi.push_back( i_vec_t());
cannam@479 255 for (unsigned int j=0; j<rcfmat[i].size(); j++) {
c@278 256 delta[i].push_back(0.); // fill with zeros initially
c@278 257 psi[i].push_back(0); // fill with zeros initially
c@278 258 }
c@278 259 }
c@277 260
c@295 261 unsigned int T = delta.size();
c@281 262
c@281 263 if (T < 2) return; // can't do anything at all meaningful
c@281 264
c@295 265 unsigned int Q = delta[0].size();
c@277 266
c@278 267 // initialize first column of delta
cannam@479 268 for (unsigned int j=0; j<Q; j++) {
c@278 269 delta[0][j] = wv[j] * rcfmat[0][j];
c@278 270 psi[0][j] = 0;
c@277 271 }
luis@327 272
c@277 273 double deltasum = 0.;
cannam@479 274 for (unsigned int i=0; i<Q; i++) {
c@278 275 deltasum += delta[0][i];
luis@327 276 }
cannam@479 277 for (unsigned int i=0; i<Q; i++) {
c@278 278 delta[0][i] /= (deltasum + EPS);
luis@327 279 }
c@277 280
c@295 281 for (unsigned int t=1; t<T; t++)
c@278 282 {
c@278 283 d_vec_t tmp_vec(Q);
c@277 284
cannam@479 285 for (unsigned int j=0; j<Q; j++) {
cannam@479 286 for (unsigned int i=0; i<Q; i++) {
c@278 287 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
luis@327 288 }
luis@327 289
luis@327 290 delta[t][j] = get_max_val(tmp_vec);
c@277 291
c@278 292 psi[t][j] = get_max_ind(tmp_vec);
luis@327 293
c@278 294 delta[t][j] *= rcfmat[t][j];
c@278 295 }
c@277 296
c@278 297 // normalise current delta column
c@278 298 double deltasum = 0.;
cannam@479 299 for (unsigned int i=0; i<Q; i++) {
c@278 300 deltasum += delta[t][i];
luis@327 301 }
cannam@479 302 for (unsigned int i=0; i<Q; i++) {
c@278 303 delta[t][i] /= (deltasum + EPS);
luis@327 304 }
c@278 305 }
c@277 306
c@278 307 i_vec_t bestpath(T);
c@278 308 d_vec_t tmp_vec(Q);
cannam@479 309 for (unsigned int i=0; i<Q; i++) {
c@278 310 tmp_vec[i] = delta[T-1][i];
c@278 311 }
c@277 312
c@278 313 // find starting point - best beat period for "last" frame
c@278 314 bestpath[T-1] = get_max_ind(tmp_vec);
luis@327 315
c@278 316 // backtrace through index of maximum values in psi
cannam@479 317 for (unsigned int t=T-2; t>0 ;t--) {
c@278 318 bestpath[t] = psi[t+1][bestpath[t+1]];
c@278 319 }
c@277 320
c@278 321 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
c@278 322 bestpath[0] = psi[1][bestpath[1]];
c@277 323
c@295 324 unsigned int lastind = 0;
cannam@479 325 for (unsigned int i=0; i<T; i++) {
c@295 326 unsigned int step = 128;
cannam@479 327 for (unsigned int j=0; j<step; j++) {
c@278 328 lastind = i*step+j;
c@278 329 beat_period[lastind] = bestpath[i];
c@278 330 }
c@282 331 // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl;
c@278 332 }
c@277 333
c@278 334 //fill in the last values...
cannam@479 335 for (unsigned int i=lastind; i<beat_period.size(); i++) {
c@278 336 beat_period[i] = beat_period[lastind];
c@278 337 }
c@277 338
cannam@479 339 for (unsigned int i = 0; i < beat_period.size(); i++) {
c@279 340 tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
c@277 341 }
c@277 342 }
c@277 343
c@277 344 double
c@277 345 TempoTrackV2::get_max_val(const d_vec_t &df)
c@277 346 {
c@278 347 double maxval = 0.;
cannam@479 348 for (unsigned int i=0; i<df.size(); i++) {
cannam@479 349 if (maxval < df[i]) {
c@278 350 maxval = df[i];
c@278 351 }
c@277 352 }
luis@327 353
c@278 354 return maxval;
c@277 355 }
c@277 356
c@277 357 int
c@277 358 TempoTrackV2::get_max_ind(const d_vec_t &df)
c@277 359 {
c@278 360 double maxval = 0.;
c@278 361 int ind = 0;
cannam@479 362 for (unsigned int i=0; i<df.size(); i++) {
cannam@479 363 if (maxval < df[i]) {
c@278 364 maxval = df[i];
c@278 365 ind = i;
c@278 366 }
c@277 367 }
luis@327 368
c@278 369 return ind;
c@277 370 }
c@277 371
c@277 372 void
c@277 373 TempoTrackV2::normalise_vec(d_vec_t &df)
c@277 374 {
c@278 375 double sum = 0.;
cannam@479 376 for (unsigned int i=0; i<df.size(); i++) {
c@278 377 sum += df[i];
c@278 378 }
luis@327 379
cannam@479 380 for (unsigned int i=0; i<df.size(); i++) {
c@278 381 df[i]/= (sum + EPS);
c@278 382 }
c@277 383 }
c@277 384
luis@327 385 // MEPD 28/11/12
luis@327 386 // this function has been updated to allow the "alpha" and "tightness" parameters
luis@327 387 // of the dynamic program to be set by the user
luis@327 388 // the default value of alpha = 0.9 and tightness = 4
c@277 389 void
c@304 390 TempoTrackV2::calculateBeats(const vector<double> &df,
c@304 391 const vector<double> &beat_period,
luis@327 392 vector<double> &beats, double alpha, double tightness)
c@277 393 {
c@281 394 if (df.empty() || beat_period.empty()) return;
c@281 395
c@278 396 d_vec_t cumscore(df.size()); // store cumulative score
c@278 397 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
c@278 398 d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
c@277 399
cannam@479 400 for (unsigned int i=0; i<df.size(); i++) {
c@278 401 localscore[i] = df[i];
c@278 402 backlink[i] = -1;
c@277 403 }
c@277 404
luis@327 405 //double tightness = 4.;
luis@327 406 //double alpha = 0.9;
luis@327 407 // MEPD 28/11/12
luis@327 408 // debug statements that can be removed.
c@330 409 // std::cerr << "alpha" << alpha << std::endl;
c@330 410 // std::cerr << "tightness" << tightness << std::endl;
c@277 411
c@278 412 // main loop
cannam@479 413 for (unsigned int i=0; i<localscore.size(); i++) {
cannam@479 414
c@278 415 int prange_min = -2*beat_period[i];
c@278 416 int prange_max = round(-0.5*beat_period[i]);
c@277 417
c@278 418 // transition range
c@278 419 d_vec_t txwt (prange_max - prange_min + 1);
c@278 420 d_vec_t scorecands (txwt.size());
c@277 421
cannam@479 422 for (unsigned int j=0;j<txwt.size();j++) {
cannam@479 423
c@278 424 double mu = static_cast<double> (beat_period[i]);
c@278 425 txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
c@277 426
c@278 427 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
c@278 428 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
c@277 429
c@278 430 int cscore_ind = i+prange_min+j;
cannam@479 431 if (cscore_ind >= 0) {
c@278 432 scorecands[j] = txwt[j] * cumscore[cscore_ind];
c@278 433 }
c@278 434 }
c@277 435
c@278 436 // find max value and index of maximum value
c@278 437 double vv = get_max_val(scorecands);
c@278 438 int xx = get_max_ind(scorecands);
c@277 439
c@278 440 cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
c@278 441 backlink[i] = i+prange_min+xx;
c@280 442
c@282 443 // std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
c@278 444 }
c@278 445
c@278 446 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
c@278 447 d_vec_t tmp_vec;
cannam@479 448 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++) {
c@278 449 tmp_vec.push_back(cumscore[i]);
luis@327 450 }
c@278 451
cannam@479 452 int startpoint = get_max_ind(tmp_vec) +
cannam@479 453 cumscore.size() - beat_period[beat_period.size()-1] ;
c@278 454
c@281 455 // can happen if no results obtained earlier (e.g. input too short)
cannam@479 456 if (startpoint >= (int)backlink.size()) {
cannam@479 457 startpoint = backlink.size()-1;
cannam@479 458 }
c@281 459
c@278 460 // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
c@278 461 // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
c@278 462 i_vec_t ibeats;
c@278 463 ibeats.push_back(startpoint);
c@282 464 // std::cerr << "startpoint = " << startpoint << std::endl;
cannam@479 465 while (backlink[ibeats.back()] > 0) {
c@282 466 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
c@281 467 int b = ibeats.back();
c@281 468 if (backlink[b] == b) break; // shouldn't happen... haha
c@281 469 ibeats.push_back(backlink[b]);
c@278 470 }
luis@327 471
c@278 472 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
cannam@479 473 for (unsigned int i=0; i<ibeats.size(); i++) {
c@278 474 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
c@278 475 }
c@277 476 }
c@277 477
c@277 478