annotate dsp/tempotracking/TempoTrackV2.cpp @ 321:f1e6be2de9a5

A threshold (delta) is added in the peak picking parameters structure (PPickParams). It is used as an offset when computing the smoothed detection function. A constructor for the structure PPickParams is also added to set the parameters to 0 when a structure instance is created. Hence programmes using the peak picking parameter structure and which do not set the delta parameter (e.g. QM Vamp note onset detector) won't be affected by the modifications. Functions modified: - dsp/onsets/PeakPicking.cpp - dsp/onsets/PeakPicking.h - dsp/signalconditioning/DFProcess.cpp - dsp/signalconditioning/DFProcess.h
author mathieub <mathieu.barthet@eecs.qmul.ac.uk>
date Mon, 20 Jun 2011 19:01:48 +0100
parents d5014ab8b0e5
children d7619173d43c 37449f085a4c
rev   line source
c@277 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@277 2
c@277 3 /*
c@277 4 QM DSP Library
c@277 5
c@277 6 Centre for Digital Music, Queen Mary, University of London.
c@277 7 This file copyright 2008-2009 Matthew Davies and QMUL.
c@309 8
c@309 9 This program is free software; you can redistribute it and/or
c@309 10 modify it under the terms of the GNU General Public License as
c@309 11 published by the Free Software Foundation; either version 2 of the
c@309 12 License, or (at your option) any later version. See the file
c@309 13 COPYING included with this distribution for more information.
c@277 14 */
c@277 15
c@277 16 #include "TempoTrackV2.h"
c@277 17
c@277 18 #include <cmath>
c@277 19 #include <cstdlib>
c@278 20 #include <iostream>
c@277 21
c@279 22 #include "maths/MathUtilities.h"
c@277 23
c@277 24 #define EPS 0.0000008 // just some arbitrary small number
c@277 25
c@279 26 TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
c@279 27 m_rate(rate), m_increment(increment) { }
c@277 28 TempoTrackV2::~TempoTrackV2() { }
c@277 29
c@277 30 void
c@277 31 TempoTrackV2::filter_df(d_vec_t &df)
c@277 32 {
c@278 33 d_vec_t a(3);
c@278 34 d_vec_t b(3);
c@278 35 d_vec_t lp_df(df.size());
c@277 36
c@278 37 //equivalent in matlab to [b,a] = butter(2,0.4);
c@278 38 a[0] = 1.0000;
c@278 39 a[1] = -0.3695;
c@278 40 a[2] = 0.1958;
c@278 41 b[0] = 0.2066;
c@278 42 b[1] = 0.4131;
c@278 43 b[2] = 0.2066;
c@278 44
c@278 45 double inp1 = 0.;
c@278 46 double inp2 = 0.;
c@278 47 double out1 = 0.;
c@278 48 double out2 = 0.;
c@277 49
c@277 50
c@278 51 // forwards filtering
c@295 52 for (unsigned int i = 0;i < df.size();i++)
c@278 53 {
c@278 54 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
c@278 55 inp2 = inp1;
c@278 56 inp1 = df[i];
c@278 57 out2 = out1;
c@278 58 out1 = lp_df[i];
c@278 59 }
c@277 60
c@278 61 // copy forwards filtering to df...
c@278 62 // but, time-reversed, ready for backwards filtering
c@295 63 for (unsigned int i = 0;i < df.size();i++)
c@278 64 {
c@278 65 df[i] = lp_df[df.size()-i-1];
c@278 66 }
c@277 67
c@295 68 for (unsigned int i = 0;i < df.size();i++)
c@278 69 {
c@278 70 lp_df[i] = 0.;
c@278 71 }
c@277 72
c@278 73 inp1 = 0.; inp2 = 0.;
c@278 74 out1 = 0.; out2 = 0.;
c@277 75
c@277 76 // backwards filetering on time-reversed df
c@295 77 for (unsigned int i = 0;i < df.size();i++)
c@278 78 {
c@278 79 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
c@278 80 inp2 = inp1;
c@278 81 inp1 = df[i];
c@278 82 out2 = out1;
c@278 83 out1 = lp_df[i];
c@278 84 }
c@277 85
c@277 86 // write the re-reversed (i.e. forward) version back to df
c@295 87 for (unsigned int i = 0;i < df.size();i++)
c@278 88 {
c@278 89 df[i] = lp_df[df.size()-i-1];
c@278 90 }
c@277 91 }
c@277 92
c@277 93
c@277 94 void
c@304 95 TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
c@304 96 vector<double> &beat_period,
c@304 97 vector<double> &tempi)
c@277 98 {
c@278 99 // to follow matlab.. split into 512 sample frames with a 128 hop size
c@278 100 // calculate the acf,
c@278 101 // then the rcf.. and then stick the rcfs as columns of a matrix
c@278 102 // then call viterbi decoding with weight vector and transition matrix
c@278 103 // and get best path
c@277 104
c@295 105 unsigned int wv_len = 128;
c@278 106 double rayparam = 43.;
c@277 107
c@278 108 // make rayleigh weighting curve
c@278 109 d_vec_t wv(wv_len);
c@295 110 for (unsigned int i=0; i<wv.size(); i++)
c@277 111 {
c@278 112 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
c@277 113 }
c@277 114
c@278 115 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
c@295 116 unsigned int winlen = 512;
c@295 117 unsigned int step = 128;
c@278 118
c@278 119 // matrix to store output of comb filter bank, increment column of matrix at each frame
c@278 120 d_mat_t rcfmat;
c@278 121 int col_counter = -1;
c@278 122
c@278 123 // main loop for beat period calculation
c@295 124 for (unsigned int i=0; i+winlen<df.size(); i+=step)
c@278 125 {
c@278 126 // get dfframe
c@278 127 d_vec_t dfframe(winlen);
c@295 128 for (unsigned int k=0; k<winlen; k++)
c@278 129 {
c@278 130 dfframe[k] = df[i+k];
c@278 131 }
c@278 132 // get rcf vector for current frame
c@278 133 d_vec_t rcf(wv_len);
c@278 134 get_rcf(dfframe,wv,rcf);
c@277 135
c@278 136 rcfmat.push_back( d_vec_t() ); // adds a new column
c@278 137 col_counter++;
c@295 138 for (unsigned int j=0; j<rcf.size(); j++)
c@278 139 {
c@278 140 rcfmat[col_counter].push_back( rcf[j] );
c@278 141 }
c@278 142 }
c@278 143
c@278 144 // now call viterbi decoding function
c@278 145 viterbi_decode(rcfmat,wv,beat_period,tempi);
c@277 146 }
c@277 147
c@277 148
c@277 149 void
c@277 150 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
c@277 151 {
c@278 152 // calculate autocorrelation function
c@278 153 // then rcf
c@278 154 // just hard code for now... don't really need separate functions to do this
c@277 155
c@278 156 // make acf
c@277 157
c@278 158 d_vec_t dfframe(dfframe_in);
c@277 159
c@279 160 MathUtilities::adaptiveThreshold(dfframe);
c@277 161
c@278 162 d_vec_t acf(dfframe.size());
c@277 163
c@278 164
c@295 165 for (unsigned int lag=0; lag<dfframe.size(); lag++)
c@278 166 {
c@278 167 double sum = 0.;
c@278 168 double tmp = 0.;
c@277 169
c@295 170 for (unsigned int n=0; n<(dfframe.size()-lag); n++)
c@278 171 {
c@278 172 tmp = dfframe[n] * dfframe[n+lag];
c@278 173 sum += tmp;
c@278 174 }
c@278 175 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
c@278 176 }
c@277 177
c@278 178 // now apply comb filtering
c@278 179 int numelem = 4;
c@278 180
c@295 181 for (unsigned int i = 2;i < rcf.size();i++) // max beat period
c@278 182 {
c@278 183 for (int a = 1;a <= numelem;a++) // number of comb elements
c@278 184 {
c@278 185 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
c@278 186 {
c@278 187 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
c@278 188 }
c@278 189 }
c@278 190 }
c@278 191
c@278 192 // apply adaptive threshold to rcf
c@279 193 MathUtilities::adaptiveThreshold(rcf);
c@278 194
c@278 195 double rcfsum =0.;
c@295 196 for (unsigned int i=0; i<rcf.size(); i++)
c@278 197 {
c@278 198 rcf[i] += EPS ;
c@278 199 rcfsum += rcf[i];
c@278 200 }
c@277 201
c@278 202 // normalise rcf to sum to unity
c@295 203 for (unsigned int i=0; i<rcf.size(); i++)
c@277 204 {
c@278 205 rcf[i] /= (rcfsum + EPS);
c@277 206 }
c@277 207 }
c@277 208
c@277 209 void
c@278 210 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
c@277 211 {
c@278 212 // following Kevin Murphy's Viterbi decoding to get best path of
c@278 213 // beat periods through rfcmat
c@277 214
c@278 215 // make transition matrix
c@278 216 d_mat_t tmat;
c@295 217 for (unsigned int i=0;i<wv.size();i++)
c@278 218 {
c@278 219 tmat.push_back ( d_vec_t() ); // adds a new column
c@295 220 for (unsigned int j=0; j<wv.size(); j++)
c@278 221 {
c@278 222 tmat[i].push_back(0.); // fill with zeros initially
c@278 223 }
c@278 224 }
c@278 225
c@278 226 // variance of Gaussians in transition matrix
c@278 227 // formed of Gaussians on diagonal - implies slow tempo change
c@278 228 double sigma = 8.;
c@278 229 // don't want really short beat periods, or really long ones
c@295 230 for (unsigned int i=20;i <wv.size()-20; i++)
c@278 231 {
c@295 232 for (unsigned int j=20; j<wv.size()-20; j++)
c@278 233 {
c@278 234 double mu = static_cast<double>(i);
c@278 235 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
c@278 236 }
c@278 237 }
c@277 238
c@278 239 // parameters for Viterbi decoding... this part is taken from
c@278 240 // Murphy's matlab
c@277 241
c@278 242 d_mat_t delta;
c@278 243 i_mat_t psi;
c@295 244 for (unsigned int i=0;i <rcfmat.size(); i++)
c@278 245 {
c@278 246 delta.push_back( d_vec_t());
c@278 247 psi.push_back( i_vec_t());
c@295 248 for (unsigned int j=0; j<rcfmat[i].size(); j++)
c@278 249 {
c@278 250 delta[i].push_back(0.); // fill with zeros initially
c@278 251 psi[i].push_back(0); // fill with zeros initially
c@278 252 }
c@278 253 }
c@277 254
c@277 255
c@295 256 unsigned int T = delta.size();
c@281 257
c@281 258 if (T < 2) return; // can't do anything at all meaningful
c@281 259
c@295 260 unsigned int Q = delta[0].size();
c@277 261
c@278 262 // initialize first column of delta
c@295 263 for (unsigned int j=0; j<Q; j++)
c@277 264 {
c@278 265 delta[0][j] = wv[j] * rcfmat[0][j];
c@278 266 psi[0][j] = 0;
c@277 267 }
c@278 268
c@277 269 double deltasum = 0.;
c@295 270 for (unsigned int i=0; i<Q; i++)
c@277 271 {
c@278 272 deltasum += delta[0][i];
c@277 273 }
c@295 274 for (unsigned int i=0; i<Q; i++)
c@277 275 {
c@278 276 delta[0][i] /= (deltasum + EPS);
c@277 277 }
c@277 278
c@277 279
c@295 280 for (unsigned int t=1; t<T; t++)
c@278 281 {
c@278 282 d_vec_t tmp_vec(Q);
c@277 283
c@295 284 for (unsigned int j=0; j<Q; j++)
c@278 285 {
c@295 286 for (unsigned int i=0; i<Q; i++)
c@278 287 {
c@278 288 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
c@278 289 }
c@278 290
c@278 291 delta[t][j] = get_max_val(tmp_vec);
c@277 292
c@278 293 psi[t][j] = get_max_ind(tmp_vec);
c@278 294
c@278 295 delta[t][j] *= rcfmat[t][j];
c@278 296 }
c@277 297
c@278 298 // normalise current delta column
c@278 299 double deltasum = 0.;
c@295 300 for (unsigned int i=0; i<Q; i++)
c@278 301 {
c@278 302 deltasum += delta[t][i];
c@278 303 }
c@295 304 for (unsigned int i=0; i<Q; i++)
c@278 305 {
c@278 306 delta[t][i] /= (deltasum + EPS);
c@278 307 }
c@278 308 }
c@277 309
c@278 310 i_vec_t bestpath(T);
c@278 311 d_vec_t tmp_vec(Q);
c@295 312 for (unsigned int i=0; i<Q; i++)
c@278 313 {
c@278 314 tmp_vec[i] = delta[T-1][i];
c@278 315 }
c@277 316
c@278 317 // find starting point - best beat period for "last" frame
c@278 318 bestpath[T-1] = get_max_ind(tmp_vec);
c@278 319
c@278 320 // backtrace through index of maximum values in psi
c@295 321 for (unsigned int t=T-2; t>0 ;t--)
c@278 322 {
c@278 323 bestpath[t] = psi[t+1][bestpath[t+1]];
c@278 324 }
c@277 325
c@278 326 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
c@278 327 bestpath[0] = psi[1][bestpath[1]];
c@277 328
c@295 329 unsigned int lastind = 0;
c@295 330 for (unsigned int i=0; i<T; i++)
c@278 331 {
c@295 332 unsigned int step = 128;
c@295 333 for (unsigned int j=0; j<step; j++)
c@278 334 {
c@278 335 lastind = i*step+j;
c@278 336 beat_period[lastind] = bestpath[i];
c@278 337 }
c@282 338 // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl;
c@278 339 }
c@277 340
c@278 341 //fill in the last values...
c@295 342 for (unsigned int i=lastind; i<beat_period.size(); i++)
c@278 343 {
c@278 344 beat_period[i] = beat_period[lastind];
c@278 345 }
c@277 346
c@295 347 for (unsigned int i = 0; i < beat_period.size(); i++)
c@277 348 {
c@279 349 tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
c@277 350 }
c@277 351 }
c@277 352
c@277 353 double
c@277 354 TempoTrackV2::get_max_val(const d_vec_t &df)
c@277 355 {
c@278 356 double maxval = 0.;
c@295 357 for (unsigned int i=0; i<df.size(); i++)
c@277 358 {
c@278 359 if (maxval < df[i])
c@278 360 {
c@278 361 maxval = df[i];
c@278 362 }
c@277 363 }
c@277 364
c@278 365 return maxval;
c@277 366 }
c@277 367
c@277 368 int
c@277 369 TempoTrackV2::get_max_ind(const d_vec_t &df)
c@277 370 {
c@278 371 double maxval = 0.;
c@278 372 int ind = 0;
c@295 373 for (unsigned int i=0; i<df.size(); i++)
c@277 374 {
c@278 375 if (maxval < df[i])
c@278 376 {
c@278 377 maxval = df[i];
c@278 378 ind = i;
c@278 379 }
c@277 380 }
c@278 381
c@278 382 return ind;
c@277 383 }
c@277 384
c@277 385 void
c@277 386 TempoTrackV2::normalise_vec(d_vec_t &df)
c@277 387 {
c@278 388 double sum = 0.;
c@295 389 for (unsigned int i=0; i<df.size(); i++)
c@278 390 {
c@278 391 sum += df[i];
c@278 392 }
c@278 393
c@295 394 for (unsigned int i=0; i<df.size(); i++)
c@278 395 {
c@278 396 df[i]/= (sum + EPS);
c@278 397 }
c@277 398 }
c@277 399
c@277 400 void
c@304 401 TempoTrackV2::calculateBeats(const vector<double> &df,
c@304 402 const vector<double> &beat_period,
c@304 403 vector<double> &beats)
c@277 404 {
c@281 405 if (df.empty() || beat_period.empty()) return;
c@281 406
c@278 407 d_vec_t cumscore(df.size()); // store cumulative score
c@278 408 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
c@278 409 d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
c@277 410
c@295 411 for (unsigned int i=0; i<df.size(); i++)
c@277 412 {
c@278 413 localscore[i] = df[i];
c@278 414 backlink[i] = -1;
c@277 415 }
c@277 416
c@278 417 double tightness = 4.;
c@278 418 double alpha = 0.9;
c@277 419
c@278 420 // main loop
c@295 421 for (unsigned int i=0; i<localscore.size(); i++)
c@278 422 {
c@278 423 int prange_min = -2*beat_period[i];
c@278 424 int prange_max = round(-0.5*beat_period[i]);
c@277 425
c@278 426 // transition range
c@278 427 d_vec_t txwt (prange_max - prange_min + 1);
c@278 428 d_vec_t scorecands (txwt.size());
c@277 429
c@295 430 for (unsigned int j=0;j<txwt.size();j++)
c@278 431 {
c@278 432 double mu = static_cast<double> (beat_period[i]);
c@278 433 txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
c@277 434
c@278 435 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
c@278 436 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
c@277 437
c@278 438 int cscore_ind = i+prange_min+j;
c@278 439 if (cscore_ind >= 0)
c@278 440 {
c@278 441 scorecands[j] = txwt[j] * cumscore[cscore_ind];
c@278 442 }
c@278 443 }
c@277 444
c@278 445 // find max value and index of maximum value
c@278 446 double vv = get_max_val(scorecands);
c@278 447 int xx = get_max_ind(scorecands);
c@277 448
c@278 449 cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
c@278 450 backlink[i] = i+prange_min+xx;
c@280 451
c@282 452 // std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
c@278 453 }
c@278 454
c@278 455 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
c@278 456 d_vec_t tmp_vec;
c@295 457 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
c@278 458 {
c@278 459 tmp_vec.push_back(cumscore[i]);
c@278 460 }
c@278 461
c@278 462 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
c@278 463
c@281 464 // can happen if no results obtained earlier (e.g. input too short)
c@281 465 if (startpoint >= backlink.size()) startpoint = backlink.size()-1;
c@281 466
c@278 467 // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
c@278 468 // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
c@278 469 i_vec_t ibeats;
c@278 470 ibeats.push_back(startpoint);
c@282 471 // std::cerr << "startpoint = " << startpoint << std::endl;
c@278 472 while (backlink[ibeats.back()] > 0)
c@278 473 {
c@282 474 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
c@281 475 int b = ibeats.back();
c@281 476 if (backlink[b] == b) break; // shouldn't happen... haha
c@281 477 ibeats.push_back(backlink[b]);
c@278 478 }
c@277 479
c@278 480 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
c@295 481 for (unsigned int i=0; i<ibeats.size(); i++)
c@278 482 {
c@278 483 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
c@278 484 }
c@277 485 }
c@277 486
c@277 487