annotate dsp/tempotracking/TempoTrackV2.cpp @ 96:88f3cfcff55f

A threshold (delta) is added in the peak picking parameters structure (PPickParams). It is used as an offset when computing the smoothed detection function. A constructor for the structure PPickParams is also added to set the parameters to 0 when a structure instance is created. Hence programmes using the peak picking parameter structure and which do not set the delta parameter (e.g. QM Vamp note onset detector) won't be affected by the modifications. Functions modified: - dsp/onsets/PeakPicking.cpp - dsp/onsets/PeakPicking.h - dsp/signalconditioning/DFProcess.cpp - dsp/signalconditioning/DFProcess.h
author mathieub <mathieu.barthet@eecs.qmul.ac.uk>
date Mon, 20 Jun 2011 19:01:48 +0100
parents e5907ae6de17
children d7619173d43c 37449f085a4c
rev   line source
cannam@52 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@52 2
cannam@52 3 /*
cannam@52 4 QM DSP Library
cannam@52 5
cannam@52 6 Centre for Digital Music, Queen Mary, University of London.
cannam@52 7 This file copyright 2008-2009 Matthew Davies and QMUL.
Chris@84 8
Chris@84 9 This program is free software; you can redistribute it and/or
Chris@84 10 modify it under the terms of the GNU General Public License as
Chris@84 11 published by the Free Software Foundation; either version 2 of the
Chris@84 12 License, or (at your option) any later version. See the file
Chris@84 13 COPYING included with this distribution for more information.
cannam@52 14 */
cannam@52 15
cannam@52 16 #include "TempoTrackV2.h"
cannam@52 17
cannam@52 18 #include <cmath>
cannam@52 19 #include <cstdlib>
cannam@53 20 #include <iostream>
cannam@52 21
cannam@54 22 #include "maths/MathUtilities.h"
cannam@52 23
cannam@52 24 #define EPS 0.0000008 // just some arbitrary small number
cannam@52 25
cannam@54 26 TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
cannam@54 27 m_rate(rate), m_increment(increment) { }
cannam@52 28 TempoTrackV2::~TempoTrackV2() { }
cannam@52 29
cannam@52 30 void
cannam@52 31 TempoTrackV2::filter_df(d_vec_t &df)
cannam@52 32 {
cannam@53 33 d_vec_t a(3);
cannam@53 34 d_vec_t b(3);
cannam@53 35 d_vec_t lp_df(df.size());
cannam@52 36
cannam@53 37 //equivalent in matlab to [b,a] = butter(2,0.4);
cannam@53 38 a[0] = 1.0000;
cannam@53 39 a[1] = -0.3695;
cannam@53 40 a[2] = 0.1958;
cannam@53 41 b[0] = 0.2066;
cannam@53 42 b[1] = 0.4131;
cannam@53 43 b[2] = 0.2066;
cannam@53 44
cannam@53 45 double inp1 = 0.;
cannam@53 46 double inp2 = 0.;
cannam@53 47 double out1 = 0.;
cannam@53 48 double out2 = 0.;
cannam@52 49
cannam@52 50
cannam@53 51 // forwards filtering
cannam@70 52 for (unsigned int i = 0;i < df.size();i++)
cannam@53 53 {
cannam@53 54 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
cannam@53 55 inp2 = inp1;
cannam@53 56 inp1 = df[i];
cannam@53 57 out2 = out1;
cannam@53 58 out1 = lp_df[i];
cannam@53 59 }
cannam@52 60
cannam@53 61 // copy forwards filtering to df...
cannam@53 62 // but, time-reversed, ready for backwards filtering
cannam@70 63 for (unsigned int i = 0;i < df.size();i++)
cannam@53 64 {
cannam@53 65 df[i] = lp_df[df.size()-i-1];
cannam@53 66 }
cannam@52 67
cannam@70 68 for (unsigned int i = 0;i < df.size();i++)
cannam@53 69 {
cannam@53 70 lp_df[i] = 0.;
cannam@53 71 }
cannam@52 72
cannam@53 73 inp1 = 0.; inp2 = 0.;
cannam@53 74 out1 = 0.; out2 = 0.;
cannam@52 75
cannam@52 76 // backwards filetering on time-reversed df
cannam@70 77 for (unsigned int i = 0;i < df.size();i++)
cannam@53 78 {
cannam@53 79 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
cannam@53 80 inp2 = inp1;
cannam@53 81 inp1 = df[i];
cannam@53 82 out2 = out1;
cannam@53 83 out1 = lp_df[i];
cannam@53 84 }
cannam@52 85
cannam@52 86 // write the re-reversed (i.e. forward) version back to df
cannam@70 87 for (unsigned int i = 0;i < df.size();i++)
cannam@53 88 {
cannam@53 89 df[i] = lp_df[df.size()-i-1];
cannam@53 90 }
cannam@52 91 }
cannam@52 92
cannam@52 93
cannam@52 94 void
cannam@79 95 TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
cannam@79 96 vector<double> &beat_period,
cannam@79 97 vector<double> &tempi)
cannam@52 98 {
cannam@53 99 // to follow matlab.. split into 512 sample frames with a 128 hop size
cannam@53 100 // calculate the acf,
cannam@53 101 // then the rcf.. and then stick the rcfs as columns of a matrix
cannam@53 102 // then call viterbi decoding with weight vector and transition matrix
cannam@53 103 // and get best path
cannam@52 104
cannam@70 105 unsigned int wv_len = 128;
cannam@53 106 double rayparam = 43.;
cannam@52 107
cannam@53 108 // make rayleigh weighting curve
cannam@53 109 d_vec_t wv(wv_len);
cannam@70 110 for (unsigned int i=0; i<wv.size(); i++)
cannam@52 111 {
cannam@53 112 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
cannam@52 113 }
cannam@52 114
cannam@53 115 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
cannam@70 116 unsigned int winlen = 512;
cannam@70 117 unsigned int step = 128;
cannam@53 118
cannam@53 119 // matrix to store output of comb filter bank, increment column of matrix at each frame
cannam@53 120 d_mat_t rcfmat;
cannam@53 121 int col_counter = -1;
cannam@53 122
cannam@53 123 // main loop for beat period calculation
cannam@70 124 for (unsigned int i=0; i+winlen<df.size(); i+=step)
cannam@53 125 {
cannam@53 126 // get dfframe
cannam@53 127 d_vec_t dfframe(winlen);
cannam@70 128 for (unsigned int k=0; k<winlen; k++)
cannam@53 129 {
cannam@53 130 dfframe[k] = df[i+k];
cannam@53 131 }
cannam@53 132 // get rcf vector for current frame
cannam@53 133 d_vec_t rcf(wv_len);
cannam@53 134 get_rcf(dfframe,wv,rcf);
cannam@52 135
cannam@53 136 rcfmat.push_back( d_vec_t() ); // adds a new column
cannam@53 137 col_counter++;
cannam@70 138 for (unsigned int j=0; j<rcf.size(); j++)
cannam@53 139 {
cannam@53 140 rcfmat[col_counter].push_back( rcf[j] );
cannam@53 141 }
cannam@53 142 }
cannam@53 143
cannam@53 144 // now call viterbi decoding function
cannam@53 145 viterbi_decode(rcfmat,wv,beat_period,tempi);
cannam@52 146 }
cannam@52 147
cannam@52 148
cannam@52 149 void
cannam@52 150 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
cannam@52 151 {
cannam@53 152 // calculate autocorrelation function
cannam@53 153 // then rcf
cannam@53 154 // just hard code for now... don't really need separate functions to do this
cannam@52 155
cannam@53 156 // make acf
cannam@52 157
cannam@53 158 d_vec_t dfframe(dfframe_in);
cannam@52 159
cannam@54 160 MathUtilities::adaptiveThreshold(dfframe);
cannam@52 161
cannam@53 162 d_vec_t acf(dfframe.size());
cannam@52 163
cannam@53 164
cannam@70 165 for (unsigned int lag=0; lag<dfframe.size(); lag++)
cannam@53 166 {
cannam@53 167 double sum = 0.;
cannam@53 168 double tmp = 0.;
cannam@52 169
cannam@70 170 for (unsigned int n=0; n<(dfframe.size()-lag); n++)
cannam@53 171 {
cannam@53 172 tmp = dfframe[n] * dfframe[n+lag];
cannam@53 173 sum += tmp;
cannam@53 174 }
cannam@53 175 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
cannam@53 176 }
cannam@52 177
cannam@53 178 // now apply comb filtering
cannam@53 179 int numelem = 4;
cannam@53 180
cannam@70 181 for (unsigned int i = 2;i < rcf.size();i++) // max beat period
cannam@53 182 {
cannam@53 183 for (int a = 1;a <= numelem;a++) // number of comb elements
cannam@53 184 {
cannam@53 185 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
cannam@53 186 {
cannam@53 187 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
cannam@53 188 }
cannam@53 189 }
cannam@53 190 }
cannam@53 191
cannam@53 192 // apply adaptive threshold to rcf
cannam@54 193 MathUtilities::adaptiveThreshold(rcf);
cannam@53 194
cannam@53 195 double rcfsum =0.;
cannam@70 196 for (unsigned int i=0; i<rcf.size(); i++)
cannam@53 197 {
cannam@53 198 rcf[i] += EPS ;
cannam@53 199 rcfsum += rcf[i];
cannam@53 200 }
cannam@52 201
cannam@53 202 // normalise rcf to sum to unity
cannam@70 203 for (unsigned int i=0; i<rcf.size(); i++)
cannam@52 204 {
cannam@53 205 rcf[i] /= (rcfsum + EPS);
cannam@52 206 }
cannam@52 207 }
cannam@52 208
cannam@52 209 void
cannam@53 210 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
cannam@52 211 {
cannam@53 212 // following Kevin Murphy's Viterbi decoding to get best path of
cannam@53 213 // beat periods through rfcmat
cannam@52 214
cannam@53 215 // make transition matrix
cannam@53 216 d_mat_t tmat;
cannam@70 217 for (unsigned int i=0;i<wv.size();i++)
cannam@53 218 {
cannam@53 219 tmat.push_back ( d_vec_t() ); // adds a new column
cannam@70 220 for (unsigned int j=0; j<wv.size(); j++)
cannam@53 221 {
cannam@53 222 tmat[i].push_back(0.); // fill with zeros initially
cannam@53 223 }
cannam@53 224 }
cannam@53 225
cannam@53 226 // variance of Gaussians in transition matrix
cannam@53 227 // formed of Gaussians on diagonal - implies slow tempo change
cannam@53 228 double sigma = 8.;
cannam@53 229 // don't want really short beat periods, or really long ones
cannam@70 230 for (unsigned int i=20;i <wv.size()-20; i++)
cannam@53 231 {
cannam@70 232 for (unsigned int j=20; j<wv.size()-20; j++)
cannam@53 233 {
cannam@53 234 double mu = static_cast<double>(i);
cannam@53 235 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
cannam@53 236 }
cannam@53 237 }
cannam@52 238
cannam@53 239 // parameters for Viterbi decoding... this part is taken from
cannam@53 240 // Murphy's matlab
cannam@52 241
cannam@53 242 d_mat_t delta;
cannam@53 243 i_mat_t psi;
cannam@70 244 for (unsigned int i=0;i <rcfmat.size(); i++)
cannam@53 245 {
cannam@53 246 delta.push_back( d_vec_t());
cannam@53 247 psi.push_back( i_vec_t());
cannam@70 248 for (unsigned int j=0; j<rcfmat[i].size(); j++)
cannam@53 249 {
cannam@53 250 delta[i].push_back(0.); // fill with zeros initially
cannam@53 251 psi[i].push_back(0); // fill with zeros initially
cannam@53 252 }
cannam@53 253 }
cannam@52 254
cannam@52 255
cannam@70 256 unsigned int T = delta.size();
cannam@56 257
cannam@56 258 if (T < 2) return; // can't do anything at all meaningful
cannam@56 259
cannam@70 260 unsigned int Q = delta[0].size();
cannam@52 261
cannam@53 262 // initialize first column of delta
cannam@70 263 for (unsigned int j=0; j<Q; j++)
cannam@52 264 {
cannam@53 265 delta[0][j] = wv[j] * rcfmat[0][j];
cannam@53 266 psi[0][j] = 0;
cannam@52 267 }
cannam@53 268
cannam@52 269 double deltasum = 0.;
cannam@70 270 for (unsigned int i=0; i<Q; i++)
cannam@52 271 {
cannam@53 272 deltasum += delta[0][i];
cannam@52 273 }
cannam@70 274 for (unsigned int i=0; i<Q; i++)
cannam@52 275 {
cannam@53 276 delta[0][i] /= (deltasum + EPS);
cannam@52 277 }
cannam@52 278
cannam@52 279
cannam@70 280 for (unsigned int t=1; t<T; t++)
cannam@53 281 {
cannam@53 282 d_vec_t tmp_vec(Q);
cannam@52 283
cannam@70 284 for (unsigned int j=0; j<Q; j++)
cannam@53 285 {
cannam@70 286 for (unsigned int i=0; i<Q; i++)
cannam@53 287 {
cannam@53 288 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
cannam@53 289 }
cannam@53 290
cannam@53 291 delta[t][j] = get_max_val(tmp_vec);
cannam@52 292
cannam@53 293 psi[t][j] = get_max_ind(tmp_vec);
cannam@53 294
cannam@53 295 delta[t][j] *= rcfmat[t][j];
cannam@53 296 }
cannam@52 297
cannam@53 298 // normalise current delta column
cannam@53 299 double deltasum = 0.;
cannam@70 300 for (unsigned int i=0; i<Q; i++)
cannam@53 301 {
cannam@53 302 deltasum += delta[t][i];
cannam@53 303 }
cannam@70 304 for (unsigned int i=0; i<Q; i++)
cannam@53 305 {
cannam@53 306 delta[t][i] /= (deltasum + EPS);
cannam@53 307 }
cannam@53 308 }
cannam@52 309
cannam@53 310 i_vec_t bestpath(T);
cannam@53 311 d_vec_t tmp_vec(Q);
cannam@70 312 for (unsigned int i=0; i<Q; i++)
cannam@53 313 {
cannam@53 314 tmp_vec[i] = delta[T-1][i];
cannam@53 315 }
cannam@52 316
cannam@53 317 // find starting point - best beat period for "last" frame
cannam@53 318 bestpath[T-1] = get_max_ind(tmp_vec);
cannam@53 319
cannam@53 320 // backtrace through index of maximum values in psi
cannam@70 321 for (unsigned int t=T-2; t>0 ;t--)
cannam@53 322 {
cannam@53 323 bestpath[t] = psi[t+1][bestpath[t+1]];
cannam@53 324 }
cannam@52 325
cannam@53 326 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
cannam@53 327 bestpath[0] = psi[1][bestpath[1]];
cannam@52 328
cannam@70 329 unsigned int lastind = 0;
cannam@70 330 for (unsigned int i=0; i<T; i++)
cannam@53 331 {
cannam@70 332 unsigned int step = 128;
cannam@70 333 for (unsigned int j=0; j<step; j++)
cannam@53 334 {
cannam@53 335 lastind = i*step+j;
cannam@53 336 beat_period[lastind] = bestpath[i];
cannam@53 337 }
cannam@57 338 // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl;
cannam@53 339 }
cannam@52 340
cannam@53 341 //fill in the last values...
cannam@70 342 for (unsigned int i=lastind; i<beat_period.size(); i++)
cannam@53 343 {
cannam@53 344 beat_period[i] = beat_period[lastind];
cannam@53 345 }
cannam@52 346
cannam@70 347 for (unsigned int i = 0; i < beat_period.size(); i++)
cannam@52 348 {
cannam@54 349 tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
cannam@52 350 }
cannam@52 351 }
cannam@52 352
cannam@52 353 double
cannam@52 354 TempoTrackV2::get_max_val(const d_vec_t &df)
cannam@52 355 {
cannam@53 356 double maxval = 0.;
cannam@70 357 for (unsigned int i=0; i<df.size(); i++)
cannam@52 358 {
cannam@53 359 if (maxval < df[i])
cannam@53 360 {
cannam@53 361 maxval = df[i];
cannam@53 362 }
cannam@52 363 }
cannam@52 364
cannam@53 365 return maxval;
cannam@52 366 }
cannam@52 367
cannam@52 368 int
cannam@52 369 TempoTrackV2::get_max_ind(const d_vec_t &df)
cannam@52 370 {
cannam@53 371 double maxval = 0.;
cannam@53 372 int ind = 0;
cannam@70 373 for (unsigned int i=0; i<df.size(); i++)
cannam@52 374 {
cannam@53 375 if (maxval < df[i])
cannam@53 376 {
cannam@53 377 maxval = df[i];
cannam@53 378 ind = i;
cannam@53 379 }
cannam@52 380 }
cannam@53 381
cannam@53 382 return ind;
cannam@52 383 }
cannam@52 384
cannam@52 385 void
cannam@52 386 TempoTrackV2::normalise_vec(d_vec_t &df)
cannam@52 387 {
cannam@53 388 double sum = 0.;
cannam@70 389 for (unsigned int i=0; i<df.size(); i++)
cannam@53 390 {
cannam@53 391 sum += df[i];
cannam@53 392 }
cannam@53 393
cannam@70 394 for (unsigned int i=0; i<df.size(); i++)
cannam@53 395 {
cannam@53 396 df[i]/= (sum + EPS);
cannam@53 397 }
cannam@52 398 }
cannam@52 399
cannam@52 400 void
cannam@79 401 TempoTrackV2::calculateBeats(const vector<double> &df,
cannam@79 402 const vector<double> &beat_period,
cannam@79 403 vector<double> &beats)
cannam@52 404 {
cannam@56 405 if (df.empty() || beat_period.empty()) return;
cannam@56 406
cannam@53 407 d_vec_t cumscore(df.size()); // store cumulative score
cannam@53 408 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
cannam@53 409 d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
cannam@52 410
cannam@70 411 for (unsigned int i=0; i<df.size(); i++)
cannam@52 412 {
cannam@53 413 localscore[i] = df[i];
cannam@53 414 backlink[i] = -1;
cannam@52 415 }
cannam@52 416
cannam@53 417 double tightness = 4.;
cannam@53 418 double alpha = 0.9;
cannam@52 419
cannam@53 420 // main loop
cannam@70 421 for (unsigned int i=0; i<localscore.size(); i++)
cannam@53 422 {
cannam@53 423 int prange_min = -2*beat_period[i];
cannam@53 424 int prange_max = round(-0.5*beat_period[i]);
cannam@52 425
cannam@53 426 // transition range
cannam@53 427 d_vec_t txwt (prange_max - prange_min + 1);
cannam@53 428 d_vec_t scorecands (txwt.size());
cannam@52 429
cannam@70 430 for (unsigned int j=0;j<txwt.size();j++)
cannam@53 431 {
cannam@53 432 double mu = static_cast<double> (beat_period[i]);
cannam@53 433 txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
cannam@52 434
cannam@53 435 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
cannam@53 436 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
cannam@52 437
cannam@53 438 int cscore_ind = i+prange_min+j;
cannam@53 439 if (cscore_ind >= 0)
cannam@53 440 {
cannam@53 441 scorecands[j] = txwt[j] * cumscore[cscore_ind];
cannam@53 442 }
cannam@53 443 }
cannam@52 444
cannam@53 445 // find max value and index of maximum value
cannam@53 446 double vv = get_max_val(scorecands);
cannam@53 447 int xx = get_max_ind(scorecands);
cannam@52 448
cannam@53 449 cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
cannam@53 450 backlink[i] = i+prange_min+xx;
cannam@55 451
cannam@57 452 // std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
cannam@53 453 }
cannam@53 454
cannam@53 455 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
cannam@53 456 d_vec_t tmp_vec;
cannam@70 457 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
cannam@53 458 {
cannam@53 459 tmp_vec.push_back(cumscore[i]);
cannam@53 460 }
cannam@53 461
cannam@53 462 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
cannam@53 463
cannam@56 464 // can happen if no results obtained earlier (e.g. input too short)
cannam@56 465 if (startpoint >= backlink.size()) startpoint = backlink.size()-1;
cannam@56 466
cannam@53 467 // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
cannam@53 468 // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
cannam@53 469 i_vec_t ibeats;
cannam@53 470 ibeats.push_back(startpoint);
cannam@57 471 // std::cerr << "startpoint = " << startpoint << std::endl;
cannam@53 472 while (backlink[ibeats.back()] > 0)
cannam@53 473 {
cannam@57 474 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
cannam@56 475 int b = ibeats.back();
cannam@56 476 if (backlink[b] == b) break; // shouldn't happen... haha
cannam@56 477 ibeats.push_back(backlink[b]);
cannam@53 478 }
cannam@52 479
cannam@53 480 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
cannam@70 481 for (unsigned int i=0; i<ibeats.size(); i++)
cannam@53 482 {
cannam@53 483 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
cannam@53 484 }
cannam@52 485 }
cannam@52 486
cannam@52 487