TempoTrackV2.cpp
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2 
3 /*
4  QM DSP Library
5 
6  Centre for Digital Music, Queen Mary, University of London.
7  This file copyright 2008-2009 Matthew Davies and QMUL.
8 
9  This program is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 2 of the
12  License, or (at your option) any later version. See the file
13  COPYING included with this distribution for more information.
14 */
15 
16 #include "TempoTrackV2.h"
17 
18 #include <cmath>
19 #include <cstdlib>
20 #include <iostream>
21 
22 #include "maths/MathUtilities.h"
23 
24 using std::vector;
25 
26 #define EPS 0.0000008 // just some arbitrary small number
27 
28 TempoTrackV2::TempoTrackV2(float rate, int increment) :
29  m_rate(rate), m_increment(increment) {
30 }
31 
33 
34 void
36 {
37  int df_len = int(df.size());
38 
39  d_vec_t a(3);
40  d_vec_t b(3);
41  d_vec_t lp_df(df_len);
42 
43  //equivalent in matlab to [b,a] = butter(2,0.4);
44  a[0] = 1.0000;
45  a[1] = -0.3695;
46  a[2] = 0.1958;
47  b[0] = 0.2066;
48  b[1] = 0.4131;
49  b[2] = 0.2066;
50 
51  double inp1 = 0.;
52  double inp2 = 0.;
53  double out1 = 0.;
54  double out2 = 0.;
55 
56 
57  // forwards filtering
58  for (int i = 0; i < df_len; i++) {
59  lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
60  inp2 = inp1;
61  inp1 = df[i];
62  out2 = out1;
63  out1 = lp_df[i];
64  }
65 
66  // copy forwards filtering to df...
67  // but, time-reversed, ready for backwards filtering
68  for (int i = 0; i < df_len; i++) {
69  df[i] = lp_df[df_len - i - 1];
70  }
71 
72  for (int i = 0; i < df_len; i++) {
73  lp_df[i] = 0.;
74  }
75 
76  inp1 = 0.; inp2 = 0.;
77  out1 = 0.; out2 = 0.;
78 
79  // backwards filetering on time-reversed df
80  for (int i = 0; i < df_len; i++) {
81  lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
82  inp2 = inp1;
83  inp1 = df[i];
84  out2 = out1;
85  out1 = lp_df[i];
86  }
87 
88  // write the re-reversed (i.e. forward) version back to df
89  for (int i = 0; i < df_len; i++) {
90  df[i] = lp_df[df_len - i - 1];
91  }
92 }
93 
94 
95 // MEPD 28/11/12
96 // This function now allows for a user to specify an inputtempo (in BPM)
97 // and a flag "constraintempo" which replaces the general rayleigh weighting for periodicities
98 // with a gaussian which is centered around the input tempo
99 // Note, if inputtempo = 120 and constraintempo = false, then functionality is
100 // as it was before
101 void
102 TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
103  vector<double> &beat_period,
104  vector<double> &tempi,
105  double inputtempo, bool constraintempo)
106 {
107  // to follow matlab.. split into 512 sample frames with a 128 hop size
108  // calculate the acf,
109  // then the rcf.. and then stick the rcfs as columns of a matrix
110  // then call viterbi decoding with weight vector and transition matrix
111  // and get best path
112 
113  int wv_len = 128;
114 
115  // MEPD 28/11/12
116  // the default value of inputtempo in the beat tracking plugin is 120
117  // so if the user specifies a different inputtempo, the rayparam will be updated
118  // accordingly.
119  // note: 60*44100/512 is a magic number
120  // this might (will?) break if a user specifies a different frame rate for the onset detection function
121  double rayparam = (60*44100/512)/inputtempo;
122 
123  // make rayleigh weighting curve
124  d_vec_t wv(wv_len);
125 
126  // check whether or not to use rayleigh weighting (if constraintempo is false)
127  // or use gaussian weighting it (constraintempo is true)
128  if (constraintempo) {
129  for (int i = 0; i < wv_len; i++) {
130  // MEPD 28/11/12
131  // do a gaussian weighting instead of rayleigh
132  wv[i] = exp( (-1.*pow((double(i)-rayparam),2.)) / (2.*pow(rayparam/4.,2.)) );
133  }
134  } else {
135  for (int i = 0; i < wv_len; i++) {
136  // MEPD 28/11/12
137  // standard rayleigh weighting over periodicities
138  wv[i] = (double(i) / pow(rayparam,2.)) * exp((-1.*pow(-double(i),2.)) / (2.*pow(rayparam,2.)));
139  }
140  }
141 
142  // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
143  int winlen = 512;
144  int step = 128;
145 
146  // matrix to store output of comb filter bank, increment column of matrix at each frame
147  d_mat_t rcfmat;
148  int col_counter = -1;
149  int df_len = int(df.size());
150 
151  // main loop for beat period calculation
152  for (int i = 0; i+winlen < df_len; i+=step) {
153 
154  // get dfframe
155  d_vec_t dfframe(winlen);
156  for (int k=0; k < winlen; k++) {
157  dfframe[k] = df[i+k];
158  }
159  // get rcf vector for current frame
160  d_vec_t rcf(wv_len);
161  get_rcf(dfframe,wv,rcf);
162 
163  rcfmat.push_back( d_vec_t() ); // adds a new column
164  col_counter++;
165  for (int j = 0; j < wv_len; j++) {
166  rcfmat[col_counter].push_back( rcf[j] );
167  }
168  }
169 
170  // now call viterbi decoding function
171  viterbi_decode(rcfmat,wv,beat_period,tempi);
172 }
173 
174 
175 void
176 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
177 {
178  // calculate autocorrelation function
179  // then rcf
180  // just hard code for now... don't really need separate functions to do this
181 
182  // make acf
183 
184  d_vec_t dfframe(dfframe_in);
185 
187 
188  int dfframe_len = int(dfframe.size());
189  int rcf_len = int(rcf.size());
190 
191  d_vec_t acf(dfframe_len);
192 
193  for (int lag = 0; lag < dfframe_len; lag++) {
194  double sum = 0.;
195  double tmp = 0.;
196 
197  for (int n = 0; n < (dfframe_len - lag); n++) {
198  tmp = dfframe[n] * dfframe[n + lag];
199  sum += tmp;
200  }
201  acf[lag] = double(sum/ (dfframe_len - lag));
202  }
203 
204  // now apply comb filtering
205  int numelem = 4;
206 
207  for (int i = 2; i < rcf_len; i++) { // max beat period
208  for (int a = 1; a <= numelem; a++) { // number of comb elements
209  for (int b = 1-a; b <= a-1; b++) { // general state using normalisation of comb elements
210  rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
211  }
212  }
213  }
214 
215  // apply adaptive threshold to rcf
217 
218  double rcfsum =0.;
219  for (int i = 0; i < rcf_len; i++) {
220  rcf[i] += EPS ;
221  rcfsum += rcf[i];
222  }
223 
224  // normalise rcf to sum to unity
225  for (int i = 0; i < rcf_len; i++) {
226  rcf[i] /= (rcfsum + EPS);
227  }
228 }
229 
230 void
231 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
232 {
233  // following Kevin Murphy's Viterbi decoding to get best path of
234  // beat periods through rfcmat
235 
236  int wv_len = int(wv.size());
237 
238  // make transition matrix
239  d_mat_t tmat;
240  for (int i = 0; i < wv_len; i++) {
241  tmat.push_back ( d_vec_t() ); // adds a new column
242  for (int j = 0; j < wv_len; j++) {
243  tmat[i].push_back(0.); // fill with zeros initially
244  }
245  }
246 
247  // variance of Gaussians in transition matrix
248  // formed of Gaussians on diagonal - implies slow tempo change
249  double sigma = 8.;
250  // don't want really short beat periods, or really long ones
251  for (int i = 20; i < wv_len - 20; i++) {
252  for (int j = 20; j < wv_len - 20; j++) {
253  double mu = double(i);
254  tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
255  }
256  }
257 
258  // parameters for Viterbi decoding... this part is taken from
259  // Murphy's matlab
260 
261  d_mat_t delta;
262  i_mat_t psi;
263  for (int i = 0; i < int(rcfmat.size()); i++) {
264  delta.push_back(d_vec_t());
265  psi.push_back(i_vec_t());
266  for (int j = 0; j < int(rcfmat[i].size()); j++) {
267  delta[i].push_back(0.); // fill with zeros initially
268  psi[i].push_back(0); // fill with zeros initially
269  }
270  }
271 
272  int T = int(delta.size());
273 
274  if (T < 2) return; // can't do anything at all meaningful
275 
276  int Q = int(delta[0].size());
277 
278  // initialize first column of delta
279  for (int j = 0; j < Q; j++) {
280  delta[0][j] = wv[j] * rcfmat[0][j];
281  psi[0][j] = 0;
282  }
283 
284  double deltasum = 0.;
285  for (int i = 0; i < Q; i++) {
286  deltasum += delta[0][i];
287  }
288  for (int i = 0; i < Q; i++) {
289  delta[0][i] /= (deltasum + EPS);
290  }
291 
292  for (int t=1; t < T; t++)
293  {
294  d_vec_t tmp_vec(Q);
295 
296  for (int j = 0; j < Q; j++) {
297  for (int i = 0; i < Q; i++) {
298  tmp_vec[i] = delta[t-1][i] * tmat[j][i];
299  }
300 
301  delta[t][j] = get_max_val(tmp_vec);
302 
303  psi[t][j] = get_max_ind(tmp_vec);
304 
305  delta[t][j] *= rcfmat[t][j];
306  }
307 
308  // normalise current delta column
309  double deltasum = 0.;
310  for (int i = 0; i < Q; i++) {
311  deltasum += delta[t][i];
312  }
313  for (int i = 0; i < Q; i++) {
314  delta[t][i] /= (deltasum + EPS);
315  }
316  }
317 
318  i_vec_t bestpath(T);
319  d_vec_t tmp_vec(Q);
320  for (int i = 0; i < Q; i++) {
321  tmp_vec[i] = delta[T-1][i];
322  }
323 
324  // find starting point - best beat period for "last" frame
325  bestpath[T-1] = get_max_ind(tmp_vec);
326 
327  // backtrace through index of maximum values in psi
328  for (int t=T-2; t>0 ;t--) {
329  bestpath[t] = psi[t+1][bestpath[t+1]];
330  }
331 
332  // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
333  bestpath[0] = psi[1][bestpath[1]];
334 
335  int lastind = 0;
336  for (int i = 0; i < T; i++) {
337  int step = 128;
338  for (int j = 0; j < step; j++) {
339  lastind = i*step+j;
340  beat_period[lastind] = bestpath[i];
341  }
342 // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl;
343  }
344 
345  // fill in the last values...
346  for (int i = lastind; i < int(beat_period.size()); i++) {
347  beat_period[i] = beat_period[lastind];
348  }
349 
350  for (int i = 0; i < int(beat_period.size()); i++) {
351  tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
352  }
353 }
354 
355 double
357 {
358  double maxval = 0.;
359  int df_len = int(df.size());
360 
361  for (int i = 0; i < df_len; i++) {
362  if (maxval < df[i]) {
363  maxval = df[i];
364  }
365  }
366 
367  return maxval;
368 }
369 
370 int
372 {
373  double maxval = 0.;
374  int ind = 0;
375  int df_len = int(df.size());
376 
377  for (int i = 0; i < df_len; i++) {
378  if (maxval < df[i]) {
379  maxval = df[i];
380  ind = i;
381  }
382  }
383 
384  return ind;
385 }
386 
387 void
389 {
390  double sum = 0.;
391  int df_len = int(df.size());
392 
393  for (int i = 0; i < df_len; i++) {
394  sum += df[i];
395  }
396 
397  for (int i = 0; i < df_len; i++) {
398  df[i]/= (sum + EPS);
399  }
400 }
401 
402 // MEPD 28/11/12
403 // this function has been updated to allow the "alpha" and "tightness" parameters
404 // of the dynamic program to be set by the user
405 // the default value of alpha = 0.9 and tightness = 4
406 void
407 TempoTrackV2::calculateBeats(const vector<double> &df,
408  const vector<double> &beat_period,
409  vector<double> &beats, double alpha, double tightness)
410 {
411  if (df.empty() || beat_period.empty()) return;
412 
413  int df_len = int(df.size());
414 
415  d_vec_t cumscore(df_len); // store cumulative score
416  i_vec_t backlink(df_len); // backlink (stores best beat locations at each time instant)
417  d_vec_t localscore(df_len); // localscore, for now this is the same as the detection function
418 
419  for (int i = 0; i < df_len; i++) {
420  localscore[i] = df[i];
421  backlink[i] = -1;
422  }
423 
424  //double tightness = 4.;
425  //double alpha = 0.9;
426  // MEPD 28/11/12
427  // debug statements that can be removed.
428 // std::cerr << "alpha" << alpha << std::endl;
429 // std::cerr << "tightness" << tightness << std::endl;
430 
431  // main loop
432  for (int i = 0; i < df_len; i++) {
433 
434  int prange_min = -2*beat_period[i];
435  int prange_max = round(-0.5*beat_period[i]);
436 
437  // transition range
438  int txwt_len = prange_max - prange_min + 1;
439  d_vec_t txwt (txwt_len);
440  d_vec_t scorecands (txwt_len);
441 
442  for (int j = 0; j < txwt_len; j++) {
443 
444  double mu = double(beat_period[i]);
445  txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
446 
447  // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
448  // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
449 
450  int cscore_ind = i + prange_min + j;
451  if (cscore_ind >= 0) {
452  scorecands[j] = txwt[j] * cumscore[cscore_ind];
453  }
454  }
455 
456  // find max value and index of maximum value
457  double vv = get_max_val(scorecands);
458  int xx = get_max_ind(scorecands);
459 
460  cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
461  backlink[i] = i+prange_min+xx;
462 
463 // std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
464  }
465 
466  // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
467  d_vec_t tmp_vec;
468  for (int i = df_len - beat_period[beat_period.size()-1] ; i < df_len; i++) {
469  tmp_vec.push_back(cumscore[i]);
470  }
471 
472  int startpoint = get_max_ind(tmp_vec) +
473  df_len - beat_period[beat_period.size()-1] ;
474 
475  // can happen if no results obtained earlier (e.g. input too short)
476  if (startpoint >= int(backlink.size())) {
477  startpoint = int(backlink.size()) - 1;
478  }
479 
480  // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
481  // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
482  i_vec_t ibeats;
483  ibeats.push_back(startpoint);
484 // std::cerr << "startpoint = " << startpoint << std::endl;
485  while (backlink[ibeats.back()] > 0) {
486 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
487  int b = ibeats.back();
488  if (backlink[b] == b) break; // shouldn't happen... haha
489  ibeats.push_back(backlink[b]);
490  }
491 
492  // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
493  for (int i = 0; i < int(ibeats.size()); i++) {
494  beats.push_back(double(ibeats[ibeats.size() - i - 1]));
495  }
496 }
497 
498 
static void adaptiveThreshold(std::vector< double > &data)
Threshold the input/output vector data against a moving-mean average filter.
void normalise_vec(d_vec_t &df)
void viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &bp, d_vec_t &tempi)
double get_max_val(const d_vec_t &df)
std::vector< std::vector< int > > i_mat_t
Definition: TempoTrackV2.h:72
int get_max_ind(const d_vec_t &df)
#define EPS
std::vector< std::vector< double > > d_mat_t
Definition: TempoTrackV2.h:74
void calculateBeatPeriod(const std::vector< double > &df, std::vector< double > &beatPeriod, std::vector< double > &tempi)
Definition: TempoTrackV2.h:41
void get_rcf(const d_vec_t &dfframe, const d_vec_t &wv, d_vec_t &rcf)
std::vector< double > d_vec_t
Definition: TempoTrackV2.h:73
TempoTrackV2(float sampleRate, int dfIncrement)
Construct a tempo tracker that will operate on beat detection function data calculated from audio at ...
void filter_df(d_vec_t &df)
void calculateBeats(const std::vector< double > &df, const std::vector< double > &beatPeriod, std::vector< double > &beats)
Definition: TempoTrackV2.h:56
std::vector< int > i_vec_t
Definition: TempoTrackV2.h:71