comparison dsp/tempotracking/TempoTrackV2.cpp @ 52:4d1f32efcafd

* Add Matthew's newer beat tracking implementation
author cannam
date Tue, 20 Jan 2009 15:01:01 +0000
parents
children 796170a9c8e4
comparison
equal deleted inserted replaced
51:114e833c07ac 52:4d1f32efcafd
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 QM DSP Library
5
6 Centre for Digital Music, Queen Mary, University of London.
7 This file copyright 2008-2009 Matthew Davies and QMUL.
8 All rights reserved.
9 */
10
11 #include "TempoTrackV2.h"
12
13 #include <cmath>
14 #include <cstdlib>
15
16
17 //#define FRAMESIZE 512
18 //#define BIGFRAMESIZE 1024
19 #define TWOPI 6.283185307179586232
20 #define EPS 0.0000008 // just some arbitrary small number
21
22 TempoTrackV2::TempoTrackV2() { }
23 TempoTrackV2::~TempoTrackV2() { }
24
25 void
26 TempoTrackV2::adapt_thresh(d_vec_t &df)
27 {
28
29 d_vec_t smoothed(df.size());
30
31 int p_post = 7;
32 int p_pre = 8;
33
34 int t = std::min(static_cast<int>(df.size()),p_post); // what is smaller, p_post of df size. This is to avoid accessing outside of arrays
35
36 // find threshold for first 't' samples, where a full average cannot be computed yet
37 for (int i = 0;i <= t;i++)
38 {
39 int k = std::min((i+p_pre),static_cast<int>(df.size()));
40 smoothed[i] = mean_array(df,1,k);
41 }
42 // find threshold for bulk of samples across a moving average from [i-p_pre,i+p_post]
43 for (uint i = t+1;i < df.size()-p_post;i++)
44 {
45 smoothed[i] = mean_array(df,i-p_pre,i+p_post);
46 }
47 // for last few samples calculate threshold, again, not enough samples to do as above
48 for (uint i = df.size()-p_post;i < df.size();i++)
49 {
50 int k = std::max((static_cast<int> (i) -p_post),1);
51 smoothed[i] = mean_array(df,k,df.size());
52 }
53
54 // subtract the threshold from the detection function and check that it is not less than 0
55 for (uint i = 0;i < df.size();i++)
56 {
57 df[i] -= smoothed[i];
58 if (df[i] < 0)
59 {
60 df[i] = 0;
61 }
62 }
63 }
64
65 double
66 TempoTrackV2::mean_array(const d_vec_t &dfin,int start,int end)
67 {
68
69 double sum = 0.;
70
71 // find sum
72 for (int i = start;i < end+1;i++)
73 {
74 sum += dfin[i];
75 }
76
77 return static_cast<double> (sum / (end - start + 1) ); // average and return
78 }
79
80 void
81 TempoTrackV2::filter_df(d_vec_t &df)
82 {
83
84
85 d_vec_t a(3);
86 d_vec_t b(3);
87 d_vec_t lp_df(df.size());
88
89 //equivalent in matlab to [b,a] = butter(2,0.4);
90 a[0] = 1.0000;
91 a[1] = -0.3695;
92 a[2] = 0.1958;
93 b[0] = 0.2066;
94 b[1] = 0.4131;
95 b[2] = 0.2066;
96
97 double inp1 = 0.;
98 double inp2 = 0.;
99 double out1 = 0.;
100 double out2 = 0.;
101
102
103 // forwards filtering
104 for (uint i = 0;i < df.size();i++)
105 {
106 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
107 inp2 = inp1;
108 inp1 = df[i];
109 out2 = out1;
110 out1 = lp_df[i];
111 }
112
113
114 // copy forwards filtering to df...
115 // but, time-reversed, ready for backwards filtering
116 for (uint i = 0;i < df.size();i++)
117 {
118 df[i] = lp_df[df.size()-i];
119 }
120
121 for (uint i = 0;i < df.size();i++)
122 {
123 lp_df[i] = 0.;
124 }
125
126 inp1 = 0.; inp2 = 0.;
127 out1 = 0.; out2 = 0.;
128
129 // backwards filetering on time-reversed df
130 for (uint i = 0;i < df.size();i++)
131 {
132 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
133 inp2 = inp1;
134 inp1 = df[i];
135 out2 = out1;
136 out1 = lp_df[i];
137 }
138
139 // write the re-reversed (i.e. forward) version back to df
140 for (uint i = 0;i < df.size();i++)
141 {
142 df[i] = lp_df[df.size()-i];
143 }
144
145
146 }
147
148
149 void
150 TempoTrackV2::calculateBeatPeriod(const d_vec_t &df, d_vec_t &beat_period)
151 {
152
153 // to follow matlab.. split into 512 sample frames with a 128 hop size
154 // calculate the acf,
155 // then the rcf.. and then stick the rcfs as columns of a matrix
156 // then call viterbi decoding with weight vector and transition matrix
157 // and get best path
158
159 uint wv_len = 128;
160 double rayparam = 43.;
161
162 // make rayleigh weighting curve
163 d_vec_t wv(wv_len);
164 for (uint i=0; i<wv.size(); i++)
165 {
166 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
167 }
168
169
170 uint winlen = 512;
171 uint step = 128;
172
173 d_mat_t rcfmat;
174 int col_counter = -1;
175 // main loop for beat period calculation
176 for (uint i=0; i<(df.size()-winlen); i+=step)
177 {
178 // get dfframe
179 d_vec_t dfframe(winlen);
180 for (uint k=0; k<winlen; k++)
181 {
182 dfframe[k] = df[i+k];
183 }
184 // get rcf vector for current frame
185 d_vec_t rcf(wv_len);
186 get_rcf(dfframe,wv,rcf);
187
188 rcfmat.push_back( d_vec_t() ); // adds a new column
189 col_counter++;
190 for (uint j=0; j<rcf.size(); j++)
191 {
192 rcfmat[col_counter].push_back( rcf[j] );
193 }
194
195 }
196
197 // now call viterbi decoding function
198 viterbi_decode(rcfmat,wv,beat_period);
199
200
201
202 }
203
204
205 void
206 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
207 {
208 // calculate autocorrelation function
209 // then rcf
210 // just hard code for now... don't really need separate functions to do this
211
212 // make acf
213
214 d_vec_t dfframe(dfframe_in);
215
216 adapt_thresh(dfframe);
217
218 d_vec_t acf(dfframe.size());
219
220
221 for (uint lag=0; lag<dfframe.size(); lag++)
222 {
223
224 double sum = 0.;
225 double tmp = 0.;
226
227 for (uint n=0; n<(dfframe.size()-lag); n++)
228 {
229 tmp = dfframe[n] * dfframe[n+lag];
230 sum += tmp;
231 }
232 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
233 }
234
235
236 // for (uint i=0; i<dfframe.size(); i++)
237 // {
238 // cout << dfframe[i] << " " << acf[i] << endl;
239 // }
240
241 // cout << "~~~~~~~~~~~~~~" << endl;
242
243
244
245
246
247 // now apply comb filtering
248 int numelem = 4;
249
250 // for (uint i = 1;i < 118;i++) // max beat period
251 for (uint i = 2;i < rcf.size();i++) // max beat period
252 {
253 for (int a = 1;a <= numelem;a++) // number of comb elements
254 {
255 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
256 {
257 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
258 }
259 }
260 }
261
262 // apply adaptive threshold to rcf
263 adapt_thresh(rcf);
264
265 double rcfsum =0.;
266 for (uint i=0; i<rcf.size(); i++)
267 {
268 // rcf[i] *= acf[i];
269 rcf[i] += EPS ;
270 rcfsum += rcf[i];
271 }
272
273 // normalise rcf to sum to unity
274 for (uint i=0; i<rcf.size(); i++)
275 {
276 rcf[i] /= (rcfsum + EPS);
277 }
278
279
280
281 }
282
283 void
284 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period)
285 {
286
287 // make transition matrix
288 d_mat_t tmat;
289 for (uint i=0;i<wv.size();i++)
290 {
291 tmat.push_back ( d_vec_t() ); // adds a new column
292 for (uint j=0; j<wv.size(); j++)
293 {
294 tmat[i].push_back(0.); // fill with zeros initially
295 }
296 }
297
298 double sigma = 8.;
299 for (uint i=20;i <wv.size()-20; i++)
300 {
301 for (uint j=20; j<wv.size()-20; j++)
302 {
303 double mu = static_cast<double>(i);
304 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
305 }
306 }
307
308 d_mat_t delta;
309 i_mat_t psi;
310 for (uint i=0;i <rcfmat.size(); i++)
311 {
312 delta.push_back( d_vec_t());
313 psi.push_back( i_vec_t());
314 for (uint j=0; j<rcfmat[i].size(); j++)
315 {
316 delta[i].push_back(0.); // fill with zeros initially
317 psi[i].push_back(0); // fill with zeros initially
318 }
319 }
320
321
322 uint T = delta.size();
323 uint Q = delta[0].size();
324
325 // initialize first column of delta
326 for (uint j=0; j<Q; j++)
327 {
328 delta[0][j] = wv[j] * rcfmat[0][j];
329 psi[0][j] = 0;
330 }
331
332 double deltasum = 0.;
333 for (uint i=0; i<Q; i++)
334 {
335 deltasum += delta[0][i];
336 }
337 for (uint i=0; i<Q; i++)
338 {
339 delta[0][i] /= (deltasum + EPS);
340 }
341
342
343
344 for (uint t=1; t<T; t++)
345 {
346 d_vec_t tmp_vec(Q);
347
348 for (uint j=0; j<Q; j++)
349 {
350
351 for (uint i=0; i<Q; i++)
352 {
353 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
354 }
355
356 delta[t][j] = get_max_val(tmp_vec);
357
358 psi[t][j] = get_max_ind(tmp_vec);
359
360 delta[t][j] *= rcfmat[t][j];
361
362
363 }
364
365 double deltasum = 0.;
366 for (uint i=0; i<Q; i++)
367 {
368 deltasum += delta[t][i];
369 }
370 for (uint i=0; i<Q; i++)
371 {
372 delta[t][i] /= (deltasum + EPS);
373 }
374
375
376
377
378 }
379
380
381 // ofstream tmatfile;
382 // tmatfile.open("/home/matthewd/Desktop/tmat.txt");
383
384 // for (uint i=0;i <delta.size(); i++)
385 // {
386 // for (uint j=0; j<delta[i].size(); j++)
387 // {
388 // tmatfile << rcfmat[i][j] << endl;
389 // }
390 // }
391
392 // tmatfile.close();
393
394 i_vec_t bestpath(T);
395 d_vec_t tmp_vec(Q);
396 for (uint i=0; i<Q; i++)
397 {
398 tmp_vec[i] = delta[T-1][i];
399 }
400
401
402 bestpath[T-1] = get_max_ind(tmp_vec);
403
404 for (uint t=T-2; t>0 ;t--)
405 {
406 bestpath[t] = psi[t+1][bestpath[t+1]];
407 }
408 // very weird hack!
409 bestpath[0] = psi[1][bestpath[1]];
410
411 // for (uint i=0; i<bestpath.size(); i++)
412 // {
413 // cout << bestpath[i] << endl;
414 // }
415
416
417 uint lastind = 0;
418 for (uint i=0; i<T; i++)
419 {
420 uint step = 128;
421 // cout << bestpath[i] << " " << i << endl;
422 for (uint j=0; j<step; j++)
423 {
424 lastind = i*step+j;
425 beat_period[lastind] = bestpath[i];
426
427 }
428 }
429
430 //fill in the last values...
431 for (uint i=lastind; i<beat_period.size(); i++)
432 {
433 beat_period[i] = beat_period[lastind];
434 }
435
436
437
438 }
439
440 double
441 TempoTrackV2::get_max_val(const d_vec_t &df)
442 {
443 double maxval = 0.;
444 for (uint i=0; i<df.size(); i++)
445 {
446
447 if (maxval < df[i])
448 {
449 maxval = df[i];
450 }
451
452 }
453
454
455 return maxval;
456
457 }
458
459 int
460 TempoTrackV2::get_max_ind(const d_vec_t &df)
461 {
462
463 double maxval = 0.;
464 int ind = 0;
465 for (uint i=0; i<df.size(); i++)
466 {
467 if (maxval < df[i])
468 {
469 maxval = df[i];
470 ind = i;
471 }
472
473 }
474
475 return ind;
476
477 }
478
479 void
480 TempoTrackV2::normalise_vec(d_vec_t &df)
481 {
482 double sum = 0.;
483 for (uint i=0; i<df.size(); i++)
484 {
485 sum += df[i];
486 }
487
488 for (uint i=0; i<df.size(); i++)
489 {
490 df[i]/= (sum + EPS);
491 }
492
493
494 }
495
496 void
497 TempoTrackV2::calculateBeats(const d_vec_t &df, const d_vec_t &beat_period,
498 d_vec_t &beats)
499 {
500
501 d_vec_t cumscore(df.size());
502 i_vec_t backlink(df.size());
503 d_vec_t localscore(df.size());
504
505 // WHEN I FIGURE OUT HOW, I'LL WANT TO DO SOME FILTERING ON THIS...
506 for (uint i=0; i<df.size(); i++)
507 {
508 localscore[i] = df[i];
509 backlink[i] = -1;
510 }
511
512 double tightness = 4.;
513 double alpha = 0.9;
514
515 // main loop
516 for (uint i=3*beat_period[0]; i<localscore.size(); i++)
517 {
518 int prange_min = -2*beat_period[i];
519 int prange_max = round(-0.5*beat_period[i]);
520
521 d_vec_t txwt (prange_max - prange_min + 1);
522 d_vec_t scorecands (txwt.size());
523
524 for (uint j=0;j<txwt.size();j++)
525 {
526 double mu = static_cast<double> (beat_period[i]);
527 txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
528
529 scorecands[j] = txwt[j] * cumscore[i+prange_min+j];
530 }
531
532 double vv = get_max_val(scorecands);
533 int xx = get_max_ind(scorecands);
534
535 cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
536
537 backlink[i] = i+prange_min+xx;
538
539 }
540
541
542 d_vec_t tmp_vec;
543 for (uint i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
544 {
545 tmp_vec.push_back(cumscore[i]);
546 }
547
548 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
549
550 i_vec_t ibeats;
551 ibeats.push_back(startpoint);
552 while (backlink[ibeats.back()] > 3*beat_period[0])
553 {
554 ibeats.push_back(backlink[ibeats.back()]);
555 }
556
557
558 for (uint i=0; i<ibeats.size(); i++)
559 {
560
561 beats.push_back( static_cast<double>(ibeats[i]) );
562
563 // cout << ibeats[i] << " " << beats[i] <<endl;
564 }
565 }
566
567