Mercurial > hg > qm-dsp
comparison dsp/tempotracking/TempoTrackV2.cpp @ 52:4d1f32efcafd
* Add Matthew's newer beat tracking implementation
author | cannam |
---|---|
date | Tue, 20 Jan 2009 15:01:01 +0000 |
parents | |
children | 796170a9c8e4 |
comparison
equal
deleted
inserted
replaced
51:114e833c07ac | 52:4d1f32efcafd |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 QM DSP Library | |
5 | |
6 Centre for Digital Music, Queen Mary, University of London. | |
7 This file copyright 2008-2009 Matthew Davies and QMUL. | |
8 All rights reserved. | |
9 */ | |
10 | |
11 #include "TempoTrackV2.h" | |
12 | |
13 #include <cmath> | |
14 #include <cstdlib> | |
15 | |
16 | |
17 //#define FRAMESIZE 512 | |
18 //#define BIGFRAMESIZE 1024 | |
19 #define TWOPI 6.283185307179586232 | |
20 #define EPS 0.0000008 // just some arbitrary small number | |
21 | |
22 TempoTrackV2::TempoTrackV2() { } | |
23 TempoTrackV2::~TempoTrackV2() { } | |
24 | |
25 void | |
26 TempoTrackV2::adapt_thresh(d_vec_t &df) | |
27 { | |
28 | |
29 d_vec_t smoothed(df.size()); | |
30 | |
31 int p_post = 7; | |
32 int p_pre = 8; | |
33 | |
34 int t = std::min(static_cast<int>(df.size()),p_post); // what is smaller, p_post of df size. This is to avoid accessing outside of arrays | |
35 | |
36 // find threshold for first 't' samples, where a full average cannot be computed yet | |
37 for (int i = 0;i <= t;i++) | |
38 { | |
39 int k = std::min((i+p_pre),static_cast<int>(df.size())); | |
40 smoothed[i] = mean_array(df,1,k); | |
41 } | |
42 // find threshold for bulk of samples across a moving average from [i-p_pre,i+p_post] | |
43 for (uint i = t+1;i < df.size()-p_post;i++) | |
44 { | |
45 smoothed[i] = mean_array(df,i-p_pre,i+p_post); | |
46 } | |
47 // for last few samples calculate threshold, again, not enough samples to do as above | |
48 for (uint i = df.size()-p_post;i < df.size();i++) | |
49 { | |
50 int k = std::max((static_cast<int> (i) -p_post),1); | |
51 smoothed[i] = mean_array(df,k,df.size()); | |
52 } | |
53 | |
54 // subtract the threshold from the detection function and check that it is not less than 0 | |
55 for (uint i = 0;i < df.size();i++) | |
56 { | |
57 df[i] -= smoothed[i]; | |
58 if (df[i] < 0) | |
59 { | |
60 df[i] = 0; | |
61 } | |
62 } | |
63 } | |
64 | |
65 double | |
66 TempoTrackV2::mean_array(const d_vec_t &dfin,int start,int end) | |
67 { | |
68 | |
69 double sum = 0.; | |
70 | |
71 // find sum | |
72 for (int i = start;i < end+1;i++) | |
73 { | |
74 sum += dfin[i]; | |
75 } | |
76 | |
77 return static_cast<double> (sum / (end - start + 1) ); // average and return | |
78 } | |
79 | |
80 void | |
81 TempoTrackV2::filter_df(d_vec_t &df) | |
82 { | |
83 | |
84 | |
85 d_vec_t a(3); | |
86 d_vec_t b(3); | |
87 d_vec_t lp_df(df.size()); | |
88 | |
89 //equivalent in matlab to [b,a] = butter(2,0.4); | |
90 a[0] = 1.0000; | |
91 a[1] = -0.3695; | |
92 a[2] = 0.1958; | |
93 b[0] = 0.2066; | |
94 b[1] = 0.4131; | |
95 b[2] = 0.2066; | |
96 | |
97 double inp1 = 0.; | |
98 double inp2 = 0.; | |
99 double out1 = 0.; | |
100 double out2 = 0.; | |
101 | |
102 | |
103 // forwards filtering | |
104 for (uint i = 0;i < df.size();i++) | |
105 { | |
106 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2; | |
107 inp2 = inp1; | |
108 inp1 = df[i]; | |
109 out2 = out1; | |
110 out1 = lp_df[i]; | |
111 } | |
112 | |
113 | |
114 // copy forwards filtering to df... | |
115 // but, time-reversed, ready for backwards filtering | |
116 for (uint i = 0;i < df.size();i++) | |
117 { | |
118 df[i] = lp_df[df.size()-i]; | |
119 } | |
120 | |
121 for (uint i = 0;i < df.size();i++) | |
122 { | |
123 lp_df[i] = 0.; | |
124 } | |
125 | |
126 inp1 = 0.; inp2 = 0.; | |
127 out1 = 0.; out2 = 0.; | |
128 | |
129 // backwards filetering on time-reversed df | |
130 for (uint i = 0;i < df.size();i++) | |
131 { | |
132 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2; | |
133 inp2 = inp1; | |
134 inp1 = df[i]; | |
135 out2 = out1; | |
136 out1 = lp_df[i]; | |
137 } | |
138 | |
139 // write the re-reversed (i.e. forward) version back to df | |
140 for (uint i = 0;i < df.size();i++) | |
141 { | |
142 df[i] = lp_df[df.size()-i]; | |
143 } | |
144 | |
145 | |
146 } | |
147 | |
148 | |
149 void | |
150 TempoTrackV2::calculateBeatPeriod(const d_vec_t &df, d_vec_t &beat_period) | |
151 { | |
152 | |
153 // to follow matlab.. split into 512 sample frames with a 128 hop size | |
154 // calculate the acf, | |
155 // then the rcf.. and then stick the rcfs as columns of a matrix | |
156 // then call viterbi decoding with weight vector and transition matrix | |
157 // and get best path | |
158 | |
159 uint wv_len = 128; | |
160 double rayparam = 43.; | |
161 | |
162 // make rayleigh weighting curve | |
163 d_vec_t wv(wv_len); | |
164 for (uint i=0; i<wv.size(); i++) | |
165 { | |
166 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.))); | |
167 } | |
168 | |
169 | |
170 uint winlen = 512; | |
171 uint step = 128; | |
172 | |
173 d_mat_t rcfmat; | |
174 int col_counter = -1; | |
175 // main loop for beat period calculation | |
176 for (uint i=0; i<(df.size()-winlen); i+=step) | |
177 { | |
178 // get dfframe | |
179 d_vec_t dfframe(winlen); | |
180 for (uint k=0; k<winlen; k++) | |
181 { | |
182 dfframe[k] = df[i+k]; | |
183 } | |
184 // get rcf vector for current frame | |
185 d_vec_t rcf(wv_len); | |
186 get_rcf(dfframe,wv,rcf); | |
187 | |
188 rcfmat.push_back( d_vec_t() ); // adds a new column | |
189 col_counter++; | |
190 for (uint j=0; j<rcf.size(); j++) | |
191 { | |
192 rcfmat[col_counter].push_back( rcf[j] ); | |
193 } | |
194 | |
195 } | |
196 | |
197 // now call viterbi decoding function | |
198 viterbi_decode(rcfmat,wv,beat_period); | |
199 | |
200 | |
201 | |
202 } | |
203 | |
204 | |
205 void | |
206 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf) | |
207 { | |
208 // calculate autocorrelation function | |
209 // then rcf | |
210 // just hard code for now... don't really need separate functions to do this | |
211 | |
212 // make acf | |
213 | |
214 d_vec_t dfframe(dfframe_in); | |
215 | |
216 adapt_thresh(dfframe); | |
217 | |
218 d_vec_t acf(dfframe.size()); | |
219 | |
220 | |
221 for (uint lag=0; lag<dfframe.size(); lag++) | |
222 { | |
223 | |
224 double sum = 0.; | |
225 double tmp = 0.; | |
226 | |
227 for (uint n=0; n<(dfframe.size()-lag); n++) | |
228 { | |
229 tmp = dfframe[n] * dfframe[n+lag]; | |
230 sum += tmp; | |
231 } | |
232 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag)); | |
233 } | |
234 | |
235 | |
236 // for (uint i=0; i<dfframe.size(); i++) | |
237 // { | |
238 // cout << dfframe[i] << " " << acf[i] << endl; | |
239 // } | |
240 | |
241 // cout << "~~~~~~~~~~~~~~" << endl; | |
242 | |
243 | |
244 | |
245 | |
246 | |
247 // now apply comb filtering | |
248 int numelem = 4; | |
249 | |
250 // for (uint i = 1;i < 118;i++) // max beat period | |
251 for (uint i = 2;i < rcf.size();i++) // max beat period | |
252 { | |
253 for (int a = 1;a <= numelem;a++) // number of comb elements | |
254 { | |
255 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements | |
256 { | |
257 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row | |
258 } | |
259 } | |
260 } | |
261 | |
262 // apply adaptive threshold to rcf | |
263 adapt_thresh(rcf); | |
264 | |
265 double rcfsum =0.; | |
266 for (uint i=0; i<rcf.size(); i++) | |
267 { | |
268 // rcf[i] *= acf[i]; | |
269 rcf[i] += EPS ; | |
270 rcfsum += rcf[i]; | |
271 } | |
272 | |
273 // normalise rcf to sum to unity | |
274 for (uint i=0; i<rcf.size(); i++) | |
275 { | |
276 rcf[i] /= (rcfsum + EPS); | |
277 } | |
278 | |
279 | |
280 | |
281 } | |
282 | |
283 void | |
284 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period) | |
285 { | |
286 | |
287 // make transition matrix | |
288 d_mat_t tmat; | |
289 for (uint i=0;i<wv.size();i++) | |
290 { | |
291 tmat.push_back ( d_vec_t() ); // adds a new column | |
292 for (uint j=0; j<wv.size(); j++) | |
293 { | |
294 tmat[i].push_back(0.); // fill with zeros initially | |
295 } | |
296 } | |
297 | |
298 double sigma = 8.; | |
299 for (uint i=20;i <wv.size()-20; i++) | |
300 { | |
301 for (uint j=20; j<wv.size()-20; j++) | |
302 { | |
303 double mu = static_cast<double>(i); | |
304 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) ); | |
305 } | |
306 } | |
307 | |
308 d_mat_t delta; | |
309 i_mat_t psi; | |
310 for (uint i=0;i <rcfmat.size(); i++) | |
311 { | |
312 delta.push_back( d_vec_t()); | |
313 psi.push_back( i_vec_t()); | |
314 for (uint j=0; j<rcfmat[i].size(); j++) | |
315 { | |
316 delta[i].push_back(0.); // fill with zeros initially | |
317 psi[i].push_back(0); // fill with zeros initially | |
318 } | |
319 } | |
320 | |
321 | |
322 uint T = delta.size(); | |
323 uint Q = delta[0].size(); | |
324 | |
325 // initialize first column of delta | |
326 for (uint j=0; j<Q; j++) | |
327 { | |
328 delta[0][j] = wv[j] * rcfmat[0][j]; | |
329 psi[0][j] = 0; | |
330 } | |
331 | |
332 double deltasum = 0.; | |
333 for (uint i=0; i<Q; i++) | |
334 { | |
335 deltasum += delta[0][i]; | |
336 } | |
337 for (uint i=0; i<Q; i++) | |
338 { | |
339 delta[0][i] /= (deltasum + EPS); | |
340 } | |
341 | |
342 | |
343 | |
344 for (uint t=1; t<T; t++) | |
345 { | |
346 d_vec_t tmp_vec(Q); | |
347 | |
348 for (uint j=0; j<Q; j++) | |
349 { | |
350 | |
351 for (uint i=0; i<Q; i++) | |
352 { | |
353 tmp_vec[i] = delta[t-1][i] * tmat[j][i]; | |
354 } | |
355 | |
356 delta[t][j] = get_max_val(tmp_vec); | |
357 | |
358 psi[t][j] = get_max_ind(tmp_vec); | |
359 | |
360 delta[t][j] *= rcfmat[t][j]; | |
361 | |
362 | |
363 } | |
364 | |
365 double deltasum = 0.; | |
366 for (uint i=0; i<Q; i++) | |
367 { | |
368 deltasum += delta[t][i]; | |
369 } | |
370 for (uint i=0; i<Q; i++) | |
371 { | |
372 delta[t][i] /= (deltasum + EPS); | |
373 } | |
374 | |
375 | |
376 | |
377 | |
378 } | |
379 | |
380 | |
381 // ofstream tmatfile; | |
382 // tmatfile.open("/home/matthewd/Desktop/tmat.txt"); | |
383 | |
384 // for (uint i=0;i <delta.size(); i++) | |
385 // { | |
386 // for (uint j=0; j<delta[i].size(); j++) | |
387 // { | |
388 // tmatfile << rcfmat[i][j] << endl; | |
389 // } | |
390 // } | |
391 | |
392 // tmatfile.close(); | |
393 | |
394 i_vec_t bestpath(T); | |
395 d_vec_t tmp_vec(Q); | |
396 for (uint i=0; i<Q; i++) | |
397 { | |
398 tmp_vec[i] = delta[T-1][i]; | |
399 } | |
400 | |
401 | |
402 bestpath[T-1] = get_max_ind(tmp_vec); | |
403 | |
404 for (uint t=T-2; t>0 ;t--) | |
405 { | |
406 bestpath[t] = psi[t+1][bestpath[t+1]]; | |
407 } | |
408 // very weird hack! | |
409 bestpath[0] = psi[1][bestpath[1]]; | |
410 | |
411 // for (uint i=0; i<bestpath.size(); i++) | |
412 // { | |
413 // cout << bestpath[i] << endl; | |
414 // } | |
415 | |
416 | |
417 uint lastind = 0; | |
418 for (uint i=0; i<T; i++) | |
419 { | |
420 uint step = 128; | |
421 // cout << bestpath[i] << " " << i << endl; | |
422 for (uint j=0; j<step; j++) | |
423 { | |
424 lastind = i*step+j; | |
425 beat_period[lastind] = bestpath[i]; | |
426 | |
427 } | |
428 } | |
429 | |
430 //fill in the last values... | |
431 for (uint i=lastind; i<beat_period.size(); i++) | |
432 { | |
433 beat_period[i] = beat_period[lastind]; | |
434 } | |
435 | |
436 | |
437 | |
438 } | |
439 | |
440 double | |
441 TempoTrackV2::get_max_val(const d_vec_t &df) | |
442 { | |
443 double maxval = 0.; | |
444 for (uint i=0; i<df.size(); i++) | |
445 { | |
446 | |
447 if (maxval < df[i]) | |
448 { | |
449 maxval = df[i]; | |
450 } | |
451 | |
452 } | |
453 | |
454 | |
455 return maxval; | |
456 | |
457 } | |
458 | |
459 int | |
460 TempoTrackV2::get_max_ind(const d_vec_t &df) | |
461 { | |
462 | |
463 double maxval = 0.; | |
464 int ind = 0; | |
465 for (uint i=0; i<df.size(); i++) | |
466 { | |
467 if (maxval < df[i]) | |
468 { | |
469 maxval = df[i]; | |
470 ind = i; | |
471 } | |
472 | |
473 } | |
474 | |
475 return ind; | |
476 | |
477 } | |
478 | |
479 void | |
480 TempoTrackV2::normalise_vec(d_vec_t &df) | |
481 { | |
482 double sum = 0.; | |
483 for (uint i=0; i<df.size(); i++) | |
484 { | |
485 sum += df[i]; | |
486 } | |
487 | |
488 for (uint i=0; i<df.size(); i++) | |
489 { | |
490 df[i]/= (sum + EPS); | |
491 } | |
492 | |
493 | |
494 } | |
495 | |
496 void | |
497 TempoTrackV2::calculateBeats(const d_vec_t &df, const d_vec_t &beat_period, | |
498 d_vec_t &beats) | |
499 { | |
500 | |
501 d_vec_t cumscore(df.size()); | |
502 i_vec_t backlink(df.size()); | |
503 d_vec_t localscore(df.size()); | |
504 | |
505 // WHEN I FIGURE OUT HOW, I'LL WANT TO DO SOME FILTERING ON THIS... | |
506 for (uint i=0; i<df.size(); i++) | |
507 { | |
508 localscore[i] = df[i]; | |
509 backlink[i] = -1; | |
510 } | |
511 | |
512 double tightness = 4.; | |
513 double alpha = 0.9; | |
514 | |
515 // main loop | |
516 for (uint i=3*beat_period[0]; i<localscore.size(); i++) | |
517 { | |
518 int prange_min = -2*beat_period[i]; | |
519 int prange_max = round(-0.5*beat_period[i]); | |
520 | |
521 d_vec_t txwt (prange_max - prange_min + 1); | |
522 d_vec_t scorecands (txwt.size()); | |
523 | |
524 for (uint j=0;j<txwt.size();j++) | |
525 { | |
526 double mu = static_cast<double> (beat_period[i]); | |
527 txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2)); | |
528 | |
529 scorecands[j] = txwt[j] * cumscore[i+prange_min+j]; | |
530 } | |
531 | |
532 double vv = get_max_val(scorecands); | |
533 int xx = get_max_ind(scorecands); | |
534 | |
535 cumscore[i] = alpha*vv + (1.-alpha)*localscore[i]; | |
536 | |
537 backlink[i] = i+prange_min+xx; | |
538 | |
539 } | |
540 | |
541 | |
542 d_vec_t tmp_vec; | |
543 for (uint i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++) | |
544 { | |
545 tmp_vec.push_back(cumscore[i]); | |
546 } | |
547 | |
548 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ; | |
549 | |
550 i_vec_t ibeats; | |
551 ibeats.push_back(startpoint); | |
552 while (backlink[ibeats.back()] > 3*beat_period[0]) | |
553 { | |
554 ibeats.push_back(backlink[ibeats.back()]); | |
555 } | |
556 | |
557 | |
558 for (uint i=0; i<ibeats.size(); i++) | |
559 { | |
560 | |
561 beats.push_back( static_cast<double>(ibeats[i]) ); | |
562 | |
563 // cout << ibeats[i] << " " << beats[i] <<endl; | |
564 } | |
565 } | |
566 | |
567 |