c@277
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
c@277
|
2
|
c@277
|
3 /*
|
c@277
|
4 QM DSP Library
|
c@277
|
5
|
c@277
|
6 Centre for Digital Music, Queen Mary, University of London.
|
c@277
|
7 This file copyright 2008-2009 Matthew Davies and QMUL.
|
c@277
|
8 All rights reserved.
|
c@277
|
9 */
|
c@277
|
10
|
c@277
|
11 #include "TempoTrackV2.h"
|
c@277
|
12
|
c@277
|
13 #include <cmath>
|
c@277
|
14 #include <cstdlib>
|
c@278
|
15 #include <iostream>
|
c@277
|
16
|
c@279
|
17 #include "maths/MathUtilities.h"
|
c@277
|
18
|
c@277
|
19 #define EPS 0.0000008 // just some arbitrary small number
|
c@277
|
20
|
c@279
|
21 TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
|
c@279
|
22 m_rate(rate), m_increment(increment) { }
|
c@277
|
23 TempoTrackV2::~TempoTrackV2() { }
|
c@277
|
24
|
c@277
|
25 void
|
c@277
|
26 TempoTrackV2::filter_df(d_vec_t &df)
|
c@277
|
27 {
|
c@278
|
28 d_vec_t a(3);
|
c@278
|
29 d_vec_t b(3);
|
c@278
|
30 d_vec_t lp_df(df.size());
|
c@277
|
31
|
c@278
|
32 //equivalent in matlab to [b,a] = butter(2,0.4);
|
c@278
|
33 a[0] = 1.0000;
|
c@278
|
34 a[1] = -0.3695;
|
c@278
|
35 a[2] = 0.1958;
|
c@278
|
36 b[0] = 0.2066;
|
c@278
|
37 b[1] = 0.4131;
|
c@278
|
38 b[2] = 0.2066;
|
c@278
|
39
|
c@278
|
40 double inp1 = 0.;
|
c@278
|
41 double inp2 = 0.;
|
c@278
|
42 double out1 = 0.;
|
c@278
|
43 double out2 = 0.;
|
c@277
|
44
|
c@277
|
45
|
c@278
|
46 // forwards filtering
|
c@278
|
47 for (uint i = 0;i < df.size();i++)
|
c@278
|
48 {
|
c@278
|
49 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
|
c@278
|
50 inp2 = inp1;
|
c@278
|
51 inp1 = df[i];
|
c@278
|
52 out2 = out1;
|
c@278
|
53 out1 = lp_df[i];
|
c@278
|
54 }
|
c@277
|
55
|
c@278
|
56 // copy forwards filtering to df...
|
c@278
|
57 // but, time-reversed, ready for backwards filtering
|
c@278
|
58 for (uint i = 0;i < df.size();i++)
|
c@278
|
59 {
|
c@278
|
60 df[i] = lp_df[df.size()-i-1];
|
c@278
|
61 }
|
c@277
|
62
|
c@278
|
63 for (uint i = 0;i < df.size();i++)
|
c@278
|
64 {
|
c@278
|
65 lp_df[i] = 0.;
|
c@278
|
66 }
|
c@277
|
67
|
c@278
|
68 inp1 = 0.; inp2 = 0.;
|
c@278
|
69 out1 = 0.; out2 = 0.;
|
c@277
|
70
|
c@277
|
71 // backwards filetering on time-reversed df
|
c@278
|
72 for (uint i = 0;i < df.size();i++)
|
c@278
|
73 {
|
c@278
|
74 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
|
c@278
|
75 inp2 = inp1;
|
c@278
|
76 inp1 = df[i];
|
c@278
|
77 out2 = out1;
|
c@278
|
78 out1 = lp_df[i];
|
c@278
|
79 }
|
c@277
|
80
|
c@277
|
81 // write the re-reversed (i.e. forward) version back to df
|
c@278
|
82 for (uint i = 0;i < df.size();i++)
|
c@278
|
83 {
|
c@278
|
84 df[i] = lp_df[df.size()-i-1];
|
c@278
|
85 }
|
c@277
|
86 }
|
c@277
|
87
|
c@277
|
88
|
c@277
|
89 void
|
c@278
|
90 TempoTrackV2::calculateBeatPeriod(const d_vec_t &df, d_vec_t &beat_period,
|
c@278
|
91 d_vec_t &tempi)
|
c@277
|
92 {
|
c@278
|
93 // to follow matlab.. split into 512 sample frames with a 128 hop size
|
c@278
|
94 // calculate the acf,
|
c@278
|
95 // then the rcf.. and then stick the rcfs as columns of a matrix
|
c@278
|
96 // then call viterbi decoding with weight vector and transition matrix
|
c@278
|
97 // and get best path
|
c@277
|
98
|
c@278
|
99 uint wv_len = 128;
|
c@278
|
100 double rayparam = 43.;
|
c@277
|
101
|
c@278
|
102 // make rayleigh weighting curve
|
c@278
|
103 d_vec_t wv(wv_len);
|
c@278
|
104 for (uint i=0; i<wv.size(); i++)
|
c@277
|
105 {
|
c@278
|
106 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
|
c@277
|
107 }
|
c@277
|
108
|
c@278
|
109 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
|
c@278
|
110 uint winlen = 512;
|
c@278
|
111 uint step = 128;
|
c@278
|
112
|
c@278
|
113 // matrix to store output of comb filter bank, increment column of matrix at each frame
|
c@278
|
114 d_mat_t rcfmat;
|
c@278
|
115 int col_counter = -1;
|
c@278
|
116
|
c@278
|
117 // main loop for beat period calculation
|
c@278
|
118 for (uint i=0; i<(df.size()-winlen); i+=step)
|
c@278
|
119 {
|
c@278
|
120 // get dfframe
|
c@278
|
121 d_vec_t dfframe(winlen);
|
c@278
|
122 for (uint k=0; k<winlen; k++)
|
c@278
|
123 {
|
c@278
|
124 dfframe[k] = df[i+k];
|
c@278
|
125 }
|
c@278
|
126 // get rcf vector for current frame
|
c@278
|
127 d_vec_t rcf(wv_len);
|
c@278
|
128 get_rcf(dfframe,wv,rcf);
|
c@277
|
129
|
c@278
|
130 rcfmat.push_back( d_vec_t() ); // adds a new column
|
c@278
|
131 col_counter++;
|
c@278
|
132 for (uint j=0; j<rcf.size(); j++)
|
c@278
|
133 {
|
c@278
|
134 rcfmat[col_counter].push_back( rcf[j] );
|
c@278
|
135 }
|
c@278
|
136 }
|
c@278
|
137
|
c@278
|
138 // now call viterbi decoding function
|
c@278
|
139 viterbi_decode(rcfmat,wv,beat_period,tempi);
|
c@277
|
140 }
|
c@277
|
141
|
c@277
|
142
|
c@277
|
143 void
|
c@277
|
144 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
|
c@277
|
145 {
|
c@278
|
146 // calculate autocorrelation function
|
c@278
|
147 // then rcf
|
c@278
|
148 // just hard code for now... don't really need separate functions to do this
|
c@277
|
149
|
c@278
|
150 // make acf
|
c@277
|
151
|
c@278
|
152 d_vec_t dfframe(dfframe_in);
|
c@277
|
153
|
c@279
|
154 MathUtilities::adaptiveThreshold(dfframe);
|
c@277
|
155
|
c@278
|
156 d_vec_t acf(dfframe.size());
|
c@277
|
157
|
c@278
|
158
|
c@278
|
159 for (uint lag=0; lag<dfframe.size(); lag++)
|
c@278
|
160 {
|
c@278
|
161 double sum = 0.;
|
c@278
|
162 double tmp = 0.;
|
c@277
|
163
|
c@278
|
164 for (uint n=0; n<(dfframe.size()-lag); n++)
|
c@278
|
165 {
|
c@278
|
166 tmp = dfframe[n] * dfframe[n+lag];
|
c@278
|
167 sum += tmp;
|
c@278
|
168 }
|
c@278
|
169 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
|
c@278
|
170 }
|
c@277
|
171
|
c@278
|
172 // now apply comb filtering
|
c@278
|
173 int numelem = 4;
|
c@278
|
174
|
c@278
|
175 for (uint i = 2;i < rcf.size();i++) // max beat period
|
c@278
|
176 {
|
c@278
|
177 for (int a = 1;a <= numelem;a++) // number of comb elements
|
c@278
|
178 {
|
c@278
|
179 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
|
c@278
|
180 {
|
c@278
|
181 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
|
c@278
|
182 }
|
c@278
|
183 }
|
c@278
|
184 }
|
c@278
|
185
|
c@278
|
186 // apply adaptive threshold to rcf
|
c@279
|
187 MathUtilities::adaptiveThreshold(rcf);
|
c@278
|
188
|
c@278
|
189 double rcfsum =0.;
|
c@278
|
190 for (uint i=0; i<rcf.size(); i++)
|
c@278
|
191 {
|
c@278
|
192 rcf[i] += EPS ;
|
c@278
|
193 rcfsum += rcf[i];
|
c@278
|
194 }
|
c@277
|
195
|
c@278
|
196 // normalise rcf to sum to unity
|
c@278
|
197 for (uint i=0; i<rcf.size(); i++)
|
c@277
|
198 {
|
c@278
|
199 rcf[i] /= (rcfsum + EPS);
|
c@277
|
200 }
|
c@277
|
201 }
|
c@277
|
202
|
c@277
|
203 void
|
c@278
|
204 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
|
c@277
|
205 {
|
c@278
|
206 // following Kevin Murphy's Viterbi decoding to get best path of
|
c@278
|
207 // beat periods through rfcmat
|
c@277
|
208
|
c@278
|
209 // make transition matrix
|
c@278
|
210 d_mat_t tmat;
|
c@278
|
211 for (uint i=0;i<wv.size();i++)
|
c@278
|
212 {
|
c@278
|
213 tmat.push_back ( d_vec_t() ); // adds a new column
|
c@278
|
214 for (uint j=0; j<wv.size(); j++)
|
c@278
|
215 {
|
c@278
|
216 tmat[i].push_back(0.); // fill with zeros initially
|
c@278
|
217 }
|
c@278
|
218 }
|
c@278
|
219
|
c@278
|
220 // variance of Gaussians in transition matrix
|
c@278
|
221 // formed of Gaussians on diagonal - implies slow tempo change
|
c@278
|
222 double sigma = 8.;
|
c@278
|
223 // don't want really short beat periods, or really long ones
|
c@278
|
224 for (uint i=20;i <wv.size()-20; i++)
|
c@278
|
225 {
|
c@278
|
226 for (uint j=20; j<wv.size()-20; j++)
|
c@278
|
227 {
|
c@278
|
228 double mu = static_cast<double>(i);
|
c@278
|
229 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
|
c@278
|
230 }
|
c@278
|
231 }
|
c@277
|
232
|
c@278
|
233 // parameters for Viterbi decoding... this part is taken from
|
c@278
|
234 // Murphy's matlab
|
c@277
|
235
|
c@278
|
236 d_mat_t delta;
|
c@278
|
237 i_mat_t psi;
|
c@278
|
238 for (uint i=0;i <rcfmat.size(); i++)
|
c@278
|
239 {
|
c@278
|
240 delta.push_back( d_vec_t());
|
c@278
|
241 psi.push_back( i_vec_t());
|
c@278
|
242 for (uint j=0; j<rcfmat[i].size(); j++)
|
c@278
|
243 {
|
c@278
|
244 delta[i].push_back(0.); // fill with zeros initially
|
c@278
|
245 psi[i].push_back(0); // fill with zeros initially
|
c@278
|
246 }
|
c@278
|
247 }
|
c@277
|
248
|
c@277
|
249
|
c@278
|
250 uint T = delta.size();
|
c@278
|
251 uint Q = delta[0].size();
|
c@277
|
252
|
c@278
|
253 // initialize first column of delta
|
c@277
|
254 for (uint j=0; j<Q; j++)
|
c@277
|
255 {
|
c@278
|
256 delta[0][j] = wv[j] * rcfmat[0][j];
|
c@278
|
257 psi[0][j] = 0;
|
c@277
|
258 }
|
c@278
|
259
|
c@277
|
260 double deltasum = 0.;
|
c@277
|
261 for (uint i=0; i<Q; i++)
|
c@277
|
262 {
|
c@278
|
263 deltasum += delta[0][i];
|
c@277
|
264 }
|
c@277
|
265 for (uint i=0; i<Q; i++)
|
c@277
|
266 {
|
c@278
|
267 delta[0][i] /= (deltasum + EPS);
|
c@277
|
268 }
|
c@277
|
269
|
c@277
|
270
|
c@278
|
271 for (uint t=1; t<T; t++)
|
c@278
|
272 {
|
c@278
|
273 d_vec_t tmp_vec(Q);
|
c@277
|
274
|
c@278
|
275 for (uint j=0; j<Q; j++)
|
c@278
|
276 {
|
c@278
|
277 for (uint i=0; i<Q; i++)
|
c@278
|
278 {
|
c@278
|
279 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
|
c@278
|
280 }
|
c@278
|
281
|
c@278
|
282 delta[t][j] = get_max_val(tmp_vec);
|
c@277
|
283
|
c@278
|
284 psi[t][j] = get_max_ind(tmp_vec);
|
c@278
|
285
|
c@278
|
286 delta[t][j] *= rcfmat[t][j];
|
c@278
|
287 }
|
c@277
|
288
|
c@278
|
289 // normalise current delta column
|
c@278
|
290 double deltasum = 0.;
|
c@278
|
291 for (uint i=0; i<Q; i++)
|
c@278
|
292 {
|
c@278
|
293 deltasum += delta[t][i];
|
c@278
|
294 }
|
c@278
|
295 for (uint i=0; i<Q; i++)
|
c@278
|
296 {
|
c@278
|
297 delta[t][i] /= (deltasum + EPS);
|
c@278
|
298 }
|
c@278
|
299 }
|
c@277
|
300
|
c@278
|
301 i_vec_t bestpath(T);
|
c@278
|
302 d_vec_t tmp_vec(Q);
|
c@278
|
303 for (uint i=0; i<Q; i++)
|
c@278
|
304 {
|
c@278
|
305 tmp_vec[i] = delta[T-1][i];
|
c@278
|
306 }
|
c@277
|
307
|
c@278
|
308 // find starting point - best beat period for "last" frame
|
c@278
|
309 bestpath[T-1] = get_max_ind(tmp_vec);
|
c@278
|
310
|
c@278
|
311 // backtrace through index of maximum values in psi
|
c@278
|
312 for (uint t=T-2; t>0 ;t--)
|
c@278
|
313 {
|
c@278
|
314 bestpath[t] = psi[t+1][bestpath[t+1]];
|
c@278
|
315 }
|
c@277
|
316
|
c@278
|
317 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
|
c@278
|
318 bestpath[0] = psi[1][bestpath[1]];
|
c@277
|
319
|
c@278
|
320 uint lastind = 0;
|
c@278
|
321 for (uint i=0; i<T; i++)
|
c@278
|
322 {
|
c@278
|
323 uint step = 128;
|
c@278
|
324 for (uint j=0; j<step; j++)
|
c@278
|
325 {
|
c@278
|
326 lastind = i*step+j;
|
c@278
|
327 beat_period[lastind] = bestpath[i];
|
c@278
|
328 }
|
c@278
|
329 }
|
c@277
|
330
|
c@278
|
331 //fill in the last values...
|
c@278
|
332 for (uint i=lastind; i<beat_period.size(); i++)
|
c@278
|
333 {
|
c@278
|
334 beat_period[i] = beat_period[lastind];
|
c@278
|
335 }
|
c@277
|
336
|
c@278
|
337 for (uint i = 0; i < beat_period.size(); i++)
|
c@277
|
338 {
|
c@279
|
339 tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
|
c@277
|
340 }
|
c@277
|
341 }
|
c@277
|
342
|
c@277
|
343 double
|
c@277
|
344 TempoTrackV2::get_max_val(const d_vec_t &df)
|
c@277
|
345 {
|
c@278
|
346 double maxval = 0.;
|
c@278
|
347 for (uint i=0; i<df.size(); i++)
|
c@277
|
348 {
|
c@278
|
349 if (maxval < df[i])
|
c@278
|
350 {
|
c@278
|
351 maxval = df[i];
|
c@278
|
352 }
|
c@277
|
353 }
|
c@277
|
354
|
c@278
|
355 return maxval;
|
c@277
|
356 }
|
c@277
|
357
|
c@277
|
358 int
|
c@277
|
359 TempoTrackV2::get_max_ind(const d_vec_t &df)
|
c@277
|
360 {
|
c@278
|
361 double maxval = 0.;
|
c@278
|
362 int ind = 0;
|
c@278
|
363 for (uint i=0; i<df.size(); i++)
|
c@277
|
364 {
|
c@278
|
365 if (maxval < df[i])
|
c@278
|
366 {
|
c@278
|
367 maxval = df[i];
|
c@278
|
368 ind = i;
|
c@278
|
369 }
|
c@277
|
370 }
|
c@278
|
371
|
c@278
|
372 return ind;
|
c@277
|
373 }
|
c@277
|
374
|
c@277
|
375 void
|
c@277
|
376 TempoTrackV2::normalise_vec(d_vec_t &df)
|
c@277
|
377 {
|
c@278
|
378 double sum = 0.;
|
c@278
|
379 for (uint i=0; i<df.size(); i++)
|
c@278
|
380 {
|
c@278
|
381 sum += df[i];
|
c@278
|
382 }
|
c@278
|
383
|
c@278
|
384 for (uint i=0; i<df.size(); i++)
|
c@278
|
385 {
|
c@278
|
386 df[i]/= (sum + EPS);
|
c@278
|
387 }
|
c@277
|
388 }
|
c@277
|
389
|
c@277
|
390 void
|
c@277
|
391 TempoTrackV2::calculateBeats(const d_vec_t &df, const d_vec_t &beat_period,
|
c@277
|
392 d_vec_t &beats)
|
c@277
|
393 {
|
c@278
|
394 d_vec_t cumscore(df.size()); // store cumulative score
|
c@278
|
395 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
|
c@278
|
396 d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
|
c@277
|
397
|
c@278
|
398 for (uint i=0; i<df.size(); i++)
|
c@277
|
399 {
|
c@278
|
400 localscore[i] = df[i];
|
c@278
|
401 backlink[i] = -1;
|
c@277
|
402 }
|
c@277
|
403
|
c@278
|
404 double tightness = 4.;
|
c@278
|
405 double alpha = 0.9;
|
c@277
|
406
|
c@278
|
407 // main loop
|
c@278
|
408 for (uint i=0; i<localscore.size(); i++)
|
c@278
|
409 {
|
c@278
|
410 int prange_min = -2*beat_period[i];
|
c@278
|
411 int prange_max = round(-0.5*beat_period[i]);
|
c@277
|
412
|
c@278
|
413 // transition range
|
c@278
|
414 d_vec_t txwt (prange_max - prange_min + 1);
|
c@278
|
415 d_vec_t scorecands (txwt.size());
|
c@277
|
416
|
c@278
|
417 for (uint j=0;j<txwt.size();j++)
|
c@278
|
418 {
|
c@278
|
419 double mu = static_cast<double> (beat_period[i]);
|
c@278
|
420 txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
|
c@277
|
421
|
c@278
|
422 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
|
c@278
|
423 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
|
c@277
|
424
|
c@278
|
425 int cscore_ind = i+prange_min+j;
|
c@278
|
426 if (cscore_ind >= 0)
|
c@278
|
427 {
|
c@278
|
428 scorecands[j] = txwt[j] * cumscore[cscore_ind];
|
c@278
|
429 }
|
c@278
|
430 }
|
c@277
|
431
|
c@278
|
432 // find max value and index of maximum value
|
c@278
|
433 double vv = get_max_val(scorecands);
|
c@278
|
434 int xx = get_max_ind(scorecands);
|
c@277
|
435
|
c@278
|
436 cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
|
c@278
|
437 backlink[i] = i+prange_min+xx;
|
c@278
|
438 }
|
c@278
|
439
|
c@278
|
440 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
|
c@278
|
441 d_vec_t tmp_vec;
|
c@278
|
442 for (uint i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
|
c@278
|
443 {
|
c@278
|
444 tmp_vec.push_back(cumscore[i]);
|
c@278
|
445 }
|
c@278
|
446
|
c@278
|
447 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
|
c@278
|
448
|
c@278
|
449 // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
|
c@278
|
450 // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
|
c@278
|
451 i_vec_t ibeats;
|
c@278
|
452 ibeats.push_back(startpoint);
|
c@278
|
453 while (backlink[ibeats.back()] > 0)
|
c@278
|
454 {
|
c@278
|
455 ibeats.push_back(backlink[ibeats.back()]);
|
c@278
|
456 }
|
c@277
|
457
|
c@278
|
458 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
|
c@278
|
459 for (uint i=0; i<ibeats.size(); i++)
|
c@278
|
460 {
|
c@278
|
461 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
|
c@278
|
462 }
|
c@277
|
463 }
|
c@277
|
464
|
c@277
|
465
|