Mercurial > hg > qm-dsp
comparison dsp/tempotracking/TempoTrackV2.cpp @ 100:d7619173d43c mepd_new_params
Now includes latest changes from MEPD:
* TempoTrackV2::calculatebeats method - exposes "alpha" and "tightness" parameters
* TempoTrackV2::calculateBeatPeriod - user can specify an inputtempo (in BPM) and "constraintempo" (bool)
author | luisf <luis.figueira@eecs.qmul.ac.uk> |
---|---|
date | Wed, 12 Dec 2012 15:10:38 +0000 |
parents | e5907ae6de17 |
children | 1e433aaa44ad |
comparison
equal
deleted
inserted
replaced
99:505ed5f42800 | 100:d7619173d43c |
---|---|
39 a[1] = -0.3695; | 39 a[1] = -0.3695; |
40 a[2] = 0.1958; | 40 a[2] = 0.1958; |
41 b[0] = 0.2066; | 41 b[0] = 0.2066; |
42 b[1] = 0.4131; | 42 b[1] = 0.4131; |
43 b[2] = 0.2066; | 43 b[2] = 0.2066; |
44 | 44 |
45 double inp1 = 0.; | 45 double inp1 = 0.; |
46 double inp2 = 0.; | 46 double inp2 = 0.; |
47 double out1 = 0.; | 47 double out1 = 0.; |
48 double out2 = 0.; | 48 double out2 = 0.; |
49 | 49 |
65 df[i] = lp_df[df.size()-i-1]; | 65 df[i] = lp_df[df.size()-i-1]; |
66 } | 66 } |
67 | 67 |
68 for (unsigned int i = 0;i < df.size();i++) | 68 for (unsigned int i = 0;i < df.size();i++) |
69 { | 69 { |
70 lp_df[i] = 0.; | 70 lp_df[i] = 0.; |
71 } | 71 } |
72 | 72 |
73 inp1 = 0.; inp2 = 0.; | 73 inp1 = 0.; inp2 = 0.; |
74 out1 = 0.; out2 = 0.; | 74 out1 = 0.; out2 = 0.; |
75 | 75 |
89 df[i] = lp_df[df.size()-i-1]; | 89 df[i] = lp_df[df.size()-i-1]; |
90 } | 90 } |
91 } | 91 } |
92 | 92 |
93 | 93 |
94 // MEPD 28/11/12 | |
95 // This function now allows for a user to specify an inputtempo (in BPM) | |
96 // and a flag "constraintempo" which replaces the general rayleigh weighting for periodicities | |
97 // with a gaussian which is centered around the input tempo | |
98 // Note, if inputtempo = 120 and constraintempo = false, then functionality is | |
99 // as it was before | |
94 void | 100 void |
95 TempoTrackV2::calculateBeatPeriod(const vector<double> &df, | 101 TempoTrackV2::calculateBeatPeriod(const vector<double> &df, |
96 vector<double> &beat_period, | 102 vector<double> &beat_period, |
97 vector<double> &tempi) | 103 vector<double> &tempi, |
104 double inputtempo, bool constraintempo) | |
98 { | 105 { |
99 // to follow matlab.. split into 512 sample frames with a 128 hop size | 106 // to follow matlab.. split into 512 sample frames with a 128 hop size |
100 // calculate the acf, | 107 // calculate the acf, |
101 // then the rcf.. and then stick the rcfs as columns of a matrix | 108 // then the rcf.. and then stick the rcfs as columns of a matrix |
102 // then call viterbi decoding with weight vector and transition matrix | 109 // then call viterbi decoding with weight vector and transition matrix |
103 // and get best path | 110 // and get best path |
104 | 111 |
105 unsigned int wv_len = 128; | 112 unsigned int wv_len = 128; |
106 double rayparam = 43.; | 113 |
114 // MEPD 28/11/12 | |
115 // the default value of inputtempo in the beat tracking plugin is 120 | |
116 // so if the user specifies a different inputtempo, the rayparam will be updated | |
117 // accordingly. | |
118 // note: 60*44100/512 is a magic number | |
119 // this might (will?) break if a user specifies a different frame rate for the onset detection function | |
120 double rayparam = (60*44100/512)/inputtempo; | |
121 | |
122 // these debug statements can be removed. | |
123 std::cout << "inputtempo" << inputtempo << std::endl; | |
124 std::cout << "rayparam" << rayparam << std::endl; | |
125 std::cout << "constraintempo" << constraintempo << std::endl; | |
107 | 126 |
108 // make rayleigh weighting curve | 127 // make rayleigh weighting curve |
109 d_vec_t wv(wv_len); | 128 d_vec_t wv(wv_len); |
110 for (unsigned int i=0; i<wv.size(); i++) | 129 |
111 { | 130 // check whether or not to use rayleigh weighting (if constraintempo is false) |
112 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.))); | 131 // or use gaussian weighting it (constraintempo is true) |
132 if (constraintempo) | |
133 { | |
134 for (unsigned int i=0; i<wv.size(); i++) | |
135 { | |
136 // MEPD 28/11/12 | |
137 // do a gaussian weighting instead of rayleigh | |
138 wv[i] = exp( (-1.*pow((static_cast<double> (i)-rayparam),2.)) / (2.*pow(rayparam/4.,2.)) ); | |
139 } | |
140 } | |
141 else | |
142 { | |
143 for (unsigned int i=0; i<wv.size(); i++) | |
144 { | |
145 // MEPD 28/11/12 | |
146 // standard rayleigh weighting over periodicities | |
147 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.))); | |
148 } | |
113 } | 149 } |
114 | 150 |
115 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds) | 151 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds) |
116 unsigned int winlen = 512; | 152 unsigned int winlen = 512; |
117 unsigned int step = 128; | 153 unsigned int step = 128; |
128 for (unsigned int k=0; k<winlen; k++) | 164 for (unsigned int k=0; k<winlen; k++) |
129 { | 165 { |
130 dfframe[k] = df[i+k]; | 166 dfframe[k] = df[i+k]; |
131 } | 167 } |
132 // get rcf vector for current frame | 168 // get rcf vector for current frame |
133 d_vec_t rcf(wv_len); | 169 d_vec_t rcf(wv_len); |
134 get_rcf(dfframe,wv,rcf); | 170 get_rcf(dfframe,wv,rcf); |
135 | 171 |
136 rcfmat.push_back( d_vec_t() ); // adds a new column | 172 rcfmat.push_back( d_vec_t() ); // adds a new column |
137 col_counter++; | 173 col_counter++; |
138 for (unsigned int j=0; j<rcf.size(); j++) | 174 for (unsigned int j=0; j<rcf.size(); j++) |
139 { | 175 { |
140 rcfmat[col_counter].push_back( rcf[j] ); | 176 rcfmat[col_counter].push_back( rcf[j] ); |
141 } | 177 } |
142 } | 178 } |
143 | 179 |
144 // now call viterbi decoding function | 180 // now call viterbi decoding function |
145 viterbi_decode(rcfmat,wv,beat_period,tempi); | 181 viterbi_decode(rcfmat,wv,beat_period,tempi); |
146 } | 182 } |
147 | 183 |
148 | 184 |
159 | 195 |
160 MathUtilities::adaptiveThreshold(dfframe); | 196 MathUtilities::adaptiveThreshold(dfframe); |
161 | 197 |
162 d_vec_t acf(dfframe.size()); | 198 d_vec_t acf(dfframe.size()); |
163 | 199 |
164 | 200 |
165 for (unsigned int lag=0; lag<dfframe.size(); lag++) | 201 for (unsigned int lag=0; lag<dfframe.size(); lag++) |
166 { | 202 { |
167 double sum = 0.; | 203 double sum = 0.; |
168 double tmp = 0.; | 204 double tmp = 0.; |
169 | 205 |
170 for (unsigned int n=0; n<(dfframe.size()-lag); n++) | 206 for (unsigned int n=0; n<(dfframe.size()-lag); n++) |
171 { | 207 { |
172 tmp = dfframe[n] * dfframe[n+lag]; | 208 tmp = dfframe[n] * dfframe[n+lag]; |
173 sum += tmp; | 209 sum += tmp; |
174 } | 210 } |
175 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag)); | 211 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag)); |
176 } | 212 } |
177 | 213 |
178 // now apply comb filtering | 214 // now apply comb filtering |
179 int numelem = 4; | 215 int numelem = 4; |
180 | 216 |
181 for (unsigned int i = 2;i < rcf.size();i++) // max beat period | 217 for (unsigned int i = 2;i < rcf.size();i++) // max beat period |
182 { | 218 { |
183 for (int a = 1;a <= numelem;a++) // number of comb elements | 219 for (int a = 1;a <= numelem;a++) // number of comb elements |
184 { | 220 { |
185 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements | 221 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements |
186 { | 222 { |
187 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row | 223 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row |
188 } | 224 } |
189 } | 225 } |
190 } | 226 } |
191 | 227 |
192 // apply adaptive threshold to rcf | 228 // apply adaptive threshold to rcf |
193 MathUtilities::adaptiveThreshold(rcf); | 229 MathUtilities::adaptiveThreshold(rcf); |
194 | 230 |
195 double rcfsum =0.; | 231 double rcfsum =0.; |
196 for (unsigned int i=0; i<rcf.size(); i++) | 232 for (unsigned int i=0; i<rcf.size(); i++) |
197 { | 233 { |
198 rcf[i] += EPS ; | 234 rcf[i] += EPS ; |
199 rcfsum += rcf[i]; | 235 rcfsum += rcf[i]; |
216 d_mat_t tmat; | 252 d_mat_t tmat; |
217 for (unsigned int i=0;i<wv.size();i++) | 253 for (unsigned int i=0;i<wv.size();i++) |
218 { | 254 { |
219 tmat.push_back ( d_vec_t() ); // adds a new column | 255 tmat.push_back ( d_vec_t() ); // adds a new column |
220 for (unsigned int j=0; j<wv.size(); j++) | 256 for (unsigned int j=0; j<wv.size(); j++) |
221 { | 257 { |
222 tmat[i].push_back(0.); // fill with zeros initially | 258 tmat[i].push_back(0.); // fill with zeros initially |
223 } | 259 } |
224 } | 260 } |
225 | 261 |
226 // variance of Gaussians in transition matrix | 262 // variance of Gaussians in transition matrix |
227 // formed of Gaussians on diagonal - implies slow tempo change | 263 // formed of Gaussians on diagonal - implies slow tempo change |
228 double sigma = 8.; | 264 double sigma = 8.; |
229 // don't want really short beat periods, or really long ones | 265 // don't want really short beat periods, or really long ones |
230 for (unsigned int i=20;i <wv.size()-20; i++) | 266 for (unsigned int i=20;i <wv.size()-20; i++) |
231 { | 267 { |
232 for (unsigned int j=20; j<wv.size()-20; j++) | 268 for (unsigned int j=20; j<wv.size()-20; j++) |
233 { | 269 { |
234 double mu = static_cast<double>(i); | 270 double mu = static_cast<double>(i); |
235 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) ); | 271 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) ); |
236 } | 272 } |
237 } | 273 } |
238 | 274 |
244 for (unsigned int i=0;i <rcfmat.size(); i++) | 280 for (unsigned int i=0;i <rcfmat.size(); i++) |
245 { | 281 { |
246 delta.push_back( d_vec_t()); | 282 delta.push_back( d_vec_t()); |
247 psi.push_back( i_vec_t()); | 283 psi.push_back( i_vec_t()); |
248 for (unsigned int j=0; j<rcfmat[i].size(); j++) | 284 for (unsigned int j=0; j<rcfmat[i].size(); j++) |
249 { | 285 { |
250 delta[i].push_back(0.); // fill with zeros initially | 286 delta[i].push_back(0.); // fill with zeros initially |
251 psi[i].push_back(0); // fill with zeros initially | 287 psi[i].push_back(0); // fill with zeros initially |
252 } | 288 } |
253 } | 289 } |
254 | 290 |
263 for (unsigned int j=0; j<Q; j++) | 299 for (unsigned int j=0; j<Q; j++) |
264 { | 300 { |
265 delta[0][j] = wv[j] * rcfmat[0][j]; | 301 delta[0][j] = wv[j] * rcfmat[0][j]; |
266 psi[0][j] = 0; | 302 psi[0][j] = 0; |
267 } | 303 } |
268 | 304 |
269 double deltasum = 0.; | 305 double deltasum = 0.; |
270 for (unsigned int i=0; i<Q; i++) | 306 for (unsigned int i=0; i<Q; i++) |
271 { | 307 { |
272 deltasum += delta[0][i]; | 308 deltasum += delta[0][i]; |
273 } | 309 } |
274 for (unsigned int i=0; i<Q; i++) | 310 for (unsigned int i=0; i<Q; i++) |
275 { | 311 { |
276 delta[0][i] /= (deltasum + EPS); | 312 delta[0][i] /= (deltasum + EPS); |
277 } | 313 } |
278 | 314 |
279 | 315 |
280 for (unsigned int t=1; t<T; t++) | 316 for (unsigned int t=1; t<T; t++) |
281 { | 317 { |
282 d_vec_t tmp_vec(Q); | 318 d_vec_t tmp_vec(Q); |
284 for (unsigned int j=0; j<Q; j++) | 320 for (unsigned int j=0; j<Q; j++) |
285 { | 321 { |
286 for (unsigned int i=0; i<Q; i++) | 322 for (unsigned int i=0; i<Q; i++) |
287 { | 323 { |
288 tmp_vec[i] = delta[t-1][i] * tmat[j][i]; | 324 tmp_vec[i] = delta[t-1][i] * tmat[j][i]; |
289 } | 325 } |
290 | 326 |
291 delta[t][j] = get_max_val(tmp_vec); | 327 delta[t][j] = get_max_val(tmp_vec); |
292 | 328 |
293 psi[t][j] = get_max_ind(tmp_vec); | 329 psi[t][j] = get_max_ind(tmp_vec); |
294 | 330 |
295 delta[t][j] *= rcfmat[t][j]; | 331 delta[t][j] *= rcfmat[t][j]; |
296 } | 332 } |
297 | 333 |
298 // normalise current delta column | 334 // normalise current delta column |
299 double deltasum = 0.; | 335 double deltasum = 0.; |
300 for (unsigned int i=0; i<Q; i++) | 336 for (unsigned int i=0; i<Q; i++) |
301 { | 337 { |
302 deltasum += delta[t][i]; | 338 deltasum += delta[t][i]; |
303 } | 339 } |
304 for (unsigned int i=0; i<Q; i++) | 340 for (unsigned int i=0; i<Q; i++) |
305 { | 341 { |
306 delta[t][i] /= (deltasum + EPS); | 342 delta[t][i] /= (deltasum + EPS); |
307 } | 343 } |
308 } | 344 } |
309 | 345 |
310 i_vec_t bestpath(T); | 346 i_vec_t bestpath(T); |
311 d_vec_t tmp_vec(Q); | 347 d_vec_t tmp_vec(Q); |
312 for (unsigned int i=0; i<Q; i++) | 348 for (unsigned int i=0; i<Q; i++) |
313 { | 349 { |
314 tmp_vec[i] = delta[T-1][i]; | 350 tmp_vec[i] = delta[T-1][i]; |
315 } | 351 } |
316 | 352 |
317 // find starting point - best beat period for "last" frame | 353 // find starting point - best beat period for "last" frame |
318 bestpath[T-1] = get_max_ind(tmp_vec); | 354 bestpath[T-1] = get_max_ind(tmp_vec); |
319 | 355 |
320 // backtrace through index of maximum values in psi | 356 // backtrace through index of maximum values in psi |
321 for (unsigned int t=T-2; t>0 ;t--) | 357 for (unsigned int t=T-2; t>0 ;t--) |
322 { | 358 { |
323 bestpath[t] = psi[t+1][bestpath[t+1]]; | 359 bestpath[t] = psi[t+1][bestpath[t+1]]; |
324 } | 360 } |
326 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0 | 362 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0 |
327 bestpath[0] = psi[1][bestpath[1]]; | 363 bestpath[0] = psi[1][bestpath[1]]; |
328 | 364 |
329 unsigned int lastind = 0; | 365 unsigned int lastind = 0; |
330 for (unsigned int i=0; i<T; i++) | 366 for (unsigned int i=0; i<T; i++) |
331 { | 367 { |
332 unsigned int step = 128; | 368 unsigned int step = 128; |
333 for (unsigned int j=0; j<step; j++) | 369 for (unsigned int j=0; j<step; j++) |
334 { | 370 { |
335 lastind = i*step+j; | 371 lastind = i*step+j; |
336 beat_period[lastind] = bestpath[i]; | 372 beat_period[lastind] = bestpath[i]; |
359 if (maxval < df[i]) | 395 if (maxval < df[i]) |
360 { | 396 { |
361 maxval = df[i]; | 397 maxval = df[i]; |
362 } | 398 } |
363 } | 399 } |
364 | 400 |
365 return maxval; | 401 return maxval; |
366 } | 402 } |
367 | 403 |
368 int | 404 int |
369 TempoTrackV2::get_max_ind(const d_vec_t &df) | 405 TempoTrackV2::get_max_ind(const d_vec_t &df) |
376 { | 412 { |
377 maxval = df[i]; | 413 maxval = df[i]; |
378 ind = i; | 414 ind = i; |
379 } | 415 } |
380 } | 416 } |
381 | 417 |
382 return ind; | 418 return ind; |
383 } | 419 } |
384 | 420 |
385 void | 421 void |
386 TempoTrackV2::normalise_vec(d_vec_t &df) | 422 TempoTrackV2::normalise_vec(d_vec_t &df) |
388 double sum = 0.; | 424 double sum = 0.; |
389 for (unsigned int i=0; i<df.size(); i++) | 425 for (unsigned int i=0; i<df.size(); i++) |
390 { | 426 { |
391 sum += df[i]; | 427 sum += df[i]; |
392 } | 428 } |
393 | 429 |
394 for (unsigned int i=0; i<df.size(); i++) | 430 for (unsigned int i=0; i<df.size(); i++) |
395 { | 431 { |
396 df[i]/= (sum + EPS); | 432 df[i]/= (sum + EPS); |
397 } | 433 } |
398 } | 434 } |
399 | 435 |
436 // MEPD 28/11/12 | |
437 // this function has been updated to allow the "alpha" and "tightness" parameters | |
438 // of the dynamic program to be set by the user | |
439 // the default value of alpha = 0.9 and tightness = 4 | |
400 void | 440 void |
401 TempoTrackV2::calculateBeats(const vector<double> &df, | 441 TempoTrackV2::calculateBeats(const vector<double> &df, |
402 const vector<double> &beat_period, | 442 const vector<double> &beat_period, |
403 vector<double> &beats) | 443 vector<double> &beats, double alpha, double tightness) |
404 { | 444 { |
405 if (df.empty() || beat_period.empty()) return; | 445 if (df.empty() || beat_period.empty()) return; |
406 | 446 |
407 d_vec_t cumscore(df.size()); // store cumulative score | 447 d_vec_t cumscore(df.size()); // store cumulative score |
408 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant) | 448 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant) |
412 { | 452 { |
413 localscore[i] = df[i]; | 453 localscore[i] = df[i]; |
414 backlink[i] = -1; | 454 backlink[i] = -1; |
415 } | 455 } |
416 | 456 |
417 double tightness = 4.; | 457 //double tightness = 4.; |
418 double alpha = 0.9; | 458 //double alpha = 0.9; |
459 // MEPD 28/11/12 | |
460 // debug statements that can be removed. | |
461 std::cout << "alpha" << alpha << std::endl; | |
462 std::cout << "tightness" << tightness << std::endl; | |
419 | 463 |
420 // main loop | 464 // main loop |
421 for (unsigned int i=0; i<localscore.size(); i++) | 465 for (unsigned int i=0; i<localscore.size(); i++) |
422 { | 466 { |
423 int prange_min = -2*beat_period[i]; | 467 int prange_min = -2*beat_period[i]; |
434 | 478 |
435 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J | 479 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J |
436 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE()); | 480 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE()); |
437 | 481 |
438 int cscore_ind = i+prange_min+j; | 482 int cscore_ind = i+prange_min+j; |
439 if (cscore_ind >= 0) | 483 if (cscore_ind >= 0) |
440 { | 484 { |
441 scorecands[j] = txwt[j] * cumscore[cscore_ind]; | 485 scorecands[j] = txwt[j] * cumscore[cscore_ind]; |
442 } | 486 } |
443 } | 487 } |
444 | 488 |
455 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR | 499 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR |
456 d_vec_t tmp_vec; | 500 d_vec_t tmp_vec; |
457 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++) | 501 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++) |
458 { | 502 { |
459 tmp_vec.push_back(cumscore[i]); | 503 tmp_vec.push_back(cumscore[i]); |
460 } | 504 } |
461 | 505 |
462 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ; | 506 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ; |
463 | 507 |
464 // can happen if no results obtained earlier (e.g. input too short) | 508 // can happen if no results obtained earlier (e.g. input too short) |
465 if (startpoint >= backlink.size()) startpoint = backlink.size()-1; | 509 if (startpoint >= backlink.size()) startpoint = backlink.size()-1; |
474 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl; | 518 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl; |
475 int b = ibeats.back(); | 519 int b = ibeats.back(); |
476 if (backlink[b] == b) break; // shouldn't happen... haha | 520 if (backlink[b] == b) break; // shouldn't happen... haha |
477 ibeats.push_back(backlink[b]); | 521 ibeats.push_back(backlink[b]); |
478 } | 522 } |
479 | 523 |
480 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS | 524 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS |
481 for (unsigned int i=0; i<ibeats.size(); i++) | 525 for (unsigned int i=0; i<ibeats.size(); i++) |
482 { | 526 { |
483 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) ); | 527 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) ); |
484 } | 528 } |
485 } | 529 } |
486 | 530 |
487 | 531 |