comparison dsp/tempotracking/TempoTrackV2.cpp @ 100:d7619173d43c mepd_new_params

Now includes latest changes from MEPD: * TempoTrackV2::calculatebeats method - exposes "alpha" and "tightness" parameters * TempoTrackV2::calculateBeatPeriod - user can specify an inputtempo (in BPM) and "constraintempo" (bool)
author luisf <luis.figueira@eecs.qmul.ac.uk>
date Wed, 12 Dec 2012 15:10:38 +0000
parents e5907ae6de17
children 1e433aaa44ad
comparison
equal deleted inserted replaced
99:505ed5f42800 100:d7619173d43c
39 a[1] = -0.3695; 39 a[1] = -0.3695;
40 a[2] = 0.1958; 40 a[2] = 0.1958;
41 b[0] = 0.2066; 41 b[0] = 0.2066;
42 b[1] = 0.4131; 42 b[1] = 0.4131;
43 b[2] = 0.2066; 43 b[2] = 0.2066;
44 44
45 double inp1 = 0.; 45 double inp1 = 0.;
46 double inp2 = 0.; 46 double inp2 = 0.;
47 double out1 = 0.; 47 double out1 = 0.;
48 double out2 = 0.; 48 double out2 = 0.;
49 49
65 df[i] = lp_df[df.size()-i-1]; 65 df[i] = lp_df[df.size()-i-1];
66 } 66 }
67 67
68 for (unsigned int i = 0;i < df.size();i++) 68 for (unsigned int i = 0;i < df.size();i++)
69 { 69 {
70 lp_df[i] = 0.; 70 lp_df[i] = 0.;
71 } 71 }
72 72
73 inp1 = 0.; inp2 = 0.; 73 inp1 = 0.; inp2 = 0.;
74 out1 = 0.; out2 = 0.; 74 out1 = 0.; out2 = 0.;
75 75
89 df[i] = lp_df[df.size()-i-1]; 89 df[i] = lp_df[df.size()-i-1];
90 } 90 }
91 } 91 }
92 92
93 93
94 // MEPD 28/11/12
95 // This function now allows for a user to specify an inputtempo (in BPM)
96 // and a flag "constraintempo" which replaces the general rayleigh weighting for periodicities
97 // with a gaussian which is centered around the input tempo
98 // Note, if inputtempo = 120 and constraintempo = false, then functionality is
99 // as it was before
94 void 100 void
95 TempoTrackV2::calculateBeatPeriod(const vector<double> &df, 101 TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
96 vector<double> &beat_period, 102 vector<double> &beat_period,
97 vector<double> &tempi) 103 vector<double> &tempi,
104 double inputtempo, bool constraintempo)
98 { 105 {
99 // to follow matlab.. split into 512 sample frames with a 128 hop size 106 // to follow matlab.. split into 512 sample frames with a 128 hop size
100 // calculate the acf, 107 // calculate the acf,
101 // then the rcf.. and then stick the rcfs as columns of a matrix 108 // then the rcf.. and then stick the rcfs as columns of a matrix
102 // then call viterbi decoding with weight vector and transition matrix 109 // then call viterbi decoding with weight vector and transition matrix
103 // and get best path 110 // and get best path
104 111
105 unsigned int wv_len = 128; 112 unsigned int wv_len = 128;
106 double rayparam = 43.; 113
114 // MEPD 28/11/12
115 // the default value of inputtempo in the beat tracking plugin is 120
116 // so if the user specifies a different inputtempo, the rayparam will be updated
117 // accordingly.
118 // note: 60*44100/512 is a magic number
119 // this might (will?) break if a user specifies a different frame rate for the onset detection function
120 double rayparam = (60*44100/512)/inputtempo;
121
122 // these debug statements can be removed.
123 std::cout << "inputtempo" << inputtempo << std::endl;
124 std::cout << "rayparam" << rayparam << std::endl;
125 std::cout << "constraintempo" << constraintempo << std::endl;
107 126
108 // make rayleigh weighting curve 127 // make rayleigh weighting curve
109 d_vec_t wv(wv_len); 128 d_vec_t wv(wv_len);
110 for (unsigned int i=0; i<wv.size(); i++) 129
111 { 130 // check whether or not to use rayleigh weighting (if constraintempo is false)
112 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.))); 131 // or use gaussian weighting it (constraintempo is true)
132 if (constraintempo)
133 {
134 for (unsigned int i=0; i<wv.size(); i++)
135 {
136 // MEPD 28/11/12
137 // do a gaussian weighting instead of rayleigh
138 wv[i] = exp( (-1.*pow((static_cast<double> (i)-rayparam),2.)) / (2.*pow(rayparam/4.,2.)) );
139 }
140 }
141 else
142 {
143 for (unsigned int i=0; i<wv.size(); i++)
144 {
145 // MEPD 28/11/12
146 // standard rayleigh weighting over periodicities
147 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
148 }
113 } 149 }
114 150
115 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds) 151 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
116 unsigned int winlen = 512; 152 unsigned int winlen = 512;
117 unsigned int step = 128; 153 unsigned int step = 128;
128 for (unsigned int k=0; k<winlen; k++) 164 for (unsigned int k=0; k<winlen; k++)
129 { 165 {
130 dfframe[k] = df[i+k]; 166 dfframe[k] = df[i+k];
131 } 167 }
132 // get rcf vector for current frame 168 // get rcf vector for current frame
133 d_vec_t rcf(wv_len); 169 d_vec_t rcf(wv_len);
134 get_rcf(dfframe,wv,rcf); 170 get_rcf(dfframe,wv,rcf);
135 171
136 rcfmat.push_back( d_vec_t() ); // adds a new column 172 rcfmat.push_back( d_vec_t() ); // adds a new column
137 col_counter++; 173 col_counter++;
138 for (unsigned int j=0; j<rcf.size(); j++) 174 for (unsigned int j=0; j<rcf.size(); j++)
139 { 175 {
140 rcfmat[col_counter].push_back( rcf[j] ); 176 rcfmat[col_counter].push_back( rcf[j] );
141 } 177 }
142 } 178 }
143 179
144 // now call viterbi decoding function 180 // now call viterbi decoding function
145 viterbi_decode(rcfmat,wv,beat_period,tempi); 181 viterbi_decode(rcfmat,wv,beat_period,tempi);
146 } 182 }
147 183
148 184
159 195
160 MathUtilities::adaptiveThreshold(dfframe); 196 MathUtilities::adaptiveThreshold(dfframe);
161 197
162 d_vec_t acf(dfframe.size()); 198 d_vec_t acf(dfframe.size());
163 199
164 200
165 for (unsigned int lag=0; lag<dfframe.size(); lag++) 201 for (unsigned int lag=0; lag<dfframe.size(); lag++)
166 { 202 {
167 double sum = 0.; 203 double sum = 0.;
168 double tmp = 0.; 204 double tmp = 0.;
169 205
170 for (unsigned int n=0; n<(dfframe.size()-lag); n++) 206 for (unsigned int n=0; n<(dfframe.size()-lag); n++)
171 { 207 {
172 tmp = dfframe[n] * dfframe[n+lag]; 208 tmp = dfframe[n] * dfframe[n+lag];
173 sum += tmp; 209 sum += tmp;
174 } 210 }
175 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag)); 211 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
176 } 212 }
177 213
178 // now apply comb filtering 214 // now apply comb filtering
179 int numelem = 4; 215 int numelem = 4;
180 216
181 for (unsigned int i = 2;i < rcf.size();i++) // max beat period 217 for (unsigned int i = 2;i < rcf.size();i++) // max beat period
182 { 218 {
183 for (int a = 1;a <= numelem;a++) // number of comb elements 219 for (int a = 1;a <= numelem;a++) // number of comb elements
184 { 220 {
185 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements 221 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
186 { 222 {
187 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row 223 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
188 } 224 }
189 } 225 }
190 } 226 }
191 227
192 // apply adaptive threshold to rcf 228 // apply adaptive threshold to rcf
193 MathUtilities::adaptiveThreshold(rcf); 229 MathUtilities::adaptiveThreshold(rcf);
194 230
195 double rcfsum =0.; 231 double rcfsum =0.;
196 for (unsigned int i=0; i<rcf.size(); i++) 232 for (unsigned int i=0; i<rcf.size(); i++)
197 { 233 {
198 rcf[i] += EPS ; 234 rcf[i] += EPS ;
199 rcfsum += rcf[i]; 235 rcfsum += rcf[i];
216 d_mat_t tmat; 252 d_mat_t tmat;
217 for (unsigned int i=0;i<wv.size();i++) 253 for (unsigned int i=0;i<wv.size();i++)
218 { 254 {
219 tmat.push_back ( d_vec_t() ); // adds a new column 255 tmat.push_back ( d_vec_t() ); // adds a new column
220 for (unsigned int j=0; j<wv.size(); j++) 256 for (unsigned int j=0; j<wv.size(); j++)
221 { 257 {
222 tmat[i].push_back(0.); // fill with zeros initially 258 tmat[i].push_back(0.); // fill with zeros initially
223 } 259 }
224 } 260 }
225 261
226 // variance of Gaussians in transition matrix 262 // variance of Gaussians in transition matrix
227 // formed of Gaussians on diagonal - implies slow tempo change 263 // formed of Gaussians on diagonal - implies slow tempo change
228 double sigma = 8.; 264 double sigma = 8.;
229 // don't want really short beat periods, or really long ones 265 // don't want really short beat periods, or really long ones
230 for (unsigned int i=20;i <wv.size()-20; i++) 266 for (unsigned int i=20;i <wv.size()-20; i++)
231 { 267 {
232 for (unsigned int j=20; j<wv.size()-20; j++) 268 for (unsigned int j=20; j<wv.size()-20; j++)
233 { 269 {
234 double mu = static_cast<double>(i); 270 double mu = static_cast<double>(i);
235 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) ); 271 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
236 } 272 }
237 } 273 }
238 274
244 for (unsigned int i=0;i <rcfmat.size(); i++) 280 for (unsigned int i=0;i <rcfmat.size(); i++)
245 { 281 {
246 delta.push_back( d_vec_t()); 282 delta.push_back( d_vec_t());
247 psi.push_back( i_vec_t()); 283 psi.push_back( i_vec_t());
248 for (unsigned int j=0; j<rcfmat[i].size(); j++) 284 for (unsigned int j=0; j<rcfmat[i].size(); j++)
249 { 285 {
250 delta[i].push_back(0.); // fill with zeros initially 286 delta[i].push_back(0.); // fill with zeros initially
251 psi[i].push_back(0); // fill with zeros initially 287 psi[i].push_back(0); // fill with zeros initially
252 } 288 }
253 } 289 }
254 290
263 for (unsigned int j=0; j<Q; j++) 299 for (unsigned int j=0; j<Q; j++)
264 { 300 {
265 delta[0][j] = wv[j] * rcfmat[0][j]; 301 delta[0][j] = wv[j] * rcfmat[0][j];
266 psi[0][j] = 0; 302 psi[0][j] = 0;
267 } 303 }
268 304
269 double deltasum = 0.; 305 double deltasum = 0.;
270 for (unsigned int i=0; i<Q; i++) 306 for (unsigned int i=0; i<Q; i++)
271 { 307 {
272 deltasum += delta[0][i]; 308 deltasum += delta[0][i];
273 } 309 }
274 for (unsigned int i=0; i<Q; i++) 310 for (unsigned int i=0; i<Q; i++)
275 { 311 {
276 delta[0][i] /= (deltasum + EPS); 312 delta[0][i] /= (deltasum + EPS);
277 } 313 }
278 314
279 315
280 for (unsigned int t=1; t<T; t++) 316 for (unsigned int t=1; t<T; t++)
281 { 317 {
282 d_vec_t tmp_vec(Q); 318 d_vec_t tmp_vec(Q);
284 for (unsigned int j=0; j<Q; j++) 320 for (unsigned int j=0; j<Q; j++)
285 { 321 {
286 for (unsigned int i=0; i<Q; i++) 322 for (unsigned int i=0; i<Q; i++)
287 { 323 {
288 tmp_vec[i] = delta[t-1][i] * tmat[j][i]; 324 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
289 } 325 }
290 326
291 delta[t][j] = get_max_val(tmp_vec); 327 delta[t][j] = get_max_val(tmp_vec);
292 328
293 psi[t][j] = get_max_ind(tmp_vec); 329 psi[t][j] = get_max_ind(tmp_vec);
294 330
295 delta[t][j] *= rcfmat[t][j]; 331 delta[t][j] *= rcfmat[t][j];
296 } 332 }
297 333
298 // normalise current delta column 334 // normalise current delta column
299 double deltasum = 0.; 335 double deltasum = 0.;
300 for (unsigned int i=0; i<Q; i++) 336 for (unsigned int i=0; i<Q; i++)
301 { 337 {
302 deltasum += delta[t][i]; 338 deltasum += delta[t][i];
303 } 339 }
304 for (unsigned int i=0; i<Q; i++) 340 for (unsigned int i=0; i<Q; i++)
305 { 341 {
306 delta[t][i] /= (deltasum + EPS); 342 delta[t][i] /= (deltasum + EPS);
307 } 343 }
308 } 344 }
309 345
310 i_vec_t bestpath(T); 346 i_vec_t bestpath(T);
311 d_vec_t tmp_vec(Q); 347 d_vec_t tmp_vec(Q);
312 for (unsigned int i=0; i<Q; i++) 348 for (unsigned int i=0; i<Q; i++)
313 { 349 {
314 tmp_vec[i] = delta[T-1][i]; 350 tmp_vec[i] = delta[T-1][i];
315 } 351 }
316 352
317 // find starting point - best beat period for "last" frame 353 // find starting point - best beat period for "last" frame
318 bestpath[T-1] = get_max_ind(tmp_vec); 354 bestpath[T-1] = get_max_ind(tmp_vec);
319 355
320 // backtrace through index of maximum values in psi 356 // backtrace through index of maximum values in psi
321 for (unsigned int t=T-2; t>0 ;t--) 357 for (unsigned int t=T-2; t>0 ;t--)
322 { 358 {
323 bestpath[t] = psi[t+1][bestpath[t+1]]; 359 bestpath[t] = psi[t+1][bestpath[t+1]];
324 } 360 }
326 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0 362 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
327 bestpath[0] = psi[1][bestpath[1]]; 363 bestpath[0] = psi[1][bestpath[1]];
328 364
329 unsigned int lastind = 0; 365 unsigned int lastind = 0;
330 for (unsigned int i=0; i<T; i++) 366 for (unsigned int i=0; i<T; i++)
331 { 367 {
332 unsigned int step = 128; 368 unsigned int step = 128;
333 for (unsigned int j=0; j<step; j++) 369 for (unsigned int j=0; j<step; j++)
334 { 370 {
335 lastind = i*step+j; 371 lastind = i*step+j;
336 beat_period[lastind] = bestpath[i]; 372 beat_period[lastind] = bestpath[i];
359 if (maxval < df[i]) 395 if (maxval < df[i])
360 { 396 {
361 maxval = df[i]; 397 maxval = df[i];
362 } 398 }
363 } 399 }
364 400
365 return maxval; 401 return maxval;
366 } 402 }
367 403
368 int 404 int
369 TempoTrackV2::get_max_ind(const d_vec_t &df) 405 TempoTrackV2::get_max_ind(const d_vec_t &df)
376 { 412 {
377 maxval = df[i]; 413 maxval = df[i];
378 ind = i; 414 ind = i;
379 } 415 }
380 } 416 }
381 417
382 return ind; 418 return ind;
383 } 419 }
384 420
385 void 421 void
386 TempoTrackV2::normalise_vec(d_vec_t &df) 422 TempoTrackV2::normalise_vec(d_vec_t &df)
388 double sum = 0.; 424 double sum = 0.;
389 for (unsigned int i=0; i<df.size(); i++) 425 for (unsigned int i=0; i<df.size(); i++)
390 { 426 {
391 sum += df[i]; 427 sum += df[i];
392 } 428 }
393 429
394 for (unsigned int i=0; i<df.size(); i++) 430 for (unsigned int i=0; i<df.size(); i++)
395 { 431 {
396 df[i]/= (sum + EPS); 432 df[i]/= (sum + EPS);
397 } 433 }
398 } 434 }
399 435
436 // MEPD 28/11/12
437 // this function has been updated to allow the "alpha" and "tightness" parameters
438 // of the dynamic program to be set by the user
439 // the default value of alpha = 0.9 and tightness = 4
400 void 440 void
401 TempoTrackV2::calculateBeats(const vector<double> &df, 441 TempoTrackV2::calculateBeats(const vector<double> &df,
402 const vector<double> &beat_period, 442 const vector<double> &beat_period,
403 vector<double> &beats) 443 vector<double> &beats, double alpha, double tightness)
404 { 444 {
405 if (df.empty() || beat_period.empty()) return; 445 if (df.empty() || beat_period.empty()) return;
406 446
407 d_vec_t cumscore(df.size()); // store cumulative score 447 d_vec_t cumscore(df.size()); // store cumulative score
408 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant) 448 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
412 { 452 {
413 localscore[i] = df[i]; 453 localscore[i] = df[i];
414 backlink[i] = -1; 454 backlink[i] = -1;
415 } 455 }
416 456
417 double tightness = 4.; 457 //double tightness = 4.;
418 double alpha = 0.9; 458 //double alpha = 0.9;
459 // MEPD 28/11/12
460 // debug statements that can be removed.
461 std::cout << "alpha" << alpha << std::endl;
462 std::cout << "tightness" << tightness << std::endl;
419 463
420 // main loop 464 // main loop
421 for (unsigned int i=0; i<localscore.size(); i++) 465 for (unsigned int i=0; i<localscore.size(); i++)
422 { 466 {
423 int prange_min = -2*beat_period[i]; 467 int prange_min = -2*beat_period[i];
434 478
435 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J 479 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
436 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE()); 480 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
437 481
438 int cscore_ind = i+prange_min+j; 482 int cscore_ind = i+prange_min+j;
439 if (cscore_ind >= 0) 483 if (cscore_ind >= 0)
440 { 484 {
441 scorecands[j] = txwt[j] * cumscore[cscore_ind]; 485 scorecands[j] = txwt[j] * cumscore[cscore_ind];
442 } 486 }
443 } 487 }
444 488
455 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR 499 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
456 d_vec_t tmp_vec; 500 d_vec_t tmp_vec;
457 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++) 501 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
458 { 502 {
459 tmp_vec.push_back(cumscore[i]); 503 tmp_vec.push_back(cumscore[i]);
460 } 504 }
461 505
462 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ; 506 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
463 507
464 // can happen if no results obtained earlier (e.g. input too short) 508 // can happen if no results obtained earlier (e.g. input too short)
465 if (startpoint >= backlink.size()) startpoint = backlink.size()-1; 509 if (startpoint >= backlink.size()) startpoint = backlink.size()-1;
474 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl; 518 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
475 int b = ibeats.back(); 519 int b = ibeats.back();
476 if (backlink[b] == b) break; // shouldn't happen... haha 520 if (backlink[b] == b) break; // shouldn't happen... haha
477 ibeats.push_back(backlink[b]); 521 ibeats.push_back(backlink[b]);
478 } 522 }
479 523
480 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS 524 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
481 for (unsigned int i=0; i<ibeats.size(); i++) 525 for (unsigned int i=0; i<ibeats.size(); i++)
482 { 526 {
483 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) ); 527 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
484 } 528 }
485 } 529 }
486 530
487 531