qm-dsp: dsp/tempotracking/TempoTrackV2.cpp annotate

annotate dsp/tempotracking/TempoTrackV2.cpp @ 278:833ca65b0820

* Update with fixes from Matthew's newer code

author	Chris Cannam <c.cannam@qmul.ac.uk>
date	Mon, 09 Feb 2009 16:05:32 +0000
parents	09bceb0aeff6
children	5bec06ecc88a

rev	line source
c@277	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
c@277	2
c@277	3 /*
c@277	4 QM DSP Library
c@277	5
c@277	6 Centre for Digital Music, Queen Mary, University of London.
c@277	7 This file copyright 2008-2009 Matthew Davies and QMUL.
c@277	8 All rights reserved.
c@277	9 */
c@277	10
c@277	11 #include "TempoTrackV2.h"
c@277	12
c@277	13 #include <cmath>
c@277	14 #include <cstdlib>
c@278	15 #include <iostream>
c@277	16
c@277	17
c@277	18 //#define FRAMESIZE 512
c@277	19 //#define BIGFRAMESIZE 1024
c@277	20 #define TWOPI 6.283185307179586232 // 2*pi
c@277	21 #define EPS 0.0000008 // arbitrary small positive number, added to sums and denominators so normalisations never divide by zero
c@277	22
c@277	23 TempoTrackV2::TempoTrackV2() { }
c@277	24 TempoTrackV2::~TempoTrackV2() { }
c@277	25
c@277	26 void
c@277	27 TempoTrackV2::adapt_thresh(d_vec_t &df)
c@277	28 {
c@278	29 d_vec_t smoothed(df.size());
c@278	30
c@278	31 int p_post = 7;
c@278	32 int p_pre = 8;
c@277	33
c@278	34 int t = std::min(static_cast<int>(df.size()),p_post); // what is smaller, p_post of df size. This is to avoid accessing outside of arrays
c@277	35
c@278	36 // find threshold for first 't' samples, where a full average cannot be computed yet
c@278	37 for (int i = 0;i <= t;i++)
c@278	38 {
c@278	39 int k = std::min((i+p_pre),static_cast<int>(df.size()));
c@278	40 smoothed[i] = mean_array(df,1,k);
c@278	41 }
c@278	42 // find threshold for bulk of samples across a moving average from [i-p_pre,i+p_post]
c@278	43 for (uint i = t+1;i < df.size()-p_post;i++)
c@278	44 {
c@278	45 smoothed[i] = mean_array(df,i-p_pre,i+p_post);
c@278	46 }
c@278	47 // for last few samples calculate threshold, again, not enough samples to do as above
c@278	48 for (uint i = df.size()-p_post;i < df.size();i++)
c@278	49 {
c@278	50 int k = std::max((static_cast<int> (i) -p_post),1);
c@278	51 smoothed[i] = mean_array(df,k,df.size());
c@278	52 }
c@277	53
c@278	54 // subtract the threshold from the detection function and check that it is not less than 0
c@278	55 for (uint i = 0;i < df.size();i++)
c@278	56 {
c@278	57 df[i] -= smoothed[i];
c@278	58 if (df[i] < 0)
c@278	59 {
c@278	60 df[i] = 0;
c@278	61 }
c@278	62 }
c@277	63 }
c@277	64
c@277	65 double
c@277	66 TempoTrackV2::mean_array(const d_vec_t &dfin,int start,int end)
c@277	67 {
c@278	68 double sum = 0.;
c@278	69
c@278	70 // find sum
c@278	71 for (int i = start;i < end;i++)
c@278	72 {
c@278	73 sum += dfin[i];
c@278	74 }
c@277	75
c@278	76 return static_cast<double> (sum / (end - start + 1) ); // average and return
c@277	77 }
c@277	78
c@277	79 void
c@277	80 TempoTrackV2::filter_df(d_vec_t &df)
c@277	81 {
c@278	82 d_vec_t a(3);
c@278	83 d_vec_t b(3);
c@278	84 d_vec_t lp_df(df.size());
c@277	85
c@278	86 //equivalent in matlab to [b,a] = butter(2,0.4);
c@278	87 a[0] = 1.0000;
c@278	88 a[1] = -0.3695;
c@278	89 a[2] = 0.1958;
c@278	90 b[0] = 0.2066;
c@278	91 b[1] = 0.4131;
c@278	92 b[2] = 0.2066;
c@278	93
c@278	94 double inp1 = 0.;
c@278	95 double inp2 = 0.;
c@278	96 double out1 = 0.;
c@278	97 double out2 = 0.;
c@277	98
c@277	99
c@278	100 // forwards filtering
c@278	101 for (uint i = 0;i < df.size();i++)
c@278	102 {
c@278	103 lp_df[i] = b[0]df[i] + b[1]inp1 + b[2]inp2 - a[1]out1 - a[2]*out2;
c@278	104 inp2 = inp1;
c@278	105 inp1 = df[i];
c@278	106 out2 = out1;
c@278	107 out1 = lp_df[i];
c@278	108 }
c@277	109
c@278	110 // copy forwards filtering to df...
c@278	111 // but, time-reversed, ready for backwards filtering
c@278	112 for (uint i = 0;i < df.size();i++)
c@278	113 {
c@278	114 df[i] = lp_df[df.size()-i-1];
c@278	115 }
c@277	116
c@278	117 for (uint i = 0;i < df.size();i++)
c@278	118 {
c@278	119 lp_df[i] = 0.;
c@278	120 }
c@277	121
c@278	122 inp1 = 0.; inp2 = 0.;
c@278	123 out1 = 0.; out2 = 0.;
c@277	124
c@277	125 // backwards filetering on time-reversed df
c@278	126 for (uint i = 0;i < df.size();i++)
c@278	127 {
c@278	128 lp_df[i] = b[0]df[i] + b[1]inp1 + b[2]inp2 - a[1]out1 - a[2]*out2;
c@278	129 inp2 = inp1;
c@278	130 inp1 = df[i];
c@278	131 out2 = out1;
c@278	132 out1 = lp_df[i];
c@278	133 }
c@277	134
c@277	135 // write the re-reversed (i.e. forward) version back to df
c@278	136 for (uint i = 0;i < df.size();i++)
c@278	137 {
c@278	138 df[i] = lp_df[df.size()-i-1];
c@278	139 }
c@277	140 }
c@277	141
c@277	142
c@277	143 void
c@278	144 TempoTrackV2::calculateBeatPeriod(const d_vec_t &df, d_vec_t &beat_period,
c@278	145 d_vec_t &tempi)
c@277	146 {
c@278	147 // to follow matlab.. split into 512 sample frames with a 128 hop size
c@278	148 // calculate the acf,
c@278	149 // then the rcf.. and then stick the rcfs as columns of a matrix
c@278	150 // then call viterbi decoding with weight vector and transition matrix
c@278	151 // and get best path
c@277	152
c@278	153 uint wv_len = 128;
c@278	154 double rayparam = 43.;
c@277	155
c@278	156 // make rayleigh weighting curve
c@278	157 d_vec_t wv(wv_len);
c@278	158 for (uint i=0; i<wv.size(); i++)
c@277	159 {
c@278	160 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.pow(-static_cast<double> (i),2.)) / (2.pow(rayparam,2.)));
c@277	161 }
c@277	162
c@278	163 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
c@278	164 uint winlen = 512;
c@278	165 uint step = 128;
c@278	166
c@278	167 // matrix to store output of comb filter bank, increment column of matrix at each frame
c@278	168 d_mat_t rcfmat;
c@278	169 int col_counter = -1;
c@278	170
c@278	171 // main loop for beat period calculation
c@278	172 for (uint i=0; i<(df.size()-winlen); i+=step)
c@278	173 {
c@278	174 // get dfframe
c@278	175 d_vec_t dfframe(winlen);
c@278	176 for (uint k=0; k<winlen; k++)
c@278	177 {
c@278	178 dfframe[k] = df[i+k];
c@278	179 }
c@278	180 // get rcf vector for current frame
c@278	181 d_vec_t rcf(wv_len);
c@278	182 get_rcf(dfframe,wv,rcf);
c@277	183
c@278	184 rcfmat.push_back( d_vec_t() ); // adds a new column
c@278	185 col_counter++;
c@278	186 for (uint j=0; j<rcf.size(); j++)
c@278	187 {
c@278	188 rcfmat[col_counter].push_back( rcf[j] );
c@278	189 }
c@278	190 }
c@278	191
c@278	192 // now call viterbi decoding function
c@278	193 viterbi_decode(rcfmat,wv,beat_period,tempi);
c@277	194 }
c@277	195
c@277	196
c@277	197 void
c@277	198 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
c@277	199 {
c@278	200 // calculate autocorrelation function
c@278	201 // then rcf
c@278	202 // just hard code for now... don't really need separate functions to do this
c@277	203
c@278	204 // make acf
c@277	205
c@278	206 d_vec_t dfframe(dfframe_in);
c@277	207
c@278	208 adapt_thresh(dfframe);
c@277	209
c@278	210 d_vec_t acf(dfframe.size());
c@277	211
c@278	212
c@278	213 for (uint lag=0; lag<dfframe.size(); lag++)
c@278	214 {
c@278	215 double sum = 0.;
c@278	216 double tmp = 0.;
c@277	217
c@278	218 for (uint n=0; n<(dfframe.size()-lag); n++)
c@278	219 {
c@278	220 tmp = dfframe[n] * dfframe[n+lag];
c@278	221 sum += tmp;
c@278	222 }
c@278	223 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
c@278	224 }
c@277	225
c@278	226 // now apply comb filtering
c@278	227 int numelem = 4;
c@278	228
c@278	229 for (uint i = 2;i < rcf.size();i++) // max beat period
c@278	230 {
c@278	231 for (int a = 1;a <= numelem;a++) // number of comb elements
c@278	232 {
c@278	233 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
c@278	234 {
c@278	235 rcf[i-1] += ( acf[(ai+b)-1]wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
c@278	236 }
c@278	237 }
c@278	238 }
c@278	239
c@278	240 // apply adaptive threshold to rcf
c@278	241 adapt_thresh(rcf);
c@278	242
c@278	243 double rcfsum =0.;
c@278	244 for (uint i=0; i<rcf.size(); i++)
c@278	245 {
c@278	246 rcf[i] += EPS ;
c@278	247 rcfsum += rcf[i];
c@278	248 }
c@277	249
c@278	250 // normalise rcf to sum to unity
c@278	251 for (uint i=0; i<rcf.size(); i++)
c@277	252 {
c@278	253 rcf[i] /= (rcfsum + EPS);
c@277	254 }
c@277	255 }
c@277	256
c@277	257 void
c@278	258 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
c@277	259 {
c@278	260 // following Kevin Murphy's Viterbi decoding to get best path of
c@278	261 // beat periods through rfcmat
c@277	262
c@278	263 // make transition matrix
c@278	264 d_mat_t tmat;
c@278	265 for (uint i=0;i<wv.size();i++)
c@278	266 {
c@278	267 tmat.push_back ( d_vec_t() ); // adds a new column
c@278	268 for (uint j=0; j<wv.size(); j++)
c@278	269 {
c@278	270 tmat[i].push_back(0.); // fill with zeros initially
c@278	271 }
c@278	272 }
c@278	273
c@278	274 // variance of Gaussians in transition matrix
c@278	275 // formed of Gaussians on diagonal - implies slow tempo change
c@278	276 double sigma = 8.;
c@278	277 // don't want really short beat periods, or really long ones
c@278	278 for (uint i=20;i <wv.size()-20; i++)
c@278	279 {
c@278	280 for (uint j=20; j<wv.size()-20; j++)
c@278	281 {
c@278	282 double mu = static_cast<double>(i);
c@278	283 tmat[i][j] = exp( (-1.pow((j-mu),2.)) / (2.pow(sigma,2.)) );
c@278	284 }
c@278	285 }
c@277	286
c@278	287 // parameters for Viterbi decoding... this part is taken from
c@278	288 // Murphy's matlab
c@277	289
c@278	290 d_mat_t delta;
c@278	291 i_mat_t psi;
c@278	292 for (uint i=0;i <rcfmat.size(); i++)
c@278	293 {
c@278	294 delta.push_back( d_vec_t());
c@278	295 psi.push_back( i_vec_t());
c@278	296 for (uint j=0; j<rcfmat[i].size(); j++)
c@278	297 {
c@278	298 delta[i].push_back(0.); // fill with zeros initially
c@278	299 psi[i].push_back(0); // fill with zeros initially
c@278	300 }
c@278	301 }
c@277	302
c@277	303
c@278	304 uint T = delta.size();
c@278	305 uint Q = delta[0].size();
c@277	306
c@278	307 // initialize first column of delta
c@277	308 for (uint j=0; j<Q; j++)
c@277	309 {
c@278	310 delta[0][j] = wv[j] * rcfmat[0][j];
c@278	311 psi[0][j] = 0;
c@277	312 }
c@278	313
c@277	314 double deltasum = 0.;
c@277	315 for (uint i=0; i<Q; i++)
c@277	316 {
c@278	317 deltasum += delta[0][i];
c@277	318 }
c@277	319 for (uint i=0; i<Q; i++)
c@277	320 {
c@278	321 delta[0][i] /= (deltasum + EPS);
c@277	322 }
c@277	323
c@277	324
c@278	325 for (uint t=1; t<T; t++)
c@278	326 {
c@278	327 d_vec_t tmp_vec(Q);
c@277	328
c@278	329 for (uint j=0; j<Q; j++)
c@278	330 {
c@278	331 for (uint i=0; i<Q; i++)
c@278	332 {
c@278	333 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
c@278	334 }
c@278	335
c@278	336 delta[t][j] = get_max_val(tmp_vec);
c@277	337
c@278	338 psi[t][j] = get_max_ind(tmp_vec);
c@278	339
c@278	340 delta[t][j] *= rcfmat[t][j];
c@278	341 }
c@277	342
c@278	343 // normalise current delta column
c@278	344 double deltasum = 0.;
c@278	345 for (uint i=0; i<Q; i++)
c@278	346 {
c@278	347 deltasum += delta[t][i];
c@278	348 }
c@278	349 for (uint i=0; i<Q; i++)
c@278	350 {
c@278	351 delta[t][i] /= (deltasum + EPS);
c@278	352 }
c@278	353 }
c@277	354
c@278	355 i_vec_t bestpath(T);
c@278	356 d_vec_t tmp_vec(Q);
c@278	357 for (uint i=0; i<Q; i++)
c@278	358 {
c@278	359 tmp_vec[i] = delta[T-1][i];
c@278	360 }
c@277	361
c@278	362 // find starting point - best beat period for "last" frame
c@278	363 bestpath[T-1] = get_max_ind(tmp_vec);
c@278	364
c@278	365 // backtrace through index of maximum values in psi
c@278	366 for (uint t=T-2; t>0 ;t--)
c@278	367 {
c@278	368 bestpath[t] = psi[t+1][bestpath[t+1]];
c@278	369 }
c@277	370
c@278	371 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
c@278	372 bestpath[0] = psi[1][bestpath[1]];
c@277	373
c@278	374 uint lastind = 0;
c@278	375 for (uint i=0; i<T; i++)
c@278	376 {
c@278	377 uint step = 128;
c@278	378 for (uint j=0; j<step; j++)
c@278	379 {
c@278	380 lastind = i*step+j;
c@278	381 beat_period[lastind] = bestpath[i];
c@278	382 }
c@278	383 }
c@277	384
c@278	385 //fill in the last values...
c@278	386 for (uint i=lastind; i<beat_period.size(); i++)
c@278	387 {
c@278	388 beat_period[i] = beat_period[lastind];
c@278	389 }
c@277	390
c@278	391 for (uint i = 0; i < beat_period.size(); i++)
c@277	392 {
c@278	393 tempi.push_back((60.*44100./512.)/beat_period[i]);
c@277	394 }
c@277	395 }
c@277	396
c@277	397 double
c@277	398 TempoTrackV2::get_max_val(const d_vec_t &df)
c@277	399 {
c@278	400 double maxval = 0.;
c@278	401 for (uint i=0; i<df.size(); i++)
c@277	402 {
c@278	403 if (maxval < df[i])
c@278	404 {
c@278	405 maxval = df[i];
c@278	406 }
c@277	407 }
c@277	408
c@278	409 return maxval;
c@277	410 }
c@277	411
c@277	412 int
c@277	413 TempoTrackV2::get_max_ind(const d_vec_t &df)
c@277	414 {
c@278	415 double maxval = 0.;
c@278	416 int ind = 0;
c@278	417 for (uint i=0; i<df.size(); i++)
c@277	418 {
c@278	419 if (maxval < df[i])
c@278	420 {
c@278	421 maxval = df[i];
c@278	422 ind = i;
c@278	423 }
c@277	424 }
c@278	425
c@278	426 return ind;
c@277	427 }
c@277	428
c@277	429 void
c@277	430 TempoTrackV2::normalise_vec(d_vec_t &df)
c@277	431 {
c@278	432 double sum = 0.;
c@278	433 for (uint i=0; i<df.size(); i++)
c@278	434 {
c@278	435 sum += df[i];
c@278	436 }
c@278	437
c@278	438 for (uint i=0; i<df.size(); i++)
c@278	439 {
c@278	440 df[i]/= (sum + EPS);
c@278	441 }
c@277	442 }
c@277	443
c@277	444 void
c@277	445 TempoTrackV2::calculateBeats(const d_vec_t &df, const d_vec_t &beat_period,
c@277	446 d_vec_t &beats)
c@277	447 {
c@278	448 d_vec_t cumscore(df.size()); // store cumulative score
c@278	449 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
c@278	450 d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
c@277	451
c@278	452 for (uint i=0; i<df.size(); i++)
c@277	453 {
c@278	454 localscore[i] = df[i];
c@278	455 backlink[i] = -1;
c@277	456 }
c@277	457
c@278	458 double tightness = 4.;
c@278	459 double alpha = 0.9;
c@277	460
c@278	461 // main loop
c@278	462 for (uint i=0; i<localscore.size(); i++)
c@278	463 {
c@278	464 int prange_min = -2*beat_period[i];
c@278	465 int prange_max = round(-0.5*beat_period[i]);
c@277	466
c@278	467 // transition range
c@278	468 d_vec_t txwt (prange_max - prange_min + 1);
c@278	469 d_vec_t scorecands (txwt.size());
c@277	470
c@278	471 for (uint j=0;j<txwt.size();j++)
c@278	472 {
c@278	473 double mu = static_cast<double> (beat_period[i]);
c@278	474 txwt[j] = exp( -0.5pow(tightness log((round(2*mu)-j)/mu),2));
c@277	475
c@278	476 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
c@278	477 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
c@277	478
c@278	479 int cscore_ind = i+prange_min+j;
c@278	480 if (cscore_ind >= 0)
c@278	481 {
c@278	482 scorecands[j] = txwt[j] * cumscore[cscore_ind];
c@278	483 }
c@278	484 }
c@277	485
c@278	486 // find max value and index of maximum value
c@278	487 double vv = get_max_val(scorecands);
c@278	488 int xx = get_max_ind(scorecands);
c@277	489
c@278	490 cumscore[i] = alphavv + (1.-alpha)localscore[i];
c@278	491 backlink[i] = i+prange_min+xx;
c@278	492 }
c@278	493
c@278	494 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
c@278	495 d_vec_t tmp_vec;
c@278	496 for (uint i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
c@278	497 {
c@278	498 tmp_vec.push_back(cumscore[i]);
c@278	499 }
c@278	500
c@278	501 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
c@278	502
c@278	503 // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
c@278	504 // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
c@278	505 i_vec_t ibeats;
c@278	506 ibeats.push_back(startpoint);
c@278	507 while (backlink[ibeats.back()] > 0)
c@278	508 {
c@278	509 ibeats.push_back(backlink[ibeats.back()]);
c@278	510 }
c@277	511
c@278	512 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
c@278	513 for (uint i=0; i<ibeats.size(); i++)
c@278	514 {
c@278	515 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
c@278	516 }
c@277	517 }
c@277	518
c@277	519

Mercurial > hg > qm-dsp

annotate dsp/tempotracking/TempoTrackV2.cpp @ 278:833ca65b0820