qm-dsp: dsp/tempotracking/TempoTrackV2.cpp annotate

annotate dsp/tempotracking/TempoTrackV2.cpp @ 277:09bceb0aeff6

* Add Matthew's newer beat tracking implementation

author	Chris Cannam <c.cannam@qmul.ac.uk>
date	Tue, 20 Jan 2009 15:01:01 +0000
parents
children	796170a9c8e4

rev	line source
c@277	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
c@277	2
c@277	3 /*
c@277	4 QM DSP Library
c@277	5
c@277	6 Centre for Digital Music, Queen Mary, University of London.
c@277	7 This file copyright 2008-2009 Matthew Davies and QMUL.
c@277	8 All rights reserved.
c@277	9 */
c@277	10
c@277	11 #include "TempoTrackV2.h"
c@277	12
c@277	13 #include <cmath>
c@277	14 #include <cstdlib>
c@277	15
c@277	16
c@277	17 //#define FRAMESIZE 512
c@277	18 //#define BIGFRAMESIZE 1024
c@277	19 #define TWOPI 6.283185307179586232
c@277	20 #define EPS 0.0000008 // just some arbitrary small number
c@277	21
c@277	22 TempoTrackV2::TempoTrackV2() { }
c@277	23 TempoTrackV2::~TempoTrackV2() { }
c@277	24
c@277	25 void
c@277	26 TempoTrackV2::adapt_thresh(d_vec_t &df)
c@277	27 {
c@277	28
c@277	29 d_vec_t smoothed(df.size());
c@277	30
c@277	31 int p_post = 7;
c@277	32 int p_pre = 8;
c@277	33
c@277	34 int t = std::min(static_cast<int>(df.size()),p_post); // what is smaller, p_post of df size. This is to avoid accessing outside of arrays
c@277	35
c@277	36 // find threshold for first 't' samples, where a full average cannot be computed yet
c@277	37 for (int i = 0;i <= t;i++)
c@277	38 {
c@277	39 int k = std::min((i+p_pre),static_cast<int>(df.size()));
c@277	40 smoothed[i] = mean_array(df,1,k);
c@277	41 }
c@277	42 // find threshold for bulk of samples across a moving average from [i-p_pre,i+p_post]
c@277	43 for (uint i = t+1;i < df.size()-p_post;i++)
c@277	44 {
c@277	45 smoothed[i] = mean_array(df,i-p_pre,i+p_post);
c@277	46 }
c@277	47 // for last few samples calculate threshold, again, not enough samples to do as above
c@277	48 for (uint i = df.size()-p_post;i < df.size();i++)
c@277	49 {
c@277	50 int k = std::max((static_cast<int> (i) -p_post),1);
c@277	51 smoothed[i] = mean_array(df,k,df.size());
c@277	52 }
c@277	53
c@277	54 // subtract the threshold from the detection function and check that it is not less than 0
c@277	55 for (uint i = 0;i < df.size();i++)
c@277	56 {
c@277	57 df[i] -= smoothed[i];
c@277	58 if (df[i] < 0)
c@277	59 {
c@277	60 df[i] = 0;
c@277	61 }
c@277	62 }
c@277	63 }
c@277	64
c@277	65 double
c@277	66 TempoTrackV2::mean_array(const d_vec_t &dfin,int start,int end)
c@277	67 {
c@277	68
c@277	69 double sum = 0.;
c@277	70
c@277	71 // find sum
c@277	72 for (int i = start;i < end+1;i++)
c@277	73 {
c@277	74 sum += dfin[i];
c@277	75 }
c@277	76
c@277	77 return static_cast<double> (sum / (end - start + 1) ); // average and return
c@277	78 }
c@277	79
c@277	80 void
c@277	81 TempoTrackV2::filter_df(d_vec_t &df)
c@277	82 {
c@277	83
c@277	84
c@277	85 d_vec_t a(3);
c@277	86 d_vec_t b(3);
c@277	87 d_vec_t lp_df(df.size());
c@277	88
c@277	89 //equivalent in matlab to [b,a] = butter(2,0.4);
c@277	90 a[0] = 1.0000;
c@277	91 a[1] = -0.3695;
c@277	92 a[2] = 0.1958;
c@277	93 b[0] = 0.2066;
c@277	94 b[1] = 0.4131;
c@277	95 b[2] = 0.2066;
c@277	96
c@277	97 double inp1 = 0.;
c@277	98 double inp2 = 0.;
c@277	99 double out1 = 0.;
c@277	100 double out2 = 0.;
c@277	101
c@277	102
c@277	103 // forwards filtering
c@277	104 for (uint i = 0;i < df.size();i++)
c@277	105 {
c@277	106 lp_df[i] = b[0]df[i] + b[1]inp1 + b[2]inp2 - a[1]out1 - a[2]*out2;
c@277	107 inp2 = inp1;
c@277	108 inp1 = df[i];
c@277	109 out2 = out1;
c@277	110 out1 = lp_df[i];
c@277	111 }
c@277	112
c@277	113
c@277	114 // copy forwards filtering to df...
c@277	115 // but, time-reversed, ready for backwards filtering
c@277	116 for (uint i = 0;i < df.size();i++)
c@277	117 {
c@277	118 df[i] = lp_df[df.size()-i];
c@277	119 }
c@277	120
c@277	121 for (uint i = 0;i < df.size();i++)
c@277	122 {
c@277	123 lp_df[i] = 0.;
c@277	124 }
c@277	125
c@277	126 inp1 = 0.; inp2 = 0.;
c@277	127 out1 = 0.; out2 = 0.;
c@277	128
c@277	129 // backwards filetering on time-reversed df
c@277	130 for (uint i = 0;i < df.size();i++)
c@277	131 {
c@277	132 lp_df[i] = b[0]df[i] + b[1]inp1 + b[2]inp2 - a[1]out1 - a[2]*out2;
c@277	133 inp2 = inp1;
c@277	134 inp1 = df[i];
c@277	135 out2 = out1;
c@277	136 out1 = lp_df[i];
c@277	137 }
c@277	138
c@277	139 // write the re-reversed (i.e. forward) version back to df
c@277	140 for (uint i = 0;i < df.size();i++)
c@277	141 {
c@277	142 df[i] = lp_df[df.size()-i];
c@277	143 }
c@277	144
c@277	145
c@277	146 }
c@277	147
c@277	148
c@277	149 void
c@277	150 TempoTrackV2::calculateBeatPeriod(const d_vec_t &df, d_vec_t &beat_period)
c@277	151 {
c@277	152
c@277	153 // to follow matlab.. split into 512 sample frames with a 128 hop size
c@277	154 // calculate the acf,
c@277	155 // then the rcf.. and then stick the rcfs as columns of a matrix
c@277	156 // then call viterbi decoding with weight vector and transition matrix
c@277	157 // and get best path
c@277	158
c@277	159 uint wv_len = 128;
c@277	160 double rayparam = 43.;
c@277	161
c@277	162 // make rayleigh weighting curve
c@277	163 d_vec_t wv(wv_len);
c@277	164 for (uint i=0; i<wv.size(); i++)
c@277	165 {
c@277	166 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.pow(-static_cast<double> (i),2.)) / (2.pow(rayparam,2.)));
c@277	167 }
c@277	168
c@277	169
c@277	170 uint winlen = 512;
c@277	171 uint step = 128;
c@277	172
c@277	173 d_mat_t rcfmat;
c@277	174 int col_counter = -1;
c@277	175 // main loop for beat period calculation
c@277	176 for (uint i=0; i<(df.size()-winlen); i+=step)
c@277	177 {
c@277	178 // get dfframe
c@277	179 d_vec_t dfframe(winlen);
c@277	180 for (uint k=0; k<winlen; k++)
c@277	181 {
c@277	182 dfframe[k] = df[i+k];
c@277	183 }
c@277	184 // get rcf vector for current frame
c@277	185 d_vec_t rcf(wv_len);
c@277	186 get_rcf(dfframe,wv,rcf);
c@277	187
c@277	188 rcfmat.push_back( d_vec_t() ); // adds a new column
c@277	189 col_counter++;
c@277	190 for (uint j=0; j<rcf.size(); j++)
c@277	191 {
c@277	192 rcfmat[col_counter].push_back( rcf[j] );
c@277	193 }
c@277	194
c@277	195 }
c@277	196
c@277	197 // now call viterbi decoding function
c@277	198 viterbi_decode(rcfmat,wv,beat_period);
c@277	199
c@277	200
c@277	201
c@277	202 }
c@277	203
c@277	204
c@277	205 void
c@277	206 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
c@277	207 {
c@277	208 // calculate autocorrelation function
c@277	209 // then rcf
c@277	210 // just hard code for now... don't really need separate functions to do this
c@277	211
c@277	212 // make acf
c@277	213
c@277	214 d_vec_t dfframe(dfframe_in);
c@277	215
c@277	216 adapt_thresh(dfframe);
c@277	217
c@277	218 d_vec_t acf(dfframe.size());
c@277	219
c@277	220
c@277	221 for (uint lag=0; lag<dfframe.size(); lag++)
c@277	222 {
c@277	223
c@277	224 double sum = 0.;
c@277	225 double tmp = 0.;
c@277	226
c@277	227 for (uint n=0; n<(dfframe.size()-lag); n++)
c@277	228 {
c@277	229 tmp = dfframe[n] * dfframe[n+lag];
c@277	230 sum += tmp;
c@277	231 }
c@277	232 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
c@277	233 }
c@277	234
c@277	235
c@277	236 // for (uint i=0; i<dfframe.size(); i++)
c@277	237 // {
c@277	238 // cout << dfframe[i] << " " << acf[i] << endl;
c@277	239 // }
c@277	240
c@277	241 // cout << "~~~~~~~~~~~~~~" << endl;
c@277	242
c@277	243
c@277	244
c@277	245
c@277	246
c@277	247 // now apply comb filtering
c@277	248 int numelem = 4;
c@277	249
c@277	250 // for (uint i = 1;i < 118;i++) // max beat period
c@277	251 for (uint i = 2;i < rcf.size();i++) // max beat period
c@277	252 {
c@277	253 for (int a = 1;a <= numelem;a++) // number of comb elements
c@277	254 {
c@277	255 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
c@277	256 {
c@277	257 rcf[i-1] += ( acf[(ai+b)-1]wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
c@277	258 }
c@277	259 }
c@277	260 }
c@277	261
c@277	262 // apply adaptive threshold to rcf
c@277	263 adapt_thresh(rcf);
c@277	264
c@277	265 double rcfsum =0.;
c@277	266 for (uint i=0; i<rcf.size(); i++)
c@277	267 {
c@277	268 // rcf[i] *= acf[i];
c@277	269 rcf[i] += EPS ;
c@277	270 rcfsum += rcf[i];
c@277	271 }
c@277	272
c@277	273 // normalise rcf to sum to unity
c@277	274 for (uint i=0; i<rcf.size(); i++)
c@277	275 {
c@277	276 rcf[i] /= (rcfsum + EPS);
c@277	277 }
c@277	278
c@277	279
c@277	280
c@277	281 }
c@277	282
c@277	283 void
c@277	284 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period)
c@277	285 {
c@277	286
c@277	287 // make transition matrix
c@277	288 d_mat_t tmat;
c@277	289 for (uint i=0;i<wv.size();i++)
c@277	290 {
c@277	291 tmat.push_back ( d_vec_t() ); // adds a new column
c@277	292 for (uint j=0; j<wv.size(); j++)
c@277	293 {
c@277	294 tmat[i].push_back(0.); // fill with zeros initially
c@277	295 }
c@277	296 }
c@277	297
c@277	298 double sigma = 8.;
c@277	299 for (uint i=20;i <wv.size()-20; i++)
c@277	300 {
c@277	301 for (uint j=20; j<wv.size()-20; j++)
c@277	302 {
c@277	303 double mu = static_cast<double>(i);
c@277	304 tmat[i][j] = exp( (-1.pow((j-mu),2.)) / (2.pow(sigma,2.)) );
c@277	305 }
c@277	306 }
c@277	307
c@277	308 d_mat_t delta;
c@277	309 i_mat_t psi;
c@277	310 for (uint i=0;i <rcfmat.size(); i++)
c@277	311 {
c@277	312 delta.push_back( d_vec_t());
c@277	313 psi.push_back( i_vec_t());
c@277	314 for (uint j=0; j<rcfmat[i].size(); j++)
c@277	315 {
c@277	316 delta[i].push_back(0.); // fill with zeros initially
c@277	317 psi[i].push_back(0); // fill with zeros initially
c@277	318 }
c@277	319 }
c@277	320
c@277	321
c@277	322 uint T = delta.size();
c@277	323 uint Q = delta[0].size();
c@277	324
c@277	325 // initialize first column of delta
c@277	326 for (uint j=0; j<Q; j++)
c@277	327 {
c@277	328 delta[0][j] = wv[j] * rcfmat[0][j];
c@277	329 psi[0][j] = 0;
c@277	330 }
c@277	331
c@277	332 double deltasum = 0.;
c@277	333 for (uint i=0; i<Q; i++)
c@277	334 {
c@277	335 deltasum += delta[0][i];
c@277	336 }
c@277	337 for (uint i=0; i<Q; i++)
c@277	338 {
c@277	339 delta[0][i] /= (deltasum + EPS);
c@277	340 }
c@277	341
c@277	342
c@277	343
c@277	344 for (uint t=1; t<T; t++)
c@277	345 {
c@277	346 d_vec_t tmp_vec(Q);
c@277	347
c@277	348 for (uint j=0; j<Q; j++)
c@277	349 {
c@277	350
c@277	351 for (uint i=0; i<Q; i++)
c@277	352 {
c@277	353 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
c@277	354 }
c@277	355
c@277	356 delta[t][j] = get_max_val(tmp_vec);
c@277	357
c@277	358 psi[t][j] = get_max_ind(tmp_vec);
c@277	359
c@277	360 delta[t][j] *= rcfmat[t][j];
c@277	361
c@277	362
c@277	363 }
c@277	364
c@277	365 double deltasum = 0.;
c@277	366 for (uint i=0; i<Q; i++)
c@277	367 {
c@277	368 deltasum += delta[t][i];
c@277	369 }
c@277	370 for (uint i=0; i<Q; i++)
c@277	371 {
c@277	372 delta[t][i] /= (deltasum + EPS);
c@277	373 }
c@277	374
c@277	375
c@277	376
c@277	377
c@277	378 }
c@277	379
c@277	380
c@277	381 // ofstream tmatfile;
c@277	382 // tmatfile.open("/home/matthewd/Desktop/tmat.txt");
c@277	383
c@277	384 // for (uint i=0;i <delta.size(); i++)
c@277	385 // {
c@277	386 // for (uint j=0; j<delta[i].size(); j++)
c@277	387 // {
c@277	388 // tmatfile << rcfmat[i][j] << endl;
c@277	389 // }
c@277	390 // }
c@277	391
c@277	392 // tmatfile.close();
c@277	393
c@277	394 i_vec_t bestpath(T);
c@277	395 d_vec_t tmp_vec(Q);
c@277	396 for (uint i=0; i<Q; i++)
c@277	397 {
c@277	398 tmp_vec[i] = delta[T-1][i];
c@277	399 }
c@277	400
c@277	401
c@277	402 bestpath[T-1] = get_max_ind(tmp_vec);
c@277	403
c@277	404 for (uint t=T-2; t>0 ;t--)
c@277	405 {
c@277	406 bestpath[t] = psi[t+1][bestpath[t+1]];
c@277	407 }
c@277	408 // very weird hack!
c@277	409 bestpath[0] = psi[1][bestpath[1]];
c@277	410
c@277	411 // for (uint i=0; i<bestpath.size(); i++)
c@277	412 // {
c@277	413 // cout << bestpath[i] << endl;
c@277	414 // }
c@277	415
c@277	416
c@277	417 uint lastind = 0;
c@277	418 for (uint i=0; i<T; i++)
c@277	419 {
c@277	420 uint step = 128;
c@277	421 // cout << bestpath[i] << " " << i << endl;
c@277	422 for (uint j=0; j<step; j++)
c@277	423 {
c@277	424 lastind = i*step+j;
c@277	425 beat_period[lastind] = bestpath[i];
c@277	426
c@277	427 }
c@277	428 }
c@277	429
c@277	430 //fill in the last values...
c@277	431 for (uint i=lastind; i<beat_period.size(); i++)
c@277	432 {
c@277	433 beat_period[i] = beat_period[lastind];
c@277	434 }
c@277	435
c@277	436
c@277	437
c@277	438 }
c@277	439
c@277	440 double
c@277	441 TempoTrackV2::get_max_val(const d_vec_t &df)
c@277	442 {
c@277	443 double maxval = 0.;
c@277	444 for (uint i=0; i<df.size(); i++)
c@277	445 {
c@277	446
c@277	447 if (maxval < df[i])
c@277	448 {
c@277	449 maxval = df[i];
c@277	450 }
c@277	451
c@277	452 }
c@277	453
c@277	454
c@277	455 return maxval;
c@277	456
c@277	457 }
c@277	458
c@277	459 int
c@277	460 TempoTrackV2::get_max_ind(const d_vec_t &df)
c@277	461 {
c@277	462
c@277	463 double maxval = 0.;
c@277	464 int ind = 0;
c@277	465 for (uint i=0; i<df.size(); i++)
c@277	466 {
c@277	467 if (maxval < df[i])
c@277	468 {
c@277	469 maxval = df[i];
c@277	470 ind = i;
c@277	471 }
c@277	472
c@277	473 }
c@277	474
c@277	475 return ind;
c@277	476
c@277	477 }
c@277	478
c@277	479 void
c@277	480 TempoTrackV2::normalise_vec(d_vec_t &df)
c@277	481 {
c@277	482 double sum = 0.;
c@277	483 for (uint i=0; i<df.size(); i++)
c@277	484 {
c@277	485 sum += df[i];
c@277	486 }
c@277	487
c@277	488 for (uint i=0; i<df.size(); i++)
c@277	489 {
c@277	490 df[i]/= (sum + EPS);
c@277	491 }
c@277	492
c@277	493
c@277	494 }
c@277	495
c@277	496 void
c@277	497 TempoTrackV2::calculateBeats(const d_vec_t &df, const d_vec_t &beat_period,
c@277	498 d_vec_t &beats)
c@277	499 {
c@277	500
c@277	501 d_vec_t cumscore(df.size());
c@277	502 i_vec_t backlink(df.size());
c@277	503 d_vec_t localscore(df.size());
c@277	504
c@277	505 // WHEN I FIGURE OUT HOW, I'LL WANT TO DO SOME FILTERING ON THIS...
c@277	506 for (uint i=0; i<df.size(); i++)
c@277	507 {
c@277	508 localscore[i] = df[i];
c@277	509 backlink[i] = -1;
c@277	510 }
c@277	511
c@277	512 double tightness = 4.;
c@277	513 double alpha = 0.9;
c@277	514
c@277	515 // main loop
c@277	516 for (uint i=3*beat_period[0]; i<localscore.size(); i++)
c@277	517 {
c@277	518 int prange_min = -2*beat_period[i];
c@277	519 int prange_max = round(-0.5*beat_period[i]);
c@277	520
c@277	521 d_vec_t txwt (prange_max - prange_min + 1);
c@277	522 d_vec_t scorecands (txwt.size());
c@277	523
c@277	524 for (uint j=0;j<txwt.size();j++)
c@277	525 {
c@277	526 double mu = static_cast<double> (beat_period[i]);
c@277	527 txwt[j] = exp( -0.5pow(tightness log((round(2*mu)-j)/mu),2));
c@277	528
c@277	529 scorecands[j] = txwt[j] * cumscore[i+prange_min+j];
c@277	530 }
c@277	531
c@277	532 double vv = get_max_val(scorecands);
c@277	533 int xx = get_max_ind(scorecands);
c@277	534
c@277	535 cumscore[i] = alphavv + (1.-alpha)localscore[i];
c@277	536
c@277	537 backlink[i] = i+prange_min+xx;
c@277	538
c@277	539 }
c@277	540
c@277	541
c@277	542 d_vec_t tmp_vec;
c@277	543 for (uint i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
c@277	544 {
c@277	545 tmp_vec.push_back(cumscore[i]);
c@277	546 }
c@277	547
c@277	548 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
c@277	549
c@277	550 i_vec_t ibeats;
c@277	551 ibeats.push_back(startpoint);
c@277	552 while (backlink[ibeats.back()] > 3*beat_period[0])
c@277	553 {
c@277	554 ibeats.push_back(backlink[ibeats.back()]);
c@277	555 }
c@277	556
c@277	557
c@277	558 for (uint i=0; i<ibeats.size(); i++)
c@277	559 {
c@277	560
c@277	561 beats.push_back( static_cast<double>(ibeats[i]) );
c@277	562
c@277	563 // cout << ibeats[i] << " " << beats[i] <<endl;
c@277	564 }
c@277	565 }
c@277	566
c@277	567

Mercurial > hg > qm-dsp

annotate dsp/tempotracking/TempoTrackV2.cpp @ 277:09bceb0aeff6