qm-dsp: dsp/tempotracking/TempoTrackV2.cpp annotate

annotate dsp/tempotracking/TempoTrackV2.cpp @ 309:d5014ab8b0e5

* Add GPL and README; some tidying

author	Chris Cannam <c.cannam@qmul.ac.uk>
date	Mon, 13 Dec 2010 14:55:28 +0000
parents	702ff8c08137
children	d7619173d43c 37449f085a4c

rev	line source
c@277	1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
c@277	2
c@277	3 /*
c@277	4 QM DSP Library
c@277	5
c@277	6 Centre for Digital Music, Queen Mary, University of London.
c@277	7 This file copyright 2008-2009 Matthew Davies and QMUL.
c@309	8
c@309	9 This program is free software; you can redistribute it and/or
c@309	10 modify it under the terms of the GNU General Public License as
c@309	11 published by the Free Software Foundation; either version 2 of the
c@309	12 License, or (at your option) any later version. See the file
c@309	13 COPYING included with this distribution for more information.
c@277	14 */
c@277	15
c@277	16 #include "TempoTrackV2.h"
c@277	17
c@277	18 #include <cmath>
c@277	19 #include <cstdlib>
c@278	20 #include <iostream>
c@277	21
c@279	22 #include "maths/MathUtilities.h"
c@277	23
c@277	24 #define EPS 0.0000008 // just some arbitrary small number
c@277	25
c@279	26 TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
c@279	27 m_rate(rate), m_increment(increment) { }
c@277	28 TempoTrackV2::~TempoTrackV2() { }
c@277	29
c@277	30 void
c@277	31 TempoTrackV2::filter_df(d_vec_t &df)
c@277	32 {
c@278	33 d_vec_t a(3);
c@278	34 d_vec_t b(3);
c@278	35 d_vec_t lp_df(df.size());
c@277	36
c@278	37 //equivalent in matlab to [b,a] = butter(2,0.4);
c@278	38 a[0] = 1.0000;
c@278	39 a[1] = -0.3695;
c@278	40 a[2] = 0.1958;
c@278	41 b[0] = 0.2066;
c@278	42 b[1] = 0.4131;
c@278	43 b[2] = 0.2066;
c@278	44
c@278	45 double inp1 = 0.;
c@278	46 double inp2 = 0.;
c@278	47 double out1 = 0.;
c@278	48 double out2 = 0.;
c@277	49
c@277	50
c@278	51 // forwards filtering
c@295	52 for (unsigned int i = 0;i < df.size();i++)
c@278	53 {
c@278	54 lp_df[i] = b[0]df[i] + b[1]inp1 + b[2]inp2 - a[1]out1 - a[2]*out2;
c@278	55 inp2 = inp1;
c@278	56 inp1 = df[i];
c@278	57 out2 = out1;
c@278	58 out1 = lp_df[i];
c@278	59 }
c@277	60
c@278	61 // copy forwards filtering to df...
c@278	62 // but, time-reversed, ready for backwards filtering
c@295	63 for (unsigned int i = 0;i < df.size();i++)
c@278	64 {
c@278	65 df[i] = lp_df[df.size()-i-1];
c@278	66 }
c@277	67
c@295	68 for (unsigned int i = 0;i < df.size();i++)
c@278	69 {
c@278	70 lp_df[i] = 0.;
c@278	71 }
c@277	72
c@278	73 inp1 = 0.; inp2 = 0.;
c@278	74 out1 = 0.; out2 = 0.;
c@277	75
c@277	76 // backwards filetering on time-reversed df
c@295	77 for (unsigned int i = 0;i < df.size();i++)
c@278	78 {
c@278	79 lp_df[i] = b[0]df[i] + b[1]inp1 + b[2]inp2 - a[1]out1 - a[2]*out2;
c@278	80 inp2 = inp1;
c@278	81 inp1 = df[i];
c@278	82 out2 = out1;
c@278	83 out1 = lp_df[i];
c@278	84 }
c@277	85
c@277	86 // write the re-reversed (i.e. forward) version back to df
c@295	87 for (unsigned int i = 0;i < df.size();i++)
c@278	88 {
c@278	89 df[i] = lp_df[df.size()-i-1];
c@278	90 }
c@277	91 }
c@277	92
c@277	93
c@277	94 void
c@304	95 TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
c@304	96 vector<double> &beat_period,
c@304	97 vector<double> &tempi)
c@277	98 {
c@278	99 // to follow matlab.. split into 512 sample frames with a 128 hop size
c@278	100 // calculate the acf,
c@278	101 // then the rcf.. and then stick the rcfs as columns of a matrix
c@278	102 // then call viterbi decoding with weight vector and transition matrix
c@278	103 // and get best path
c@277	104
c@295	105 unsigned int wv_len = 128;
c@278	106 double rayparam = 43.;
c@277	107
c@278	108 // make rayleigh weighting curve
c@278	109 d_vec_t wv(wv_len);
c@295	110 for (unsigned int i=0; i<wv.size(); i++)
c@277	111 {
c@278	112 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.pow(-static_cast<double> (i),2.)) / (2.pow(rayparam,2.)));
c@277	113 }
c@277	114
c@278	115 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
c@295	116 unsigned int winlen = 512;
c@295	117 unsigned int step = 128;
c@278	118
c@278	119 // matrix to store output of comb filter bank, increment column of matrix at each frame
c@278	120 d_mat_t rcfmat;
c@278	121 int col_counter = -1;
c@278	122
c@278	123 // main loop for beat period calculation
c@295	124 for (unsigned int i=0; i+winlen<df.size(); i+=step)
c@278	125 {
c@278	126 // get dfframe
c@278	127 d_vec_t dfframe(winlen);
c@295	128 for (unsigned int k=0; k<winlen; k++)
c@278	129 {
c@278	130 dfframe[k] = df[i+k];
c@278	131 }
c@278	132 // get rcf vector for current frame
c@278	133 d_vec_t rcf(wv_len);
c@278	134 get_rcf(dfframe,wv,rcf);
c@277	135
c@278	136 rcfmat.push_back( d_vec_t() ); // adds a new column
c@278	137 col_counter++;
c@295	138 for (unsigned int j=0; j<rcf.size(); j++)
c@278	139 {
c@278	140 rcfmat[col_counter].push_back( rcf[j] );
c@278	141 }
c@278	142 }
c@278	143
c@278	144 // now call viterbi decoding function
c@278	145 viterbi_decode(rcfmat,wv,beat_period,tempi);
c@277	146 }
c@277	147
c@277	148
c@277	149 void
c@277	150 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
c@277	151 {
c@278	152 // calculate autocorrelation function
c@278	153 // then rcf
c@278	154 // just hard code for now... don't really need separate functions to do this
c@277	155
c@278	156 // make acf
c@277	157
c@278	158 d_vec_t dfframe(dfframe_in);
c@277	159
c@279	160 MathUtilities::adaptiveThreshold(dfframe);
c@277	161
c@278	162 d_vec_t acf(dfframe.size());
c@277	163
c@278	164
c@295	165 for (unsigned int lag=0; lag<dfframe.size(); lag++)
c@278	166 {
c@278	167 double sum = 0.;
c@278	168 double tmp = 0.;
c@277	169
c@295	170 for (unsigned int n=0; n<(dfframe.size()-lag); n++)
c@278	171 {
c@278	172 tmp = dfframe[n] * dfframe[n+lag];
c@278	173 sum += tmp;
c@278	174 }
c@278	175 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
c@278	176 }
c@277	177
c@278	178 // now apply comb filtering
c@278	179 int numelem = 4;
c@278	180
c@295	181 for (unsigned int i = 2;i < rcf.size();i++) // max beat period
c@278	182 {
c@278	183 for (int a = 1;a <= numelem;a++) // number of comb elements
c@278	184 {
c@278	185 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
c@278	186 {
c@278	187 rcf[i-1] += ( acf[(ai+b)-1]wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
c@278	188 }
c@278	189 }
c@278	190 }
c@278	191
c@278	192 // apply adaptive threshold to rcf
c@279	193 MathUtilities::adaptiveThreshold(rcf);
c@278	194
c@278	195 double rcfsum =0.;
c@295	196 for (unsigned int i=0; i<rcf.size(); i++)
c@278	197 {
c@278	198 rcf[i] += EPS ;
c@278	199 rcfsum += rcf[i];
c@278	200 }
c@277	201
c@278	202 // normalise rcf to sum to unity
c@295	203 for (unsigned int i=0; i<rcf.size(); i++)
c@277	204 {
c@278	205 rcf[i] /= (rcfsum + EPS);
c@277	206 }
c@277	207 }
c@277	208
c@277	209 void
c@278	210 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
c@277	211 {
c@278	212 // following Kevin Murphy's Viterbi decoding to get best path of
c@278	213 // beat periods through rfcmat
c@277	214
c@278	215 // make transition matrix
c@278	216 d_mat_t tmat;
c@295	217 for (unsigned int i=0;i<wv.size();i++)
c@278	218 {
c@278	219 tmat.push_back ( d_vec_t() ); // adds a new column
c@295	220 for (unsigned int j=0; j<wv.size(); j++)
c@278	221 {
c@278	222 tmat[i].push_back(0.); // fill with zeros initially
c@278	223 }
c@278	224 }
c@278	225
c@278	226 // variance of Gaussians in transition matrix
c@278	227 // formed of Gaussians on diagonal - implies slow tempo change
c@278	228 double sigma = 8.;
c@278	229 // don't want really short beat periods, or really long ones
c@295	230 for (unsigned int i=20;i <wv.size()-20; i++)
c@278	231 {
c@295	232 for (unsigned int j=20; j<wv.size()-20; j++)
c@278	233 {
c@278	234 double mu = static_cast<double>(i);
c@278	235 tmat[i][j] = exp( (-1.pow((j-mu),2.)) / (2.pow(sigma,2.)) );
c@278	236 }
c@278	237 }
c@277	238
c@278	239 // parameters for Viterbi decoding... this part is taken from
c@278	240 // Murphy's matlab
c@277	241
c@278	242 d_mat_t delta;
c@278	243 i_mat_t psi;
c@295	244 for (unsigned int i=0;i <rcfmat.size(); i++)
c@278	245 {
c@278	246 delta.push_back( d_vec_t());
c@278	247 psi.push_back( i_vec_t());
c@295	248 for (unsigned int j=0; j<rcfmat[i].size(); j++)
c@278	249 {
c@278	250 delta[i].push_back(0.); // fill with zeros initially
c@278	251 psi[i].push_back(0); // fill with zeros initially
c@278	252 }
c@278	253 }
c@277	254
c@277	255
c@295	256 unsigned int T = delta.size();
c@281	257
c@281	258 if (T < 2) return; // can't do anything at all meaningful
c@281	259
c@295	260 unsigned int Q = delta[0].size();
c@277	261
c@278	262 // initialize first column of delta
c@295	263 for (unsigned int j=0; j<Q; j++)
c@277	264 {
c@278	265 delta[0][j] = wv[j] * rcfmat[0][j];
c@278	266 psi[0][j] = 0;
c@277	267 }
c@278	268
c@277	269 double deltasum = 0.;
c@295	270 for (unsigned int i=0; i<Q; i++)
c@277	271 {
c@278	272 deltasum += delta[0][i];
c@277	273 }
c@295	274 for (unsigned int i=0; i<Q; i++)
c@277	275 {
c@278	276 delta[0][i] /= (deltasum + EPS);
c@277	277 }
c@277	278
c@277	279
c@295	280 for (unsigned int t=1; t<T; t++)
c@278	281 {
c@278	282 d_vec_t tmp_vec(Q);
c@277	283
c@295	284 for (unsigned int j=0; j<Q; j++)
c@278	285 {
c@295	286 for (unsigned int i=0; i<Q; i++)
c@278	287 {
c@278	288 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
c@278	289 }
c@278	290
c@278	291 delta[t][j] = get_max_val(tmp_vec);
c@277	292
c@278	293 psi[t][j] = get_max_ind(tmp_vec);
c@278	294
c@278	295 delta[t][j] *= rcfmat[t][j];
c@278	296 }
c@277	297
c@278	298 // normalise current delta column
c@278	299 double deltasum = 0.;
c@295	300 for (unsigned int i=0; i<Q; i++)
c@278	301 {
c@278	302 deltasum += delta[t][i];
c@278	303 }
c@295	304 for (unsigned int i=0; i<Q; i++)
c@278	305 {
c@278	306 delta[t][i] /= (deltasum + EPS);
c@278	307 }
c@278	308 }
c@277	309
c@278	310 i_vec_t bestpath(T);
c@278	311 d_vec_t tmp_vec(Q);
c@295	312 for (unsigned int i=0; i<Q; i++)
c@278	313 {
c@278	314 tmp_vec[i] = delta[T-1][i];
c@278	315 }
c@277	316
c@278	317 // find starting point - best beat period for "last" frame
c@278	318 bestpath[T-1] = get_max_ind(tmp_vec);
c@278	319
c@278	320 // backtrace through index of maximum values in psi
c@295	321 for (unsigned int t=T-2; t>0 ;t--)
c@278	322 {
c@278	323 bestpath[t] = psi[t+1][bestpath[t+1]];
c@278	324 }
c@277	325
c@278	326 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
c@278	327 bestpath[0] = psi[1][bestpath[1]];
c@277	328
c@295	329 unsigned int lastind = 0;
c@295	330 for (unsigned int i=0; i<T; i++)
c@278	331 {
c@295	332 unsigned int step = 128;
c@295	333 for (unsigned int j=0; j<step; j++)
c@278	334 {
c@278	335 lastind = i*step+j;
c@278	336 beat_period[lastind] = bestpath[i];
c@278	337 }
c@282	338 // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << istep << " to " << istep+step-1 << ")" << std::endl;
c@278	339 }
c@277	340
c@278	341 //fill in the last values...
c@295	342 for (unsigned int i=lastind; i<beat_period.size(); i++)
c@278	343 {
c@278	344 beat_period[i] = beat_period[lastind];
c@278	345 }
c@277	346
c@295	347 for (unsigned int i = 0; i < beat_period.size(); i++)
c@277	348 {
c@279	349 tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
c@277	350 }
c@277	351 }
c@277	352
c@277	353 double
c@277	354 TempoTrackV2::get_max_val(const d_vec_t &df)
c@277	355 {
c@278	356 double maxval = 0.;
c@295	357 for (unsigned int i=0; i<df.size(); i++)
c@277	358 {
c@278	359 if (maxval < df[i])
c@278	360 {
c@278	361 maxval = df[i];
c@278	362 }
c@277	363 }
c@277	364
c@278	365 return maxval;
c@277	366 }
c@277	367
c@277	368 int
c@277	369 TempoTrackV2::get_max_ind(const d_vec_t &df)
c@277	370 {
c@278	371 double maxval = 0.;
c@278	372 int ind = 0;
c@295	373 for (unsigned int i=0; i<df.size(); i++)
c@277	374 {
c@278	375 if (maxval < df[i])
c@278	376 {
c@278	377 maxval = df[i];
c@278	378 ind = i;
c@278	379 }
c@277	380 }
c@278	381
c@278	382 return ind;
c@277	383 }
c@277	384
c@277	385 void
c@277	386 TempoTrackV2::normalise_vec(d_vec_t &df)
c@277	387 {
c@278	388 double sum = 0.;
c@295	389 for (unsigned int i=0; i<df.size(); i++)
c@278	390 {
c@278	391 sum += df[i];
c@278	392 }
c@278	393
c@295	394 for (unsigned int i=0; i<df.size(); i++)
c@278	395 {
c@278	396 df[i]/= (sum + EPS);
c@278	397 }
c@277	398 }
c@277	399
c@277	400 void
c@304	401 TempoTrackV2::calculateBeats(const vector<double> &df,
c@304	402 const vector<double> &beat_period,
c@304	403 vector<double> &beats)
c@277	404 {
c@281	405 if (df.empty() \|\| beat_period.empty()) return;
c@281	406
c@278	407 d_vec_t cumscore(df.size()); // store cumulative score
c@278	408 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
c@278	409 d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
c@277	410
c@295	411 for (unsigned int i=0; i<df.size(); i++)
c@277	412 {
c@278	413 localscore[i] = df[i];
c@278	414 backlink[i] = -1;
c@277	415 }
c@277	416
c@278	417 double tightness = 4.;
c@278	418 double alpha = 0.9;
c@277	419
c@278	420 // main loop
c@295	421 for (unsigned int i=0; i<localscore.size(); i++)
c@278	422 {
c@278	423 int prange_min = -2*beat_period[i];
c@278	424 int prange_max = round(-0.5*beat_period[i]);
c@277	425
c@278	426 // transition range
c@278	427 d_vec_t txwt (prange_max - prange_min + 1);
c@278	428 d_vec_t scorecands (txwt.size());
c@277	429
c@295	430 for (unsigned int j=0;j<txwt.size();j++)
c@278	431 {
c@278	432 double mu = static_cast<double> (beat_period[i]);
c@278	433 txwt[j] = exp( -0.5pow(tightness log((round(2*mu)-j)/mu),2));
c@277	434
c@278	435 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
c@278	436 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
c@277	437
c@278	438 int cscore_ind = i+prange_min+j;
c@278	439 if (cscore_ind >= 0)
c@278	440 {
c@278	441 scorecands[j] = txwt[j] * cumscore[cscore_ind];
c@278	442 }
c@278	443 }
c@277	444
c@278	445 // find max value and index of maximum value
c@278	446 double vv = get_max_val(scorecands);
c@278	447 int xx = get_max_ind(scorecands);
c@277	448
c@278	449 cumscore[i] = alphavv + (1.-alpha)localscore[i];
c@278	450 backlink[i] = i+prange_min+xx;
c@280	451
c@282	452 // std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
c@278	453 }
c@278	454
c@278	455 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
c@278	456 d_vec_t tmp_vec;
c@295	457 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
c@278	458 {
c@278	459 tmp_vec.push_back(cumscore[i]);
c@278	460 }
c@278	461
c@278	462 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
c@278	463
c@281	464 // can happen if no results obtained earlier (e.g. input too short)
c@281	465 if (startpoint >= backlink.size()) startpoint = backlink.size()-1;
c@281	466
c@278	467 // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
c@278	468 // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
c@278	469 i_vec_t ibeats;
c@278	470 ibeats.push_back(startpoint);
c@282	471 // std::cerr << "startpoint = " << startpoint << std::endl;
c@278	472 while (backlink[ibeats.back()] > 0)
c@278	473 {
c@282	474 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
c@281	475 int b = ibeats.back();
c@281	476 if (backlink[b] == b) break; // shouldn't happen... haha
c@281	477 ibeats.push_back(backlink[b]);
c@278	478 }
c@277	479
c@278	480 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
c@295	481 for (unsigned int i=0; i<ibeats.size(); i++)
c@278	482 {
c@278	483 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
c@278	484 }
c@277	485 }
c@277	486
c@277	487

Mercurial > hg > qm-dsp

annotate dsp/tempotracking/TempoTrackV2.cpp @ 309:d5014ab8b0e5