qm-dsp: dsp/tempotracking/TempoTrackV2.cpp annotate

annotate dsp/tempotracking/TempoTrackV2.cpp @ 298:255e431ae3d4

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.

author	Chris Cannam <c.cannam@qmul.ac.uk>
date	Fri, 05 Jun 2009 15:12:39 +0000
parents	1c9258dd155e
children	054c384d860d

rev	line source
c@277	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
c@277	2
c@277	3 /*
c@277	4 QM DSP Library
c@277	5
c@277	6 Centre for Digital Music, Queen Mary, University of London.
c@277	7 This file copyright 2008-2009 Matthew Davies and QMUL.
c@277	8 All rights reserved.
c@277	9 */
c@277	10
c@277	11 #include "TempoTrackV2.h"
c@277	12
c@277	13 #include <cmath>
c@277	14 #include <cstdlib>
c@278	15 #include <iostream>
c@277	16
c@279	17 #include "maths/MathUtilities.h"
c@277	18
c@277	19 #define EPS 0.0000008 // just some arbitrary small number
c@277	20
c@279	21 TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
c@279	22 m_rate(rate), m_increment(increment) { }
c@277	23 TempoTrackV2::~TempoTrackV2() { }
c@277	24
c@277	25 void
c@277	26 TempoTrackV2::filter_df(d_vec_t &df)
c@277	27 {
c@278	28 d_vec_t a(3);
c@278	29 d_vec_t b(3);
c@278	30 d_vec_t lp_df(df.size());
c@277	31
c@278	32 //equivalent in matlab to [b,a] = butter(2,0.4);
c@278	33 a[0] = 1.0000;
c@278	34 a[1] = -0.3695;
c@278	35 a[2] = 0.1958;
c@278	36 b[0] = 0.2066;
c@278	37 b[1] = 0.4131;
c@278	38 b[2] = 0.2066;
c@278	39
c@278	40 double inp1 = 0.;
c@278	41 double inp2 = 0.;
c@278	42 double out1 = 0.;
c@278	43 double out2 = 0.;
c@277	44
c@277	45
c@278	46 // forwards filtering
c@295	47 for (unsigned int i = 0;i < df.size();i++)
c@278	48 {
c@278	49 lp_df[i] = b[0]df[i] + b[1]inp1 + b[2]inp2 - a[1]out1 - a[2]*out2;
c@278	50 inp2 = inp1;
c@278	51 inp1 = df[i];
c@278	52 out2 = out1;
c@278	53 out1 = lp_df[i];
c@278	54 }
c@277	55
c@278	56 // copy forwards filtering to df...
c@278	57 // but, time-reversed, ready for backwards filtering
c@295	58 for (unsigned int i = 0;i < df.size();i++)
c@278	59 {
c@278	60 df[i] = lp_df[df.size()-i-1];
c@278	61 }
c@277	62
c@295	63 for (unsigned int i = 0;i < df.size();i++)
c@278	64 {
c@278	65 lp_df[i] = 0.;
c@278	66 }
c@277	67
c@278	68 inp1 = 0.; inp2 = 0.;
c@278	69 out1 = 0.; out2 = 0.;
c@277	70
c@277	71 // backwards filetering on time-reversed df
c@295	72 for (unsigned int i = 0;i < df.size();i++)
c@278	73 {
c@278	74 lp_df[i] = b[0]df[i] + b[1]inp1 + b[2]inp2 - a[1]out1 - a[2]*out2;
c@278	75 inp2 = inp1;
c@278	76 inp1 = df[i];
c@278	77 out2 = out1;
c@278	78 out1 = lp_df[i];
c@278	79 }
c@277	80
c@277	81 // write the re-reversed (i.e. forward) version back to df
c@295	82 for (unsigned int i = 0;i < df.size();i++)
c@278	83 {
c@278	84 df[i] = lp_df[df.size()-i-1];
c@278	85 }
c@277	86 }
c@277	87
c@277	88
c@277	89 void
c@278	90 TempoTrackV2::calculateBeatPeriod(const d_vec_t &df, d_vec_t &beat_period,
c@278	91 d_vec_t &tempi)
c@277	92 {
c@278	93 // to follow matlab.. split into 512 sample frames with a 128 hop size
c@278	94 // calculate the acf,
c@278	95 // then the rcf.. and then stick the rcfs as columns of a matrix
c@278	96 // then call viterbi decoding with weight vector and transition matrix
c@278	97 // and get best path
c@277	98
c@295	99 unsigned int wv_len = 128;
c@278	100 double rayparam = 43.;
c@277	101
c@278	102 // make rayleigh weighting curve
c@278	103 d_vec_t wv(wv_len);
c@295	104 for (unsigned int i=0; i<wv.size(); i++)
c@277	105 {
c@278	106 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.pow(-static_cast<double> (i),2.)) / (2.pow(rayparam,2.)));
c@277	107 }
c@277	108
c@278	109 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
c@295	110 unsigned int winlen = 512;
c@295	111 unsigned int step = 128;
c@278	112
c@278	113 // matrix to store output of comb filter bank, increment column of matrix at each frame
c@278	114 d_mat_t rcfmat;
c@278	115 int col_counter = -1;
c@278	116
c@278	117 // main loop for beat period calculation
c@295	118 for (unsigned int i=0; i+winlen<df.size(); i+=step)
c@278	119 {
c@278	120 // get dfframe
c@278	121 d_vec_t dfframe(winlen);
c@295	122 for (unsigned int k=0; k<winlen; k++)
c@278	123 {
c@278	124 dfframe[k] = df[i+k];
c@278	125 }
c@278	126 // get rcf vector for current frame
c@278	127 d_vec_t rcf(wv_len);
c@278	128 get_rcf(dfframe,wv,rcf);
c@277	129
c@278	130 rcfmat.push_back( d_vec_t() ); // adds a new column
c@278	131 col_counter++;
c@295	132 for (unsigned int j=0; j<rcf.size(); j++)
c@278	133 {
c@278	134 rcfmat[col_counter].push_back( rcf[j] );
c@278	135 }
c@278	136 }
c@278	137
c@278	138 // now call viterbi decoding function
c@278	139 viterbi_decode(rcfmat,wv,beat_period,tempi);
c@277	140 }
c@277	141
c@277	142
c@277	143 void
c@277	144 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
c@277	145 {
c@278	146 // calculate autocorrelation function
c@278	147 // then rcf
c@278	148 // just hard code for now... don't really need separate functions to do this
c@277	149
c@278	150 // make acf
c@277	151
c@278	152 d_vec_t dfframe(dfframe_in);
c@277	153
c@279	154 MathUtilities::adaptiveThreshold(dfframe);
c@277	155
c@278	156 d_vec_t acf(dfframe.size());
c@277	157
c@278	158
c@295	159 for (unsigned int lag=0; lag<dfframe.size(); lag++)
c@278	160 {
c@278	161 double sum = 0.;
c@278	162 double tmp = 0.;
c@277	163
c@295	164 for (unsigned int n=0; n<(dfframe.size()-lag); n++)
c@278	165 {
c@278	166 tmp = dfframe[n] * dfframe[n+lag];
c@278	167 sum += tmp;
c@278	168 }
c@278	169 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
c@278	170 }
c@277	171
c@278	172 // now apply comb filtering
c@278	173 int numelem = 4;
c@278	174
c@295	175 for (unsigned int i = 2;i < rcf.size();i++) // max beat period
c@278	176 {
c@278	177 for (int a = 1;a <= numelem;a++) // number of comb elements
c@278	178 {
c@278	179 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
c@278	180 {
c@278	181 rcf[i-1] += ( acf[(ai+b)-1]wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
c@278	182 }
c@278	183 }
c@278	184 }
c@278	185
c@278	186 // apply adaptive threshold to rcf
c@279	187 MathUtilities::adaptiveThreshold(rcf);
c@278	188
c@278	189 double rcfsum =0.;
c@295	190 for (unsigned int i=0; i<rcf.size(); i++)
c@278	191 {
c@278	192 rcf[i] += EPS ;
c@278	193 rcfsum += rcf[i];
c@278	194 }
c@277	195
c@278	196 // normalise rcf to sum to unity
c@295	197 for (unsigned int i=0; i<rcf.size(); i++)
c@277	198 {
c@278	199 rcf[i] /= (rcfsum + EPS);
c@277	200 }
c@277	201 }
c@277	202
c@277	203 void
c@278	204 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
c@277	205 {
c@278	206 // following Kevin Murphy's Viterbi decoding to get best path of
c@278	207 // beat periods through rfcmat
c@277	208
c@278	209 // make transition matrix
c@278	210 d_mat_t tmat;
c@295	211 for (unsigned int i=0;i<wv.size();i++)
c@278	212 {
c@278	213 tmat.push_back ( d_vec_t() ); // adds a new column
c@295	214 for (unsigned int j=0; j<wv.size(); j++)
c@278	215 {
c@278	216 tmat[i].push_back(0.); // fill with zeros initially
c@278	217 }
c@278	218 }
c@278	219
c@278	220 // variance of Gaussians in transition matrix
c@278	221 // formed of Gaussians on diagonal - implies slow tempo change
c@278	222 double sigma = 8.;
c@278	223 // don't want really short beat periods, or really long ones
c@295	224 for (unsigned int i=20;i <wv.size()-20; i++)
c@278	225 {
c@295	226 for (unsigned int j=20; j<wv.size()-20; j++)
c@278	227 {
c@278	228 double mu = static_cast<double>(i);
c@278	229 tmat[i][j] = exp( (-1.pow((j-mu),2.)) / (2.pow(sigma,2.)) );
c@278	230 }
c@278	231 }
c@277	232
c@278	233 // parameters for Viterbi decoding... this part is taken from
c@278	234 // Murphy's matlab
c@277	235
c@278	236 d_mat_t delta;
c@278	237 i_mat_t psi;
c@295	238 for (unsigned int i=0;i <rcfmat.size(); i++)
c@278	239 {
c@278	240 delta.push_back( d_vec_t());
c@278	241 psi.push_back( i_vec_t());
c@295	242 for (unsigned int j=0; j<rcfmat[i].size(); j++)
c@278	243 {
c@278	244 delta[i].push_back(0.); // fill with zeros initially
c@278	245 psi[i].push_back(0); // fill with zeros initially
c@278	246 }
c@278	247 }
c@277	248
c@277	249
c@295	250 unsigned int T = delta.size();
c@281	251
c@281	252 if (T < 2) return; // can't do anything at all meaningful
c@281	253
c@295	254 unsigned int Q = delta[0].size();
c@277	255
c@278	256 // initialize first column of delta
c@295	257 for (unsigned int j=0; j<Q; j++)
c@277	258 {
c@278	259 delta[0][j] = wv[j] * rcfmat[0][j];
c@278	260 psi[0][j] = 0;
c@277	261 }
c@278	262
c@277	263 double deltasum = 0.;
c@295	264 for (unsigned int i=0; i<Q; i++)
c@277	265 {
c@278	266 deltasum += delta[0][i];
c@277	267 }
c@295	268 for (unsigned int i=0; i<Q; i++)
c@277	269 {
c@278	270 delta[0][i] /= (deltasum + EPS);
c@277	271 }
c@277	272
c@277	273
c@295	274 for (unsigned int t=1; t<T; t++)
c@278	275 {
c@278	276 d_vec_t tmp_vec(Q);
c@277	277
c@295	278 for (unsigned int j=0; j<Q; j++)
c@278	279 {
c@295	280 for (unsigned int i=0; i<Q; i++)
c@278	281 {
c@278	282 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
c@278	283 }
c@278	284
c@278	285 delta[t][j] = get_max_val(tmp_vec);
c@277	286
c@278	287 psi[t][j] = get_max_ind(tmp_vec);
c@278	288
c@278	289 delta[t][j] *= rcfmat[t][j];
c@278	290 }
c@277	291
c@278	292 // normalise current delta column
c@278	293 double deltasum = 0.;
c@295	294 for (unsigned int i=0; i<Q; i++)
c@278	295 {
c@278	296 deltasum += delta[t][i];
c@278	297 }
c@295	298 for (unsigned int i=0; i<Q; i++)
c@278	299 {
c@278	300 delta[t][i] /= (deltasum + EPS);
c@278	301 }
c@278	302 }
c@277	303
c@278	304 i_vec_t bestpath(T);
c@278	305 d_vec_t tmp_vec(Q);
c@295	306 for (unsigned int i=0; i<Q; i++)
c@278	307 {
c@278	308 tmp_vec[i] = delta[T-1][i];
c@278	309 }
c@277	310
c@278	311 // find starting point - best beat period for "last" frame
c@278	312 bestpath[T-1] = get_max_ind(tmp_vec);
c@278	313
c@278	314 // backtrace through index of maximum values in psi
c@295	315 for (unsigned int t=T-2; t>0 ;t--)
c@278	316 {
c@278	317 bestpath[t] = psi[t+1][bestpath[t+1]];
c@278	318 }
c@277	319
c@278	320 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
c@278	321 bestpath[0] = psi[1][bestpath[1]];
c@277	322
c@295	323 unsigned int lastind = 0;
c@295	324 for (unsigned int i=0; i<T; i++)
c@278	325 {
c@295	326 unsigned int step = 128;
c@295	327 for (unsigned int j=0; j<step; j++)
c@278	328 {
c@278	329 lastind = i*step+j;
c@278	330 beat_period[lastind] = bestpath[i];
c@278	331 }
c@282	332 // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << istep << " to " << istep+step-1 << ")" << std::endl;
c@278	333 }
c@277	334
c@278	335 //fill in the last values...
c@295	336 for (unsigned int i=lastind; i<beat_period.size(); i++)
c@278	337 {
c@278	338 beat_period[i] = beat_period[lastind];
c@278	339 }
c@277	340
c@295	341 for (unsigned int i = 0; i < beat_period.size(); i++)
c@277	342 {
c@279	343 tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
c@277	344 }
c@277	345 }
c@277	346
c@277	347 double
c@277	348 TempoTrackV2::get_max_val(const d_vec_t &df)
c@277	349 {
c@278	350 double maxval = 0.;
c@295	351 for (unsigned int i=0; i<df.size(); i++)
c@277	352 {
c@278	353 if (maxval < df[i])
c@278	354 {
c@278	355 maxval = df[i];
c@278	356 }
c@277	357 }
c@277	358
c@278	359 return maxval;
c@277	360 }
c@277	361
c@277	362 int
c@277	363 TempoTrackV2::get_max_ind(const d_vec_t &df)
c@277	364 {
c@278	365 double maxval = 0.;
c@278	366 int ind = 0;
c@295	367 for (unsigned int i=0; i<df.size(); i++)
c@277	368 {
c@278	369 if (maxval < df[i])
c@278	370 {
c@278	371 maxval = df[i];
c@278	372 ind = i;
c@278	373 }
c@277	374 }
c@278	375
c@278	376 return ind;
c@277	377 }
c@277	378
c@277	379 void
c@277	380 TempoTrackV2::normalise_vec(d_vec_t &df)
c@277	381 {
c@278	382 double sum = 0.;
c@295	383 for (unsigned int i=0; i<df.size(); i++)
c@278	384 {
c@278	385 sum += df[i];
c@278	386 }
c@278	387
c@295	388 for (unsigned int i=0; i<df.size(); i++)
c@278	389 {
c@278	390 df[i]/= (sum + EPS);
c@278	391 }
c@277	392 }
c@277	393
c@277	394 void
c@277	395 TempoTrackV2::calculateBeats(const d_vec_t &df, const d_vec_t &beat_period,
c@277	396 d_vec_t &beats)
c@277	397 {
c@281	398 if (df.empty() \|\| beat_period.empty()) return;
c@281	399
c@278	400 d_vec_t cumscore(df.size()); // store cumulative score
c@278	401 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
c@278	402 d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
c@277	403
c@295	404 for (unsigned int i=0; i<df.size(); i++)
c@277	405 {
c@278	406 localscore[i] = df[i];
c@278	407 backlink[i] = -1;
c@277	408 }
c@277	409
c@278	410 double tightness = 4.;
c@278	411 double alpha = 0.9;
c@277	412
c@278	413 // main loop
c@295	414 for (unsigned int i=0; i<localscore.size(); i++)
c@278	415 {
c@278	416 int prange_min = -2*beat_period[i];
c@278	417 int prange_max = round(-0.5*beat_period[i]);
c@277	418
c@278	419 // transition range
c@278	420 d_vec_t txwt (prange_max - prange_min + 1);
c@278	421 d_vec_t scorecands (txwt.size());
c@277	422
c@295	423 for (unsigned int j=0;j<txwt.size();j++)
c@278	424 {
c@278	425 double mu = static_cast<double> (beat_period[i]);
c@278	426 txwt[j] = exp( -0.5pow(tightness log((round(2*mu)-j)/mu),2));
c@277	427
c@278	428 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
c@278	429 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
c@277	430
c@278	431 int cscore_ind = i+prange_min+j;
c@278	432 if (cscore_ind >= 0)
c@278	433 {
c@278	434 scorecands[j] = txwt[j] * cumscore[cscore_ind];
c@278	435 }
c@278	436 }
c@277	437
c@278	438 // find max value and index of maximum value
c@278	439 double vv = get_max_val(scorecands);
c@278	440 int xx = get_max_ind(scorecands);
c@277	441
c@278	442 cumscore[i] = alphavv + (1.-alpha)localscore[i];
c@278	443 backlink[i] = i+prange_min+xx;
c@280	444
c@282	445 // std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
c@278	446 }
c@278	447
c@278	448 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
c@278	449 d_vec_t tmp_vec;
c@295	450 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
c@278	451 {
c@278	452 tmp_vec.push_back(cumscore[i]);
c@278	453 }
c@278	454
c@278	455 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
c@278	456
c@281	457 // can happen if no results obtained earlier (e.g. input too short)
c@281	458 if (startpoint >= backlink.size()) startpoint = backlink.size()-1;
c@281	459
c@278	460 // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
c@278	461 // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
c@278	462 i_vec_t ibeats;
c@278	463 ibeats.push_back(startpoint);
c@282	464 // std::cerr << "startpoint = " << startpoint << std::endl;
c@278	465 while (backlink[ibeats.back()] > 0)
c@278	466 {
c@282	467 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
c@281	468 int b = ibeats.back();
c@281	469 if (backlink[b] == b) break; // shouldn't happen... haha
c@281	470 ibeats.push_back(backlink[b]);
c@278	471 }
c@277	472
c@278	473 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
c@295	474 for (unsigned int i=0; i<ibeats.size(); i++)
c@278	475 {
c@278	476 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
c@278	477 }
c@277	478 }
c@277	479
c@277	480

Mercurial > hg > qm-dsp

annotate dsp/tempotracking/TempoTrackV2.cpp @ 298:255e431ae3d4