qm-dsp: dsp/tempotracking/TempoTrackV2.cpp annotate

annotate dsp/tempotracking/TempoTrackV2.cpp @ 321:f1e6be2de9a5

A threshold (delta) is added in the peak picking parameters structure (PPickParams). It is used as an offset when computing the smoothed detection function. A constructor for the structure PPickParams is also added to set the parameters to 0 when a structure instance is created. Hence programmes using the peak picking parameter structure and which do not set the delta parameter (e.g. QM Vamp note onset detector) won't be affected by the modifications. Functions modified: - dsp/onsets/PeakPicking.cpp - dsp/onsets/PeakPicking.h - dsp/signalconditioning/DFProcess.cpp - dsp/signalconditioning/DFProcess.h

author	mathieub <mathieu.barthet@eecs.qmul.ac.uk>
date	Mon, 20 Jun 2011 19:01:48 +0100
parents	d5014ab8b0e5
children	d7619173d43c 37449f085a4c

rev	line source
c@277	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
c@277	2
c@277	3 /*
c@277	4 QM DSP Library
c@277	5
c@277	6 Centre for Digital Music, Queen Mary, University of London.
c@277	7 This file copyright 2008-2009 Matthew Davies and QMUL.
c@309	8
c@309	9 This program is free software; you can redistribute it and/or
c@309	10 modify it under the terms of the GNU General Public License as
c@309	11 published by the Free Software Foundation; either version 2 of the
c@309	12 License, or (at your option) any later version. See the file
c@309	13 COPYING included with this distribution for more information.
c@277	14 */
c@277	15
c@277	16 #include "TempoTrackV2.h"
c@277	17
c@277	18 #include <cmath>
c@277	19 #include <cstdlib>
c@278	20 #include <iostream>
c@277	21
c@279	22 #include "maths/MathUtilities.h"
c@277	23
c@277	24 #define EPS 0.0000008 // just some arbitrary small number
c@277	25
c@279	26 TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
c@279	27 m_rate(rate), m_increment(increment) { }
c@277	28 TempoTrackV2::~TempoTrackV2() { }
c@277	29
c@277	30 void
c@277	31 TempoTrackV2::filter_df(d_vec_t &df)
c@277	32 {
c@278	33 d_vec_t a(3);
c@278	34 d_vec_t b(3);
c@278	35 d_vec_t lp_df(df.size());
c@277	36
c@278	37 //equivalent in matlab to [b,a] = butter(2,0.4);
c@278	38 a[0] = 1.0000;
c@278	39 a[1] = -0.3695;
c@278	40 a[2] = 0.1958;
c@278	41 b[0] = 0.2066;
c@278	42 b[1] = 0.4131;
c@278	43 b[2] = 0.2066;
c@278	44
c@278	45 double inp1 = 0.;
c@278	46 double inp2 = 0.;
c@278	47 double out1 = 0.;
c@278	48 double out2 = 0.;
c@277	49
c@277	50
c@278	51 // forwards filtering
c@295	52 for (unsigned int i = 0;i < df.size();i++)
c@278	53 {
c@278	54 lp_df[i] = b[0]df[i] + b[1]inp1 + b[2]inp2 - a[1]out1 - a[2]*out2;
c@278	55 inp2 = inp1;
c@278	56 inp1 = df[i];
c@278	57 out2 = out1;
c@278	58 out1 = lp_df[i];
c@278	59 }
c@277	60
c@278	61 // copy forwards filtering to df...
c@278	62 // but, time-reversed, ready for backwards filtering
c@295	63 for (unsigned int i = 0;i < df.size();i++)
c@278	64 {
c@278	65 df[i] = lp_df[df.size()-i-1];
c@278	66 }
c@277	67
c@295	68 for (unsigned int i = 0;i < df.size();i++)
c@278	69 {
c@278	70 lp_df[i] = 0.;
c@278	71 }
c@277	72
c@278	73 inp1 = 0.; inp2 = 0.;
c@278	74 out1 = 0.; out2 = 0.;
c@277	75
c@277	76 // backwards filetering on time-reversed df
c@295	77 for (unsigned int i = 0;i < df.size();i++)
c@278	78 {
c@278	79 lp_df[i] = b[0]df[i] + b[1]inp1 + b[2]inp2 - a[1]out1 - a[2]*out2;
c@278	80 inp2 = inp1;
c@278	81 inp1 = df[i];
c@278	82 out2 = out1;
c@278	83 out1 = lp_df[i];
c@278	84 }
c@277	85
c@277	86 // write the re-reversed (i.e. forward) version back to df
c@295	87 for (unsigned int i = 0;i < df.size();i++)
c@278	88 {
c@278	89 df[i] = lp_df[df.size()-i-1];
c@278	90 }
c@277	91 }
c@277	92
c@277	93
c@277	94 void
c@304	95 TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
c@304	96 vector<double> &beat_period,
c@304	97 vector<double> &tempi)
c@277	98 {
c@278	99 // to follow matlab.. split into 512 sample frames with a 128 hop size
c@278	100 // calculate the acf,
c@278	101 // then the rcf.. and then stick the rcfs as columns of a matrix
c@278	102 // then call viterbi decoding with weight vector and transition matrix
c@278	103 // and get best path
c@277	104
c@295	105 unsigned int wv_len = 128;
c@278	106 double rayparam = 43.;
c@277	107
c@278	108 // make rayleigh weighting curve
c@278	109 d_vec_t wv(wv_len);
c@295	110 for (unsigned int i=0; i<wv.size(); i++)
c@277	111 {
c@278	112 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.pow(-static_cast<double> (i),2.)) / (2.pow(rayparam,2.)));
c@277	113 }
c@277	114
c@278	115 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
c@295	116 unsigned int winlen = 512;
c@295	117 unsigned int step = 128;
c@278	118
c@278	119 // matrix to store output of comb filter bank, increment column of matrix at each frame
c@278	120 d_mat_t rcfmat;
c@278	121 int col_counter = -1;
c@278	122
c@278	123 // main loop for beat period calculation
c@295	124 for (unsigned int i=0; i+winlen<df.size(); i+=step)
c@278	125 {
c@278	126 // get dfframe
c@278	127 d_vec_t dfframe(winlen);
c@295	128 for (unsigned int k=0; k<winlen; k++)
c@278	129 {
c@278	130 dfframe[k] = df[i+k];
c@278	131 }
c@278	132 // get rcf vector for current frame
c@278	133 d_vec_t rcf(wv_len);
c@278	134 get_rcf(dfframe,wv,rcf);
c@277	135
c@278	136 rcfmat.push_back( d_vec_t() ); // adds a new column
c@278	137 col_counter++;
c@295	138 for (unsigned int j=0; j<rcf.size(); j++)
c@278	139 {
c@278	140 rcfmat[col_counter].push_back( rcf[j] );
c@278	141 }
c@278	142 }
c@278	143
c@278	144 // now call viterbi decoding function
c@278	145 viterbi_decode(rcfmat,wv,beat_period,tempi);
c@277	146 }
c@277	147
c@277	148
c@277	149 void
c@277	150 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
c@277	151 {
c@278	152 // calculate autocorrelation function
c@278	153 // then rcf
c@278	154 // just hard code for now... don't really need separate functions to do this
c@277	155
c@278	156 // make acf
c@277	157
c@278	158 d_vec_t dfframe(dfframe_in);
c@277	159
c@279	160 MathUtilities::adaptiveThreshold(dfframe);
c@277	161
c@278	162 d_vec_t acf(dfframe.size());
c@277	163
c@278	164
c@295	165 for (unsigned int lag=0; lag<dfframe.size(); lag++)
c@278	166 {
c@278	167 double sum = 0.;
c@278	168 double tmp = 0.;
c@277	169
c@295	170 for (unsigned int n=0; n<(dfframe.size()-lag); n++)
c@278	171 {
c@278	172 tmp = dfframe[n] * dfframe[n+lag];
c@278	173 sum += tmp;
c@278	174 }
c@278	175 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
c@278	176 }
c@277	177
c@278	178 // now apply comb filtering
c@278	179 int numelem = 4;
c@278	180
c@295	181 for (unsigned int i = 2;i < rcf.size();i++) // max beat period
c@278	182 {
c@278	183 for (int a = 1;a <= numelem;a++) // number of comb elements
c@278	184 {
c@278	185 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
c@278	186 {
c@278	187 rcf[i-1] += ( acf[(ai+b)-1]wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
c@278	188 }
c@278	189 }
c@278	190 }
c@278	191
c@278	192 // apply adaptive threshold to rcf
c@279	193 MathUtilities::adaptiveThreshold(rcf);
c@278	194
c@278	195 double rcfsum =0.;
c@295	196 for (unsigned int i=0; i<rcf.size(); i++)
c@278	197 {
c@278	198 rcf[i] += EPS ;
c@278	199 rcfsum += rcf[i];
c@278	200 }
c@277	201
c@278	202 // normalise rcf to sum to unity
c@295	203 for (unsigned int i=0; i<rcf.size(); i++)
c@277	204 {
c@278	205 rcf[i] /= (rcfsum + EPS);
c@277	206 }
c@277	207 }
c@277	208
c@277	209 void
c@278	210 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
c@277	211 {
c@278	212 // following Kevin Murphy's Viterbi decoding to get best path of
c@278	213 // beat periods through rfcmat
c@277	214
c@278	215 // make transition matrix
c@278	216 d_mat_t tmat;
c@295	217 for (unsigned int i=0;i<wv.size();i++)
c@278	218 {
c@278	219 tmat.push_back ( d_vec_t() ); // adds a new column
c@295	220 for (unsigned int j=0; j<wv.size(); j++)
c@278	221 {
c@278	222 tmat[i].push_back(0.); // fill with zeros initially
c@278	223 }
c@278	224 }
c@278	225
c@278	226 // variance of Gaussians in transition matrix
c@278	227 // formed of Gaussians on diagonal - implies slow tempo change
c@278	228 double sigma = 8.;
c@278	229 // don't want really short beat periods, or really long ones
c@295	230 for (unsigned int i=20;i <wv.size()-20; i++)
c@278	231 {
c@295	232 for (unsigned int j=20; j<wv.size()-20; j++)
c@278	233 {
c@278	234 double mu = static_cast<double>(i);
c@278	235 tmat[i][j] = exp( (-1.pow((j-mu),2.)) / (2.pow(sigma,2.)) );
c@278	236 }
c@278	237 }
c@277	238
c@278	239 // parameters for Viterbi decoding... this part is taken from
c@278	240 // Murphy's matlab
c@277	241
c@278	242 d_mat_t delta;
c@278	243 i_mat_t psi;
c@295	244 for (unsigned int i=0;i <rcfmat.size(); i++)
c@278	245 {
c@278	246 delta.push_back( d_vec_t());
c@278	247 psi.push_back( i_vec_t());
c@295	248 for (unsigned int j=0; j<rcfmat[i].size(); j++)
c@278	249 {
c@278	250 delta[i].push_back(0.); // fill with zeros initially
c@278	251 psi[i].push_back(0); // fill with zeros initially
c@278	252 }
c@278	253 }
c@277	254
c@277	255
c@295	256 unsigned int T = delta.size();
c@281	257
c@281	258 if (T < 2) return; // can't do anything at all meaningful
c@281	259
c@295	260 unsigned int Q = delta[0].size();
c@277	261
c@278	262 // initialize first column of delta
c@295	263 for (unsigned int j=0; j<Q; j++)
c@277	264 {
c@278	265 delta[0][j] = wv[j] * rcfmat[0][j];
c@278	266 psi[0][j] = 0;
c@277	267 }
c@278	268
c@277	269 double deltasum = 0.;
c@295	270 for (unsigned int i=0; i<Q; i++)
c@277	271 {
c@278	272 deltasum += delta[0][i];
c@277	273 }
c@295	274 for (unsigned int i=0; i<Q; i++)
c@277	275 {
c@278	276 delta[0][i] /= (deltasum + EPS);
c@277	277 }
c@277	278
c@277	279
c@295	280 for (unsigned int t=1; t<T; t++)
c@278	281 {
c@278	282 d_vec_t tmp_vec(Q);
c@277	283
c@295	284 for (unsigned int j=0; j<Q; j++)
c@278	285 {
c@295	286 for (unsigned int i=0; i<Q; i++)
c@278	287 {
c@278	288 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
c@278	289 }
c@278	290
c@278	291 delta[t][j] = get_max_val(tmp_vec);
c@277	292
c@278	293 psi[t][j] = get_max_ind(tmp_vec);
c@278	294
c@278	295 delta[t][j] *= rcfmat[t][j];
c@278	296 }
c@277	297
c@278	298 // normalise current delta column
c@278	299 double deltasum = 0.;
c@295	300 for (unsigned int i=0; i<Q; i++)
c@278	301 {
c@278	302 deltasum += delta[t][i];
c@278	303 }
c@295	304 for (unsigned int i=0; i<Q; i++)
c@278	305 {
c@278	306 delta[t][i] /= (deltasum + EPS);
c@278	307 }
c@278	308 }
c@277	309
c@278	310 i_vec_t bestpath(T);
c@278	311 d_vec_t tmp_vec(Q);
c@295	312 for (unsigned int i=0; i<Q; i++)
c@278	313 {
c@278	314 tmp_vec[i] = delta[T-1][i];
c@278	315 }
c@277	316
c@278	317 // find starting point - best beat period for "last" frame
c@278	318 bestpath[T-1] = get_max_ind(tmp_vec);
c@278	319
c@278	320 // backtrace through index of maximum values in psi
c@295	321 for (unsigned int t=T-2; t>0 ;t--)
c@278	322 {
c@278	323 bestpath[t] = psi[t+1][bestpath[t+1]];
c@278	324 }
c@277	325
c@278	326 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
c@278	327 bestpath[0] = psi[1][bestpath[1]];
c@277	328
c@295	329 unsigned int lastind = 0;
c@295	330 for (unsigned int i=0; i<T; i++)
c@278	331 {
c@295	332 unsigned int step = 128;
c@295	333 for (unsigned int j=0; j<step; j++)
c@278	334 {
c@278	335 lastind = i*step+j;
c@278	336 beat_period[lastind] = bestpath[i];
c@278	337 }
c@282	338 // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << istep << " to " << istep+step-1 << ")" << std::endl;
c@278	339 }
c@277	340
c@278	341 //fill in the last values...
c@295	342 for (unsigned int i=lastind; i<beat_period.size(); i++)
c@278	343 {
c@278	344 beat_period[i] = beat_period[lastind];
c@278	345 }
c@277	346
c@295	347 for (unsigned int i = 0; i < beat_period.size(); i++)
c@277	348 {
c@279	349 tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
c@277	350 }
c@277	351 }
c@277	352
c@277	353 double
c@277	354 TempoTrackV2::get_max_val(const d_vec_t &df)
c@277	355 {
c@278	356 double maxval = 0.;
c@295	357 for (unsigned int i=0; i<df.size(); i++)
c@277	358 {
c@278	359 if (maxval < df[i])
c@278	360 {
c@278	361 maxval = df[i];
c@278	362 }
c@277	363 }
c@277	364
c@278	365 return maxval;
c@277	366 }
c@277	367
c@277	368 int
c@277	369 TempoTrackV2::get_max_ind(const d_vec_t &df)
c@277	370 {
c@278	371 double maxval = 0.;
c@278	372 int ind = 0;
c@295	373 for (unsigned int i=0; i<df.size(); i++)
c@277	374 {
c@278	375 if (maxval < df[i])
c@278	376 {
c@278	377 maxval = df[i];
c@278	378 ind = i;
c@278	379 }
c@277	380 }
c@278	381
c@278	382 return ind;
c@277	383 }
c@277	384
c@277	385 void
c@277	386 TempoTrackV2::normalise_vec(d_vec_t &df)
c@277	387 {
c@278	388 double sum = 0.;
c@295	389 for (unsigned int i=0; i<df.size(); i++)
c@278	390 {
c@278	391 sum += df[i];
c@278	392 }
c@278	393
c@295	394 for (unsigned int i=0; i<df.size(); i++)
c@278	395 {
c@278	396 df[i]/= (sum + EPS);
c@278	397 }
c@277	398 }
c@277	399
c@277	400 void
c@304	401 TempoTrackV2::calculateBeats(const vector<double> &df,
c@304	402 const vector<double> &beat_period,
c@304	403 vector<double> &beats)
c@277	404 {
c@281	405 if (df.empty() \|\| beat_period.empty()) return;
c@281	406
c@278	407 d_vec_t cumscore(df.size()); // store cumulative score
c@278	408 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
c@278	409 d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
c@277	410
c@295	411 for (unsigned int i=0; i<df.size(); i++)
c@277	412 {
c@278	413 localscore[i] = df[i];
c@278	414 backlink[i] = -1;
c@277	415 }
c@277	416
c@278	417 double tightness = 4.;
c@278	418 double alpha = 0.9;
c@277	419
c@278	420 // main loop
c@295	421 for (unsigned int i=0; i<localscore.size(); i++)
c@278	422 {
c@278	423 int prange_min = -2*beat_period[i];
c@278	424 int prange_max = round(-0.5*beat_period[i]);
c@277	425
c@278	426 // transition range
c@278	427 d_vec_t txwt (prange_max - prange_min + 1);
c@278	428 d_vec_t scorecands (txwt.size());
c@277	429
c@295	430 for (unsigned int j=0;j<txwt.size();j++)
c@278	431 {
c@278	432 double mu = static_cast<double> (beat_period[i]);
c@278	433 txwt[j] = exp( -0.5pow(tightness log((round(2*mu)-j)/mu),2));
c@277	434
c@278	435 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
c@278	436 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
c@277	437
c@278	438 int cscore_ind = i+prange_min+j;
c@278	439 if (cscore_ind >= 0)
c@278	440 {
c@278	441 scorecands[j] = txwt[j] * cumscore[cscore_ind];
c@278	442 }
c@278	443 }
c@277	444
c@278	445 // find max value and index of maximum value
c@278	446 double vv = get_max_val(scorecands);
c@278	447 int xx = get_max_ind(scorecands);
c@277	448
c@278	449 cumscore[i] = alphavv + (1.-alpha)localscore[i];
c@278	450 backlink[i] = i+prange_min+xx;
c@280	451
c@282	452 // std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
c@278	453 }
c@278	454
c@278	455 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
c@278	456 d_vec_t tmp_vec;
c@295	457 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
c@278	458 {
c@278	459 tmp_vec.push_back(cumscore[i]);
c@278	460 }
c@278	461
c@278	462 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
c@278	463
c@281	464 // can happen if no results obtained earlier (e.g. input too short)
c@281	465 if (startpoint >= backlink.size()) startpoint = backlink.size()-1;
c@281	466
c@278	467 // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
c@278	468 // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
c@278	469 i_vec_t ibeats;
c@278	470 ibeats.push_back(startpoint);
c@282	471 // std::cerr << "startpoint = " << startpoint << std::endl;
c@278	472 while (backlink[ibeats.back()] > 0)
c@278	473 {
c@282	474 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
c@281	475 int b = ibeats.back();
c@281	476 if (backlink[b] == b) break; // shouldn't happen... haha
c@281	477 ibeats.push_back(backlink[b]);
c@278	478 }
c@277	479
c@278	480 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
c@295	481 for (unsigned int i=0; i<ibeats.size(); i++)
c@278	482 {
c@278	483 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
c@278	484 }
c@277	485 }
c@277	486
c@277	487

Mercurial > hg > qm-dsp

annotate dsp/tempotracking/TempoTrackV2.cpp @ 321:f1e6be2de9a5