Mercurial > hg > qm-dsp
view dsp/tempotracking/TempoTrackV2.cpp @ 104:1e433aaa44ad
Merge mepd_new_params branch
author | Chris Cannam |
---|---|
date | Mon, 02 Sep 2013 09:26:46 +0100 |
parents | 37449f085a4c d7619173d43c |
children | c32a2446f7fe |
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ /* QM DSP Library Centre for Digital Music, Queen Mary, University of London. This file copyright 2008-2009 Matthew Davies and QMUL. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. See the file COPYING included with this distribution for more information. */ #include "TempoTrackV2.h" #include <cmath> #include <cstdlib> #include <iostream> #include "maths/MathUtilities.h" #define EPS 0.0000008 // just some arbitrary small number TempoTrackV2::TempoTrackV2(float rate, size_t increment) : m_rate(rate), m_increment(increment) { } TempoTrackV2::~TempoTrackV2() { } void TempoTrackV2::filter_df(d_vec_t &df) { d_vec_t a(3); d_vec_t b(3); d_vec_t lp_df(df.size()); //equivalent in matlab to [b,a] = butter(2,0.4); a[0] = 1.0000; a[1] = -0.3695; a[2] = 0.1958; b[0] = 0.2066; b[1] = 0.4131; b[2] = 0.2066; double inp1 = 0.; double inp2 = 0.; double out1 = 0.; double out2 = 0.; // forwards filtering for (unsigned int i = 0;i < df.size();i++) { lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2; inp2 = inp1; inp1 = df[i]; out2 = out1; out1 = lp_df[i]; } // copy forwards filtering to df... // but, time-reversed, ready for backwards filtering for (unsigned int i = 0;i < df.size();i++) { df[i] = lp_df[df.size()-i-1]; } for (unsigned int i = 0;i < df.size();i++) { lp_df[i] = 0.; } inp1 = 0.; inp2 = 0.; out1 = 0.; out2 = 0.; // backwards filetering on time-reversed df for (unsigned int i = 0;i < df.size();i++) { lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2; inp2 = inp1; inp1 = df[i]; out2 = out1; out1 = lp_df[i]; } // write the re-reversed (i.e. forward) version back to df for (unsigned int i = 0;i < df.size();i++) { df[i] = lp_df[df.size()-i-1]; } } // MEPD 28/11/12 // This function now allows for a user to specify an inputtempo (in BPM) // and a flag "constraintempo" which replaces the general rayleigh weighting for periodicities // with a gaussian which is centered around the input tempo // Note, if inputtempo = 120 and constraintempo = false, then functionality is // as it was before void TempoTrackV2::calculateBeatPeriod(const vector<double> &df, vector<double> &beat_period, vector<double> &tempi, double inputtempo, bool constraintempo) { // to follow matlab.. split into 512 sample frames with a 128 hop size // calculate the acf, // then the rcf.. and then stick the rcfs as columns of a matrix // then call viterbi decoding with weight vector and transition matrix // and get best path unsigned int wv_len = 128; // MEPD 28/11/12 // the default value of inputtempo in the beat tracking plugin is 120 // so if the user specifies a different inputtempo, the rayparam will be updated // accordingly. // note: 60*44100/512 is a magic number // this might (will?) break if a user specifies a different frame rate for the onset detection function double rayparam = (60*44100/512)/inputtempo; // these debug statements can be removed. std::cout << "inputtempo" << inputtempo << std::endl; std::cout << "rayparam" << rayparam << std::endl; std::cout << "constraintempo" << constraintempo << std::endl; // make rayleigh weighting curve d_vec_t wv(wv_len); // check whether or not to use rayleigh weighting (if constraintempo is false) // or use gaussian weighting it (constraintempo is true) if (constraintempo) { for (unsigned int i=0; i<wv.size(); i++) { // MEPD 28/11/12 // do a gaussian weighting instead of rayleigh wv[i] = exp( (-1.*pow((static_cast<double> (i)-rayparam),2.)) / (2.*pow(rayparam/4.,2.)) ); } } else { for (unsigned int i=0; i<wv.size(); i++) { // MEPD 28/11/12 // standard rayleigh weighting over periodicities wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.))); } } // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds) unsigned int winlen = 512; unsigned int step = 128; // matrix to store output of comb filter bank, increment column of matrix at each frame d_mat_t rcfmat; int col_counter = -1; // main loop for beat period calculation for (unsigned int i=0; i+winlen<df.size(); i+=step) { // get dfframe d_vec_t dfframe(winlen); for (unsigned int k=0; k<winlen; k++) { dfframe[k] = df[i+k]; } // get rcf vector for current frame d_vec_t rcf(wv_len); get_rcf(dfframe,wv,rcf); rcfmat.push_back( d_vec_t() ); // adds a new column col_counter++; for (unsigned int j=0; j<rcf.size(); j++) { rcfmat[col_counter].push_back( rcf[j] ); } } // now call viterbi decoding function viterbi_decode(rcfmat,wv,beat_period,tempi); } void TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf) { // calculate autocorrelation function // then rcf // just hard code for now... don't really need separate functions to do this // make acf d_vec_t dfframe(dfframe_in); MathUtilities::adaptiveThreshold(dfframe); d_vec_t acf(dfframe.size()); for (unsigned int lag=0; lag<dfframe.size(); lag++) { double sum = 0.; double tmp = 0.; for (unsigned int n=0; n<(dfframe.size()-lag); n++) { tmp = dfframe[n] * dfframe[n+lag]; sum += tmp; } acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag)); } // now apply comb filtering int numelem = 4; for (unsigned int i = 2;i < rcf.size();i++) // max beat period { for (int a = 1;a <= numelem;a++) // number of comb elements { for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements { rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row } } } // apply adaptive threshold to rcf MathUtilities::adaptiveThreshold(rcf); double rcfsum =0.; for (unsigned int i=0; i<rcf.size(); i++) { rcf[i] += EPS ; rcfsum += rcf[i]; } // normalise rcf to sum to unity for (unsigned int i=0; i<rcf.size(); i++) { rcf[i] /= (rcfsum + EPS); } } void TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi) { // following Kevin Murphy's Viterbi decoding to get best path of // beat periods through rfcmat // make transition matrix d_mat_t tmat; for (unsigned int i=0;i<wv.size();i++) { tmat.push_back ( d_vec_t() ); // adds a new column for (unsigned int j=0; j<wv.size(); j++) { tmat[i].push_back(0.); // fill with zeros initially } } // variance of Gaussians in transition matrix // formed of Gaussians on diagonal - implies slow tempo change double sigma = 8.; // don't want really short beat periods, or really long ones for (unsigned int i=20;i <wv.size()-20; i++) { for (unsigned int j=20; j<wv.size()-20; j++) { double mu = static_cast<double>(i); tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) ); } } // parameters for Viterbi decoding... this part is taken from // Murphy's matlab d_mat_t delta; i_mat_t psi; for (unsigned int i=0;i <rcfmat.size(); i++) { delta.push_back( d_vec_t()); psi.push_back( i_vec_t()); for (unsigned int j=0; j<rcfmat[i].size(); j++) { delta[i].push_back(0.); // fill with zeros initially psi[i].push_back(0); // fill with zeros initially } } unsigned int T = delta.size(); if (T < 2) return; // can't do anything at all meaningful unsigned int Q = delta[0].size(); // initialize first column of delta for (unsigned int j=0; j<Q; j++) { delta[0][j] = wv[j] * rcfmat[0][j]; psi[0][j] = 0; } double deltasum = 0.; for (unsigned int i=0; i<Q; i++) { deltasum += delta[0][i]; } for (unsigned int i=0; i<Q; i++) { delta[0][i] /= (deltasum + EPS); } for (unsigned int t=1; t<T; t++) { d_vec_t tmp_vec(Q); for (unsigned int j=0; j<Q; j++) { for (unsigned int i=0; i<Q; i++) { tmp_vec[i] = delta[t-1][i] * tmat[j][i]; } delta[t][j] = get_max_val(tmp_vec); psi[t][j] = get_max_ind(tmp_vec); delta[t][j] *= rcfmat[t][j]; } // normalise current delta column double deltasum = 0.; for (unsigned int i=0; i<Q; i++) { deltasum += delta[t][i]; } for (unsigned int i=0; i<Q; i++) { delta[t][i] /= (deltasum + EPS); } } i_vec_t bestpath(T); d_vec_t tmp_vec(Q); for (unsigned int i=0; i<Q; i++) { tmp_vec[i] = delta[T-1][i]; } // find starting point - best beat period for "last" frame bestpath[T-1] = get_max_ind(tmp_vec); // backtrace through index of maximum values in psi for (unsigned int t=T-2; t>0 ;t--) { bestpath[t] = psi[t+1][bestpath[t+1]]; } // weird but necessary hack -- couldn't get above loop to terminate at t >= 0 bestpath[0] = psi[1][bestpath[1]]; unsigned int lastind = 0; for (unsigned int i=0; i<T; i++) { unsigned int step = 128; for (unsigned int j=0; j<step; j++) { lastind = i*step+j; beat_period[lastind] = bestpath[i]; } // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl; } //fill in the last values... for (unsigned int i=lastind; i<beat_period.size(); i++) { beat_period[i] = beat_period[lastind]; } for (unsigned int i = 0; i < beat_period.size(); i++) { tempi.push_back((60. * m_rate / m_increment)/beat_period[i]); } } double TempoTrackV2::get_max_val(const d_vec_t &df) { double maxval = 0.; for (unsigned int i=0; i<df.size(); i++) { if (maxval < df[i]) { maxval = df[i]; } } return maxval; } int TempoTrackV2::get_max_ind(const d_vec_t &df) { double maxval = 0.; int ind = 0; for (unsigned int i=0; i<df.size(); i++) { if (maxval < df[i]) { maxval = df[i]; ind = i; } } return ind; } void TempoTrackV2::normalise_vec(d_vec_t &df) { double sum = 0.; for (unsigned int i=0; i<df.size(); i++) { sum += df[i]; } for (unsigned int i=0; i<df.size(); i++) { df[i]/= (sum + EPS); } } // MEPD 28/11/12 // this function has been updated to allow the "alpha" and "tightness" parameters // of the dynamic program to be set by the user // the default value of alpha = 0.9 and tightness = 4 void TempoTrackV2::calculateBeats(const vector<double> &df, const vector<double> &beat_period, vector<double> &beats, double alpha, double tightness) { if (df.empty() || beat_period.empty()) return; d_vec_t cumscore(df.size()); // store cumulative score i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant) d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function for (unsigned int i=0; i<df.size(); i++) { localscore[i] = df[i]; backlink[i] = -1; } //double tightness = 4.; //double alpha = 0.9; // MEPD 28/11/12 // debug statements that can be removed. std::cout << "alpha" << alpha << std::endl; std::cout << "tightness" << tightness << std::endl; // main loop for (unsigned int i=0; i<localscore.size(); i++) { int prange_min = -2*beat_period[i]; int prange_max = round(-0.5*beat_period[i]); // transition range d_vec_t txwt (prange_max - prange_min + 1); d_vec_t scorecands (txwt.size()); for (unsigned int j=0;j<txwt.size();j++) { double mu = static_cast<double> (beat_period[i]); txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2)); // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE()); int cscore_ind = i+prange_min+j; if (cscore_ind >= 0) { scorecands[j] = txwt[j] * cumscore[cscore_ind]; } } // find max value and index of maximum value double vv = get_max_val(scorecands); int xx = get_max_ind(scorecands); cumscore[i] = alpha*vv + (1.-alpha)*localscore[i]; backlink[i] = i+prange_min+xx; // std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl; } // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR d_vec_t tmp_vec; for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++) { tmp_vec.push_back(cumscore[i]); } int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ; // can happen if no results obtained earlier (e.g. input too short) if (startpoint >= (int)backlink.size()) startpoint = backlink.size()-1; // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE) // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0 i_vec_t ibeats; ibeats.push_back(startpoint); // std::cerr << "startpoint = " << startpoint << std::endl; while (backlink[ibeats.back()] > 0) { // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl; int b = ibeats.back(); if (backlink[b] == b) break; // shouldn't happen... haha ibeats.push_back(backlink[b]); } // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS for (unsigned int i=0; i<ibeats.size(); i++) { beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) ); } }