Mercurial > hg > btrack
comparison src/BTrack.cpp @ 117:ca2d83d29814 tip master
Merge branch 'release/1.0.5'
author | Adam Stark <adamstark.uk@gmail.com> |
---|---|
date | Fri, 18 Aug 2023 20:07:33 +0200 |
parents | 54c657d621dd |
children |
comparison
equal
deleted
inserted
replaced
96:c58f01834337 | 117:ca2d83d29814 |
---|---|
19 */ | 19 */ |
20 //======================================================================= | 20 //======================================================================= |
21 | 21 |
22 #include <cmath> | 22 #include <cmath> |
23 #include <algorithm> | 23 #include <algorithm> |
24 #include <numeric> | |
24 #include "BTrack.h" | 25 #include "BTrack.h" |
25 #include "samplerate.h" | 26 #include "samplerate.h" |
26 #include <iostream> | 27 #include <iostream> |
27 | 28 |
28 //======================================================================= | 29 //======================================================================= |
29 BTrack::BTrack() | 30 BTrack::BTrack() |
30 : odf (512, 1024, ComplexSpectralDifferenceHWR, HanningWindow) | 31 : odf (512, 1024, ComplexSpectralDifferenceHWR, HanningWindow) |
31 { | 32 { |
32 initialise (512, 1024); | 33 initialise (512); |
33 } | 34 } |
34 | 35 |
35 //======================================================================= | 36 //======================================================================= |
36 BTrack::BTrack (int hopSize_) | 37 BTrack::BTrack (int hop) |
37 : odf(hopSize_, 2*hopSize_, ComplexSpectralDifferenceHWR, HanningWindow) | 38 : odf (hop, 2 * hop, ComplexSpectralDifferenceHWR, HanningWindow) |
38 { | 39 { |
39 initialise (hopSize_, 2*hopSize_); | 40 initialise (hop); |
40 } | 41 } |
41 | 42 |
42 //======================================================================= | 43 //======================================================================= |
43 BTrack::BTrack (int hopSize_, int frameSize_) | 44 BTrack::BTrack (int hop, int frame) |
44 : odf (hopSize_, frameSize_, ComplexSpectralDifferenceHWR, HanningWindow) | 45 : odf (hop, frame, ComplexSpectralDifferenceHWR, HanningWindow) |
45 { | 46 { |
46 initialise (hopSize_, frameSize_); | 47 initialise (hop); |
47 } | 48 } |
48 | 49 |
49 //======================================================================= | 50 //======================================================================= |
50 BTrack::~BTrack() | 51 BTrack::~BTrack() |
51 { | 52 { |
64 delete [] fftOut; | 65 delete [] fftOut; |
65 #endif | 66 #endif |
66 } | 67 } |
67 | 68 |
68 //======================================================================= | 69 //======================================================================= |
69 double BTrack::getBeatTimeInSeconds (long frameNumber, int hopSize, int fs) | 70 double BTrack::getBeatTimeInSeconds (long frameNumber, int hopSize, int samplingFrequency) |
70 { | 71 { |
71 double hop = (double) hopSize; | 72 return ((static_cast<double> (hopSize) / static_cast<double> (samplingFrequency)) * static_cast<double> (frameNumber)); |
72 double samplingFrequency = (double) fs; | 73 } |
73 double frameNum = (double) frameNumber; | 74 |
74 | 75 //======================================================================= |
75 return ((hop / samplingFrequency) * frameNum); | 76 void BTrack::initialise (int hop) |
76 } | 77 { |
77 | 78 // set vector sizes |
78 //======================================================================= | 79 resampledOnsetDF.resize (512); |
79 double BTrack::getBeatTimeInSeconds (int frameNumber, int hopSize, int fs) | 80 acf.resize (512); |
80 { | 81 weightingVector.resize (128); |
81 long frameNum = (long) frameNumber; | 82 combFilterBankOutput.resize (128); |
82 | 83 tempoObservationVector.resize (41); |
83 return getBeatTimeInSeconds (frameNum, hopSize, fs); | 84 delta.resize (41); |
84 } | 85 prevDelta.resize (41); |
85 | 86 prevDeltaFixed.resize (41); |
86 | 87 |
87 | 88 double rayleighParameter = 43; |
88 //======================================================================= | |
89 void BTrack::initialise (int hopSize_, int frameSize_) | |
90 { | |
91 double rayparam = 43; | |
92 double pi = 3.14159265; | |
93 | |
94 | 89 |
95 // initialise parameters | 90 // initialise parameters |
96 tightness = 5; | 91 tightness = 5; |
97 alpha = 0.9; | 92 alpha = 0.9; |
98 tempo = 120; | |
99 estimatedTempo = 120.0; | 93 estimatedTempo = 120.0; |
100 tempoToLagFactor = 60.*44100./512.; | 94 |
101 | 95 timeToNextPrediction = 10; |
102 m0 = 10; | 96 timeToNextBeat = -1; |
103 beatCounter = -1; | |
104 | 97 |
105 beatDueInFrame = false; | 98 beatDueInFrame = false; |
106 | 99 |
107 | 100 |
108 // create rayleigh weighting vector | 101 // create rayleigh weighting vector |
109 for (int n = 0; n < 128; n++) | 102 for (int n = 0; n < 128; n++) |
110 { | 103 weightingVector[n] = ((double) n / pow (rayleighParameter, 2)) * exp((-1 * pow((double) - n, 2)) / (2 * pow (rayleighParameter, 2))); |
111 weightingVector[n] = ((double) n / pow(rayparam,2)) * exp((-1*pow((double)-n,2)) / (2*pow(rayparam,2))); | 104 |
112 } | 105 // initialise prevDelta |
113 | 106 std::fill (prevDelta.begin(), prevDelta.end(), 1); |
114 // initialise prev_delta | 107 |
115 for (int i = 0; i < 41; i++) | |
116 { | |
117 prevDelta[i] = 1; | |
118 } | |
119 | |
120 double t_mu = 41/2; | 108 double t_mu = 41/2; |
121 double m_sig; | 109 double m_sig; |
122 double x; | 110 double x; |
123 // create tempo transition matrix | 111 // create tempo transition matrix |
124 m_sig = 41/8; | 112 m_sig = 41/8; |
125 for (int i = 0;i < 41;i++) | 113 |
126 { | 114 for (int i = 0; i < 41; i++) |
127 for (int j = 0;j < 41;j++) | 115 { |
128 { | 116 for (int j = 0; j < 41; j++) |
129 x = j+1; | 117 { |
130 t_mu = i+1; | 118 x = j + 1; |
131 tempoTransitionMatrix[i][j] = (1 / (m_sig * sqrt(2*pi))) * exp( (-1*pow((x-t_mu),2)) / (2*pow(m_sig,2)) ); | 119 t_mu = i + 1; |
120 tempoTransitionMatrix[i][j] = (1 / (m_sig * sqrt (2 * M_PI))) * exp((-1 * pow ((x - t_mu), 2)) / (2 * pow (m_sig, 2)) ); | |
132 } | 121 } |
133 } | 122 } |
134 | 123 |
135 // tempo is not fixed | 124 // tempo is not fixed |
136 tempoFixed = false; | 125 tempoFixed = false; |
137 | 126 |
138 // initialise latest cumulative score value | |
139 // in case it is requested before any processing takes place | |
140 latestCumulativeScoreValue = 0; | |
141 | |
142 // initialise algorithm given the hopsize | 127 // initialise algorithm given the hopsize |
143 setHopSize(hopSize_); | 128 setHopSize (hop); |
144 | |
145 | 129 |
146 // Set up FFT for calculating the auto-correlation function | 130 // Set up FFT for calculating the auto-correlation function |
147 FFTLengthForACFCalculation = 1024; | 131 FFTLengthForACFCalculation = 1024; |
148 | 132 |
149 #ifdef USE_FFTW | 133 #ifdef USE_FFTW |
161 cfgBackwards = kiss_fft_alloc (FFTLengthForACFCalculation, 1, 0, 0); | 145 cfgBackwards = kiss_fft_alloc (FFTLengthForACFCalculation, 1, 0, 0); |
162 #endif | 146 #endif |
163 } | 147 } |
164 | 148 |
165 //======================================================================= | 149 //======================================================================= |
166 void BTrack::setHopSize (int hopSize_) | 150 void BTrack::setHopSize (int hop) |
167 { | 151 { |
168 hopSize = hopSize_; | 152 hopSize = hop; |
169 onsetDFBufferSize = (512*512)/hopSize; // calculate df buffer size | 153 onsetDFBufferSize = (512 * 512) / hopSize; // calculate df buffer size |
170 | 154 beatPeriod = round (60 / ((((double) hopSize) / 44100) * 120.)); |
171 beatPeriod = round(60/((((double) hopSize)/44100)*tempo)); | |
172 | 155 |
173 // set size of onset detection function buffer | 156 // set size of onset detection function buffer |
174 onsetDF.resize (onsetDFBufferSize); | 157 onsetDF.resize (onsetDFBufferSize); |
175 | 158 |
176 // set size of cumulative score buffer | 159 // set size of cumulative score buffer |
188 } | 171 } |
189 } | 172 } |
190 } | 173 } |
191 | 174 |
192 //======================================================================= | 175 //======================================================================= |
193 void BTrack::updateHopAndFrameSize (int hopSize_, int frameSize_) | 176 void BTrack::updateHopAndFrameSize (int hop, int frame) |
194 { | 177 { |
195 // update the onset detection function object | 178 // update the onset detection function object |
196 odf.initialise (hopSize_, frameSize_); | 179 odf.initialise (hop, frame); |
197 | 180 |
198 // update the hop size being used by the beat tracker | 181 // update the hop size being used by the beat tracker |
199 setHopSize (hopSize_); | 182 setHopSize (hop); |
200 } | 183 } |
201 | 184 |
202 //======================================================================= | 185 //======================================================================= |
203 bool BTrack::beatDueInCurrentFrame() | 186 bool BTrack::beatDueInCurrentFrame() |
204 { | 187 { |
218 } | 201 } |
219 | 202 |
220 //======================================================================= | 203 //======================================================================= |
221 double BTrack::getLatestCumulativeScoreValue() | 204 double BTrack::getLatestCumulativeScoreValue() |
222 { | 205 { |
223 return latestCumulativeScoreValue; | 206 return cumulativeScore[cumulativeScore.size() - 1]; |
224 } | 207 } |
225 | 208 |
226 //======================================================================= | 209 //======================================================================= |
227 void BTrack::processAudioFrame (double* frame) | 210 void BTrack::processAudioFrame (double* frame) |
228 { | 211 { |
242 | 225 |
243 // add a tiny constant to the sample to stop it from ever going | 226 // add a tiny constant to the sample to stop it from ever going |
244 // to zero. this is to avoid problems further down the line | 227 // to zero. this is to avoid problems further down the line |
245 newSample = newSample + 0.0001; | 228 newSample = newSample + 0.0001; |
246 | 229 |
247 m0--; | 230 timeToNextPrediction--; |
248 beatCounter--; | 231 timeToNextBeat--; |
249 beatDueInFrame = false; | 232 beatDueInFrame = false; |
250 | 233 |
251 // add new sample at the end | 234 // add new sample at the end |
252 onsetDF.addSampleToEnd (newSample); | 235 onsetDF.addSampleToEnd (newSample); |
253 | 236 |
254 // update cumulative score | 237 // update cumulative score |
255 updateCumulativeScore (newSample); | 238 updateCumulativeScore (newSample); |
256 | 239 |
257 // if we are halfway between beats | 240 // if we are halfway between beats, predict a beat |
258 if (m0 == 0) | 241 if (timeToNextPrediction == 0) |
259 { | 242 predictBeat(); |
260 predictBeat(); | |
261 } | |
262 | 243 |
263 // if we are at a beat | 244 // if we are at a beat |
264 if (beatCounter == 0) | 245 if (timeToNextBeat == 0) |
265 { | 246 { |
266 beatDueInFrame = true; // indicate a beat should be output | 247 beatDueInFrame = true; // indicate a beat should be output |
267 | 248 |
268 // recalculate the tempo | 249 // recalculate the tempo |
269 resampleOnsetDetectionFunction(); | 250 resampleOnsetDetectionFunction(); |
271 } | 252 } |
272 } | 253 } |
273 | 254 |
274 //======================================================================= | 255 //======================================================================= |
275 void BTrack::setTempo (double tempo) | 256 void BTrack::setTempo (double tempo) |
276 { | 257 { |
277 | |
278 /////////// TEMPO INDICATION RESET ////////////////// | 258 /////////// TEMPO INDICATION RESET ////////////////// |
279 | 259 |
280 // firstly make sure tempo is between 80 and 160 bpm.. | 260 // firstly make sure tempo is between 80 and 160 bpm.. |
281 while (tempo > 160) | 261 while (tempo > 160) |
282 { | 262 tempo = tempo / 2; |
283 tempo = tempo/2; | |
284 } | |
285 | 263 |
286 while (tempo < 80) | 264 while (tempo < 80) |
287 { | 265 tempo = tempo * 2; |
288 tempo = tempo * 2; | |
289 } | |
290 | 266 |
291 // convert tempo from bpm value to integer index of tempo probability | 267 // convert tempo from bpm value to integer index of tempo probability |
292 int tempo_index = (int) round((tempo - 80)/2); | 268 int tempoIndex = (int) round ((tempo - 80.) / 2); |
293 | 269 |
294 // now set previous tempo observations to zero | 270 // now set previous tempo observations to zero and set desired tempo index to 1 |
295 for (int i=0;i < 41;i++) | 271 std::fill (prevDelta.begin(), prevDelta.end(), 0); |
296 { | 272 prevDelta[tempoIndex] = 1; |
297 prevDelta[i] = 0; | |
298 } | |
299 | |
300 // set desired tempo index to 1 | |
301 prevDelta[tempo_index] = 1; | |
302 | |
303 | 273 |
304 /////////// CUMULATIVE SCORE ARTIFICAL TEMPO UPDATE ////////////////// | 274 /////////// CUMULATIVE SCORE ARTIFICAL TEMPO UPDATE ////////////////// |
305 | 275 |
306 // calculate new beat period | 276 // calculate new beat period |
307 int new_bperiod = (int) round(60/((((double) hopSize)/44100)*tempo)); | 277 int newBeatPeriod = (int) round (60 / ((((double) hopSize) / 44100) * tempo)); |
308 | 278 |
309 int bcounter = 1; | 279 int k = 1; |
310 // initialise df_buffer to zeros | 280 |
311 for (int i = (onsetDFBufferSize-1);i >= 0;i--) | 281 // initialise onset detection function with delta functions spaced |
312 { | 282 // at the new beat period |
313 if (bcounter == 1) | 283 for (int i = onsetDFBufferSize - 1; i >= 0; i--) |
284 { | |
285 if (k == 1) | |
314 { | 286 { |
315 cumulativeScore[i] = 150; | 287 cumulativeScore[i] = 150; |
316 onsetDF[i] = 150; | 288 onsetDF[i] = 150; |
317 } | 289 } |
318 else | 290 else |
319 { | 291 { |
320 cumulativeScore[i] = 10; | 292 cumulativeScore[i] = 10; |
321 onsetDF[i] = 10; | 293 onsetDF[i] = 10; |
322 } | 294 } |
323 | 295 |
324 bcounter++; | 296 k++; |
325 | 297 |
326 if (bcounter > new_bperiod) | 298 if (k > newBeatPeriod) |
327 { | 299 { |
328 bcounter = 1; | 300 k = 1; |
329 } | 301 } |
330 } | 302 } |
331 | 303 |
332 /////////// INDICATE THAT THIS IS A BEAT ////////////////// | 304 /////////// INDICATE THAT THIS IS A BEAT ////////////////// |
333 | 305 |
334 // beat is now | 306 // beat is now |
335 beatCounter = 0; | 307 timeToNextBeat = 0; |
336 | 308 |
337 // offbeat is half of new beat period away | 309 // next prediction is on the offbeat, so half of new beat period away |
338 m0 = (int) round(((double) new_bperiod)/2); | 310 timeToNextPrediction = (int) round (((double) newBeatPeriod) / 2); |
339 } | 311 } |
340 | 312 |
341 //======================================================================= | 313 //======================================================================= |
342 void BTrack::fixTempo (double tempo) | 314 void BTrack::fixTempo (double tempo) |
343 { | 315 { |
344 // firstly make sure tempo is between 80 and 160 bpm.. | 316 // firstly make sure tempo is between 80 and 160 bpm.. |
345 while (tempo > 160) | 317 while (tempo > 160) |
346 { | 318 tempo = tempo / 2; |
347 tempo = tempo/2; | |
348 } | |
349 | 319 |
350 while (tempo < 80) | 320 while (tempo < 80) |
351 { | 321 tempo = tempo * 2; |
352 tempo = tempo * 2; | |
353 } | |
354 | 322 |
355 // convert tempo from bpm value to integer index of tempo probability | 323 // convert tempo from bpm value to integer index of tempo probability |
356 int tempo_index = (int) round((tempo - 80)/2); | 324 int tempoIndex = (int) round((tempo - 80) / 2); |
357 | 325 |
358 // now set previous fixed previous tempo observation values to zero | 326 // now set previous fixed previous tempo observation values to zero |
359 for (int i=0;i < 41;i++) | 327 for (int i = 0; i < 41; i++) |
360 { | 328 { |
361 prevDeltaFixed[i] = 0; | 329 prevDeltaFixed[i] = 0; |
362 } | 330 } |
363 | 331 |
364 // set desired tempo index to 1 | 332 // set desired tempo index to 1 |
365 prevDeltaFixed[tempo_index] = 1; | 333 prevDeltaFixed[tempoIndex] = 1; |
366 | 334 |
367 // set the tempo fix flag | 335 // set the tempo fix flag |
368 tempoFixed = true; | 336 tempoFixed = true; |
369 } | 337 } |
370 | 338 |
377 | 345 |
378 //======================================================================= | 346 //======================================================================= |
379 void BTrack::resampleOnsetDetectionFunction() | 347 void BTrack::resampleOnsetDetectionFunction() |
380 { | 348 { |
381 float output[512]; | 349 float output[512]; |
382 | |
383 float input[onsetDFBufferSize]; | 350 float input[onsetDFBufferSize]; |
384 | 351 |
385 for (int i = 0;i < onsetDFBufferSize;i++) | 352 for (int i = 0; i < onsetDFBufferSize; i++) |
386 { | |
387 input[i] = (float) onsetDF[i]; | 353 input[i] = (float) onsetDF[i]; |
388 } | 354 |
389 | 355 double ratio = 512.0 / ((double) onsetDFBufferSize); |
390 double src_ratio = 512.0/((double) onsetDFBufferSize); | 356 int bufferLength = onsetDFBufferSize; |
391 int BUFFER_LEN = onsetDFBufferSize; | 357 int outputLength = 512; |
392 int output_len; | 358 |
393 SRC_DATA src_data ; | 359 SRC_DATA src_data; |
394 | |
395 //output_len = (int) floor (((double) BUFFER_LEN) * src_ratio) ; | |
396 output_len = 512; | |
397 | |
398 src_data.data_in = input; | 360 src_data.data_in = input; |
399 src_data.input_frames = BUFFER_LEN; | 361 src_data.input_frames = bufferLength; |
400 | 362 src_data.src_ratio = ratio; |
401 src_data.src_ratio = src_ratio; | |
402 | |
403 src_data.data_out = output; | 363 src_data.data_out = output; |
404 src_data.output_frames = output_len; | 364 src_data.output_frames = outputLength; |
405 | 365 |
406 src_simple (&src_data, SRC_SINC_BEST_QUALITY, 1); | 366 src_simple (&src_data, SRC_SINC_BEST_QUALITY, 1); |
407 | 367 |
408 for (int i = 0;i < output_len;i++) | 368 for (int i = 0; i < outputLength; i++) |
409 { | |
410 resampledOnsetDF[i] = (double) src_data.data_out[i]; | 369 resampledOnsetDF[i] = (double) src_data.data_out[i]; |
411 } | |
412 } | 370 } |
413 | 371 |
414 //======================================================================= | 372 //======================================================================= |
415 void BTrack::calculateTempo() | 373 void BTrack::calculateTempo() |
416 { | 374 { |
375 double tempoToLagFactor = 60. * 44100. / 512.; | |
376 | |
417 // adaptive threshold on input | 377 // adaptive threshold on input |
418 adaptiveThreshold (resampledOnsetDF,512); | 378 adaptiveThreshold (resampledOnsetDF); |
419 | 379 |
420 // calculate auto-correlation function of detection function | 380 // calculate auto-correlation function of detection function |
421 calculateBalancedACF (resampledOnsetDF); | 381 calculateBalancedACF (resampledOnsetDF); |
422 | 382 |
423 // calculate output of comb filterbank | 383 // calculate output of comb filterbank |
424 calculateOutputOfCombFilterBank(); | 384 calculateOutputOfCombFilterBank(); |
425 | 385 |
426 // adaptive threshold on rcf | 386 // adaptive threshold on rcf |
427 adaptiveThreshold (combFilterBankOutput,128); | 387 adaptiveThreshold (combFilterBankOutput); |
428 | 388 |
429 | |
430 int t_index; | |
431 int t_index2; | |
432 // calculate tempo observation vector from beat period observation vector | 389 // calculate tempo observation vector from beat period observation vector |
433 for (int i = 0;i < 41;i++) | 390 for (int i = 0; i < 41; i++) |
434 { | 391 { |
435 t_index = (int) round (tempoToLagFactor / ((double) ((2*i)+80))); | 392 int tempoIndex1 = (int) round (tempoToLagFactor / ((double) ((2 * i) + 80))); |
436 t_index2 = (int) round (tempoToLagFactor / ((double) ((4*i)+160))); | 393 int tempoIndex2 = (int) round (tempoToLagFactor / ((double) ((4 * i) + 160))); |
437 | 394 tempoObservationVector[i] = combFilterBankOutput[tempoIndex1 - 1] + combFilterBankOutput[tempoIndex2 - 1]; |
438 | 395 } |
439 tempoObservationVector[i] = combFilterBankOutput[t_index-1] + combFilterBankOutput[t_index2-1]; | |
440 } | |
441 | |
442 | |
443 double maxval; | |
444 double maxind; | |
445 double curval; | |
446 | 396 |
447 // if tempo is fixed then always use a fixed set of tempi as the previous observation probability function | 397 // if tempo is fixed then always use a fixed set of tempi as the previous observation probability function |
448 if (tempoFixed) | 398 if (tempoFixed) |
449 { | 399 { |
450 for (int k = 0;k < 41;k++) | 400 for (int k = 0; k < 41; k++) |
451 { | 401 prevDelta[k] = prevDeltaFixed[k]; |
452 prevDelta[k] = prevDeltaFixed[k]; | 402 } |
403 | |
404 for (int j = 0; j < 41; j++) | |
405 { | |
406 double maxValue = -1; | |
407 | |
408 for (int i = 0; i < 41; i++) | |
409 { | |
410 double currentValue = prevDelta[i] * tempoTransitionMatrix[i][j]; | |
411 | |
412 if (currentValue > maxValue) | |
413 maxValue = currentValue; | |
453 } | 414 } |
454 } | 415 |
455 | 416 delta[j] = maxValue * tempoObservationVector[j]; |
456 for (int j=0;j < 41;j++) | 417 } |
457 { | 418 |
458 maxval = -1; | 419 normaliseVector (delta); |
459 for (int i = 0;i < 41;i++) | 420 |
460 { | 421 double maxIndex = -1; |
461 curval = prevDelta[i] * tempoTransitionMatrix[i][j]; | 422 double maxValue = -1; |
462 | 423 |
463 if (curval > maxval) | 424 for (int j = 0; j < 41; j++) |
425 { | |
426 if (delta[j] > maxValue) | |
427 { | |
428 maxValue = delta[j]; | |
429 maxIndex = j; | |
430 } | |
431 | |
432 prevDelta[j] = delta[j]; | |
433 } | |
434 | |
435 beatPeriod = round ((60.0 * 44100.0) / (((2 * maxIndex) + 80) * ((double) hopSize))); | |
436 | |
437 if (beatPeriod > 0) | |
438 estimatedTempo = 60.0 / ((((double) hopSize) / 44100.0) * beatPeriod); | |
439 } | |
440 | |
441 //======================================================================= | |
442 void BTrack::adaptiveThreshold (std::vector<double>& x) | |
443 { | |
444 int N = static_cast<int> (x.size()); | |
445 double threshold[N]; | |
446 | |
447 int p_post = 7; | |
448 int p_pre = 8; | |
449 | |
450 int t = std::min (N, p_post); // what is smaller, p_post or df size. This is to avoid accessing outside of arrays | |
451 | |
452 // find threshold for first 't' samples, where a full average cannot be computed yet | |
453 for (int i = 0; i <= t; i++) | |
454 { | |
455 int k = std::min ((i + p_pre), N); | |
456 threshold[i] = calculateMeanOfVector (x, 1, k); | |
457 } | |
458 | |
459 // find threshold for bulk of samples across a moving average from [i-p_pre,i+p_post] | |
460 for (int i = t + 1; i < N - p_post; i++) | |
461 { | |
462 threshold[i] = calculateMeanOfVector (x, i - p_pre, i + p_post); | |
463 } | |
464 | |
465 // for last few samples calculate threshold, again, not enough samples to do as above | |
466 for (int i = N - p_post; i < N; i++) | |
467 { | |
468 int k = std::max ((i - p_post), 1); | |
469 threshold[i] = calculateMeanOfVector (x, k, N); | |
470 } | |
471 | |
472 // subtract the threshold from the detection function and check that it is not less than 0 | |
473 for (int i = 0; i < N; i++) | |
474 { | |
475 x[i] = x[i] - threshold[i]; | |
476 | |
477 if (x[i] < 0) | |
478 x[i] = 0; | |
479 } | |
480 } | |
481 | |
482 //======================================================================= | |
483 void BTrack::calculateOutputOfCombFilterBank() | |
484 { | |
485 std::fill (combFilterBankOutput.begin(), combFilterBankOutput.end(), 0.0); | |
486 int numCombElements = 4; | |
487 | |
488 for (int i = 2; i <= 127; i++) // max beat period | |
489 { | |
490 for (int a = 1; a <= numCombElements; a++) // number of comb elements | |
491 { | |
492 for (int b = 1 - a; b <= a - 1; b++) // general state using normalisation of comb elements | |
464 { | 493 { |
465 maxval = curval; | 494 combFilterBankOutput[i - 1] += (acf[(a * i + b) - 1] * weightingVector[i - 1]) / (2 * a - 1); // calculate value for comb filter row |
466 } | 495 } |
467 } | 496 } |
468 | 497 } |
469 delta[j] = maxval * tempoObservationVector[j]; | 498 } |
470 } | 499 |
471 | 500 //======================================================================= |
472 | 501 void BTrack::calculateBalancedACF (std::vector<double>& onsetDetectionFunction) |
473 normaliseArray(delta,41); | |
474 | |
475 maxind = -1; | |
476 maxval = -1; | |
477 | |
478 for (int j=0;j < 41;j++) | |
479 { | |
480 if (delta[j] > maxval) | |
481 { | |
482 maxval = delta[j]; | |
483 maxind = j; | |
484 } | |
485 | |
486 prevDelta[j] = delta[j]; | |
487 } | |
488 | |
489 beatPeriod = round ((60.0*44100.0)/(((2*maxind)+80)*((double) hopSize))); | |
490 | |
491 if (beatPeriod > 0) | |
492 { | |
493 estimatedTempo = 60.0/((((double) hopSize) / 44100.0) * beatPeriod); | |
494 } | |
495 } | |
496 | |
497 //======================================================================= | |
498 void BTrack::adaptiveThreshold (double* x, int N) | |
499 { | |
500 int i = 0; | |
501 int k,t = 0; | |
502 double x_thresh[N]; | |
503 | |
504 int p_post = 7; | |
505 int p_pre = 8; | |
506 | |
507 t = std::min(N,p_post); // what is smaller, p_post of df size. This is to avoid accessing outside of arrays | |
508 | |
509 // find threshold for first 't' samples, where a full average cannot be computed yet | |
510 for (i = 0;i <= t;i++) | |
511 { | |
512 k = std::min ((i+p_pre),N); | |
513 x_thresh[i] = calculateMeanOfArray (x,1,k); | |
514 } | |
515 // find threshold for bulk of samples across a moving average from [i-p_pre,i+p_post] | |
516 for (i = t+1;i < N-p_post;i++) | |
517 { | |
518 x_thresh[i] = calculateMeanOfArray (x,i-p_pre,i+p_post); | |
519 } | |
520 // for last few samples calculate threshold, again, not enough samples to do as above | |
521 for (i = N-p_post;i < N;i++) | |
522 { | |
523 k = std::max ((i-p_post),1); | |
524 x_thresh[i] = calculateMeanOfArray (x,k,N); | |
525 } | |
526 | |
527 // subtract the threshold from the detection function and check that it is not less than 0 | |
528 for (i = 0; i < N; i++) | |
529 { | |
530 x[i] = x[i] - x_thresh[i]; | |
531 if (x[i] < 0) | |
532 { | |
533 x[i] = 0; | |
534 } | |
535 } | |
536 } | |
537 | |
538 //======================================================================= | |
539 void BTrack::calculateOutputOfCombFilterBank() | |
540 { | |
541 int numelem; | |
542 | |
543 for (int i = 0;i < 128;i++) | |
544 { | |
545 combFilterBankOutput[i] = 0; | |
546 } | |
547 | |
548 numelem = 4; | |
549 | |
550 for (int i = 2; i <= 127; i++) // max beat period | |
551 { | |
552 for (int a = 1; a <= numelem; a++) // number of comb elements | |
553 { | |
554 for (int b = 1-a; b <= a-1; b++) // general state using normalisation of comb elements | |
555 { | |
556 combFilterBankOutput[i-1] = combFilterBankOutput[i-1] + (acf[(a*i+b)-1]*weightingVector[i-1])/(2*a-1); // calculate value for comb filter row | |
557 } | |
558 } | |
559 } | |
560 } | |
561 | |
562 //======================================================================= | |
563 void BTrack::calculateBalancedACF (double* onsetDetectionFunction) | |
564 { | 502 { |
565 int onsetDetectionFunctionLength = 512; | 503 int onsetDetectionFunctionLength = 512; |
566 | 504 |
567 #ifdef USE_FFTW | 505 #ifdef USE_FFTW |
568 // copy into complex array and zero pad | 506 // copy into complex array and zero pad |
569 for (int i = 0;i < FFTLengthForACFCalculation;i++) | 507 for (int i = 0; i < FFTLengthForACFCalculation; i++) |
570 { | 508 { |
571 if (i < onsetDetectionFunctionLength) | 509 if (i < onsetDetectionFunctionLength) |
572 { | 510 { |
573 complexIn[i][0] = onsetDetectionFunction[i]; | 511 complexIn[i][0] = onsetDetectionFunction[i]; |
574 complexIn[i][1] = 0.0; | 512 complexIn[i][1] = 0.0; |
582 | 520 |
583 // perform the fft | 521 // perform the fft |
584 fftw_execute (acfForwardFFT); | 522 fftw_execute (acfForwardFFT); |
585 | 523 |
586 // multiply by complex conjugate | 524 // multiply by complex conjugate |
587 for (int i = 0;i < FFTLengthForACFCalculation;i++) | 525 for (int i = 0; i < FFTLengthForACFCalculation; i++) |
588 { | 526 { |
589 complexOut[i][0] = complexOut[i][0]*complexOut[i][0] + complexOut[i][1]*complexOut[i][1]; | 527 complexOut[i][0] = complexOut[i][0] * complexOut[i][0] + complexOut[i][1] * complexOut[i][1]; |
590 complexOut[i][1] = 0.0; | 528 complexOut[i][1] = 0.0; |
591 } | 529 } |
592 | 530 |
593 // perform the ifft | 531 // perform the ifft |
594 fftw_execute (acfBackwardFFT); | 532 fftw_execute (acfBackwardFFT); |
595 | 533 |
596 #endif | 534 #endif |
597 | 535 |
598 #ifdef USE_KISS_FFT | 536 #ifdef USE_KISS_FFT |
599 // copy into complex array and zero pad | 537 // copy into complex array and zero pad |
600 for (int i = 0;i < FFTLengthForACFCalculation;i++) | 538 for (int i = 0; i < FFTLengthForACFCalculation; i++) |
601 { | 539 { |
602 if (i < onsetDetectionFunctionLength) | 540 if (i < onsetDetectionFunctionLength) |
603 { | 541 { |
604 fftIn[i].r = onsetDetectionFunction[i]; | 542 fftIn[i].r = onsetDetectionFunction[i]; |
605 fftIn[i].i = 0.0; | 543 fftIn[i].i = 0.0; |
613 | 551 |
614 // execute kiss fft | 552 // execute kiss fft |
615 kiss_fft (cfgForwards, fftIn, fftOut); | 553 kiss_fft (cfgForwards, fftIn, fftOut); |
616 | 554 |
617 // multiply by complex conjugate | 555 // multiply by complex conjugate |
618 for (int i = 0;i < FFTLengthForACFCalculation;i++) | 556 for (int i = 0; i < FFTLengthForACFCalculation; i++) |
619 { | 557 { |
620 fftOut[i].r = fftOut[i].r * fftOut[i].r + fftOut[i].i * fftOut[i].i; | 558 fftOut[i].r = fftOut[i].r * fftOut[i].r + fftOut[i].i * fftOut[i].i; |
621 fftOut[i].i = 0.0; | 559 fftOut[i].i = 0.0; |
622 } | 560 } |
623 | 561 |
630 | 568 |
631 for (int i = 0; i < 512; i++) | 569 for (int i = 0; i < 512; i++) |
632 { | 570 { |
633 #ifdef USE_FFTW | 571 #ifdef USE_FFTW |
634 // calculate absolute value of result | 572 // calculate absolute value of result |
635 double absValue = sqrt (complexIn[i][0]*complexIn[i][0] + complexIn[i][1]*complexIn[i][1]); | 573 double absValue = sqrt (complexIn[i][0] * complexIn[i][0] + complexIn[i][1] * complexIn[i][1]); |
636 #endif | 574 #endif |
637 | 575 |
638 #ifdef USE_KISS_FFT | 576 #ifdef USE_KISS_FFT |
639 // calculate absolute value of result | 577 // calculate absolute value of result |
640 double absValue = sqrt (fftIn[i].r * fftIn[i].r + fftIn[i].i * fftIn[i].i); | 578 double absValue = sqrt (fftIn[i].r * fftIn[i].r + fftIn[i].i * fftIn[i].i); |
650 lag = lag - 1.; | 588 lag = lag - 1.; |
651 } | 589 } |
652 } | 590 } |
653 | 591 |
654 //======================================================================= | 592 //======================================================================= |
655 double BTrack::calculateMeanOfArray (double* array, int startIndex, int endIndex) | 593 double BTrack::calculateMeanOfVector (std::vector<double>& vector, int startIndex, int endIndex) |
656 { | 594 { |
657 int i; | |
658 double sum = 0; | |
659 | |
660 int length = endIndex - startIndex; | 595 int length = endIndex - startIndex; |
661 | 596 double sum = std::accumulate (vector.begin() + startIndex, vector.begin() + endIndex, 0.0); |
662 // find sum | 597 |
663 for (i = startIndex; i < endIndex; i++) | |
664 { | |
665 sum = sum + array[i]; | |
666 } | |
667 | |
668 if (length > 0) | 598 if (length > 0) |
599 return sum / static_cast<double> (length); // average and return | |
600 else | |
601 return 0; | |
602 } | |
603 | |
604 //======================================================================= | |
605 void BTrack::normaliseVector (std::vector<double>& vector) | |
606 { | |
607 double sum = std::accumulate (vector.begin(), vector.end(), 0.0); | |
608 | |
609 if (sum > 0) | |
669 { | 610 { |
670 return sum / length; // average and return | 611 for (int i = 0; i < vector.size(); i++) |
612 vector[i] = vector[i] / sum; | |
671 } | 613 } |
672 else | 614 } |
673 { | 615 |
674 return 0; | 616 //======================================================================= |
675 } | 617 void BTrack::updateCumulativeScore (double onsetDetectionFunctionSample) |
676 } | 618 { |
677 | 619 int windowStart = onsetDFBufferSize - round (2. * beatPeriod); |
678 //======================================================================= | 620 int windowEnd = onsetDFBufferSize - round (beatPeriod / 2.); |
679 void BTrack::normaliseArray (double* array, int N) | 621 int windowSize = windowEnd - windowStart + 1; |
680 { | 622 |
681 double sum = 0; | 623 // create log gaussian transition window |
682 | 624 double logGaussianTransitionWeighting[windowSize]; |
683 for (int i = 0; i < N; i++) | 625 createLogGaussianTransitionWeighting (logGaussianTransitionWeighting, windowSize, beatPeriod); |
684 { | 626 |
685 if (array[i] > 0) | 627 // calculate the new cumulative score value |
686 { | 628 double cumulativeScoreValue = calculateNewCumulativeScoreValue (cumulativeScore, logGaussianTransitionWeighting, windowStart, windowEnd, onsetDetectionFunctionSample, alpha); |
687 sum = sum + array[i]; | 629 |
688 } | 630 // add the new cumulative score value to the buffer |
689 } | 631 cumulativeScore.addSampleToEnd (cumulativeScoreValue); |
690 | |
691 if (sum > 0) | |
692 { | |
693 for (int i = 0; i < N; i++) | |
694 { | |
695 array[i] = array[i] / sum; | |
696 } | |
697 } | |
698 } | |
699 | |
700 //======================================================================= | |
701 void BTrack::updateCumulativeScore (double odfSample) | |
702 { | |
703 int start, end, winsize; | |
704 double max; | |
705 | |
706 start = onsetDFBufferSize - round (2 * beatPeriod); | |
707 end = onsetDFBufferSize - round (beatPeriod / 2); | |
708 winsize = end-start+1; | |
709 | |
710 double w1[winsize]; | |
711 double v = -2*beatPeriod; | |
712 double wcumscore; | |
713 | |
714 // create window | |
715 for (int i = 0; i < winsize; i++) | |
716 { | |
717 w1[i] = exp((-1 * pow (tightness * log (-v / beatPeriod), 2)) / 2); | |
718 v = v+1; | |
719 } | |
720 | |
721 // calculate new cumulative score value | |
722 max = 0; | |
723 int n = 0; | |
724 for (int i=start; i <= end; i++) | |
725 { | |
726 wcumscore = cumulativeScore[i]*w1[n]; | |
727 | |
728 if (wcumscore > max) | |
729 { | |
730 max = wcumscore; | |
731 } | |
732 n++; | |
733 } | |
734 | |
735 latestCumulativeScoreValue = ((1 - alpha) * odfSample) + (alpha * max); | |
736 | |
737 cumulativeScore.addSampleToEnd (latestCumulativeScoreValue); | |
738 } | 632 } |
739 | 633 |
740 //======================================================================= | 634 //======================================================================= |
741 void BTrack::predictBeat() | 635 void BTrack::predictBeat() |
742 { | 636 { |
743 int windowSize = (int) beatPeriod; | 637 int beatExpectationWindowSize = static_cast<int> (beatPeriod); |
744 double futureCumulativeScore[onsetDFBufferSize + windowSize]; | 638 double futureCumulativeScore[onsetDFBufferSize + beatExpectationWindowSize]; |
745 double w2[windowSize]; | 639 double beatExpectationWindow[beatExpectationWindowSize]; |
746 | 640 |
747 // copy cumscore to first part of fcumscore | 641 // copy cumulativeScore to first part of futureCumulativeScore |
748 for (int i = 0;i < onsetDFBufferSize;i++) | 642 for (int i = 0; i < onsetDFBufferSize; i++) |
749 { | 643 futureCumulativeScore[i] = cumulativeScore[i]; |
750 futureCumulativeScore[i] = cumulativeScore[i]; | 644 |
751 } | 645 // Create a beat expectation window for predicting future beats from the "future" of the cumulative score. |
752 | 646 // We are making this beat prediction at the midpoint between beats, and so we make a Gaussian |
753 // create future window | 647 // weighting centred on the most likely beat position (half a beat period into the future) |
648 // This is W2 in Adam Stark's PhD thesis, equation 3.6, page 62 | |
649 | |
754 double v = 1; | 650 double v = 1; |
755 for (int i = 0; i < windowSize; i++) | 651 for (int i = 0; i < beatExpectationWindowSize; i++) |
756 { | 652 { |
757 w2[i] = exp((-1*pow((v - (beatPeriod/2)),2)) / (2*pow((beatPeriod/2) ,2))); | 653 beatExpectationWindow[i] = exp((-1 * pow ((v - (beatPeriod / 2)), 2)) / (2 * pow (beatPeriod / 2, 2))); |
758 v++; | 654 v++; |
759 } | 655 } |
760 | 656 |
761 // create past window | 657 // Create window for "synthesizing" the cumulative score into the future |
762 v = -2*beatPeriod; | 658 // It is a log-Gaussian transition weighting running from from 2 beat periods |
763 int start = onsetDFBufferSize - round(2*beatPeriod); | 659 // in the past to half a beat period in the past. It favours the time exactly |
764 int end = onsetDFBufferSize - round(beatPeriod/2); | 660 // one beat period in the past |
765 int pastwinsize = end-start+1; | 661 |
766 double w1[pastwinsize]; | 662 int startIndex = onsetDFBufferSize - round (2 * beatPeriod); |
767 | 663 int endIndex = onsetDFBufferSize - round (beatPeriod / 2); |
768 for (int i = 0;i < pastwinsize;i++) | 664 int pastWindowSize = endIndex - startIndex + 1; |
769 { | 665 |
770 w1[i] = exp((-1*pow(tightness*log(-v/beatPeriod),2))/2); | 666 double logGaussianTransitionWeighting[pastWindowSize]; |
771 v = v+1; | 667 createLogGaussianTransitionWeighting (logGaussianTransitionWeighting, pastWindowSize, beatPeriod); |
772 } | 668 |
773 | 669 // Calculate the future cumulative score, by shifting the log Gaussian transition weighting from its |
774 // calculate future cumulative score | 670 // start position of [-2 beat periods, - 0.5 beat periods] forwards over the size of the beat |
775 double max; | 671 // expectation window, calculating a new cumulative score where the onset detection function sample |
776 int n; | 672 // is zero. This uses the "momentum" of the function to generate itself into the future. |
777 double wcumscore; | 673 for (int i = onsetDFBufferSize; i < (onsetDFBufferSize + beatExpectationWindowSize); i++) |
778 for (int i = onsetDFBufferSize; i < (onsetDFBufferSize + windowSize); i++) | 674 { |
779 { | 675 // note here that we pass 0.0 in for the onset detection function sample and 1.0 for the alpha weighting factor |
780 start = i - round (2*beatPeriod); | 676 // see equation 3.4 and page 60 - 62 of Adam Stark's PhD thesis for details |
781 end = i - round (beatPeriod/2); | 677 futureCumulativeScore[i] = calculateNewCumulativeScoreValue (futureCumulativeScore, logGaussianTransitionWeighting, startIndex, endIndex, 0.0, 1.0); |
782 | 678 |
783 max = 0; | 679 startIndex++; |
784 n = 0; | 680 endIndex++; |
785 for (int k=start;k <= end;k++) | 681 } |
786 { | 682 |
787 wcumscore = futureCumulativeScore[k]*w1[n]; | 683 // Predict the next beat, finding the maximum point of the future cumulative score |
788 | 684 // over the next beat, after being weighted by the beat expectation window |
789 if (wcumscore > max) | 685 |
790 { | 686 double maxValue = 0; |
791 max = wcumscore; | 687 int n = 0; |
792 } | 688 |
793 n++; | 689 for (int i = onsetDFBufferSize; i < (onsetDFBufferSize + beatExpectationWindowSize); i++) |
794 } | 690 { |
795 | 691 double weightedCumulativeScore = futureCumulativeScore[i] * beatExpectationWindow[n]; |
796 futureCumulativeScore[i] = max; | 692 |
797 } | 693 if (weightedCumulativeScore > maxValue) |
798 | 694 { |
799 // predict beat | 695 maxValue = weightedCumulativeScore; |
800 max = 0; | 696 timeToNextBeat = n; |
801 n = 0; | |
802 | |
803 for (int i = onsetDFBufferSize; i < (onsetDFBufferSize + windowSize); i++) | |
804 { | |
805 wcumscore = futureCumulativeScore[i]*w2[n]; | |
806 | |
807 if (wcumscore > max) | |
808 { | |
809 max = wcumscore; | |
810 beatCounter = n; | |
811 } | 697 } |
812 | 698 |
813 n++; | 699 n++; |
814 } | 700 } |
815 | 701 |
816 // set next prediction time | 702 // set next prediction time as on the offbeat after the next beat |
817 m0 = beatCounter + round (beatPeriod / 2); | 703 timeToNextPrediction = timeToNextBeat + round (beatPeriod / 2); |
818 } | 704 } |
705 | |
706 //======================================================================= | |
707 void BTrack::createLogGaussianTransitionWeighting (double* weightingArray, int numSamples, double beatPeriod) | |
708 { | |
709 // (This is W1 in Adam Stark's PhD thesis, equation 3.2, page 60) | |
710 | |
711 double v = -2. * beatPeriod; | |
712 | |
713 for (int i = 0; i < numSamples; i++) | |
714 { | |
715 double a = tightness * log (-v / beatPeriod); | |
716 weightingArray[i] = exp ((-1. * a * a) / 2.); | |
717 v++; | |
718 } | |
719 } | |
720 | |
721 //======================================================================= | |
722 template <typename T> | |
723 double BTrack::calculateNewCumulativeScoreValue (T cumulativeScoreArray, double* logGaussianTransitionWeighting, int startIndex, int endIndex, double onsetDetectionFunctionSample, double alphaWeightingFactor) | |
724 { | |
725 // calculate new cumulative score value by weighting the cumulative score between | |
726 // startIndex and endIndex and finding the maximum value | |
727 double maxValue = 0; | |
728 int n = 0; | |
729 for (int i = startIndex; i <= endIndex; i++) | |
730 { | |
731 double weightedCumulativeScore = cumulativeScoreArray[i] * logGaussianTransitionWeighting[n]; | |
732 | |
733 if (weightedCumulativeScore > maxValue) | |
734 maxValue = weightedCumulativeScore; | |
735 | |
736 n++; | |
737 } | |
738 | |
739 // now mix with the incoming onset detection function sample | |
740 // (equation 3.4 on page 60 of Adam Stark's PhD thesis) | |
741 double cumulativeScoreValue = ((1. - alphaWeightingFactor) * onsetDetectionFunctionSample) + (alphaWeightingFactor * maxValue); | |
742 | |
743 return cumulativeScoreValue; | |
744 } |