annotate dsp/tempotracking/DownBeat.cpp @ 96:88f3cfcff55f

A threshold (delta) is added in the peak picking parameters structure (PPickParams). It is used as an offset when computing the smoothed detection function. A constructor for the structure PPickParams is also added to set the parameters to 0 when a structure instance is created. Hence programmes using the peak picking parameter structure and which do not set the delta parameter (e.g. QM Vamp note onset detector) won't be affected by the modifications. Functions modified: - dsp/onsets/PeakPicking.cpp - dsp/onsets/PeakPicking.h - dsp/signalconditioning/DFProcess.cpp - dsp/signalconditioning/DFProcess.h
author mathieub <mathieu.barthet@eecs.qmul.ac.uk>
date Mon, 20 Jun 2011 19:01:48 +0100
parents e5907ae6de17
children f6ccde089491
rev   line source
cannam@54 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@54 2
cannam@54 3 /*
cannam@54 4 QM DSP Library
cannam@54 5
cannam@54 6 Centre for Digital Music, Queen Mary, University of London.
cannam@54 7 This file copyright 2008-2009 Matthew Davies and QMUL.
Chris@84 8
Chris@84 9 This program is free software; you can redistribute it and/or
Chris@84 10 modify it under the terms of the GNU General Public License as
Chris@84 11 published by the Free Software Foundation; either version 2 of the
Chris@84 12 License, or (at your option) any later version. See the file
Chris@84 13 COPYING included with this distribution for more information.
cannam@54 14 */
cannam@54 15
cannam@54 16 #include "DownBeat.h"
cannam@54 17
cannam@54 18 #include "maths/MathAliases.h"
cannam@54 19 #include "maths/MathUtilities.h"
cannam@55 20 #include "maths/KLDivergence.h"
cannam@54 21 #include "dsp/transforms/FFT.h"
cannam@54 22
cannam@54 23 #include <iostream>
cannam@54 24 #include <cstdlib>
cannam@54 25
cannam@54 26 DownBeat::DownBeat(float originalSampleRate,
cannam@54 27 size_t decimationFactor,
cannam@54 28 size_t dfIncrement) :
cannam@55 29 m_bpb(0),
cannam@54 30 m_rate(originalSampleRate),
cannam@54 31 m_factor(decimationFactor),
cannam@54 32 m_increment(dfIncrement),
cannam@54 33 m_decimator1(0),
cannam@54 34 m_decimator2(0),
cannam@54 35 m_buffer(0),
cannam@58 36 m_decbuf(0),
cannam@54 37 m_bufsiz(0),
cannam@54 38 m_buffill(0),
cannam@54 39 m_beatframesize(0),
cannam@54 40 m_beatframe(0)
cannam@54 41 {
cannam@54 42 // beat frame size is next power of two up from 1.3 seconds at the
cannam@54 43 // downsampled rate (happens to produce 4096 for 44100 or 48000 at
cannam@54 44 // 16x decimation, which is our expected normal situation)
cannam@55 45 m_beatframesize = MathUtilities::nextPowerOfTwo
cannam@55 46 (int((m_rate / decimationFactor) * 1.3));
cannam@57 47 // std::cerr << "rate = " << m_rate << ", bfs = " << m_beatframesize << std::endl;
cannam@54 48 m_beatframe = new double[m_beatframesize];
cannam@54 49 m_fftRealOut = new double[m_beatframesize];
cannam@54 50 m_fftImagOut = new double[m_beatframesize];
cannam@64 51 m_fft = new FFTReal(m_beatframesize);
cannam@54 52 }
cannam@54 53
cannam@54 54 DownBeat::~DownBeat()
cannam@54 55 {
cannam@54 56 delete m_decimator1;
cannam@54 57 delete m_decimator2;
cannam@54 58 if (m_buffer) free(m_buffer);
cannam@54 59 delete[] m_decbuf;
cannam@54 60 delete[] m_beatframe;
cannam@54 61 delete[] m_fftRealOut;
cannam@54 62 delete[] m_fftImagOut;
cannam@64 63 delete m_fft;
cannam@54 64 }
cannam@54 65
cannam@54 66 void
cannam@55 67 DownBeat::setBeatsPerBar(int bpb)
cannam@55 68 {
cannam@55 69 m_bpb = bpb;
cannam@55 70 }
cannam@55 71
cannam@55 72 void
cannam@54 73 DownBeat::makeDecimators()
cannam@54 74 {
cannam@58 75 // std::cerr << "m_factor = " << m_factor << std::endl;
cannam@54 76 if (m_factor < 2) return;
cannam@77 77 size_t highest = Decimator::getHighestSupportedFactor();
cannam@54 78 if (m_factor <= highest) {
cannam@54 79 m_decimator1 = new Decimator(m_increment, m_factor);
cannam@57 80 // std::cerr << "DownBeat: decimator 1 factor " << m_factor << ", size " << m_increment << std::endl;
cannam@54 81 return;
cannam@54 82 }
cannam@54 83 m_decimator1 = new Decimator(m_increment, highest);
cannam@57 84 // std::cerr << "DownBeat: decimator 1 factor " << highest << ", size " << m_increment << std::endl;
cannam@54 85 m_decimator2 = new Decimator(m_increment / highest, m_factor / highest);
cannam@57 86 // std::cerr << "DownBeat: decimator 2 factor " << m_factor / highest << ", size " << m_increment / highest << std::endl;
cannam@55 87 m_decbuf = new float[m_increment / highest];
cannam@54 88 }
cannam@54 89
cannam@54 90 void
cannam@55 91 DownBeat::pushAudioBlock(const float *audio)
cannam@54 92 {
cannam@54 93 if (m_buffill + (m_increment / m_factor) > m_bufsiz) {
cannam@54 94 if (m_bufsiz == 0) m_bufsiz = m_increment * 16;
cannam@54 95 else m_bufsiz = m_bufsiz * 2;
cannam@54 96 if (!m_buffer) {
cannam@55 97 m_buffer = (float *)malloc(m_bufsiz * sizeof(float));
cannam@54 98 } else {
cannam@57 99 // std::cerr << "DownBeat::pushAudioBlock: realloc m_buffer to " << m_bufsiz << std::endl;
cannam@55 100 m_buffer = (float *)realloc(m_buffer, m_bufsiz * sizeof(float));
cannam@54 101 }
cannam@54 102 }
cannam@58 103 if (!m_decimator1 && m_factor > 1) makeDecimators();
cannam@58 104 // float rmsin = 0, rmsout = 0;
cannam@58 105 // for (int i = 0; i < m_increment; ++i) {
cannam@58 106 // rmsin += audio[i] * audio[i];
cannam@58 107 // }
cannam@54 108 if (m_decimator2) {
cannam@54 109 m_decimator1->process(audio, m_decbuf);
cannam@54 110 m_decimator2->process(m_decbuf, m_buffer + m_buffill);
cannam@58 111 } else if (m_decimator1) {
cannam@58 112 m_decimator1->process(audio, m_buffer + m_buffill);
cannam@54 113 } else {
cannam@58 114 // just copy across (m_factor is presumably 1)
cannam@77 115 for (size_t i = 0; i < m_increment; ++i) {
cannam@58 116 (m_buffer + m_buffill)[i] = audio[i];
cannam@58 117 }
cannam@54 118 }
cannam@58 119 // for (int i = 0; i < m_increment / m_factor; ++i) {
cannam@58 120 // rmsout += m_buffer[m_buffill + i] * m_buffer[m_buffill + i];
cannam@58 121 // }
cannam@57 122 // std::cerr << "pushAudioBlock: rms in " << sqrt(rmsin) << ", out " << sqrt(rmsout) << std::endl;
cannam@54 123 m_buffill += m_increment / m_factor;
cannam@54 124 }
cannam@54 125
cannam@55 126 const float *
cannam@54 127 DownBeat::getBufferedAudio(size_t &length) const
cannam@54 128 {
cannam@54 129 length = m_buffill;
cannam@54 130 return m_buffer;
cannam@54 131 }
cannam@54 132
cannam@54 133 void
cannam@55 134 DownBeat::resetAudioBuffer()
cannam@55 135 {
cannam@55 136 if (m_buffer) free(m_buffer);
cannam@58 137 m_buffer = 0;
cannam@55 138 m_buffill = 0;
cannam@55 139 m_bufsiz = 0;
cannam@55 140 }
cannam@55 141
cannam@55 142 void
cannam@55 143 DownBeat::findDownBeats(const float *audio,
cannam@54 144 size_t audioLength,
cannam@54 145 const d_vec_t &beats,
cannam@54 146 i_vec_t &downbeats)
cannam@54 147 {
cannam@54 148 // FIND DOWNBEATS BY PARTITIONING THE INPUT AUDIO FILE INTO BEAT SEGMENTS
cannam@54 149 // WHERE THE AUDIO FRAMES ARE DOWNSAMPLED BY A FACTOR OF 16 (fs ~= 2700Hz)
cannam@54 150 // THEN TAKING THE JENSEN-SHANNON DIVERGENCE BETWEEN BEAT SYNCHRONOUS SPECTRAL FRAMES
cannam@54 151
cannam@54 152 // IMPLEMENTATION (MOSTLY) FOLLOWS:
cannam@54 153 // DAVIES AND PLUMBLEY "A SPECTRAL DIFFERENCE APPROACH TO EXTRACTING DOWNBEATS IN MUSICAL AUDIO"
cannam@54 154 // EUSIPCO 2006, FLORENCE, ITALY
cannam@54 155
cannam@54 156 d_vec_t newspec(m_beatframesize / 2); // magnitude spectrum of current beat
cannam@54 157 d_vec_t oldspec(m_beatframesize / 2); // magnitude spectrum of previous beat
cannam@56 158
cannam@56 159 m_beatsd.clear();
cannam@54 160
cannam@54 161 if (audioLength == 0) return;
cannam@54 162
cannam@54 163 for (size_t i = 0; i + 1 < beats.size(); ++i) {
cannam@54 164
cannam@54 165 // Copy the extents of the current beat from downsampled array
cannam@54 166 // into beat frame buffer
cannam@54 167
cannam@54 168 size_t beatstart = (beats[i] * m_increment) / m_factor;
cannam@55 169 size_t beatend = (beats[i+1] * m_increment) / m_factor;
cannam@54 170 if (beatend >= audioLength) beatend = audioLength - 1;
cannam@54 171 if (beatend < beatstart) beatend = beatstart;
cannam@54 172 size_t beatlen = beatend - beatstart;
cannam@54 173
cannam@54 174 // Also apply a Hanning window to the beat frame buffer, sized
cannam@54 175 // to the beat extents rather than the frame size. (Because
cannam@54 176 // the size varies, it's easier to do this by hand than use
cannam@54 177 // our Window abstraction.)
cannam@54 178
cannam@58 179 // std::cerr << "beatlen = " << beatlen << std::endl;
cannam@58 180
cannam@58 181 // float rms = 0;
cannam@58 182 for (size_t j = 0; j < beatlen && j < m_beatframesize; ++j) {
cannam@54 183 double mul = 0.5 * (1.0 - cos(TWO_PI * (double(j) / double(beatlen))));
cannam@54 184 m_beatframe[j] = audio[beatstart + j] * mul;
cannam@58 185 // rms += m_beatframe[j] * m_beatframe[j];
cannam@54 186 }
cannam@58 187 // rms = sqrt(rms);
cannam@57 188 // std::cerr << "beat " << i << ": audio rms " << rms << std::endl;
cannam@54 189
cannam@54 190 for (size_t j = beatlen; j < m_beatframesize; ++j) {
cannam@54 191 m_beatframe[j] = 0.0;
cannam@54 192 }
cannam@54 193
cannam@54 194 // Now FFT beat frame
cannam@54 195
cannam@64 196 m_fft->process(false, m_beatframe, m_fftRealOut, m_fftImagOut);
cannam@54 197
cannam@54 198 // Calculate magnitudes
cannam@54 199
cannam@54 200 for (size_t j = 0; j < m_beatframesize/2; ++j) {
cannam@54 201 newspec[j] = sqrt(m_fftRealOut[j] * m_fftRealOut[j] +
cannam@54 202 m_fftImagOut[j] * m_fftImagOut[j]);
cannam@54 203 }
cannam@54 204
cannam@54 205 // Preserve peaks by applying adaptive threshold
cannam@54 206
cannam@54 207 MathUtilities::adaptiveThreshold(newspec);
cannam@54 208
cannam@54 209 // Calculate JS divergence between new and old spectral frames
cannam@54 210
cannam@56 211 if (i > 0) { // otherwise we have no previous frame
cannam@56 212 m_beatsd.push_back(measureSpecDiff(oldspec, newspec));
cannam@57 213 // std::cerr << "specdiff: " << m_beatsd[m_beatsd.size()-1] << std::endl;
cannam@56 214 }
cannam@54 215
cannam@54 216 // Copy newspec across to old
cannam@54 217
cannam@54 218 for (size_t j = 0; j < m_beatframesize/2; ++j) {
cannam@54 219 oldspec[j] = newspec[j];
cannam@54 220 }
cannam@54 221 }
cannam@54 222
cannam@54 223 // We now have all spectral difference measures in specdiff
cannam@54 224
cannam@77 225 int timesig = m_bpb;
cannam@55 226 if (timesig == 0) timesig = 4;
cannam@55 227
cannam@54 228 d_vec_t dbcand(timesig); // downbeat candidates
cannam@54 229
cannam@55 230 for (int beat = 0; beat < timesig; ++beat) {
cannam@55 231 dbcand[beat] = 0;
cannam@55 232 }
cannam@55 233
cannam@76 234 // look for beat transition which leads to greatest spectral change
cannam@76 235 for (int beat = 0; beat < timesig; ++beat) {
cannam@76 236 int count = 0;
cannam@77 237 for (int example = beat-1; example < (int)m_beatsd.size(); example += timesig) {
cannam@76 238 if (example < 0) continue;
cannam@76 239 dbcand[beat] += (m_beatsd[example]) / timesig;
cannam@76 240 ++count;
cannam@76 241 }
cannam@76 242 if (count > 0) dbcand[beat] /= count;
cannam@57 243 // std::cerr << "dbcand[" << beat << "] = " << dbcand[beat] << std::endl;
cannam@76 244 }
cannam@55 245
cannam@54 246 // first downbeat is beat at index of maximum value of dbcand
cannam@54 247 int dbind = MathUtilities::getMax(dbcand);
cannam@54 248
cannam@54 249 // remaining downbeats are at timesig intervals from the first
cannam@77 250 for (int i = dbind; i < (int)beats.size(); i += timesig) {
cannam@54 251 downbeats.push_back(i);
cannam@54 252 }
cannam@54 253 }
cannam@54 254
cannam@54 255 double
cannam@54 256 DownBeat::measureSpecDiff(d_vec_t oldspec, d_vec_t newspec)
cannam@54 257 {
cannam@54 258 // JENSEN-SHANNON DIVERGENCE BETWEEN SPECTRAL FRAMES
cannam@54 259
cannam@70 260 unsigned int SPECSIZE = 512; // ONLY LOOK AT FIRST 512 SAMPLES OF SPECTRUM.
cannam@54 261 if (SPECSIZE > oldspec.size()/4) {
cannam@54 262 SPECSIZE = oldspec.size()/4;
cannam@54 263 }
cannam@54 264 double SD = 0.;
cannam@54 265 double sd1 = 0.;
cannam@54 266
cannam@54 267 double sumnew = 0.;
cannam@54 268 double sumold = 0.;
cannam@54 269
cannam@70 270 for (unsigned int i = 0;i < SPECSIZE;i++)
cannam@54 271 {
cannam@54 272 newspec[i] +=EPS;
cannam@54 273 oldspec[i] +=EPS;
cannam@54 274
cannam@54 275 sumnew+=newspec[i];
cannam@54 276 sumold+=oldspec[i];
cannam@54 277 }
cannam@54 278
cannam@70 279 for (unsigned int i = 0;i < SPECSIZE;i++)
cannam@54 280 {
cannam@54 281 newspec[i] /= (sumnew);
cannam@54 282 oldspec[i] /= (sumold);
cannam@54 283
cannam@54 284 // IF ANY SPECTRAL VALUES ARE 0 (SHOULDN'T BE ANY!) SET THEM TO 1
cannam@54 285 if (newspec[i] == 0)
cannam@54 286 {
cannam@54 287 newspec[i] = 1.;
cannam@54 288 }
cannam@54 289
cannam@54 290 if (oldspec[i] == 0)
cannam@54 291 {
cannam@54 292 oldspec[i] = 1.;
cannam@54 293 }
cannam@54 294
cannam@54 295 // JENSEN-SHANNON CALCULATION
cannam@54 296 sd1 = 0.5*oldspec[i] + 0.5*newspec[i];
cannam@54 297 SD = SD + (-sd1*log(sd1)) + (0.5*(oldspec[i]*log(oldspec[i]))) + (0.5*(newspec[i]*log(newspec[i])));
cannam@54 298 }
cannam@54 299
cannam@54 300 return SD;
cannam@54 301 }
cannam@54 302
cannam@56 303 void
cannam@56 304 DownBeat::getBeatSD(vector<double> &beatsd) const
cannam@56 305 {
cannam@77 306 for (int i = 0; i < (int)m_beatsd.size(); ++i) beatsd.push_back(m_beatsd[i]);
cannam@56 307 }
cannam@56 308