annotate dsp/tempotracking/DownBeat.cpp @ 209:ccd2019190bf msvc

Some MSVC fixes, including (temporarily, probably) renaming the FFT source file to avoid getting it mixed up with the Vamp SDK one in our object dir
author Chris Cannam
date Thu, 01 Feb 2018 16:34:08 +0000
parents a2b3fd07d862
children 7e52c034cf62
rev   line source
cannam@54 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@54 2
cannam@54 3 /*
cannam@54 4 QM DSP Library
cannam@54 5
cannam@54 6 Centre for Digital Music, Queen Mary, University of London.
cannam@54 7 This file copyright 2008-2009 Matthew Davies and QMUL.
Chris@84 8
Chris@84 9 This program is free software; you can redistribute it and/or
Chris@84 10 modify it under the terms of the GNU General Public License as
Chris@84 11 published by the Free Software Foundation; either version 2 of the
Chris@84 12 License, or (at your option) any later version. See the file
Chris@84 13 COPYING included with this distribution for more information.
cannam@54 14 */
cannam@54 15
cannam@54 16 #include "DownBeat.h"
cannam@54 17
cannam@54 18 #include "maths/MathAliases.h"
cannam@54 19 #include "maths/MathUtilities.h"
cannam@55 20 #include "maths/KLDivergence.h"
cannam@54 21 #include "dsp/transforms/FFT.h"
cannam@54 22
cannam@54 23 #include <iostream>
cannam@54 24 #include <cstdlib>
cannam@54 25
cannam@54 26 DownBeat::DownBeat(float originalSampleRate,
cannam@54 27 size_t decimationFactor,
cannam@54 28 size_t dfIncrement) :
cannam@55 29 m_bpb(0),
cannam@54 30 m_rate(originalSampleRate),
cannam@54 31 m_factor(decimationFactor),
cannam@54 32 m_increment(dfIncrement),
cannam@54 33 m_decimator1(0),
cannam@54 34 m_decimator2(0),
cannam@54 35 m_buffer(0),
cannam@58 36 m_decbuf(0),
cannam@54 37 m_bufsiz(0),
cannam@54 38 m_buffill(0),
cannam@54 39 m_beatframesize(0),
cannam@54 40 m_beatframe(0)
cannam@54 41 {
cannam@54 42 // beat frame size is next power of two up from 1.3 seconds at the
cannam@54 43 // downsampled rate (happens to produce 4096 for 44100 or 48000 at
cannam@54 44 // 16x decimation, which is our expected normal situation)
cannam@55 45 m_beatframesize = MathUtilities::nextPowerOfTwo
cannam@55 46 (int((m_rate / decimationFactor) * 1.3));
Chris@162 47 if (m_beatframesize < 2) {
Chris@162 48 m_beatframesize = 2;
Chris@162 49 }
Chris@162 50 // std::cerr << "rate = " << m_rate << ", dec = " << decimationFactor << ", bfs = " << m_beatframesize << std::endl;
cannam@54 51 m_beatframe = new double[m_beatframesize];
cannam@54 52 m_fftRealOut = new double[m_beatframesize];
cannam@54 53 m_fftImagOut = new double[m_beatframesize];
cannam@64 54 m_fft = new FFTReal(m_beatframesize);
cannam@54 55 }
cannam@54 56
cannam@54 57 DownBeat::~DownBeat()
cannam@54 58 {
cannam@54 59 delete m_decimator1;
cannam@54 60 delete m_decimator2;
cannam@54 61 if (m_buffer) free(m_buffer);
cannam@54 62 delete[] m_decbuf;
cannam@54 63 delete[] m_beatframe;
cannam@54 64 delete[] m_fftRealOut;
cannam@54 65 delete[] m_fftImagOut;
cannam@64 66 delete m_fft;
cannam@54 67 }
cannam@54 68
cannam@54 69 void
cannam@55 70 DownBeat::setBeatsPerBar(int bpb)
cannam@55 71 {
cannam@55 72 m_bpb = bpb;
cannam@55 73 }
cannam@55 74
cannam@55 75 void
cannam@54 76 DownBeat::makeDecimators()
cannam@54 77 {
cannam@58 78 // std::cerr << "m_factor = " << m_factor << std::endl;
cannam@54 79 if (m_factor < 2) return;
cannam@77 80 size_t highest = Decimator::getHighestSupportedFactor();
cannam@54 81 if (m_factor <= highest) {
cannam@54 82 m_decimator1 = new Decimator(m_increment, m_factor);
cannam@57 83 // std::cerr << "DownBeat: decimator 1 factor " << m_factor << ", size " << m_increment << std::endl;
cannam@54 84 return;
cannam@54 85 }
cannam@54 86 m_decimator1 = new Decimator(m_increment, highest);
cannam@57 87 // std::cerr << "DownBeat: decimator 1 factor " << highest << ", size " << m_increment << std::endl;
cannam@54 88 m_decimator2 = new Decimator(m_increment / highest, m_factor / highest);
cannam@57 89 // std::cerr << "DownBeat: decimator 2 factor " << m_factor / highest << ", size " << m_increment / highest << std::endl;
cannam@55 90 m_decbuf = new float[m_increment / highest];
cannam@54 91 }
cannam@54 92
cannam@54 93 void
cannam@55 94 DownBeat::pushAudioBlock(const float *audio)
cannam@54 95 {
cannam@54 96 if (m_buffill + (m_increment / m_factor) > m_bufsiz) {
cannam@54 97 if (m_bufsiz == 0) m_bufsiz = m_increment * 16;
cannam@54 98 else m_bufsiz = m_bufsiz * 2;
cannam@54 99 if (!m_buffer) {
cannam@55 100 m_buffer = (float *)malloc(m_bufsiz * sizeof(float));
cannam@54 101 } else {
cannam@57 102 // std::cerr << "DownBeat::pushAudioBlock: realloc m_buffer to " << m_bufsiz << std::endl;
cannam@55 103 m_buffer = (float *)realloc(m_buffer, m_bufsiz * sizeof(float));
cannam@54 104 }
cannam@54 105 }
cannam@58 106 if (!m_decimator1 && m_factor > 1) makeDecimators();
cannam@58 107 // float rmsin = 0, rmsout = 0;
cannam@58 108 // for (int i = 0; i < m_increment; ++i) {
cannam@58 109 // rmsin += audio[i] * audio[i];
cannam@58 110 // }
cannam@54 111 if (m_decimator2) {
cannam@54 112 m_decimator1->process(audio, m_decbuf);
cannam@54 113 m_decimator2->process(m_decbuf, m_buffer + m_buffill);
cannam@58 114 } else if (m_decimator1) {
cannam@58 115 m_decimator1->process(audio, m_buffer + m_buffill);
cannam@54 116 } else {
cannam@58 117 // just copy across (m_factor is presumably 1)
cannam@77 118 for (size_t i = 0; i < m_increment; ++i) {
cannam@58 119 (m_buffer + m_buffill)[i] = audio[i];
cannam@58 120 }
cannam@54 121 }
cannam@58 122 // for (int i = 0; i < m_increment / m_factor; ++i) {
cannam@58 123 // rmsout += m_buffer[m_buffill + i] * m_buffer[m_buffill + i];
cannam@58 124 // }
cannam@57 125 // std::cerr << "pushAudioBlock: rms in " << sqrt(rmsin) << ", out " << sqrt(rmsout) << std::endl;
cannam@54 126 m_buffill += m_increment / m_factor;
cannam@54 127 }
cannam@54 128
cannam@55 129 const float *
cannam@54 130 DownBeat::getBufferedAudio(size_t &length) const
cannam@54 131 {
cannam@54 132 length = m_buffill;
cannam@54 133 return m_buffer;
cannam@54 134 }
cannam@54 135
cannam@54 136 void
cannam@55 137 DownBeat::resetAudioBuffer()
cannam@55 138 {
cannam@55 139 if (m_buffer) free(m_buffer);
cannam@58 140 m_buffer = 0;
cannam@55 141 m_buffill = 0;
cannam@55 142 m_bufsiz = 0;
cannam@55 143 }
cannam@55 144
cannam@55 145 void
cannam@55 146 DownBeat::findDownBeats(const float *audio,
cannam@54 147 size_t audioLength,
cannam@54 148 const d_vec_t &beats,
cannam@54 149 i_vec_t &downbeats)
cannam@54 150 {
cannam@54 151 // FIND DOWNBEATS BY PARTITIONING THE INPUT AUDIO FILE INTO BEAT SEGMENTS
cannam@54 152 // WHERE THE AUDIO FRAMES ARE DOWNSAMPLED BY A FACTOR OF 16 (fs ~= 2700Hz)
cannam@54 153 // THEN TAKING THE JENSEN-SHANNON DIVERGENCE BETWEEN BEAT SYNCHRONOUS SPECTRAL FRAMES
cannam@54 154
cannam@54 155 // IMPLEMENTATION (MOSTLY) FOLLOWS:
cannam@54 156 // DAVIES AND PLUMBLEY "A SPECTRAL DIFFERENCE APPROACH TO EXTRACTING DOWNBEATS IN MUSICAL AUDIO"
cannam@54 157 // EUSIPCO 2006, FLORENCE, ITALY
cannam@54 158
cannam@54 159 d_vec_t newspec(m_beatframesize / 2); // magnitude spectrum of current beat
cannam@54 160 d_vec_t oldspec(m_beatframesize / 2); // magnitude spectrum of previous beat
cannam@56 161
cannam@56 162 m_beatsd.clear();
cannam@54 163
cannam@54 164 if (audioLength == 0) return;
cannam@54 165
cannam@54 166 for (size_t i = 0; i + 1 < beats.size(); ++i) {
cannam@54 167
cannam@54 168 // Copy the extents of the current beat from downsampled array
cannam@54 169 // into beat frame buffer
cannam@54 170
cannam@54 171 size_t beatstart = (beats[i] * m_increment) / m_factor;
cannam@55 172 size_t beatend = (beats[i+1] * m_increment) / m_factor;
cannam@54 173 if (beatend >= audioLength) beatend = audioLength - 1;
cannam@54 174 if (beatend < beatstart) beatend = beatstart;
cannam@54 175 size_t beatlen = beatend - beatstart;
cannam@54 176
cannam@54 177 // Also apply a Hanning window to the beat frame buffer, sized
cannam@54 178 // to the beat extents rather than the frame size. (Because
cannam@54 179 // the size varies, it's easier to do this by hand than use
cannam@54 180 // our Window abstraction.)
cannam@54 181
cannam@58 182 // std::cerr << "beatlen = " << beatlen << std::endl;
cannam@58 183
cannam@58 184 // float rms = 0;
cannam@58 185 for (size_t j = 0; j < beatlen && j < m_beatframesize; ++j) {
cannam@54 186 double mul = 0.5 * (1.0 - cos(TWO_PI * (double(j) / double(beatlen))));
cannam@54 187 m_beatframe[j] = audio[beatstart + j] * mul;
cannam@58 188 // rms += m_beatframe[j] * m_beatframe[j];
cannam@54 189 }
cannam@58 190 // rms = sqrt(rms);
cannam@57 191 // std::cerr << "beat " << i << ": audio rms " << rms << std::endl;
cannam@54 192
cannam@54 193 for (size_t j = beatlen; j < m_beatframesize; ++j) {
cannam@54 194 m_beatframe[j] = 0.0;
cannam@54 195 }
cannam@54 196
cannam@54 197 // Now FFT beat frame
cannam@54 198
Chris@114 199 m_fft->forward(m_beatframe, m_fftRealOut, m_fftImagOut);
cannam@54 200
cannam@54 201 // Calculate magnitudes
cannam@54 202
cannam@54 203 for (size_t j = 0; j < m_beatframesize/2; ++j) {
cannam@54 204 newspec[j] = sqrt(m_fftRealOut[j] * m_fftRealOut[j] +
cannam@54 205 m_fftImagOut[j] * m_fftImagOut[j]);
cannam@54 206 }
cannam@54 207
cannam@54 208 // Preserve peaks by applying adaptive threshold
cannam@54 209
cannam@54 210 MathUtilities::adaptiveThreshold(newspec);
cannam@54 211
cannam@54 212 // Calculate JS divergence between new and old spectral frames
cannam@54 213
cannam@56 214 if (i > 0) { // otherwise we have no previous frame
cannam@56 215 m_beatsd.push_back(measureSpecDiff(oldspec, newspec));
cannam@57 216 // std::cerr << "specdiff: " << m_beatsd[m_beatsd.size()-1] << std::endl;
cannam@56 217 }
cannam@54 218
cannam@54 219 // Copy newspec across to old
cannam@54 220
cannam@54 221 for (size_t j = 0; j < m_beatframesize/2; ++j) {
cannam@54 222 oldspec[j] = newspec[j];
cannam@54 223 }
cannam@54 224 }
cannam@54 225
cannam@54 226 // We now have all spectral difference measures in specdiff
cannam@54 227
cannam@77 228 int timesig = m_bpb;
cannam@55 229 if (timesig == 0) timesig = 4;
cannam@55 230
cannam@54 231 d_vec_t dbcand(timesig); // downbeat candidates
cannam@54 232
cannam@55 233 for (int beat = 0; beat < timesig; ++beat) {
cannam@55 234 dbcand[beat] = 0;
cannam@55 235 }
cannam@55 236
cannam@76 237 // look for beat transition which leads to greatest spectral change
cannam@76 238 for (int beat = 0; beat < timesig; ++beat) {
cannam@76 239 int count = 0;
cannam@77 240 for (int example = beat-1; example < (int)m_beatsd.size(); example += timesig) {
cannam@76 241 if (example < 0) continue;
cannam@76 242 dbcand[beat] += (m_beatsd[example]) / timesig;
cannam@76 243 ++count;
cannam@76 244 }
cannam@76 245 if (count > 0) dbcand[beat] /= count;
cannam@57 246 // std::cerr << "dbcand[" << beat << "] = " << dbcand[beat] << std::endl;
cannam@76 247 }
cannam@55 248
cannam@54 249 // first downbeat is beat at index of maximum value of dbcand
cannam@54 250 int dbind = MathUtilities::getMax(dbcand);
cannam@54 251
cannam@54 252 // remaining downbeats are at timesig intervals from the first
cannam@77 253 for (int i = dbind; i < (int)beats.size(); i += timesig) {
cannam@54 254 downbeats.push_back(i);
cannam@54 255 }
cannam@54 256 }
cannam@54 257
cannam@54 258 double
cannam@54 259 DownBeat::measureSpecDiff(d_vec_t oldspec, d_vec_t newspec)
cannam@54 260 {
cannam@54 261 // JENSEN-SHANNON DIVERGENCE BETWEEN SPECTRAL FRAMES
cannam@54 262
cannam@70 263 unsigned int SPECSIZE = 512; // ONLY LOOK AT FIRST 512 SAMPLES OF SPECTRUM.
cannam@54 264 if (SPECSIZE > oldspec.size()/4) {
cannam@54 265 SPECSIZE = oldspec.size()/4;
cannam@54 266 }
cannam@54 267 double SD = 0.;
cannam@54 268 double sd1 = 0.;
cannam@54 269
cannam@54 270 double sumnew = 0.;
cannam@54 271 double sumold = 0.;
cannam@54 272
cannam@70 273 for (unsigned int i = 0;i < SPECSIZE;i++)
cannam@54 274 {
cannam@54 275 newspec[i] +=EPS;
cannam@54 276 oldspec[i] +=EPS;
cannam@54 277
cannam@54 278 sumnew+=newspec[i];
cannam@54 279 sumold+=oldspec[i];
cannam@54 280 }
cannam@54 281
cannam@70 282 for (unsigned int i = 0;i < SPECSIZE;i++)
cannam@54 283 {
cannam@54 284 newspec[i] /= (sumnew);
cannam@54 285 oldspec[i] /= (sumold);
cannam@54 286
cannam@54 287 // IF ANY SPECTRAL VALUES ARE 0 (SHOULDN'T BE ANY!) SET THEM TO 1
cannam@54 288 if (newspec[i] == 0)
cannam@54 289 {
cannam@54 290 newspec[i] = 1.;
cannam@54 291 }
cannam@54 292
cannam@54 293 if (oldspec[i] == 0)
cannam@54 294 {
cannam@54 295 oldspec[i] = 1.;
cannam@54 296 }
cannam@54 297
cannam@54 298 // JENSEN-SHANNON CALCULATION
cannam@54 299 sd1 = 0.5*oldspec[i] + 0.5*newspec[i];
cannam@54 300 SD = SD + (-sd1*log(sd1)) + (0.5*(oldspec[i]*log(oldspec[i]))) + (0.5*(newspec[i]*log(newspec[i])));
cannam@54 301 }
cannam@54 302
cannam@54 303 return SD;
cannam@54 304 }
cannam@54 305
cannam@56 306 void
cannam@56 307 DownBeat::getBeatSD(vector<double> &beatsd) const
cannam@56 308 {
cannam@77 309 for (int i = 0; i < (int)m_beatsd.size(); ++i) beatsd.push_back(m_beatsd[i]);
cannam@56 310 }
cannam@56 311