qm-dsp: dsp/tempotracking/DownBeat.cpp annotate

annotate dsp/tempotracking/DownBeat.cpp @ 321:f1e6be2de9a5

A threshold (delta) is added in the peak picking parameters structure (PPickParams). It is used as an offset when computing the smoothed detection function. A constructor for the structure PPickParams is also added to set the parameters to 0 when a structure instance is created. Hence programmes using the peak picking parameter structure and which do not set the delta parameter (e.g. QM Vamp note onset detector) won't be affected by the modifications. Functions modified: - dsp/onsets/PeakPicking.cpp - dsp/onsets/PeakPicking.h - dsp/signalconditioning/DFProcess.cpp - dsp/signalconditioning/DFProcess.h

author	mathieub <mathieu.barthet@eecs.qmul.ac.uk>
date	Mon, 20 Jun 2011 19:01:48 +0100
parents	d5014ab8b0e5
children	f6ccde089491

rev	line source
c@279	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
c@279	2
c@279	3 /*
c@279	4 QM DSP Library
c@279	5
c@279	6 Centre for Digital Music, Queen Mary, University of London.
c@279	7 This file copyright 2008-2009 Matthew Davies and QMUL.
c@309	8
c@309	9 This program is free software; you can redistribute it and/or
c@309	10 modify it under the terms of the GNU General Public License as
c@309	11 published by the Free Software Foundation; either version 2 of the
c@309	12 License, or (at your option) any later version. See the file
c@309	13 COPYING included with this distribution for more information.
c@279	14 */
c@279	15
c@279	16 #include "DownBeat.h"
c@279	17
c@279	18 #include "maths/MathAliases.h"
c@279	19 #include "maths/MathUtilities.h"
c@280	20 #include "maths/KLDivergence.h"
c@279	21 #include "dsp/transforms/FFT.h"
c@279	22
c@279	23 #include <iostream>
c@279	24 #include <cstdlib>
c@279	25
c@279	26 DownBeat::DownBeat(float originalSampleRate,
c@279	27 size_t decimationFactor,
c@279	28 size_t dfIncrement) :
c@280	29 m_bpb(0),
c@279	30 m_rate(originalSampleRate),
c@279	31 m_factor(decimationFactor),
c@279	32 m_increment(dfIncrement),
c@279	33 m_decimator1(0),
c@279	34 m_decimator2(0),
c@279	35 m_buffer(0),
c@283	36 m_decbuf(0),
c@279	37 m_bufsiz(0),
c@279	38 m_buffill(0),
c@279	39 m_beatframesize(0),
c@279	40 m_beatframe(0)
c@279	41 {
c@279	42 // beat frame size is next power of two up from 1.3 seconds at the
c@279	43 // downsampled rate (happens to produce 4096 for 44100 or 48000 at
c@279	44 // 16x decimation, which is our expected normal situation)
c@280	45 m_beatframesize = MathUtilities::nextPowerOfTwo
c@280	46 (int((m_rate / decimationFactor) * 1.3));
c@282	47 // std::cerr << "rate = " << m_rate << ", bfs = " << m_beatframesize << std::endl;
c@279	48 m_beatframe = new double[m_beatframesize];
c@279	49 m_fftRealOut = new double[m_beatframesize];
c@279	50 m_fftImagOut = new double[m_beatframesize];
c@289	51 m_fft = new FFTReal(m_beatframesize);
c@279	52 }
c@279	53
c@279	54 DownBeat::~DownBeat()
c@279	55 {
c@279	56 delete m_decimator1;
c@279	57 delete m_decimator2;
c@279	58 if (m_buffer) free(m_buffer);
c@279	59 delete[] m_decbuf;
c@279	60 delete[] m_beatframe;
c@279	61 delete[] m_fftRealOut;
c@279	62 delete[] m_fftImagOut;
c@289	63 delete m_fft;
c@279	64 }
c@279	65
c@279	66 void
c@280	67 DownBeat::setBeatsPerBar(int bpb)
c@280	68 {
c@280	69 m_bpb = bpb;
c@280	70 }
c@280	71
c@280	72 void
c@279	73 DownBeat::makeDecimators()
c@279	74 {
c@283	75 // std::cerr << "m_factor = " << m_factor << std::endl;
c@279	76 if (m_factor < 2) return;
c@302	77 size_t highest = Decimator::getHighestSupportedFactor();
c@279	78 if (m_factor <= highest) {
c@279	79 m_decimator1 = new Decimator(m_increment, m_factor);
c@282	80 // std::cerr << "DownBeat: decimator 1 factor " << m_factor << ", size " << m_increment << std::endl;
c@279	81 return;
c@279	82 }
c@279	83 m_decimator1 = new Decimator(m_increment, highest);
c@282	84 // std::cerr << "DownBeat: decimator 1 factor " << highest << ", size " << m_increment << std::endl;
c@279	85 m_decimator2 = new Decimator(m_increment / highest, m_factor / highest);
c@282	86 // std::cerr << "DownBeat: decimator 2 factor " << m_factor / highest << ", size " << m_increment / highest << std::endl;
c@280	87 m_decbuf = new float[m_increment / highest];
c@279	88 }
c@279	89
c@279	90 void
c@280	91 DownBeat::pushAudioBlock(const float *audio)
c@279	92 {
c@279	93 if (m_buffill + (m_increment / m_factor) > m_bufsiz) {
c@279	94 if (m_bufsiz == 0) m_bufsiz = m_increment * 16;
c@279	95 else m_bufsiz = m_bufsiz * 2;
c@279	96 if (!m_buffer) {
c@280	97 m_buffer = (float )malloc(m_bufsiz sizeof(float));
c@279	98 } else {
c@282	99 // std::cerr << "DownBeat::pushAudioBlock: realloc m_buffer to " << m_bufsiz << std::endl;
c@280	100 m_buffer = (float )realloc(m_buffer, m_bufsiz sizeof(float));
c@279	101 }
c@279	102 }
c@283	103 if (!m_decimator1 && m_factor > 1) makeDecimators();
c@283	104 // float rmsin = 0, rmsout = 0;
c@283	105 // for (int i = 0; i < m_increment; ++i) {
c@283	106 // rmsin += audio[i] * audio[i];
c@283	107 // }
c@279	108 if (m_decimator2) {
c@279	109 m_decimator1->process(audio, m_decbuf);
c@279	110 m_decimator2->process(m_decbuf, m_buffer + m_buffill);
c@283	111 } else if (m_decimator1) {
c@283	112 m_decimator1->process(audio, m_buffer + m_buffill);
c@279	113 } else {
c@283	114 // just copy across (m_factor is presumably 1)
c@302	115 for (size_t i = 0; i < m_increment; ++i) {
c@283	116 (m_buffer + m_buffill)[i] = audio[i];
c@283	117 }
c@279	118 }
c@283	119 // for (int i = 0; i < m_increment / m_factor; ++i) {
c@283	120 // rmsout += m_buffer[m_buffill + i] * m_buffer[m_buffill + i];
c@283	121 // }
c@282	122 // std::cerr << "pushAudioBlock: rms in " << sqrt(rmsin) << ", out " << sqrt(rmsout) << std::endl;
c@279	123 m_buffill += m_increment / m_factor;
c@279	124 }
c@279	125
c@280	126 const float *
c@279	127 DownBeat::getBufferedAudio(size_t &length) const
c@279	128 {
c@279	129 length = m_buffill;
c@279	130 return m_buffer;
c@279	131 }
c@279	132
c@279	133 void
c@280	134 DownBeat::resetAudioBuffer()
c@280	135 {
c@280	136 if (m_buffer) free(m_buffer);
c@283	137 m_buffer = 0;
c@280	138 m_buffill = 0;
c@280	139 m_bufsiz = 0;
c@280	140 }
c@280	141
c@280	142 void
c@280	143 DownBeat::findDownBeats(const float *audio,
c@279	144 size_t audioLength,
c@279	145 const d_vec_t &beats,
c@279	146 i_vec_t &downbeats)
c@279	147 {
c@279	148 // FIND DOWNBEATS BY PARTITIONING THE INPUT AUDIO FILE INTO BEAT SEGMENTS
c@279	149 // WHERE THE AUDIO FRAMES ARE DOWNSAMPLED BY A FACTOR OF 16 (fs ~= 2700Hz)
c@279	150 // THEN TAKING THE JENSEN-SHANNON DIVERGENCE BETWEEN BEAT SYNCHRONOUS SPECTRAL FRAMES
c@279	151
c@279	152 // IMPLEMENTATION (MOSTLY) FOLLOWS:
c@279	153 // DAVIES AND PLUMBLEY "A SPECTRAL DIFFERENCE APPROACH TO EXTRACTING DOWNBEATS IN MUSICAL AUDIO"
c@279	154 // EUSIPCO 2006, FLORENCE, ITALY
c@279	155
c@279	156 d_vec_t newspec(m_beatframesize / 2); // magnitude spectrum of current beat
c@279	157 d_vec_t oldspec(m_beatframesize / 2); // magnitude spectrum of previous beat
c@281	158
c@281	159 m_beatsd.clear();
c@279	160
c@279	161 if (audioLength == 0) return;
c@279	162
c@279	163 for (size_t i = 0; i + 1 < beats.size(); ++i) {
c@279	164
c@279	165 // Copy the extents of the current beat from downsampled array
c@279	166 // into beat frame buffer
c@279	167
c@279	168 size_t beatstart = (beats[i] * m_increment) / m_factor;
c@280	169 size_t beatend = (beats[i+1] * m_increment) / m_factor;
c@279	170 if (beatend >= audioLength) beatend = audioLength - 1;
c@279	171 if (beatend < beatstart) beatend = beatstart;
c@279	172 size_t beatlen = beatend - beatstart;
c@279	173
c@279	174 // Also apply a Hanning window to the beat frame buffer, sized
c@279	175 // to the beat extents rather than the frame size. (Because
c@279	176 // the size varies, it's easier to do this by hand than use
c@279	177 // our Window abstraction.)
c@279	178
c@283	179 // std::cerr << "beatlen = " << beatlen << std::endl;
c@283	180
c@283	181 // float rms = 0;
c@283	182 for (size_t j = 0; j < beatlen && j < m_beatframesize; ++j) {
c@279	183 double mul = 0.5 * (1.0 - cos(TWO_PI * (double(j) / double(beatlen))));
c@279	184 m_beatframe[j] = audio[beatstart + j] * mul;
c@283	185 // rms += m_beatframe[j] * m_beatframe[j];
c@279	186 }
c@283	187 // rms = sqrt(rms);
c@282	188 // std::cerr << "beat " << i << ": audio rms " << rms << std::endl;
c@279	189
c@279	190 for (size_t j = beatlen; j < m_beatframesize; ++j) {
c@279	191 m_beatframe[j] = 0.0;
c@279	192 }
c@279	193
c@279	194 // Now FFT beat frame
c@279	195
c@289	196 m_fft->process(false, m_beatframe, m_fftRealOut, m_fftImagOut);
c@279	197
c@279	198 // Calculate magnitudes
c@279	199
c@279	200 for (size_t j = 0; j < m_beatframesize/2; ++j) {
c@279	201 newspec[j] = sqrt(m_fftRealOut[j] * m_fftRealOut[j] +
c@279	202 m_fftImagOut[j] * m_fftImagOut[j]);
c@279	203 }
c@279	204
c@279	205 // Preserve peaks by applying adaptive threshold
c@279	206
c@279	207 MathUtilities::adaptiveThreshold(newspec);
c@279	208
c@279	209 // Calculate JS divergence between new and old spectral frames
c@279	210
c@281	211 if (i > 0) { // otherwise we have no previous frame
c@281	212 m_beatsd.push_back(measureSpecDiff(oldspec, newspec));
c@282	213 // std::cerr << "specdiff: " << m_beatsd[m_beatsd.size()-1] << std::endl;
c@281	214 }
c@279	215
c@279	216 // Copy newspec across to old
c@279	217
c@279	218 for (size_t j = 0; j < m_beatframesize/2; ++j) {
c@279	219 oldspec[j] = newspec[j];
c@279	220 }
c@279	221 }
c@279	222
c@279	223 // We now have all spectral difference measures in specdiff
c@279	224
c@302	225 int timesig = m_bpb;
c@280	226 if (timesig == 0) timesig = 4;
c@280	227
c@279	228 d_vec_t dbcand(timesig); // downbeat candidates
c@279	229
c@280	230 for (int beat = 0; beat < timesig; ++beat) {
c@280	231 dbcand[beat] = 0;
c@280	232 }
c@280	233
c@301	234 // look for beat transition which leads to greatest spectral change
c@301	235 for (int beat = 0; beat < timesig; ++beat) {
c@301	236 int count = 0;
c@302	237 for (int example = beat-1; example < (int)m_beatsd.size(); example += timesig) {
c@301	238 if (example < 0) continue;
c@301	239 dbcand[beat] += (m_beatsd[example]) / timesig;
c@301	240 ++count;
c@301	241 }
c@301	242 if (count > 0) dbcand[beat] /= count;
c@282	243 // std::cerr << "dbcand[" << beat << "] = " << dbcand[beat] << std::endl;
c@301	244 }
c@280	245
c@279	246 // first downbeat is beat at index of maximum value of dbcand
c@279	247 int dbind = MathUtilities::getMax(dbcand);
c@279	248
c@279	249 // remaining downbeats are at timesig intervals from the first
c@302	250 for (int i = dbind; i < (int)beats.size(); i += timesig) {
c@279	251 downbeats.push_back(i);
c@279	252 }
c@279	253 }
c@279	254
c@279	255 double
c@279	256 DownBeat::measureSpecDiff(d_vec_t oldspec, d_vec_t newspec)
c@279	257 {
c@279	258 // JENSEN-SHANNON DIVERGENCE BETWEEN SPECTRAL FRAMES
c@279	259
c@295	260 unsigned int SPECSIZE = 512; // ONLY LOOK AT FIRST 512 SAMPLES OF SPECTRUM.
c@279	261 if (SPECSIZE > oldspec.size()/4) {
c@279	262 SPECSIZE = oldspec.size()/4;
c@279	263 }
c@279	264 double SD = 0.;
c@279	265 double sd1 = 0.;
c@279	266
c@279	267 double sumnew = 0.;
c@279	268 double sumold = 0.;
c@279	269
c@295	270 for (unsigned int i = 0;i < SPECSIZE;i++)
c@279	271 {
c@279	272 newspec[i] +=EPS;
c@279	273 oldspec[i] +=EPS;
c@279	274
c@279	275 sumnew+=newspec[i];
c@279	276 sumold+=oldspec[i];
c@279	277 }
c@279	278
c@295	279 for (unsigned int i = 0;i < SPECSIZE;i++)
c@279	280 {
c@279	281 newspec[i] /= (sumnew);
c@279	282 oldspec[i] /= (sumold);
c@279	283
c@279	284 // IF ANY SPECTRAL VALUES ARE 0 (SHOULDN'T BE ANY!) SET THEM TO 1
c@279	285 if (newspec[i] == 0)
c@279	286 {
c@279	287 newspec[i] = 1.;
c@279	288 }
c@279	289
c@279	290 if (oldspec[i] == 0)
c@279	291 {
c@279	292 oldspec[i] = 1.;
c@279	293 }
c@279	294
c@279	295 // JENSEN-SHANNON CALCULATION
c@279	296 sd1 = 0.5oldspec[i] + 0.5newspec[i];
c@279	297 SD = SD + (-sd1log(sd1)) + (0.5(oldspec[i]log(oldspec[i]))) + (0.5(newspec[i]*log(newspec[i])));
c@279	298 }
c@279	299
c@279	300 return SD;
c@279	301 }
c@279	302
c@281	303 void
c@281	304 DownBeat::getBeatSD(vector<double> &beatsd) const
c@281	305 {
c@302	306 for (int i = 0; i < (int)m_beatsd.size(); ++i) beatsd.push_back(m_beatsd[i]);
c@281	307 }
c@281	308

Mercurial > hg > qm-dsp

annotate dsp/tempotracking/DownBeat.cpp @ 321:f1e6be2de9a5