Mercurial > hg > qm-dsp
comparison dsp/tempotracking/DownBeat.cpp @ 54:5bec06ecc88a
* First cut at Matthew's downbeat estimator -- untested so far
author | cannam |
---|---|
date | Tue, 10 Feb 2009 12:52:43 +0000 |
parents | |
children | 7fe29d8a7eaf |
comparison
equal
deleted
inserted
replaced
53:796170a9c8e4 | 54:5bec06ecc88a |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 QM DSP Library | |
5 | |
6 Centre for Digital Music, Queen Mary, University of London. | |
7 This file copyright 2008-2009 Matthew Davies and QMUL. | |
8 All rights reserved. | |
9 */ | |
10 | |
11 #include "DownBeat.h" | |
12 | |
13 #include "maths/MathAliases.h" | |
14 #include "maths/MathUtilities.h" | |
15 #include "dsp/transforms/FFT.h" | |
16 | |
17 #include <iostream> | |
18 #include <cstdlib> | |
19 | |
20 DownBeat::DownBeat(float originalSampleRate, | |
21 size_t decimationFactor, | |
22 size_t dfIncrement) : | |
23 m_rate(originalSampleRate), | |
24 m_factor(decimationFactor), | |
25 m_increment(dfIncrement), | |
26 m_decimator1(0), | |
27 m_decimator2(0), | |
28 m_buffer(0), | |
29 m_bufsiz(0), | |
30 m_buffill(0), | |
31 m_beatframesize(0), | |
32 m_beatframe(0) | |
33 { | |
34 // beat frame size is next power of two up from 1.3 seconds at the | |
35 // downsampled rate (happens to produce 4096 for 44100 or 48000 at | |
36 // 16x decimation, which is our expected normal situation) | |
37 int bfs = int((m_rate / decimationFactor) * 1.3); | |
38 m_beatframesize = 1; | |
39 while (bfs) { bfs >>= 1; m_beatframesize <<= 1; } | |
40 std::cerr << "rate = " << m_rate << ", bfs = " << m_beatframesize << std::endl; | |
41 m_beatframe = new double[m_beatframesize]; | |
42 m_fftRealOut = new double[m_beatframesize]; | |
43 m_fftImagOut = new double[m_beatframesize]; | |
44 } | |
45 | |
46 DownBeat::~DownBeat() | |
47 { | |
48 delete m_decimator1; | |
49 delete m_decimator2; | |
50 if (m_buffer) free(m_buffer); | |
51 delete[] m_decbuf; | |
52 delete[] m_beatframe; | |
53 delete[] m_fftRealOut; | |
54 delete[] m_fftImagOut; | |
55 } | |
56 | |
57 void | |
58 DownBeat::makeDecimators() | |
59 { | |
60 if (m_factor < 2) return; | |
61 int highest = Decimator::getHighestSupportedFactor(); | |
62 if (m_factor <= highest) { | |
63 m_decimator1 = new Decimator(m_increment, m_factor); | |
64 return; | |
65 } | |
66 m_decimator1 = new Decimator(m_increment, highest); | |
67 m_decimator2 = new Decimator(m_increment / highest, m_factor / highest); | |
68 m_decbuf = new double[m_factor / highest]; | |
69 } | |
70 | |
71 void | |
72 DownBeat::pushAudioBlock(const double *audio) | |
73 { | |
74 if (m_buffill + (m_increment / m_factor) > m_bufsiz) { | |
75 if (m_bufsiz == 0) m_bufsiz = m_increment * 16; | |
76 else m_bufsiz = m_bufsiz * 2; | |
77 if (!m_buffer) { | |
78 m_buffer = (double *)malloc(m_bufsiz * sizeof(double)); | |
79 } else { | |
80 std::cerr << "DownBeat::pushAudioBlock: realloc m_buffer to " << m_bufsiz << std::endl; | |
81 m_buffer = (double *)realloc(m_buffer, m_bufsiz * sizeof(double)); | |
82 } | |
83 } | |
84 if (!m_decimator1) makeDecimators(); | |
85 if (m_decimator2) { | |
86 m_decimator1->process(audio, m_decbuf); | |
87 m_decimator2->process(m_decbuf, m_buffer + m_buffill); | |
88 } else { | |
89 m_decimator1->process(audio, m_buffer + m_buffill); | |
90 } | |
91 m_buffill += m_increment / m_factor; | |
92 } | |
93 | |
94 const double * | |
95 DownBeat::getBufferedAudio(size_t &length) const | |
96 { | |
97 length = m_buffill; | |
98 return m_buffer; | |
99 } | |
100 | |
101 void | |
102 DownBeat::findDownBeats(const double *audio, | |
103 size_t audioLength, | |
104 const d_vec_t &beats, | |
105 i_vec_t &downbeats) | |
106 { | |
107 // FIND DOWNBEATS BY PARTITIONING THE INPUT AUDIO FILE INTO BEAT SEGMENTS | |
108 // WHERE THE AUDIO FRAMES ARE DOWNSAMPLED BY A FACTOR OF 16 (fs ~= 2700Hz) | |
109 // THEN TAKING THE JENSEN-SHANNON DIVERGENCE BETWEEN BEAT SYNCHRONOUS SPECTRAL FRAMES | |
110 | |
111 // IMPLEMENTATION (MOSTLY) FOLLOWS: | |
112 // DAVIES AND PLUMBLEY "A SPECTRAL DIFFERENCE APPROACH TO EXTRACTING DOWNBEATS IN MUSICAL AUDIO" | |
113 // EUSIPCO 2006, FLORENCE, ITALY | |
114 | |
115 d_vec_t newspec(m_beatframesize / 2); // magnitude spectrum of current beat | |
116 d_vec_t oldspec(m_beatframesize / 2); // magnitude spectrum of previous beat | |
117 d_vec_t specdiff; | |
118 | |
119 if (audioLength == 0) return; | |
120 | |
121 for (size_t i = 0; i + 1 < beats.size(); ++i) { | |
122 | |
123 // Copy the extents of the current beat from downsampled array | |
124 // into beat frame buffer | |
125 | |
126 size_t beatstart = (beats[i] * m_increment) / m_factor; | |
127 size_t beatend = (beats[i] * m_increment) / m_factor; | |
128 if (beatend >= audioLength) beatend = audioLength - 1; | |
129 if (beatend < beatstart) beatend = beatstart; | |
130 size_t beatlen = beatend - beatstart; | |
131 | |
132 // Also apply a Hanning window to the beat frame buffer, sized | |
133 // to the beat extents rather than the frame size. (Because | |
134 // the size varies, it's easier to do this by hand than use | |
135 // our Window abstraction.) | |
136 | |
137 for (size_t j = 0; j < beatlen; ++j) { | |
138 double mul = 0.5 * (1.0 - cos(TWO_PI * (double(j) / double(beatlen)))); | |
139 m_beatframe[j] = audio[beatstart + j] * mul; | |
140 } | |
141 | |
142 for (size_t j = beatlen; j < m_beatframesize; ++j) { | |
143 m_beatframe[j] = 0.0; | |
144 } | |
145 | |
146 // Now FFT beat frame | |
147 | |
148 FFT::process(m_beatframesize, false, | |
149 m_beatframe, 0, m_fftRealOut, m_fftImagOut); | |
150 | |
151 // Calculate magnitudes | |
152 | |
153 for (size_t j = 0; j < m_beatframesize/2; ++j) { | |
154 newspec[j] = sqrt(m_fftRealOut[j] * m_fftRealOut[j] + | |
155 m_fftImagOut[j] * m_fftImagOut[j]); | |
156 } | |
157 | |
158 // Preserve peaks by applying adaptive threshold | |
159 | |
160 MathUtilities::adaptiveThreshold(newspec); | |
161 | |
162 // Calculate JS divergence between new and old spectral frames | |
163 | |
164 specdiff.push_back(measureSpecDiff(oldspec, newspec)); | |
165 | |
166 // Copy newspec across to old | |
167 | |
168 for (size_t j = 0; j < m_beatframesize/2; ++j) { | |
169 oldspec[j] = newspec[j]; | |
170 } | |
171 } | |
172 | |
173 // We now have all spectral difference measures in specdiff | |
174 | |
175 uint timesig = 4; // SHOULD REPLACE THIS WITH A FIND_METER FUNCTION - OR USER PARAMETER | |
176 d_vec_t dbcand(timesig); // downbeat candidates | |
177 | |
178 // look for beat transition which leads to greatest spectral change | |
179 for (int beat = 0; beat < timesig; ++beat) { | |
180 for (int example = beat; example < specdiff.size(); ++example) { | |
181 dbcand[beat] += (specdiff[example]) / timesig; | |
182 } | |
183 } | |
184 | |
185 // first downbeat is beat at index of maximum value of dbcand | |
186 int dbind = MathUtilities::getMax(dbcand); | |
187 | |
188 // remaining downbeats are at timesig intervals from the first | |
189 for (int i = dbind; i < beats.size(); i += timesig) { | |
190 downbeats.push_back(i); | |
191 } | |
192 } | |
193 | |
194 double | |
195 DownBeat::measureSpecDiff(d_vec_t oldspec, d_vec_t newspec) | |
196 { | |
197 // JENSEN-SHANNON DIVERGENCE BETWEEN SPECTRAL FRAMES | |
198 | |
199 uint SPECSIZE = 512; // ONLY LOOK AT FIRST 512 SAMPLES OF SPECTRUM. | |
200 if (SPECSIZE > oldspec.size()/4) { | |
201 SPECSIZE = oldspec.size()/4; | |
202 } | |
203 double SD = 0.; | |
204 double sd1 = 0.; | |
205 | |
206 double sumnew = 0.; | |
207 double sumold = 0.; | |
208 | |
209 for (uint i = 0;i < SPECSIZE;i++) | |
210 { | |
211 newspec[i] +=EPS; | |
212 oldspec[i] +=EPS; | |
213 | |
214 sumnew+=newspec[i]; | |
215 sumold+=oldspec[i]; | |
216 } | |
217 | |
218 for (uint i = 0;i < SPECSIZE;i++) | |
219 { | |
220 newspec[i] /= (sumnew); | |
221 oldspec[i] /= (sumold); | |
222 | |
223 // IF ANY SPECTRAL VALUES ARE 0 (SHOULDN'T BE ANY!) SET THEM TO 1 | |
224 if (newspec[i] == 0) | |
225 { | |
226 newspec[i] = 1.; | |
227 } | |
228 | |
229 if (oldspec[i] == 0) | |
230 { | |
231 oldspec[i] = 1.; | |
232 } | |
233 | |
234 // JENSEN-SHANNON CALCULATION | |
235 sd1 = 0.5*oldspec[i] + 0.5*newspec[i]; | |
236 SD = SD + (-sd1*log(sd1)) + (0.5*(oldspec[i]*log(oldspec[i]))) + (0.5*(newspec[i]*log(newspec[i]))); | |
237 } | |
238 | |
239 return SD; | |
240 } | |
241 |