c@89
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
c@89
|
2
|
c@89
|
3 /*
|
c@89
|
4 QM Vamp Plugin Set
|
c@89
|
5
|
c@89
|
6 Centre for Digital Music, Queen Mary, University of London.
|
c@135
|
7
|
c@135
|
8 This program is free software; you can redistribute it and/or
|
c@135
|
9 modify it under the terms of the GNU General Public License as
|
c@135
|
10 published by the Free Software Foundation; either version 2 of the
|
c@135
|
11 License, or (at your option) any later version. See the file
|
c@135
|
12 COPYING included with this distribution for more information.
|
c@89
|
13 */
|
c@89
|
14
|
c@89
|
15 #include "BarBeatTrack.h"
|
c@89
|
16
|
c@89
|
17 #include <dsp/onsets/DetectionFunction.h>
|
c@89
|
18 #include <dsp/onsets/PeakPicking.h>
|
c@89
|
19 #include <dsp/tempotracking/TempoTrackV2.h>
|
c@89
|
20 #include <dsp/tempotracking/DownBeat.h>
|
c@89
|
21 #include <maths/MathUtilities.h>
|
c@89
|
22
|
c@89
|
23 using std::string;
|
c@89
|
24 using std::vector;
|
c@89
|
25 using std::cerr;
|
c@89
|
26 using std::endl;
|
c@89
|
27
|
c@130
|
28 #ifndef __GNUC__
|
c@130
|
29 #include <alloca.h>
|
c@130
|
30 #endif
|
c@130
|
31
|
c@89
|
32 float BarBeatTracker::m_stepSecs = 0.01161; // 512 samples at 44100
|
c@89
|
33
|
c@89
|
34 class BarBeatTrackerData
|
c@89
|
35 {
|
c@89
|
36 public:
|
c@89
|
37 BarBeatTrackerData(float rate, const DFConfig &config) : dfConfig(config) {
|
c@89
|
38 df = new DetectionFunction(config);
|
c@89
|
39 // decimation factor aims at resampling to c. 3KHz; must be power of 2
|
c@89
|
40 int factor = MathUtilities::nextPowerOfTwo(rate / 3000);
|
c@95
|
41 // std::cerr << "BarBeatTrackerData: factor = " << factor << std::endl;
|
c@89
|
42 downBeat = new DownBeat(rate, factor, config.stepSize);
|
c@89
|
43 }
|
c@89
|
44 ~BarBeatTrackerData() {
|
c@89
|
45 delete df;
|
c@89
|
46 delete downBeat;
|
c@89
|
47 }
|
c@89
|
48 void reset() {
|
c@89
|
49 delete df;
|
c@89
|
50 df = new DetectionFunction(dfConfig);
|
c@89
|
51 dfOutput.clear();
|
c@89
|
52 downBeat->resetAudioBuffer();
|
c@89
|
53 origin = Vamp::RealTime::zeroTime;
|
c@89
|
54 }
|
c@89
|
55
|
c@89
|
56 DFConfig dfConfig;
|
c@89
|
57 DetectionFunction *df;
|
c@89
|
58 DownBeat *downBeat;
|
c@89
|
59 vector<double> dfOutput;
|
c@89
|
60 Vamp::RealTime origin;
|
c@89
|
61 };
|
c@89
|
62
|
c@89
|
63
|
c@89
|
64 BarBeatTracker::BarBeatTracker(float inputSampleRate) :
|
c@89
|
65 Vamp::Plugin(inputSampleRate),
|
c@89
|
66 m_d(0),
|
c@89
|
67 m_bpb(4)
|
c@89
|
68 {
|
c@89
|
69 }
|
c@89
|
70
|
c@89
|
71 BarBeatTracker::~BarBeatTracker()
|
c@89
|
72 {
|
c@89
|
73 delete m_d;
|
c@89
|
74 }
|
c@89
|
75
|
c@89
|
76 string
|
c@89
|
77 BarBeatTracker::getIdentifier() const
|
c@89
|
78 {
|
c@89
|
79 return "qm-barbeattracker";
|
c@89
|
80 }
|
c@89
|
81
|
c@89
|
82 string
|
c@89
|
83 BarBeatTracker::getName() const
|
c@89
|
84 {
|
c@89
|
85 return "Bar and Beat Tracker";
|
c@89
|
86 }
|
c@89
|
87
|
c@89
|
88 string
|
c@89
|
89 BarBeatTracker::getDescription() const
|
c@89
|
90 {
|
c@89
|
91 return "Estimate bar and beat locations";
|
c@89
|
92 }
|
c@89
|
93
|
c@89
|
94 string
|
c@89
|
95 BarBeatTracker::getMaker() const
|
c@89
|
96 {
|
c@89
|
97 return "Queen Mary, University of London";
|
c@89
|
98 }
|
c@89
|
99
|
c@89
|
100 int
|
c@89
|
101 BarBeatTracker::getPluginVersion() const
|
c@89
|
102 {
|
c@131
|
103 return 2;
|
c@89
|
104 }
|
c@89
|
105
|
c@89
|
106 string
|
c@89
|
107 BarBeatTracker::getCopyright() const
|
c@89
|
108 {
|
c@89
|
109 return "Plugin by Matthew Davies, Christian Landone and Chris Cannam. Copyright (c) 2006-2009 QMUL - All Rights Reserved";
|
c@89
|
110 }
|
c@89
|
111
|
c@89
|
112 BarBeatTracker::ParameterList
|
c@89
|
113 BarBeatTracker::getParameterDescriptors() const
|
c@89
|
114 {
|
c@89
|
115 ParameterList list;
|
c@89
|
116
|
c@89
|
117 ParameterDescriptor desc;
|
c@89
|
118
|
c@89
|
119 desc.identifier = "bpb";
|
c@89
|
120 desc.name = "Beats per Bar";
|
c@89
|
121 desc.description = "The number of beats in each bar";
|
c@89
|
122 desc.minValue = 2;
|
c@89
|
123 desc.maxValue = 16;
|
c@89
|
124 desc.defaultValue = 4;
|
c@89
|
125 desc.isQuantized = true;
|
c@89
|
126 desc.quantizeStep = 1;
|
c@89
|
127 list.push_back(desc);
|
c@89
|
128
|
c@89
|
129 return list;
|
c@89
|
130 }
|
c@89
|
131
|
c@89
|
132 float
|
c@89
|
133 BarBeatTracker::getParameter(std::string name) const
|
c@89
|
134 {
|
c@89
|
135 if (name == "bpb") return m_bpb;
|
c@89
|
136 return 0.0;
|
c@89
|
137 }
|
c@89
|
138
|
c@89
|
139 void
|
c@89
|
140 BarBeatTracker::setParameter(std::string name, float value)
|
c@89
|
141 {
|
c@89
|
142 if (name == "bpb") m_bpb = lrintf(value);
|
c@89
|
143 }
|
c@89
|
144
|
c@89
|
145 bool
|
c@89
|
146 BarBeatTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
c@89
|
147 {
|
c@89
|
148 if (m_d) {
|
c@89
|
149 delete m_d;
|
c@89
|
150 m_d = 0;
|
c@89
|
151 }
|
c@89
|
152
|
c@89
|
153 if (channels < getMinChannelCount() ||
|
c@89
|
154 channels > getMaxChannelCount()) {
|
c@89
|
155 std::cerr << "BarBeatTracker::initialise: Unsupported channel count: "
|
c@89
|
156 << channels << std::endl;
|
c@89
|
157 return false;
|
c@89
|
158 }
|
c@89
|
159
|
c@89
|
160 if (stepSize != getPreferredStepSize()) {
|
c@89
|
161 std::cerr << "ERROR: BarBeatTracker::initialise: Unsupported step size for this sample rate: "
|
c@89
|
162 << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl;
|
c@89
|
163 return false;
|
c@89
|
164 }
|
c@89
|
165
|
c@89
|
166 if (blockSize != getPreferredBlockSize()) {
|
c@89
|
167 std::cerr << "WARNING: BarBeatTracker::initialise: Sub-optimal block size for this sample rate: "
|
c@89
|
168 << blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl;
|
c@89
|
169 // return false;
|
c@89
|
170 }
|
c@89
|
171
|
c@89
|
172 DFConfig dfConfig;
|
c@89
|
173 dfConfig.DFType = DF_COMPLEXSD;
|
c@89
|
174 dfConfig.stepSize = stepSize;
|
c@89
|
175 dfConfig.frameLength = blockSize;
|
c@89
|
176 dfConfig.dbRise = 3;
|
c@89
|
177 dfConfig.adaptiveWhitening = false;
|
c@89
|
178 dfConfig.whiteningRelaxCoeff = -1;
|
c@89
|
179 dfConfig.whiteningFloor = -1;
|
c@89
|
180
|
c@89
|
181 m_d = new BarBeatTrackerData(m_inputSampleRate, dfConfig);
|
c@89
|
182 m_d->downBeat->setBeatsPerBar(m_bpb);
|
c@89
|
183 return true;
|
c@89
|
184 }
|
c@89
|
185
|
c@89
|
186 void
|
c@89
|
187 BarBeatTracker::reset()
|
c@89
|
188 {
|
c@89
|
189 if (m_d) m_d->reset();
|
c@89
|
190 }
|
c@89
|
191
|
c@89
|
192 size_t
|
c@89
|
193 BarBeatTracker::getPreferredStepSize() const
|
c@89
|
194 {
|
c@89
|
195 size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001);
|
c@95
|
196 if (step < 1) step = 1;
|
c@89
|
197 // std::cerr << "BarBeatTracker::getPreferredStepSize: input sample rate is " << m_inputSampleRate << ", step size is " << step << std::endl;
|
c@89
|
198 return step;
|
c@89
|
199 }
|
c@89
|
200
|
c@89
|
201 size_t
|
c@89
|
202 BarBeatTracker::getPreferredBlockSize() const
|
c@89
|
203 {
|
c@89
|
204 size_t theoretical = getPreferredStepSize() * 2;
|
c@89
|
205
|
c@89
|
206 // I think this is not necessarily going to be a power of two, and
|
c@89
|
207 // the host might have a problem with that, but I'm not sure we
|
c@89
|
208 // can do much about it here
|
c@89
|
209 return theoretical;
|
c@89
|
210 }
|
c@89
|
211
|
c@89
|
212 BarBeatTracker::OutputList
|
c@89
|
213 BarBeatTracker::getOutputDescriptors() const
|
c@89
|
214 {
|
c@89
|
215 OutputList list;
|
c@89
|
216
|
c@89
|
217 OutputDescriptor beat;
|
c@89
|
218 beat.identifier = "beats";
|
c@89
|
219 beat.name = "Beats";
|
c@89
|
220 beat.description = "Beat locations labelled with metrical position";
|
c@89
|
221 beat.unit = "";
|
c@89
|
222 beat.hasFixedBinCount = true;
|
c@89
|
223 beat.binCount = 0;
|
c@89
|
224 beat.sampleType = OutputDescriptor::VariableSampleRate;
|
c@89
|
225 beat.sampleRate = 1.0 / m_stepSecs;
|
c@89
|
226
|
c@89
|
227 OutputDescriptor bars;
|
c@89
|
228 bars.identifier = "bars";
|
c@89
|
229 bars.name = "Bars";
|
c@89
|
230 bars.description = "Bar locations";
|
c@89
|
231 bars.unit = "";
|
c@89
|
232 bars.hasFixedBinCount = true;
|
c@89
|
233 bars.binCount = 0;
|
c@89
|
234 bars.sampleType = OutputDescriptor::VariableSampleRate;
|
c@89
|
235 bars.sampleRate = 1.0 / m_stepSecs;
|
c@89
|
236
|
c@89
|
237 OutputDescriptor beatcounts;
|
c@89
|
238 beatcounts.identifier = "beatcounts";
|
c@89
|
239 beatcounts.name = "Beat Count";
|
c@89
|
240 beatcounts.description = "Beat counter function";
|
c@89
|
241 beatcounts.unit = "";
|
c@89
|
242 beatcounts.hasFixedBinCount = true;
|
c@89
|
243 beatcounts.binCount = 1;
|
c@89
|
244 beatcounts.sampleType = OutputDescriptor::VariableSampleRate;
|
c@89
|
245 beatcounts.sampleRate = 1.0 / m_stepSecs;
|
c@89
|
246
|
c@90
|
247 OutputDescriptor beatsd;
|
c@90
|
248 beatsd.identifier = "beatsd";
|
c@90
|
249 beatsd.name = "Beat Spectral Difference";
|
c@90
|
250 beatsd.description = "Beat spectral difference function used for bar-line detection";
|
c@90
|
251 beatsd.unit = "";
|
c@90
|
252 beatsd.hasFixedBinCount = true;
|
c@90
|
253 beatsd.binCount = 1;
|
c@90
|
254 beatsd.sampleType = OutputDescriptor::VariableSampleRate;
|
c@90
|
255 beatsd.sampleRate = 1.0 / m_stepSecs;
|
c@90
|
256
|
c@89
|
257 list.push_back(beat);
|
c@89
|
258 list.push_back(bars);
|
c@89
|
259 list.push_back(beatcounts);
|
c@90
|
260 list.push_back(beatsd);
|
c@89
|
261
|
c@89
|
262 return list;
|
c@89
|
263 }
|
c@89
|
264
|
c@89
|
265 BarBeatTracker::FeatureSet
|
c@89
|
266 BarBeatTracker::process(const float *const *inputBuffers,
|
c@89
|
267 Vamp::RealTime timestamp)
|
c@89
|
268 {
|
c@89
|
269 if (!m_d) {
|
c@89
|
270 cerr << "ERROR: BarBeatTracker::process: "
|
c@89
|
271 << "BarBeatTracker has not been initialised"
|
c@89
|
272 << endl;
|
c@89
|
273 return FeatureSet();
|
c@89
|
274 }
|
c@89
|
275
|
c@89
|
276 // We use time domain input, because DownBeat requires it -- so we
|
c@89
|
277 // use the time-domain version of DetectionFunction::process which
|
c@89
|
278 // does its own FFT. It requires doubles as input, so we need to
|
c@89
|
279 // make a temporary copy
|
c@89
|
280
|
c@89
|
281 // We only support a single input channel
|
c@89
|
282
|
c@89
|
283 const int fl = m_d->dfConfig.frameLength;
|
c@130
|
284 #ifndef __GNUC__
|
c@130
|
285 double *dfinput = (double *)alloca(fl * sizeof(double));
|
c@130
|
286 #else
|
c@89
|
287 double dfinput[fl];
|
c@130
|
288 #endif
|
c@89
|
289 for (int i = 0; i < fl; ++i) dfinput[i] = inputBuffers[0][i];
|
c@89
|
290
|
c@89
|
291 double output = m_d->df->process(dfinput);
|
c@89
|
292
|
c@89
|
293 if (m_d->dfOutput.empty()) m_d->origin = timestamp;
|
c@89
|
294
|
c@93
|
295 // std::cerr << "df[" << m_d->dfOutput.size() << "] is " << output << std::endl;
|
c@89
|
296 m_d->dfOutput.push_back(output);
|
c@89
|
297
|
c@89
|
298 // Downsample and store the incoming audio block.
|
c@89
|
299 // We have an overlap on the incoming audio stream (step size is
|
c@89
|
300 // half block size) -- this function is configured to take only a
|
c@89
|
301 // step size's worth, so effectively ignoring the overlap. Note
|
c@89
|
302 // however that this means we omit the last blocksize - stepsize
|
c@89
|
303 // samples completely for the purposes of barline detection
|
c@89
|
304 // (hopefully not a problem)
|
c@89
|
305 m_d->downBeat->pushAudioBlock(inputBuffers[0]);
|
c@89
|
306
|
c@89
|
307 return FeatureSet();
|
c@89
|
308 }
|
c@89
|
309
|
c@89
|
310 BarBeatTracker::FeatureSet
|
c@89
|
311 BarBeatTracker::getRemainingFeatures()
|
c@89
|
312 {
|
c@89
|
313 if (!m_d) {
|
c@89
|
314 cerr << "ERROR: BarBeatTracker::getRemainingFeatures: "
|
c@89
|
315 << "BarBeatTracker has not been initialised"
|
c@89
|
316 << endl;
|
c@89
|
317 return FeatureSet();
|
c@89
|
318 }
|
c@89
|
319
|
c@89
|
320 return barBeatTrack();
|
c@89
|
321 }
|
c@89
|
322
|
c@89
|
323 BarBeatTracker::FeatureSet
|
c@89
|
324 BarBeatTracker::barBeatTrack()
|
c@89
|
325 {
|
c@89
|
326 vector<double> df;
|
c@89
|
327 vector<double> beatPeriod;
|
c@89
|
328 vector<double> tempi;
|
c@89
|
329
|
c@89
|
330 for (size_t i = 2; i < m_d->dfOutput.size(); ++i) { // discard first two elts
|
c@89
|
331 df.push_back(m_d->dfOutput[i]);
|
c@89
|
332 beatPeriod.push_back(0.0);
|
c@89
|
333 }
|
c@89
|
334 if (df.empty()) return FeatureSet();
|
c@89
|
335
|
c@89
|
336 TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize);
|
c@89
|
337 tt.calculateBeatPeriod(df, beatPeriod, tempi);
|
c@89
|
338
|
c@89
|
339 vector<double> beats;
|
c@89
|
340 tt.calculateBeats(df, beatPeriod, beats);
|
c@89
|
341
|
c@89
|
342 vector<int> downbeats;
|
c@89
|
343 size_t downLength = 0;
|
c@89
|
344 const float *downsampled = m_d->downBeat->getBufferedAudio(downLength);
|
c@89
|
345 m_d->downBeat->findDownBeats(downsampled, downLength, beats, downbeats);
|
c@89
|
346
|
c@90
|
347 vector<double> beatsd;
|
c@90
|
348 m_d->downBeat->getBeatSD(beatsd);
|
c@90
|
349
|
c@89
|
350 // std::cerr << "BarBeatTracker: found downbeats at: ";
|
c@89
|
351 // for (int i = 0; i < downbeats.size(); ++i) std::cerr << downbeats[i] << " " << std::endl;
|
c@89
|
352
|
c@89
|
353 FeatureSet returnFeatures;
|
c@89
|
354
|
c@89
|
355 char label[20];
|
c@89
|
356
|
c@89
|
357 int dbi = 0;
|
c@89
|
358 int beat = 0;
|
c@89
|
359 int bar = 0;
|
c@89
|
360
|
c@124
|
361 if (!downbeats.empty()) {
|
c@124
|
362 // get the right number for the first beat; this will be
|
c@124
|
363 // incremented before use (at top of the following loop)
|
c@124
|
364 int firstDown = downbeats[0];
|
c@124
|
365 beat = m_bpb - firstDown - 1;
|
c@124
|
366 if (beat == m_bpb) beat = 0;
|
c@124
|
367 }
|
c@124
|
368
|
c@89
|
369 for (size_t i = 0; i < beats.size(); ++i) {
|
c@89
|
370
|
c@89
|
371 size_t frame = beats[i] * m_d->dfConfig.stepSize;
|
c@89
|
372
|
c@89
|
373 if (dbi < downbeats.size() && i == downbeats[dbi]) {
|
c@89
|
374 beat = 0;
|
c@89
|
375 ++bar;
|
c@89
|
376 ++dbi;
|
c@89
|
377 } else {
|
c@89
|
378 ++beat;
|
c@89
|
379 }
|
c@89
|
380
|
c@89
|
381 // outputs are:
|
c@89
|
382 //
|
c@89
|
383 // 0 -> beats
|
c@89
|
384 // 1 -> bars
|
c@89
|
385 // 2 -> beat counter function
|
c@89
|
386
|
c@89
|
387 Feature feature;
|
c@89
|
388 feature.hasTimestamp = true;
|
c@89
|
389 feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime
|
c@89
|
390 (frame, lrintf(m_inputSampleRate));
|
c@89
|
391
|
c@89
|
392 sprintf(label, "%d", beat + 1);
|
c@89
|
393 feature.label = label;
|
c@89
|
394 returnFeatures[0].push_back(feature); // labelled beats
|
c@89
|
395
|
c@89
|
396 feature.values.push_back(beat + 1);
|
c@89
|
397 returnFeatures[2].push_back(feature); // beat function
|
c@89
|
398
|
c@90
|
399 if (i > 0 && i <= beatsd.size()) {
|
c@90
|
400 feature.values.clear();
|
c@90
|
401 feature.values.push_back(beatsd[i-1]);
|
c@90
|
402 feature.label = "";
|
c@90
|
403 returnFeatures[3].push_back(feature); // beat spectral difference
|
c@90
|
404 }
|
c@90
|
405
|
c@89
|
406 if (beat == 0) {
|
c@89
|
407 feature.values.clear();
|
c@89
|
408 sprintf(label, "%d", bar);
|
c@89
|
409 feature.label = label;
|
c@89
|
410 returnFeatures[1].push_back(feature); // bars
|
c@89
|
411 }
|
c@89
|
412 }
|
c@89
|
413
|
c@89
|
414 return returnFeatures;
|
c@89
|
415 }
|
c@89
|
416
|