To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / examples / FixedTempoEstimator.cpp

History | View | Annotate | Download (18.3 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2

    
3
/*
4
    Vamp
5

6
    An API for audio analysis and feature extraction plugins.
7

8
    Centre for Digital Music, Queen Mary, University of London.
9
    Copyright 2006-2009 Chris Cannam and QMUL.
10
  
11
    Permission is hereby granted, free of charge, to any person
12
    obtaining a copy of this software and associated documentation
13
    files (the "Software"), to deal in the Software without
14
    restriction, including without limitation the rights to use, copy,
15
    modify, merge, publish, distribute, sublicense, and/or sell copies
16
    of the Software, and to permit persons to whom the Software is
17
    furnished to do so, subject to the following conditions:
18

19
    The above copyright notice and this permission notice shall be
20
    included in all copies or substantial portions of the Software.
21

22
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
26
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
27
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29

30
    Except as contained in this notice, the names of the Centre for
31
    Digital Music; Queen Mary, University of London; and Chris Cannam
32
    shall not be used in advertising or otherwise to promote the sale,
33
    use or other dealings in this Software without prior written
34
    authorization.
35
*/
36

    
37
#include "FixedTempoEstimator.h"
38

    
39
using std::string;
40
using std::vector;
41
using std::cerr;
42
using std::endl;
43

    
44
using Vamp::RealTime;
45

    
46
#include <cmath>
47
#include <cstdio>
48

    
49

    
50
class FixedTempoEstimator::D
51
// this class just avoids us having to declare any data members in the header
52
{
53
public:
54
    D(float inputSampleRate);
55
    ~D();
56

    
57
    size_t getPreferredStepSize() const { return 64; }
58
    size_t getPreferredBlockSize() const { return 256; }
59

    
60
    ParameterList getParameterDescriptors() const;
61
    float getParameter(string id) const;
62
    void setParameter(string id, float value);
63

    
64
    OutputList getOutputDescriptors() const;
65

    
66
    bool initialise(size_t channels, size_t stepSize, size_t blockSize);
67
    void reset();
68
    FeatureSet process(const float *const *, RealTime);
69
    FeatureSet getRemainingFeatures();
70

    
71
private:
72
    void calculate();
73
    FeatureSet assembleFeatures();
74

    
75
    float lag2tempo(int);
76
    int tempo2lag(float);
77

    
78
    float m_inputSampleRate;
79
    size_t m_stepSize;
80
    size_t m_blockSize;
81

    
82
    float m_minbpm;
83
    float m_maxbpm;
84
    float m_maxdflen;
85

    
86
    float *m_priorMagnitudes;
87

    
88
    size_t m_dfsize;
89
    float *m_df;
90
    float *m_r;
91
    float *m_fr;
92
    float *m_t;
93
    size_t m_n;
94

    
95
    Vamp::RealTime m_start;
96
    Vamp::RealTime m_lasttime;
97
};
98

    
99
FixedTempoEstimator::D::D(float inputSampleRate) :
100
    m_inputSampleRate(inputSampleRate),
101
    m_stepSize(0),
102
    m_blockSize(0),
103
    m_minbpm(50),
104
    m_maxbpm(190),
105
    m_maxdflen(10),
106
    m_priorMagnitudes(0),
107
    m_df(0),
108
    m_r(0),
109
    m_fr(0),
110
    m_t(0),
111
    m_n(0)
112
{
113
}
114

    
115
FixedTempoEstimator::D::~D()
116
{
117
    delete[] m_priorMagnitudes;
118
    delete[] m_df;
119
    delete[] m_r;
120
    delete[] m_fr;
121
    delete[] m_t;
122
}
123

    
124
FixedTempoEstimator::ParameterList
125
FixedTempoEstimator::D::getParameterDescriptors() const
126
{
127
    ParameterList list;
128

    
129
    ParameterDescriptor d;
130
    d.identifier = "minbpm";
131
    d.name = "Minimum estimated tempo";
132
    d.description = "Minimum beat-per-minute value which the tempo estimator is able to return";
133
    d.unit = "bpm";
134
    d.minValue = 10;
135
    d.maxValue = 360;
136
    d.defaultValue = 50;
137
    d.isQuantized = false;
138
    list.push_back(d);
139

    
140
    d.identifier = "maxbpm";
141
    d.name = "Maximum estimated tempo";
142
    d.description = "Maximum beat-per-minute value which the tempo estimator is able to return";
143
    d.defaultValue = 190;
144
    list.push_back(d);
145

    
146
    d.identifier = "maxdflen";
147
    d.name = "Input duration to study";
148
    d.description = "Length of audio input, in seconds, which should be taken into account when estimating tempo.  There is no need to supply the plugin with any further input once this time has elapsed since the start of the audio.  The tempo estimator may use only the first part of this, up to eight times the slowest beat duration: increasing this value further than that is unlikely to improve results.";
149
    d.unit = "s";
150
    d.minValue = 2;
151
    d.maxValue = 40;
152
    d.defaultValue = 10;
153
    list.push_back(d);
154

    
155
    return list;
156
}
157

    
158
float
159
FixedTempoEstimator::D::getParameter(string id) const
160
{
161
    if (id == "minbpm") {
162
        return m_minbpm;
163
    } else if (id == "maxbpm") {
164
        return m_maxbpm;
165
    } else if (id == "maxdflen") {
166
        return m_maxdflen;
167
    }
168
    return 0.f;
169
}
170

    
171
void
172
FixedTempoEstimator::D::setParameter(string id, float value)
173
{
174
    if (id == "minbpm") {
175
        m_minbpm = value;
176
    } else if (id == "maxbpm") {
177
        m_maxbpm = value;
178
    } else if (id == "maxdflen") {
179
        m_maxdflen = value;
180
    }
181
}
182

    
183
static int TempoOutput = 0;
184
static int CandidatesOutput = 1;
185
static int DFOutput = 2;
186
static int ACFOutput = 3;
187
static int FilteredACFOutput = 4;
188

    
189
FixedTempoEstimator::OutputList
190
FixedTempoEstimator::D::getOutputDescriptors() const
191
{
192
    OutputList list;
193

    
194
    OutputDescriptor d;
195
    d.identifier = "tempo";
196
    d.name = "Tempo";
197
    d.description = "Estimated tempo";
198
    d.unit = "bpm";
199
    d.hasFixedBinCount = true;
200
    d.binCount = 1;
201
    d.hasKnownExtents = false;
202
    d.isQuantized = false;
203
    d.sampleType = OutputDescriptor::VariableSampleRate;
204
    d.sampleRate = m_inputSampleRate;
205
    d.hasDuration = true; // our returned tempo spans a certain range
206
    list.push_back(d);
207

    
208
    d.identifier = "candidates";
209
    d.name = "Tempo candidates";
210
    d.description = "Possible tempo estimates, one per bin with the most likely in the first bin";
211
    d.unit = "bpm";
212
    d.hasFixedBinCount = false;
213
    list.push_back(d);
214

    
215
    d.identifier = "detectionfunction";
216
    d.name = "Detection Function";
217
    d.description = "Onset detection function";
218
    d.unit = "";
219
    d.hasFixedBinCount = 1;
220
    d.binCount = 1;
221
    d.hasKnownExtents = true;
222
    d.minValue = 0.0;
223
    d.maxValue = 1.0;
224
    d.isQuantized = false;
225
    d.quantizeStep = 0.0;
226
    d.sampleType = OutputDescriptor::FixedSampleRate;
227
    if (m_stepSize) {
228
        d.sampleRate = m_inputSampleRate / m_stepSize;
229
    } else {
230
        d.sampleRate = m_inputSampleRate / (getPreferredBlockSize()/2);
231
    }
232
    d.hasDuration = false;
233
    list.push_back(d);
234

    
235
    d.identifier = "acf";
236
    d.name = "Autocorrelation Function";
237
    d.description = "Autocorrelation of onset detection function";
238
    d.hasKnownExtents = false;
239
    d.unit = "r";
240
    list.push_back(d);
241

    
242
    d.identifier = "filtered_acf";
243
    d.name = "Filtered Autocorrelation";
244
    d.description = "Filtered autocorrelation of onset detection function";
245
    d.unit = "r";
246
    list.push_back(d);
247

    
248
    return list;
249
}
250

    
251
bool
252
FixedTempoEstimator::D::initialise(size_t, size_t stepSize, size_t blockSize)
253
{
254
    m_stepSize = stepSize;
255
    m_blockSize = blockSize;
256

    
257
    float dfLengthSecs = m_maxdflen;
258
    m_dfsize = (dfLengthSecs * m_inputSampleRate) / m_stepSize;
259

    
260
    m_priorMagnitudes = new float[m_blockSize/2];
261
    m_df = new float[m_dfsize];
262

    
263
    for (size_t i = 0; i < m_blockSize/2; ++i) {
264
        m_priorMagnitudes[i] = 0.f;
265
    }
266
    for (size_t i = 0; i < m_dfsize; ++i) {
267
        m_df[i] = 0.f;
268
    }
269

    
270
    m_n = 0;
271

    
272
    return true;
273
}
274

    
275
void
276
FixedTempoEstimator::D::reset()
277
{
278
    if (!m_priorMagnitudes) return;
279

    
280
    for (size_t i = 0; i < m_blockSize/2; ++i) {
281
        m_priorMagnitudes[i] = 0.f;
282
    }
283
    for (size_t i = 0; i < m_dfsize; ++i) {
284
        m_df[i] = 0.f;
285
    }
286

    
287
    delete[] m_r;
288
    m_r = 0;
289

    
290
    delete[] m_fr; 
291
    m_fr = 0;
292

    
293
    delete[] m_t; 
294
    m_t = 0;
295

    
296
    m_n = 0;
297

    
298
    m_start = RealTime::zeroTime;
299
    m_lasttime = RealTime::zeroTime;
300
}
301

    
302
FixedTempoEstimator::FeatureSet
303
FixedTempoEstimator::D::process(const float *const *inputBuffers, RealTime ts)
304
{
305
    FeatureSet fs;
306

    
307
    if (m_stepSize == 0) {
308
        cerr << "ERROR: FixedTempoEstimator::process: "
309
             << "FixedTempoEstimator has not been initialised"
310
             << endl;
311
        return fs;
312
    }
313

    
314
    if (m_n == 0) m_start = ts;
315
    m_lasttime = ts;
316

    
317
    if (m_n == m_dfsize) {
318
        // If we have seen enough input, do the estimation and return
319
        calculate();
320
        fs = assembleFeatures();
321
        ++m_n;
322
        return fs;
323
    }
324

    
325
    // If we have seen more than enough, just discard and return!
326
    if (m_n > m_dfsize) return FeatureSet();
327

    
328
    float value = 0.f;
329

    
330
    // m_df will contain an onset detection function based on the rise
331
    // in overall power from one spectral frame to the next --
332
    // simplistic but reasonably effective for our purposes.
333

    
334
    for (size_t i = 1; i < m_blockSize/2; ++i) {
335

    
336
        float real = inputBuffers[0][i*2];
337
        float imag = inputBuffers[0][i*2 + 1];
338

    
339
        float sqrmag = real * real + imag * imag;
340
        value += fabsf(sqrmag - m_priorMagnitudes[i]);
341

    
342
        m_priorMagnitudes[i] = sqrmag;
343
    }
344

    
345
    m_df[m_n] = value;
346

    
347
    ++m_n;
348
    return fs;
349
}    
350

    
351
FixedTempoEstimator::FeatureSet
352
FixedTempoEstimator::D::getRemainingFeatures()
353
{
354
    FeatureSet fs;
355
    if (m_n > m_dfsize) return fs;
356
    calculate();
357
    fs = assembleFeatures();
358
    ++m_n;
359
    return fs;
360
}
361

    
362
float
363
FixedTempoEstimator::D::lag2tempo(int lag)
364
{
365
    return 60.f / ((lag * m_stepSize) / m_inputSampleRate);
366
}
367

    
368
int
369
FixedTempoEstimator::D::tempo2lag(float tempo)
370
{
371
    return ((60.f / tempo) * m_inputSampleRate) / m_stepSize;
372
}
373

    
374
void
375
FixedTempoEstimator::D::calculate()
376
{    
377
    if (m_r) {
378
        cerr << "FixedTempoEstimator::calculate: calculation already happened?" << endl;
379
        return;
380
    }
381

    
382
    if (m_n < m_dfsize / 9 &&
383
        m_n < (1.0 * m_inputSampleRate) / m_stepSize) { // 1 second
384
        cerr << "FixedTempoEstimator::calculate: Input is too short" << endl;
385
        return;
386
    }
387

    
388
    // This function takes m_df (the detection function array filled
389
    // out in process()) and calculates m_r (the raw autocorrelation)
390
    // and m_fr (the filtered autocorrelation from whose peaks tempo
391
    // estimates will be taken).
392

    
393
    int n = m_n; // length of actual df array (m_dfsize is the theoretical max)
394

    
395
    m_r  = new float[n/2]; // raw autocorrelation
396
    m_fr = new float[n/2]; // filtered autocorrelation
397
    m_t  = new float[n/2]; // averaged tempo estimate for each lag value
398

    
399
    for (int i = 0; i < n/2; ++i) {
400
        m_r[i]  = 0.f;
401
        m_fr[i] = 0.f;
402
        m_t[i]  = lag2tempo(i);
403
    }
404

    
405
    // Calculate the raw autocorrelation of the detection function
406

    
407
    for (int i = 0; i < n/2; ++i) {
408

    
409
        for (int j = i; j < n; ++j) {
410
            m_r[i] += m_df[j] * m_df[j - i];
411
        }
412

    
413
        m_r[i] /= n - i - 1;
414
    }
415

    
416
    // Filter the autocorrelation and average out the tempo estimates
417
    
418
    float related[] = { 0.5, 2, 4, 8 };
419

    
420
    for (int i = 1; i < n/2-1; ++i) {
421

    
422
        m_fr[i] = m_r[i];
423

    
424
        int div = 1;
425

    
426
        for (int j = 0; j < int(sizeof(related)/sizeof(related[0])); ++j) {
427

    
428
            // Check for an obvious peak at each metrically related lag
429

    
430
            int k0 = int(i * related[j] + 0.5);
431

    
432
            if (k0 >= 0 && k0 < int(n/2)) {
433

    
434
                int kmax = 0;
435
                float kvmax = 0, kvmin = 0;
436
                bool have = false;
437

    
438
                for (int k = k0 - 1; k <= k0 + 1; ++k) {
439

    
440
                    if (k < 0 || k >= n/2) continue;
441

    
442
                    if (!have || (m_r[k] > kvmax)) { kvmax = m_r[k]; kmax = k; }
443
                    if (!have || (m_r[k] < kvmin)) { kvmin = m_r[k]; }
444
                    
445
                    have = true;
446
                }
447
                
448
                // Boost the original lag according to the strongest
449
                // value found close to this related lag
450

    
451
                m_fr[i] += m_r[kmax] / 5;
452

    
453
                if ((kmax == 0 || m_r[kmax] > m_r[kmax-1]) &&
454
                    (kmax == n/2-1 || m_r[kmax] > m_r[kmax+1]) &&
455
                    kvmax > kvmin * 1.05) {
456

    
457
                    // The strongest value close to the related lag is
458
                    // also a pretty good looking peak, so use it to
459
                    // improve our tempo estimate for the original lag
460
                    
461
                    m_t[i] = m_t[i] + lag2tempo(kmax) * related[j];
462
                    ++div;
463
                }
464
            }
465
        }
466
        
467
        m_t[i] /= div;
468
        
469
        // Finally apply a primitive perceptual weighting (to prefer
470
        // tempi of around 120-130)
471

    
472
        float weight = 1.f - fabsf(128.f - lag2tempo(i)) * 0.005;
473
        if (weight < 0.f) weight = 0.f;
474
        weight = weight * weight * weight;
475

    
476
        m_fr[i] += m_fr[i] * (weight / 3);
477
    }
478
}
479
    
480
FixedTempoEstimator::FeatureSet
481
FixedTempoEstimator::D::assembleFeatures()
482
{
483
    FeatureSet fs;
484
    if (!m_r) return fs; // No autocorrelation: no results
485

    
486
    Feature feature;
487
    feature.hasTimestamp = true;
488
    feature.hasDuration = false;
489
    feature.label = "";
490
    feature.values.clear();
491
    feature.values.push_back(0.f);
492

    
493
    char buffer[40];
494

    
495
    int n = m_n;
496

    
497
    for (int i = 0; i < n; ++i) {
498

    
499
        // Return the detection function in the DF output
500

    
501
        feature.timestamp = m_start +
502
            RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
503
        feature.values[0] = m_df[i];
504
        feature.label = "";
505
        fs[DFOutput].push_back(feature);
506
    }
507

    
508
    for (int i = 1; i < n/2; ++i) {
509

    
510
        // Return the raw autocorrelation in the ACF output, each
511
        // value labelled according to its corresponding tempo
512

    
513
        feature.timestamp = m_start +
514
            RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
515
        feature.values[0] = m_r[i];
516
        sprintf(buffer, "%.1f bpm", lag2tempo(i));
517
        if (i == n/2-1) feature.label = "";
518
        else feature.label = buffer;
519
        fs[ACFOutput].push_back(feature);
520
    }
521

    
522
    float t0 = m_minbpm; // our minimum detected tempo
523
    float t1 = m_maxbpm; // our maximum detected tempo
524

    
525
    int p0 = tempo2lag(t1);
526
    int p1 = tempo2lag(t0);
527

    
528
    std::map<float, int> candidates;
529

    
530
    for (int i = p0; i <= p1 && i+1 < n/2; ++i) {
531

    
532
        if (i < 1) continue;
533
        
534
        if (m_fr[i] > m_fr[i-1] &&
535
            m_fr[i] > m_fr[i+1]) {
536

    
537
            // This is a peak in the filtered autocorrelation: stick
538
            // it into the map from filtered autocorrelation to lag
539
            // index -- this sorts our peaks by filtered acf value
540

    
541
            candidates[m_fr[i]] = i;
542
        }
543

    
544
        // Also return the filtered autocorrelation in its own output
545

    
546
        feature.timestamp = m_start +
547
            RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
548
        feature.values[0] = m_fr[i];
549
        sprintf(buffer, "%.1f bpm", lag2tempo(i));
550
        if (i == p1 || i == n/2-2) feature.label = "";
551
        else feature.label = buffer;
552
        fs[FilteredACFOutput].push_back(feature);
553
    }
554

    
555
    if (candidates.empty()) {
556
        cerr << "No tempo candidates!" << endl;
557
        return fs;
558
    }
559

    
560
    feature.hasTimestamp = true;
561
    feature.timestamp = m_start;
562
    
563
    feature.hasDuration = true;
564
    feature.duration = m_lasttime - m_start;
565

    
566
    // The map contains only peaks and is sorted by filtered acf
567
    // value, so the final element in it is our "best" tempo guess
568

    
569
    std::map<float, int>::const_iterator ci = candidates.end();
570
    --ci;
571
    int maxpi = ci->second;
572

    
573
    if (m_t[maxpi] > 0) {
574

    
575
        // This lag has an adjusted tempo from the averaging process:
576
        // use it
577

    
578
        feature.values[0] = m_t[maxpi];
579

    
580
    } else {
581

    
582
        // shouldn't happen -- it would imply that this high value was
583
        // not a peak!
584

    
585
        feature.values[0] = lag2tempo(maxpi);
586
        cerr << "WARNING: No stored tempo for index " << maxpi << endl;
587
    }
588

    
589
    sprintf(buffer, "%.1f bpm", feature.values[0]);
590
    feature.label = buffer;
591

    
592
    // Return the best tempo in the main output
593

    
594
    fs[TempoOutput].push_back(feature);
595

    
596
    // And return the other estimates (up to the arbitrarily chosen
597
    // number of 10 of them) in the candidates output
598

    
599
    feature.values.clear();
600
    feature.label = "";
601

    
602
    while (feature.values.size() < 10) {
603
        if (m_t[ci->second] > 0) {
604
            feature.values.push_back(m_t[ci->second]);
605
        } else {
606
            feature.values.push_back(lag2tempo(ci->second));
607
        }
608
        if (ci == candidates.begin()) break;
609
        --ci;
610
    }
611

    
612
    fs[CandidatesOutput].push_back(feature);
613
    
614
    return fs;
615
}
616

    
617
    
618

    
619
FixedTempoEstimator::FixedTempoEstimator(float inputSampleRate) :
620
    Plugin(inputSampleRate),
621
    m_d(new D(inputSampleRate))
622
{
623
}
624

    
625
FixedTempoEstimator::~FixedTempoEstimator()
626
{
627
    delete m_d;
628
}
629

    
630
string
631
FixedTempoEstimator::getIdentifier() const
632
{
633
    return "fixedtempo";
634
}
635

    
636
string
637
FixedTempoEstimator::getName() const
638
{
639
    return "Simple Fixed Tempo Estimator";
640
}
641

    
642
string
643
FixedTempoEstimator::getDescription() const
644
{
645
    return "Study a short section of audio and estimate its tempo, assuming the tempo is constant";
646
}
647

    
648
string
649
FixedTempoEstimator::getMaker() const
650
{
651
    return "Vamp SDK Example Plugins";
652
}
653

    
654
int
655
FixedTempoEstimator::getPluginVersion() const
656
{
657
    return 1;
658
}
659

    
660
string
661
FixedTempoEstimator::getCopyright() const
662
{
663
    return "Code copyright 2008 Queen Mary, University of London.  Freely redistributable (BSD license)";
664
}
665

    
666
size_t
667
FixedTempoEstimator::getPreferredStepSize() const
668
{
669
    return m_d->getPreferredStepSize();
670
}
671

    
672
size_t
673
FixedTempoEstimator::getPreferredBlockSize() const
674
{
675
    return m_d->getPreferredBlockSize();
676
}
677

    
678
bool
679
FixedTempoEstimator::initialise(size_t channels, size_t stepSize, size_t blockSize)
680
{
681
    if (channels < getMinChannelCount() ||
682
        channels > getMaxChannelCount()) return false;
683

    
684
    return m_d->initialise(channels, stepSize, blockSize);
685
}
686

    
687
void
688
FixedTempoEstimator::reset()
689
{
690
    return m_d->reset();
691
}
692

    
693
FixedTempoEstimator::ParameterList
694
FixedTempoEstimator::getParameterDescriptors() const
695
{
696
    return m_d->getParameterDescriptors();
697
}
698

    
699
float
700
FixedTempoEstimator::getParameter(std::string id) const
701
{
702
    return m_d->getParameter(id);
703
}
704

    
705
void
706
FixedTempoEstimator::setParameter(std::string id, float value)
707
{
708
    m_d->setParameter(id, value);
709
}
710

    
711
FixedTempoEstimator::OutputList
712
FixedTempoEstimator::getOutputDescriptors() const
713
{
714
    return m_d->getOutputDescriptors();
715
}
716

    
717
FixedTempoEstimator::FeatureSet
718
FixedTempoEstimator::process(const float *const *inputBuffers, RealTime ts)
719
{
720
    return m_d->process(inputBuffers, ts);
721
}
722

    
723
FixedTempoEstimator::FeatureSet
724
FixedTempoEstimator::getRemainingFeatures()
725
{
726
    return m_d->getRemainingFeatures();
727
}