To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstrumPitchTracker.cpp @ 20:f9c4c4568fb0

History | View | Annotate | Download (16.2 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

    
23
#include "CepstrumPitchTracker.h"
24

    
25
#include <vector>
26
#include <algorithm>
27

    
28
#include <cstdio>
29
#include <cmath>
30
#include <complex>
31

    
32
using std::string;
33
using std::vector;
34
using Vamp::RealTime;
35

    
36
CepstrumPitchTracker::Hypothesis::Hypothesis()
37
{
38
    m_state = New;
39
    m_age = 0;
40
}
41

    
42
CepstrumPitchTracker::Hypothesis::~Hypothesis()
43
{
44
}
45

    
46
bool
47
CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s)
48
{
49
    if (m_pending.empty()) {
50
        return true;
51
    }
52

    
53
    // check we are within a relatively close tolerance of the last
54
    // candidate
55
    Estimate last = m_pending[m_pending.size()-1];
56
    double r = s.freq / last.freq;
57
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
58
    if (cents < -60 || cents > 60) return false;
59

    
60
    // and within a slightly bigger tolerance of the current mean
61
    double meanFreq = getMeanFrequency();
62
    r = s.freq / meanFreq;
63
    cents = lrint(1200.0 * (log(r) / log(2.0)));
64
    if (cents < -80 || cents > 80) return false;
65
    
66
    return true;
67
}
68

    
69
bool 
70
CepstrumPitchTracker::Hypothesis::isSatisfied()
71
{
72
    if (m_pending.empty()) return false;
73
    
74
    double meanConfidence = 0.0;
75
    for (int i = 0; i < m_pending.size(); ++i) {
76
        meanConfidence += m_pending[i].confidence;
77
    }
78
    meanConfidence /= m_pending.size();
79

    
80
    int lengthRequired = int(2.0 / meanConfidence + 0.5);
81
    std::cerr << "meanConfidence = " << meanConfidence << ", lengthRequired = " << lengthRequired << ", length = " << m_pending.size() << std::endl;
82

    
83
    return (m_pending.size() > lengthRequired);
84
}
85

    
86
void
87
CepstrumPitchTracker::Hypothesis::advanceTime()
88
{
89
    ++m_age;
90
}
91

    
92
bool
93
CepstrumPitchTracker::Hypothesis::test(Estimate s)
94
{
95
    bool accept = false;
96

    
97
    switch (m_state) {
98

    
99
    case New:
100
        m_state = Provisional;
101
        accept = true;
102
        break;
103

    
104
    case Provisional:
105
        if (m_age > 3) {
106
            m_state = Rejected;
107
        } else if (isWithinTolerance(s)) {
108
            accept = true;
109
        }
110
        break;
111
        
112
    case Satisfied:
113
        if (m_age > 3) {
114
            m_state = Expired;
115
        } else if (isWithinTolerance(s)) {
116
            accept = true;
117
        }
118
        break;
119

    
120
    case Rejected:
121
        break;
122

    
123
    case Expired:
124
        break;
125
    }
126

    
127
    if (accept) {
128
        m_pending.push_back(s);
129
        m_age = 0;
130
        if (m_state == Provisional && isSatisfied()) {
131
            m_state = Satisfied;
132
        }
133
    }
134

    
135
    return accept && (m_state == Satisfied);
136
}        
137

    
138
CepstrumPitchTracker::Hypothesis::State
139
CepstrumPitchTracker::Hypothesis::getState()
140
{
141
    return m_state;
142
}
143

    
144
int
145
CepstrumPitchTracker::Hypothesis::getPendingLength()
146
{
147
    return m_pending.size();
148
}
149

    
150
CepstrumPitchTracker::Hypothesis::Estimates
151
CepstrumPitchTracker::Hypothesis::getAcceptedEstimates()
152
{
153
    if (m_state == Satisfied || m_state == Expired) {
154
        return m_pending;
155
    } else {
156
        return Estimates();
157
    }
158
}
159

    
160
double
161
CepstrumPitchTracker::Hypothesis::getMeanFrequency()
162
{
163
    double acc = 0.0;
164
    for (int i = 0; i < m_pending.size(); ++i) {
165
        acc += m_pending[i].freq;
166
    }
167
    acc /= m_pending.size();
168
    return acc;
169
}
170

    
171
CepstrumPitchTracker::Hypothesis::Note
172
CepstrumPitchTracker::Hypothesis::getAveragedNote()
173
{
174
    Note n;
175

    
176
    if (!(m_state == Satisfied || m_state == Expired)) {
177
        n.freq = 0.0;
178
        n.time = RealTime::zeroTime;
179
        n.duration = RealTime::zeroTime;
180
        return n;
181
    }
182

    
183
    n.time = m_pending.begin()->time;
184

    
185
    Estimates::iterator i = m_pending.end();
186
    --i;
187
    n.duration = i->time - n.time;
188

    
189
    // just mean frequency for now, but this isn't at all right perceptually
190
    n.freq = getMeanFrequency();
191
    
192
    return n;
193
}
194

    
195
void
196
CepstrumPitchTracker::Hypothesis::addFeatures(FeatureSet &fs)
197
{
198
    for (int i = 0; i < m_pending.size(); ++i) {
199
        Feature f;
200
        f.hasTimestamp = true;
201
        f.timestamp = m_pending[i].time;
202
        f.values.push_back(m_pending[i].freq);
203
        fs[0].push_back(f);
204
    }
205

    
206
    Feature nf;
207
    nf.hasTimestamp = true;
208
    nf.hasDuration = true;
209
    Note n = getAveragedNote();
210
    nf.timestamp = n.time;
211
    nf.duration = n.duration;
212
    nf.values.push_back(n.freq);
213
    fs[1].push_back(nf);
214
}
215

    
216
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
217
    Plugin(inputSampleRate),
218
    m_channels(0),
219
    m_stepSize(256),
220
    m_blockSize(1024),
221
    m_fmin(50),
222
    m_fmax(1000),
223
    m_vflen(1),
224
    m_binFrom(0),
225
    m_binTo(0),
226
    m_bins(0)
227
{
228
}
229

    
230
CepstrumPitchTracker::~CepstrumPitchTracker()
231
{
232
}
233

    
234
string
235
CepstrumPitchTracker::getIdentifier() const
236
{
237
    return "cepstrum-pitch";
238
}
239

    
240
string
241
CepstrumPitchTracker::getName() const
242
{
243
    return "Cepstrum Pitch Tracker";
244
}
245

    
246
string
247
CepstrumPitchTracker::getDescription() const
248
{
249
    return "Estimate f0 of monophonic material using a cepstrum method.";
250
}
251

    
252
string
253
CepstrumPitchTracker::getMaker() const
254
{
255
    return "Chris Cannam";
256
}
257

    
258
int
259
CepstrumPitchTracker::getPluginVersion() const
260
{
261
    // Increment this each time you release a version that behaves
262
    // differently from the previous one
263
    return 1;
264
}
265

    
266
string
267
CepstrumPitchTracker::getCopyright() const
268
{
269
    return "Freely redistributable (BSD license)";
270
}
271

    
272
CepstrumPitchTracker::InputDomain
273
CepstrumPitchTracker::getInputDomain() const
274
{
275
    return FrequencyDomain;
276
}
277

    
278
size_t
279
CepstrumPitchTracker::getPreferredBlockSize() const
280
{
281
    return 1024;
282
}
283

    
284
size_t 
285
CepstrumPitchTracker::getPreferredStepSize() const
286
{
287
    return 256;
288
}
289

    
290
size_t
291
CepstrumPitchTracker::getMinChannelCount() const
292
{
293
    return 1;
294
}
295

    
296
size_t
297
CepstrumPitchTracker::getMaxChannelCount() const
298
{
299
    return 1;
300
}
301

    
302
CepstrumPitchTracker::ParameterList
303
CepstrumPitchTracker::getParameterDescriptors() const
304
{
305
    ParameterList list;
306
    return list;
307
}
308

    
309
float
310
CepstrumPitchTracker::getParameter(string identifier) const
311
{
312
    return 0.f;
313
}
314

    
315
void
316
CepstrumPitchTracker::setParameter(string identifier, float value) 
317
{
318
}
319

    
320
CepstrumPitchTracker::ProgramList
321
CepstrumPitchTracker::getPrograms() const
322
{
323
    ProgramList list;
324
    return list;
325
}
326

    
327
string
328
CepstrumPitchTracker::getCurrentProgram() const
329
{
330
    return ""; // no programs
331
}
332

    
333
void
334
CepstrumPitchTracker::selectProgram(string name)
335
{
336
}
337

    
338
CepstrumPitchTracker::OutputList
339
CepstrumPitchTracker::getOutputDescriptors() const
340
{
341
    OutputList outputs;
342

    
343
    int n = 0;
344

    
345
    OutputDescriptor d;
346

    
347
    d.identifier = "f0";
348
    d.name = "Estimated f0";
349
    d.description = "Estimated fundamental frequency";
350
    d.unit = "Hz";
351
    d.hasFixedBinCount = true;
352
    d.binCount = 1;
353
    d.hasKnownExtents = true;
354
    d.minValue = m_fmin;
355
    d.maxValue = m_fmax;
356
    d.isQuantized = false;
357
    d.sampleType = OutputDescriptor::FixedSampleRate;
358
    d.sampleRate = (m_inputSampleRate / m_stepSize);
359
    d.hasDuration = false;
360
    outputs.push_back(d);
361

    
362
    d.identifier = "notes";
363
    d.name = "Notes";
364
    d.description = "Derived fixed-pitch note frequencies";
365
    d.unit = "Hz";
366
    d.hasFixedBinCount = true;
367
    d.binCount = 1;
368
    d.hasKnownExtents = true;
369
    d.minValue = m_fmin;
370
    d.maxValue = m_fmax;
371
    d.isQuantized = false;
372
    d.sampleType = OutputDescriptor::FixedSampleRate;
373
    d.sampleRate = (m_inputSampleRate / m_stepSize);
374
    d.hasDuration = true;
375
    outputs.push_back(d);
376

    
377
    return outputs;
378
}
379

    
380
bool
381
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
382
{
383
    if (channels < getMinChannelCount() ||
384
        channels > getMaxChannelCount()) return false;
385

    
386
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
387
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
388
//              << std::endl;
389

    
390
    m_channels = channels;
391
    m_stepSize = stepSize;
392
    m_blockSize = blockSize;
393

    
394
    m_binFrom = int(m_inputSampleRate / m_fmax);
395
    m_binTo = int(m_inputSampleRate / m_fmin); 
396

    
397
    if (m_binTo >= (int)m_blockSize / 2) {
398
        m_binTo = m_blockSize / 2 - 1;
399
    }
400

    
401
    m_bins = (m_binTo - m_binFrom) + 1;
402

    
403
    reset();
404

    
405
    return true;
406
}
407

    
408
void
409
CepstrumPitchTracker::reset()
410
{
411
}
412

    
413
void
414
CepstrumPitchTracker::filter(const double *cep, double *data)
415
{
416
    for (int i = 0; i < m_bins; ++i) {
417
        double v = 0;
418
        int n = 0;
419
        // average according to the vertical filter length
420
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
421
            int ix = i + m_binFrom + j;
422
            if (ix >= 0 && ix < m_blockSize) {
423
                v += cep[ix];
424
                ++n;
425
            }
426
        }
427
        data[i] = v / n;
428
    }
429
}
430

    
431
double
432
CepstrumPitchTracker::cubicInterpolate(const double y[4], double x)
433
{
434
    double a0 = y[3] - y[2] - y[0] + y[1];
435
    double a1 = y[0] - y[1] - a0;
436
    double a2 = y[2] - y[0];
437
    double a3 = y[1];
438
    return
439
        a0 * x * x * x +
440
        a1 * x * x +
441
        a2 * x +
442
        a3;
443
}
444

    
445
double
446
CepstrumPitchTracker::findInterpolatedPeak(const double *in, int maxbin)
447
{
448
    if (maxbin < 2 || maxbin > m_bins - 3) {
449
        return maxbin;
450
    }
451

    
452
    double maxval = 0.0;
453
    double maxidx = maxbin;
454

    
455
    const int divisions = 10;
456
    double y[4];
457

    
458
    y[0] = in[maxbin-1];
459
    y[1] = in[maxbin];
460
    y[2] = in[maxbin+1];
461
    y[3] = in[maxbin+2];
462
    for (int i = 0; i < divisions; ++i) {
463
        double probe = double(i) / double(divisions);
464
        double value = cubicInterpolate(y, probe);
465
        if (value > maxval) {
466
            maxval = value; 
467
            maxidx = maxbin + probe;
468
        }
469
    }
470

    
471
    y[3] = y[2];
472
    y[2] = y[1];
473
    y[1] = y[0];
474
    y[0] = in[maxbin-2];
475
    for (int i = 0; i < divisions; ++i) {
476
        double probe = double(i) / double(divisions);
477
        double value = cubicInterpolate(y, probe);
478
        if (value > maxval) {
479
            maxval = value; 
480
            maxidx = maxbin - 1 + probe;
481
        }
482
    }
483

    
484
/*
485
    std::cerr << "centre = " << maxbin << ": ["
486
              << in[maxbin-2] << ","
487
              << in[maxbin-1] << ","
488
              << in[maxbin] << ","
489
              << in[maxbin+1] << ","
490
              << in[maxbin+2] << "] -> " << maxidx << std::endl;
491
*/
492

    
493
    return maxidx;
494
}
495

    
496
CepstrumPitchTracker::FeatureSet
497
CepstrumPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
498
{
499
    FeatureSet fs;
500

    
501
    int bs = m_blockSize;
502
    int hs = m_blockSize/2 + 1;
503

    
504
    double *rawcep = new double[bs];
505
    double *io = new double[bs];
506
    double *logmag = new double[bs];
507

    
508
    // The "inverse symmetric" method. Seems to be the most reliable
509
        
510
    for (int i = 0; i < hs; ++i) {
511

    
512
        double power =
513
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
514
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
515
        double mag = sqrt(power);
516
        
517
        double lm = log(mag + 0.00000001);
518
        
519
        logmag[i] = lm;
520
        if (i > 0) logmag[bs - i] = lm;
521
    }
522

    
523
    fft(bs, true, logmag, 0, rawcep, io);
524
    
525
    delete[] logmag;
526
    delete[] io;
527

    
528
    int n = m_bins;
529
    double *data = new double[n];
530
    filter(rawcep, data);
531
    delete[] rawcep;
532

    
533
    double maxval = 0.0;
534
    int maxbin = -1;
535

    
536
    for (int i = 0; i < n; ++i) {
537
        if (data[i] > maxval) {
538
            maxval = data[i];
539
            maxbin = i;
540
        }
541
    }
542

    
543
    if (maxbin < 0) {
544
        delete[] data;
545
        return fs;
546
    }
547

    
548
    double nextPeakVal = 0.0;
549
    for (int i = 1; i+1 < n; ++i) {
550
        if (data[i] > data[i-1] &&
551
            data[i] > data[i+1] &&
552
            i != maxbin &&
553
            data[i] > nextPeakVal) {
554
            nextPeakVal = data[i];
555
        }
556
    }
557

    
558
    double cimax = findInterpolatedPeak(data, maxbin);
559
    double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
560

    
561
    double confidence = 0.0;
562
    if (nextPeakVal != 0.0) {
563
        confidence = (maxval - nextPeakVal) / 200.0;
564
        if (confidence > 1.0) confidence = 1.0;
565
    }
566

    
567
    Hypothesis::Estimate e;
568
    e.freq = peakfreq;
569
    e.time = timestamp;
570
    e.confidence = confidence;
571

    
572
    m_accepted.advanceTime();
573

    
574
    for (int i = 0; i < m_possible.size(); ++i) {
575
        m_possible[i].advanceTime();
576
    }
577

    
578
    if (!m_accepted.test(e)) {
579

    
580
        int candidate = -1;
581
        bool accepted = false;
582

    
583
        for (int i = 0; i < m_possible.size(); ++i) {
584
            if (m_possible[i].test(e)) {
585
                accepted = true;
586
                if (m_possible[i].getState() == Hypothesis::Satisfied) {
587
                    candidate = i;
588
                }
589
                break;
590
            }
591
        }
592

    
593
        if (!accepted) {
594
            Hypothesis h;
595
            h.test(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
596
            m_possible.push_back(h);
597
        }
598

    
599
        if (m_accepted.getState() == Hypothesis::Expired) {
600
            m_accepted.addFeatures(fs);
601
        }
602
        
603
        if (m_accepted.getState() == Hypothesis::Expired ||
604
            m_accepted.getState() == Hypothesis::Rejected) {
605
            if (candidate >= 0) {
606
                m_accepted = m_possible[candidate];
607
            } else {
608
                m_accepted = Hypothesis();
609
            }
610
        }
611

    
612
        // reap rejected/expired hypotheses from possible list
613
        Hypotheses toReap = m_possible;
614
        m_possible.clear();
615
        for (int i = 0; i < toReap.size(); ++i) {
616
            Hypothesis h = toReap[i];
617
            if (h.getState() != Hypothesis::Rejected && 
618
                h.getState() != Hypothesis::Expired) {
619
                m_possible.push_back(h);
620
            }
621
        }
622
    }  
623

    
624
    std::cerr << "accepted length = " << m_accepted.getPendingLength()
625
              << ", state = " << m_accepted.getState()
626
              << ", hypothesis count = " << m_possible.size() << std::endl;
627

    
628
    delete[] data;
629
    return fs;
630
}
631

    
632
CepstrumPitchTracker::FeatureSet
633
CepstrumPitchTracker::getRemainingFeatures()
634
{
635
    FeatureSet fs;
636
    if (m_accepted.getState() == Hypothesis::Satisfied) {
637
        m_accepted.addFeatures(fs);
638
    }
639
    return fs;
640
}
641

    
642
void
643
CepstrumPitchTracker::fft(unsigned int n, bool inverse,
644
                    double *ri, double *ii, double *ro, double *io)
645
{
646
    if (!ri || !ro || !io) return;
647

    
648
    unsigned int bits;
649
    unsigned int i, j, k, m;
650
    unsigned int blockSize, blockEnd;
651

    
652
    double tr, ti;
653

    
654
    if (n < 2) return;
655
    if (n & (n-1)) return;
656

    
657
    double angle = 2.0 * M_PI;
658
    if (inverse) angle = -angle;
659

    
660
    for (i = 0; ; ++i) {
661
        if (n & (1 << i)) {
662
            bits = i;
663
            break;
664
        }
665
    }
666

    
667
    static unsigned int tableSize = 0;
668
    static int *table = 0;
669

    
670
    if (tableSize != n) {
671

    
672
        delete[] table;
673

    
674
        table = new int[n];
675

    
676
        for (i = 0; i < n; ++i) {
677
        
678
            m = i;
679

    
680
            for (j = k = 0; j < bits; ++j) {
681
                k = (k << 1) | (m & 1);
682
                m >>= 1;
683
            }
684

    
685
            table[i] = k;
686
        }
687

    
688
        tableSize = n;
689
    }
690

    
691
    if (ii) {
692
        for (i = 0; i < n; ++i) {
693
            ro[table[i]] = ri[i];
694
            io[table[i]] = ii[i];
695
        }
696
    } else {
697
        for (i = 0; i < n; ++i) {
698
            ro[table[i]] = ri[i];
699
            io[table[i]] = 0.0;
700
        }
701
    }
702

    
703
    blockEnd = 1;
704

    
705
    for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
706

    
707
        double delta = angle / (double)blockSize;
708
        double sm2 = -sin(-2 * delta);
709
        double sm1 = -sin(-delta);
710
        double cm2 = cos(-2 * delta);
711
        double cm1 = cos(-delta);
712
        double w = 2 * cm1;
713
        double ar[3], ai[3];
714

    
715
        for (i = 0; i < n; i += blockSize) {
716

    
717
            ar[2] = cm2;
718
            ar[1] = cm1;
719

    
720
            ai[2] = sm2;
721
            ai[1] = sm1;
722

    
723
            for (j = i, m = 0; m < blockEnd; j++, m++) {
724

    
725
                ar[0] = w * ar[1] - ar[2];
726
                ar[2] = ar[1];
727
                ar[1] = ar[0];
728

    
729
                ai[0] = w * ai[1] - ai[2];
730
                ai[2] = ai[1];
731
                ai[1] = ai[0];
732

    
733
                k = j + blockEnd;
734
                tr = ar[0] * ro[k] - ai[0] * io[k];
735
                ti = ar[0] * io[k] + ai[0] * ro[k];
736

    
737
                ro[k] = ro[j] - tr;
738
                io[k] = io[j] - ti;
739

    
740
                ro[j] += tr;
741
                io[j] += ti;
742
            }
743
        }
744

    
745
        blockEnd = blockSize;
746
    }
747
}
748

    
749