To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstrumPitchTracker.cpp @ 23:0e67ed2777e9

History | View | Annotate | Download (16.4 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

    
23
#include "CepstrumPitchTracker.h"
24

    
25
#include <vector>
26
#include <algorithm>
27

    
28
#include <cstdio>
29
#include <cmath>
30
#include <complex>
31

    
32
using std::string;
33
using std::vector;
34
using Vamp::RealTime;
35

    
36
CepstrumPitchTracker::Hypothesis::Hypothesis()
37
{
38
    m_state = New;
39
    m_age = 0;
40
}
41

    
42
CepstrumPitchTracker::Hypothesis::~Hypothesis()
43
{
44
}
45

    
46
bool
47
CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s)
48
{
49
    if (m_pending.empty()) {
50
        return true;
51
    }
52

    
53
    // check we are within a relatively close tolerance of the last
54
    // candidate
55
    Estimate last = m_pending[m_pending.size()-1];
56
    double r = s.freq / last.freq;
57
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
58
    if (cents < -60 || cents > 60) return false;
59

    
60
    // and within a slightly bigger tolerance of the current mean
61
    double meanFreq = getMeanFrequency();
62
    r = s.freq / meanFreq;
63
    cents = lrint(1200.0 * (log(r) / log(2.0)));
64
    if (cents < -80 || cents > 80) return false;
65
    
66
    return true;
67
}
68

    
69
bool 
70
CepstrumPitchTracker::Hypothesis::isSatisfied()
71
{
72
    if (m_pending.empty()) return false;
73
    
74
    double meanConfidence = 0.0;
75
    for (int i = 0; i < m_pending.size(); ++i) {
76
        meanConfidence += m_pending[i].confidence;
77
    }
78
    meanConfidence /= m_pending.size();
79

    
80
    int lengthRequired = int(2.0 / meanConfidence + 0.5);
81
    std::cerr << "meanConfidence = " << meanConfidence << ", lengthRequired = " << lengthRequired << ", length = " << m_pending.size() << std::endl;
82

    
83
    return (m_pending.size() > lengthRequired);
84
}
85

    
86
void
87
CepstrumPitchTracker::Hypothesis::advanceTime()
88
{
89
    ++m_age;
90
}
91

    
92
bool
93
CepstrumPitchTracker::Hypothesis::test(Estimate s)
94
{
95
    bool accept = false;
96

    
97
    switch (m_state) {
98

    
99
    case New:
100
        m_state = Provisional;
101
        accept = true;
102
        break;
103

    
104
    case Provisional:
105
        if (m_age > 3) {
106
            m_state = Rejected;
107
        } else if (isWithinTolerance(s)) {
108
            accept = true;
109
        }
110
        break;
111
        
112
    case Satisfied:
113
        if (m_age > 3) {
114
            m_state = Expired;
115
        } else if (isWithinTolerance(s)) {
116
            accept = true;
117
        }
118
        break;
119

    
120
    case Rejected:
121
        break;
122

    
123
    case Expired:
124
        break;
125
    }
126

    
127
    if (accept) {
128
        m_pending.push_back(s);
129
        m_age = 0;
130
        if (m_state == Provisional && isSatisfied()) {
131
            m_state = Satisfied;
132
        }
133
    }
134

    
135
    return accept && (m_state == Satisfied);
136
}        
137

    
138
CepstrumPitchTracker::Hypothesis::State
139
CepstrumPitchTracker::Hypothesis::getState()
140
{
141
    return m_state;
142
}
143

    
144
int
145
CepstrumPitchTracker::Hypothesis::getPendingLength()
146
{
147
    return m_pending.size();
148
}
149

    
150
CepstrumPitchTracker::Hypothesis::Estimates
151
CepstrumPitchTracker::Hypothesis::getAcceptedEstimates()
152
{
153
    if (m_state == Satisfied || m_state == Expired) {
154
        return m_pending;
155
    } else {
156
        return Estimates();
157
    }
158
}
159

    
160
double
161
CepstrumPitchTracker::Hypothesis::getMeanFrequency()
162
{
163
    double acc = 0.0;
164
    for (int i = 0; i < m_pending.size(); ++i) {
165
        acc += m_pending[i].freq;
166
    }
167
    acc /= m_pending.size();
168
    return acc;
169
}
170

    
171
CepstrumPitchTracker::Hypothesis::Note
172
CepstrumPitchTracker::Hypothesis::getAveragedNote()
173
{
174
    Note n;
175

    
176
    if (!(m_state == Satisfied || m_state == Expired)) {
177
        n.freq = 0.0;
178
        n.time = RealTime::zeroTime;
179
        n.duration = RealTime::zeroTime;
180
        return n;
181
    }
182

    
183
    n.time = m_pending.begin()->time;
184

    
185
    Estimates::iterator i = m_pending.end();
186
    --i;
187
    n.duration = i->time - n.time;
188

    
189
    // just mean frequency for now, but this isn't at all right perceptually
190
    n.freq = getMeanFrequency();
191
    
192
    return n;
193
}
194

    
195
void
196
CepstrumPitchTracker::Hypothesis::addFeatures(FeatureSet &fs)
197
{
198
    for (int i = 0; i < m_pending.size(); ++i) {
199
        Feature f;
200
        f.hasTimestamp = true;
201
        f.timestamp = m_pending[i].time;
202
        f.values.push_back(m_pending[i].freq);
203
        fs[0].push_back(f);
204
    }
205

    
206
    Feature nf;
207
    nf.hasTimestamp = true;
208
    nf.hasDuration = true;
209
    Note n = getAveragedNote();
210
    nf.timestamp = n.time;
211
    nf.duration = n.duration;
212
    nf.values.push_back(n.freq);
213
    fs[1].push_back(nf);
214
}
215

    
216
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
217
    Plugin(inputSampleRate),
218
    m_channels(0),
219
    m_stepSize(256),
220
    m_blockSize(1024),
221
    m_fmin(50),
222
    m_fmax(1000),
223
    m_vflen(1),
224
    m_binFrom(0),
225
    m_binTo(0),
226
    m_bins(0)
227
{
228
}
229

    
230
CepstrumPitchTracker::~CepstrumPitchTracker()
231
{
232
}
233

    
234
string
235
CepstrumPitchTracker::getIdentifier() const
236
{
237
    return "cepstrum-pitch";
238
}
239

    
240
string
241
CepstrumPitchTracker::getName() const
242
{
243
    return "Cepstrum Pitch Tracker";
244
}
245

    
246
string
247
CepstrumPitchTracker::getDescription() const
248
{
249
    return "Estimate f0 of monophonic material using a cepstrum method.";
250
}
251

    
252
string
253
CepstrumPitchTracker::getMaker() const
254
{
255
    return "Chris Cannam";
256
}
257

    
258
int
259
CepstrumPitchTracker::getPluginVersion() const
260
{
261
    // Increment this each time you release a version that behaves
262
    // differently from the previous one
263
    return 1;
264
}
265

    
266
string
267
CepstrumPitchTracker::getCopyright() const
268
{
269
    return "Freely redistributable (BSD license)";
270
}
271

    
272
CepstrumPitchTracker::InputDomain
273
CepstrumPitchTracker::getInputDomain() const
274
{
275
    return FrequencyDomain;
276
}
277

    
278
size_t
279
CepstrumPitchTracker::getPreferredBlockSize() const
280
{
281
    return 1024;
282
}
283

    
284
size_t 
285
CepstrumPitchTracker::getPreferredStepSize() const
286
{
287
    return 256;
288
}
289

    
290
size_t
291
CepstrumPitchTracker::getMinChannelCount() const
292
{
293
    return 1;
294
}
295

    
296
size_t
297
CepstrumPitchTracker::getMaxChannelCount() const
298
{
299
    return 1;
300
}
301

    
302
CepstrumPitchTracker::ParameterList
303
CepstrumPitchTracker::getParameterDescriptors() const
304
{
305
    ParameterList list;
306
    return list;
307
}
308

    
309
float
310
CepstrumPitchTracker::getParameter(string identifier) const
311
{
312
    return 0.f;
313
}
314

    
315
void
316
CepstrumPitchTracker::setParameter(string identifier, float value) 
317
{
318
}
319

    
320
CepstrumPitchTracker::ProgramList
321
CepstrumPitchTracker::getPrograms() const
322
{
323
    ProgramList list;
324
    return list;
325
}
326

    
327
string
328
CepstrumPitchTracker::getCurrentProgram() const
329
{
330
    return ""; // no programs
331
}
332

    
333
void
334
CepstrumPitchTracker::selectProgram(string name)
335
{
336
}
337

    
338
CepstrumPitchTracker::OutputList
339
CepstrumPitchTracker::getOutputDescriptors() const
340
{
341
    OutputList outputs;
342

    
343
    int n = 0;
344

    
345
    OutputDescriptor d;
346

    
347
    d.identifier = "f0";
348
    d.name = "Estimated f0";
349
    d.description = "Estimated fundamental frequency";
350
    d.unit = "Hz";
351
    d.hasFixedBinCount = true;
352
    d.binCount = 1;
353
    d.hasKnownExtents = true;
354
    d.minValue = m_fmin;
355
    d.maxValue = m_fmax;
356
    d.isQuantized = false;
357
    d.sampleType = OutputDescriptor::FixedSampleRate;
358
    d.sampleRate = (m_inputSampleRate / m_stepSize);
359
    d.hasDuration = false;
360
    outputs.push_back(d);
361

    
362
    d.identifier = "notes";
363
    d.name = "Notes";
364
    d.description = "Derived fixed-pitch note frequencies";
365
    d.unit = "Hz";
366
    d.hasFixedBinCount = true;
367
    d.binCount = 1;
368
    d.hasKnownExtents = true;
369
    d.minValue = m_fmin;
370
    d.maxValue = m_fmax;
371
    d.isQuantized = false;
372
    d.sampleType = OutputDescriptor::FixedSampleRate;
373
    d.sampleRate = (m_inputSampleRate / m_stepSize);
374
    d.hasDuration = true;
375
    outputs.push_back(d);
376

    
377
    return outputs;
378
}
379

    
380
bool
381
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
382
{
383
    if (channels < getMinChannelCount() ||
384
        channels > getMaxChannelCount()) return false;
385

    
386
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
387
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
388
//              << std::endl;
389

    
390
    m_channels = channels;
391
    m_stepSize = stepSize;
392
    m_blockSize = blockSize;
393

    
394
    m_binFrom = int(m_inputSampleRate / m_fmax);
395
    m_binTo = int(m_inputSampleRate / m_fmin); 
396

    
397
    if (m_binTo >= (int)m_blockSize / 2) {
398
        m_binTo = m_blockSize / 2 - 1;
399
    }
400

    
401
    m_bins = (m_binTo - m_binFrom) + 1;
402

    
403
    reset();
404

    
405
    return true;
406
}
407

    
408
void
409
CepstrumPitchTracker::reset()
410
{
411
}
412

    
413
void
414
CepstrumPitchTracker::filter(const double *cep, double *data)
415
{
416
    for (int i = 0; i < m_bins; ++i) {
417
        double v = 0;
418
        int n = 0;
419
        // average according to the vertical filter length
420
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
421
            int ix = i + m_binFrom + j;
422
            if (ix >= 0 && ix < m_blockSize) {
423
                v += cep[ix];
424
                ++n;
425
            }
426
        }
427
        data[i] = v / n;
428
    }
429
}
430

    
431
double
432
CepstrumPitchTracker::cubicInterpolate(const double y[4], double x)
433
{
434
    double a0 = y[3] - y[2] - y[0] + y[1];
435
    double a1 = y[0] - y[1] - a0;
436
    double a2 = y[2] - y[0];
437
    double a3 = y[1];
438
    return
439
        a0 * x * x * x +
440
        a1 * x * x +
441
        a2 * x +
442
        a3;
443
}
444

    
445
double
446
CepstrumPitchTracker::findInterpolatedPeak(const double *in, int maxbin)
447
{
448
    if (maxbin < 2 || maxbin > m_bins - 3) {
449
        return maxbin;
450
    }
451

    
452
    double maxval = 0.0;
453
    double maxidx = maxbin;
454

    
455
    const int divisions = 10;
456
    double y[4];
457

    
458
    y[0] = in[maxbin-1];
459
    y[1] = in[maxbin];
460
    y[2] = in[maxbin+1];
461
    y[3] = in[maxbin+2];
462
    for (int i = 0; i < divisions; ++i) {
463
        double probe = double(i) / double(divisions);
464
        double value = cubicInterpolate(y, probe);
465
        if (value > maxval) {
466
            maxval = value; 
467
            maxidx = maxbin + probe;
468
        }
469
    }
470

    
471
    y[3] = y[2];
472
    y[2] = y[1];
473
    y[1] = y[0];
474
    y[0] = in[maxbin-2];
475
    for (int i = 0; i < divisions; ++i) {
476
        double probe = double(i) / double(divisions);
477
        double value = cubicInterpolate(y, probe);
478
        if (value > maxval) {
479
            maxval = value; 
480
            maxidx = maxbin - 1 + probe;
481
        }
482
    }
483

    
484
/*
485
    std::cerr << "centre = " << maxbin << ": ["
486
              << in[maxbin-2] << ","
487
              << in[maxbin-1] << ","
488
              << in[maxbin] << ","
489
              << in[maxbin+1] << ","
490
              << in[maxbin+2] << "] -> " << maxidx << std::endl;
491
*/
492

    
493
    return maxidx;
494
}
495

    
496
CepstrumPitchTracker::FeatureSet
497
CepstrumPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
498
{
499
    FeatureSet fs;
500

    
501
    int bs = m_blockSize;
502
    int hs = m_blockSize/2 + 1;
503

    
504
    double *rawcep = new double[bs];
505
    double *io = new double[bs];
506
    double *logmag = new double[bs];
507

    
508
    // The "inverse symmetric" method. Seems to be the most reliable
509
        
510
    for (int i = 0; i < hs; ++i) {
511

    
512
        double power =
513
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
514
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
515
        double mag = sqrt(power);
516
        
517
        double lm = log(mag + 0.00000001);
518
        
519
        logmag[i] = lm;
520
        if (i > 0) logmag[bs - i] = lm;
521
    }
522

    
523
    fft(bs, true, logmag, 0, rawcep, io);
524
    
525
    delete[] logmag;
526
    delete[] io;
527

    
528
    double cep1 = rawcep[1];
529

    
530
    int n = m_bins;
531
    double *data = new double[n];
532
    filter(rawcep, data);
533
    delete[] rawcep;
534

    
535
    double maxval = 0.0;
536
    int maxbin = -1;
537

    
538
    for (int i = 0; i < n; ++i) {
539
        if (data[i] > maxval) {
540
            maxval = data[i];
541
            maxbin = i;
542
        }
543
    }
544

    
545
    if (maxbin < 0) {
546
        delete[] data;
547
        return fs;
548
    }
549

    
550
    double nextPeakVal = 0.0;
551
    for (int i = 1; i+1 < n; ++i) {
552
        if (data[i] > data[i-1] &&
553
            data[i] > data[i+1] &&
554
            i != maxbin &&
555
            data[i] > nextPeakVal) {
556
            nextPeakVal = data[i];
557
        }
558
    }
559

    
560
    double cimax = findInterpolatedPeak(data, maxbin);
561
    double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
562

    
563
    double confidence = 0.0;
564
    if (nextPeakVal != 0.0) {
565
        std::cerr << "maxval = " << maxval << ", cep1 = " << cep1 << std::endl;
566
        double conf0 = (maxval - nextPeakVal) / 80.0;
567
        double conf1 = (cep1 / bs) / 2;
568
        if (conf0 > 1.0) conf0 = 1.0;
569
        if (conf1 > 1.0) conf1 = 1.0;
570
        confidence = conf0 * conf1;
571
        std::cerr << "conf0 = " << conf0 << ", conf1 = " << conf1 << ", confidence = " << confidence << std::endl;
572
    }
573

    
574
    Hypothesis::Estimate e;
575
    e.freq = peakfreq;
576
    e.time = timestamp;
577
    e.confidence = confidence;
578

    
579
    m_accepted.advanceTime();
580

    
581
    for (int i = 0; i < m_possible.size(); ++i) {
582
        m_possible[i].advanceTime();
583
    }
584

    
585
    if (!m_accepted.test(e)) {
586

    
587
        int candidate = -1;
588
        bool accepted = false;
589

    
590
        for (int i = 0; i < m_possible.size(); ++i) {
591
            if (m_possible[i].test(e)) {
592
                accepted = true;
593
                if (m_possible[i].getState() == Hypothesis::Satisfied) {
594
                    candidate = i;
595
                }
596
                break;
597
            }
598
        }
599

    
600
        if (!accepted) {
601
            Hypothesis h;
602
            h.test(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
603
            m_possible.push_back(h);
604
        }
605

    
606
        if (m_accepted.getState() == Hypothesis::Expired) {
607
            m_accepted.addFeatures(fs);
608
        }
609
        
610
        if (m_accepted.getState() == Hypothesis::Expired ||
611
            m_accepted.getState() == Hypothesis::Rejected) {
612
            if (candidate >= 0) {
613
                m_accepted = m_possible[candidate];
614
            } else {
615
                m_accepted = Hypothesis();
616
            }
617
        }
618

    
619
        // reap rejected/expired hypotheses from possible list
620
        Hypotheses toReap = m_possible;
621
        m_possible.clear();
622
        for (int i = 0; i < toReap.size(); ++i) {
623
            Hypothesis h = toReap[i];
624
            if (h.getState() != Hypothesis::Rejected && 
625
                h.getState() != Hypothesis::Expired) {
626
                m_possible.push_back(h);
627
            }
628
        }
629
    }  
630

    
631
    std::cerr << "accepted length = " << m_accepted.getPendingLength()
632
              << ", state = " << m_accepted.getState()
633
              << ", hypothesis count = " << m_possible.size() << std::endl;
634

    
635
    delete[] data;
636
    return fs;
637
}
638

    
639
CepstrumPitchTracker::FeatureSet
640
CepstrumPitchTracker::getRemainingFeatures()
641
{
642
    FeatureSet fs;
643
    if (m_accepted.getState() == Hypothesis::Satisfied) {
644
        m_accepted.addFeatures(fs);
645
    }
646
    return fs;
647
}
648

    
649
void
650
CepstrumPitchTracker::fft(unsigned int n, bool inverse,
651
                    double *ri, double *ii, double *ro, double *io)
652
{
653
    if (!ri || !ro || !io) return;
654

    
655
    unsigned int bits;
656
    unsigned int i, j, k, m;
657
    unsigned int blockSize, blockEnd;
658

    
659
    double tr, ti;
660

    
661
    if (n < 2) return;
662
    if (n & (n-1)) return;
663

    
664
    double angle = 2.0 * M_PI;
665
    if (inverse) angle = -angle;
666

    
667
    for (i = 0; ; ++i) {
668
        if (n & (1 << i)) {
669
            bits = i;
670
            break;
671
        }
672
    }
673

    
674
    int *table = new int[n];
675

    
676
    for (i = 0; i < n; ++i) {
677
        
678
        m = i;
679

    
680
        for (j = k = 0; j < bits; ++j) {
681
            k = (k << 1) | (m & 1);
682
            m >>= 1;
683
        }
684

    
685
        table[i] = k;
686
    }
687

    
688
    if (ii) {
689
        for (i = 0; i < n; ++i) {
690
            ro[table[i]] = ri[i];
691
            io[table[i]] = ii[i];
692
        }
693
    } else {
694
        for (i = 0; i < n; ++i) {
695
            ro[table[i]] = ri[i];
696
            io[table[i]] = 0.0;
697
        }
698
    }
699

    
700
    blockEnd = 1;
701

    
702
    for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
703

    
704
        double delta = angle / (double)blockSize;
705
        double sm2 = -sin(-2 * delta);
706
        double sm1 = -sin(-delta);
707
        double cm2 = cos(-2 * delta);
708
        double cm1 = cos(-delta);
709
        double w = 2 * cm1;
710
        double ar[3], ai[3];
711

    
712
        for (i = 0; i < n; i += blockSize) {
713

    
714
            ar[2] = cm2;
715
            ar[1] = cm1;
716

    
717
            ai[2] = sm2;
718
            ai[1] = sm1;
719

    
720
            for (j = i, m = 0; m < blockEnd; j++, m++) {
721

    
722
                ar[0] = w * ar[1] - ar[2];
723
                ar[2] = ar[1];
724
                ar[1] = ar[0];
725

    
726
                ai[0] = w * ai[1] - ai[2];
727
                ai[2] = ai[1];
728
                ai[1] = ai[0];
729

    
730
                k = j + blockEnd;
731
                tr = ar[0] * ro[k] - ai[0] * io[k];
732
                ti = ar[0] * io[k] + ai[0] * ro[k];
733

    
734
                ro[k] = ro[j] - tr;
735
                io[k] = io[j] - ti;
736

    
737
                ro[j] += tr;
738
                io[j] += ti;
739
            }
740
        }
741

    
742
        blockEnd = blockSize;
743
    }
744
}
745

    
746