To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstrumPitchTracker.cpp @ 18:131b1c40be1a

History | View | Annotate | Download (16.3 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

    
23
#include "CepstrumPitchTracker.h"
24

    
25
#include <vector>
26
#include <algorithm>
27

    
28
#include <cstdio>
29
#include <cmath>
30
#include <complex>
31

    
32
using std::string;
33
using std::vector;
34
using Vamp::RealTime;
35

    
36
CepstrumPitchTracker::Hypothesis::Hypothesis()
37
{
38
    m_state = New;
39
    m_age = 0;
40
}
41

    
42
CepstrumPitchTracker::Hypothesis::~Hypothesis()
43
{
44
}
45

    
46
bool
47
CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s)
48
{
49
    if (m_pending.empty()) {
50
        return true;
51
    }
52

    
53
    // check we are within a relatively close tolerance of the last
54
    // candidate
55
    Estimate last = m_pending[m_pending.size()-1];
56
    double r = s.freq / last.freq;
57
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
58
    if (cents < -60 || cents > 60) return false;
59

    
60
    // and within a slightly bigger tolerance of the current mean
61
    double meanFreq = getMeanFrequency();
62
    r = s.freq / meanFreq;
63
    cents = lrint(1200.0 * (log(r) / log(2.0)));
64
    if (cents < -80 || cents > 80) return false;
65
    
66
    return true;
67
}
68

    
69
bool 
70
CepstrumPitchTracker::Hypothesis::isSatisfied()
71
{
72
    if (m_pending.empty()) return false;
73
    
74
    double meanConfidence = 0.0;
75
    for (int i = 0; i < m_pending.size(); ++i) {
76
        meanConfidence += m_pending[i].confidence;
77
    }
78
    meanConfidence /= m_pending.size();
79

    
80
    int lengthRequired = int(2.0 / meanConfidence + 0.5);
81
    std::cerr << "meanConfidence = " << meanConfidence << ", lengthRequired = " << lengthRequired << ", length = " << m_pending.size() << std::endl;
82

    
83
    return (m_pending.size() > lengthRequired);
84
}
85

    
86
void
87
CepstrumPitchTracker::Hypothesis::advanceTime()
88
{
89
    ++m_age;
90
}
91

    
92
bool
93
CepstrumPitchTracker::Hypothesis::test(Estimate s)
94
{
95
    bool accept = false;
96

    
97
    switch (m_state) {
98

    
99
    case New:
100
        m_state = Provisional;
101
        accept = true;
102
        break;
103

    
104
    case Provisional:
105
        if (m_age > 3) {
106
            m_state = Rejected;
107
        } else if (isWithinTolerance(s)) {
108
            accept = true;
109
        }
110
        break;
111
        
112
    case Satisfied:
113
        if (m_age > 3) {
114
            m_state = Expired;
115
        } else if (isWithinTolerance(s)) {
116
            accept = true;
117
        }
118
        break;
119

    
120
    case Rejected:
121
        break;
122

    
123
    case Expired:
124
        break;
125
    }
126

    
127
    if (accept) {
128
        m_pending.push_back(s);
129
        m_age = 0;
130
        if (m_state == Provisional && isSatisfied()) {
131
            m_state = Satisfied;
132
        }
133
    }
134

    
135
    return accept && (m_state == Satisfied);
136
}        
137

    
138
CepstrumPitchTracker::Hypothesis::State
139
CepstrumPitchTracker::Hypothesis::getState()
140
{
141
    return m_state;
142
}
143

    
144
int
145
CepstrumPitchTracker::Hypothesis::getPendingLength()
146
{
147
    return m_pending.size();
148
}
149

    
150
CepstrumPitchTracker::Hypothesis::Estimates
151
CepstrumPitchTracker::Hypothesis::getAcceptedEstimates()
152
{
153
    if (m_state == Satisfied || m_state == Expired) {
154
        return m_pending;
155
    } else {
156
        return Estimates();
157
    }
158
}
159

    
160
double
161
CepstrumPitchTracker::Hypothesis::getMeanFrequency()
162
{
163
    double acc = 0.0;
164
    for (int i = 0; i < m_pending.size(); ++i) {
165
        acc += m_pending[i].freq;
166
    }
167
    acc /= m_pending.size();
168
    return acc;
169
}
170

    
171
CepstrumPitchTracker::Hypothesis::Note
172
CepstrumPitchTracker::Hypothesis::getAveragedNote()
173
{
174
    Note n;
175

    
176
    if (!(m_state == Satisfied || m_state == Expired)) {
177
        n.freq = 0.0;
178
        n.time = RealTime::zeroTime;
179
        n.duration = RealTime::zeroTime;
180
        return n;
181
    }
182

    
183
    n.time = m_pending.begin()->time;
184

    
185
    Estimates::iterator i = m_pending.end();
186
    --i;
187
    n.duration = i->time - n.time;
188

    
189
    // just mean frequency for now, but this isn't at all right perceptually
190
    n.freq = getMeanFrequency();
191
    
192
    return n;
193
}
194

    
195
void
196
CepstrumPitchTracker::Hypothesis::addFeatures(FeatureSet &fs)
197
{
198
    for (int i = 0; i < m_pending.size(); ++i) {
199
        Feature f;
200
        f.hasTimestamp = true;
201
        f.timestamp = m_pending[i].time;
202
        f.values.push_back(m_pending[i].freq);
203
        fs[0].push_back(f);
204
    }
205

    
206
    Feature nf;
207
    nf.hasTimestamp = true;
208
    nf.hasDuration = true;
209
    Note n = getAveragedNote();
210
    nf.timestamp = n.time;
211
    nf.duration = n.duration;
212
    nf.values.push_back(n.freq);
213
    fs[1].push_back(nf);
214
}
215

    
216
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
217
    Plugin(inputSampleRate),
218
    m_channels(0),
219
    m_stepSize(256),
220
    m_blockSize(1024),
221
    m_fmin(50),
222
    m_fmax(1000),
223
    m_vflen(1),
224
    m_binFrom(0),
225
    m_binTo(0),
226
    m_bins(0)
227
{
228
}
229

    
230
CepstrumPitchTracker::~CepstrumPitchTracker()
231
{
232
}
233

    
234
string
235
CepstrumPitchTracker::getIdentifier() const
236
{
237
    return "cepstrum-pitch";
238
}
239

    
240
string
241
CepstrumPitchTracker::getName() const
242
{
243
    return "Cepstrum Pitch Tracker";
244
}
245

    
246
string
247
CepstrumPitchTracker::getDescription() const
248
{
249
    return "Estimate f0 of monophonic material using a cepstrum method.";
250
}
251

    
252
string
253
CepstrumPitchTracker::getMaker() const
254
{
255
    return "Chris Cannam";
256
}
257

    
258
int
259
CepstrumPitchTracker::getPluginVersion() const
260
{
261
    // Increment this each time you release a version that behaves
262
    // differently from the previous one
263
    return 1;
264
}
265

    
266
string
267
CepstrumPitchTracker::getCopyright() const
268
{
269
    return "Freely redistributable (BSD license)";
270
}
271

    
272
CepstrumPitchTracker::InputDomain
273
CepstrumPitchTracker::getInputDomain() const
274
{
275
    return FrequencyDomain;
276
}
277

    
278
size_t
279
CepstrumPitchTracker::getPreferredBlockSize() const
280
{
281
    return 1024;
282
}
283

    
284
size_t 
285
CepstrumPitchTracker::getPreferredStepSize() const
286
{
287
    return 256;
288
}
289

    
290
size_t
291
CepstrumPitchTracker::getMinChannelCount() const
292
{
293
    return 1;
294
}
295

    
296
size_t
297
CepstrumPitchTracker::getMaxChannelCount() const
298
{
299
    return 1;
300
}
301

    
302
CepstrumPitchTracker::ParameterList
303
CepstrumPitchTracker::getParameterDescriptors() const
304
{
305
    ParameterList list;
306
    return list;
307
}
308

    
309
float
310
CepstrumPitchTracker::getParameter(string identifier) const
311
{
312
    return 0.f;
313
}
314

    
315
void
316
CepstrumPitchTracker::setParameter(string identifier, float value) 
317
{
318
}
319

    
320
CepstrumPitchTracker::ProgramList
321
CepstrumPitchTracker::getPrograms() const
322
{
323
    ProgramList list;
324
    return list;
325
}
326

    
327
string
328
CepstrumPitchTracker::getCurrentProgram() const
329
{
330
    return ""; // no programs
331
}
332

    
333
void
334
CepstrumPitchTracker::selectProgram(string name)
335
{
336
}
337

    
338
CepstrumPitchTracker::OutputList
339
CepstrumPitchTracker::getOutputDescriptors() const
340
{
341
    OutputList outputs;
342

    
343
    int n = 0;
344

    
345
    OutputDescriptor d;
346

    
347
    d.identifier = "f0";
348
    d.name = "Estimated f0";
349
    d.description = "Estimated fundamental frequency";
350
    d.unit = "Hz";
351
    d.hasFixedBinCount = true;
352
    d.binCount = 1;
353
    d.hasKnownExtents = true;
354
    d.minValue = m_fmin;
355
    d.maxValue = m_fmax;
356
    d.isQuantized = false;
357
    d.sampleType = OutputDescriptor::FixedSampleRate;
358
    d.sampleRate = (m_inputSampleRate / m_stepSize);
359
    d.hasDuration = false;
360
    outputs.push_back(d);
361

    
362
    d.identifier = "notes";
363
    d.name = "Notes";
364
    d.description = "Derived fixed-pitch note frequencies";
365
    d.unit = "Hz";
366
    d.hasFixedBinCount = true;
367
    d.binCount = 1;
368
    d.hasKnownExtents = true;
369
    d.minValue = m_fmin;
370
    d.maxValue = m_fmax;
371
    d.isQuantized = false;
372
    d.sampleType = OutputDescriptor::FixedSampleRate;
373
    d.sampleRate = (m_inputSampleRate / m_stepSize);
374
    d.hasDuration = true;
375
    outputs.push_back(d);
376

    
377
    return outputs;
378
}
379

    
380
bool
381
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
382
{
383
    if (channels < getMinChannelCount() ||
384
        channels > getMaxChannelCount()) return false;
385

    
386
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
387
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
388
//              << std::endl;
389

    
390
    m_channels = channels;
391
    m_stepSize = stepSize;
392
    m_blockSize = blockSize;
393

    
394
    m_binFrom = int(m_inputSampleRate / m_fmax);
395
    m_binTo = int(m_inputSampleRate / m_fmin); 
396

    
397
    if (m_binTo >= (int)m_blockSize / 2) {
398
        m_binTo = m_blockSize / 2 - 1;
399
    }
400

    
401
    m_bins = (m_binTo - m_binFrom) + 1;
402

    
403
    reset();
404

    
405
    return true;
406
}
407

    
408
void
409
CepstrumPitchTracker::reset()
410
{
411
}
412

    
413
void
414
CepstrumPitchTracker::filter(const double *cep, double *data)
415
{
416
    for (int i = 0; i < m_bins; ++i) {
417
        double v = 0;
418
        int n = 0;
419
        // average according to the vertical filter length
420
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
421
            int ix = i + m_binFrom + j;
422
            if (ix >= 0 && ix < m_blockSize) {
423
                v += cep[ix];
424
                ++n;
425
            }
426
        }
427
        data[i] = v / n;
428
    }
429
}
430

    
431
double
432
CepstrumPitchTracker::cubicInterpolate(const double y[4], double x)
433
{
434
    double a0 = y[3] - y[2] - y[0] + y[1];
435
    double a1 = y[0] - y[1] - a0;
436
    double a2 = y[2] - y[0];
437
    double a3 = y[1];
438
    return
439
        a0 * x * x * x +
440
        a1 * x * x +
441
        a2 * x +
442
        a3;
443
}
444

    
445
double
446
CepstrumPitchTracker::findInterpolatedPeak(const double *in, int maxbin)
447
{
448
    if (maxbin < 2 || maxbin > m_bins - 3) {
449
        return maxbin;
450
    }
451

    
452
    double maxval = 0.0;
453
    double maxidx = maxbin;
454

    
455
    const int divisions = 10;
456
    double y[4];
457

    
458
    y[0] = in[maxbin-1];
459
    y[1] = in[maxbin];
460
    y[2] = in[maxbin+1];
461
    y[3] = in[maxbin+2];
462
    for (int i = 0; i < divisions; ++i) {
463
        double probe = double(i) / double(divisions);
464
        double value = cubicInterpolate(y, probe);
465
        if (value > maxval) {
466
            maxval = value; 
467
            maxidx = maxbin + probe;
468
        }
469
    }
470

    
471
    y[3] = y[2];
472
    y[2] = y[1];
473
    y[1] = y[0];
474
    y[0] = in[maxbin-2];
475
    for (int i = 0; i < divisions; ++i) {
476
        double probe = double(i) / double(divisions);
477
        double value = cubicInterpolate(y, probe);
478
        if (value > maxval) {
479
            maxval = value; 
480
            maxidx = maxbin - 1 + probe;
481
        }
482
    }
483

    
484
/*
485
    std::cerr << "centre = " << maxbin << ": ["
486
              << in[maxbin-2] << ","
487
              << in[maxbin-1] << ","
488
              << in[maxbin] << ","
489
              << in[maxbin+1] << ","
490
              << in[maxbin+2] << "] -> " << maxidx << std::endl;
491
*/
492

    
493
    return maxidx;
494
}
495

    
496
CepstrumPitchTracker::FeatureSet
497
CepstrumPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
498
{
499
    FeatureSet fs;
500

    
501
    int bs = m_blockSize;
502
    int hs = m_blockSize/2 + 1;
503

    
504
    double *rawcep = new double[bs];
505
    double *io = new double[bs];
506
    double *logmag = new double[bs];
507

    
508
    // The "inverse symmetric" method. Seems to be the most reliable
509
        
510
    for (int i = 0; i < hs; ++i) {
511

    
512
        double power =
513
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
514
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
515
        double mag = sqrt(power);
516
        
517
        double lm = log(mag + 0.00000001);
518
        
519
        logmag[i] = lm;
520
        if (i > 0) logmag[bs - i] = lm;
521
    }
522

    
523
    fft(bs, true, logmag, 0, rawcep, io);
524
    
525
    delete[] logmag;
526
    delete[] io;
527

    
528
    int n = m_bins;
529
    double *data = new double[n];
530
    filter(rawcep, data);
531
    delete[] rawcep;
532

    
533
    double abstot = 0.0;
534

    
535
    for (int i = 0; i < n; ++i) {
536
        abstot += fabs(data[i]);
537
    }
538

    
539
    double maxval = 0.0;
540
    int maxbin = -1;
541

    
542
    for (int i = 0; i < n; ++i) {
543
        if (data[i] > maxval) {
544
            maxval = data[i];
545
            maxbin = i;
546
        }
547
    }
548

    
549
    if (maxbin < 0) {
550
        delete[] data;
551
        return fs;
552
    }
553

    
554
    double nextPeakVal = 0.0;
555
    for (int i = 1; i+1 < n; ++i) {
556
        if (data[i] > data[i-1] &&
557
            data[i] > data[i+1] &&
558
            i != maxbin &&
559
            data[i] > nextPeakVal) {
560
            nextPeakVal = data[i];
561
        }
562
    }
563

    
564
    double cimax = findInterpolatedPeak(data, maxbin);
565
    double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
566

    
567
    double confidence = 0.0;
568
    if (nextPeakVal != 0.0) {
569
        confidence = ((maxval / nextPeakVal) - 1.0) / 4.0;
570
        if (confidence > 1.0) confidence = 1.0;
571
    }
572

    
573
    Hypothesis::Estimate e;
574
    e.freq = peakfreq;
575
    e.time = timestamp;
576
    e.confidence = confidence;
577

    
578
    m_accepted.advanceTime();
579

    
580
    for (int i = 0; i < m_possible.size(); ++i) {
581
        m_possible[i].advanceTime();
582
    }
583

    
584
    if (!m_accepted.test(e)) {
585

    
586
        int candidate = -1;
587
        bool accepted = false;
588

    
589
        for (int i = 0; i < m_possible.size(); ++i) {
590
            if (m_possible[i].test(e)) {
591
                accepted = true;
592
                if (m_possible[i].getState() == Hypothesis::Satisfied) {
593
                    candidate = i;
594
                }
595
                break;
596
            }
597
        }
598

    
599
        if (!accepted) {
600
            Hypothesis h;
601
            h.test(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
602
            m_possible.push_back(h);
603
        }
604

    
605
        if (m_accepted.getState() == Hypothesis::Expired) {
606
            m_accepted.addFeatures(fs);
607
        }
608
        
609
        if (m_accepted.getState() == Hypothesis::Expired ||
610
            m_accepted.getState() == Hypothesis::Rejected) {
611
            if (candidate >= 0) {
612
                m_accepted = m_possible[candidate];
613
            } else {
614
                m_accepted = Hypothesis();
615
            }
616
        }
617

    
618
        // reap rejected/expired hypotheses from possible list
619
        Hypotheses toReap = m_possible;
620
        m_possible.clear();
621
        for (int i = 0; i < toReap.size(); ++i) {
622
            Hypothesis h = toReap[i];
623
            if (h.getState() != Hypothesis::Rejected && 
624
                h.getState() != Hypothesis::Expired) {
625
                m_possible.push_back(h);
626
            }
627
        }
628
    }  
629

    
630
    std::cerr << "accepted length = " << m_accepted.getPendingLength()
631
              << ", state = " << m_accepted.getState()
632
              << ", hypothesis count = " << m_possible.size() << std::endl;
633

    
634
    delete[] data;
635
    return fs;
636
}
637

    
638
CepstrumPitchTracker::FeatureSet
639
CepstrumPitchTracker::getRemainingFeatures()
640
{
641
    FeatureSet fs;
642
    if (m_accepted.getState() == Hypothesis::Satisfied) {
643
        m_accepted.addFeatures(fs);
644
    }
645
    return fs;
646
}
647

    
648
void
649
CepstrumPitchTracker::fft(unsigned int n, bool inverse,
650
                    double *ri, double *ii, double *ro, double *io)
651
{
652
    if (!ri || !ro || !io) return;
653

    
654
    unsigned int bits;
655
    unsigned int i, j, k, m;
656
    unsigned int blockSize, blockEnd;
657

    
658
    double tr, ti;
659

    
660
    if (n < 2) return;
661
    if (n & (n-1)) return;
662

    
663
    double angle = 2.0 * M_PI;
664
    if (inverse) angle = -angle;
665

    
666
    for (i = 0; ; ++i) {
667
        if (n & (1 << i)) {
668
            bits = i;
669
            break;
670
        }
671
    }
672

    
673
    static unsigned int tableSize = 0;
674
    static int *table = 0;
675

    
676
    if (tableSize != n) {
677

    
678
        delete[] table;
679

    
680
        table = new int[n];
681

    
682
        for (i = 0; i < n; ++i) {
683
        
684
            m = i;
685

    
686
            for (j = k = 0; j < bits; ++j) {
687
                k = (k << 1) | (m & 1);
688
                m >>= 1;
689
            }
690

    
691
            table[i] = k;
692
        }
693

    
694
        tableSize = n;
695
    }
696

    
697
    if (ii) {
698
        for (i = 0; i < n; ++i) {
699
            ro[table[i]] = ri[i];
700
            io[table[i]] = ii[i];
701
        }
702
    } else {
703
        for (i = 0; i < n; ++i) {
704
            ro[table[i]] = ri[i];
705
            io[table[i]] = 0.0;
706
        }
707
    }
708

    
709
    blockEnd = 1;
710

    
711
    for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
712

    
713
        double delta = angle / (double)blockSize;
714
        double sm2 = -sin(-2 * delta);
715
        double sm1 = -sin(-delta);
716
        double cm2 = cos(-2 * delta);
717
        double cm1 = cos(-delta);
718
        double w = 2 * cm1;
719
        double ar[3], ai[3];
720

    
721
        for (i = 0; i < n; i += blockSize) {
722

    
723
            ar[2] = cm2;
724
            ar[1] = cm1;
725

    
726
            ai[2] = sm2;
727
            ai[1] = sm1;
728

    
729
            for (j = i, m = 0; m < blockEnd; j++, m++) {
730

    
731
                ar[0] = w * ar[1] - ar[2];
732
                ar[2] = ar[1];
733
                ar[1] = ar[0];
734

    
735
                ai[0] = w * ai[1] - ai[2];
736
                ai[2] = ai[1];
737
                ai[1] = ai[0];
738

    
739
                k = j + blockEnd;
740
                tr = ar[0] * ro[k] - ai[0] * io[k];
741
                ti = ar[0] * io[k] + ai[0] * ro[k];
742

    
743
                ro[k] = ro[j] - tr;
744
                io[k] = io[j] - ti;
745

    
746
                ro[j] += tr;
747
                io[j] += ti;
748
            }
749
        }
750

    
751
        blockEnd = blockSize;
752
    }
753
}
754

    
755