To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstrumPitchTracker.cpp @ 25:9aee1a0e6223

History | View | Annotate | Download (16.4 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

    
23
#include "CepstrumPitchTracker.h"
24

    
25
#include <vector>
26
#include <algorithm>
27

    
28
#include <cstdio>
29
#include <cmath>
30
#include <complex>
31

    
32
using std::string;
33
using std::vector;
34
using Vamp::RealTime;
35

    
36
CepstrumPitchTracker::Hypothesis::Hypothesis()
37
{
38
    m_state = New;
39
    m_age = 0;
40
}
41

    
42
CepstrumPitchTracker::Hypothesis::~Hypothesis()
43
{
44
}
45

    
46
bool
47
CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s)
48
{
49
    if (m_pending.empty()) {
50
        return true;
51
    }
52

    
53
    // check we are within a relatively close tolerance of the last
54
    // candidate
55
    Estimate last = m_pending[m_pending.size()-1];
56
    double r = s.freq / last.freq;
57
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
58
    if (cents < -60 || cents > 60) return false;
59

    
60
    // and within a slightly bigger tolerance of the current mean
61
    double meanFreq = getMeanFrequency();
62
    r = s.freq / meanFreq;
63
    cents = lrint(1200.0 * (log(r) / log(2.0)));
64
    if (cents < -80 || cents > 80) return false;
65
    
66
    return true;
67
}
68

    
69
bool 
70
CepstrumPitchTracker::Hypothesis::isSatisfied()
71
{
72
    if (m_pending.empty()) return false;
73
    
74
    double meanConfidence = 0.0;
75
    for (int i = 0; i < m_pending.size(); ++i) {
76
        meanConfidence += m_pending[i].confidence;
77
    }
78
    meanConfidence /= m_pending.size();
79

    
80
    int lengthRequired = 10000;
81
    if (meanConfidence > 0.0) {
82
        lengthRequired = int(2.0 / meanConfidence + 0.5);
83
    }
84
    std::cerr << "meanConfidence = " << meanConfidence << ", lengthRequired = " << lengthRequired << ", length = " << m_pending.size() << std::endl;
85

    
86
    return (m_pending.size() > lengthRequired);
87
}
88

    
89
void
90
CepstrumPitchTracker::Hypothesis::advanceTime()
91
{
92
    ++m_age;
93
}
94

    
95
bool
96
CepstrumPitchTracker::Hypothesis::test(Estimate s)
97
{
98
    bool accept = false;
99

    
100
    switch (m_state) {
101

    
102
    case New:
103
        m_state = Provisional;
104
        accept = true;
105
        break;
106

    
107
    case Provisional:
108
        if (m_age > 3) {
109
            m_state = Rejected;
110
        } else if (isWithinTolerance(s)) {
111
            accept = true;
112
        }
113
        break;
114
        
115
    case Satisfied:
116
        if (m_age > 3) {
117
            m_state = Expired;
118
        } else if (isWithinTolerance(s)) {
119
            accept = true;
120
        }
121
        break;
122

    
123
    case Rejected:
124
        break;
125

    
126
    case Expired:
127
        break;
128
    }
129

    
130
    if (accept) {
131
        m_pending.push_back(s);
132
        m_age = 0;
133
        if (m_state == Provisional && isSatisfied()) {
134
            m_state = Satisfied;
135
        }
136
    }
137

    
138
    return accept && (m_state == Satisfied);
139
}        
140

    
141
CepstrumPitchTracker::Hypothesis::State
142
CepstrumPitchTracker::Hypothesis::getState()
143
{
144
    return m_state;
145
}
146

    
147
int
148
CepstrumPitchTracker::Hypothesis::getPendingLength()
149
{
150
    return m_pending.size();
151
}
152

    
153
CepstrumPitchTracker::Hypothesis::Estimates
154
CepstrumPitchTracker::Hypothesis::getAcceptedEstimates()
155
{
156
    if (m_state == Satisfied || m_state == Expired) {
157
        return m_pending;
158
    } else {
159
        return Estimates();
160
    }
161
}
162

    
163
double
164
CepstrumPitchTracker::Hypothesis::getMeanFrequency()
165
{
166
    double acc = 0.0;
167
    for (int i = 0; i < m_pending.size(); ++i) {
168
        acc += m_pending[i].freq;
169
    }
170
    acc /= m_pending.size();
171
    return acc;
172
}
173

    
174
CepstrumPitchTracker::Hypothesis::Note
175
CepstrumPitchTracker::Hypothesis::getAveragedNote()
176
{
177
    Note n;
178

    
179
    if (!(m_state == Satisfied || m_state == Expired)) {
180
        n.freq = 0.0;
181
        n.time = RealTime::zeroTime;
182
        n.duration = RealTime::zeroTime;
183
        return n;
184
    }
185

    
186
    n.time = m_pending.begin()->time;
187

    
188
    Estimates::iterator i = m_pending.end();
189
    --i;
190
    n.duration = i->time - n.time;
191

    
192
    // just mean frequency for now, but this isn't at all right perceptually
193
    n.freq = getMeanFrequency();
194
    
195
    return n;
196
}
197

    
198
void
199
CepstrumPitchTracker::Hypothesis::addFeatures(FeatureSet &fs)
200
{
201
    for (int i = 0; i < m_pending.size(); ++i) {
202
        Feature f;
203
        f.hasTimestamp = true;
204
        f.timestamp = m_pending[i].time;
205
        f.values.push_back(m_pending[i].freq);
206
        fs[0].push_back(f);
207
    }
208

    
209
    Feature nf;
210
    nf.hasTimestamp = true;
211
    nf.hasDuration = true;
212
    Note n = getAveragedNote();
213
    nf.timestamp = n.time;
214
    nf.duration = n.duration;
215
    nf.values.push_back(n.freq);
216
    fs[1].push_back(nf);
217
}
218

    
219
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
220
    Plugin(inputSampleRate),
221
    m_channels(0),
222
    m_stepSize(256),
223
    m_blockSize(1024),
224
    m_fmin(50),
225
    m_fmax(900),
226
    m_vflen(1),
227
    m_binFrom(0),
228
    m_binTo(0),
229
    m_bins(0)
230
{
231
}
232

    
233
CepstrumPitchTracker::~CepstrumPitchTracker()
234
{
235
}
236

    
237
string
238
CepstrumPitchTracker::getIdentifier() const
239
{
240
    return "cepstrum-pitch";
241
}
242

    
243
string
244
CepstrumPitchTracker::getName() const
245
{
246
    return "Cepstrum Pitch Tracker";
247
}
248

    
249
string
250
CepstrumPitchTracker::getDescription() const
251
{
252
    return "Estimate f0 of monophonic material using a cepstrum method.";
253
}
254

    
255
string
256
CepstrumPitchTracker::getMaker() const
257
{
258
    return "Chris Cannam";
259
}
260

    
261
int
262
CepstrumPitchTracker::getPluginVersion() const
263
{
264
    // Increment this each time you release a version that behaves
265
    // differently from the previous one
266
    return 1;
267
}
268

    
269
string
270
CepstrumPitchTracker::getCopyright() const
271
{
272
    return "Freely redistributable (BSD license)";
273
}
274

    
275
CepstrumPitchTracker::InputDomain
276
CepstrumPitchTracker::getInputDomain() const
277
{
278
    return FrequencyDomain;
279
}
280

    
281
size_t
282
CepstrumPitchTracker::getPreferredBlockSize() const
283
{
284
    return 1024;
285
}
286

    
287
size_t 
288
CepstrumPitchTracker::getPreferredStepSize() const
289
{
290
    return 256;
291
}
292

    
293
size_t
294
CepstrumPitchTracker::getMinChannelCount() const
295
{
296
    return 1;
297
}
298

    
299
size_t
300
CepstrumPitchTracker::getMaxChannelCount() const
301
{
302
    return 1;
303
}
304

    
305
CepstrumPitchTracker::ParameterList
306
CepstrumPitchTracker::getParameterDescriptors() const
307
{
308
    ParameterList list;
309
    return list;
310
}
311

    
312
float
313
CepstrumPitchTracker::getParameter(string identifier) const
314
{
315
    return 0.f;
316
}
317

    
318
void
319
CepstrumPitchTracker::setParameter(string identifier, float value) 
320
{
321
}
322

    
323
CepstrumPitchTracker::ProgramList
324
CepstrumPitchTracker::getPrograms() const
325
{
326
    ProgramList list;
327
    return list;
328
}
329

    
330
string
331
CepstrumPitchTracker::getCurrentProgram() const
332
{
333
    return ""; // no programs
334
}
335

    
336
void
337
CepstrumPitchTracker::selectProgram(string name)
338
{
339
}
340

    
341
CepstrumPitchTracker::OutputList
342
CepstrumPitchTracker::getOutputDescriptors() const
343
{
344
    OutputList outputs;
345

    
346
    int n = 0;
347

    
348
    OutputDescriptor d;
349

    
350
    d.identifier = "f0";
351
    d.name = "Estimated f0";
352
    d.description = "Estimated fundamental frequency";
353
    d.unit = "Hz";
354
    d.hasFixedBinCount = true;
355
    d.binCount = 1;
356
    d.hasKnownExtents = true;
357
    d.minValue = m_fmin;
358
    d.maxValue = m_fmax;
359
    d.isQuantized = false;
360
    d.sampleType = OutputDescriptor::FixedSampleRate;
361
    d.sampleRate = (m_inputSampleRate / m_stepSize);
362
    d.hasDuration = false;
363
    outputs.push_back(d);
364

    
365
    d.identifier = "notes";
366
    d.name = "Notes";
367
    d.description = "Derived fixed-pitch note frequencies";
368
    d.unit = "Hz";
369
    d.hasFixedBinCount = true;
370
    d.binCount = 1;
371
    d.hasKnownExtents = true;
372
    d.minValue = m_fmin;
373
    d.maxValue = m_fmax;
374
    d.isQuantized = false;
375
    d.sampleType = OutputDescriptor::FixedSampleRate;
376
    d.sampleRate = (m_inputSampleRate / m_stepSize);
377
    d.hasDuration = true;
378
    outputs.push_back(d);
379

    
380
    return outputs;
381
}
382

    
383
bool
384
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
385
{
386
    if (channels < getMinChannelCount() ||
387
        channels > getMaxChannelCount()) return false;
388

    
389
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
390
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
391
//              << std::endl;
392

    
393
    m_channels = channels;
394
    m_stepSize = stepSize;
395
    m_blockSize = blockSize;
396

    
397
    m_binFrom = int(m_inputSampleRate / m_fmax);
398
    m_binTo = int(m_inputSampleRate / m_fmin); 
399

    
400
    if (m_binTo >= (int)m_blockSize / 2) {
401
        m_binTo = m_blockSize / 2 - 1;
402
    }
403

    
404
    m_bins = (m_binTo - m_binFrom) + 1;
405

    
406
    reset();
407

    
408
    return true;
409
}
410

    
411
void
412
CepstrumPitchTracker::reset()
413
{
414
}
415

    
416
void
417
CepstrumPitchTracker::filter(const double *cep, double *data)
418
{
419
    for (int i = 0; i < m_bins; ++i) {
420
        double v = 0;
421
        int n = 0;
422
        // average according to the vertical filter length
423
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
424
            int ix = i + m_binFrom + j;
425
            if (ix >= 0 && ix < m_blockSize) {
426
                v += cep[ix];
427
                ++n;
428
            }
429
        }
430
        data[i] = v / n;
431
    }
432
}
433

    
434
double
435
CepstrumPitchTracker::cubicInterpolate(const double y[4], double x)
436
{
437
    double a0 = y[3] - y[2] - y[0] + y[1];
438
    double a1 = y[0] - y[1] - a0;
439
    double a2 = y[2] - y[0];
440
    double a3 = y[1];
441
    return
442
        a0 * x * x * x +
443
        a1 * x * x +
444
        a2 * x +
445
        a3;
446
}
447

    
448
double
449
CepstrumPitchTracker::findInterpolatedPeak(const double *in, int maxbin)
450
{
451
    if (maxbin < 2 || maxbin > m_bins - 3) {
452
        return maxbin;
453
    }
454

    
455
    double maxval = 0.0;
456
    double maxidx = maxbin;
457

    
458
    const int divisions = 10;
459
    double y[4];
460

    
461
    y[0] = in[maxbin-1];
462
    y[1] = in[maxbin];
463
    y[2] = in[maxbin+1];
464
    y[3] = in[maxbin+2];
465
    for (int i = 0; i < divisions; ++i) {
466
        double probe = double(i) / double(divisions);
467
        double value = cubicInterpolate(y, probe);
468
        if (value > maxval) {
469
            maxval = value; 
470
            maxidx = maxbin + probe;
471
        }
472
    }
473

    
474
    y[3] = y[2];
475
    y[2] = y[1];
476
    y[1] = y[0];
477
    y[0] = in[maxbin-2];
478
    for (int i = 0; i < divisions; ++i) {
479
        double probe = double(i) / double(divisions);
480
        double value = cubicInterpolate(y, probe);
481
        if (value > maxval) {
482
            maxval = value; 
483
            maxidx = maxbin - 1 + probe;
484
        }
485
    }
486

    
487
/*
488
    std::cerr << "centre = " << maxbin << ": ["
489
              << in[maxbin-2] << ","
490
              << in[maxbin-1] << ","
491
              << in[maxbin] << ","
492
              << in[maxbin+1] << ","
493
              << in[maxbin+2] << "] -> " << maxidx << std::endl;
494
*/
495

    
496
    return maxidx;
497
}
498

    
499
CepstrumPitchTracker::FeatureSet
500
CepstrumPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
501
{
502
    FeatureSet fs;
503

    
504
    int bs = m_blockSize;
505
    int hs = m_blockSize/2 + 1;
506

    
507
    double *rawcep = new double[bs];
508
    double *io = new double[bs];
509
    double *logmag = new double[bs];
510

    
511
    // The "inverse symmetric" method. Seems to be the most reliable
512
        
513
    double magmean = 0.0;
514

    
515
    for (int i = 0; i < hs; ++i) {
516

    
517
        double power =
518
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
519
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
520
        double mag = sqrt(power);
521

    
522
        magmean += mag;
523

    
524
        double lm = log(mag + 0.00000001);
525
        
526
        logmag[i] = lm;
527
        if (i > 0) logmag[bs - i] = lm;
528
    }
529

    
530
    magmean /= hs;
531
    double threshold = 0.1; // for magmean
532
    
533
    fft(bs, true, logmag, 0, rawcep, io);
534
    
535
    delete[] logmag;
536
    delete[] io;
537

    
538
    int n = m_bins;
539
    double *data = new double[n];
540
    filter(rawcep, data);
541
    delete[] rawcep;
542

    
543
    double maxval = 0.0;
544
    int maxbin = -1;
545

    
546
    for (int i = 0; i < n; ++i) {
547
        if (data[i] > maxval) {
548
            maxval = data[i];
549
            maxbin = i;
550
        }
551
    }
552

    
553
    if (maxbin < 0) {
554
        delete[] data;
555
        return fs;
556
    }
557

    
558
    double nextPeakVal = 0.0;
559
    for (int i = 1; i+1 < n; ++i) {
560
        if (data[i] > data[i-1] &&
561
            data[i] > data[i+1] &&
562
            i != maxbin &&
563
            data[i] > nextPeakVal) {
564
            nextPeakVal = data[i];
565
        }
566
    }
567

    
568
    double cimax = findInterpolatedPeak(data, maxbin);
569
    double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
570

    
571
    double confidence = 0.0;
572
    if (nextPeakVal != 0.0) {
573
        confidence = (maxval - nextPeakVal) / 100.0;
574
        if (magmean < threshold) confidence = 0.0;
575
        std::cerr << "magmean = " << magmean << ", confidence = " << confidence << std::endl;
576
    }
577

    
578
    Hypothesis::Estimate e;
579
    e.freq = peakfreq;
580
    e.time = timestamp;
581
    e.confidence = confidence;
582

    
583
    m_accepted.advanceTime();
584

    
585
    for (int i = 0; i < m_possible.size(); ++i) {
586
        m_possible[i].advanceTime();
587
    }
588

    
589
    if (!m_accepted.test(e)) {
590

    
591
        int candidate = -1;
592
        bool accepted = false;
593

    
594
        for (int i = 0; i < m_possible.size(); ++i) {
595
            if (m_possible[i].test(e)) {
596
                accepted = true;
597
                if (m_possible[i].getState() == Hypothesis::Satisfied) {
598
                    candidate = i;
599
                }
600
                break;
601
            }
602
        }
603

    
604
        if (!accepted) {
605
            Hypothesis h;
606
            h.test(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
607
            m_possible.push_back(h);
608
        }
609

    
610
        if (m_accepted.getState() == Hypothesis::Expired) {
611
            m_accepted.addFeatures(fs);
612
        }
613
        
614
        if (m_accepted.getState() == Hypothesis::Expired ||
615
            m_accepted.getState() == Hypothesis::Rejected) {
616
            if (candidate >= 0) {
617
                m_accepted = m_possible[candidate];
618
            } else {
619
                m_accepted = Hypothesis();
620
            }
621
        }
622

    
623
        // reap rejected/expired hypotheses from possible list
624
        Hypotheses toReap = m_possible;
625
        m_possible.clear();
626
        for (int i = 0; i < toReap.size(); ++i) {
627
            Hypothesis h = toReap[i];
628
            if (h.getState() != Hypothesis::Rejected && 
629
                h.getState() != Hypothesis::Expired) {
630
                m_possible.push_back(h);
631
            }
632
        }
633
    }  
634

    
635
    std::cerr << "accepted length = " << m_accepted.getPendingLength()
636
              << ", state = " << m_accepted.getState()
637
              << ", hypothesis count = " << m_possible.size() << std::endl;
638

    
639
    delete[] data;
640
    return fs;
641
}
642

    
643
CepstrumPitchTracker::FeatureSet
644
CepstrumPitchTracker::getRemainingFeatures()
645
{
646
    FeatureSet fs;
647
    if (m_accepted.getState() == Hypothesis::Satisfied) {
648
        m_accepted.addFeatures(fs);
649
    }
650
    return fs;
651
}
652

    
653
void
654
CepstrumPitchTracker::fft(unsigned int n, bool inverse,
655
                          double *ri, double *ii, double *ro, double *io)
656
{
657
    if (!ri || !ro || !io) return;
658

    
659
    unsigned int bits;
660
    unsigned int i, j, k, m;
661
    unsigned int blockSize, blockEnd;
662

    
663
    double tr, ti;
664

    
665
    if (n < 2) return;
666
    if (n & (n-1)) return;
667

    
668
    double angle = 2.0 * M_PI;
669
    if (inverse) angle = -angle;
670

    
671
    for (i = 0; ; ++i) {
672
        if (n & (1 << i)) {
673
            bits = i;
674
            break;
675
        }
676
    }
677

    
678
    int table[n];
679

    
680
    for (i = 0; i < n; ++i) {
681
        m = i;
682
        for (j = k = 0; j < bits; ++j) {
683
            k = (k << 1) | (m & 1);
684
            m >>= 1;
685
        }
686
        table[i] = k;
687
    }
688

    
689
    if (ii) {
690
        for (i = 0; i < n; ++i) {
691
            ro[table[i]] = ri[i];
692
            io[table[i]] = ii[i];
693
        }
694
    } else {
695
        for (i = 0; i < n; ++i) {
696
            ro[table[i]] = ri[i];
697
            io[table[i]] = 0.0;
698
        }
699
    }
700

    
701
    blockEnd = 1;
702

    
703
    for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
704

    
705
        double delta = angle / (double)blockSize;
706
        double sm2 = -sin(-2 * delta);
707
        double sm1 = -sin(-delta);
708
        double cm2 = cos(-2 * delta);
709
        double cm1 = cos(-delta);
710
        double w = 2 * cm1;
711
        double ar[3], ai[3];
712

    
713
        for (i = 0; i < n; i += blockSize) {
714

    
715
            ar[2] = cm2;
716
            ar[1] = cm1;
717

    
718
            ai[2] = sm2;
719
            ai[1] = sm1;
720

    
721
            for (j = i, m = 0; m < blockEnd; j++, m++) {
722

    
723
                ar[0] = w * ar[1] - ar[2];
724
                ar[2] = ar[1];
725
                ar[1] = ar[0];
726

    
727
                ai[0] = w * ai[1] - ai[2];
728
                ai[2] = ai[1];
729
                ai[1] = ai[0];
730

    
731
                k = j + blockEnd;
732
                tr = ar[0] * ro[k] - ai[0] * io[k];
733
                ti = ar[0] * io[k] + ai[0] * ro[k];
734

    
735
                ro[k] = ro[j] - tr;
736
                io[k] = io[j] - ti;
737

    
738
                ro[j] += tr;
739
                io[j] += ti;
740
            }
741
        }
742

    
743
        blockEnd = blockSize;
744
    }
745
}
746

    
747