To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstrumPitchTracker.cpp @ 14:98256077e2a2

History | View | Annotate | Download (15.2 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

    
23
#include "CepstrumPitchTracker.h"
24

    
25
#include <vector>
26
#include <algorithm>
27

    
28
#include <cstdio>
29
#include <cmath>
30
#include <complex>
31

    
32
using std::string;
33
using std::vector;
34

    
35
CepstrumPitchTracker::Hypothesis::Hypothesis()
36
{
37
    m_state = New;
38
    m_age = 0;
39
}
40

    
41
CepstrumPitchTracker::Hypothesis::~Hypothesis()
42
{
43
}
44

    
45
bool
46
CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s)
47
{
48
    if (m_pending.empty()) {
49
        return true;
50
    }
51
    Estimate last = m_pending[m_pending.size()-1];
52
    double r = s.freq / last.freq;
53
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
54
    return (cents > -100 && cents < 100);
55
}
56

    
57
bool 
58
CepstrumPitchTracker::Hypothesis::isSatisfied()
59
{
60
    return (m_pending.size() > 2);
61
}
62

    
63
void
64
CepstrumPitchTracker::Hypothesis::advanceTime()
65
{
66
    ++m_age;
67
}
68

    
69
bool
70
CepstrumPitchTracker::Hypothesis::test(Estimate s)
71
{
72
    bool accept = false;
73

    
74
    switch (m_state) {
75

    
76
    case New:
77
        m_state = Provisional;
78
        accept = true;
79
        break;
80

    
81
    case Provisional:
82
        if (m_age > 3) {
83
            m_state = Rejected;
84
        } else if (isWithinTolerance(s)) {
85
            accept = true;
86
        }
87
        break;
88
        
89
    case Satisfied:
90
        if (m_age > 3) {
91
            m_state = Expired;
92
        } else if (isWithinTolerance(s)) {
93
            accept = true;
94
        }
95
        break;
96

    
97
    case Rejected:
98
        break;
99

    
100
    case Expired:
101
        break;
102
    }
103

    
104
    if (accept) {
105
        m_pending.push_back(s);
106
        m_age = 0;
107
        if (m_state == Provisional && isSatisfied()) {
108
            m_state = Satisfied;
109
        }
110
    }
111

    
112
    return accept && (m_state == Satisfied);
113
}        
114

    
115
CepstrumPitchTracker::Hypothesis::State
116
CepstrumPitchTracker::Hypothesis::getState()
117
{
118
    return m_state;
119
}
120

    
121
int
122
CepstrumPitchTracker::Hypothesis::getPendingLength()
123
{
124
    return m_pending.size();
125
}
126

    
127
CepstrumPitchTracker::Hypothesis::Estimates
128
CepstrumPitchTracker::Hypothesis::getAcceptedEstimates()
129
{
130
    if (m_state == Satisfied || m_state == Expired) {
131
        return m_pending;
132
    } else {
133
        return Estimates();
134
    }
135
}
136

    
137
void
138
CepstrumPitchTracker::Hypothesis::addFeatures(FeatureList &fl)
139
{
140
    for (int i = 0; i < m_pending.size(); ++i) {
141
        Feature f;
142
        f.hasTimestamp = true;
143
        f.timestamp = m_pending[i].time;
144
        f.values.push_back(m_pending[i].freq);
145
        fl.push_back(f);
146
    }
147
}
148

    
149
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
150
    Plugin(inputSampleRate),
151
    m_channels(0),
152
    m_stepSize(256),
153
    m_blockSize(1024),
154
    m_fmin(50),
155
    m_fmax(1000),
156
    m_histlen(1),
157
    m_vflen(3),
158
    m_binFrom(0),
159
    m_binTo(0),
160
    m_bins(0),
161
    m_history(0),
162
    m_prevpeak(0),
163
    m_prevprop(0)
164
{
165
}
166

    
167
CepstrumPitchTracker::~CepstrumPitchTracker()
168
{
169
    if (m_history) {
170
        for (int i = 0; i < m_histlen; ++i) {
171
            delete[] m_history[i];
172
        }
173
        delete[] m_history;
174
    }
175
}
176

    
177
string
178
CepstrumPitchTracker::getIdentifier() const
179
{
180
    return "cepstrum-pitch";
181
}
182

    
183
string
184
CepstrumPitchTracker::getName() const
185
{
186
    return "Cepstrum Pitch Tracker";
187
}
188

    
189
string
190
CepstrumPitchTracker::getDescription() const
191
{
192
    return "Estimate f0 of monophonic material using a cepstrum method.";
193
}
194

    
195
string
196
CepstrumPitchTracker::getMaker() const
197
{
198
    return "Chris Cannam";
199
}
200

    
201
int
202
CepstrumPitchTracker::getPluginVersion() const
203
{
204
    // Increment this each time you release a version that behaves
205
    // differently from the previous one
206
    return 1;
207
}
208

    
209
string
210
CepstrumPitchTracker::getCopyright() const
211
{
212
    return "Freely redistributable (BSD license)";
213
}
214

    
215
CepstrumPitchTracker::InputDomain
216
CepstrumPitchTracker::getInputDomain() const
217
{
218
    return FrequencyDomain;
219
}
220

    
221
size_t
222
CepstrumPitchTracker::getPreferredBlockSize() const
223
{
224
    return 1024;
225
}
226

    
227
size_t 
228
CepstrumPitchTracker::getPreferredStepSize() const
229
{
230
    return 256;
231
}
232

    
233
size_t
234
CepstrumPitchTracker::getMinChannelCount() const
235
{
236
    return 1;
237
}
238

    
239
size_t
240
CepstrumPitchTracker::getMaxChannelCount() const
241
{
242
    return 1;
243
}
244

    
245
CepstrumPitchTracker::ParameterList
246
CepstrumPitchTracker::getParameterDescriptors() const
247
{
248
    ParameterList list;
249
    return list;
250
}
251

    
252
float
253
CepstrumPitchTracker::getParameter(string identifier) const
254
{
255
    return 0.f;
256
}
257

    
258
void
259
CepstrumPitchTracker::setParameter(string identifier, float value) 
260
{
261
}
262

    
263
CepstrumPitchTracker::ProgramList
264
CepstrumPitchTracker::getPrograms() const
265
{
266
    ProgramList list;
267
    return list;
268
}
269

    
270
string
271
CepstrumPitchTracker::getCurrentProgram() const
272
{
273
    return ""; // no programs
274
}
275

    
276
void
277
CepstrumPitchTracker::selectProgram(string name)
278
{
279
}
280

    
281
CepstrumPitchTracker::OutputList
282
CepstrumPitchTracker::getOutputDescriptors() const
283
{
284
    OutputList outputs;
285

    
286
    int n = 0;
287

    
288
    OutputDescriptor d;
289

    
290
    d.identifier = "f0";
291
    d.name = "Estimated f0";
292
    d.description = "Estimated fundamental frequency";
293
    d.unit = "Hz";
294
    d.hasFixedBinCount = true;
295
    d.binCount = 1;
296
    d.hasKnownExtents = true;
297
    d.minValue = m_fmin;
298
    d.maxValue = m_fmax;
299
    d.isQuantized = false;
300
    d.sampleType = OutputDescriptor::FixedSampleRate;
301
    d.sampleRate = (m_inputSampleRate / m_stepSize);
302
    d.hasDuration = false;
303
    outputs.push_back(d);
304

    
305
    return outputs;
306
}
307

    
308
bool
309
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
310
{
311
    if (channels < getMinChannelCount() ||
312
        channels > getMaxChannelCount()) return false;
313

    
314
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
315
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
316
//              << std::endl;
317

    
318
    m_channels = channels;
319
    m_stepSize = stepSize;
320
    m_blockSize = blockSize;
321

    
322
    m_binFrom = int(m_inputSampleRate / m_fmax);
323
    m_binTo = int(m_inputSampleRate / m_fmin); 
324

    
325
    if (m_binTo >= (int)m_blockSize / 2) {
326
        m_binTo = m_blockSize / 2 - 1;
327
    }
328

    
329
    m_bins = (m_binTo - m_binFrom) + 1;
330

    
331
    m_history = new double *[m_histlen];
332
    for (int i = 0; i < m_histlen; ++i) {
333
        m_history[i] = new double[m_bins];
334
    }
335

    
336
    reset();
337

    
338
    return true;
339
}
340

    
341
void
342
CepstrumPitchTracker::reset()
343
{
344
    for (int i = 0; i < m_histlen; ++i) {
345
        for (int j = 0; j < m_bins; ++j) {
346
            m_history[i][j] = 0.0;
347
        }
348
    }
349
}
350

    
351
void
352
CepstrumPitchTracker::filter(const double *cep, double *result)
353
{
354
    int hix = m_histlen - 1; // current history index
355

    
356
    // roll back the history
357
    if (m_histlen > 1) {
358
        double *oldest = m_history[0];
359
        for (int i = 1; i < m_histlen; ++i) {
360
            m_history[i-1] = m_history[i];
361
        }
362
        // and stick this back in the newest spot, to recycle
363
        m_history[hix] = oldest;
364
    }
365

    
366
    for (int i = 0; i < m_bins; ++i) {
367
        double v = 0;
368
        int n = 0;
369
        // average according to the vertical filter length
370
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
371
            int ix = i + m_binFrom + j;
372
            if (ix >= 0 && ix < m_blockSize) {
373
                v += cep[ix];
374
                ++n;
375
            }
376
        }
377
        m_history[hix][i] = v / n;
378
    }
379

    
380
    for (int i = 0; i < m_bins; ++i) {
381
        double mean = 0.0;
382
        for (int j = 0; j < m_histlen; ++j) {
383
            mean += m_history[j][i];
384
        }
385
        mean /= m_histlen;
386
        result[i] = mean;
387
    }
388
}
389

    
390
double
391
CepstrumPitchTracker::calculatePeakProportion(const double *data, double abstot, int n)
392
{
393
    double aroundPeak = data[n];
394
    double peakProportion = 0.0;
395

    
396
    int i = n - 1;
397
    while (i > 0 && data[i] <= data[i+1]) {
398
        aroundPeak += fabs(data[i]);
399
        --i;
400
    }
401
    i = n + 1;
402
    while (i < m_bins && data[i] <= data[i-1]) {
403
        aroundPeak += fabs(data[i]);
404
        ++i;
405
    }
406
    peakProportion = aroundPeak / abstot;
407

    
408
    return peakProportion;
409
}
410

    
411
bool
412
CepstrumPitchTracker::acceptPeak(int n, double peakProportion)
413
{
414
    bool accept = false;
415

    
416
    if (abs(n - m_prevpeak) < 10) { //!!! should depend on bin count
417
        accept = true;
418
    } else if (peakProportion > m_prevprop * 2) {
419
        accept = true;
420
    }
421

    
422
    return accept;
423
}
424

    
425
CepstrumPitchTracker::FeatureSet
426
CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
427
{
428
    FeatureSet fs;
429

    
430
    int bs = m_blockSize;
431
    int hs = m_blockSize/2 + 1;
432

    
433
    double *rawcep = new double[bs];
434
    double *io = new double[bs];
435
    double *logmag = new double[bs];
436

    
437
    // The "inverse symmetric" method. Seems to be the most reliable
438
        
439
    for (int i = 0; i < hs; ++i) {
440

    
441
        double power =
442
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
443
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
444
        double mag = sqrt(power);
445
        
446
        double lm = log(mag + 0.00000001);
447
        
448
        logmag[i] = lm;
449
        if (i > 0) logmag[bs - i] = lm;
450
    }
451

    
452
    fft(bs, true, logmag, 0, rawcep, io);
453
    
454
    delete[] logmag;
455
    delete[] io;
456

    
457
    int n = m_bins;
458
    double *data = new double[n];
459
    filter(rawcep, data);
460
    delete[] rawcep;
461

    
462
    double abstot = 0.0;
463

    
464
    for (int i = 0; i < n; ++i) {
465
        abstot += fabs(data[i]);
466
    }
467

    
468
    double maxval = 0.0;
469
    int maxbin = -1;
470

    
471
    for (int i = 0; i < n; ++i) {
472
        if (data[i] > maxval) {
473
            maxval = data[i];
474
            maxbin = i;
475
        }
476
    }
477

    
478
    if (maxbin < 0) return fs;
479

    
480
    double peakfreq = m_inputSampleRate / (maxbin + m_binFrom);
481
    Hypothesis::Estimate e;
482
    e.freq = peakfreq;
483
    e.time = timestamp;
484

    
485
    m_accepted.advanceTime();
486

    
487
    for (int i = 0; i < m_possible.size(); ++i) {
488
        m_possible[i].advanceTime();
489
    }
490

    
491
    if (!m_accepted.test(e)) {
492

    
493
        int candidate = -1;
494
        bool accepted = false;
495

    
496
        for (int i = 0; i < m_possible.size(); ++i) {
497
            if (m_possible[i].test(e)) {
498
                accepted = true;
499
                if (m_possible[i].getState() == Hypothesis::Satisfied) {
500
                    candidate = i;
501
                }
502
                break;
503
            }
504
        }
505

    
506
        if (!accepted) {
507
            Hypothesis h;
508
            h.test(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
509
            m_possible.push_back(h);
510
        }
511

    
512
        if (m_accepted.getState() == Hypothesis::Expired) {
513
            m_accepted.addFeatures(fs[0]);
514
        }
515
        
516
        if (m_accepted.getState() == Hypothesis::Expired ||
517
            m_accepted.getState() == Hypothesis::Rejected) {
518
            if (candidate >= 0) {
519
                m_accepted = m_possible[candidate];
520
            } else {
521
                m_accepted = Hypothesis();
522
            }
523
        }
524

    
525
        // reap rejected/expired hypotheses from possible list
526
        Hypotheses toReap = m_possible;
527
        m_possible.clear();
528
        for (int i = 0; i < toReap.size(); ++i) {
529
            Hypothesis h = toReap[i];
530
            if (h.getState() != Hypothesis::Rejected && 
531
                h.getState() != Hypothesis::Expired) {
532
                m_possible.push_back(h);
533
            }
534
        }
535
    }  
536

    
537
        std::cerr << "accepted length = " << m_accepted.getPendingLength()
538
                  << ", state = " << m_accepted.getState()
539
                  << ", hypothesis count = " << m_possible.size() << std::endl;
540

    
541
            
542

    
543
/*
544
    bool accepted = false;
545

546
    if (maxbin >= 0) {
547
        double pp = calculatePeakProportion(data, abstot, maxbin);
548
        if (acceptPeak(maxbin, pp)) {
549
            accepted = true;
550
        } else {
551
            // try a secondary peak
552
            maxval = 0.0;
553
            int secondbin = 0;
554
            for (int i = 1; i < n-1; ++i) {
555
                if (i != maxbin &&
556
                    data[i] > data[i-1] &&
557
                    data[i] > data[i+1] &&
558
                    data[i] > maxval) {
559
                    maxval = data[i];
560
                    secondbin = i;
561
                }
562
            }
563
            double spp = calculatePeakProportion(data, abstot, secondbin);
564
            if (acceptPeak(secondbin, spp)) {
565
                maxbin = secondbin;
566
                pp = spp;
567
                accepted = true;
568
            }
569
        }
570
        if (accepted) {
571
            m_prevpeak = maxbin;
572
            m_prevprop = pp;
573
        }
574
    }
575
*/
576
//    std::cerr << "peakProportion = " << peakProportion << std::endl;
577
//    std::cerr << "peak = " << m_inputSampleRate / (maxbin + m_binFrom) << std::endl;
578
//    std::cerr << "bins = " << m_bins << std::endl;
579

    
580
//    if (peakProportion >= (0.00006 * m_bins)) {
581
/*
582
    if (accepted) {
583
        Feature f;
584
        f.hasTimestamp = true;
585
        f.timestamp = timestamp;
586
        f.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
587
        fs[0].push_back(f);
588
    }
589
*/
590
    delete[] data;
591
    return fs;
592
}
593

    
594
CepstrumPitchTracker::FeatureSet
595
CepstrumPitchTracker::getRemainingFeatures()
596
{
597
    FeatureSet fs;
598
    if (m_accepted.getState() != Hypothesis::New) {
599
        m_accepted.addFeatures(fs[0]);
600
    }
601
    return fs;
602
}
603

    
604
void
605
CepstrumPitchTracker::fft(unsigned int n, bool inverse,
606
                    double *ri, double *ii, double *ro, double *io)
607
{
608
    if (!ri || !ro || !io) return;
609

    
610
    unsigned int bits;
611
    unsigned int i, j, k, m;
612
    unsigned int blockSize, blockEnd;
613

    
614
    double tr, ti;
615

    
616
    if (n < 2) return;
617
    if (n & (n-1)) return;
618

    
619
    double angle = 2.0 * M_PI;
620
    if (inverse) angle = -angle;
621

    
622
    for (i = 0; ; ++i) {
623
        if (n & (1 << i)) {
624
            bits = i;
625
            break;
626
        }
627
    }
628

    
629
    static unsigned int tableSize = 0;
630
    static int *table = 0;
631

    
632
    if (tableSize != n) {
633

    
634
        delete[] table;
635

    
636
        table = new int[n];
637

    
638
        for (i = 0; i < n; ++i) {
639
        
640
            m = i;
641

    
642
            for (j = k = 0; j < bits; ++j) {
643
                k = (k << 1) | (m & 1);
644
                m >>= 1;
645
            }
646

    
647
            table[i] = k;
648
        }
649

    
650
        tableSize = n;
651
    }
652

    
653
    if (ii) {
654
        for (i = 0; i < n; ++i) {
655
            ro[table[i]] = ri[i];
656
            io[table[i]] = ii[i];
657
        }
658
    } else {
659
        for (i = 0; i < n; ++i) {
660
            ro[table[i]] = ri[i];
661
            io[table[i]] = 0.0;
662
        }
663
    }
664

    
665
    blockEnd = 1;
666

    
667
    for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
668

    
669
        double delta = angle / (double)blockSize;
670
        double sm2 = -sin(-2 * delta);
671
        double sm1 = -sin(-delta);
672
        double cm2 = cos(-2 * delta);
673
        double cm1 = cos(-delta);
674
        double w = 2 * cm1;
675
        double ar[3], ai[3];
676

    
677
        for (i = 0; i < n; i += blockSize) {
678

    
679
            ar[2] = cm2;
680
            ar[1] = cm1;
681

    
682
            ai[2] = sm2;
683
            ai[1] = sm1;
684

    
685
            for (j = i, m = 0; m < blockEnd; j++, m++) {
686

    
687
                ar[0] = w * ar[1] - ar[2];
688
                ar[2] = ar[1];
689
                ar[1] = ar[0];
690

    
691
                ai[0] = w * ai[1] - ai[2];
692
                ai[2] = ai[1];
693
                ai[1] = ai[0];
694

    
695
                k = j + blockEnd;
696
                tr = ar[0] * ro[k] - ai[0] * io[k];
697
                ti = ar[0] * io[k] + ai[0] * ro[k];
698

    
699
                ro[k] = ro[j] - tr;
700
                io[k] = io[j] - ti;
701

    
702
                ro[j] += tr;
703
                io[j] += ti;
704
            }
705
        }
706

    
707
        blockEnd = blockSize;
708
    }
709
}
710

    
711