To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstrumPitchTracker.cpp @ 12:1daaf43a5459

History | View | Annotate | Download (14.7 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

    
23
#include "CepstrumPitchTracker.h"
24

    
25
#include <vector>
26
#include <algorithm>
27

    
28
#include <cstdio>
29
#include <cmath>
30
#include <complex>
31

    
32
using std::string;
33
using std::vector;
34

    
35
CepstrumPitchTracker::Hypothesis::Hypothesis()
36
{
37
    m_state = New;
38
    m_age = 0;
39
}
40

    
41
CepstrumPitchTracker::Hypothesis::~Hypothesis()
42
{
43
}
44

    
45
bool
46
CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s)
47
{
48
    if (m_pending.empty()) {
49
        return true;
50
    }
51
    Estimate last = m_pending[m_pending.size()-1];
52
    double r = s.freq / last.freq;
53
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
54
    return (cents > -200 && cents < 200);
55
}
56

    
57
bool 
58
CepstrumPitchTracker::Hypothesis::isSatisfied()
59
{
60
    return (m_pending.size() > 2);
61
}
62

    
63
void
64
CepstrumPitchTracker::Hypothesis::advanceTime()
65
{
66
    ++m_age;
67
}
68

    
69
bool
70
CepstrumPitchTracker::Hypothesis::test(Estimate s)
71
{
72
    bool accept = false;
73

    
74
    switch (m_state) {
75

    
76
    case New:
77
        m_state = Provisional;
78
        accept = true;
79
        break;
80

    
81
    case Provisional:
82
        if (m_age > 3) {
83
            m_state = Rejected;
84
        } else if (isWithinTolerance(s)) {
85
            accept = true;
86
        }
87
        break;
88
        
89
    case Satisfied:
90
        if (m_age > 3) {
91
            m_state = Expired;
92
        } else if (isWithinTolerance(s)) {
93
            accept = true;
94
        }
95
        break;
96

    
97
    case Rejected:
98
        break;
99

    
100
    case Expired:
101
        break;
102
    }
103

    
104
    if (accept) {
105
        m_pending.push_back(s);
106
        m_age = 0;
107
        if (m_state == Provisional && isSatisfied()) {
108
            m_state = Satisfied;
109
        }
110
    }
111

    
112
    return accept;
113
}        
114

    
115
CepstrumPitchTracker::Hypothesis::State
116
CepstrumPitchTracker::Hypothesis::getState()
117
{
118
    return m_state;
119
}
120

    
121
int
122
CepstrumPitchTracker::Hypothesis::getPendingLength()
123
{
124
    return m_pending.size();
125
}
126

    
127
CepstrumPitchTracker::Hypothesis::Estimates
128
CepstrumPitchTracker::Hypothesis::getAcceptedEstimates()
129
{
130
    if (m_state == Satisfied || m_state == Expired) {
131
        return m_pending;
132
    } else {
133
        return Estimates();
134
    }
135
}
136

    
137
void
138
CepstrumPitchTracker::Hypothesis::addFeatures(FeatureList &fl)
139
{
140
    for (int i = 0; i < m_pending.size(); ++i) {
141
        Feature f;
142
        f.hasTimestamp = true;
143
        f.timestamp = m_pending[i].time;
144
        f.values.push_back(m_pending[i].freq);
145
        fl.push_back(f);
146
    }
147
}
148

    
149
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
150
    Plugin(inputSampleRate),
151
    m_channels(0),
152
    m_stepSize(256),
153
    m_blockSize(1024),
154
    m_fmin(50),
155
    m_fmax(1000),
156
    m_histlen(1),
157
    m_vflen(3),
158
    m_binFrom(0),
159
    m_binTo(0),
160
    m_bins(0),
161
    m_history(0),
162
    m_prevpeak(0),
163
    m_prevprop(0)
164
{
165
}
166

    
167
CepstrumPitchTracker::~CepstrumPitchTracker()
168
{
169
    if (m_history) {
170
        for (int i = 0; i < m_histlen; ++i) {
171
            delete[] m_history[i];
172
        }
173
        delete[] m_history;
174
    }
175
}
176

    
177
string
178
CepstrumPitchTracker::getIdentifier() const
179
{
180
    return "cepstrum-pitch";
181
}
182

    
183
string
184
CepstrumPitchTracker::getName() const
185
{
186
    return "Cepstrum Pitch Tracker";
187
}
188

    
189
string
190
CepstrumPitchTracker::getDescription() const
191
{
192
    return "Estimate f0 of monophonic material using a cepstrum method.";
193
}
194

    
195
string
196
CepstrumPitchTracker::getMaker() const
197
{
198
    return "Chris Cannam";
199
}
200

    
201
int
202
CepstrumPitchTracker::getPluginVersion() const
203
{
204
    // Increment this each time you release a version that behaves
205
    // differently from the previous one
206
    return 1;
207
}
208

    
209
string
210
CepstrumPitchTracker::getCopyright() const
211
{
212
    return "Freely redistributable (BSD license)";
213
}
214

    
215
CepstrumPitchTracker::InputDomain
216
CepstrumPitchTracker::getInputDomain() const
217
{
218
    return FrequencyDomain;
219
}
220

    
221
size_t
222
CepstrumPitchTracker::getPreferredBlockSize() const
223
{
224
    return 1024;
225
}
226

    
227
size_t 
228
CepstrumPitchTracker::getPreferredStepSize() const
229
{
230
    return 256;
231
}
232

    
233
size_t
234
CepstrumPitchTracker::getMinChannelCount() const
235
{
236
    return 1;
237
}
238

    
239
size_t
240
CepstrumPitchTracker::getMaxChannelCount() const
241
{
242
    return 1;
243
}
244

    
245
CepstrumPitchTracker::ParameterList
246
CepstrumPitchTracker::getParameterDescriptors() const
247
{
248
    ParameterList list;
249
    return list;
250
}
251

    
252
float
253
CepstrumPitchTracker::getParameter(string identifier) const
254
{
255
    return 0.f;
256
}
257

    
258
void
259
CepstrumPitchTracker::setParameter(string identifier, float value) 
260
{
261
}
262

    
263
CepstrumPitchTracker::ProgramList
264
CepstrumPitchTracker::getPrograms() const
265
{
266
    ProgramList list;
267
    return list;
268
}
269

    
270
string
271
CepstrumPitchTracker::getCurrentProgram() const
272
{
273
    return ""; // no programs
274
}
275

    
276
void
277
CepstrumPitchTracker::selectProgram(string name)
278
{
279
}
280

    
281
CepstrumPitchTracker::OutputList
282
CepstrumPitchTracker::getOutputDescriptors() const
283
{
284
    OutputList outputs;
285

    
286
    int n = 0;
287

    
288
    OutputDescriptor d;
289

    
290
    d.identifier = "f0";
291
    d.name = "Estimated f0";
292
    d.description = "Estimated fundamental frequency";
293
    d.unit = "Hz";
294
    d.hasFixedBinCount = true;
295
    d.binCount = 1;
296
    d.hasKnownExtents = true;
297
    d.minValue = m_fmin;
298
    d.maxValue = m_fmax;
299
    d.isQuantized = false;
300
    d.sampleType = OutputDescriptor::FixedSampleRate;
301
    d.sampleRate = (m_inputSampleRate / m_stepSize);
302
    d.hasDuration = false;
303
    outputs.push_back(d);
304

    
305
    return outputs;
306
}
307

    
308
bool
309
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
310
{
311
    if (channels < getMinChannelCount() ||
312
        channels > getMaxChannelCount()) return false;
313

    
314
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
315
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
316
//              << std::endl;
317

    
318
    m_channels = channels;
319
    m_stepSize = stepSize;
320
    m_blockSize = blockSize;
321

    
322
    m_binFrom = int(m_inputSampleRate / m_fmax);
323
    m_binTo = int(m_inputSampleRate / m_fmin); 
324

    
325
    if (m_binTo >= (int)m_blockSize / 2) {
326
        m_binTo = m_blockSize / 2 - 1;
327
    }
328

    
329
    m_bins = (m_binTo - m_binFrom) + 1;
330

    
331
    m_history = new double *[m_histlen];
332
    for (int i = 0; i < m_histlen; ++i) {
333
        m_history[i] = new double[m_bins];
334
    }
335

    
336
    reset();
337

    
338
    return true;
339
}
340

    
341
void
342
CepstrumPitchTracker::reset()
343
{
344
    for (int i = 0; i < m_histlen; ++i) {
345
        for (int j = 0; j < m_bins; ++j) {
346
            m_history[i][j] = 0.0;
347
        }
348
    }
349
}
350

    
351
void
352
CepstrumPitchTracker::filter(const double *cep, double *result)
353
{
354
    int hix = m_histlen - 1; // current history index
355

    
356
    // roll back the history
357
    if (m_histlen > 1) {
358
        double *oldest = m_history[0];
359
        for (int i = 1; i < m_histlen; ++i) {
360
            m_history[i-1] = m_history[i];
361
        }
362
        // and stick this back in the newest spot, to recycle
363
        m_history[hix] = oldest;
364
    }
365

    
366
    for (int i = 0; i < m_bins; ++i) {
367
        double v = 0;
368
        int n = 0;
369
        // average according to the vertical filter length
370
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
371
            int ix = i + m_binFrom + j;
372
            if (ix >= 0 && ix < m_blockSize) {
373
                v += cep[ix];
374
                ++n;
375
            }
376
        }
377
        m_history[hix][i] = v / n;
378
    }
379

    
380
    for (int i = 0; i < m_bins; ++i) {
381
        double mean = 0.0;
382
        for (int j = 0; j < m_histlen; ++j) {
383
            mean += m_history[j][i];
384
        }
385
        mean /= m_histlen;
386
        result[i] = mean;
387
    }
388
}
389

    
390
double
391
CepstrumPitchTracker::calculatePeakProportion(const double *data, double abstot, int n)
392
{
393
    double aroundPeak = data[n];
394
    double peakProportion = 0.0;
395

    
396
    int i = n - 1;
397
    while (i > 0 && data[i] <= data[i+1]) {
398
        aroundPeak += fabs(data[i]);
399
        --i;
400
    }
401
    i = n + 1;
402
    while (i < m_bins && data[i] <= data[i-1]) {
403
        aroundPeak += fabs(data[i]);
404
        ++i;
405
    }
406
    peakProportion = aroundPeak / abstot;
407

    
408
    return peakProportion;
409
}
410

    
411
bool
412
CepstrumPitchTracker::acceptPeak(int n, double peakProportion)
413
{
414
    bool accept = false;
415

    
416
    if (abs(n - m_prevpeak) < 10) { //!!! should depend on bin count
417
        accept = true;
418
    } else if (peakProportion > m_prevprop * 2) {
419
        accept = true;
420
    }
421

    
422
    return accept;
423
}
424

    
425
CepstrumPitchTracker::FeatureSet
426
CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
427
{
428
    FeatureSet fs;
429

    
430
    int bs = m_blockSize;
431
    int hs = m_blockSize/2 + 1;
432

    
433
    double *rawcep = new double[bs];
434
    double *io = new double[bs];
435
    double *logmag = new double[bs];
436

    
437
    // The "inverse symmetric" method. Seems to be the most reliable
438
        
439
    for (int i = 0; i < hs; ++i) {
440

    
441
        double power =
442
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
443
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
444
        double mag = sqrt(power);
445
        
446
        double lm = log(mag + 0.00000001);
447
        
448
        logmag[i] = lm;
449
        if (i > 0) logmag[bs - i] = lm;
450
    }
451

    
452
    fft(bs, true, logmag, 0, rawcep, io);
453
    
454
    delete[] logmag;
455
    delete[] io;
456

    
457
    int n = m_bins;
458
    double *data = new double[n];
459
    filter(rawcep, data);
460
    delete[] rawcep;
461

    
462
    double abstot = 0.0;
463

    
464
    for (int i = 0; i < n; ++i) {
465
        abstot += fabs(data[i]);
466
    }
467

    
468
    double maxval = 0.0;
469
    int maxbin = -1;
470

    
471
    for (int i = 0; i < n; ++i) {
472
        if (data[i] > maxval) {
473
            maxval = data[i];
474
            maxbin = i;
475
        }
476
    }
477

    
478
    if (maxbin < 0) return fs;
479

    
480
    double peakfreq = m_inputSampleRate / (maxbin + m_binFrom);
481
    Hypothesis::Estimate e;
482
    e.freq = peakfreq;
483
    e.time = timestamp;
484

    
485
    m_accepted.advanceTime();
486

    
487
    for (int i = 0; i < m_possible.size(); ++i) {
488
        m_possible[i].advanceTime();
489
    }
490

    
491
    if (!m_accepted.test(e)) {
492
        int candidate = -1;
493
        for (int i = 0; i < m_possible.size(); ++i) {
494
            if (m_possible[i].test(e)) {
495
                if (m_possible[i].getState() == Hypothesis::Satisfied) {
496
                    candidate = i;
497
                }
498
                break;
499
            }
500
        }
501

    
502
        if (m_accepted.getState() == Hypothesis::Expired) {
503
            m_accepted.addFeatures(fs[0]);
504
        }
505
        
506
        if (m_accepted.getState() == Hypothesis::Expired ||
507
            m_accepted.getState() == Hypothesis::Rejected) {
508
            if (candidate >= 0) {
509
                m_accepted = m_possible[candidate];
510
            } else {
511
                m_accepted = Hypothesis();
512
            }
513
        }
514

    
515
        std::cerr << "accepted length = " << m_accepted.getPendingLength()
516
                  << ", state = " << m_accepted.getState()
517
                  << ", hypothesis count = " << m_possible.size() << std::endl;
518

    
519
        //!!! and also need to reap rejected/expired hypotheses from the list
520
    }  
521
            
522

    
523
/*
524
    bool accepted = false;
525

526
    if (maxbin >= 0) {
527
        double pp = calculatePeakProportion(data, abstot, maxbin);
528
        if (acceptPeak(maxbin, pp)) {
529
            accepted = true;
530
        } else {
531
            // try a secondary peak
532
            maxval = 0.0;
533
            int secondbin = 0;
534
            for (int i = 1; i < n-1; ++i) {
535
                if (i != maxbin &&
536
                    data[i] > data[i-1] &&
537
                    data[i] > data[i+1] &&
538
                    data[i] > maxval) {
539
                    maxval = data[i];
540
                    secondbin = i;
541
                }
542
            }
543
            double spp = calculatePeakProportion(data, abstot, secondbin);
544
            if (acceptPeak(secondbin, spp)) {
545
                maxbin = secondbin;
546
                pp = spp;
547
                accepted = true;
548
            }
549
        }
550
        if (accepted) {
551
            m_prevpeak = maxbin;
552
            m_prevprop = pp;
553
        }
554
    }
555
*/
556
//    std::cerr << "peakProportion = " << peakProportion << std::endl;
557
//    std::cerr << "peak = " << m_inputSampleRate / (maxbin + m_binFrom) << std::endl;
558
//    std::cerr << "bins = " << m_bins << std::endl;
559

    
560
//    if (peakProportion >= (0.00006 * m_bins)) {
561
/*
562
    if (accepted) {
563
        Feature f;
564
        f.hasTimestamp = true;
565
        f.timestamp = timestamp;
566
        f.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
567
        fs[0].push_back(f);
568
    }
569
*/
570
    delete[] data;
571
    return fs;
572
}
573

    
574
CepstrumPitchTracker::FeatureSet
575
CepstrumPitchTracker::getRemainingFeatures()
576
{
577
    FeatureSet fs;
578
    if (m_accepted.getState() != Hypothesis::New) {
579
        m_accepted.addFeatures(fs[0]);
580
    }
581
    return fs;
582
}
583

    
584
void
585
CepstrumPitchTracker::fft(unsigned int n, bool inverse,
586
                    double *ri, double *ii, double *ro, double *io)
587
{
588
    if (!ri || !ro || !io) return;
589

    
590
    unsigned int bits;
591
    unsigned int i, j, k, m;
592
    unsigned int blockSize, blockEnd;
593

    
594
    double tr, ti;
595

    
596
    if (n < 2) return;
597
    if (n & (n-1)) return;
598

    
599
    double angle = 2.0 * M_PI;
600
    if (inverse) angle = -angle;
601

    
602
    for (i = 0; ; ++i) {
603
        if (n & (1 << i)) {
604
            bits = i;
605
            break;
606
        }
607
    }
608

    
609
    static unsigned int tableSize = 0;
610
    static int *table = 0;
611

    
612
    if (tableSize != n) {
613

    
614
        delete[] table;
615

    
616
        table = new int[n];
617

    
618
        for (i = 0; i < n; ++i) {
619
        
620
            m = i;
621

    
622
            for (j = k = 0; j < bits; ++j) {
623
                k = (k << 1) | (m & 1);
624
                m >>= 1;
625
            }
626

    
627
            table[i] = k;
628
        }
629

    
630
        tableSize = n;
631
    }
632

    
633
    if (ii) {
634
        for (i = 0; i < n; ++i) {
635
            ro[table[i]] = ri[i];
636
            io[table[i]] = ii[i];
637
        }
638
    } else {
639
        for (i = 0; i < n; ++i) {
640
            ro[table[i]] = ri[i];
641
            io[table[i]] = 0.0;
642
        }
643
    }
644

    
645
    blockEnd = 1;
646

    
647
    for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
648

    
649
        double delta = angle / (double)blockSize;
650
        double sm2 = -sin(-2 * delta);
651
        double sm1 = -sin(-delta);
652
        double cm2 = cos(-2 * delta);
653
        double cm1 = cos(-delta);
654
        double w = 2 * cm1;
655
        double ar[3], ai[3];
656

    
657
        for (i = 0; i < n; i += blockSize) {
658

    
659
            ar[2] = cm2;
660
            ar[1] = cm1;
661

    
662
            ai[2] = sm2;
663
            ai[1] = sm1;
664

    
665
            for (j = i, m = 0; m < blockEnd; j++, m++) {
666

    
667
                ar[0] = w * ar[1] - ar[2];
668
                ar[2] = ar[1];
669
                ar[1] = ar[0];
670

    
671
                ai[0] = w * ai[1] - ai[2];
672
                ai[2] = ai[1];
673
                ai[1] = ai[0];
674

    
675
                k = j + blockEnd;
676
                tr = ar[0] * ro[k] - ai[0] * io[k];
677
                ti = ar[0] * io[k] + ai[0] * ro[k];
678

    
679
                ro[k] = ro[j] - tr;
680
                io[k] = io[j] - ti;
681

    
682
                ro[j] += tr;
683
                io[j] += ti;
684
            }
685
        }
686

    
687
        blockEnd = blockSize;
688
    }
689
}
690

    
691