To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstrumPitchTracker.cpp @ 11:c938c60de2de

History | View | Annotate | Download (14.2 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

    
23
#include "CepstrumPitchTracker.h"
24

    
25
#include <vector>
26
#include <algorithm>
27

    
28
#include <cstdio>
29
#include <cmath>
30
#include <complex>
31

    
32
using std::string;
33
using std::vector;
34

    
35
CepstrumPitchTracker::Hypothesis::Hypothesis()
36
{
37
    m_state = New;
38
    m_age = 0;
39
}
40

    
41
CepstrumPitchTracker::Hypothesis::~Hypothesis()
42
{
43
}
44

    
45
bool
46
CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s)
47
{
48
    if (m_pending.empty()) {
49
        return true;
50
    }
51
    Estimate last = m_pending[m_pending.size()-1];
52
    double r = s.freq / last.freq;
53
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
54
    return (cents > -200 && cents < 200);
55
}
56

    
57
bool 
58
CepstrumPitchTracker::Hypothesis::isSatisfied()
59
{
60
    return (m_pending.size() > 2);
61
}
62

    
63
void
64
CepstrumPitchTracker::Hypothesis::advanceTime()
65
{
66
    ++m_age;
67
}
68

    
69
bool
70
CepstrumPitchTracker::Hypothesis::test(Estimate s)
71
{
72
    bool accept = false;
73

    
74
    switch (m_state) {
75

    
76
    case New:
77
        m_state = Provisional;
78
        accept = true;
79
        break;
80

    
81
    case Provisional:
82
        if (m_age > 3) {
83
            m_state = Rejected;
84
        } else if (isWithinTolerance(s)) {
85
            accept = true;
86
        }
87
        break;
88
        
89
    case Satisfied:
90
        if (m_age > 3) {
91
            m_state = Expired;
92
        } else if (isWithinTolerance(s)) {
93
            accept = true;
94
        }
95
        break;
96

    
97
    case Rejected:
98
        break;
99

    
100
    case Expired:
101
        break;
102
    }
103

    
104
    if (accept) {
105
        m_pending.push_back(s);
106
        m_age = 0;
107
        if (m_state == Provisional && isSatisfied()) {
108
            m_state = Satisfied;
109
        }
110
    }
111

    
112
    return accept;
113
}        
114

    
115
CepstrumPitchTracker::Hypothesis::State
116
CepstrumPitchTracker::Hypothesis::getState()
117
{
118
    return m_state;
119
}
120

    
121
CepstrumPitchTracker::Hypothesis::Estimates
122
CepstrumPitchTracker::Hypothesis::getAcceptedEstimates()
123
{
124
    if (m_state == Satisfied || m_state == Expired) {
125
        return m_pending;
126
    } else {
127
        return Estimates();
128
    }
129
}
130

    
131
void
132
CepstrumPitchTracker::Hypothesis::addFeatures(FeatureList &fl)
133
{
134
    for (int i = 0; i < m_pending.size(); ++i) {
135
        Feature f;
136
        f.hasTimestamp = true;
137
        f.timestamp = m_pending[i].time;
138
        f.values.push_back(m_pending[i].freq);
139
        fl.push_back(f);
140
    }
141
}
142

    
143
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
144
    Plugin(inputSampleRate),
145
    m_channels(0),
146
    m_stepSize(256),
147
    m_blockSize(1024),
148
    m_fmin(50),
149
    m_fmax(1000),
150
    m_histlen(1),
151
    m_vflen(3),
152
    m_binFrom(0),
153
    m_binTo(0),
154
    m_bins(0),
155
    m_history(0),
156
    m_prevpeak(0),
157
    m_prevprop(0)
158
{
159
}
160

    
161
CepstrumPitchTracker::~CepstrumPitchTracker()
162
{
163
    if (m_history) {
164
        for (int i = 0; i < m_histlen; ++i) {
165
            delete[] m_history[i];
166
        }
167
        delete[] m_history;
168
    }
169
}
170

    
171
string
172
CepstrumPitchTracker::getIdentifier() const
173
{
174
    return "cepstrum-pitch";
175
}
176

    
177
string
178
CepstrumPitchTracker::getName() const
179
{
180
    return "Cepstrum Pitch Tracker";
181
}
182

    
183
string
184
CepstrumPitchTracker::getDescription() const
185
{
186
    return "Estimate f0 of monophonic material using a cepstrum method.";
187
}
188

    
189
string
190
CepstrumPitchTracker::getMaker() const
191
{
192
    return "Chris Cannam";
193
}
194

    
195
int
196
CepstrumPitchTracker::getPluginVersion() const
197
{
198
    // Increment this each time you release a version that behaves
199
    // differently from the previous one
200
    return 1;
201
}
202

    
203
string
204
CepstrumPitchTracker::getCopyright() const
205
{
206
    return "Freely redistributable (BSD license)";
207
}
208

    
209
CepstrumPitchTracker::InputDomain
210
CepstrumPitchTracker::getInputDomain() const
211
{
212
    return FrequencyDomain;
213
}
214

    
215
size_t
216
CepstrumPitchTracker::getPreferredBlockSize() const
217
{
218
    return 1024;
219
}
220

    
221
size_t 
222
CepstrumPitchTracker::getPreferredStepSize() const
223
{
224
    return 256;
225
}
226

    
227
size_t
228
CepstrumPitchTracker::getMinChannelCount() const
229
{
230
    return 1;
231
}
232

    
233
size_t
234
CepstrumPitchTracker::getMaxChannelCount() const
235
{
236
    return 1;
237
}
238

    
239
CepstrumPitchTracker::ParameterList
240
CepstrumPitchTracker::getParameterDescriptors() const
241
{
242
    ParameterList list;
243
    return list;
244
}
245

    
246
float
247
CepstrumPitchTracker::getParameter(string identifier) const
248
{
249
    return 0.f;
250
}
251

    
252
void
253
CepstrumPitchTracker::setParameter(string identifier, float value) 
254
{
255
}
256

    
257
CepstrumPitchTracker::ProgramList
258
CepstrumPitchTracker::getPrograms() const
259
{
260
    ProgramList list;
261
    return list;
262
}
263

    
264
string
265
CepstrumPitchTracker::getCurrentProgram() const
266
{
267
    return ""; // no programs
268
}
269

    
270
void
271
CepstrumPitchTracker::selectProgram(string name)
272
{
273
}
274

    
275
CepstrumPitchTracker::OutputList
276
CepstrumPitchTracker::getOutputDescriptors() const
277
{
278
    OutputList outputs;
279

    
280
    int n = 0;
281

    
282
    OutputDescriptor d;
283

    
284
    d.identifier = "f0";
285
    d.name = "Estimated f0";
286
    d.description = "Estimated fundamental frequency";
287
    d.unit = "Hz";
288
    d.hasFixedBinCount = true;
289
    d.binCount = 1;
290
    d.hasKnownExtents = true;
291
    d.minValue = m_fmin;
292
    d.maxValue = m_fmax;
293
    d.isQuantized = false;
294
    d.sampleType = OutputDescriptor::FixedSampleRate;
295
    d.sampleRate = (m_inputSampleRate / m_stepSize);
296
    d.hasDuration = false;
297
    outputs.push_back(d);
298

    
299
    return outputs;
300
}
301

    
302
bool
303
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
304
{
305
    if (channels < getMinChannelCount() ||
306
        channels > getMaxChannelCount()) return false;
307

    
308
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
309
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
310
//              << std::endl;
311

    
312
    m_channels = channels;
313
    m_stepSize = stepSize;
314
    m_blockSize = blockSize;
315

    
316
    m_binFrom = int(m_inputSampleRate / m_fmax);
317
    m_binTo = int(m_inputSampleRate / m_fmin); 
318

    
319
    if (m_binTo >= (int)m_blockSize / 2) {
320
        m_binTo = m_blockSize / 2 - 1;
321
    }
322

    
323
    m_bins = (m_binTo - m_binFrom) + 1;
324

    
325
    m_history = new double *[m_histlen];
326
    for (int i = 0; i < m_histlen; ++i) {
327
        m_history[i] = new double[m_bins];
328
    }
329

    
330
    reset();
331

    
332
    return true;
333
}
334

    
335
void
336
CepstrumPitchTracker::reset()
337
{
338
    for (int i = 0; i < m_histlen; ++i) {
339
        for (int j = 0; j < m_bins; ++j) {
340
            m_history[i][j] = 0.0;
341
        }
342
    }
343
}
344

    
345
void
346
CepstrumPitchTracker::filter(const double *cep, double *result)
347
{
348
    int hix = m_histlen - 1; // current history index
349

    
350
    // roll back the history
351
    if (m_histlen > 1) {
352
        double *oldest = m_history[0];
353
        for (int i = 1; i < m_histlen; ++i) {
354
            m_history[i-1] = m_history[i];
355
        }
356
        // and stick this back in the newest spot, to recycle
357
        m_history[hix] = oldest;
358
    }
359

    
360
    for (int i = 0; i < m_bins; ++i) {
361
        double v = 0;
362
        int n = 0;
363
        // average according to the vertical filter length
364
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
365
            int ix = i + m_binFrom + j;
366
            if (ix >= 0 && ix < m_blockSize) {
367
                v += cep[ix];
368
                ++n;
369
            }
370
        }
371
        m_history[hix][i] = v / n;
372
    }
373

    
374
    for (int i = 0; i < m_bins; ++i) {
375
        double mean = 0.0;
376
        for (int j = 0; j < m_histlen; ++j) {
377
            mean += m_history[j][i];
378
        }
379
        mean /= m_histlen;
380
        result[i] = mean;
381
    }
382
}
383

    
384
double
385
CepstrumPitchTracker::calculatePeakProportion(const double *data, double abstot, int n)
386
{
387
    double aroundPeak = data[n];
388
    double peakProportion = 0.0;
389

    
390
    int i = n - 1;
391
    while (i > 0 && data[i] <= data[i+1]) {
392
        aroundPeak += fabs(data[i]);
393
        --i;
394
    }
395
    i = n + 1;
396
    while (i < m_bins && data[i] <= data[i-1]) {
397
        aroundPeak += fabs(data[i]);
398
        ++i;
399
    }
400
    peakProportion = aroundPeak / abstot;
401

    
402
    return peakProportion;
403
}
404

    
405
bool
406
CepstrumPitchTracker::acceptPeak(int n, double peakProportion)
407
{
408
    bool accept = false;
409

    
410
    if (abs(n - m_prevpeak) < 10) { //!!! should depend on bin count
411
        accept = true;
412
    } else if (peakProportion > m_prevprop * 2) {
413
        accept = true;
414
    }
415

    
416
    return accept;
417
}
418

    
419
CepstrumPitchTracker::FeatureSet
420
CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
421
{
422
    FeatureSet fs;
423

    
424
    int bs = m_blockSize;
425
    int hs = m_blockSize/2 + 1;
426

    
427
    double *rawcep = new double[bs];
428
    double *io = new double[bs];
429
    double *logmag = new double[bs];
430

    
431
    // The "inverse symmetric" method. Seems to be the most reliable
432
        
433
    for (int i = 0; i < hs; ++i) {
434

    
435
        double power =
436
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
437
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
438
        double mag = sqrt(power);
439
        
440
        double lm = log(mag + 0.00000001);
441
        
442
        logmag[i] = lm;
443
        if (i > 0) logmag[bs - i] = lm;
444
    }
445

    
446
    fft(bs, true, logmag, 0, rawcep, io);
447
    
448
    delete[] logmag;
449
    delete[] io;
450

    
451
    int n = m_bins;
452
    double *data = new double[n];
453
    filter(rawcep, data);
454
    delete[] rawcep;
455

    
456
    double abstot = 0.0;
457

    
458
    for (int i = 0; i < n; ++i) {
459
        abstot += fabs(data[i]);
460
    }
461

    
462
    double maxval = 0.0;
463
    int maxbin = -1;
464

    
465
    for (int i = 0; i < n; ++i) {
466
        if (data[i] > maxval) {
467
            maxval = data[i];
468
            maxbin = i;
469
        }
470
    }
471

    
472
    if (maxbin < 0) return fs;
473

    
474
    double peakfreq = m_inputSampleRate / (maxbin + m_binFrom);
475
    Hypothesis::Estimate e;
476
    e.freq = peakfreq;
477
    e.time = timestamp;
478

    
479
    m_accepted.advanceTime();
480

    
481
    for (int i = 0; i < m_possible.size(); ++i) {
482
        m_possible[i].advanceTime();
483
    }
484

    
485
    if (!m_accepted.test(e)) {
486
        int candidate = -1;
487
        for (int i = 0; i < m_possible.size(); ++i) {
488
            if (m_possible[i].test(e)) {
489
                if (m_possible[i].getState() == Hypothesis::Satisfied) {
490
                    candidate = i;
491
                }
492
                break;
493
            }
494
        }
495
        if (m_accepted.getState() == Hypothesis::Expired) {
496
            m_accepted.addFeatures(fs[0]);
497
            if (candidate >= 0) {
498
                m_accepted = m_possible[candidate];
499
            } else {
500
                m_accepted = Hypothesis();
501
            }
502
        }
503

    
504
        //!!! and also need to reap rejected/expired hypotheses from the list
505
    }  
506
            
507

    
508
/*
509
    bool accepted = false;
510

511
    if (maxbin >= 0) {
512
        double pp = calculatePeakProportion(data, abstot, maxbin);
513
        if (acceptPeak(maxbin, pp)) {
514
            accepted = true;
515
        } else {
516
            // try a secondary peak
517
            maxval = 0.0;
518
            int secondbin = 0;
519
            for (int i = 1; i < n-1; ++i) {
520
                if (i != maxbin &&
521
                    data[i] > data[i-1] &&
522
                    data[i] > data[i+1] &&
523
                    data[i] > maxval) {
524
                    maxval = data[i];
525
                    secondbin = i;
526
                }
527
            }
528
            double spp = calculatePeakProportion(data, abstot, secondbin);
529
            if (acceptPeak(secondbin, spp)) {
530
                maxbin = secondbin;
531
                pp = spp;
532
                accepted = true;
533
            }
534
        }
535
        if (accepted) {
536
            m_prevpeak = maxbin;
537
            m_prevprop = pp;
538
        }
539
    }
540
*/
541
//    std::cerr << "peakProportion = " << peakProportion << std::endl;
542
//    std::cerr << "peak = " << m_inputSampleRate / (maxbin + m_binFrom) << std::endl;
543
//    std::cerr << "bins = " << m_bins << std::endl;
544

    
545
//    if (peakProportion >= (0.00006 * m_bins)) {
546
/*
547
    if (accepted) {
548
        Feature f;
549
        f.hasTimestamp = true;
550
        f.timestamp = timestamp;
551
        f.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
552
        fs[0].push_back(f);
553
    }
554
*/
555
    delete[] data;
556
    return fs;
557
}
558

    
559
CepstrumPitchTracker::FeatureSet
560
CepstrumPitchTracker::getRemainingFeatures()
561
{
562
    FeatureSet fs;
563
    if (m_accepted.getState() != Hypothesis::New) {
564
        m_accepted.addFeatures(fs[0]);
565
    }
566
    return fs;
567
}
568

    
569
void
570
CepstrumPitchTracker::fft(unsigned int n, bool inverse,
571
                    double *ri, double *ii, double *ro, double *io)
572
{
573
    if (!ri || !ro || !io) return;
574

    
575
    unsigned int bits;
576
    unsigned int i, j, k, m;
577
    unsigned int blockSize, blockEnd;
578

    
579
    double tr, ti;
580

    
581
    if (n < 2) return;
582
    if (n & (n-1)) return;
583

    
584
    double angle = 2.0 * M_PI;
585
    if (inverse) angle = -angle;
586

    
587
    for (i = 0; ; ++i) {
588
        if (n & (1 << i)) {
589
            bits = i;
590
            break;
591
        }
592
    }
593

    
594
    static unsigned int tableSize = 0;
595
    static int *table = 0;
596

    
597
    if (tableSize != n) {
598

    
599
        delete[] table;
600

    
601
        table = new int[n];
602

    
603
        for (i = 0; i < n; ++i) {
604
        
605
            m = i;
606

    
607
            for (j = k = 0; j < bits; ++j) {
608
                k = (k << 1) | (m & 1);
609
                m >>= 1;
610
            }
611

    
612
            table[i] = k;
613
        }
614

    
615
        tableSize = n;
616
    }
617

    
618
    if (ii) {
619
        for (i = 0; i < n; ++i) {
620
            ro[table[i]] = ri[i];
621
            io[table[i]] = ii[i];
622
        }
623
    } else {
624
        for (i = 0; i < n; ++i) {
625
            ro[table[i]] = ri[i];
626
            io[table[i]] = 0.0;
627
        }
628
    }
629

    
630
    blockEnd = 1;
631

    
632
    for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
633

    
634
        double delta = angle / (double)blockSize;
635
        double sm2 = -sin(-2 * delta);
636
        double sm1 = -sin(-delta);
637
        double cm2 = cos(-2 * delta);
638
        double cm1 = cos(-delta);
639
        double w = 2 * cm1;
640
        double ar[3], ai[3];
641

    
642
        for (i = 0; i < n; i += blockSize) {
643

    
644
            ar[2] = cm2;
645
            ar[1] = cm1;
646

    
647
            ai[2] = sm2;
648
            ai[1] = sm1;
649

    
650
            for (j = i, m = 0; m < blockEnd; j++, m++) {
651

    
652
                ar[0] = w * ar[1] - ar[2];
653
                ar[2] = ar[1];
654
                ar[1] = ar[0];
655

    
656
                ai[0] = w * ai[1] - ai[2];
657
                ai[2] = ai[1];
658
                ai[1] = ai[0];
659

    
660
                k = j + blockEnd;
661
                tr = ar[0] * ro[k] - ai[0] * io[k];
662
                ti = ar[0] * io[k] + ai[0] * ro[k];
663

    
664
                ro[k] = ro[j] - tr;
665
                io[k] = io[j] - ti;
666

    
667
                ro[j] += tr;
668
                io[j] += ti;
669
            }
670
        }
671

    
672
        blockEnd = blockSize;
673
    }
674
}
675

    
676