To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstrumPitchTracker.cpp @ 13:6f73de098d35

History | View | Annotate | Download (14.9 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

    
23
#include "CepstrumPitchTracker.h"
24

    
25
#include <vector>
26
#include <algorithm>
27

    
28
#include <cstdio>
29
#include <cmath>
30
#include <complex>
31

    
32
using std::string;
33
using std::vector;
34

    
35
CepstrumPitchTracker::Hypothesis::Hypothesis()
36
{
37
    m_state = New;
38
    m_age = 0;
39
}
40

    
41
CepstrumPitchTracker::Hypothesis::~Hypothesis()
42
{
43
}
44

    
45
bool
46
CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s)
47
{
48
    if (m_pending.empty()) {
49
        return true;
50
    }
51
    Estimate last = m_pending[m_pending.size()-1];
52
    double r = s.freq / last.freq;
53
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
54
    return (cents > -200 && cents < 200);
55
}
56

    
57
bool 
58
CepstrumPitchTracker::Hypothesis::isSatisfied()
59
{
60
    return (m_pending.size() > 2);
61
}
62

    
63
void
64
CepstrumPitchTracker::Hypothesis::advanceTime()
65
{
66
    ++m_age;
67
}
68

    
69
bool
70
CepstrumPitchTracker::Hypothesis::test(Estimate s)
71
{
72
    bool accept = false;
73

    
74
    switch (m_state) {
75

    
76
    case New:
77
        m_state = Provisional;
78
        accept = true;
79
        break;
80

    
81
    case Provisional:
82
        if (m_age > 3) {
83
            m_state = Rejected;
84
        } else if (isWithinTolerance(s)) {
85
            accept = true;
86
        }
87
        break;
88
        
89
    case Satisfied:
90
        if (m_age > 3) {
91
            m_state = Expired;
92
        } else if (isWithinTolerance(s)) {
93
            accept = true;
94
        }
95
        break;
96

    
97
    case Rejected:
98
        break;
99

    
100
    case Expired:
101
        break;
102
    }
103

    
104
    if (accept) {
105
        m_pending.push_back(s);
106
        m_age = 0;
107
        if (m_state == Provisional && isSatisfied()) {
108
            m_state = Satisfied;
109
        }
110
    }
111

    
112
    return accept;
113
}        
114

    
115
CepstrumPitchTracker::Hypothesis::State
116
CepstrumPitchTracker::Hypothesis::getState()
117
{
118
    return m_state;
119
}
120

    
121
int
122
CepstrumPitchTracker::Hypothesis::getPendingLength()
123
{
124
    return m_pending.size();
125
}
126

    
127
CepstrumPitchTracker::Hypothesis::Estimates
128
CepstrumPitchTracker::Hypothesis::getAcceptedEstimates()
129
{
130
    if (m_state == Satisfied || m_state == Expired) {
131
        return m_pending;
132
    } else {
133
        return Estimates();
134
    }
135
}
136

    
137
void
138
CepstrumPitchTracker::Hypothesis::addFeatures(FeatureList &fl)
139
{
140
    for (int i = 0; i < m_pending.size(); ++i) {
141
        Feature f;
142
        f.hasTimestamp = true;
143
        f.timestamp = m_pending[i].time;
144
        f.values.push_back(m_pending[i].freq);
145
        fl.push_back(f);
146
    }
147
}
148

    
149
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
150
    Plugin(inputSampleRate),
151
    m_channels(0),
152
    m_stepSize(256),
153
    m_blockSize(1024),
154
    m_fmin(50),
155
    m_fmax(1000),
156
    m_histlen(1),
157
    m_vflen(3),
158
    m_binFrom(0),
159
    m_binTo(0),
160
    m_bins(0),
161
    m_history(0),
162
    m_prevpeak(0),
163
    m_prevprop(0)
164
{
165
}
166

    
167
CepstrumPitchTracker::~CepstrumPitchTracker()
168
{
169
    if (m_history) {
170
        for (int i = 0; i < m_histlen; ++i) {
171
            delete[] m_history[i];
172
        }
173
        delete[] m_history;
174
    }
175
}
176

    
177
string
178
CepstrumPitchTracker::getIdentifier() const
179
{
180
    return "cepstrum-pitch";
181
}
182

    
183
string
184
CepstrumPitchTracker::getName() const
185
{
186
    return "Cepstrum Pitch Tracker";
187
}
188

    
189
string
190
CepstrumPitchTracker::getDescription() const
191
{
192
    return "Estimate f0 of monophonic material using a cepstrum method.";
193
}
194

    
195
string
196
CepstrumPitchTracker::getMaker() const
197
{
198
    return "Chris Cannam";
199
}
200

    
201
int
202
CepstrumPitchTracker::getPluginVersion() const
203
{
204
    // Increment this each time you release a version that behaves
205
    // differently from the previous one
206
    return 1;
207
}
208

    
209
string
210
CepstrumPitchTracker::getCopyright() const
211
{
212
    return "Freely redistributable (BSD license)";
213
}
214

    
215
CepstrumPitchTracker::InputDomain
216
CepstrumPitchTracker::getInputDomain() const
217
{
218
    return FrequencyDomain;
219
}
220

    
221
size_t
222
CepstrumPitchTracker::getPreferredBlockSize() const
223
{
224
    return 1024;
225
}
226

    
227
size_t 
228
CepstrumPitchTracker::getPreferredStepSize() const
229
{
230
    return 256;
231
}
232

    
233
size_t
234
CepstrumPitchTracker::getMinChannelCount() const
235
{
236
    return 1;
237
}
238

    
239
size_t
240
CepstrumPitchTracker::getMaxChannelCount() const
241
{
242
    return 1;
243
}
244

    
245
CepstrumPitchTracker::ParameterList
246
CepstrumPitchTracker::getParameterDescriptors() const
247
{
248
    ParameterList list;
249
    return list;
250
}
251

    
252
float
253
CepstrumPitchTracker::getParameter(string identifier) const
254
{
255
    return 0.f;
256
}
257

    
258
void
259
CepstrumPitchTracker::setParameter(string identifier, float value) 
260
{
261
}
262

    
263
CepstrumPitchTracker::ProgramList
264
CepstrumPitchTracker::getPrograms() const
265
{
266
    ProgramList list;
267
    return list;
268
}
269

    
270
string
271
CepstrumPitchTracker::getCurrentProgram() const
272
{
273
    return ""; // no programs
274
}
275

    
276
void
277
CepstrumPitchTracker::selectProgram(string name)
278
{
279
}
280

    
281
CepstrumPitchTracker::OutputList
282
CepstrumPitchTracker::getOutputDescriptors() const
283
{
284
    OutputList outputs;
285

    
286
    int n = 0;
287

    
288
    OutputDescriptor d;
289

    
290
    d.identifier = "f0";
291
    d.name = "Estimated f0";
292
    d.description = "Estimated fundamental frequency";
293
    d.unit = "Hz";
294
    d.hasFixedBinCount = true;
295
    d.binCount = 1;
296
    d.hasKnownExtents = true;
297
    d.minValue = m_fmin;
298
    d.maxValue = m_fmax;
299
    d.isQuantized = false;
300
    d.sampleType = OutputDescriptor::FixedSampleRate;
301
    d.sampleRate = (m_inputSampleRate / m_stepSize);
302
    d.hasDuration = false;
303
    outputs.push_back(d);
304

    
305
    return outputs;
306
}
307

    
308
bool
309
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
310
{
311
    if (channels < getMinChannelCount() ||
312
        channels > getMaxChannelCount()) return false;
313

    
314
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
315
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
316
//              << std::endl;
317

    
318
    m_channels = channels;
319
    m_stepSize = stepSize;
320
    m_blockSize = blockSize;
321

    
322
    m_binFrom = int(m_inputSampleRate / m_fmax);
323
    m_binTo = int(m_inputSampleRate / m_fmin); 
324

    
325
    if (m_binTo >= (int)m_blockSize / 2) {
326
        m_binTo = m_blockSize / 2 - 1;
327
    }
328

    
329
    m_bins = (m_binTo - m_binFrom) + 1;
330

    
331
    m_history = new double *[m_histlen];
332
    for (int i = 0; i < m_histlen; ++i) {
333
        m_history[i] = new double[m_bins];
334
    }
335

    
336
    reset();
337

    
338
    return true;
339
}
340

    
341
void
342
CepstrumPitchTracker::reset()
343
{
344
    for (int i = 0; i < m_histlen; ++i) {
345
        for (int j = 0; j < m_bins; ++j) {
346
            m_history[i][j] = 0.0;
347
        }
348
    }
349
}
350

    
351
void
352
CepstrumPitchTracker::filter(const double *cep, double *result)
353
{
354
    int hix = m_histlen - 1; // current history index
355

    
356
    // roll back the history
357
    if (m_histlen > 1) {
358
        double *oldest = m_history[0];
359
        for (int i = 1; i < m_histlen; ++i) {
360
            m_history[i-1] = m_history[i];
361
        }
362
        // and stick this back in the newest spot, to recycle
363
        m_history[hix] = oldest;
364
    }
365

    
366
    for (int i = 0; i < m_bins; ++i) {
367
        double v = 0;
368
        int n = 0;
369
        // average according to the vertical filter length
370
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
371
            int ix = i + m_binFrom + j;
372
            if (ix >= 0 && ix < m_blockSize) {
373
                v += cep[ix];
374
                ++n;
375
            }
376
        }
377
        m_history[hix][i] = v / n;
378
    }
379

    
380
    for (int i = 0; i < m_bins; ++i) {
381
        double mean = 0.0;
382
        for (int j = 0; j < m_histlen; ++j) {
383
            mean += m_history[j][i];
384
        }
385
        mean /= m_histlen;
386
        result[i] = mean;
387
    }
388
}
389

    
390
double
391
CepstrumPitchTracker::calculatePeakProportion(const double *data, double abstot, int n)
392
{
393
    double aroundPeak = data[n];
394
    double peakProportion = 0.0;
395

    
396
    int i = n - 1;
397
    while (i > 0 && data[i] <= data[i+1]) {
398
        aroundPeak += fabs(data[i]);
399
        --i;
400
    }
401
    i = n + 1;
402
    while (i < m_bins && data[i] <= data[i-1]) {
403
        aroundPeak += fabs(data[i]);
404
        ++i;
405
    }
406
    peakProportion = aroundPeak / abstot;
407

    
408
    return peakProportion;
409
}
410

    
411
bool
412
CepstrumPitchTracker::acceptPeak(int n, double peakProportion)
413
{
414
    bool accept = false;
415

    
416
    if (abs(n - m_prevpeak) < 10) { //!!! should depend on bin count
417
        accept = true;
418
    } else if (peakProportion > m_prevprop * 2) {
419
        accept = true;
420
    }
421

    
422
    return accept;
423
}
424

    
425
CepstrumPitchTracker::FeatureSet
426
CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
427
{
428
    FeatureSet fs;
429

    
430
    int bs = m_blockSize;
431
    int hs = m_blockSize/2 + 1;
432

    
433
    double *rawcep = new double[bs];
434
    double *io = new double[bs];
435
    double *logmag = new double[bs];
436

    
437
    // The "inverse symmetric" method. Seems to be the most reliable
438
        
439
    for (int i = 0; i < hs; ++i) {
440

    
441
        double power =
442
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
443
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
444
        double mag = sqrt(power);
445
        
446
        double lm = log(mag + 0.00000001);
447
        
448
        logmag[i] = lm;
449
        if (i > 0) logmag[bs - i] = lm;
450
    }
451

    
452
    fft(bs, true, logmag, 0, rawcep, io);
453
    
454
    delete[] logmag;
455
    delete[] io;
456

    
457
    int n = m_bins;
458
    double *data = new double[n];
459
    filter(rawcep, data);
460
    delete[] rawcep;
461

    
462
    double abstot = 0.0;
463

    
464
    for (int i = 0; i < n; ++i) {
465
        abstot += fabs(data[i]);
466
    }
467

    
468
    double maxval = 0.0;
469
    int maxbin = -1;
470

    
471
    for (int i = 0; i < n; ++i) {
472
        if (data[i] > maxval) {
473
            maxval = data[i];
474
            maxbin = i;
475
        }
476
    }
477

    
478
    if (maxbin < 0) return fs;
479

    
480
    double peakfreq = m_inputSampleRate / (maxbin + m_binFrom);
481
    Hypothesis::Estimate e;
482
    e.freq = peakfreq;
483
    e.time = timestamp;
484

    
485
    m_accepted.advanceTime();
486

    
487
    for (int i = 0; i < m_possible.size(); ++i) {
488
        m_possible[i].advanceTime();
489
    }
490

    
491
    if (!m_accepted.test(e)) {
492

    
493
        int candidate = -1;
494
        bool accepted = false;
495

    
496
        for (int i = 0; i < m_possible.size(); ++i) {
497
            if (m_possible[i].test(e)) {
498
                accepted = true;
499
                if (m_possible[i].getState() == Hypothesis::Satisfied) {
500
                    candidate = i;
501
                }
502
                break;
503
            }
504
        }
505

    
506
        if (!accepted) {
507
            Hypothesis h;
508
            h.test(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
509
            m_possible.push_back(h);
510
        }
511

    
512
        if (m_accepted.getState() == Hypothesis::Expired) {
513
            m_accepted.addFeatures(fs[0]);
514
        }
515
        
516
        if (m_accepted.getState() == Hypothesis::Expired ||
517
            m_accepted.getState() == Hypothesis::Rejected) {
518
            if (candidate >= 0) {
519
                m_accepted = m_possible[candidate];
520
            } else {
521
                m_accepted = Hypothesis();
522
            }
523
        }
524

    
525
        std::cerr << "accepted length = " << m_accepted.getPendingLength()
526
                  << ", state = " << m_accepted.getState()
527
                  << ", hypothesis count = " << m_possible.size() << std::endl;
528

    
529
        //!!! and also need to reap rejected/expired hypotheses from the list
530
    }  
531
            
532

    
533
/*
534
    bool accepted = false;
535

536
    if (maxbin >= 0) {
537
        double pp = calculatePeakProportion(data, abstot, maxbin);
538
        if (acceptPeak(maxbin, pp)) {
539
            accepted = true;
540
        } else {
541
            // try a secondary peak
542
            maxval = 0.0;
543
            int secondbin = 0;
544
            for (int i = 1; i < n-1; ++i) {
545
                if (i != maxbin &&
546
                    data[i] > data[i-1] &&
547
                    data[i] > data[i+1] &&
548
                    data[i] > maxval) {
549
                    maxval = data[i];
550
                    secondbin = i;
551
                }
552
            }
553
            double spp = calculatePeakProportion(data, abstot, secondbin);
554
            if (acceptPeak(secondbin, spp)) {
555
                maxbin = secondbin;
556
                pp = spp;
557
                accepted = true;
558
            }
559
        }
560
        if (accepted) {
561
            m_prevpeak = maxbin;
562
            m_prevprop = pp;
563
        }
564
    }
565
*/
566
//    std::cerr << "peakProportion = " << peakProportion << std::endl;
567
//    std::cerr << "peak = " << m_inputSampleRate / (maxbin + m_binFrom) << std::endl;
568
//    std::cerr << "bins = " << m_bins << std::endl;
569

    
570
//    if (peakProportion >= (0.00006 * m_bins)) {
571
/*
572
    if (accepted) {
573
        Feature f;
574
        f.hasTimestamp = true;
575
        f.timestamp = timestamp;
576
        f.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
577
        fs[0].push_back(f);
578
    }
579
*/
580
    delete[] data;
581
    return fs;
582
}
583

    
584
CepstrumPitchTracker::FeatureSet
585
CepstrumPitchTracker::getRemainingFeatures()
586
{
587
    FeatureSet fs;
588
    if (m_accepted.getState() != Hypothesis::New) {
589
        m_accepted.addFeatures(fs[0]);
590
    }
591
    return fs;
592
}
593

    
594
void
595
CepstrumPitchTracker::fft(unsigned int n, bool inverse,
596
                    double *ri, double *ii, double *ro, double *io)
597
{
598
    if (!ri || !ro || !io) return;
599

    
600
    unsigned int bits;
601
    unsigned int i, j, k, m;
602
    unsigned int blockSize, blockEnd;
603

    
604
    double tr, ti;
605

    
606
    if (n < 2) return;
607
    if (n & (n-1)) return;
608

    
609
    double angle = 2.0 * M_PI;
610
    if (inverse) angle = -angle;
611

    
612
    for (i = 0; ; ++i) {
613
        if (n & (1 << i)) {
614
            bits = i;
615
            break;
616
        }
617
    }
618

    
619
    static unsigned int tableSize = 0;
620
    static int *table = 0;
621

    
622
    if (tableSize != n) {
623

    
624
        delete[] table;
625

    
626
        table = new int[n];
627

    
628
        for (i = 0; i < n; ++i) {
629
        
630
            m = i;
631

    
632
            for (j = k = 0; j < bits; ++j) {
633
                k = (k << 1) | (m & 1);
634
                m >>= 1;
635
            }
636

    
637
            table[i] = k;
638
        }
639

    
640
        tableSize = n;
641
    }
642

    
643
    if (ii) {
644
        for (i = 0; i < n; ++i) {
645
            ro[table[i]] = ri[i];
646
            io[table[i]] = ii[i];
647
        }
648
    } else {
649
        for (i = 0; i < n; ++i) {
650
            ro[table[i]] = ri[i];
651
            io[table[i]] = 0.0;
652
        }
653
    }
654

    
655
    blockEnd = 1;
656

    
657
    for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
658

    
659
        double delta = angle / (double)blockSize;
660
        double sm2 = -sin(-2 * delta);
661
        double sm1 = -sin(-delta);
662
        double cm2 = cos(-2 * delta);
663
        double cm1 = cos(-delta);
664
        double w = 2 * cm1;
665
        double ar[3], ai[3];
666

    
667
        for (i = 0; i < n; i += blockSize) {
668

    
669
            ar[2] = cm2;
670
            ar[1] = cm1;
671

    
672
            ai[2] = sm2;
673
            ai[1] = sm1;
674

    
675
            for (j = i, m = 0; m < blockEnd; j++, m++) {
676

    
677
                ar[0] = w * ar[1] - ar[2];
678
                ar[2] = ar[1];
679
                ar[1] = ar[0];
680

    
681
                ai[0] = w * ai[1] - ai[2];
682
                ai[2] = ai[1];
683
                ai[1] = ai[0];
684

    
685
                k = j + blockEnd;
686
                tr = ar[0] * ro[k] - ai[0] * io[k];
687
                ti = ar[0] * io[k] + ai[0] * ro[k];
688

    
689
                ro[k] = ro[j] - tr;
690
                io[k] = io[j] - ti;
691

    
692
                ro[j] += tr;
693
                io[j] += ti;
694
            }
695
        }
696

    
697
        blockEnd = blockSize;
698
    }
699
}
700

    
701