To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstrumPitchTracker.cpp @ 8:e9d86e129467

History | View | Annotate | Download (13.2 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

    
23
#include "CepstrumPitchTracker.h"
24

    
25
#include <vector>
26
#include <algorithm>
27

    
28
#include <cstdio>
29
#include <cmath>
30
#include <complex>
31

    
32
using std::string;
33
using std::vector;
34

    
35
CepstrumPitchTracker::Hypothesis::Hypothesis()
36
{
37
    m_state = New;
38
    m_age = 0;
39
}
40

    
41
bool
42
CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s)
43
{
44
    if (m_pending.empty()) {
45
        return true;
46
    }
47
    Estimate last = m_pending[m_pending.size()-1];
48
    double r = s.freq / last.freq;
49
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
50
    return (cents > -200 && cents < 200);
51
}
52

    
53
bool 
54
CepstrumPitchTracker::Hypothesis::isSatisfied()
55
{
56
    return (m_pending.size() > 2);
57
}
58

    
59
void
60
CepstrumPitchTracker::Hypothesis::advanceTime()
61
{
62
    ++m_age;
63
}
64

    
65
bool
66
CepstrumPitchTracker::Hypothesis::test(Estimate s)
67
{
68
    bool accept = false;
69

    
70
    switch (m_state) {
71

    
72
    case New:
73
        m_state = Provisional;
74
        accept = true;
75
        break;
76

    
77
    case Provisional:
78
        if (m_age > 3) {
79
            m_state = Rejected;
80
        } else if (isWithinTolerance(s)) {
81
            accept = true;
82
        }
83
        break;
84
        
85
    case Satisfied:
86
        if (m_age > 3) {
87
            m_state = Expired;
88
        } else if (isWithinTolerance(s)) {
89
            accept = true;
90
        }
91
        break;
92

    
93
    case Rejected:
94
        break;
95

    
96
    case Expired:
97
        break;
98
    }
99

    
100
    if (accept) {
101
        m_pending.push_back(s);
102
        m_age = 0;
103
        if (m_state == Provisional && isSatisfied()) {
104
            m_state = Satisfied;
105
        }
106
    }
107

    
108
    return accept;
109
}        
110

    
111
CepstrumPitchTracker::Hypothesis::State
112
CepstrumPitchTracker::Hypothesis::getState()
113
{
114
    return m_state;
115
}
116

    
117
CepstrumPitchTracker::Hypothesis::Estimates
118
CepstrumPitchTracker::Hypothesis::getAcceptedEstimates()
119
{
120
    if (m_state == Satisfied || m_state == Expired) {
121
        return m_pending;
122
    } else {
123
        return Estimates();
124
    }
125
}
126

    
127

    
128
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
129
    Plugin(inputSampleRate),
130
    m_channels(0),
131
    m_stepSize(256),
132
    m_blockSize(1024),
133
    m_fmin(50),
134
    m_fmax(1000),
135
    m_histlen(1),
136
    m_vflen(3),
137
    m_binFrom(0),
138
    m_binTo(0),
139
    m_bins(0),
140
    m_history(0),
141
    m_prevpeak(0),
142
    m_prevprop(0)
143
{
144
}
145

    
146
CepstrumPitchTracker::~CepstrumPitchTracker()
147
{
148
    if (m_history) {
149
        for (int i = 0; i < m_histlen; ++i) {
150
            delete[] m_history[i];
151
        }
152
        delete[] m_history;
153
    }
154
}
155

    
156
string
157
CepstrumPitchTracker::getIdentifier() const
158
{
159
    return "cepstrum-pitch";
160
}
161

    
162
string
163
CepstrumPitchTracker::getName() const
164
{
165
    return "Cepstrum Pitch Tracker";
166
}
167

    
168
string
169
CepstrumPitchTracker::getDescription() const
170
{
171
    return "Estimate f0 of monophonic material using a cepstrum method.";
172
}
173

    
174
string
175
CepstrumPitchTracker::getMaker() const
176
{
177
    return "Chris Cannam";
178
}
179

    
180
int
181
CepstrumPitchTracker::getPluginVersion() const
182
{
183
    // Increment this each time you release a version that behaves
184
    // differently from the previous one
185
    return 1;
186
}
187

    
188
string
189
CepstrumPitchTracker::getCopyright() const
190
{
191
    return "Freely redistributable (BSD license)";
192
}
193

    
194
CepstrumPitchTracker::InputDomain
195
CepstrumPitchTracker::getInputDomain() const
196
{
197
    return FrequencyDomain;
198
}
199

    
200
size_t
201
CepstrumPitchTracker::getPreferredBlockSize() const
202
{
203
    return 1024;
204
}
205

    
206
size_t 
207
CepstrumPitchTracker::getPreferredStepSize() const
208
{
209
    return 256;
210
}
211

    
212
size_t
213
CepstrumPitchTracker::getMinChannelCount() const
214
{
215
    return 1;
216
}
217

    
218
size_t
219
CepstrumPitchTracker::getMaxChannelCount() const
220
{
221
    return 1;
222
}
223

    
224
CepstrumPitchTracker::ParameterList
225
CepstrumPitchTracker::getParameterDescriptors() const
226
{
227
    ParameterList list;
228
    return list;
229
}
230

    
231
float
232
CepstrumPitchTracker::getParameter(string identifier) const
233
{
234
    return 0.f;
235
}
236

    
237
void
238
CepstrumPitchTracker::setParameter(string identifier, float value) 
239
{
240
}
241

    
242
CepstrumPitchTracker::ProgramList
243
CepstrumPitchTracker::getPrograms() const
244
{
245
    ProgramList list;
246
    return list;
247
}
248

    
249
string
250
CepstrumPitchTracker::getCurrentProgram() const
251
{
252
    return ""; // no programs
253
}
254

    
255
void
256
CepstrumPitchTracker::selectProgram(string name)
257
{
258
}
259

    
260
CepstrumPitchTracker::OutputList
261
CepstrumPitchTracker::getOutputDescriptors() const
262
{
263
    OutputList outputs;
264

    
265
    int n = 0;
266

    
267
    OutputDescriptor d;
268

    
269
    d.identifier = "f0";
270
    d.name = "Estimated f0";
271
    d.description = "Estimated fundamental frequency";
272
    d.unit = "Hz";
273
    d.hasFixedBinCount = true;
274
    d.binCount = 1;
275
    d.hasKnownExtents = true;
276
    d.minValue = m_fmin;
277
    d.maxValue = m_fmax;
278
    d.isQuantized = false;
279
    d.sampleType = OutputDescriptor::FixedSampleRate;
280
    d.sampleRate = (m_inputSampleRate / m_stepSize);
281
    d.hasDuration = false;
282
    outputs.push_back(d);
283

    
284
    return outputs;
285
}
286

    
287
bool
288
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
289
{
290
    if (channels < getMinChannelCount() ||
291
        channels > getMaxChannelCount()) return false;
292

    
293
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
294
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
295
//              << std::endl;
296

    
297
    m_channels = channels;
298
    m_stepSize = stepSize;
299
    m_blockSize = blockSize;
300

    
301
    m_binFrom = int(m_inputSampleRate / m_fmax);
302
    m_binTo = int(m_inputSampleRate / m_fmin); 
303

    
304
    if (m_binTo >= (int)m_blockSize / 2) {
305
        m_binTo = m_blockSize / 2 - 1;
306
    }
307

    
308
    m_bins = (m_binTo - m_binFrom) + 1;
309

    
310
    m_history = new double *[m_histlen];
311
    for (int i = 0; i < m_histlen; ++i) {
312
        m_history[i] = new double[m_bins];
313
    }
314

    
315
    reset();
316

    
317
    return true;
318
}
319

    
320
void
321
CepstrumPitchTracker::reset()
322
{
323
    for (int i = 0; i < m_histlen; ++i) {
324
        for (int j = 0; j < m_bins; ++j) {
325
            m_history[i][j] = 0.0;
326
        }
327
    }
328
}
329

    
330
void
331
CepstrumPitchTracker::filter(const double *cep, double *result)
332
{
333
    int hix = m_histlen - 1; // current history index
334

    
335
    // roll back the history
336
    if (m_histlen > 1) {
337
        double *oldest = m_history[0];
338
        for (int i = 1; i < m_histlen; ++i) {
339
            m_history[i-1] = m_history[i];
340
        }
341
        // and stick this back in the newest spot, to recycle
342
        m_history[hix] = oldest;
343
    }
344

    
345
    for (int i = 0; i < m_bins; ++i) {
346
        double v = 0;
347
        int n = 0;
348
        // average according to the vertical filter length
349
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
350
            int ix = i + m_binFrom + j;
351
            if (ix >= 0 && ix < m_blockSize) {
352
                v += cep[ix];
353
                ++n;
354
            }
355
        }
356
        m_history[hix][i] = v / n;
357
    }
358

    
359
    for (int i = 0; i < m_bins; ++i) {
360
        double mean = 0.0;
361
        for (int j = 0; j < m_histlen; ++j) {
362
            mean += m_history[j][i];
363
        }
364
        mean /= m_histlen;
365
        result[i] = mean;
366
    }
367
}
368

    
369
double
370
CepstrumPitchTracker::calculatePeakProportion(const double *data, double abstot, int n)
371
{
372
    double aroundPeak = data[n];
373
    double peakProportion = 0.0;
374

    
375
    int i = n - 1;
376
    while (i > 0 && data[i] <= data[i+1]) {
377
        aroundPeak += fabs(data[i]);
378
        --i;
379
    }
380
    i = n + 1;
381
    while (i < m_bins && data[i] <= data[i-1]) {
382
        aroundPeak += fabs(data[i]);
383
        ++i;
384
    }
385
    peakProportion = aroundPeak / abstot;
386

    
387
    return peakProportion;
388
}
389

    
390
bool
391
CepstrumPitchTracker::acceptPeak(int n, double peakProportion)
392
{
393
    bool accept = false;
394

    
395
    if (abs(n - m_prevpeak) < 10) { //!!! should depend on bin count
396
        accept = true;
397
    } else if (peakProportion > m_prevprop * 2) {
398
        accept = true;
399
    }
400

    
401
    return accept;
402
}
403

    
404
CepstrumPitchTracker::FeatureSet
405
CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
406
{
407
    FeatureSet fs;
408

    
409
    int bs = m_blockSize;
410
    int hs = m_blockSize/2 + 1;
411

    
412
    double *rawcep = new double[bs];
413
    double *io = new double[bs];
414
    double *logmag = new double[bs];
415

    
416
    // The "inverse symmetric" method. Seems to be the most reliable
417
        
418
    for (int i = 0; i < hs; ++i) {
419

    
420
        double power =
421
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
422
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
423
        double mag = sqrt(power);
424
        
425
        double lm = log(mag + 0.00000001);
426
        
427
        logmag[i] = lm;
428
        if (i > 0) logmag[bs - i] = lm;
429
    }
430

    
431
    fft(bs, true, logmag, 0, rawcep, io);
432
    
433
    delete[] logmag;
434
    delete[] io;
435

    
436
    int n = m_bins;
437
    double *data = new double[n];
438
    filter(rawcep, data);
439
    delete[] rawcep;
440

    
441
    double abstot = 0.0;
442

    
443
    for (int i = 0; i < n; ++i) {
444
        abstot += fabs(data[i]);
445
    }
446

    
447
    double maxval = 0.0;
448
    int maxbin = -1;
449

    
450
    for (int i = 0; i < n; ++i) {
451
        if (data[i] > maxval) {
452
            maxval = data[i];
453
            maxbin = i;
454
        }
455
    }
456

    
457
    if (maxbin < 0) return fs;
458

    
459
    double peakfreq = m_inputSampleRate / (maxbin + m_binFrom);
460
    Hypothesis::Estimate e;
461
    e.freq = peakfreq;
462
    e.time = timestamp;
463

    
464
    m_accepted.advanceTime();
465
    for (int i = 0; i < m_possible.size(); ++i) {
466
        m_possible[i].advanceTime();
467
    }
468

    
469
    if (m_accepted.test(e)) {
470
        return fs;
471
    }
472

    
473
    //...
474

    
475
/*
476
    bool accepted = false;
477

478
    if (maxbin >= 0) {
479
        double pp = calculatePeakProportion(data, abstot, maxbin);
480
        if (acceptPeak(maxbin, pp)) {
481
            accepted = true;
482
        } else {
483
            // try a secondary peak
484
            maxval = 0.0;
485
            int secondbin = 0;
486
            for (int i = 1; i < n-1; ++i) {
487
                if (i != maxbin &&
488
                    data[i] > data[i-1] &&
489
                    data[i] > data[i+1] &&
490
                    data[i] > maxval) {
491
                    maxval = data[i];
492
                    secondbin = i;
493
                }
494
            }
495
            double spp = calculatePeakProportion(data, abstot, secondbin);
496
            if (acceptPeak(secondbin, spp)) {
497
                maxbin = secondbin;
498
                pp = spp;
499
                accepted = true;
500
            }
501
        }
502
        if (accepted) {
503
            m_prevpeak = maxbin;
504
            m_prevprop = pp;
505
        }
506
    }
507
*/
508
//    std::cerr << "peakProportion = " << peakProportion << std::endl;
509
//    std::cerr << "peak = " << m_inputSampleRate / (maxbin + m_binFrom) << std::endl;
510
//    std::cerr << "bins = " << m_bins << std::endl;
511

    
512
//    if (peakProportion >= (0.00006 * m_bins)) {
513
    if (accepted) {
514
        Feature f;
515
        f.hasTimestamp = true;
516
        f.timestamp = timestamp;
517
        f.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
518
        fs[0].push_back(f);
519
    }
520

    
521
    delete[] data;
522
    return fs;
523
}
524

    
525
CepstrumPitchTracker::FeatureSet
526
CepstrumPitchTracker::getRemainingFeatures()
527
{
528
    FeatureSet fs;
529
    return fs;
530
}
531

    
532
void
533
CepstrumPitchTracker::fft(unsigned int n, bool inverse,
534
                    double *ri, double *ii, double *ro, double *io)
535
{
536
    if (!ri || !ro || !io) return;
537

    
538
    unsigned int bits;
539
    unsigned int i, j, k, m;
540
    unsigned int blockSize, blockEnd;
541

    
542
    double tr, ti;
543

    
544
    if (n < 2) return;
545
    if (n & (n-1)) return;
546

    
547
    double angle = 2.0 * M_PI;
548
    if (inverse) angle = -angle;
549

    
550
    for (i = 0; ; ++i) {
551
        if (n & (1 << i)) {
552
            bits = i;
553
            break;
554
        }
555
    }
556

    
557
    static unsigned int tableSize = 0;
558
    static int *table = 0;
559

    
560
    if (tableSize != n) {
561

    
562
        delete[] table;
563

    
564
        table = new int[n];
565

    
566
        for (i = 0; i < n; ++i) {
567
        
568
            m = i;
569

    
570
            for (j = k = 0; j < bits; ++j) {
571
                k = (k << 1) | (m & 1);
572
                m >>= 1;
573
            }
574

    
575
            table[i] = k;
576
        }
577

    
578
        tableSize = n;
579
    }
580

    
581
    if (ii) {
582
        for (i = 0; i < n; ++i) {
583
            ro[table[i]] = ri[i];
584
            io[table[i]] = ii[i];
585
        }
586
    } else {
587
        for (i = 0; i < n; ++i) {
588
            ro[table[i]] = ri[i];
589
            io[table[i]] = 0.0;
590
        }
591
    }
592

    
593
    blockEnd = 1;
594

    
595
    for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
596

    
597
        double delta = angle / (double)blockSize;
598
        double sm2 = -sin(-2 * delta);
599
        double sm1 = -sin(-delta);
600
        double cm2 = cos(-2 * delta);
601
        double cm1 = cos(-delta);
602
        double w = 2 * cm1;
603
        double ar[3], ai[3];
604

    
605
        for (i = 0; i < n; i += blockSize) {
606

    
607
            ar[2] = cm2;
608
            ar[1] = cm1;
609

    
610
            ai[2] = sm2;
611
            ai[1] = sm1;
612

    
613
            for (j = i, m = 0; m < blockEnd; j++, m++) {
614

    
615
                ar[0] = w * ar[1] - ar[2];
616
                ar[2] = ar[1];
617
                ar[1] = ar[0];
618

    
619
                ai[0] = w * ai[1] - ai[2];
620
                ai[2] = ai[1];
621
                ai[1] = ai[0];
622

    
623
                k = j + blockEnd;
624
                tr = ar[0] * ro[k] - ai[0] * io[k];
625
                ti = ar[0] * io[k] + ai[0] * ro[k];
626

    
627
                ro[k] = ro[j] - tr;
628
                io[k] = io[j] - ti;
629

    
630
                ro[j] += tr;
631
                io[j] += ti;
632
            }
633
        }
634

    
635
        blockEnd = blockSize;
636
    }
637
}
638

    
639