To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstrumPitchTracker.cpp @ 9:83603936e4d2

History | View | Annotate | Download (13.5 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

    
23
#include "CepstrumPitchTracker.h"
24

    
25
#include <vector>
26
#include <algorithm>
27

    
28
#include <cstdio>
29
#include <cmath>
30
#include <complex>
31

    
32
using std::string;
33
using std::vector;
34

    
35
CepstrumPitchTracker::Hypothesis::Hypothesis(Estimate s)
36
{
37
    m_state = Provisional;
38
    m_pending.push_back(s);
39
    m_age = 0;
40
}
41

    
42
bool
43
CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s)
44
{
45
    if (m_pending.empty()) {
46
        return true;
47
    }
48
    Estimate last = m_pending[m_pending.size()-1];
49
    double r = s.freq / last.freq;
50
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
51
    return (cents > -200 && cents < 200);
52
}
53

    
54
bool 
55
CepstrumPitchTracker::Hypothesis::isSatisfied()
56
{
57
    return (m_pending.size() > 2);
58
}
59

    
60
bool
61
CepstrumPitchTracker::Hypothesis::test(Estimate s)
62
{
63
    if (m_state == Rejected || m_state == Expired) {
64
        return false;
65
    }
66

    
67
    if (++m_age > 3) {
68
        if (m_state == Satisfied) {
69
            m_state = Expired;
70
        } else {
71
            m_state = Rejected;
72
        }
73
        return false;
74
    }
75

    
76
    if (isWithinTolerance(s)) {
77
        m_pending.push_back(s);
78
        if (m_state == Provisional) {
79
            if (isSatisfied()) {
80
                m_state == Satisfied;
81
            }
82
        }
83
        m_age = 0;
84
        return true;
85
    }
86
    
87
    return false;
88
}
89

    
90
CepstrumPitchTracker::Hypothesis::State
91
CepstrumPitchTracker::Hypothesis::getState()
92
{
93
    return m_state;
94
}
95

    
96
CepstrumPitchTracker::Hypothesis::Estimates
97
CepstrumPitchTracker::Hypothesis::getAcceptedEstimates()
98
{
99
    if (m_state == Satisfied || m_state == Expired) {
100
        return m_pending;
101
    } else {
102
        return Estimates();
103
    }
104
}
105

    
106

    
107
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
108
    Plugin(inputSampleRate),
109
    m_channels(0),
110
    m_stepSize(256),
111
    m_blockSize(1024),
112
    m_fmin(50),
113
    m_fmax(1000),
114
    m_histlen(1),
115
    m_vflen(3),
116
    m_ppflen(4),
117
    m_binFrom(0),
118
    m_binTo(0),
119
    m_bins(0),
120
    m_accepted(0),
121
    m_history(0),
122
    m_ppfilter(0),
123
    m_prevpeak(0),
124
    m_prevprop(0)
125
{
126
}
127

    
128
CepstrumPitchTracker::~CepstrumPitchTracker()
129
{
130
    if (m_history) {
131
        for (int i = 0; i < m_histlen; ++i) {
132
            delete[] m_history[i];
133
        }
134
        delete[] m_history;
135
    }
136
    delete[] m_ppfilter;
137
}
138

    
139
string
140
CepstrumPitchTracker::getIdentifier() const
141
{
142
    return "cepstrum-pitch";
143
}
144

    
145
string
146
CepstrumPitchTracker::getName() const
147
{
148
    return "Cepstrum Pitch Tracker";
149
}
150

    
151
string
152
CepstrumPitchTracker::getDescription() const
153
{
154
    return "Estimate f0 of monophonic material using a cepstrum method.";
155
}
156

    
157
string
158
CepstrumPitchTracker::getMaker() const
159
{
160
    return "Chris Cannam";
161
}
162

    
163
int
164
CepstrumPitchTracker::getPluginVersion() const
165
{
166
    // Increment this each time you release a version that behaves
167
    // differently from the previous one
168
    return 1;
169
}
170

    
171
string
172
CepstrumPitchTracker::getCopyright() const
173
{
174
    return "Freely redistributable (BSD license)";
175
}
176

    
177
CepstrumPitchTracker::InputDomain
178
CepstrumPitchTracker::getInputDomain() const
179
{
180
    return FrequencyDomain;
181
}
182

    
183
size_t
184
CepstrumPitchTracker::getPreferredBlockSize() const
185
{
186
    return 1024;
187
}
188

    
189
size_t 
190
CepstrumPitchTracker::getPreferredStepSize() const
191
{
192
    return 256;
193
}
194

    
195
size_t
196
CepstrumPitchTracker::getMinChannelCount() const
197
{
198
    return 1;
199
}
200

    
201
size_t
202
CepstrumPitchTracker::getMaxChannelCount() const
203
{
204
    return 1;
205
}
206

    
207
CepstrumPitchTracker::ParameterList
208
CepstrumPitchTracker::getParameterDescriptors() const
209
{
210
    ParameterList list;
211
    return list;
212
}
213

    
214
float
215
CepstrumPitchTracker::getParameter(string identifier) const
216
{
217
    return 0.f;
218
}
219

    
220
void
221
CepstrumPitchTracker::setParameter(string identifier, float value) 
222
{
223
}
224

    
225
CepstrumPitchTracker::ProgramList
226
CepstrumPitchTracker::getPrograms() const
227
{
228
    ProgramList list;
229
    return list;
230
}
231

    
232
string
233
CepstrumPitchTracker::getCurrentProgram() const
234
{
235
    return ""; // no programs
236
}
237

    
238
void
239
CepstrumPitchTracker::selectProgram(string name)
240
{
241
}
242

    
243
CepstrumPitchTracker::OutputList
244
CepstrumPitchTracker::getOutputDescriptors() const
245
{
246
    OutputList outputs;
247

    
248
    int n = 0;
249

    
250
    OutputDescriptor d;
251

    
252
    d.identifier = "f0";
253
    d.name = "Estimated f0";
254
    d.description = "Estimated fundamental frequency";
255
    d.unit = "Hz";
256
    d.hasFixedBinCount = true;
257
    d.binCount = 1;
258
    d.hasKnownExtents = true;
259
    d.minValue = m_fmin;
260
    d.maxValue = m_fmax;
261
    d.isQuantized = false;
262
    d.sampleType = OutputDescriptor::FixedSampleRate;
263
    d.sampleRate = (m_inputSampleRate / m_stepSize);
264
    d.hasDuration = false;
265
    outputs.push_back(d);
266

    
267
    return outputs;
268
}
269

    
270
bool
271
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
272
{
273
    if (channels < getMinChannelCount() ||
274
        channels > getMaxChannelCount()) return false;
275

    
276
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
277
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
278
//              << std::endl;
279

    
280
    m_channels = channels;
281
    m_stepSize = stepSize;
282
    m_blockSize = blockSize;
283

    
284
    m_binFrom = int(m_inputSampleRate / m_fmax);
285
    m_binTo = int(m_inputSampleRate / m_fmin); 
286

    
287
    if (m_binTo >= (int)m_blockSize / 2) {
288
        m_binTo = m_blockSize / 2 - 1;
289
    }
290

    
291
    m_bins = (m_binTo - m_binFrom) + 1;
292

    
293
    m_history = new double *[m_histlen];
294
    for (int i = 0; i < m_histlen; ++i) {
295
        m_history[i] = new double[m_bins];
296
    }
297

    
298
    m_ppfilter = new double[m_ppflen];
299

    
300
    reset();
301

    
302
    return true;
303
}
304

    
305
void
306
CepstrumPitchTracker::reset()
307
{
308
    for (int i = 0; i < m_histlen; ++i) {
309
        for (int j = 0; j < m_bins; ++j) {
310
            m_history[i][j] = 0.0;
311
        }
312
    }
313
    for (int i = 0; i < m_ppflen; ++i) {
314
        m_ppfilter[i] = 0.0;
315
    }
316
}
317

    
318
void
319
CepstrumPitchTracker::filter(const double *cep, double *result)
320
{
321
    int hix = m_histlen - 1; // current history index
322

    
323
    // roll back the history
324
    if (m_histlen > 1) {
325
        double *oldest = m_history[0];
326
        for (int i = 1; i < m_histlen; ++i) {
327
            m_history[i-1] = m_history[i];
328
        }
329
        // and stick this back in the newest spot, to recycle
330
        m_history[hix] = oldest;
331
    }
332

    
333
    for (int i = 0; i < m_bins; ++i) {
334
        double v = 0;
335
        int n = 0;
336
        // average according to the vertical filter length
337
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
338
            int ix = i + m_binFrom + j;
339
            if (ix >= 0 && ix < m_blockSize) {
340
                v += cep[ix];
341
                ++n;
342
            }
343
        }
344
        m_history[hix][i] = v / n;
345
    }
346

    
347
    for (int i = 0; i < m_bins; ++i) {
348
        double mean = 0.0;
349
        for (int j = 0; j < m_histlen; ++j) {
350
            mean += m_history[j][i];
351
        }
352
        mean /= m_histlen;
353
        result[i] = mean;
354
    }
355
}
356

    
357
double
358
CepstrumPitchTracker::calculatePeakProportion(const double *data, double abstot, int n)
359
{
360
    double aroundPeak = data[n];
361
    double peakProportion = 0.0;
362

    
363
    int i = n - 1;
364
    while (i > 0 && data[i] <= data[i+1]) {
365
        aroundPeak += fabs(data[i]);
366
        --i;
367
    }
368
    i = n + 1;
369
    while (i < m_bins && data[i] <= data[i-1]) {
370
        aroundPeak += fabs(data[i]);
371
        ++i;
372
    }
373
    peakProportion = aroundPeak / abstot;
374

    
375
    return peakProportion;
376
}
377

    
378
bool
379
CepstrumPitchTracker::acceptPeak(int n, double peakProportion)
380
{
381
    bool accept = false;
382

    
383
    if (abs(n - m_prevpeak) < 10) { //!!! should depend on bin count
384
        std::cerr << "accepting " << n << " [as prevpeak was " << m_prevpeak
385
                  << "]" << std::endl;
386
        accept = true;
387
    } else if (peakProportion > m_prevprop * 2) {
388
        std::cerr << "accepting " << n << " [as peakProportion " << peakProportion << " much higher than prev " << m_prevprop << "]" << std::endl;
389
        accept = true;
390
    } else {
391
        std::cerr << "rejecting " << n << " with " << peakProportion
392
                  << " [prevpeak " << m_prevpeak
393
                  << ", prevprop " << m_prevprop << "]" << std::endl;
394
    }
395

    
396
    return accept;
397
}
398

    
399
void
400
CepstrumPitchTracker::updatePropFilter(double prop)
401
{
402
    double tot = 0.0;
403
    for (int i = 1; i < m_ppflen; ++i) {
404
        double val = m_ppfilter[i];
405
        m_ppfilter[i-1] = val;
406
        tot += val;
407
    }
408
    double mean = tot / (m_ppflen - 1);
409
    m_ppfilter[m_ppflen-1] = prop;
410
    m_prevprop = mean;
411
}
412

    
413
CepstrumPitchTracker::FeatureSet
414
CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
415
{
416
    FeatureSet fs;
417

    
418
    int bs = m_blockSize;
419
    int hs = m_blockSize/2 + 1;
420

    
421
    double *rawcep = new double[bs];
422
    double *io = new double[bs];
423
    double *logmag = new double[bs];
424

    
425
    // The "inverse symmetric" method. Seems to be the most reliable
426
        
427
    for (int i = 0; i < hs; ++i) {
428

    
429
        double power =
430
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
431
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
432
        double mag = sqrt(power);
433
        
434
        double lm = log(mag + 0.00000001);
435
        
436
        logmag[i] = lm;
437
        if (i > 0) logmag[bs - i] = lm;
438
    }
439

    
440
    fft(bs, true, logmag, 0, rawcep, io);
441
    
442
    delete[] logmag;
443
    delete[] io;
444

    
445
    int n = m_bins;
446
    double *data = new double[n];
447
    filter(rawcep, data);
448
    delete[] rawcep;
449

    
450
    double abstot = 0.0;
451

    
452
    for (int i = 0; i < n; ++i) {
453
        abstot += fabs(data[i]);
454
    }
455

    
456
    double maxval = 0.0;
457
    int maxbin = -1;
458

    
459
    for (int i = 0; i < n; ++i) {
460
        if (data[i] > maxval) {
461
            maxval = data[i];
462
            maxbin = i;
463
        }
464
    }
465

    
466
    bool accepted = false;
467

    
468
    if (maxbin >= 0) {
469
        double pp = calculatePeakProportion(data, abstot, maxbin);
470
        if (acceptPeak(maxbin, pp)) {
471
            accepted = true;
472
        } else {
473
            // try a secondary peak
474
            maxval = 0.0;
475
            int secondbin = 0;
476
            for (int i = 1; i < n-1; ++i) {
477
                if (i != maxbin &&
478
                    data[i] > data[i-1] &&
479
                    data[i] > data[i+1] &&
480
                    data[i] > maxval) {
481
                    maxval = data[i];
482
                    secondbin = i;
483
                }
484
            }
485
            double spp = calculatePeakProportion(data, abstot, secondbin);
486
            if (acceptPeak(secondbin, spp)) {
487
                maxbin = secondbin;
488
                pp = spp;
489
                accepted = true;
490
            }
491
        }
492
        if (accepted) {
493
            m_prevpeak = maxbin;
494
        }
495
        updatePropFilter(pp);
496
    }
497
            
498
//    std::cerr << "peakProportion = " << peakProportion << std::endl;
499
//    std::cerr << "peak = " << m_inputSampleRate / (maxbin + m_binFrom) << std::endl;
500
//    std::cerr << "bins = " << m_bins << std::endl;
501

    
502
//    if (peakProportion >= (0.00006 * m_bins)) {
503
    if (accepted) {
504
        Feature f;
505
        f.hasTimestamp = true;
506
        f.timestamp = timestamp;
507
        f.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
508
        fs[0].push_back(f);
509
    }
510

    
511
    delete[] data;
512
    return fs;
513
}
514

    
515
CepstrumPitchTracker::FeatureSet
516
CepstrumPitchTracker::getRemainingFeatures()
517
{
518
    FeatureSet fs;
519
    return fs;
520
}
521

    
522
void
523
CepstrumPitchTracker::fft(unsigned int n, bool inverse,
524
                    double *ri, double *ii, double *ro, double *io)
525
{
526
    if (!ri || !ro || !io) return;
527

    
528
    unsigned int bits;
529
    unsigned int i, j, k, m;
530
    unsigned int blockSize, blockEnd;
531

    
532
    double tr, ti;
533

    
534
    if (n < 2) return;
535
    if (n & (n-1)) return;
536

    
537
    double angle = 2.0 * M_PI;
538
    if (inverse) angle = -angle;
539

    
540
    for (i = 0; ; ++i) {
541
        if (n & (1 << i)) {
542
            bits = i;
543
            break;
544
        }
545
    }
546

    
547
    static unsigned int tableSize = 0;
548
    static int *table = 0;
549

    
550
    if (tableSize != n) {
551

    
552
        delete[] table;
553

    
554
        table = new int[n];
555

    
556
        for (i = 0; i < n; ++i) {
557
        
558
            m = i;
559

    
560
            for (j = k = 0; j < bits; ++j) {
561
                k = (k << 1) | (m & 1);
562
                m >>= 1;
563
            }
564

    
565
            table[i] = k;
566
        }
567

    
568
        tableSize = n;
569
    }
570

    
571
    if (ii) {
572
        for (i = 0; i < n; ++i) {
573
            ro[table[i]] = ri[i];
574
            io[table[i]] = ii[i];
575
        }
576
    } else {
577
        for (i = 0; i < n; ++i) {
578
            ro[table[i]] = ri[i];
579
            io[table[i]] = 0.0;
580
        }
581
    }
582

    
583
    blockEnd = 1;
584

    
585
    for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
586

    
587
        double delta = angle / (double)blockSize;
588
        double sm2 = -sin(-2 * delta);
589
        double sm1 = -sin(-delta);
590
        double cm2 = cos(-2 * delta);
591
        double cm1 = cos(-delta);
592
        double w = 2 * cm1;
593
        double ar[3], ai[3];
594

    
595
        for (i = 0; i < n; i += blockSize) {
596

    
597
            ar[2] = cm2;
598
            ar[1] = cm1;
599

    
600
            ai[2] = sm2;
601
            ai[1] = sm1;
602

    
603
            for (j = i, m = 0; m < blockEnd; j++, m++) {
604

    
605
                ar[0] = w * ar[1] - ar[2];
606
                ar[2] = ar[1];
607
                ar[1] = ar[0];
608

    
609
                ai[0] = w * ai[1] - ai[2];
610
                ai[2] = ai[1];
611
                ai[1] = ai[0];
612

    
613
                k = j + blockEnd;
614
                tr = ar[0] * ro[k] - ai[0] * io[k];
615
                ti = ar[0] * io[k] + ai[0] * ro[k];
616

    
617
                ro[k] = ro[j] - tr;
618
                io[k] = io[j] - ti;
619

    
620
                ro[j] += tr;
621
                io[j] += ti;
622
            }
623
        }
624

    
625
        blockEnd = blockSize;
626
    }
627
}
628

    
629