To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstrumPitchTracker.cpp @ 7:32defdb2f9d9

History | View | Annotate | Download (12.6 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

    
23
#include "CepstrumPitchTracker.h"
24

    
25
#include <vector>
26
#include <algorithm>
27

    
28
#include <cstdio>
29
#include <cmath>
30
#include <complex>
31

    
32
using std::string;
33
using std::vector;
34

    
35
CepstrumPitchTracker::Hypothesis::Hypothesis(Estimate s)
36
{
37
    m_state = Provisional;
38
    m_pending.push_back(s);
39
    m_age = 0;
40
}
41

    
42
bool
43
CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s)
44
{
45
    if (m_pending.empty()) {
46
        return true;
47
    }
48
    Estimate last = m_pending[m_pending.size()-1];
49
    double r = s.freq / last.freq;
50
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
51
    return (cents > -200 && cents < 200);
52
}
53

    
54
bool 
55
CepstrumPitchTracker::Hypothesis::isSatisfied()
56
{
57
    return (m_pending.size() > 2);
58
}
59

    
60
bool
61
CepstrumPitchTracker::Hypothesis::test(Estimate s)
62
{
63
    if (m_state == Rejected || m_state == Expired) {
64
        return false;
65
    }
66

    
67
    if (++m_age > 3) {
68
        if (m_state == Satisfied) {
69
            m_state = Expired;
70
        } else {
71
            m_state = Rejected;
72
        }
73
        return false;
74
    }
75

    
76
    if (isWithinTolerance(s)) {
77
        m_pending.push_back(s);
78
        if (m_state == Provisional) {
79
            if (isSatisfied()) {
80
                m_state == Satisfied;
81
            }
82
        }
83
        m_age = 0;
84
        return true;
85
    }
86
    
87
    return false;
88
}
89

    
90
CepstrumPitchTracker::Hypothesis::State
91
CepstrumPitchTracker::Hypothesis::getState()
92
{
93
    return m_state;
94
}
95

    
96
CepstrumPitchTracker::Hypothesis::Estimates
97
CepstrumPitchTracker::Hypothesis::getAcceptedEstimates()
98
{
99
    if (m_state == Satisfied || m_state == Expired) {
100
        return m_pending;
101
    } else {
102
        return Estimates();
103
    }
104
}
105

    
106

    
107
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
108
    Plugin(inputSampleRate),
109
    m_channels(0),
110
    m_stepSize(256),
111
    m_blockSize(1024),
112
    m_fmin(50),
113
    m_fmax(1000),
114
    m_histlen(1),
115
    m_vflen(3),
116
    m_binFrom(0),
117
    m_binTo(0),
118
    m_bins(0),
119
    m_accepted(0),
120
    m_history(0),
121
    m_prevpeak(0),
122
    m_prevprop(0)
123
{
124
}
125

    
126
CepstrumPitchTracker::~CepstrumPitchTracker()
127
{
128
    if (m_history) {
129
        for (int i = 0; i < m_histlen; ++i) {
130
            delete[] m_history[i];
131
        }
132
        delete[] m_history;
133
    }
134
}
135

    
136
string
137
CepstrumPitchTracker::getIdentifier() const
138
{
139
    return "cepstrum-pitch";
140
}
141

    
142
string
143
CepstrumPitchTracker::getName() const
144
{
145
    return "Cepstrum Pitch Tracker";
146
}
147

    
148
string
149
CepstrumPitchTracker::getDescription() const
150
{
151
    return "Estimate f0 of monophonic material using a cepstrum method.";
152
}
153

    
154
string
155
CepstrumPitchTracker::getMaker() const
156
{
157
    return "Chris Cannam";
158
}
159

    
160
int
161
CepstrumPitchTracker::getPluginVersion() const
162
{
163
    // Increment this each time you release a version that behaves
164
    // differently from the previous one
165
    return 1;
166
}
167

    
168
string
169
CepstrumPitchTracker::getCopyright() const
170
{
171
    return "Freely redistributable (BSD license)";
172
}
173

    
174
CepstrumPitchTracker::InputDomain
175
CepstrumPitchTracker::getInputDomain() const
176
{
177
    return FrequencyDomain;
178
}
179

    
180
size_t
181
CepstrumPitchTracker::getPreferredBlockSize() const
182
{
183
    return 1024;
184
}
185

    
186
size_t 
187
CepstrumPitchTracker::getPreferredStepSize() const
188
{
189
    return 256;
190
}
191

    
192
size_t
193
CepstrumPitchTracker::getMinChannelCount() const
194
{
195
    return 1;
196
}
197

    
198
size_t
199
CepstrumPitchTracker::getMaxChannelCount() const
200
{
201
    return 1;
202
}
203

    
204
CepstrumPitchTracker::ParameterList
205
CepstrumPitchTracker::getParameterDescriptors() const
206
{
207
    ParameterList list;
208
    return list;
209
}
210

    
211
float
212
CepstrumPitchTracker::getParameter(string identifier) const
213
{
214
    return 0.f;
215
}
216

    
217
void
218
CepstrumPitchTracker::setParameter(string identifier, float value) 
219
{
220
}
221

    
222
CepstrumPitchTracker::ProgramList
223
CepstrumPitchTracker::getPrograms() const
224
{
225
    ProgramList list;
226
    return list;
227
}
228

    
229
string
230
CepstrumPitchTracker::getCurrentProgram() const
231
{
232
    return ""; // no programs
233
}
234

    
235
void
236
CepstrumPitchTracker::selectProgram(string name)
237
{
238
}
239

    
240
CepstrumPitchTracker::OutputList
241
CepstrumPitchTracker::getOutputDescriptors() const
242
{
243
    OutputList outputs;
244

    
245
    int n = 0;
246

    
247
    OutputDescriptor d;
248

    
249
    d.identifier = "f0";
250
    d.name = "Estimated f0";
251
    d.description = "Estimated fundamental frequency";
252
    d.unit = "Hz";
253
    d.hasFixedBinCount = true;
254
    d.binCount = 1;
255
    d.hasKnownExtents = true;
256
    d.minValue = m_fmin;
257
    d.maxValue = m_fmax;
258
    d.isQuantized = false;
259
    d.sampleType = OutputDescriptor::FixedSampleRate;
260
    d.sampleRate = (m_inputSampleRate / m_stepSize);
261
    d.hasDuration = false;
262
    outputs.push_back(d);
263

    
264
    return outputs;
265
}
266

    
267
bool
268
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
269
{
270
    if (channels < getMinChannelCount() ||
271
        channels > getMaxChannelCount()) return false;
272

    
273
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
274
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
275
//              << std::endl;
276

    
277
    m_channels = channels;
278
    m_stepSize = stepSize;
279
    m_blockSize = blockSize;
280

    
281
    m_binFrom = int(m_inputSampleRate / m_fmax);
282
    m_binTo = int(m_inputSampleRate / m_fmin); 
283

    
284
    if (m_binTo >= (int)m_blockSize / 2) {
285
        m_binTo = m_blockSize / 2 - 1;
286
    }
287

    
288
    m_bins = (m_binTo - m_binFrom) + 1;
289

    
290
    m_history = new double *[m_histlen];
291
    for (int i = 0; i < m_histlen; ++i) {
292
        m_history[i] = new double[m_bins];
293
    }
294

    
295
    reset();
296

    
297
    return true;
298
}
299

    
300
void
301
CepstrumPitchTracker::reset()
302
{
303
    for (int i = 0; i < m_histlen; ++i) {
304
        for (int j = 0; j < m_bins; ++j) {
305
            m_history[i][j] = 0.0;
306
        }
307
    }
308
}
309

    
310
void
311
CepstrumPitchTracker::filter(const double *cep, double *result)
312
{
313
    int hix = m_histlen - 1; // current history index
314

    
315
    // roll back the history
316
    if (m_histlen > 1) {
317
        double *oldest = m_history[0];
318
        for (int i = 1; i < m_histlen; ++i) {
319
            m_history[i-1] = m_history[i];
320
        }
321
        // and stick this back in the newest spot, to recycle
322
        m_history[hix] = oldest;
323
    }
324

    
325
    for (int i = 0; i < m_bins; ++i) {
326
        double v = 0;
327
        int n = 0;
328
        // average according to the vertical filter length
329
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
330
            int ix = i + m_binFrom + j;
331
            if (ix >= 0 && ix < m_blockSize) {
332
                v += cep[ix];
333
                ++n;
334
            }
335
        }
336
        m_history[hix][i] = v / n;
337
    }
338

    
339
    for (int i = 0; i < m_bins; ++i) {
340
        double mean = 0.0;
341
        for (int j = 0; j < m_histlen; ++j) {
342
            mean += m_history[j][i];
343
        }
344
        mean /= m_histlen;
345
        result[i] = mean;
346
    }
347
}
348

    
349
double
350
CepstrumPitchTracker::calculatePeakProportion(const double *data, double abstot, int n)
351
{
352
    double aroundPeak = data[n];
353
    double peakProportion = 0.0;
354

    
355
    int i = n - 1;
356
    while (i > 0 && data[i] <= data[i+1]) {
357
        aroundPeak += fabs(data[i]);
358
        --i;
359
    }
360
    i = n + 1;
361
    while (i < m_bins && data[i] <= data[i-1]) {
362
        aroundPeak += fabs(data[i]);
363
        ++i;
364
    }
365
    peakProportion = aroundPeak / abstot;
366

    
367
    return peakProportion;
368
}
369

    
370
bool
371
CepstrumPitchTracker::acceptPeak(int n, double peakProportion)
372
{
373
    bool accept = false;
374

    
375
    if (abs(n - m_prevpeak) < 10) { //!!! should depend on bin count
376
        accept = true;
377
    } else if (peakProportion > m_prevprop * 2) {
378
        accept = true;
379
    }
380

    
381
    return accept;
382
}
383

    
384
CepstrumPitchTracker::FeatureSet
385
CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
386
{
387
    FeatureSet fs;
388

    
389
    int bs = m_blockSize;
390
    int hs = m_blockSize/2 + 1;
391

    
392
    double *rawcep = new double[bs];
393
    double *io = new double[bs];
394
    double *logmag = new double[bs];
395

    
396
    // The "inverse symmetric" method. Seems to be the most reliable
397
        
398
    for (int i = 0; i < hs; ++i) {
399

    
400
        double power =
401
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
402
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
403
        double mag = sqrt(power);
404
        
405
        double lm = log(mag + 0.00000001);
406
        
407
        logmag[i] = lm;
408
        if (i > 0) logmag[bs - i] = lm;
409
    }
410

    
411
    fft(bs, true, logmag, 0, rawcep, io);
412
    
413
    delete[] logmag;
414
    delete[] io;
415

    
416
    int n = m_bins;
417
    double *data = new double[n];
418
    filter(rawcep, data);
419
    delete[] rawcep;
420

    
421
    double abstot = 0.0;
422

    
423
    for (int i = 0; i < n; ++i) {
424
        abstot += fabs(data[i]);
425
    }
426

    
427
    double maxval = 0.0;
428
    int maxbin = -1;
429

    
430
    for (int i = 0; i < n; ++i) {
431
        if (data[i] > maxval) {
432
            maxval = data[i];
433
            maxbin = i;
434
        }
435
    }
436

    
437
    bool accepted = false;
438

    
439
    if (maxbin >= 0) {
440
        double pp = calculatePeakProportion(data, abstot, maxbin);
441
        if (acceptPeak(maxbin, pp)) {
442
            accepted = true;
443
        } else {
444
            // try a secondary peak
445
            maxval = 0.0;
446
            int secondbin = 0;
447
            for (int i = 1; i < n-1; ++i) {
448
                if (i != maxbin &&
449
                    data[i] > data[i-1] &&
450
                    data[i] > data[i+1] &&
451
                    data[i] > maxval) {
452
                    maxval = data[i];
453
                    secondbin = i;
454
                }
455
            }
456
            double spp = calculatePeakProportion(data, abstot, secondbin);
457
            if (acceptPeak(secondbin, spp)) {
458
                maxbin = secondbin;
459
                pp = spp;
460
                accepted = true;
461
            }
462
        }
463
        if (accepted) {
464
            m_prevpeak = maxbin;
465
            m_prevprop = pp;
466
        }
467
    }
468
            
469
//    std::cerr << "peakProportion = " << peakProportion << std::endl;
470
//    std::cerr << "peak = " << m_inputSampleRate / (maxbin + m_binFrom) << std::endl;
471
//    std::cerr << "bins = " << m_bins << std::endl;
472

    
473
//    if (peakProportion >= (0.00006 * m_bins)) {
474
    if (accepted) {
475
        Feature f;
476
        f.hasTimestamp = true;
477
        f.timestamp = timestamp;
478
        f.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
479
        fs[0].push_back(f);
480
    }
481

    
482
    delete[] data;
483
    return fs;
484
}
485

    
486
CepstrumPitchTracker::FeatureSet
487
CepstrumPitchTracker::getRemainingFeatures()
488
{
489
    FeatureSet fs;
490
    return fs;
491
}
492

    
493
void
494
CepstrumPitchTracker::fft(unsigned int n, bool inverse,
495
                    double *ri, double *ii, double *ro, double *io)
496
{
497
    if (!ri || !ro || !io) return;
498

    
499
    unsigned int bits;
500
    unsigned int i, j, k, m;
501
    unsigned int blockSize, blockEnd;
502

    
503
    double tr, ti;
504

    
505
    if (n < 2) return;
506
    if (n & (n-1)) return;
507

    
508
    double angle = 2.0 * M_PI;
509
    if (inverse) angle = -angle;
510

    
511
    for (i = 0; ; ++i) {
512
        if (n & (1 << i)) {
513
            bits = i;
514
            break;
515
        }
516
    }
517

    
518
    static unsigned int tableSize = 0;
519
    static int *table = 0;
520

    
521
    if (tableSize != n) {
522

    
523
        delete[] table;
524

    
525
        table = new int[n];
526

    
527
        for (i = 0; i < n; ++i) {
528
        
529
            m = i;
530

    
531
            for (j = k = 0; j < bits; ++j) {
532
                k = (k << 1) | (m & 1);
533
                m >>= 1;
534
            }
535

    
536
            table[i] = k;
537
        }
538

    
539
        tableSize = n;
540
    }
541

    
542
    if (ii) {
543
        for (i = 0; i < n; ++i) {
544
            ro[table[i]] = ri[i];
545
            io[table[i]] = ii[i];
546
        }
547
    } else {
548
        for (i = 0; i < n; ++i) {
549
            ro[table[i]] = ri[i];
550
            io[table[i]] = 0.0;
551
        }
552
    }
553

    
554
    blockEnd = 1;
555

    
556
    for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
557

    
558
        double delta = angle / (double)blockSize;
559
        double sm2 = -sin(-2 * delta);
560
        double sm1 = -sin(-delta);
561
        double cm2 = cos(-2 * delta);
562
        double cm1 = cos(-delta);
563
        double w = 2 * cm1;
564
        double ar[3], ai[3];
565

    
566
        for (i = 0; i < n; i += blockSize) {
567

    
568
            ar[2] = cm2;
569
            ar[1] = cm1;
570

    
571
            ai[2] = sm2;
572
            ai[1] = sm1;
573

    
574
            for (j = i, m = 0; m < blockEnd; j++, m++) {
575

    
576
                ar[0] = w * ar[1] - ar[2];
577
                ar[2] = ar[1];
578
                ar[1] = ar[0];
579

    
580
                ai[0] = w * ai[1] - ai[2];
581
                ai[2] = ai[1];
582
                ai[1] = ai[0];
583

    
584
                k = j + blockEnd;
585
                tr = ar[0] * ro[k] - ai[0] * io[k];
586
                ti = ar[0] * io[k] + ai[0] * ro[k];
587

    
588
                ro[k] = ro[j] - tr;
589
                io[k] = io[j] - ti;
590

    
591
                ro[j] += tr;
592
                io[j] += ti;
593
            }
594
        }
595

    
596
        blockEnd = blockSize;
597
    }
598
}
599

    
600