To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstrumPitchTracker.cpp @ 6:291c75f6e837

History | View | Annotate | Download (11.1 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

    
23
#include "CepstrumPitchTracker.h"
24

    
25
#include <vector>
26
#include <algorithm>
27

    
28
#include <cstdio>
29
#include <cmath>
30
#include <complex>
31

    
32
using std::string;
33

    
34
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
35
    Plugin(inputSampleRate),
36
    m_channels(0),
37
    m_stepSize(256),
38
    m_blockSize(1024),
39
    m_fmin(50),
40
    m_fmax(1000),
41
    m_histlen(1),
42
    m_vflen(3),
43
    m_binFrom(0),
44
    m_binTo(0),
45
    m_bins(0),
46
    m_history(0),
47
    m_prevpeak(0),
48
    m_prevprop(0)
49
{
50
}
51

    
52
CepstrumPitchTracker::~CepstrumPitchTracker()
53
{
54
    if (m_history) {
55
        for (int i = 0; i < m_histlen; ++i) {
56
            delete[] m_history[i];
57
        }
58
        delete[] m_history;
59
    }
60
}
61

    
62
string
63
CepstrumPitchTracker::getIdentifier() const
64
{
65
    return "cepstrum-pitch";
66
}
67

    
68
string
69
CepstrumPitchTracker::getName() const
70
{
71
    return "Cepstrum Pitch Tracker";
72
}
73

    
74
string
75
CepstrumPitchTracker::getDescription() const
76
{
77
    return "Estimate f0 of monophonic material using a cepstrum method.";
78
}
79

    
80
string
81
CepstrumPitchTracker::getMaker() const
82
{
83
    return "Chris Cannam";
84
}
85

    
86
int
87
CepstrumPitchTracker::getPluginVersion() const
88
{
89
    // Increment this each time you release a version that behaves
90
    // differently from the previous one
91
    return 1;
92
}
93

    
94
string
95
CepstrumPitchTracker::getCopyright() const
96
{
97
    return "Freely redistributable (BSD license)";
98
}
99

    
100
CepstrumPitchTracker::InputDomain
101
CepstrumPitchTracker::getInputDomain() const
102
{
103
    return FrequencyDomain;
104
}
105

    
106
size_t
107
CepstrumPitchTracker::getPreferredBlockSize() const
108
{
109
    return 1024;
110
}
111

    
112
size_t 
113
CepstrumPitchTracker::getPreferredStepSize() const
114
{
115
    return 256;
116
}
117

    
118
size_t
119
CepstrumPitchTracker::getMinChannelCount() const
120
{
121
    return 1;
122
}
123

    
124
size_t
125
CepstrumPitchTracker::getMaxChannelCount() const
126
{
127
    return 1;
128
}
129

    
130
CepstrumPitchTracker::ParameterList
131
CepstrumPitchTracker::getParameterDescriptors() const
132
{
133
    ParameterList list;
134
    return list;
135
}
136

    
137
float
138
CepstrumPitchTracker::getParameter(string identifier) const
139
{
140
    return 0.f;
141
}
142

    
143
void
144
CepstrumPitchTracker::setParameter(string identifier, float value) 
145
{
146
}
147

    
148
CepstrumPitchTracker::ProgramList
149
CepstrumPitchTracker::getPrograms() const
150
{
151
    ProgramList list;
152
    return list;
153
}
154

    
155
string
156
CepstrumPitchTracker::getCurrentProgram() const
157
{
158
    return ""; // no programs
159
}
160

    
161
void
162
CepstrumPitchTracker::selectProgram(string name)
163
{
164
}
165

    
166
CepstrumPitchTracker::OutputList
167
CepstrumPitchTracker::getOutputDescriptors() const
168
{
169
    OutputList outputs;
170

    
171
    int n = 0;
172

    
173
    OutputDescriptor d;
174

    
175
    d.identifier = "f0";
176
    d.name = "Estimated f0";
177
    d.description = "Estimated fundamental frequency";
178
    d.unit = "Hz";
179
    d.hasFixedBinCount = true;
180
    d.binCount = 1;
181
    d.hasKnownExtents = true;
182
    d.minValue = m_fmin;
183
    d.maxValue = m_fmax;
184
    d.isQuantized = false;
185
    d.sampleType = OutputDescriptor::FixedSampleRate;
186
    d.sampleRate = (m_inputSampleRate / m_stepSize);
187
    d.hasDuration = false;
188
    outputs.push_back(d);
189

    
190
    return outputs;
191
}
192

    
193
bool
194
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
195
{
196
    if (channels < getMinChannelCount() ||
197
        channels > getMaxChannelCount()) return false;
198

    
199
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
200
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
201
//              << std::endl;
202

    
203
    m_channels = channels;
204
    m_stepSize = stepSize;
205
    m_blockSize = blockSize;
206

    
207
    m_binFrom = int(m_inputSampleRate / m_fmax);
208
    m_binTo = int(m_inputSampleRate / m_fmin); 
209

    
210
    if (m_binTo >= (int)m_blockSize / 2) {
211
        m_binTo = m_blockSize / 2 - 1;
212
    }
213

    
214
    m_bins = (m_binTo - m_binFrom) + 1;
215

    
216
    m_history = new double *[m_histlen];
217
    for (int i = 0; i < m_histlen; ++i) {
218
        m_history[i] = new double[m_bins];
219
    }
220

    
221
    reset();
222

    
223
    return true;
224
}
225

    
226
void
227
CepstrumPitchTracker::reset()
228
{
229
    for (int i = 0; i < m_histlen; ++i) {
230
        for (int j = 0; j < m_bins; ++j) {
231
            m_history[i][j] = 0.0;
232
        }
233
    }
234
}
235

    
236
void
237
CepstrumPitchTracker::filter(const double *cep, double *result)
238
{
239
    int hix = m_histlen - 1; // current history index
240

    
241
    // roll back the history
242
    if (m_histlen > 1) {
243
        double *oldest = m_history[0];
244
        for (int i = 1; i < m_histlen; ++i) {
245
            m_history[i-1] = m_history[i];
246
        }
247
        // and stick this back in the newest spot, to recycle
248
        m_history[hix] = oldest;
249
    }
250

    
251
    for (int i = 0; i < m_bins; ++i) {
252
        double v = 0;
253
        int n = 0;
254
        // average according to the vertical filter length
255
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
256
            int ix = i + m_binFrom + j;
257
            if (ix >= 0 && ix < m_blockSize) {
258
                v += cep[ix];
259
                ++n;
260
            }
261
        }
262
        m_history[hix][i] = v / n;
263
    }
264

    
265
    for (int i = 0; i < m_bins; ++i) {
266
        double mean = 0.0;
267
        for (int j = 0; j < m_histlen; ++j) {
268
            mean += m_history[j][i];
269
        }
270
        mean /= m_histlen;
271
        result[i] = mean;
272
    }
273
}
274

    
275
double
276
CepstrumPitchTracker::calculatePeakProportion(const double *data, double abstot, int n)
277
{
278
    double aroundPeak = data[n];
279
    double peakProportion = 0.0;
280

    
281
    int i = n - 1;
282
    while (i > 0 && data[i] <= data[i+1]) {
283
        aroundPeak += fabs(data[i]);
284
        --i;
285
    }
286
    i = n + 1;
287
    while (i < m_bins && data[i] <= data[i-1]) {
288
        aroundPeak += fabs(data[i]);
289
        ++i;
290
    }
291
    peakProportion = aroundPeak / abstot;
292

    
293
    return peakProportion;
294
}
295

    
296
bool
297
CepstrumPitchTracker::acceptPeak(int n, double peakProportion)
298
{
299
    bool accept = false;
300

    
301
    if (abs(n - m_prevpeak) < 10) { //!!! should depend on bin count
302
        accept = true;
303
    } else if (peakProportion > m_prevprop * 2) {
304
        accept = true;
305
    }
306

    
307
    return accept;
308
}
309

    
310
CepstrumPitchTracker::FeatureSet
311
CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
312
{
313
    FeatureSet fs;
314

    
315
    int bs = m_blockSize;
316
    int hs = m_blockSize/2 + 1;
317

    
318
    double *rawcep = new double[bs];
319
    double *io = new double[bs];
320
    double *logmag = new double[bs];
321

    
322
    // The "inverse symmetric" method. Seems to be the most reliable
323
        
324
    for (int i = 0; i < hs; ++i) {
325

    
326
        double power =
327
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
328
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
329
        double mag = sqrt(power);
330
        
331
        double lm = log(mag + 0.00000001);
332
        
333
        logmag[i] = lm;
334
        if (i > 0) logmag[bs - i] = lm;
335
    }
336

    
337
    fft(bs, true, logmag, 0, rawcep, io);
338
    
339
    delete[] logmag;
340
    delete[] io;
341

    
342
    int n = m_bins;
343
    double *data = new double[n];
344
    filter(rawcep, data);
345
    delete[] rawcep;
346

    
347
    double abstot = 0.0;
348

    
349
    for (int i = 0; i < n; ++i) {
350
        abstot += fabs(data[i]);
351
    }
352

    
353
    double maxval = 0.0;
354
    int maxbin = -1;
355

    
356
    for (int i = 0; i < n; ++i) {
357
        if (data[i] > maxval) {
358
            maxval = data[i];
359
            maxbin = i;
360
        }
361
    }
362

    
363
    bool accepted = false;
364

    
365
    if (maxbin >= 0) {
366
        double pp = calculatePeakProportion(data, abstot, maxbin);
367
        if (acceptPeak(maxbin, pp)) {
368
            accepted = true;
369
        } else {
370
            // try a secondary peak
371
            maxval = 0.0;
372
            int secondbin = 0;
373
            for (int i = 1; i < n-1; ++i) {
374
                if (i != maxbin &&
375
                    data[i] > data[i-1] &&
376
                    data[i] > data[i+1] &&
377
                    data[i] > maxval) {
378
                    maxval = data[i];
379
                    secondbin = i;
380
                }
381
            }
382
            double spp = calculatePeakProportion(data, abstot, secondbin);
383
            if (acceptPeak(secondbin, spp)) {
384
                maxbin = secondbin;
385
                pp = spp;
386
                accepted = true;
387
            }
388
        }
389
        if (accepted) {
390
            m_prevpeak = maxbin;
391
            m_prevprop = pp;
392
        }
393
    }
394
            
395
//    std::cerr << "peakProportion = " << peakProportion << std::endl;
396
//    std::cerr << "peak = " << m_inputSampleRate / (maxbin + m_binFrom) << std::endl;
397
//    std::cerr << "bins = " << m_bins << std::endl;
398

    
399
//    if (peakProportion >= (0.00006 * m_bins)) {
400
    if (accepted) {
401
        Feature f;
402
        f.hasTimestamp = true;
403
        f.timestamp = timestamp;
404
        f.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
405
        fs[0].push_back(f);
406
    }
407

    
408
    delete[] data;
409
    return fs;
410
}
411

    
412
CepstrumPitchTracker::FeatureSet
413
CepstrumPitchTracker::getRemainingFeatures()
414
{
415
    FeatureSet fs;
416
    return fs;
417
}
418

    
419
void
420
CepstrumPitchTracker::fft(unsigned int n, bool inverse,
421
                    double *ri, double *ii, double *ro, double *io)
422
{
423
    if (!ri || !ro || !io) return;
424

    
425
    unsigned int bits;
426
    unsigned int i, j, k, m;
427
    unsigned int blockSize, blockEnd;
428

    
429
    double tr, ti;
430

    
431
    if (n < 2) return;
432
    if (n & (n-1)) return;
433

    
434
    double angle = 2.0 * M_PI;
435
    if (inverse) angle = -angle;
436

    
437
    for (i = 0; ; ++i) {
438
        if (n & (1 << i)) {
439
            bits = i;
440
            break;
441
        }
442
    }
443

    
444
    static unsigned int tableSize = 0;
445
    static int *table = 0;
446

    
447
    if (tableSize != n) {
448

    
449
        delete[] table;
450

    
451
        table = new int[n];
452

    
453
        for (i = 0; i < n; ++i) {
454
        
455
            m = i;
456

    
457
            for (j = k = 0; j < bits; ++j) {
458
                k = (k << 1) | (m & 1);
459
                m >>= 1;
460
            }
461

    
462
            table[i] = k;
463
        }
464

    
465
        tableSize = n;
466
    }
467

    
468
    if (ii) {
469
        for (i = 0; i < n; ++i) {
470
            ro[table[i]] = ri[i];
471
            io[table[i]] = ii[i];
472
        }
473
    } else {
474
        for (i = 0; i < n; ++i) {
475
            ro[table[i]] = ri[i];
476
            io[table[i]] = 0.0;
477
        }
478
    }
479

    
480
    blockEnd = 1;
481

    
482
    for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
483

    
484
        double delta = angle / (double)blockSize;
485
        double sm2 = -sin(-2 * delta);
486
        double sm1 = -sin(-delta);
487
        double cm2 = cos(-2 * delta);
488
        double cm1 = cos(-delta);
489
        double w = 2 * cm1;
490
        double ar[3], ai[3];
491

    
492
        for (i = 0; i < n; i += blockSize) {
493

    
494
            ar[2] = cm2;
495
            ar[1] = cm1;
496

    
497
            ai[2] = sm2;
498
            ai[1] = sm1;
499

    
500
            for (j = i, m = 0; m < blockEnd; j++, m++) {
501

    
502
                ar[0] = w * ar[1] - ar[2];
503
                ar[2] = ar[1];
504
                ar[1] = ar[0];
505

    
506
                ai[0] = w * ai[1] - ai[2];
507
                ai[2] = ai[1];
508
                ai[1] = ai[0];
509

    
510
                k = j + blockEnd;
511
                tr = ar[0] * ro[k] - ai[0] * io[k];
512
                ti = ar[0] * io[k] + ai[0] * ro[k];
513

    
514
                ro[k] = ro[j] - tr;
515
                io[k] = io[j] - ti;
516

    
517
                ro[j] += tr;
518
                io[j] += ti;
519
            }
520
        }
521

    
522
        blockEnd = blockSize;
523
    }
524
}
525

    
526