To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstrumPitchTracker.cpp @ 26:13568f1ccff0

History | View | Annotate | Download (14.8 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

    
23
#include "CepstrumPitchTracker.h"
24

    
25
#include "vamp-sdk/FFT.h"
26

    
27
#include <vector>
28
#include <algorithm>
29

    
30
#include <cstdio>
31
#include <cmath>
32
#include <complex>
33

    
34
using std::string;
35
using std::vector;
36
using Vamp::RealTime;
37

    
38
CepstrumPitchTracker::Hypothesis::Hypothesis()
39
{
40
    m_state = New;
41
    m_age = 0;
42
}
43

    
44
CepstrumPitchTracker::Hypothesis::~Hypothesis()
45
{
46
}
47

    
48
bool
49
CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s)
50
{
51
    if (m_pending.empty()) {
52
        return true;
53
    }
54

    
55
    // check we are within a relatively close tolerance of the last
56
    // candidate
57
    Estimate last = m_pending[m_pending.size()-1];
58
    double r = s.freq / last.freq;
59
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
60
    if (cents < -60 || cents > 60) return false;
61

    
62
    // and within a slightly bigger tolerance of the current mean
63
    double meanFreq = getMeanFrequency();
64
    r = s.freq / meanFreq;
65
    cents = lrint(1200.0 * (log(r) / log(2.0)));
66
    if (cents < -80 || cents > 80) return false;
67
    
68
    return true;
69
}
70

    
71
bool 
72
CepstrumPitchTracker::Hypothesis::isSatisfied()
73
{
74
    if (m_pending.empty()) return false;
75
    
76
    double meanConfidence = 0.0;
77
    for (int i = 0; i < m_pending.size(); ++i) {
78
        meanConfidence += m_pending[i].confidence;
79
    }
80
    meanConfidence /= m_pending.size();
81

    
82
    int lengthRequired = 10000;
83
    if (meanConfidence > 0.0) {
84
        lengthRequired = int(2.0 / meanConfidence + 0.5);
85
    }
86
    std::cerr << "meanConfidence = " << meanConfidence << ", lengthRequired = " << lengthRequired << ", length = " << m_pending.size() << std::endl;
87

    
88
    return (m_pending.size() > lengthRequired);
89
}
90

    
91
void
92
CepstrumPitchTracker::Hypothesis::advanceTime()
93
{
94
    ++m_age;
95
}
96

    
97
bool
98
CepstrumPitchTracker::Hypothesis::test(Estimate s)
99
{
100
    bool accept = false;
101

    
102
    switch (m_state) {
103

    
104
    case New:
105
        m_state = Provisional;
106
        accept = true;
107
        break;
108

    
109
    case Provisional:
110
        if (m_age > 3) {
111
            m_state = Rejected;
112
        } else if (isWithinTolerance(s)) {
113
            accept = true;
114
        }
115
        break;
116
        
117
    case Satisfied:
118
        if (m_age > 3) {
119
            m_state = Expired;
120
        } else if (isWithinTolerance(s)) {
121
            accept = true;
122
        }
123
        break;
124

    
125
    case Rejected:
126
        break;
127

    
128
    case Expired:
129
        break;
130
    }
131

    
132
    if (accept) {
133
        m_pending.push_back(s);
134
        m_age = 0;
135
        if (m_state == Provisional && isSatisfied()) {
136
            m_state = Satisfied;
137
        }
138
    }
139

    
140
    return accept && (m_state == Satisfied);
141
}        
142

    
143
CepstrumPitchTracker::Hypothesis::State
144
CepstrumPitchTracker::Hypothesis::getState()
145
{
146
    return m_state;
147
}
148

    
149
int
150
CepstrumPitchTracker::Hypothesis::getPendingLength()
151
{
152
    return m_pending.size();
153
}
154

    
155
CepstrumPitchTracker::Hypothesis::Estimates
156
CepstrumPitchTracker::Hypothesis::getAcceptedEstimates()
157
{
158
    if (m_state == Satisfied || m_state == Expired) {
159
        return m_pending;
160
    } else {
161
        return Estimates();
162
    }
163
}
164

    
165
double
166
CepstrumPitchTracker::Hypothesis::getMeanFrequency()
167
{
168
    double acc = 0.0;
169
    for (int i = 0; i < m_pending.size(); ++i) {
170
        acc += m_pending[i].freq;
171
    }
172
    acc /= m_pending.size();
173
    return acc;
174
}
175

    
176
CepstrumPitchTracker::Hypothesis::Note
177
CepstrumPitchTracker::Hypothesis::getAveragedNote()
178
{
179
    Note n;
180

    
181
    if (!(m_state == Satisfied || m_state == Expired)) {
182
        n.freq = 0.0;
183
        n.time = RealTime::zeroTime;
184
        n.duration = RealTime::zeroTime;
185
        return n;
186
    }
187

    
188
    n.time = m_pending.begin()->time;
189

    
190
    Estimates::iterator i = m_pending.end();
191
    --i;
192
    n.duration = i->time - n.time;
193

    
194
    // just mean frequency for now, but this isn't at all right perceptually
195
    n.freq = getMeanFrequency();
196
    
197
    return n;
198
}
199

    
200
void
201
CepstrumPitchTracker::Hypothesis::addFeatures(FeatureSet &fs)
202
{
203
    for (int i = 0; i < m_pending.size(); ++i) {
204
        Feature f;
205
        f.hasTimestamp = true;
206
        f.timestamp = m_pending[i].time;
207
        f.values.push_back(m_pending[i].freq);
208
        fs[0].push_back(f);
209
    }
210

    
211
    Feature nf;
212
    nf.hasTimestamp = true;
213
    nf.hasDuration = true;
214
    Note n = getAveragedNote();
215
    nf.timestamp = n.time;
216
    nf.duration = n.duration;
217
    nf.values.push_back(n.freq);
218
    fs[1].push_back(nf);
219
}
220

    
221
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
222
    Plugin(inputSampleRate),
223
    m_channels(0),
224
    m_stepSize(256),
225
    m_blockSize(1024),
226
    m_fmin(50),
227
    m_fmax(900),
228
    m_vflen(1),
229
    m_binFrom(0),
230
    m_binTo(0),
231
    m_bins(0)
232
{
233
}
234

    
235
CepstrumPitchTracker::~CepstrumPitchTracker()
236
{
237
}
238

    
239
string
240
CepstrumPitchTracker::getIdentifier() const
241
{
242
    return "cepstrum-pitch";
243
}
244

    
245
string
246
CepstrumPitchTracker::getName() const
247
{
248
    return "Cepstrum Pitch Tracker";
249
}
250

    
251
string
252
CepstrumPitchTracker::getDescription() const
253
{
254
    return "Estimate f0 of monophonic material using a cepstrum method.";
255
}
256

    
257
string
258
CepstrumPitchTracker::getMaker() const
259
{
260
    return "Chris Cannam";
261
}
262

    
263
int
264
CepstrumPitchTracker::getPluginVersion() const
265
{
266
    // Increment this each time you release a version that behaves
267
    // differently from the previous one
268
    return 1;
269
}
270

    
271
string
272
CepstrumPitchTracker::getCopyright() const
273
{
274
    return "Freely redistributable (BSD license)";
275
}
276

    
277
CepstrumPitchTracker::InputDomain
278
CepstrumPitchTracker::getInputDomain() const
279
{
280
    return FrequencyDomain;
281
}
282

    
283
size_t
284
CepstrumPitchTracker::getPreferredBlockSize() const
285
{
286
    return 1024;
287
}
288

    
289
size_t 
290
CepstrumPitchTracker::getPreferredStepSize() const
291
{
292
    return 256;
293
}
294

    
295
size_t
296
CepstrumPitchTracker::getMinChannelCount() const
297
{
298
    return 1;
299
}
300

    
301
size_t
302
CepstrumPitchTracker::getMaxChannelCount() const
303
{
304
    return 1;
305
}
306

    
307
CepstrumPitchTracker::ParameterList
308
CepstrumPitchTracker::getParameterDescriptors() const
309
{
310
    ParameterList list;
311
    return list;
312
}
313

    
314
float
315
CepstrumPitchTracker::getParameter(string identifier) const
316
{
317
    return 0.f;
318
}
319

    
320
void
321
CepstrumPitchTracker::setParameter(string identifier, float value) 
322
{
323
}
324

    
325
CepstrumPitchTracker::ProgramList
326
CepstrumPitchTracker::getPrograms() const
327
{
328
    ProgramList list;
329
    return list;
330
}
331

    
332
string
333
CepstrumPitchTracker::getCurrentProgram() const
334
{
335
    return ""; // no programs
336
}
337

    
338
void
339
CepstrumPitchTracker::selectProgram(string name)
340
{
341
}
342

    
343
CepstrumPitchTracker::OutputList
344
CepstrumPitchTracker::getOutputDescriptors() const
345
{
346
    OutputList outputs;
347

    
348
    int n = 0;
349

    
350
    OutputDescriptor d;
351

    
352
    d.identifier = "f0";
353
    d.name = "Estimated f0";
354
    d.description = "Estimated fundamental frequency";
355
    d.unit = "Hz";
356
    d.hasFixedBinCount = true;
357
    d.binCount = 1;
358
    d.hasKnownExtents = true;
359
    d.minValue = m_fmin;
360
    d.maxValue = m_fmax;
361
    d.isQuantized = false;
362
    d.sampleType = OutputDescriptor::FixedSampleRate;
363
    d.sampleRate = (m_inputSampleRate / m_stepSize);
364
    d.hasDuration = false;
365
    outputs.push_back(d);
366

    
367
    d.identifier = "notes";
368
    d.name = "Notes";
369
    d.description = "Derived fixed-pitch note frequencies";
370
    d.unit = "Hz";
371
    d.hasFixedBinCount = true;
372
    d.binCount = 1;
373
    d.hasKnownExtents = true;
374
    d.minValue = m_fmin;
375
    d.maxValue = m_fmax;
376
    d.isQuantized = false;
377
    d.sampleType = OutputDescriptor::FixedSampleRate;
378
    d.sampleRate = (m_inputSampleRate / m_stepSize);
379
    d.hasDuration = true;
380
    outputs.push_back(d);
381

    
382
    return outputs;
383
}
384

    
385
bool
386
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
387
{
388
    if (channels < getMinChannelCount() ||
389
        channels > getMaxChannelCount()) return false;
390

    
391
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
392
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
393
//              << std::endl;
394

    
395
    m_channels = channels;
396
    m_stepSize = stepSize;
397
    m_blockSize = blockSize;
398

    
399
    m_binFrom = int(m_inputSampleRate / m_fmax);
400
    m_binTo = int(m_inputSampleRate / m_fmin); 
401

    
402
    if (m_binTo >= (int)m_blockSize / 2) {
403
        m_binTo = m_blockSize / 2 - 1;
404
    }
405

    
406
    m_bins = (m_binTo - m_binFrom) + 1;
407

    
408
    reset();
409

    
410
    return true;
411
}
412

    
413
void
414
CepstrumPitchTracker::reset()
415
{
416
}
417

    
418
void
419
CepstrumPitchTracker::filter(const double *cep, double *data)
420
{
421
    for (int i = 0; i < m_bins; ++i) {
422
        double v = 0;
423
        int n = 0;
424
        // average according to the vertical filter length
425
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
426
            int ix = i + m_binFrom + j;
427
            if (ix >= 0 && ix < m_blockSize) {
428
                v += cep[ix];
429
                ++n;
430
            }
431
        }
432
        data[i] = v / n;
433
    }
434
}
435

    
436
double
437
CepstrumPitchTracker::cubicInterpolate(const double y[4], double x)
438
{
439
    double a0 = y[3] - y[2] - y[0] + y[1];
440
    double a1 = y[0] - y[1] - a0;
441
    double a2 = y[2] - y[0];
442
    double a3 = y[1];
443
    return
444
        a0 * x * x * x +
445
        a1 * x * x +
446
        a2 * x +
447
        a3;
448
}
449

    
450
double
451
CepstrumPitchTracker::findInterpolatedPeak(const double *in, int maxbin)
452
{
453
    if (maxbin < 2 || maxbin > m_bins - 3) {
454
        return maxbin;
455
    }
456

    
457
    double maxval = 0.0;
458
    double maxidx = maxbin;
459

    
460
    const int divisions = 10;
461
    double y[4];
462

    
463
    y[0] = in[maxbin-1];
464
    y[1] = in[maxbin];
465
    y[2] = in[maxbin+1];
466
    y[3] = in[maxbin+2];
467
    for (int i = 0; i < divisions; ++i) {
468
        double probe = double(i) / double(divisions);
469
        double value = cubicInterpolate(y, probe);
470
        if (value > maxval) {
471
            maxval = value; 
472
            maxidx = maxbin + probe;
473
        }
474
    }
475

    
476
    y[3] = y[2];
477
    y[2] = y[1];
478
    y[1] = y[0];
479
    y[0] = in[maxbin-2];
480
    for (int i = 0; i < divisions; ++i) {
481
        double probe = double(i) / double(divisions);
482
        double value = cubicInterpolate(y, probe);
483
        if (value > maxval) {
484
            maxval = value; 
485
            maxidx = maxbin - 1 + probe;
486
        }
487
    }
488

    
489
/*
490
    std::cerr << "centre = " << maxbin << ": ["
491
              << in[maxbin-2] << ","
492
              << in[maxbin-1] << ","
493
              << in[maxbin] << ","
494
              << in[maxbin+1] << ","
495
              << in[maxbin+2] << "] -> " << maxidx << std::endl;
496
*/
497

    
498
    return maxidx;
499
}
500

    
501
CepstrumPitchTracker::FeatureSet
502
CepstrumPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
503
{
504
    FeatureSet fs;
505

    
506
    int bs = m_blockSize;
507
    int hs = m_blockSize/2 + 1;
508

    
509
    double *rawcep = new double[bs];
510
    double *io = new double[bs];
511
    double *logmag = new double[bs];
512

    
513
    // The "inverse symmetric" method. Seems to be the most reliable
514
        
515
    double magmean = 0.0;
516

    
517
    for (int i = 0; i < hs; ++i) {
518

    
519
        double power =
520
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
521
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
522
        double mag = sqrt(power);
523

    
524
        magmean += mag;
525

    
526
        double lm = log(mag + 0.00000001);
527
        
528
        logmag[i] = lm;
529
        if (i > 0) logmag[bs - i] = lm;
530
    }
531

    
532
    magmean /= hs;
533
    double threshold = 0.1; // for magmean
534
    
535
    Vamp::FFT::inverse(bs, logmag, 0, rawcep, io);
536
    
537
    delete[] logmag;
538
    delete[] io;
539

    
540
    int n = m_bins;
541
    double *data = new double[n];
542
    filter(rawcep, data);
543
    delete[] rawcep;
544

    
545
    double maxval = 0.0;
546
    int maxbin = -1;
547

    
548
    for (int i = 0; i < n; ++i) {
549
        if (data[i] > maxval) {
550
            maxval = data[i];
551
            maxbin = i;
552
        }
553
    }
554

    
555
    if (maxbin < 0) {
556
        delete[] data;
557
        return fs;
558
    }
559

    
560
    double nextPeakVal = 0.0;
561
    for (int i = 1; i+1 < n; ++i) {
562
        if (data[i] > data[i-1] &&
563
            data[i] > data[i+1] &&
564
            i != maxbin &&
565
            data[i] > nextPeakVal) {
566
            nextPeakVal = data[i];
567
        }
568
    }
569

    
570
    double cimax = findInterpolatedPeak(data, maxbin);
571
    double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
572

    
573
    double confidence = 0.0;
574
    if (nextPeakVal != 0.0) {
575
        confidence = (maxval - nextPeakVal) / 100.0;
576
        if (magmean < threshold) confidence = 0.0;
577
        std::cerr << "magmean = " << magmean << ", confidence = " << confidence << std::endl;
578
    }
579

    
580
    Hypothesis::Estimate e;
581
    e.freq = peakfreq;
582
    e.time = timestamp;
583
    e.confidence = confidence;
584

    
585
    m_accepted.advanceTime();
586

    
587
    for (int i = 0; i < m_possible.size(); ++i) {
588
        m_possible[i].advanceTime();
589
    }
590

    
591
    if (!m_accepted.test(e)) {
592

    
593
        int candidate = -1;
594
        bool accepted = false;
595

    
596
        for (int i = 0; i < m_possible.size(); ++i) {
597
            if (m_possible[i].test(e)) {
598
                accepted = true;
599
                if (m_possible[i].getState() == Hypothesis::Satisfied) {
600
                    candidate = i;
601
                }
602
                break;
603
            }
604
        }
605

    
606
        if (!accepted) {
607
            Hypothesis h;
608
            h.test(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
609
            m_possible.push_back(h);
610
        }
611

    
612
        if (m_accepted.getState() == Hypothesis::Expired) {
613
            m_accepted.addFeatures(fs);
614
        }
615
        
616
        if (m_accepted.getState() == Hypothesis::Expired ||
617
            m_accepted.getState() == Hypothesis::Rejected) {
618
            if (candidate >= 0) {
619
                m_accepted = m_possible[candidate];
620
            } else {
621
                m_accepted = Hypothesis();
622
            }
623
        }
624

    
625
        // reap rejected/expired hypotheses from possible list
626
        Hypotheses toReap = m_possible;
627
        m_possible.clear();
628
        for (int i = 0; i < toReap.size(); ++i) {
629
            Hypothesis h = toReap[i];
630
            if (h.getState() != Hypothesis::Rejected && 
631
                h.getState() != Hypothesis::Expired) {
632
                m_possible.push_back(h);
633
            }
634
        }
635
    }  
636

    
637
    std::cerr << "accepted length = " << m_accepted.getPendingLength()
638
              << ", state = " << m_accepted.getState()
639
              << ", hypothesis count = " << m_possible.size() << std::endl;
640

    
641
    delete[] data;
642
    return fs;
643
}
644

    
645
CepstrumPitchTracker::FeatureSet
646
CepstrumPitchTracker::getRemainingFeatures()
647
{
648
    FeatureSet fs;
649
    if (m_accepted.getState() == Hypothesis::Satisfied) {
650
        m_accepted.addFeatures(fs);
651
    }
652
    return fs;
653
}