To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstrumPitchTracker.cpp @ 29:afcd1f4e603c

History | View | Annotate | Download (14.6 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

    
23
#include "CepstrumPitchTracker.h"
24

    
25
#include "vamp-sdk/FFT.h"
26

    
27
#include <vector>
28
#include <algorithm>
29

    
30
#include <cstdio>
31
#include <cmath>
32
#include <complex>
33

    
34
using std::string;
35
using std::vector;
36
using Vamp::RealTime;
37

    
38
CepstrumPitchTracker::Hypothesis::Hypothesis()
39
{
40
    m_state = New;
41
}
42

    
43
CepstrumPitchTracker::Hypothesis::~Hypothesis()
44
{
45
}
46

    
47
bool
48
CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s)
49
{
50
    if (m_pending.empty()) {
51
        return true;
52
    }
53

    
54
    // check we are within a relatively close tolerance of the last
55
    // candidate
56
    Estimate last = m_pending[m_pending.size()-1];
57
    double r = s.freq / last.freq;
58
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
59
    if (cents < -60 || cents > 60) return false;
60

    
61
    // and within a slightly bigger tolerance of the current mean
62
    double meanFreq = getMeanFrequency();
63
    r = s.freq / meanFreq;
64
    cents = lrint(1200.0 * (log(r) / log(2.0)));
65
    if (cents < -80 || cents > 80) return false;
66
    
67
    return true;
68
}
69

    
70
bool
71
CepstrumPitchTracker::Hypothesis::isOutOfDateFor(Estimate s)
72
{
73
    if (m_pending.empty()) return false;
74

    
75
    return ((s.time - m_pending[m_pending.size()-1].time) > 
76
            RealTime::fromMilliseconds(40));
77
}
78

    
79
bool 
80
CepstrumPitchTracker::Hypothesis::isSatisfied()
81
{
82
    if (m_pending.empty()) return false;
83
    
84
    double meanConfidence = 0.0;
85
    for (int i = 0; i < m_pending.size(); ++i) {
86
        meanConfidence += m_pending[i].confidence;
87
    }
88
    meanConfidence /= m_pending.size();
89

    
90
    int lengthRequired = 10000;
91
    if (meanConfidence > 0.0) {
92
        lengthRequired = int(2.0 / meanConfidence + 0.5);
93
    }
94
    std::cerr << "meanConfidence = " << meanConfidence << ", lengthRequired = " << lengthRequired << ", length = " << m_pending.size() << std::endl;
95

    
96
    return (m_pending.size() > lengthRequired);
97
}
98

    
99
bool
100
CepstrumPitchTracker::Hypothesis::accept(Estimate s)
101
{
102
    bool accept = false;
103

    
104
    switch (m_state) {
105

    
106
    case New:
107
        m_state = Provisional;
108
        accept = true;
109
        break;
110

    
111
    case Provisional:
112
        if (isOutOfDateFor(s)) {
113
            m_state = Rejected;
114
        } else if (isWithinTolerance(s)) {
115
            accept = true;
116
        }
117
        break;
118
        
119
    case Satisfied:
120
        if (isOutOfDateFor(s)) {
121
            m_state = Expired;
122
        } else if (isWithinTolerance(s)) {
123
            accept = true;
124
        }
125
        break;
126

    
127
    case Rejected:
128
        break;
129

    
130
    case Expired:
131
        break;
132
    }
133

    
134
    if (accept) {
135
        m_pending.push_back(s);
136
        if (m_state == Provisional && isSatisfied()) {
137
            m_state = Satisfied;
138
        }
139
    }
140

    
141
    return accept;
142
}        
143

    
144
CepstrumPitchTracker::Hypothesis::State
145
CepstrumPitchTracker::Hypothesis::getState()
146
{
147
    return m_state;
148
}
149

    
150
CepstrumPitchTracker::Hypothesis::Estimates
151
CepstrumPitchTracker::Hypothesis::getAcceptedEstimates()
152
{
153
    if (m_state == Satisfied || m_state == Expired) {
154
        return m_pending;
155
    } else {
156
        return Estimates();
157
    }
158
}
159

    
160
double
161
CepstrumPitchTracker::Hypothesis::getMeanFrequency()
162
{
163
    double acc = 0.0;
164
    for (int i = 0; i < m_pending.size(); ++i) {
165
        acc += m_pending[i].freq;
166
    }
167
    acc /= m_pending.size();
168
    return acc;
169
}
170

    
171
CepstrumPitchTracker::Hypothesis::Note
172
CepstrumPitchTracker::Hypothesis::getAveragedNote()
173
{
174
    Note n;
175

    
176
    if (!(m_state == Satisfied || m_state == Expired)) {
177
        n.freq = 0.0;
178
        n.time = RealTime::zeroTime;
179
        n.duration = RealTime::zeroTime;
180
        return n;
181
    }
182

    
183
    n.time = m_pending.begin()->time;
184

    
185
    Estimates::iterator i = m_pending.end();
186
    --i;
187
    n.duration = i->time - n.time;
188

    
189
    // just mean frequency for now, but this isn't at all right perceptually
190
    n.freq = getMeanFrequency();
191
    
192
    return n;
193
}
194

    
195
void
196
CepstrumPitchTracker::Hypothesis::addFeatures(FeatureSet &fs)
197
{
198
    for (int i = 0; i < m_pending.size(); ++i) {
199
        Feature f;
200
        f.hasTimestamp = true;
201
        f.timestamp = m_pending[i].time;
202
        f.values.push_back(m_pending[i].freq);
203
        fs[0].push_back(f);
204
    }
205

    
206
    Feature nf;
207
    nf.hasTimestamp = true;
208
    nf.hasDuration = true;
209
    Note n = getAveragedNote();
210
    nf.timestamp = n.time;
211
    nf.duration = n.duration;
212
    nf.values.push_back(n.freq);
213
    fs[1].push_back(nf);
214
}
215

    
216
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
217
    Plugin(inputSampleRate),
218
    m_channels(0),
219
    m_stepSize(256),
220
    m_blockSize(1024),
221
    m_fmin(50),
222
    m_fmax(900),
223
    m_vflen(1),
224
    m_binFrom(0),
225
    m_binTo(0),
226
    m_bins(0)
227
{
228
}
229

    
230
CepstrumPitchTracker::~CepstrumPitchTracker()
231
{
232
}
233

    
234
string
235
CepstrumPitchTracker::getIdentifier() const
236
{
237
    return "cepstrum-pitch";
238
}
239

    
240
string
241
CepstrumPitchTracker::getName() const
242
{
243
    return "Cepstrum Pitch Tracker";
244
}
245

    
246
string
247
CepstrumPitchTracker::getDescription() const
248
{
249
    return "Estimate f0 of monophonic material using a cepstrum method.";
250
}
251

    
252
string
253
CepstrumPitchTracker::getMaker() const
254
{
255
    return "Chris Cannam";
256
}
257

    
258
int
259
CepstrumPitchTracker::getPluginVersion() const
260
{
261
    // Increment this each time you release a version that behaves
262
    // differently from the previous one
263
    return 1;
264
}
265

    
266
string
267
CepstrumPitchTracker::getCopyright() const
268
{
269
    return "Freely redistributable (BSD license)";
270
}
271

    
272
CepstrumPitchTracker::InputDomain
273
CepstrumPitchTracker::getInputDomain() const
274
{
275
    return FrequencyDomain;
276
}
277

    
278
size_t
279
CepstrumPitchTracker::getPreferredBlockSize() const
280
{
281
    return 1024;
282
}
283

    
284
size_t 
285
CepstrumPitchTracker::getPreferredStepSize() const
286
{
287
    return 256;
288
}
289

    
290
size_t
291
CepstrumPitchTracker::getMinChannelCount() const
292
{
293
    return 1;
294
}
295

    
296
size_t
297
CepstrumPitchTracker::getMaxChannelCount() const
298
{
299
    return 1;
300
}
301

    
302
CepstrumPitchTracker::ParameterList
303
CepstrumPitchTracker::getParameterDescriptors() const
304
{
305
    ParameterList list;
306
    return list;
307
}
308

    
309
float
310
CepstrumPitchTracker::getParameter(string identifier) const
311
{
312
    return 0.f;
313
}
314

    
315
void
316
CepstrumPitchTracker::setParameter(string identifier, float value) 
317
{
318
}
319

    
320
CepstrumPitchTracker::ProgramList
321
CepstrumPitchTracker::getPrograms() const
322
{
323
    ProgramList list;
324
    return list;
325
}
326

    
327
string
328
CepstrumPitchTracker::getCurrentProgram() const
329
{
330
    return ""; // no programs
331
}
332

    
333
void
334
CepstrumPitchTracker::selectProgram(string name)
335
{
336
}
337

    
338
CepstrumPitchTracker::OutputList
339
CepstrumPitchTracker::getOutputDescriptors() const
340
{
341
    OutputList outputs;
342

    
343
    int n = 0;
344

    
345
    OutputDescriptor d;
346

    
347
    d.identifier = "f0";
348
    d.name = "Estimated f0";
349
    d.description = "Estimated fundamental frequency";
350
    d.unit = "Hz";
351
    d.hasFixedBinCount = true;
352
    d.binCount = 1;
353
    d.hasKnownExtents = true;
354
    d.minValue = m_fmin;
355
    d.maxValue = m_fmax;
356
    d.isQuantized = false;
357
    d.sampleType = OutputDescriptor::FixedSampleRate;
358
    d.sampleRate = (m_inputSampleRate / m_stepSize);
359
    d.hasDuration = false;
360
    outputs.push_back(d);
361

    
362
    d.identifier = "notes";
363
    d.name = "Notes";
364
    d.description = "Derived fixed-pitch note frequencies";
365
    d.unit = "Hz";
366
    d.hasFixedBinCount = true;
367
    d.binCount = 1;
368
    d.hasKnownExtents = true;
369
    d.minValue = m_fmin;
370
    d.maxValue = m_fmax;
371
    d.isQuantized = false;
372
    d.sampleType = OutputDescriptor::FixedSampleRate;
373
    d.sampleRate = (m_inputSampleRate / m_stepSize);
374
    d.hasDuration = true;
375
    outputs.push_back(d);
376

    
377
    return outputs;
378
}
379

    
380
bool
381
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
382
{
383
    if (channels < getMinChannelCount() ||
384
        channels > getMaxChannelCount()) return false;
385

    
386
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
387
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
388
//              << std::endl;
389

    
390
    m_channels = channels;
391
    m_stepSize = stepSize;
392
    m_blockSize = blockSize;
393

    
394
    m_binFrom = int(m_inputSampleRate / m_fmax);
395
    m_binTo = int(m_inputSampleRate / m_fmin); 
396

    
397
    if (m_binTo >= (int)m_blockSize / 2) {
398
        m_binTo = m_blockSize / 2 - 1;
399
    }
400

    
401
    m_bins = (m_binTo - m_binFrom) + 1;
402

    
403
    reset();
404

    
405
    return true;
406
}
407

    
408
void
409
CepstrumPitchTracker::reset()
410
{
411
}
412

    
413
void
414
CepstrumPitchTracker::filter(const double *cep, double *data)
415
{
416
    for (int i = 0; i < m_bins; ++i) {
417
        double v = 0;
418
        int n = 0;
419
        // average according to the vertical filter length
420
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
421
            int ix = i + m_binFrom + j;
422
            if (ix >= 0 && ix < m_blockSize) {
423
                v += cep[ix];
424
                ++n;
425
            }
426
        }
427
        data[i] = v / n;
428
    }
429
}
430

    
431
double
432
CepstrumPitchTracker::cubicInterpolate(const double y[4], double x)
433
{
434
    double a0 = y[3] - y[2] - y[0] + y[1];
435
    double a1 = y[0] - y[1] - a0;
436
    double a2 = y[2] - y[0];
437
    double a3 = y[1];
438
    return
439
        a0 * x * x * x +
440
        a1 * x * x +
441
        a2 * x +
442
        a3;
443
}
444

    
445
double
446
CepstrumPitchTracker::findInterpolatedPeak(const double *in, int maxbin)
447
{
448
    if (maxbin < 2 || maxbin > m_bins - 3) {
449
        return maxbin;
450
    }
451

    
452
    double maxval = 0.0;
453
    double maxidx = maxbin;
454

    
455
    const int divisions = 10;
456
    double y[4];
457

    
458
    y[0] = in[maxbin-1];
459
    y[1] = in[maxbin];
460
    y[2] = in[maxbin+1];
461
    y[3] = in[maxbin+2];
462
    for (int i = 0; i < divisions; ++i) {
463
        double probe = double(i) / double(divisions);
464
        double value = cubicInterpolate(y, probe);
465
        if (value > maxval) {
466
            maxval = value; 
467
            maxidx = maxbin + probe;
468
        }
469
    }
470

    
471
    y[3] = y[2];
472
    y[2] = y[1];
473
    y[1] = y[0];
474
    y[0] = in[maxbin-2];
475
    for (int i = 0; i < divisions; ++i) {
476
        double probe = double(i) / double(divisions);
477
        double value = cubicInterpolate(y, probe);
478
        if (value > maxval) {
479
            maxval = value; 
480
            maxidx = maxbin - 1 + probe;
481
        }
482
    }
483

    
484
/*
485
    std::cerr << "centre = " << maxbin << ": ["
486
              << in[maxbin-2] << ","
487
              << in[maxbin-1] << ","
488
              << in[maxbin] << ","
489
              << in[maxbin+1] << ","
490
              << in[maxbin+2] << "] -> " << maxidx << std::endl;
491
*/
492

    
493
    return maxidx;
494
}
495

    
496
CepstrumPitchTracker::FeatureSet
497
CepstrumPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
498
{
499
    FeatureSet fs;
500

    
501
    int bs = m_blockSize;
502
    int hs = m_blockSize/2 + 1;
503

    
504
    double *rawcep = new double[bs];
505
    double *io = new double[bs];
506
    double *logmag = new double[bs];
507

    
508
    // The "inverse symmetric" method. Seems to be the most reliable
509
        
510
    double magmean = 0.0;
511

    
512
    for (int i = 0; i < hs; ++i) {
513

    
514
        double power =
515
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
516
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
517
        double mag = sqrt(power);
518

    
519
        magmean += mag;
520

    
521
        double lm = log(mag + 0.00000001);
522
        
523
        logmag[i] = lm;
524
        if (i > 0) logmag[bs - i] = lm;
525
    }
526

    
527
    magmean /= hs;
528
    double threshold = 0.1; // for magmean
529
    
530
    Vamp::FFT::inverse(bs, logmag, 0, rawcep, io);
531
    
532
    delete[] logmag;
533
    delete[] io;
534

    
535
    int n = m_bins;
536
    double *data = new double[n];
537
    filter(rawcep, data);
538
    delete[] rawcep;
539

    
540
    double maxval = 0.0;
541
    int maxbin = -1;
542

    
543
    for (int i = 0; i < n; ++i) {
544
        if (data[i] > maxval) {
545
            maxval = data[i];
546
            maxbin = i;
547
        }
548
    }
549

    
550
    if (maxbin < 0) {
551
        delete[] data;
552
        return fs;
553
    }
554

    
555
    double nextPeakVal = 0.0;
556
    for (int i = 1; i+1 < n; ++i) {
557
        if (data[i] > data[i-1] &&
558
            data[i] > data[i+1] &&
559
            i != maxbin &&
560
            data[i] > nextPeakVal) {
561
            nextPeakVal = data[i];
562
        }
563
    }
564

    
565
    double cimax = findInterpolatedPeak(data, maxbin);
566
    double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
567

    
568
    double confidence = 0.0;
569
    if (nextPeakVal != 0.0) {
570
        confidence = (maxval - nextPeakVal) * 10.0;
571
        if (magmean < threshold) confidence = 0.0;
572
        std::cerr << "magmean = " << magmean << ", confidence = " << confidence << std::endl;
573
    }
574

    
575
    Hypothesis::Estimate e;
576
    e.freq = peakfreq;
577
    e.time = timestamp;
578
    e.confidence = confidence;
579

    
580
//    m_good.advanceTime();
581
    for (int i = 0; i < m_possible.size(); ++i) {
582
//        m_possible[i].advanceTime();
583
    }
584

    
585
    if (!m_good.accept(e)) {
586

    
587
        int candidate = -1;
588
        bool accepted = false;
589

    
590
        for (int i = 0; i < m_possible.size(); ++i) {
591
            if (m_possible[i].accept(e)) {
592
                if (m_possible[i].getState() == Hypothesis::Satisfied) {
593
                    accepted = true;
594
                    candidate = i;
595
                }
596
                break;
597
            }
598
        }
599

    
600
        if (!accepted) {
601
            Hypothesis h;
602
            h.accept(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
603
            m_possible.push_back(h);
604
        }
605

    
606
        if (m_good.getState() == Hypothesis::Expired) {
607
            m_good.addFeatures(fs);
608
        }
609
        
610
        if (m_good.getState() == Hypothesis::Expired ||
611
            m_good.getState() == Hypothesis::Rejected) {
612
            if (candidate >= 0) {
613
                m_good = m_possible[candidate];
614
            } else {
615
                m_good = Hypothesis();
616
            }
617
        }
618

    
619
        // reap rejected/expired hypotheses from possible list
620
        Hypotheses toReap = m_possible;
621
        m_possible.clear();
622
        for (int i = 0; i < toReap.size(); ++i) {
623
            Hypothesis h = toReap[i];
624
            if (h.getState() != Hypothesis::Rejected && 
625
                h.getState() != Hypothesis::Expired) {
626
                m_possible.push_back(h);
627
            }
628
        }
629
    }  
630

    
631
    delete[] data;
632
    return fs;
633
}
634

    
635
CepstrumPitchTracker::FeatureSet
636
CepstrumPitchTracker::getRemainingFeatures()
637
{
638
    FeatureSet fs;
639
    if (m_good.getState() == Hypothesis::Satisfied) {
640
        m_good.addFeatures(fs);
641
    }
642
    return fs;
643
}