To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstrumPitchTracker.cpp @ 30:2554aab152a5

History | View | Annotate | Download (14.5 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

    
23
#include "CepstrumPitchTracker.h"
24

    
25
#include "vamp-sdk/FFT.h"
26

    
27
#include <vector>
28
#include <algorithm>
29

    
30
#include <cstdio>
31
#include <cmath>
32
#include <complex>
33

    
34
using std::string;
35
using std::vector;
36
using Vamp::RealTime;
37

    
38
CepstrumPitchTracker::Hypothesis::Hypothesis()
39
{
40
    m_state = New;
41
}
42

    
43
CepstrumPitchTracker::Hypothesis::~Hypothesis()
44
{
45
}
46

    
47
bool
48
CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s) const
49
{
50
    if (m_pending.empty()) {
51
        return true;
52
    }
53

    
54
    // check we are within a relatively close tolerance of the last
55
    // candidate
56
    Estimate last = m_pending[m_pending.size()-1];
57
    double r = s.freq / last.freq;
58
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
59
    if (cents < -60 || cents > 60) return false;
60

    
61
    // and within a slightly bigger tolerance of the current mean
62
    double meanFreq = getMeanFrequency();
63
    r = s.freq / meanFreq;
64
    cents = lrint(1200.0 * (log(r) / log(2.0)));
65
    if (cents < -80 || cents > 80) return false;
66
    
67
    return true;
68
}
69

    
70
bool
71
CepstrumPitchTracker::Hypothesis::isOutOfDateFor(Estimate s) const
72
{
73
    if (m_pending.empty()) return false;
74

    
75
    return ((s.time - m_pending[m_pending.size()-1].time) > 
76
            RealTime::fromMilliseconds(40));
77
}
78

    
79
bool 
80
CepstrumPitchTracker::Hypothesis::isSatisfied() const
81
{
82
    if (m_pending.empty()) return false;
83
    
84
    double meanConfidence = 0.0;
85
    for (int i = 0; i < m_pending.size(); ++i) {
86
        meanConfidence += m_pending[i].confidence;
87
    }
88
    meanConfidence /= m_pending.size();
89

    
90
    int lengthRequired = 10000;
91
    if (meanConfidence > 0.0) {
92
        lengthRequired = int(2.0 / meanConfidence + 0.5);
93
    }
94

    
95
    return (m_pending.size() > lengthRequired);
96
}
97

    
98
bool
99
CepstrumPitchTracker::Hypothesis::accept(Estimate s)
100
{
101
    bool accept = false;
102

    
103
    switch (m_state) {
104

    
105
    case New:
106
        m_state = Provisional;
107
        accept = true;
108
        break;
109

    
110
    case Provisional:
111
        if (isOutOfDateFor(s)) {
112
            m_state = Rejected;
113
        } else if (isWithinTolerance(s)) {
114
            accept = true;
115
        }
116
        break;
117
        
118
    case Satisfied:
119
        if (isOutOfDateFor(s)) {
120
            m_state = Expired;
121
        } else if (isWithinTolerance(s)) {
122
            accept = true;
123
        }
124
        break;
125

    
126
    case Rejected:
127
        break;
128

    
129
    case Expired:
130
        break;
131
    }
132

    
133
    if (accept) {
134
        m_pending.push_back(s);
135
        if (m_state == Provisional && isSatisfied()) {
136
            m_state = Satisfied;
137
        }
138
    }
139

    
140
    return accept;
141
}        
142

    
143
CepstrumPitchTracker::Hypothesis::State
144
CepstrumPitchTracker::Hypothesis::getState() const
145
{
146
    return m_state;
147
}
148

    
149
CepstrumPitchTracker::Hypothesis::Estimates
150
CepstrumPitchTracker::Hypothesis::getAcceptedEstimates() const
151
{
152
    if (m_state == Satisfied || m_state == Expired) {
153
        return m_pending;
154
    } else {
155
        return Estimates();
156
    }
157
}
158

    
159
double
160
CepstrumPitchTracker::Hypothesis::getMeanFrequency() const
161
{
162
    double acc = 0.0;
163
    for (int i = 0; i < m_pending.size(); ++i) {
164
        acc += m_pending[i].freq;
165
    }
166
    acc /= m_pending.size();
167
    return acc;
168
}
169

    
170
CepstrumPitchTracker::Hypothesis::Note
171
CepstrumPitchTracker::Hypothesis::getAveragedNote() const
172
{
173
    Note n;
174

    
175
    if (!(m_state == Satisfied || m_state == Expired)) {
176
        n.freq = 0.0;
177
        n.time = RealTime::zeroTime;
178
        n.duration = RealTime::zeroTime;
179
        return n;
180
    }
181

    
182
    n.time = m_pending.begin()->time;
183

    
184
    Estimates::const_iterator i = m_pending.end();
185
    --i;
186
    n.duration = i->time - n.time;
187

    
188
    // just mean frequency for now, but this isn't at all right perceptually
189
    n.freq = getMeanFrequency();
190
    
191
    return n;
192
}
193

    
194
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
195
    Plugin(inputSampleRate),
196
    m_channels(0),
197
    m_stepSize(256),
198
    m_blockSize(1024),
199
    m_fmin(50),
200
    m_fmax(900),
201
    m_vflen(1),
202
    m_binFrom(0),
203
    m_binTo(0),
204
    m_bins(0)
205
{
206
}
207

    
208
CepstrumPitchTracker::~CepstrumPitchTracker()
209
{
210
}
211

    
212
string
213
CepstrumPitchTracker::getIdentifier() const
214
{
215
    return "cepstrum-pitch";
216
}
217

    
218
string
219
CepstrumPitchTracker::getName() const
220
{
221
    return "Cepstrum Pitch Tracker";
222
}
223

    
224
string
225
CepstrumPitchTracker::getDescription() const
226
{
227
    return "Estimate f0 of monophonic material using a cepstrum method.";
228
}
229

    
230
string
231
CepstrumPitchTracker::getMaker() const
232
{
233
    return "Chris Cannam";
234
}
235

    
236
int
237
CepstrumPitchTracker::getPluginVersion() const
238
{
239
    // Increment this each time you release a version that behaves
240
    // differently from the previous one
241
    return 1;
242
}
243

    
244
string
245
CepstrumPitchTracker::getCopyright() const
246
{
247
    return "Freely redistributable (BSD license)";
248
}
249

    
250
CepstrumPitchTracker::InputDomain
251
CepstrumPitchTracker::getInputDomain() const
252
{
253
    return FrequencyDomain;
254
}
255

    
256
size_t
257
CepstrumPitchTracker::getPreferredBlockSize() const
258
{
259
    return 1024;
260
}
261

    
262
size_t 
263
CepstrumPitchTracker::getPreferredStepSize() const
264
{
265
    return 256;
266
}
267

    
268
size_t
269
CepstrumPitchTracker::getMinChannelCount() const
270
{
271
    return 1;
272
}
273

    
274
size_t
275
CepstrumPitchTracker::getMaxChannelCount() const
276
{
277
    return 1;
278
}
279

    
280
CepstrumPitchTracker::ParameterList
281
CepstrumPitchTracker::getParameterDescriptors() const
282
{
283
    ParameterList list;
284
    return list;
285
}
286

    
287
float
288
CepstrumPitchTracker::getParameter(string identifier) const
289
{
290
    return 0.f;
291
}
292

    
293
void
294
CepstrumPitchTracker::setParameter(string identifier, float value) 
295
{
296
}
297

    
298
CepstrumPitchTracker::ProgramList
299
CepstrumPitchTracker::getPrograms() const
300
{
301
    ProgramList list;
302
    return list;
303
}
304

    
305
string
306
CepstrumPitchTracker::getCurrentProgram() const
307
{
308
    return ""; // no programs
309
}
310

    
311
void
312
CepstrumPitchTracker::selectProgram(string name)
313
{
314
}
315

    
316
CepstrumPitchTracker::OutputList
317
CepstrumPitchTracker::getOutputDescriptors() const
318
{
319
    OutputList outputs;
320

    
321
    int n = 0;
322

    
323
    OutputDescriptor d;
324

    
325
    d.identifier = "f0";
326
    d.name = "Estimated f0";
327
    d.description = "Estimated fundamental frequency";
328
    d.unit = "Hz";
329
    d.hasFixedBinCount = true;
330
    d.binCount = 1;
331
    d.hasKnownExtents = true;
332
    d.minValue = m_fmin;
333
    d.maxValue = m_fmax;
334
    d.isQuantized = false;
335
    d.sampleType = OutputDescriptor::FixedSampleRate;
336
    d.sampleRate = (m_inputSampleRate / m_stepSize);
337
    d.hasDuration = false;
338
    outputs.push_back(d);
339

    
340
    d.identifier = "notes";
341
    d.name = "Notes";
342
    d.description = "Derived fixed-pitch note frequencies";
343
    d.unit = "Hz";
344
    d.hasFixedBinCount = true;
345
    d.binCount = 1;
346
    d.hasKnownExtents = true;
347
    d.minValue = m_fmin;
348
    d.maxValue = m_fmax;
349
    d.isQuantized = false;
350
    d.sampleType = OutputDescriptor::FixedSampleRate;
351
    d.sampleRate = (m_inputSampleRate / m_stepSize);
352
    d.hasDuration = true;
353
    outputs.push_back(d);
354

    
355
    return outputs;
356
}
357

    
358
bool
359
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
360
{
361
    if (channels < getMinChannelCount() ||
362
        channels > getMaxChannelCount()) return false;
363

    
364
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
365
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
366
//              << std::endl;
367

    
368
    m_channels = channels;
369
    m_stepSize = stepSize;
370
    m_blockSize = blockSize;
371

    
372
    m_binFrom = int(m_inputSampleRate / m_fmax);
373
    m_binTo = int(m_inputSampleRate / m_fmin); 
374

    
375
    if (m_binTo >= (int)m_blockSize / 2) {
376
        m_binTo = m_blockSize / 2 - 1;
377
    }
378

    
379
    m_bins = (m_binTo - m_binFrom) + 1;
380

    
381
    reset();
382

    
383
    return true;
384
}
385

    
386
void
387
CepstrumPitchTracker::reset()
388
{
389
}
390

    
391
void
392
CepstrumPitchTracker::addFeaturesFrom(Hypothesis h, FeatureSet &fs)
393
{
394
    Hypothesis::Estimates es = h.getAcceptedEstimates();
395

    
396
    for (int i = 0; i < es.size(); ++i) {
397
        Feature f;
398
        f.hasTimestamp = true;
399
        f.timestamp = es[i].time;
400
        f.values.push_back(es[i].freq);
401
        fs[0].push_back(f);
402
    }
403

    
404
    Feature nf;
405
    nf.hasTimestamp = true;
406
    nf.hasDuration = true;
407
    Hypothesis::Note n = h.getAveragedNote();
408
    nf.timestamp = n.time;
409
    nf.duration = n.duration;
410
    nf.values.push_back(n.freq);
411
    fs[1].push_back(nf);
412
}
413

    
414
void
415
CepstrumPitchTracker::filter(const double *cep, double *data)
416
{
417
    for (int i = 0; i < m_bins; ++i) {
418
        double v = 0;
419
        int n = 0;
420
        // average according to the vertical filter length
421
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
422
            int ix = i + m_binFrom + j;
423
            if (ix >= 0 && ix < m_blockSize) {
424
                v += cep[ix];
425
                ++n;
426
            }
427
        }
428
        data[i] = v / n;
429
    }
430
}
431

    
432
double
433
CepstrumPitchTracker::cubicInterpolate(const double y[4], double x)
434
{
435
    double a0 = y[3] - y[2] - y[0] + y[1];
436
    double a1 = y[0] - y[1] - a0;
437
    double a2 = y[2] - y[0];
438
    double a3 = y[1];
439
    return
440
        a0 * x * x * x +
441
        a1 * x * x +
442
        a2 * x +
443
        a3;
444
}
445

    
446
double
447
CepstrumPitchTracker::findInterpolatedPeak(const double *in, int maxbin)
448
{
449
    if (maxbin < 2 || maxbin > m_bins - 3) {
450
        return maxbin;
451
    }
452

    
453
    double maxval = 0.0;
454
    double maxidx = maxbin;
455

    
456
    const int divisions = 10;
457
    double y[4];
458

    
459
    y[0] = in[maxbin-1];
460
    y[1] = in[maxbin];
461
    y[2] = in[maxbin+1];
462
    y[3] = in[maxbin+2];
463
    for (int i = 0; i < divisions; ++i) {
464
        double probe = double(i) / double(divisions);
465
        double value = cubicInterpolate(y, probe);
466
        if (value > maxval) {
467
            maxval = value; 
468
            maxidx = maxbin + probe;
469
        }
470
    }
471

    
472
    y[3] = y[2];
473
    y[2] = y[1];
474
    y[1] = y[0];
475
    y[0] = in[maxbin-2];
476
    for (int i = 0; i < divisions; ++i) {
477
        double probe = double(i) / double(divisions);
478
        double value = cubicInterpolate(y, probe);
479
        if (value > maxval) {
480
            maxval = value; 
481
            maxidx = maxbin - 1 + probe;
482
        }
483
    }
484

    
485
/*
486
    std::cerr << "centre = " << maxbin << ": ["
487
              << in[maxbin-2] << ","
488
              << in[maxbin-1] << ","
489
              << in[maxbin] << ","
490
              << in[maxbin+1] << ","
491
              << in[maxbin+2] << "] -> " << maxidx << std::endl;
492
*/
493

    
494
    return maxidx;
495
}
496

    
497
CepstrumPitchTracker::FeatureSet
498
CepstrumPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
499
{
500
    FeatureSet fs;
501

    
502
    int bs = m_blockSize;
503
    int hs = m_blockSize/2 + 1;
504

    
505
    double *rawcep = new double[bs];
506
    double *io = new double[bs];
507
    double *logmag = new double[bs];
508

    
509
    // The "inverse symmetric" method. Seems to be the most reliable
510
        
511
    double magmean = 0.0;
512

    
513
    for (int i = 0; i < hs; ++i) {
514

    
515
        double power =
516
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
517
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
518
        double mag = sqrt(power);
519

    
520
        magmean += mag;
521

    
522
        double lm = log(mag + 0.00000001);
523
        
524
        logmag[i] = lm;
525
        if (i > 0) logmag[bs - i] = lm;
526
    }
527

    
528
    magmean /= hs;
529
    double threshold = 0.1; // for magmean
530
    
531
    Vamp::FFT::inverse(bs, logmag, 0, rawcep, io);
532
    
533
    delete[] logmag;
534
    delete[] io;
535

    
536
    int n = m_bins;
537
    double *data = new double[n];
538
    filter(rawcep, data);
539
    delete[] rawcep;
540

    
541
    double maxval = 0.0;
542
    int maxbin = -1;
543

    
544
    for (int i = 0; i < n; ++i) {
545
        if (data[i] > maxval) {
546
            maxval = data[i];
547
            maxbin = i;
548
        }
549
    }
550

    
551
    if (maxbin < 0) {
552
        delete[] data;
553
        return fs;
554
    }
555

    
556
    double nextPeakVal = 0.0;
557
    for (int i = 1; i+1 < n; ++i) {
558
        if (data[i] > data[i-1] &&
559
            data[i] > data[i+1] &&
560
            i != maxbin &&
561
            data[i] > nextPeakVal) {
562
            nextPeakVal = data[i];
563
        }
564
    }
565

    
566
    double cimax = findInterpolatedPeak(data, maxbin);
567
    double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
568

    
569
    double confidence = 0.0;
570
    if (nextPeakVal != 0.0) {
571
        confidence = (maxval - nextPeakVal) * 10.0;
572
        if (magmean < threshold) confidence = 0.0;
573
        std::cerr << "magmean = " << magmean << ", confidence = " << confidence << std::endl;
574
    }
575

    
576
    Hypothesis::Estimate e;
577
    e.freq = peakfreq;
578
    e.time = timestamp;
579
    e.confidence = confidence;
580

    
581
//    m_good.advanceTime();
582
    for (int i = 0; i < m_possible.size(); ++i) {
583
//        m_possible[i].advanceTime();
584
    }
585

    
586
    if (!m_good.accept(e)) {
587

    
588
        int candidate = -1;
589
        bool accepted = false;
590

    
591
        for (int i = 0; i < m_possible.size(); ++i) {
592
            if (m_possible[i].accept(e)) {
593
                if (m_possible[i].getState() == Hypothesis::Satisfied) {
594
                    accepted = true;
595
                    candidate = i;
596
                }
597
                break;
598
            }
599
        }
600

    
601
        if (!accepted) {
602
            Hypothesis h;
603
            h.accept(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
604
            m_possible.push_back(h);
605
        }
606

    
607
        if (m_good.getState() == Hypothesis::Expired) {
608
            addFeaturesFrom(m_good, fs);
609
        }
610
        
611
        if (m_good.getState() == Hypothesis::Expired ||
612
            m_good.getState() == Hypothesis::Rejected) {
613
            if (candidate >= 0) {
614
                m_good = m_possible[candidate];
615
            } else {
616
                m_good = Hypothesis();
617
            }
618
        }
619

    
620
        // reap rejected/expired hypotheses from possible list
621
        Hypotheses toReap = m_possible;
622
        m_possible.clear();
623
        for (int i = 0; i < toReap.size(); ++i) {
624
            Hypothesis h = toReap[i];
625
            if (h.getState() != Hypothesis::Rejected && 
626
                h.getState() != Hypothesis::Expired) {
627
                m_possible.push_back(h);
628
            }
629
        }
630
    }  
631

    
632
    delete[] data;
633
    return fs;
634
}
635

    
636
CepstrumPitchTracker::FeatureSet
637
CepstrumPitchTracker::getRemainingFeatures()
638
{
639
    FeatureSet fs;
640
    if (m_good.getState() == Hypothesis::Satisfied) {
641
        addFeaturesFrom(m_good, fs);
642
    }
643
    return fs;
644
}