To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstralPitchTracker.cpp @ 31:2c175adf8736

History | View | Annotate | Download (14.6 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    This file is Copyright (c) 2012 Chris Cannam
4
  
5
    Permission is hereby granted, free of charge, to any person
6
    obtaining a copy of this software and associated documentation
7
    files (the "Software"), to deal in the Software without
8
    restriction, including without limitation the rights to use, copy,
9
    modify, merge, publish, distribute, sublicense, and/or sell copies
10
    of the Software, and to permit persons to whom the Software is
11
    furnished to do so, subject to the following conditions:
12

13
    The above copyright notice and this permission notice shall be
14
    included in all copies or substantial portions of the Software.
15

16
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
20
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
21
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
*/
24

    
25
#include "CepstralPitchTracker.h"
26

    
27
#include "vamp-sdk/FFT.h"
28

    
29
#include <vector>
30
#include <algorithm>
31

    
32
#include <cstdio>
33
#include <cmath>
34
#include <complex>
35

    
36
using std::string;
37
using std::vector;
38
using Vamp::RealTime;
39

    
40
CepstralPitchTracker::Hypothesis::Hypothesis()
41
{
42
    m_state = New;
43
}
44

    
45
CepstralPitchTracker::Hypothesis::~Hypothesis()
46
{
47
}
48

    
49
bool
50
CepstralPitchTracker::Hypothesis::isWithinTolerance(Estimate s) const
51
{
52
    if (m_pending.empty()) {
53
        return true;
54
    }
55

    
56
    // check we are within a relatively close tolerance of the last
57
    // candidate
58
    Estimate last = m_pending[m_pending.size()-1];
59
    double r = s.freq / last.freq;
60
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
61
    if (cents < -60 || cents > 60) return false;
62

    
63
    // and within a slightly bigger tolerance of the current mean
64
    double meanFreq = getMeanFrequency();
65
    r = s.freq / meanFreq;
66
    cents = lrint(1200.0 * (log(r) / log(2.0)));
67
    if (cents < -80 || cents > 80) return false;
68
    
69
    return true;
70
}
71

    
72
bool
73
CepstralPitchTracker::Hypothesis::isOutOfDateFor(Estimate s) const
74
{
75
    if (m_pending.empty()) return false;
76

    
77
    return ((s.time - m_pending[m_pending.size()-1].time) > 
78
            RealTime::fromMilliseconds(40));
79
}
80

    
81
bool 
82
CepstralPitchTracker::Hypothesis::isSatisfied() const
83
{
84
    if (m_pending.empty()) return false;
85
    
86
    double meanConfidence = 0.0;
87
    for (int i = 0; i < m_pending.size(); ++i) {
88
        meanConfidence += m_pending[i].confidence;
89
    }
90
    meanConfidence /= m_pending.size();
91

    
92
    int lengthRequired = 10000;
93
    if (meanConfidence > 0.0) {
94
        lengthRequired = int(2.0 / meanConfidence + 0.5);
95
    }
96

    
97
    return (m_pending.size() > lengthRequired);
98
}
99

    
100
bool
101
CepstralPitchTracker::Hypothesis::accept(Estimate s)
102
{
103
    bool accept = false;
104

    
105
    switch (m_state) {
106

    
107
    case New:
108
        m_state = Provisional;
109
        accept = true;
110
        break;
111

    
112
    case Provisional:
113
        if (isOutOfDateFor(s)) {
114
            m_state = Rejected;
115
        } else if (isWithinTolerance(s)) {
116
            accept = true;
117
        }
118
        break;
119
        
120
    case Satisfied:
121
        if (isOutOfDateFor(s)) {
122
            m_state = Expired;
123
        } else if (isWithinTolerance(s)) {
124
            accept = true;
125
        }
126
        break;
127

    
128
    case Rejected:
129
        break;
130

    
131
    case Expired:
132
        break;
133
    }
134

    
135
    if (accept) {
136
        m_pending.push_back(s);
137
        if (m_state == Provisional && isSatisfied()) {
138
            m_state = Satisfied;
139
        }
140
    }
141

    
142
    return accept;
143
}        
144

    
145
CepstralPitchTracker::Hypothesis::State
146
CepstralPitchTracker::Hypothesis::getState() const
147
{
148
    return m_state;
149
}
150

    
151
CepstralPitchTracker::Hypothesis::Estimates
152
CepstralPitchTracker::Hypothesis::getAcceptedEstimates() const
153
{
154
    if (m_state == Satisfied || m_state == Expired) {
155
        return m_pending;
156
    } else {
157
        return Estimates();
158
    }
159
}
160

    
161
double
162
CepstralPitchTracker::Hypothesis::getMeanFrequency() const
163
{
164
    double acc = 0.0;
165
    for (int i = 0; i < m_pending.size(); ++i) {
166
        acc += m_pending[i].freq;
167
    }
168
    acc /= m_pending.size();
169
    return acc;
170
}
171

    
172
CepstralPitchTracker::Hypothesis::Note
173
CepstralPitchTracker::Hypothesis::getAveragedNote() const
174
{
175
    Note n;
176

    
177
    if (!(m_state == Satisfied || m_state == Expired)) {
178
        n.freq = 0.0;
179
        n.time = RealTime::zeroTime;
180
        n.duration = RealTime::zeroTime;
181
        return n;
182
    }
183

    
184
    n.time = m_pending.begin()->time;
185

    
186
    Estimates::const_iterator i = m_pending.end();
187
    --i;
188
    n.duration = i->time - n.time;
189

    
190
    // just mean frequency for now, but this isn't at all right perceptually
191
    n.freq = getMeanFrequency();
192
    
193
    return n;
194
}
195

    
196
CepstralPitchTracker::CepstralPitchTracker(float inputSampleRate) :
197
    Plugin(inputSampleRate),
198
    m_channels(0),
199
    m_stepSize(256),
200
    m_blockSize(1024),
201
    m_fmin(50),
202
    m_fmax(900),
203
    m_vflen(1),
204
    m_binFrom(0),
205
    m_binTo(0),
206
    m_bins(0)
207
{
208
}
209

    
210
CepstralPitchTracker::~CepstralPitchTracker()
211
{
212
}
213

    
214
string
215
CepstralPitchTracker::getIdentifier() const
216
{
217
    return "cepstrum-pitch";
218
}
219

    
220
string
221
CepstralPitchTracker::getName() const
222
{
223
    return "Cepstrum Pitch Tracker";
224
}
225

    
226
string
227
CepstralPitchTracker::getDescription() const
228
{
229
    return "Estimate f0 of monophonic material using a cepstrum method.";
230
}
231

    
232
string
233
CepstralPitchTracker::getMaker() const
234
{
235
    return "Chris Cannam";
236
}
237

    
238
int
239
CepstralPitchTracker::getPluginVersion() const
240
{
241
    // Increment this each time you release a version that behaves
242
    // differently from the previous one
243
    return 1;
244
}
245

    
246
string
247
CepstralPitchTracker::getCopyright() const
248
{
249
    return "Freely redistributable (BSD license)";
250
}
251

    
252
CepstralPitchTracker::InputDomain
253
CepstralPitchTracker::getInputDomain() const
254
{
255
    return FrequencyDomain;
256
}
257

    
258
size_t
259
CepstralPitchTracker::getPreferredBlockSize() const
260
{
261
    return 1024;
262
}
263

    
264
size_t 
265
CepstralPitchTracker::getPreferredStepSize() const
266
{
267
    return 256;
268
}
269

    
270
size_t
271
CepstralPitchTracker::getMinChannelCount() const
272
{
273
    return 1;
274
}
275

    
276
size_t
277
CepstralPitchTracker::getMaxChannelCount() const
278
{
279
    return 1;
280
}
281

    
282
CepstralPitchTracker::ParameterList
283
CepstralPitchTracker::getParameterDescriptors() const
284
{
285
    ParameterList list;
286
    return list;
287
}
288

    
289
float
290
CepstralPitchTracker::getParameter(string identifier) const
291
{
292
    return 0.f;
293
}
294

    
295
void
296
CepstralPitchTracker::setParameter(string identifier, float value) 
297
{
298
}
299

    
300
CepstralPitchTracker::ProgramList
301
CepstralPitchTracker::getPrograms() const
302
{
303
    ProgramList list;
304
    return list;
305
}
306

    
307
string
308
CepstralPitchTracker::getCurrentProgram() const
309
{
310
    return ""; // no programs
311
}
312

    
313
void
314
CepstralPitchTracker::selectProgram(string name)
315
{
316
}
317

    
318
CepstralPitchTracker::OutputList
319
CepstralPitchTracker::getOutputDescriptors() const
320
{
321
    OutputList outputs;
322

    
323
    int n = 0;
324

    
325
    OutputDescriptor d;
326

    
327
    d.identifier = "f0";
328
    d.name = "Estimated f0";
329
    d.description = "Estimated fundamental frequency";
330
    d.unit = "Hz";
331
    d.hasFixedBinCount = true;
332
    d.binCount = 1;
333
    d.hasKnownExtents = true;
334
    d.minValue = m_fmin;
335
    d.maxValue = m_fmax;
336
    d.isQuantized = false;
337
    d.sampleType = OutputDescriptor::FixedSampleRate;
338
    d.sampleRate = (m_inputSampleRate / m_stepSize);
339
    d.hasDuration = false;
340
    outputs.push_back(d);
341

    
342
    d.identifier = "notes";
343
    d.name = "Notes";
344
    d.description = "Derived fixed-pitch note frequencies";
345
    d.unit = "Hz";
346
    d.hasFixedBinCount = true;
347
    d.binCount = 1;
348
    d.hasKnownExtents = true;
349
    d.minValue = m_fmin;
350
    d.maxValue = m_fmax;
351
    d.isQuantized = false;
352
    d.sampleType = OutputDescriptor::FixedSampleRate;
353
    d.sampleRate = (m_inputSampleRate / m_stepSize);
354
    d.hasDuration = true;
355
    outputs.push_back(d);
356

    
357
    return outputs;
358
}
359

    
360
bool
361
CepstralPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
362
{
363
    if (channels < getMinChannelCount() ||
364
        channels > getMaxChannelCount()) return false;
365

    
366
//    std::cerr << "CepstralPitchTracker::initialise: channels = " << channels
367
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
368
//              << std::endl;
369

    
370
    m_channels = channels;
371
    m_stepSize = stepSize;
372
    m_blockSize = blockSize;
373

    
374
    m_binFrom = int(m_inputSampleRate / m_fmax);
375
    m_binTo = int(m_inputSampleRate / m_fmin); 
376

    
377
    if (m_binTo >= (int)m_blockSize / 2) {
378
        m_binTo = m_blockSize / 2 - 1;
379
    }
380

    
381
    m_bins = (m_binTo - m_binFrom) + 1;
382

    
383
    reset();
384

    
385
    return true;
386
}
387

    
388
void
389
CepstralPitchTracker::reset()
390
{
391
}
392

    
393
void
394
CepstralPitchTracker::addFeaturesFrom(Hypothesis h, FeatureSet &fs)
395
{
396
    Hypothesis::Estimates es = h.getAcceptedEstimates();
397

    
398
    for (int i = 0; i < es.size(); ++i) {
399
        Feature f;
400
        f.hasTimestamp = true;
401
        f.timestamp = es[i].time;
402
        f.values.push_back(es[i].freq);
403
        fs[0].push_back(f);
404
    }
405

    
406
    Feature nf;
407
    nf.hasTimestamp = true;
408
    nf.hasDuration = true;
409
    Hypothesis::Note n = h.getAveragedNote();
410
    nf.timestamp = n.time;
411
    nf.duration = n.duration;
412
    nf.values.push_back(n.freq);
413
    fs[1].push_back(nf);
414
}
415

    
416
void
417
CepstralPitchTracker::filter(const double *cep, double *data)
418
{
419
    for (int i = 0; i < m_bins; ++i) {
420
        double v = 0;
421
        int n = 0;
422
        // average according to the vertical filter length
423
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
424
            int ix = i + m_binFrom + j;
425
            if (ix >= 0 && ix < m_blockSize) {
426
                v += cep[ix];
427
                ++n;
428
            }
429
        }
430
        data[i] = v / n;
431
    }
432
}
433

    
434
double
435
CepstralPitchTracker::cubicInterpolate(const double y[4], double x)
436
{
437
    double a0 = y[3] - y[2] - y[0] + y[1];
438
    double a1 = y[0] - y[1] - a0;
439
    double a2 = y[2] - y[0];
440
    double a3 = y[1];
441
    return
442
        a0 * x * x * x +
443
        a1 * x * x +
444
        a2 * x +
445
        a3;
446
}
447

    
448
double
449
CepstralPitchTracker::findInterpolatedPeak(const double *in, int maxbin)
450
{
451
    if (maxbin < 2 || maxbin > m_bins - 3) {
452
        return maxbin;
453
    }
454

    
455
    double maxval = 0.0;
456
    double maxidx = maxbin;
457

    
458
    const int divisions = 10;
459
    double y[4];
460

    
461
    y[0] = in[maxbin-1];
462
    y[1] = in[maxbin];
463
    y[2] = in[maxbin+1];
464
    y[3] = in[maxbin+2];
465
    for (int i = 0; i < divisions; ++i) {
466
        double probe = double(i) / double(divisions);
467
        double value = cubicInterpolate(y, probe);
468
        if (value > maxval) {
469
            maxval = value; 
470
            maxidx = maxbin + probe;
471
        }
472
    }
473

    
474
    y[3] = y[2];
475
    y[2] = y[1];
476
    y[1] = y[0];
477
    y[0] = in[maxbin-2];
478
    for (int i = 0; i < divisions; ++i) {
479
        double probe = double(i) / double(divisions);
480
        double value = cubicInterpolate(y, probe);
481
        if (value > maxval) {
482
            maxval = value; 
483
            maxidx = maxbin - 1 + probe;
484
        }
485
    }
486

    
487
/*
488
    std::cerr << "centre = " << maxbin << ": ["
489
              << in[maxbin-2] << ","
490
              << in[maxbin-1] << ","
491
              << in[maxbin] << ","
492
              << in[maxbin+1] << ","
493
              << in[maxbin+2] << "] -> " << maxidx << std::endl;
494
*/
495

    
496
    return maxidx;
497
}
498

    
499
CepstralPitchTracker::FeatureSet
500
CepstralPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
501
{
502
    FeatureSet fs;
503

    
504
    int bs = m_blockSize;
505
    int hs = m_blockSize/2 + 1;
506

    
507
    double *rawcep = new double[bs];
508
    double *io = new double[bs];
509
    double *logmag = new double[bs];
510

    
511
    // The "inverse symmetric" method. Seems to be the most reliable
512
        
513
    double magmean = 0.0;
514

    
515
    for (int i = 0; i < hs; ++i) {
516

    
517
        double power =
518
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
519
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
520
        double mag = sqrt(power);
521

    
522
        magmean += mag;
523

    
524
        double lm = log(mag + 0.00000001);
525
        
526
        logmag[i] = lm;
527
        if (i > 0) logmag[bs - i] = lm;
528
    }
529

    
530
    magmean /= hs;
531
    double threshold = 0.1; // for magmean
532
    
533
    Vamp::FFT::inverse(bs, logmag, 0, rawcep, io);
534
    
535
    delete[] logmag;
536
    delete[] io;
537

    
538
    int n = m_bins;
539
    double *data = new double[n];
540
    filter(rawcep, data);
541
    delete[] rawcep;
542

    
543
    double maxval = 0.0;
544
    int maxbin = -1;
545

    
546
    for (int i = 0; i < n; ++i) {
547
        if (data[i] > maxval) {
548
            maxval = data[i];
549
            maxbin = i;
550
        }
551
    }
552

    
553
    if (maxbin < 0) {
554
        delete[] data;
555
        return fs;
556
    }
557

    
558
    double nextPeakVal = 0.0;
559
    for (int i = 1; i+1 < n; ++i) {
560
        if (data[i] > data[i-1] &&
561
            data[i] > data[i+1] &&
562
            i != maxbin &&
563
            data[i] > nextPeakVal) {
564
            nextPeakVal = data[i];
565
        }
566
    }
567

    
568
    double cimax = findInterpolatedPeak(data, maxbin);
569
    double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
570

    
571
    double confidence = 0.0;
572
    if (nextPeakVal != 0.0) {
573
        confidence = (maxval - nextPeakVal) * 10.0;
574
        if (magmean < threshold) confidence = 0.0;
575
        std::cerr << "magmean = " << magmean << ", confidence = " << confidence << std::endl;
576
    }
577

    
578
    Hypothesis::Estimate e;
579
    e.freq = peakfreq;
580
    e.time = timestamp;
581
    e.confidence = confidence;
582

    
583
//    m_good.advanceTime();
584
    for (int i = 0; i < m_possible.size(); ++i) {
585
//        m_possible[i].advanceTime();
586
    }
587

    
588
    if (!m_good.accept(e)) {
589

    
590
        int candidate = -1;
591
        bool accepted = false;
592

    
593
        for (int i = 0; i < m_possible.size(); ++i) {
594
            if (m_possible[i].accept(e)) {
595
                if (m_possible[i].getState() == Hypothesis::Satisfied) {
596
                    accepted = true;
597
                    candidate = i;
598
                }
599
                break;
600
            }
601
        }
602

    
603
        if (!accepted) {
604
            Hypothesis h;
605
            h.accept(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
606
            m_possible.push_back(h);
607
        }
608

    
609
        if (m_good.getState() == Hypothesis::Expired) {
610
            addFeaturesFrom(m_good, fs);
611
        }
612
        
613
        if (m_good.getState() == Hypothesis::Expired ||
614
            m_good.getState() == Hypothesis::Rejected) {
615
            if (candidate >= 0) {
616
                m_good = m_possible[candidate];
617
            } else {
618
                m_good = Hypothesis();
619
            }
620
        }
621

    
622
        // reap rejected/expired hypotheses from possible list
623
        Hypotheses toReap = m_possible;
624
        m_possible.clear();
625
        for (int i = 0; i < toReap.size(); ++i) {
626
            Hypothesis h = toReap[i];
627
            if (h.getState() != Hypothesis::Rejected && 
628
                h.getState() != Hypothesis::Expired) {
629
                m_possible.push_back(h);
630
            }
631
        }
632
    }  
633

    
634
    delete[] data;
635
    return fs;
636
}
637

    
638
CepstralPitchTracker::FeatureSet
639
CepstralPitchTracker::getRemainingFeatures()
640
{
641
    FeatureSet fs;
642
    if (m_good.getState() == Hypothesis::Satisfied) {
643
        addFeaturesFrom(m_good, fs);
644
    }
645
    return fs;
646
}