Revision 31:2c175adf8736

View differences:

CepstralPitchTracker.cpp
1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    This file is Copyright (c) 2012 Chris Cannam
4
  
5
    Permission is hereby granted, free of charge, to any person
6
    obtaining a copy of this software and associated documentation
7
    files (the "Software"), to deal in the Software without
8
    restriction, including without limitation the rights to use, copy,
9
    modify, merge, publish, distribute, sublicense, and/or sell copies
10
    of the Software, and to permit persons to whom the Software is
11
    furnished to do so, subject to the following conditions:
12

  
13
    The above copyright notice and this permission notice shall be
14
    included in all copies or substantial portions of the Software.
15

  
16
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
20
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
21
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
*/
24

  
25
#include "CepstralPitchTracker.h"
26

  
27
#include "vamp-sdk/FFT.h"
28

  
29
#include <vector>
30
#include <algorithm>
31

  
32
#include <cstdio>
33
#include <cmath>
34
#include <complex>
35

  
36
using std::string;
37
using std::vector;
38
using Vamp::RealTime;
39

  
40
CepstralPitchTracker::Hypothesis::Hypothesis()
41
{
42
    m_state = New;
43
}
44

  
45
CepstralPitchTracker::Hypothesis::~Hypothesis()
46
{
47
}
48

  
49
bool
50
CepstralPitchTracker::Hypothesis::isWithinTolerance(Estimate s) const
51
{
52
    if (m_pending.empty()) {
53
        return true;
54
    }
55

  
56
    // check we are within a relatively close tolerance of the last
57
    // candidate
58
    Estimate last = m_pending[m_pending.size()-1];
59
    double r = s.freq / last.freq;
60
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
61
    if (cents < -60 || cents > 60) return false;
62

  
63
    // and within a slightly bigger tolerance of the current mean
64
    double meanFreq = getMeanFrequency();
65
    r = s.freq / meanFreq;
66
    cents = lrint(1200.0 * (log(r) / log(2.0)));
67
    if (cents < -80 || cents > 80) return false;
68
    
69
    return true;
70
}
71

  
72
bool
73
CepstralPitchTracker::Hypothesis::isOutOfDateFor(Estimate s) const
74
{
75
    if (m_pending.empty()) return false;
76

  
77
    return ((s.time - m_pending[m_pending.size()-1].time) > 
78
            RealTime::fromMilliseconds(40));
79
}
80

  
81
bool 
82
CepstralPitchTracker::Hypothesis::isSatisfied() const
83
{
84
    if (m_pending.empty()) return false;
85
    
86
    double meanConfidence = 0.0;
87
    for (int i = 0; i < m_pending.size(); ++i) {
88
        meanConfidence += m_pending[i].confidence;
89
    }
90
    meanConfidence /= m_pending.size();
91

  
92
    int lengthRequired = 10000;
93
    if (meanConfidence > 0.0) {
94
        lengthRequired = int(2.0 / meanConfidence + 0.5);
95
    }
96

  
97
    return (m_pending.size() > lengthRequired);
98
}
99

  
100
bool
101
CepstralPitchTracker::Hypothesis::accept(Estimate s)
102
{
103
    bool accept = false;
104

  
105
    switch (m_state) {
106

  
107
    case New:
108
        m_state = Provisional;
109
        accept = true;
110
        break;
111

  
112
    case Provisional:
113
        if (isOutOfDateFor(s)) {
114
            m_state = Rejected;
115
        } else if (isWithinTolerance(s)) {
116
            accept = true;
117
        }
118
        break;
119
        
120
    case Satisfied:
121
        if (isOutOfDateFor(s)) {
122
            m_state = Expired;
123
        } else if (isWithinTolerance(s)) {
124
            accept = true;
125
        }
126
        break;
127

  
128
    case Rejected:
129
        break;
130

  
131
    case Expired:
132
        break;
133
    }
134

  
135
    if (accept) {
136
        m_pending.push_back(s);
137
        if (m_state == Provisional && isSatisfied()) {
138
            m_state = Satisfied;
139
        }
140
    }
141

  
142
    return accept;
143
}        
144

  
145
CepstralPitchTracker::Hypothesis::State
146
CepstralPitchTracker::Hypothesis::getState() const
147
{
148
    return m_state;
149
}
150

  
151
CepstralPitchTracker::Hypothesis::Estimates
152
CepstralPitchTracker::Hypothesis::getAcceptedEstimates() const
153
{
154
    if (m_state == Satisfied || m_state == Expired) {
155
        return m_pending;
156
    } else {
157
        return Estimates();
158
    }
159
}
160

  
161
double
162
CepstralPitchTracker::Hypothesis::getMeanFrequency() const
163
{
164
    double acc = 0.0;
165
    for (int i = 0; i < m_pending.size(); ++i) {
166
        acc += m_pending[i].freq;
167
    }
168
    acc /= m_pending.size();
169
    return acc;
170
}
171

  
172
CepstralPitchTracker::Hypothesis::Note
173
CepstralPitchTracker::Hypothesis::getAveragedNote() const
174
{
175
    Note n;
176

  
177
    if (!(m_state == Satisfied || m_state == Expired)) {
178
        n.freq = 0.0;
179
        n.time = RealTime::zeroTime;
180
        n.duration = RealTime::zeroTime;
181
        return n;
182
    }
183

  
184
    n.time = m_pending.begin()->time;
185

  
186
    Estimates::const_iterator i = m_pending.end();
187
    --i;
188
    n.duration = i->time - n.time;
189

  
190
    // just mean frequency for now, but this isn't at all right perceptually
191
    n.freq = getMeanFrequency();
192
    
193
    return n;
194
}
195

  
196
CepstralPitchTracker::CepstralPitchTracker(float inputSampleRate) :
197
    Plugin(inputSampleRate),
198
    m_channels(0),
199
    m_stepSize(256),
200
    m_blockSize(1024),
201
    m_fmin(50),
202
    m_fmax(900),
203
    m_vflen(1),
204
    m_binFrom(0),
205
    m_binTo(0),
206
    m_bins(0)
207
{
208
}
209

  
210
CepstralPitchTracker::~CepstralPitchTracker()
211
{
212
}
213

  
214
string
215
CepstralPitchTracker::getIdentifier() const
216
{
217
    return "cepstrum-pitch";
218
}
219

  
220
string
221
CepstralPitchTracker::getName() const
222
{
223
    return "Cepstrum Pitch Tracker";
224
}
225

  
226
string
227
CepstralPitchTracker::getDescription() const
228
{
229
    return "Estimate f0 of monophonic material using a cepstrum method.";
230
}
231

  
232
string
233
CepstralPitchTracker::getMaker() const
234
{
235
    return "Chris Cannam";
236
}
237

  
238
int
239
CepstralPitchTracker::getPluginVersion() const
240
{
241
    // Increment this each time you release a version that behaves
242
    // differently from the previous one
243
    return 1;
244
}
245

  
246
string
247
CepstralPitchTracker::getCopyright() const
248
{
249
    return "Freely redistributable (BSD license)";
250
}
251

  
252
CepstralPitchTracker::InputDomain
253
CepstralPitchTracker::getInputDomain() const
254
{
255
    return FrequencyDomain;
256
}
257

  
258
size_t
259
CepstralPitchTracker::getPreferredBlockSize() const
260
{
261
    return 1024;
262
}
263

  
264
size_t 
265
CepstralPitchTracker::getPreferredStepSize() const
266
{
267
    return 256;
268
}
269

  
270
size_t
271
CepstralPitchTracker::getMinChannelCount() const
272
{
273
    return 1;
274
}
275

  
276
size_t
277
CepstralPitchTracker::getMaxChannelCount() const
278
{
279
    return 1;
280
}
281

  
282
CepstralPitchTracker::ParameterList
283
CepstralPitchTracker::getParameterDescriptors() const
284
{
285
    ParameterList list;
286
    return list;
287
}
288

  
289
float
290
CepstralPitchTracker::getParameter(string identifier) const
291
{
292
    return 0.f;
293
}
294

  
295
void
296
CepstralPitchTracker::setParameter(string identifier, float value) 
297
{
298
}
299

  
300
CepstralPitchTracker::ProgramList
301
CepstralPitchTracker::getPrograms() const
302
{
303
    ProgramList list;
304
    return list;
305
}
306

  
307
string
308
CepstralPitchTracker::getCurrentProgram() const
309
{
310
    return ""; // no programs
311
}
312

  
313
void
314
CepstralPitchTracker::selectProgram(string name)
315
{
316
}
317

  
318
CepstralPitchTracker::OutputList
319
CepstralPitchTracker::getOutputDescriptors() const
320
{
321
    OutputList outputs;
322

  
323
    int n = 0;
324

  
325
    OutputDescriptor d;
326

  
327
    d.identifier = "f0";
328
    d.name = "Estimated f0";
329
    d.description = "Estimated fundamental frequency";
330
    d.unit = "Hz";
331
    d.hasFixedBinCount = true;
332
    d.binCount = 1;
333
    d.hasKnownExtents = true;
334
    d.minValue = m_fmin;
335
    d.maxValue = m_fmax;
336
    d.isQuantized = false;
337
    d.sampleType = OutputDescriptor::FixedSampleRate;
338
    d.sampleRate = (m_inputSampleRate / m_stepSize);
339
    d.hasDuration = false;
340
    outputs.push_back(d);
341

  
342
    d.identifier = "notes";
343
    d.name = "Notes";
344
    d.description = "Derived fixed-pitch note frequencies";
345
    d.unit = "Hz";
346
    d.hasFixedBinCount = true;
347
    d.binCount = 1;
348
    d.hasKnownExtents = true;
349
    d.minValue = m_fmin;
350
    d.maxValue = m_fmax;
351
    d.isQuantized = false;
352
    d.sampleType = OutputDescriptor::FixedSampleRate;
353
    d.sampleRate = (m_inputSampleRate / m_stepSize);
354
    d.hasDuration = true;
355
    outputs.push_back(d);
356

  
357
    return outputs;
358
}
359

  
360
bool
361
CepstralPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
362
{
363
    if (channels < getMinChannelCount() ||
364
	channels > getMaxChannelCount()) return false;
365

  
366
//    std::cerr << "CepstralPitchTracker::initialise: channels = " << channels
367
//	      << ", stepSize = " << stepSize << ", blockSize = " << blockSize
368
//	      << std::endl;
369

  
370
    m_channels = channels;
371
    m_stepSize = stepSize;
372
    m_blockSize = blockSize;
373

  
374
    m_binFrom = int(m_inputSampleRate / m_fmax);
375
    m_binTo = int(m_inputSampleRate / m_fmin); 
376

  
377
    if (m_binTo >= (int)m_blockSize / 2) {
378
        m_binTo = m_blockSize / 2 - 1;
379
    }
380

  
381
    m_bins = (m_binTo - m_binFrom) + 1;
382

  
383
    reset();
384

  
385
    return true;
386
}
387

  
388
void
389
CepstralPitchTracker::reset()
390
{
391
}
392

  
393
void
394
CepstralPitchTracker::addFeaturesFrom(Hypothesis h, FeatureSet &fs)
395
{
396
    Hypothesis::Estimates es = h.getAcceptedEstimates();
397

  
398
    for (int i = 0; i < es.size(); ++i) {
399
	Feature f;
400
	f.hasTimestamp = true;
401
	f.timestamp = es[i].time;
402
	f.values.push_back(es[i].freq);
403
	fs[0].push_back(f);
404
    }
405

  
406
    Feature nf;
407
    nf.hasTimestamp = true;
408
    nf.hasDuration = true;
409
    Hypothesis::Note n = h.getAveragedNote();
410
    nf.timestamp = n.time;
411
    nf.duration = n.duration;
412
    nf.values.push_back(n.freq);
413
    fs[1].push_back(nf);
414
}
415

  
416
void
417
CepstralPitchTracker::filter(const double *cep, double *data)
418
{
419
    for (int i = 0; i < m_bins; ++i) {
420
        double v = 0;
421
        int n = 0;
422
        // average according to the vertical filter length
423
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
424
            int ix = i + m_binFrom + j;
425
            if (ix >= 0 && ix < m_blockSize) {
426
                v += cep[ix];
427
                ++n;
428
            }
429
        }
430
        data[i] = v / n;
431
    }
432
}
433

  
434
double
435
CepstralPitchTracker::cubicInterpolate(const double y[4], double x)
436
{
437
    double a0 = y[3] - y[2] - y[0] + y[1];
438
    double a1 = y[0] - y[1] - a0;
439
    double a2 = y[2] - y[0];
440
    double a3 = y[1];
441
    return
442
        a0 * x * x * x +
443
        a1 * x * x +
444
        a2 * x +
445
        a3;
446
}
447

  
448
double
449
CepstralPitchTracker::findInterpolatedPeak(const double *in, int maxbin)
450
{
451
    if (maxbin < 2 || maxbin > m_bins - 3) {
452
        return maxbin;
453
    }
454

  
455
    double maxval = 0.0;
456
    double maxidx = maxbin;
457

  
458
    const int divisions = 10;
459
    double y[4];
460

  
461
    y[0] = in[maxbin-1];
462
    y[1] = in[maxbin];
463
    y[2] = in[maxbin+1];
464
    y[3] = in[maxbin+2];
465
    for (int i = 0; i < divisions; ++i) {
466
        double probe = double(i) / double(divisions);
467
        double value = cubicInterpolate(y, probe);
468
        if (value > maxval) {
469
            maxval = value; 
470
            maxidx = maxbin + probe;
471
        }
472
    }
473

  
474
    y[3] = y[2];
475
    y[2] = y[1];
476
    y[1] = y[0];
477
    y[0] = in[maxbin-2];
478
    for (int i = 0; i < divisions; ++i) {
479
        double probe = double(i) / double(divisions);
480
        double value = cubicInterpolate(y, probe);
481
        if (value > maxval) {
482
            maxval = value; 
483
            maxidx = maxbin - 1 + probe;
484
        }
485
    }
486

  
487
/*
488
    std::cerr << "centre = " << maxbin << ": ["
489
              << in[maxbin-2] << ","
490
              << in[maxbin-1] << ","
491
              << in[maxbin] << ","
492
              << in[maxbin+1] << ","
493
              << in[maxbin+2] << "] -> " << maxidx << std::endl;
494
*/
495

  
496
    return maxidx;
497
}
498

  
499
CepstralPitchTracker::FeatureSet
500
CepstralPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
501
{
502
    FeatureSet fs;
503

  
504
    int bs = m_blockSize;
505
    int hs = m_blockSize/2 + 1;
506

  
507
    double *rawcep = new double[bs];
508
    double *io = new double[bs];
509
    double *logmag = new double[bs];
510

  
511
    // The "inverse symmetric" method. Seems to be the most reliable
512
        
513
    double magmean = 0.0;
514

  
515
    for (int i = 0; i < hs; ++i) {
516

  
517
	double power =
518
	    inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
519
	    inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
520
	double mag = sqrt(power);
521

  
522
        magmean += mag;
523

  
524
	double lm = log(mag + 0.00000001);
525
	
526
	logmag[i] = lm;
527
	if (i > 0) logmag[bs - i] = lm;
528
    }
529

  
530
    magmean /= hs;
531
    double threshold = 0.1; // for magmean
532
    
533
    Vamp::FFT::inverse(bs, logmag, 0, rawcep, io);
534
    
535
    delete[] logmag;
536
    delete[] io;
537

  
538
    int n = m_bins;
539
    double *data = new double[n];
540
    filter(rawcep, data);
541
    delete[] rawcep;
542

  
543
    double maxval = 0.0;
544
    int maxbin = -1;
545

  
546
    for (int i = 0; i < n; ++i) {
547
        if (data[i] > maxval) {
548
            maxval = data[i];
549
            maxbin = i;
550
        }
551
    }
552

  
553
    if (maxbin < 0) {
554
        delete[] data;
555
        return fs;
556
    }
557

  
558
    double nextPeakVal = 0.0;
559
    for (int i = 1; i+1 < n; ++i) {
560
        if (data[i] > data[i-1] &&
561
            data[i] > data[i+1] &&
562
            i != maxbin &&
563
            data[i] > nextPeakVal) {
564
            nextPeakVal = data[i];
565
        }
566
    }
567

  
568
    double cimax = findInterpolatedPeak(data, maxbin);
569
    double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
570

  
571
    double confidence = 0.0;
572
    if (nextPeakVal != 0.0) {
573
        confidence = (maxval - nextPeakVal) * 10.0;
574
        if (magmean < threshold) confidence = 0.0;
575
        std::cerr << "magmean = " << magmean << ", confidence = " << confidence << std::endl;
576
    }
577

  
578
    Hypothesis::Estimate e;
579
    e.freq = peakfreq;
580
    e.time = timestamp;
581
    e.confidence = confidence;
582

  
583
//    m_good.advanceTime();
584
    for (int i = 0; i < m_possible.size(); ++i) {
585
//        m_possible[i].advanceTime();
586
    }
587

  
588
    if (!m_good.accept(e)) {
589

  
590
        int candidate = -1;
591
        bool accepted = false;
592

  
593
        for (int i = 0; i < m_possible.size(); ++i) {
594
            if (m_possible[i].accept(e)) {
595
                if (m_possible[i].getState() == Hypothesis::Satisfied) {
596
                    accepted = true;
597
                    candidate = i;
598
                }
599
                break;
600
            }
601
        }
602

  
603
        if (!accepted) {
604
            Hypothesis h;
605
            h.accept(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
606
            m_possible.push_back(h);
607
        }
608

  
609
        if (m_good.getState() == Hypothesis::Expired) {
610
            addFeaturesFrom(m_good, fs);
611
        }
612
        
613
        if (m_good.getState() == Hypothesis::Expired ||
614
            m_good.getState() == Hypothesis::Rejected) {
615
            if (candidate >= 0) {
616
                m_good = m_possible[candidate];
617
            } else {
618
                m_good = Hypothesis();
619
            }
620
        }
621

  
622
        // reap rejected/expired hypotheses from possible list
623
        Hypotheses toReap = m_possible;
624
        m_possible.clear();
625
        for (int i = 0; i < toReap.size(); ++i) {
626
            Hypothesis h = toReap[i];
627
            if (h.getState() != Hypothesis::Rejected && 
628
                h.getState() != Hypothesis::Expired) {
629
                m_possible.push_back(h);
630
            }
631
        }
632
    }  
633

  
634
    delete[] data;
635
    return fs;
636
}
637

  
638
CepstralPitchTracker::FeatureSet
639
CepstralPitchTracker::getRemainingFeatures()
640
{
641
    FeatureSet fs;
642
    if (m_good.getState() == Hypothesis::Satisfied) {
643
        addFeaturesFrom(m_good, fs);
644
    }
645
    return fs;
646
}
CepstralPitchTracker.h
1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    This file is Copyright (c) 2012 Chris Cannam
4
  
5
    Permission is hereby granted, free of charge, to any person
6
    obtaining a copy of this software and associated documentation
7
    files (the "Software"), to deal in the Software without
8
    restriction, including without limitation the rights to use, copy,
9
    modify, merge, publish, distribute, sublicense, and/or sell copies
10
    of the Software, and to permit persons to whom the Software is
11
    furnished to do so, subject to the following conditions:
12

  
13
    The above copyright notice and this permission notice shall be
14
    included in all copies or substantial portions of the Software.
15

  
16
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
20
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
21
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
*/
24

  
25
#ifndef _CEPSTRUM_PITCH_H_
26
#define _CEPSTRUM_PITCH_H_
27

  
28
#include <vamp-sdk/Plugin.h>
29

  
30
class CepstralPitchTracker : public Vamp::Plugin
31
{
32
public:
33
    CepstralPitchTracker(float inputSampleRate);
34
    virtual ~CepstralPitchTracker();
35

  
36
    std::string getIdentifier() const;
37
    std::string getName() const;
38
    std::string getDescription() const;
39
    std::string getMaker() const;
40
    int getPluginVersion() const;
41
    std::string getCopyright() const;
42

  
43
    InputDomain getInputDomain() const;
44
    size_t getPreferredBlockSize() const;
45
    size_t getPreferredStepSize() const;
46
    size_t getMinChannelCount() const;
47
    size_t getMaxChannelCount() const;
48

  
49
    ParameterList getParameterDescriptors() const;
50
    float getParameter(std::string identifier) const;
51
    void setParameter(std::string identifier, float value);
52

  
53
    ProgramList getPrograms() const;
54
    std::string getCurrentProgram() const;
55
    void selectProgram(std::string name);
56

  
57
    OutputList getOutputDescriptors() const;
58

  
59
    bool initialise(size_t channels, size_t stepSize, size_t blockSize);
60
    void reset();
61

  
62
    FeatureSet process(const float *const *inputBuffers,
63
                       Vamp::RealTime timestamp);
64

  
65
    FeatureSet getRemainingFeatures();
66

  
67
protected:
68
    size_t m_channels;
69
    size_t m_stepSize;
70
    size_t m_blockSize;
71
    float m_fmin;
72
    float m_fmax;
73
    int m_vflen;
74

  
75
    int m_binFrom;
76
    int m_binTo;
77
    int m_bins; // count of "interesting" bins, those returned in m_cepOutput
78

  
79
    class Hypothesis {
80

  
81
    public:
82
        struct Estimate {
83
            double freq;
84
            Vamp::RealTime time;
85
            double confidence;
86
        };
87
        typedef std::vector<Estimate> Estimates;
88

  
89
        struct Note {
90
            double freq;
91
            Vamp::RealTime time;
92
            Vamp::RealTime duration;
93
        };
94
        
95
        Hypothesis();
96
        ~Hypothesis();
97

  
98
        enum State {
99
            New,
100
            Provisional,
101
            Satisfied,
102
            Rejected,
103
            Expired
104
        };
105

  
106
        bool accept(Estimate);
107

  
108
        State getState() const;
109
        Estimates getAcceptedEstimates() const;
110
        Note getAveragedNote() const;
111

  
112
    private:
113
        bool isWithinTolerance(Estimate) const;
114
        bool isOutOfDateFor(Estimate) const;
115
        bool isSatisfied() const;
116
        double getMeanFrequency() const;
117

  
118
        State m_state;
119
        Estimates m_pending;
120
    };
121

  
122
    typedef std::vector<Hypothesis> Hypotheses;
123
    Hypotheses m_possible;
124
    Hypothesis m_good;
125

  
126
    void addFeaturesFrom(Hypothesis h, FeatureSet &fs);
127

  
128
    void filter(const double *in, double *out);
129
    double cubicInterpolate(const double[4], double);
130
    double findInterpolatedPeak(const double *in, int maxbin);
131
};
132

  
133
#endif
CepstrumPitchTracker.cpp
1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

  
11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

  
14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

  
23
#include "CepstrumPitchTracker.h"
24

  
25
#include "vamp-sdk/FFT.h"
26

  
27
#include <vector>
28
#include <algorithm>
29

  
30
#include <cstdio>
31
#include <cmath>
32
#include <complex>
33

  
34
using std::string;
35
using std::vector;
36
using Vamp::RealTime;
37

  
38
CepstrumPitchTracker::Hypothesis::Hypothesis()
39
{
40
    m_state = New;
41
}
42

  
43
CepstrumPitchTracker::Hypothesis::~Hypothesis()
44
{
45
}
46

  
47
bool
48
CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s) const
49
{
50
    if (m_pending.empty()) {
51
        return true;
52
    }
53

  
54
    // check we are within a relatively close tolerance of the last
55
    // candidate
56
    Estimate last = m_pending[m_pending.size()-1];
57
    double r = s.freq / last.freq;
58
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
59
    if (cents < -60 || cents > 60) return false;
60

  
61
    // and within a slightly bigger tolerance of the current mean
62
    double meanFreq = getMeanFrequency();
63
    r = s.freq / meanFreq;
64
    cents = lrint(1200.0 * (log(r) / log(2.0)));
65
    if (cents < -80 || cents > 80) return false;
66
    
67
    return true;
68
}
69

  
70
bool
71
CepstrumPitchTracker::Hypothesis::isOutOfDateFor(Estimate s) const
72
{
73
    if (m_pending.empty()) return false;
74

  
75
    return ((s.time - m_pending[m_pending.size()-1].time) > 
76
            RealTime::fromMilliseconds(40));
77
}
78

  
79
bool 
80
CepstrumPitchTracker::Hypothesis::isSatisfied() const
81
{
82
    if (m_pending.empty()) return false;
83
    
84
    double meanConfidence = 0.0;
85
    for (int i = 0; i < m_pending.size(); ++i) {
86
        meanConfidence += m_pending[i].confidence;
87
    }
88
    meanConfidence /= m_pending.size();
89

  
90
    int lengthRequired = 10000;
91
    if (meanConfidence > 0.0) {
92
        lengthRequired = int(2.0 / meanConfidence + 0.5);
93
    }
94

  
95
    return (m_pending.size() > lengthRequired);
96
}
97

  
98
bool
99
CepstrumPitchTracker::Hypothesis::accept(Estimate s)
100
{
101
    bool accept = false;
102

  
103
    switch (m_state) {
104

  
105
    case New:
106
        m_state = Provisional;
107
        accept = true;
108
        break;
109

  
110
    case Provisional:
111
        if (isOutOfDateFor(s)) {
112
            m_state = Rejected;
113
        } else if (isWithinTolerance(s)) {
114
            accept = true;
115
        }
116
        break;
117
        
118
    case Satisfied:
119
        if (isOutOfDateFor(s)) {
120
            m_state = Expired;
121
        } else if (isWithinTolerance(s)) {
122
            accept = true;
123
        }
124
        break;
125

  
126
    case Rejected:
127
        break;
128

  
129
    case Expired:
130
        break;
131
    }
132

  
133
    if (accept) {
134
        m_pending.push_back(s);
135
        if (m_state == Provisional && isSatisfied()) {
136
            m_state = Satisfied;
137
        }
138
    }
139

  
140
    return accept;
141
}        
142

  
143
CepstrumPitchTracker::Hypothesis::State
144
CepstrumPitchTracker::Hypothesis::getState() const
145
{
146
    return m_state;
147
}
148

  
149
CepstrumPitchTracker::Hypothesis::Estimates
150
CepstrumPitchTracker::Hypothesis::getAcceptedEstimates() const
151
{
152
    if (m_state == Satisfied || m_state == Expired) {
153
        return m_pending;
154
    } else {
155
        return Estimates();
156
    }
157
}
158

  
159
double
160
CepstrumPitchTracker::Hypothesis::getMeanFrequency() const
161
{
162
    double acc = 0.0;
163
    for (int i = 0; i < m_pending.size(); ++i) {
164
        acc += m_pending[i].freq;
165
    }
166
    acc /= m_pending.size();
167
    return acc;
168
}
169

  
170
CepstrumPitchTracker::Hypothesis::Note
171
CepstrumPitchTracker::Hypothesis::getAveragedNote() const
172
{
173
    Note n;
174

  
175
    if (!(m_state == Satisfied || m_state == Expired)) {
176
        n.freq = 0.0;
177
        n.time = RealTime::zeroTime;
178
        n.duration = RealTime::zeroTime;
179
        return n;
180
    }
181

  
182
    n.time = m_pending.begin()->time;
183

  
184
    Estimates::const_iterator i = m_pending.end();
185
    --i;
186
    n.duration = i->time - n.time;
187

  
188
    // just mean frequency for now, but this isn't at all right perceptually
189
    n.freq = getMeanFrequency();
190
    
191
    return n;
192
}
193

  
194
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
195
    Plugin(inputSampleRate),
196
    m_channels(0),
197
    m_stepSize(256),
198
    m_blockSize(1024),
199
    m_fmin(50),
200
    m_fmax(900),
201
    m_vflen(1),
202
    m_binFrom(0),
203
    m_binTo(0),
204
    m_bins(0)
205
{
206
}
207

  
208
CepstrumPitchTracker::~CepstrumPitchTracker()
209
{
210
}
211

  
212
string
213
CepstrumPitchTracker::getIdentifier() const
214
{
215
    return "cepstrum-pitch";
216
}
217

  
218
string
219
CepstrumPitchTracker::getName() const
220
{
221
    return "Cepstrum Pitch Tracker";
222
}
223

  
224
string
225
CepstrumPitchTracker::getDescription() const
226
{
227
    return "Estimate f0 of monophonic material using a cepstrum method.";
228
}
229

  
230
string
231
CepstrumPitchTracker::getMaker() const
232
{
233
    return "Chris Cannam";
234
}
235

  
236
int
237
CepstrumPitchTracker::getPluginVersion() const
238
{
239
    // Increment this each time you release a version that behaves
240
    // differently from the previous one
241
    return 1;
242
}
243

  
244
string
245
CepstrumPitchTracker::getCopyright() const
246
{
247
    return "Freely redistributable (BSD license)";
248
}
249

  
250
CepstrumPitchTracker::InputDomain
251
CepstrumPitchTracker::getInputDomain() const
252
{
253
    return FrequencyDomain;
254
}
255

  
256
size_t
257
CepstrumPitchTracker::getPreferredBlockSize() const
258
{
259
    return 1024;
260
}
261

  
262
size_t 
263
CepstrumPitchTracker::getPreferredStepSize() const
264
{
265
    return 256;
266
}
267

  
268
size_t
269
CepstrumPitchTracker::getMinChannelCount() const
270
{
271
    return 1;
272
}
273

  
274
size_t
275
CepstrumPitchTracker::getMaxChannelCount() const
276
{
277
    return 1;
278
}
279

  
280
CepstrumPitchTracker::ParameterList
281
CepstrumPitchTracker::getParameterDescriptors() const
282
{
283
    ParameterList list;
284
    return list;
285
}
286

  
287
float
288
CepstrumPitchTracker::getParameter(string identifier) const
289
{
290
    return 0.f;
291
}
292

  
293
void
294
CepstrumPitchTracker::setParameter(string identifier, float value) 
295
{
296
}
297

  
298
CepstrumPitchTracker::ProgramList
299
CepstrumPitchTracker::getPrograms() const
300
{
301
    ProgramList list;
302
    return list;
303
}
304

  
305
string
306
CepstrumPitchTracker::getCurrentProgram() const
307
{
308
    return ""; // no programs
309
}
310

  
311
void
312
CepstrumPitchTracker::selectProgram(string name)
313
{
314
}
315

  
316
CepstrumPitchTracker::OutputList
317
CepstrumPitchTracker::getOutputDescriptors() const
318
{
319
    OutputList outputs;
320

  
321
    int n = 0;
322

  
323
    OutputDescriptor d;
324

  
325
    d.identifier = "f0";
326
    d.name = "Estimated f0";
327
    d.description = "Estimated fundamental frequency";
328
    d.unit = "Hz";
329
    d.hasFixedBinCount = true;
330
    d.binCount = 1;
331
    d.hasKnownExtents = true;
332
    d.minValue = m_fmin;
333
    d.maxValue = m_fmax;
334
    d.isQuantized = false;
335
    d.sampleType = OutputDescriptor::FixedSampleRate;
336
    d.sampleRate = (m_inputSampleRate / m_stepSize);
337
    d.hasDuration = false;
338
    outputs.push_back(d);
339

  
340
    d.identifier = "notes";
341
    d.name = "Notes";
342
    d.description = "Derived fixed-pitch note frequencies";
343
    d.unit = "Hz";
344
    d.hasFixedBinCount = true;
345
    d.binCount = 1;
346
    d.hasKnownExtents = true;
347
    d.minValue = m_fmin;
348
    d.maxValue = m_fmax;
349
    d.isQuantized = false;
350
    d.sampleType = OutputDescriptor::FixedSampleRate;
351
    d.sampleRate = (m_inputSampleRate / m_stepSize);
352
    d.hasDuration = true;
353
    outputs.push_back(d);
354

  
355
    return outputs;
356
}
357

  
358
bool
359
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
360
{
361
    if (channels < getMinChannelCount() ||
362
	channels > getMaxChannelCount()) return false;
363

  
364
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
365
//	      << ", stepSize = " << stepSize << ", blockSize = " << blockSize
366
//	      << std::endl;
367

  
368
    m_channels = channels;
369
    m_stepSize = stepSize;
370
    m_blockSize = blockSize;
371

  
372
    m_binFrom = int(m_inputSampleRate / m_fmax);
373
    m_binTo = int(m_inputSampleRate / m_fmin); 
374

  
375
    if (m_binTo >= (int)m_blockSize / 2) {
376
        m_binTo = m_blockSize / 2 - 1;
377
    }
378

  
379
    m_bins = (m_binTo - m_binFrom) + 1;
380

  
381
    reset();
382

  
383
    return true;
384
}
385

  
386
void
387
CepstrumPitchTracker::reset()
388
{
389
}
390

  
391
void
392
CepstrumPitchTracker::addFeaturesFrom(Hypothesis h, FeatureSet &fs)
393
{
394
    Hypothesis::Estimates es = h.getAcceptedEstimates();
395

  
396
    for (int i = 0; i < es.size(); ++i) {
397
	Feature f;
398
	f.hasTimestamp = true;
399
	f.timestamp = es[i].time;
400
	f.values.push_back(es[i].freq);
401
	fs[0].push_back(f);
402
    }
403

  
404
    Feature nf;
405
    nf.hasTimestamp = true;
406
    nf.hasDuration = true;
407
    Hypothesis::Note n = h.getAveragedNote();
408
    nf.timestamp = n.time;
409
    nf.duration = n.duration;
410
    nf.values.push_back(n.freq);
411
    fs[1].push_back(nf);
412
}
413

  
414
void
415
CepstrumPitchTracker::filter(const double *cep, double *data)
416
{
417
    for (int i = 0; i < m_bins; ++i) {
418
        double v = 0;
419
        int n = 0;
420
        // average according to the vertical filter length
421
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
422
            int ix = i + m_binFrom + j;
423
            if (ix >= 0 && ix < m_blockSize) {
424
                v += cep[ix];
425
                ++n;
426
            }
427
        }
428
        data[i] = v / n;
429
    }
430
}
431

  
432
double
433
CepstrumPitchTracker::cubicInterpolate(const double y[4], double x)
434
{
435
    double a0 = y[3] - y[2] - y[0] + y[1];
436
    double a1 = y[0] - y[1] - a0;
437
    double a2 = y[2] - y[0];
438
    double a3 = y[1];
439
    return
440
        a0 * x * x * x +
441
        a1 * x * x +
442
        a2 * x +
443
        a3;
444
}
445

  
446
double
447
CepstrumPitchTracker::findInterpolatedPeak(const double *in, int maxbin)
448
{
449
    if (maxbin < 2 || maxbin > m_bins - 3) {
450
        return maxbin;
451
    }
452

  
453
    double maxval = 0.0;
454
    double maxidx = maxbin;
455

  
456
    const int divisions = 10;
457
    double y[4];
458

  
459
    y[0] = in[maxbin-1];
460
    y[1] = in[maxbin];
461
    y[2] = in[maxbin+1];
462
    y[3] = in[maxbin+2];
463
    for (int i = 0; i < divisions; ++i) {
464
        double probe = double(i) / double(divisions);
465
        double value = cubicInterpolate(y, probe);
466
        if (value > maxval) {
467
            maxval = value; 
468
            maxidx = maxbin + probe;
469
        }
470
    }
471

  
472
    y[3] = y[2];
473
    y[2] = y[1];
474
    y[1] = y[0];
475
    y[0] = in[maxbin-2];
476
    for (int i = 0; i < divisions; ++i) {
477
        double probe = double(i) / double(divisions);
478
        double value = cubicInterpolate(y, probe);
479
        if (value > maxval) {
480
            maxval = value; 
481
            maxidx = maxbin - 1 + probe;
482
        }
483
    }
484

  
485
/*
486
    std::cerr << "centre = " << maxbin << ": ["
487
              << in[maxbin-2] << ","
488
              << in[maxbin-1] << ","
489
              << in[maxbin] << ","
490
              << in[maxbin+1] << ","
491
              << in[maxbin+2] << "] -> " << maxidx << std::endl;
492
*/
493

  
494
    return maxidx;
495
}
496

  
497
CepstrumPitchTracker::FeatureSet
498
CepstrumPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
499
{
500
    FeatureSet fs;
501

  
502
    int bs = m_blockSize;
503
    int hs = m_blockSize/2 + 1;
504

  
505
    double *rawcep = new double[bs];
506
    double *io = new double[bs];
507
    double *logmag = new double[bs];
508

  
509
    // The "inverse symmetric" method. Seems to be the most reliable
510
        
511
    double magmean = 0.0;
512

  
513
    for (int i = 0; i < hs; ++i) {
514

  
515
	double power =
516
	    inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
517
	    inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
518
	double mag = sqrt(power);
519

  
520
        magmean += mag;
521

  
522
	double lm = log(mag + 0.00000001);
523
	
524
	logmag[i] = lm;
525
	if (i > 0) logmag[bs - i] = lm;
526
    }
527

  
528
    magmean /= hs;
529
    double threshold = 0.1; // for magmean
530
    
531
    Vamp::FFT::inverse(bs, logmag, 0, rawcep, io);
532
    
533
    delete[] logmag;
534
    delete[] io;
535

  
536
    int n = m_bins;
537
    double *data = new double[n];
538
    filter(rawcep, data);
539
    delete[] rawcep;
540

  
541
    double maxval = 0.0;
542
    int maxbin = -1;
543

  
544
    for (int i = 0; i < n; ++i) {
545
        if (data[i] > maxval) {
546
            maxval = data[i];
547
            maxbin = i;
548
        }
549
    }
550

  
551
    if (maxbin < 0) {
552
        delete[] data;
553
        return fs;
554
    }
555

  
556
    double nextPeakVal = 0.0;
557
    for (int i = 1; i+1 < n; ++i) {
558
        if (data[i] > data[i-1] &&
559
            data[i] > data[i+1] &&
560
            i != maxbin &&
561
            data[i] > nextPeakVal) {
562
            nextPeakVal = data[i];
563
        }
564
    }
565

  
566
    double cimax = findInterpolatedPeak(data, maxbin);
567
    double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
568

  
569
    double confidence = 0.0;
570
    if (nextPeakVal != 0.0) {
571
        confidence = (maxval - nextPeakVal) * 10.0;
572
        if (magmean < threshold) confidence = 0.0;
573
        std::cerr << "magmean = " << magmean << ", confidence = " << confidence << std::endl;
574
    }
575

  
576
    Hypothesis::Estimate e;
577
    e.freq = peakfreq;
578
    e.time = timestamp;
579
    e.confidence = confidence;
580

  
581
//    m_good.advanceTime();
582
    for (int i = 0; i < m_possible.size(); ++i) {
583
//        m_possible[i].advanceTime();
584
    }
585

  
586
    if (!m_good.accept(e)) {
587

  
588
        int candidate = -1;
589
        bool accepted = false;
590

  
591
        for (int i = 0; i < m_possible.size(); ++i) {
592
            if (m_possible[i].accept(e)) {
593
                if (m_possible[i].getState() == Hypothesis::Satisfied) {
594
                    accepted = true;
595
                    candidate = i;
596
                }
597
                break;
598
            }
599
        }
600

  
601
        if (!accepted) {
602
            Hypothesis h;
603
            h.accept(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
604
            m_possible.push_back(h);
605
        }
606

  
607
        if (m_good.getState() == Hypothesis::Expired) {
608
            addFeaturesFrom(m_good, fs);
609
        }
610
        
611
        if (m_good.getState() == Hypothesis::Expired ||
612
            m_good.getState() == Hypothesis::Rejected) {
613
            if (candidate >= 0) {
614
                m_good = m_possible[candidate];
615
            } else {
616
                m_good = Hypothesis();
617
            }
618
        }
619

  
620
        // reap rejected/expired hypotheses from possible list
621
        Hypotheses toReap = m_possible;
622
        m_possible.clear();
623
        for (int i = 0; i < toReap.size(); ++i) {
624
            Hypothesis h = toReap[i];
625
            if (h.getState() != Hypothesis::Rejected && 
626
                h.getState() != Hypothesis::Expired) {
627
                m_possible.push_back(h);
628
            }
629
        }
630
    }  
631

  
632
    delete[] data;
633
    return fs;
634
}
635

  
636
CepstrumPitchTracker::FeatureSet
637
CepstrumPitchTracker::getRemainingFeatures()
638
{
639
    FeatureSet fs;
640
    if (m_good.getState() == Hypothesis::Satisfied) {
641
        addFeaturesFrom(m_good, fs);
642
    }
643
    return fs;
644
}
CepstrumPitchTracker.h
1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

  
11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

  
14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

  
23
#ifndef _CEPSTRUM_PITCH_H_
24
#define _CEPSTRUM_PITCH_H_
25

  
26
#include <vamp-sdk/Plugin.h>
27

  
28
class CepstrumPitchTracker : public Vamp::Plugin
29
{
30
public:
31
    CepstrumPitchTracker(float inputSampleRate);
32
    virtual ~CepstrumPitchTracker();
33

  
34
    std::string getIdentifier() const;
35
    std::string getName() const;
36
    std::string getDescription() const;
37
    std::string getMaker() const;
38
    int getPluginVersion() const;
39
    std::string getCopyright() const;
40

  
41
    InputDomain getInputDomain() const;
42
    size_t getPreferredBlockSize() const;
43
    size_t getPreferredStepSize() const;
44
    size_t getMinChannelCount() const;
45
    size_t getMaxChannelCount() const;
46

  
47
    ParameterList getParameterDescriptors() const;
48
    float getParameter(std::string identifier) const;
49
    void setParameter(std::string identifier, float value);
50

  
51
    ProgramList getPrograms() const;
52
    std::string getCurrentProgram() const;
53
    void selectProgram(std::string name);
54

  
55
    OutputList getOutputDescriptors() const;
56

  
57
    bool initialise(size_t channels, size_t stepSize, size_t blockSize);
58
    void reset();
59

  
60
    FeatureSet process(const float *const *inputBuffers,
61
                       Vamp::RealTime timestamp);
62

  
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff