To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstrumPitchTracker.cpp @ 16:d717911aca3c

History | View | Annotate | Download (14.6 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    Permission is hereby granted, free of charge, to any person
4
    obtaining a copy of this software and associated documentation
5
    files (the "Software"), to deal in the Software without
6
    restriction, including without limitation the rights to use, copy,
7
    modify, merge, publish, distribute, sublicense, and/or sell copies
8
    of the Software, and to permit persons to whom the Software is
9
    furnished to do so, subject to the following conditions:
10

11
    The above copyright notice and this permission notice shall be
12
    included in all copies or substantial portions of the Software.
13

14
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*/
22

    
23
#include "CepstrumPitchTracker.h"
24

    
25
#include <vector>
26
#include <algorithm>
27

    
28
#include <cstdio>
29
#include <cmath>
30
#include <complex>
31

    
32
using std::string;
33
using std::vector;
34
using Vamp::RealTime;
35

    
36
CepstrumPitchTracker::Hypothesis::Hypothesis()
37
{
38
    m_state = New;
39
    m_age = 0;
40
}
41

    
42
CepstrumPitchTracker::Hypothesis::~Hypothesis()
43
{
44
}
45

    
46
bool
47
CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s)
48
{
49
    if (m_pending.empty()) {
50
        return true;
51
    }
52

    
53
    Estimate first = m_pending[0];
54
    Estimate last = m_pending[m_pending.size()-1];
55

    
56
    // check we are within a relatively close tolerance of the last
57
    // candidate
58
    double r = s.freq / last.freq;
59
    int cents = lrint(1200.0 * (log(r) / log(2.0)));
60
    if (cents < -40 || cents > 40) return false;
61

    
62
    // and within a wider tolerance of our starting candidate
63
    r = s.freq / first.freq;
64
    cents = lrint(1200.0 * (log(r) / log(2.0)));
65
    if (cents < -80 || cents > 80) return false;
66
    
67
    return true;
68
}
69

    
70
bool 
71
CepstrumPitchTracker::Hypothesis::isSatisfied()
72
{
73
    if (m_pending.empty()) return false;
74
    
75
    double meanConfidence = 0.0;
76
    for (int i = 0; i < m_pending.size(); ++i) {
77
        meanConfidence += m_pending[i].confidence;
78
    }
79
    meanConfidence /= m_pending.size();
80

    
81
    int lengthRequired = int(2.0 / meanConfidence + 0.5);
82
    std::cerr << "meanConfidence = " << meanConfidence << ", lengthRequired = " << lengthRequired << ", length = " << m_pending.size() << std::endl;
83

    
84
    return (m_pending.size() > lengthRequired);
85
}
86

    
87
void
88
CepstrumPitchTracker::Hypothesis::advanceTime()
89
{
90
    ++m_age;
91
}
92

    
93
bool
94
CepstrumPitchTracker::Hypothesis::test(Estimate s)
95
{
96
    bool accept = false;
97

    
98
    switch (m_state) {
99

    
100
    case New:
101
        m_state = Provisional;
102
        accept = true;
103
        break;
104

    
105
    case Provisional:
106
        if (m_age > 3) {
107
            m_state = Rejected;
108
        } else if (isWithinTolerance(s)) {
109
            accept = true;
110
        }
111
        break;
112
        
113
    case Satisfied:
114
        if (m_age > 3) {
115
            m_state = Expired;
116
        } else if (isWithinTolerance(s)) {
117
            accept = true;
118
        }
119
        break;
120

    
121
    case Rejected:
122
        break;
123

    
124
    case Expired:
125
        break;
126
    }
127

    
128
    if (accept) {
129
        m_pending.push_back(s);
130
        m_age = 0;
131
        if (m_state == Provisional && isSatisfied()) {
132
            m_state = Satisfied;
133
        }
134
    }
135

    
136
    return accept && (m_state == Satisfied);
137
}        
138

    
139
CepstrumPitchTracker::Hypothesis::State
140
CepstrumPitchTracker::Hypothesis::getState()
141
{
142
    return m_state;
143
}
144

    
145
int
146
CepstrumPitchTracker::Hypothesis::getPendingLength()
147
{
148
    return m_pending.size();
149
}
150

    
151
CepstrumPitchTracker::Hypothesis::Estimates
152
CepstrumPitchTracker::Hypothesis::getAcceptedEstimates()
153
{
154
    if (m_state == Satisfied || m_state == Expired) {
155
        return m_pending;
156
    } else {
157
        return Estimates();
158
    }
159
}
160

    
161
CepstrumPitchTracker::Hypothesis::Note
162
CepstrumPitchTracker::Hypothesis::getAveragedNote()
163
{
164
    Note n;
165

    
166
    if (!(m_state == Satisfied || m_state == Expired)) {
167
        n.freq = 0.0;
168
        n.time = RealTime::zeroTime;
169
        n.duration = RealTime::zeroTime;
170
        return n;
171
    }
172

    
173
    n.time = m_pending.begin()->time;
174

    
175
    Estimates::iterator i = m_pending.end();
176
    --i;
177
    n.duration = i->time - n.time;
178

    
179
    // just mean frequency for now, but this isn't at all right
180
    double acc = 0.0;
181
    for (int i = 0; i < m_pending.size(); ++i) {
182
        acc += m_pending[i].freq;
183
    }
184
    acc /= m_pending.size();
185
    n.freq = acc;
186
    
187
    return n;
188
}
189

    
190
void
191
CepstrumPitchTracker::Hypothesis::addFeatures(FeatureSet &fs)
192
{
193
    for (int i = 0; i < m_pending.size(); ++i) {
194
        Feature f;
195
        f.hasTimestamp = true;
196
        f.timestamp = m_pending[i].time;
197
        f.values.push_back(m_pending[i].freq);
198
        fs[0].push_back(f);
199
    }
200

    
201
    Feature nf;
202
    nf.hasTimestamp = true;
203
    nf.hasDuration = true;
204
    Note n = getAveragedNote();
205
    nf.timestamp = n.time;
206
    nf.duration = n.duration;
207
    nf.values.push_back(n.freq);
208
    fs[1].push_back(nf);
209
}
210

    
211
CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
212
    Plugin(inputSampleRate),
213
    m_channels(0),
214
    m_stepSize(256),
215
    m_blockSize(1024),
216
    m_fmin(50),
217
    m_fmax(1000),
218
    m_vflen(3),
219
    m_binFrom(0),
220
    m_binTo(0),
221
    m_bins(0)
222
{
223
}
224

    
225
CepstrumPitchTracker::~CepstrumPitchTracker()
226
{
227
}
228

    
229
string
230
CepstrumPitchTracker::getIdentifier() const
231
{
232
    return "cepstrum-pitch";
233
}
234

    
235
string
236
CepstrumPitchTracker::getName() const
237
{
238
    return "Cepstrum Pitch Tracker";
239
}
240

    
241
string
242
CepstrumPitchTracker::getDescription() const
243
{
244
    return "Estimate f0 of monophonic material using a cepstrum method.";
245
}
246

    
247
string
248
CepstrumPitchTracker::getMaker() const
249
{
250
    return "Chris Cannam";
251
}
252

    
253
int
254
CepstrumPitchTracker::getPluginVersion() const
255
{
256
    // Increment this each time you release a version that behaves
257
    // differently from the previous one
258
    return 1;
259
}
260

    
261
string
262
CepstrumPitchTracker::getCopyright() const
263
{
264
    return "Freely redistributable (BSD license)";
265
}
266

    
267
CepstrumPitchTracker::InputDomain
268
CepstrumPitchTracker::getInputDomain() const
269
{
270
    return FrequencyDomain;
271
}
272

    
273
size_t
274
CepstrumPitchTracker::getPreferredBlockSize() const
275
{
276
    return 1024;
277
}
278

    
279
size_t 
280
CepstrumPitchTracker::getPreferredStepSize() const
281
{
282
    return 256;
283
}
284

    
285
size_t
286
CepstrumPitchTracker::getMinChannelCount() const
287
{
288
    return 1;
289
}
290

    
291
size_t
292
CepstrumPitchTracker::getMaxChannelCount() const
293
{
294
    return 1;
295
}
296

    
297
CepstrumPitchTracker::ParameterList
298
CepstrumPitchTracker::getParameterDescriptors() const
299
{
300
    ParameterList list;
301
    return list;
302
}
303

    
304
float
305
CepstrumPitchTracker::getParameter(string identifier) const
306
{
307
    return 0.f;
308
}
309

    
310
void
311
CepstrumPitchTracker::setParameter(string identifier, float value) 
312
{
313
}
314

    
315
CepstrumPitchTracker::ProgramList
316
CepstrumPitchTracker::getPrograms() const
317
{
318
    ProgramList list;
319
    return list;
320
}
321

    
322
string
323
CepstrumPitchTracker::getCurrentProgram() const
324
{
325
    return ""; // no programs
326
}
327

    
328
void
329
CepstrumPitchTracker::selectProgram(string name)
330
{
331
}
332

    
333
CepstrumPitchTracker::OutputList
334
CepstrumPitchTracker::getOutputDescriptors() const
335
{
336
    OutputList outputs;
337

    
338
    int n = 0;
339

    
340
    OutputDescriptor d;
341

    
342
    d.identifier = "f0";
343
    d.name = "Estimated f0";
344
    d.description = "Estimated fundamental frequency";
345
    d.unit = "Hz";
346
    d.hasFixedBinCount = true;
347
    d.binCount = 1;
348
    d.hasKnownExtents = true;
349
    d.minValue = m_fmin;
350
    d.maxValue = m_fmax;
351
    d.isQuantized = false;
352
    d.sampleType = OutputDescriptor::FixedSampleRate;
353
    d.sampleRate = (m_inputSampleRate / m_stepSize);
354
    d.hasDuration = false;
355
    outputs.push_back(d);
356

    
357
    d.identifier = "notes";
358
    d.name = "Notes";
359
    d.description = "Derived fixed-pitch note frequencies";
360
    d.unit = "Hz";
361
    d.hasFixedBinCount = true;
362
    d.binCount = 1;
363
    d.hasKnownExtents = true;
364
    d.minValue = m_fmin;
365
    d.maxValue = m_fmax;
366
    d.isQuantized = false;
367
    d.sampleType = OutputDescriptor::FixedSampleRate;
368
    d.sampleRate = (m_inputSampleRate / m_stepSize);
369
    d.hasDuration = true;
370
    outputs.push_back(d);
371

    
372
    return outputs;
373
}
374

    
375
bool
376
CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
377
{
378
    if (channels < getMinChannelCount() ||
379
        channels > getMaxChannelCount()) return false;
380

    
381
//    std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
382
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
383
//              << std::endl;
384

    
385
    m_channels = channels;
386
    m_stepSize = stepSize;
387
    m_blockSize = blockSize;
388

    
389
    m_binFrom = int(m_inputSampleRate / m_fmax);
390
    m_binTo = int(m_inputSampleRate / m_fmin); 
391

    
392
    if (m_binTo >= (int)m_blockSize / 2) {
393
        m_binTo = m_blockSize / 2 - 1;
394
    }
395

    
396
    m_bins = (m_binTo - m_binFrom) + 1;
397

    
398
    reset();
399

    
400
    return true;
401
}
402

    
403
void
404
CepstrumPitchTracker::reset()
405
{
406
}
407

    
408
void
409
CepstrumPitchTracker::filter(const double *cep, double *data)
410
{
411
    for (int i = 0; i < m_bins; ++i) {
412
        double v = 0;
413
        int n = 0;
414
        // average according to the vertical filter length
415
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
416
            int ix = i + m_binFrom + j;
417
            if (ix >= 0 && ix < m_blockSize) {
418
                v += cep[ix];
419
                ++n;
420
            }
421
        }
422
        data[i] = v / n;
423
    }
424
}
425

    
426
CepstrumPitchTracker::FeatureSet
427
CepstrumPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
428
{
429
    FeatureSet fs;
430

    
431
    int bs = m_blockSize;
432
    int hs = m_blockSize/2 + 1;
433

    
434
    double *rawcep = new double[bs];
435
    double *io = new double[bs];
436
    double *logmag = new double[bs];
437

    
438
    // The "inverse symmetric" method. Seems to be the most reliable
439
        
440
    for (int i = 0; i < hs; ++i) {
441

    
442
        double power =
443
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
444
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
445
        double mag = sqrt(power);
446
        
447
        double lm = log(mag + 0.00000001);
448
        
449
        logmag[i] = lm;
450
        if (i > 0) logmag[bs - i] = lm;
451
    }
452

    
453
    fft(bs, true, logmag, 0, rawcep, io);
454
    
455
    delete[] logmag;
456
    delete[] io;
457

    
458
    int n = m_bins;
459
    double *data = new double[n];
460
    filter(rawcep, data);
461
    delete[] rawcep;
462

    
463
    double abstot = 0.0;
464

    
465
    for (int i = 0; i < n; ++i) {
466
        abstot += fabs(data[i]);
467
    }
468

    
469
    double maxval = 0.0;
470
    int maxbin = -1;
471

    
472
    for (int i = 0; i < n; ++i) {
473
        if (data[i] > maxval) {
474
            maxval = data[i];
475
            maxbin = i;
476
        }
477
    }
478

    
479
    if (maxbin < 0) {
480
        delete[] data;
481
        return fs;
482
    }
483

    
484
    double nextPeakVal = 0.0;
485
    for (int i = 1; i+1 < n; ++i) {
486
        if (data[i] > data[i-1] &&
487
            data[i] > data[i+1] &&
488
            i != maxbin &&
489
            data[i] > nextPeakVal) {
490
            nextPeakVal = data[i];
491
        }
492
    }
493

    
494
    double peakfreq = m_inputSampleRate / (maxbin + m_binFrom);
495

    
496
    double confidence = 0.0;
497
    if (nextPeakVal != 0.0) {
498
        confidence = ((maxval / nextPeakVal) - 1.0) / 4.0;
499
        if (confidence > 1.0) confidence = 1.0;
500
    }
501

    
502
    Hypothesis::Estimate e;
503
    e.freq = peakfreq;
504
    e.time = timestamp;
505
    e.confidence = confidence;
506

    
507
    m_accepted.advanceTime();
508

    
509
    for (int i = 0; i < m_possible.size(); ++i) {
510
        m_possible[i].advanceTime();
511
    }
512

    
513
    if (!m_accepted.test(e)) {
514

    
515
        int candidate = -1;
516
        bool accepted = false;
517

    
518
        for (int i = 0; i < m_possible.size(); ++i) {
519
            if (m_possible[i].test(e)) {
520
                accepted = true;
521
                if (m_possible[i].getState() == Hypothesis::Satisfied) {
522
                    candidate = i;
523
                }
524
                break;
525
            }
526
        }
527

    
528
        if (!accepted) {
529
            Hypothesis h;
530
            h.test(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
531
            m_possible.push_back(h);
532
        }
533

    
534
        if (m_accepted.getState() == Hypothesis::Expired) {
535
            m_accepted.addFeatures(fs);
536
        }
537
        
538
        if (m_accepted.getState() == Hypothesis::Expired ||
539
            m_accepted.getState() == Hypothesis::Rejected) {
540
            if (candidate >= 0) {
541
                m_accepted = m_possible[candidate];
542
            } else {
543
                m_accepted = Hypothesis();
544
            }
545
        }
546

    
547
        // reap rejected/expired hypotheses from possible list
548
        Hypotheses toReap = m_possible;
549
        m_possible.clear();
550
        for (int i = 0; i < toReap.size(); ++i) {
551
            Hypothesis h = toReap[i];
552
            if (h.getState() != Hypothesis::Rejected && 
553
                h.getState() != Hypothesis::Expired) {
554
                m_possible.push_back(h);
555
            }
556
        }
557
    }  
558

    
559
    std::cerr << "accepted length = " << m_accepted.getPendingLength()
560
              << ", state = " << m_accepted.getState()
561
              << ", hypothesis count = " << m_possible.size() << std::endl;
562

    
563
    delete[] data;
564
    return fs;
565
}
566

    
567
CepstrumPitchTracker::FeatureSet
568
CepstrumPitchTracker::getRemainingFeatures()
569
{
570
    FeatureSet fs;
571
    if (m_accepted.getState() == Hypothesis::Satisfied) {
572
        m_accepted.addFeatures(fs);
573
    }
574
    return fs;
575
}
576

    
577
void
578
CepstrumPitchTracker::fft(unsigned int n, bool inverse,
579
                    double *ri, double *ii, double *ro, double *io)
580
{
581
    if (!ri || !ro || !io) return;
582

    
583
    unsigned int bits;
584
    unsigned int i, j, k, m;
585
    unsigned int blockSize, blockEnd;
586

    
587
    double tr, ti;
588

    
589
    if (n < 2) return;
590
    if (n & (n-1)) return;
591

    
592
    double angle = 2.0 * M_PI;
593
    if (inverse) angle = -angle;
594

    
595
    for (i = 0; ; ++i) {
596
        if (n & (1 << i)) {
597
            bits = i;
598
            break;
599
        }
600
    }
601

    
602
    static unsigned int tableSize = 0;
603
    static int *table = 0;
604

    
605
    if (tableSize != n) {
606

    
607
        delete[] table;
608

    
609
        table = new int[n];
610

    
611
        for (i = 0; i < n; ++i) {
612
        
613
            m = i;
614

    
615
            for (j = k = 0; j < bits; ++j) {
616
                k = (k << 1) | (m & 1);
617
                m >>= 1;
618
            }
619

    
620
            table[i] = k;
621
        }
622

    
623
        tableSize = n;
624
    }
625

    
626
    if (ii) {
627
        for (i = 0; i < n; ++i) {
628
            ro[table[i]] = ri[i];
629
            io[table[i]] = ii[i];
630
        }
631
    } else {
632
        for (i = 0; i < n; ++i) {
633
            ro[table[i]] = ri[i];
634
            io[table[i]] = 0.0;
635
        }
636
    }
637

    
638
    blockEnd = 1;
639

    
640
    for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
641

    
642
        double delta = angle / (double)blockSize;
643
        double sm2 = -sin(-2 * delta);
644
        double sm1 = -sin(-delta);
645
        double cm2 = cos(-2 * delta);
646
        double cm1 = cos(-delta);
647
        double w = 2 * cm1;
648
        double ar[3], ai[3];
649

    
650
        for (i = 0; i < n; i += blockSize) {
651

    
652
            ar[2] = cm2;
653
            ar[1] = cm1;
654

    
655
            ai[2] = sm2;
656
            ai[1] = sm1;
657

    
658
            for (j = i, m = 0; m < blockEnd; j++, m++) {
659

    
660
                ar[0] = w * ar[1] - ar[2];
661
                ar[2] = ar[1];
662
                ar[1] = ar[0];
663

    
664
                ai[0] = w * ai[1] - ai[2];
665
                ai[2] = ai[1];
666
                ai[1] = ai[0];
667

    
668
                k = j + blockEnd;
669
                tr = ar[0] * ro[k] - ai[0] * io[k];
670
                ti = ar[0] * io[k] + ai[0] * ro[k];
671

    
672
                ro[k] = ro[j] - tr;
673
                io[k] = io[j] - ti;
674

    
675
                ro[j] += tr;
676
                io[j] += ti;
677
            }
678
        }
679

    
680
        blockEnd = blockSize;
681
    }
682
}
683

    
684