To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstralPitchTracker.cpp @ 35:2f5b169e4a3b

History | View | Annotate | Download (11.2 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    This file is Copyright (c) 2012 Chris Cannam
4
  
5
    Permission is hereby granted, free of charge, to any person
6
    obtaining a copy of this software and associated documentation
7
    files (the "Software"), to deal in the Software without
8
    restriction, including without limitation the rights to use, copy,
9
    modify, merge, publish, distribute, sublicense, and/or sell copies
10
    of the Software, and to permit persons to whom the Software is
11
    furnished to do so, subject to the following conditions:
12

13
    The above copyright notice and this permission notice shall be
14
    included in all copies or substantial portions of the Software.
15

16
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
20
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
21
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
*/
24

    
25
#include "CepstralPitchTracker.h"
26

    
27
#include "vamp-sdk/FFT.h"
28

    
29
#include <vector>
30
#include <algorithm>
31

    
32
#include <cstdio>
33
#include <cmath>
34
#include <complex>
35

    
36
using std::string;
37
using std::vector;
38
using Vamp::RealTime;
39

    
40

    
41
CepstralPitchTracker::CepstralPitchTracker(float inputSampleRate) :
42
    Plugin(inputSampleRate),
43
    m_channels(0),
44
    m_stepSize(256),
45
    m_blockSize(1024),
46
    m_fmin(50),
47
    m_fmax(900),
48
    m_vflen(1),
49
    m_binFrom(0),
50
    m_binTo(0),
51
    m_bins(0)
52
{
53
}
54

    
55
CepstralPitchTracker::~CepstralPitchTracker()
56
{
57
}
58

    
59
string
60
CepstralPitchTracker::getIdentifier() const
61
{
62
    return "cepstrum-pitch";
63
}
64

    
65
string
66
CepstralPitchTracker::getName() const
67
{
68
    return "Cepstrum Pitch Tracker";
69
}
70

    
71
string
72
CepstralPitchTracker::getDescription() const
73
{
74
    return "Estimate f0 of monophonic material using a cepstrum method.";
75
}
76

    
77
string
78
CepstralPitchTracker::getMaker() const
79
{
80
    return "Chris Cannam";
81
}
82

    
83
int
84
CepstralPitchTracker::getPluginVersion() const
85
{
86
    // Increment this each time you release a version that behaves
87
    // differently from the previous one
88
    return 1;
89
}
90

    
91
string
92
CepstralPitchTracker::getCopyright() const
93
{
94
    return "Freely redistributable (BSD license)";
95
}
96

    
97
CepstralPitchTracker::InputDomain
98
CepstralPitchTracker::getInputDomain() const
99
{
100
    return FrequencyDomain;
101
}
102

    
103
size_t
104
CepstralPitchTracker::getPreferredBlockSize() const
105
{
106
    return 1024;
107
}
108

    
109
size_t 
110
CepstralPitchTracker::getPreferredStepSize() const
111
{
112
    return 256;
113
}
114

    
115
size_t
116
CepstralPitchTracker::getMinChannelCount() const
117
{
118
    return 1;
119
}
120

    
121
size_t
122
CepstralPitchTracker::getMaxChannelCount() const
123
{
124
    return 1;
125
}
126

    
127
CepstralPitchTracker::ParameterList
128
CepstralPitchTracker::getParameterDescriptors() const
129
{
130
    ParameterList list;
131
    return list;
132
}
133

    
134
float
135
CepstralPitchTracker::getParameter(string identifier) const
136
{
137
    return 0.f;
138
}
139

    
140
void
141
CepstralPitchTracker::setParameter(string identifier, float value) 
142
{
143
}
144

    
145
CepstralPitchTracker::ProgramList
146
CepstralPitchTracker::getPrograms() const
147
{
148
    ProgramList list;
149
    return list;
150
}
151

    
152
string
153
CepstralPitchTracker::getCurrentProgram() const
154
{
155
    return ""; // no programs
156
}
157

    
158
void
159
CepstralPitchTracker::selectProgram(string name)
160
{
161
}
162

    
163
CepstralPitchTracker::OutputList
164
CepstralPitchTracker::getOutputDescriptors() const
165
{
166
    OutputList outputs;
167

    
168
    OutputDescriptor d;
169

    
170
    d.identifier = "f0";
171
    d.name = "Estimated f0";
172
    d.description = "Estimated fundamental frequency";
173
    d.unit = "Hz";
174
    d.hasFixedBinCount = true;
175
    d.binCount = 1;
176
    d.hasKnownExtents = true;
177
    d.minValue = m_fmin;
178
    d.maxValue = m_fmax;
179
    d.isQuantized = false;
180
    d.sampleType = OutputDescriptor::FixedSampleRate;
181
    d.sampleRate = (m_inputSampleRate / m_stepSize);
182
    d.hasDuration = false;
183
    outputs.push_back(d);
184

    
185
    d.identifier = "notes";
186
    d.name = "Notes";
187
    d.description = "Derived fixed-pitch note frequencies";
188
    d.unit = "Hz";
189
    d.hasFixedBinCount = true;
190
    d.binCount = 1;
191
    d.hasKnownExtents = true;
192
    d.minValue = m_fmin;
193
    d.maxValue = m_fmax;
194
    d.isQuantized = false;
195
    d.sampleType = OutputDescriptor::FixedSampleRate;
196
    d.sampleRate = (m_inputSampleRate / m_stepSize);
197
    d.hasDuration = true;
198
    outputs.push_back(d);
199

    
200
    return outputs;
201
}
202

    
203
bool
204
CepstralPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
205
{
206
    if (channels < getMinChannelCount() ||
207
        channels > getMaxChannelCount()) return false;
208

    
209
//    std::cerr << "CepstralPitchTracker::initialise: channels = " << channels
210
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
211
//              << std::endl;
212

    
213
    m_channels = channels;
214
    m_stepSize = stepSize;
215
    m_blockSize = blockSize;
216

    
217
    m_binFrom = int(m_inputSampleRate / m_fmax);
218
    m_binTo = int(m_inputSampleRate / m_fmin); 
219

    
220
    if (m_binTo >= (int)m_blockSize / 2) {
221
        m_binTo = m_blockSize / 2 - 1;
222
    }
223

    
224
    m_bins = (m_binTo - m_binFrom) + 1;
225

    
226
    reset();
227

    
228
    return true;
229
}
230

    
231
void
232
CepstralPitchTracker::reset()
233
{
234
}
235

    
236
void
237
CepstralPitchTracker::addFeaturesFrom(NoteHypothesis h, FeatureSet &fs)
238
{
239
    NoteHypothesis::Estimates es = h.getAcceptedEstimates();
240

    
241
    for (int i = 0; i < (int)es.size(); ++i) {
242
        Feature f;
243
        f.hasTimestamp = true;
244
        f.timestamp = es[i].time;
245
        f.values.push_back(es[i].freq);
246
        fs[0].push_back(f);
247
    }
248

    
249
    Feature nf;
250
    nf.hasTimestamp = true;
251
    nf.hasDuration = true;
252
    NoteHypothesis::Note n = h.getAveragedNote();
253
    nf.timestamp = n.time;
254
    nf.duration = n.duration;
255
    nf.values.push_back(n.freq);
256
    fs[1].push_back(nf);
257
}
258

    
259
void
260
CepstralPitchTracker::filter(const double *cep, double *data)
261
{
262
    for (int i = 0; i < m_bins; ++i) {
263
        double v = 0;
264
        int n = 0;
265
        // average according to the vertical filter length
266
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
267
            int ix = i + m_binFrom + j;
268
            if (ix >= 0 && ix < (int)m_blockSize) {
269
                v += cep[ix];
270
                ++n;
271
            }
272
        }
273
        data[i] = v / n;
274
    }
275
}
276

    
277
double
278
CepstralPitchTracker::cubicInterpolate(const double y[4], double x)
279
{
280
    double a0 = y[3] - y[2] - y[0] + y[1];
281
    double a1 = y[0] - y[1] - a0;
282
    double a2 = y[2] - y[0];
283
    double a3 = y[1];
284
    return
285
        a0 * x * x * x +
286
        a1 * x * x +
287
        a2 * x +
288
        a3;
289
}
290

    
291
double
292
CepstralPitchTracker::findInterpolatedPeak(const double *in, int maxbin)
293
{
294
    if (maxbin < 2 || maxbin > m_bins - 3) {
295
        return maxbin;
296
    }
297

    
298
    double maxval = 0.0;
299
    double maxidx = maxbin;
300

    
301
    const int divisions = 10;
302
    double y[4];
303

    
304
    y[0] = in[maxbin-1];
305
    y[1] = in[maxbin];
306
    y[2] = in[maxbin+1];
307
    y[3] = in[maxbin+2];
308
    for (int i = 0; i < divisions; ++i) {
309
        double probe = double(i) / double(divisions);
310
        double value = cubicInterpolate(y, probe);
311
        if (value > maxval) {
312
            maxval = value; 
313
            maxidx = maxbin + probe;
314
        }
315
    }
316

    
317
    y[3] = y[2];
318
    y[2] = y[1];
319
    y[1] = y[0];
320
    y[0] = in[maxbin-2];
321
    for (int i = 0; i < divisions; ++i) {
322
        double probe = double(i) / double(divisions);
323
        double value = cubicInterpolate(y, probe);
324
        if (value > maxval) {
325
            maxval = value; 
326
            maxidx = maxbin - 1 + probe;
327
        }
328
    }
329

    
330
/*
331
    std::cerr << "centre = " << maxbin << ": ["
332
              << in[maxbin-2] << ","
333
              << in[maxbin-1] << ","
334
              << in[maxbin] << ","
335
              << in[maxbin+1] << ","
336
              << in[maxbin+2] << "] -> " << maxidx << std::endl;
337
*/
338

    
339
    return maxidx;
340
}
341

    
342
CepstralPitchTracker::FeatureSet
343
CepstralPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
344
{
345
    FeatureSet fs;
346

    
347
    int bs = m_blockSize;
348
    int hs = m_blockSize/2 + 1;
349

    
350
    double *rawcep = new double[bs];
351
    double *io = new double[bs];
352
    double *logmag = new double[bs];
353

    
354
    // The "inverse symmetric" method. Seems to be the most reliable
355
        
356
    double magmean = 0.0;
357

    
358
    for (int i = 0; i < hs; ++i) {
359

    
360
        double power =
361
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
362
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
363
        double mag = sqrt(power);
364

    
365
        magmean += mag;
366

    
367
        double lm = log(mag + 0.00000001);
368
        
369
        logmag[i] = lm;
370
        if (i > 0) logmag[bs - i] = lm;
371
    }
372

    
373
    magmean /= hs;
374
    double threshold = 0.1; // for magmean
375
    
376
    Vamp::FFT::inverse(bs, logmag, 0, rawcep, io);
377
    
378
    delete[] logmag;
379
    delete[] io;
380

    
381
    int n = m_bins;
382
    double *data = new double[n];
383
    filter(rawcep, data);
384
    delete[] rawcep;
385

    
386
    double maxval = 0.0;
387
    int maxbin = -1;
388

    
389
    for (int i = 0; i < n; ++i) {
390
        if (data[i] > maxval) {
391
            maxval = data[i];
392
            maxbin = i;
393
        }
394
    }
395

    
396
    if (maxbin < 0) {
397
        delete[] data;
398
        return fs;
399
    }
400

    
401
    double nextPeakVal = 0.0;
402
    for (int i = 1; i+1 < n; ++i) {
403
        if (data[i] > data[i-1] &&
404
            data[i] > data[i+1] &&
405
            i != maxbin &&
406
            data[i] > nextPeakVal) {
407
            nextPeakVal = data[i];
408
        }
409
    }
410

    
411
    double cimax = findInterpolatedPeak(data, maxbin);
412
    double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
413

    
414
    double confidence = 0.0;
415
    if (nextPeakVal != 0.0) {
416
        confidence = (maxval - nextPeakVal) * 10.0;
417
        if (magmean < threshold) confidence = 0.0;
418
        std::cerr << "magmean = " << magmean << ", confidence = " << confidence << std::endl;
419
    }
420

    
421
    NoteHypothesis::Estimate e;
422
    e.freq = peakfreq;
423
    e.time = timestamp;
424
    e.confidence = confidence;
425

    
426
    if (!m_good.accept(e)) {
427

    
428
        int candidate = -1;
429
        bool accepted = false;
430

    
431
        for (int i = 0; i < (int)m_possible.size(); ++i) {
432
            if (m_possible[i].accept(e)) {
433
                if (m_possible[i].getState() == NoteHypothesis::Satisfied) {
434
                    accepted = true;
435
                    candidate = i;
436
                }
437
                break;
438
            }
439
        }
440

    
441
        if (!accepted) {
442
            NoteHypothesis h;
443
            h.accept(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
444
            m_possible.push_back(h);
445
        }
446

    
447
        if (m_good.getState() == NoteHypothesis::Expired) {
448
            addFeaturesFrom(m_good, fs);
449
        }
450
        
451
        if (m_good.getState() == NoteHypothesis::Expired ||
452
            m_good.getState() == NoteHypothesis::Rejected) {
453
            if (candidate >= 0) {
454
                m_good = m_possible[candidate];
455
            } else {
456
                m_good = NoteHypothesis();
457
            }
458
        }
459

    
460
        // reap rejected/expired hypotheses from possible list
461
        Hypotheses toReap = m_possible;
462
        m_possible.clear();
463
        for (int i = 0; i < (int)toReap.size(); ++i) {
464
            NoteHypothesis h = toReap[i];
465
            if (h.getState() != NoteHypothesis::Rejected && 
466
                h.getState() != NoteHypothesis::Expired) {
467
                m_possible.push_back(h);
468
            }
469
        }
470
    }  
471

    
472
    delete[] data;
473
    return fs;
474
}
475

    
476
CepstralPitchTracker::FeatureSet
477
CepstralPitchTracker::getRemainingFeatures()
478
{
479
    FeatureSet fs;
480
    if (m_good.getState() == NoteHypothesis::Satisfied) {
481
        addFeaturesFrom(m_good, fs);
482
    }
483
    return fs;
484
}