To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstralPitchTracker.cpp @ 39:822cf7b8e070

History | View | Annotate | Download (11.2 KB)

1 3:9366c8a58778 Chris
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3 31:2c175adf8736 Chris
    This file is Copyright (c) 2012 Chris Cannam
4

5 3:9366c8a58778 Chris
    Permission is hereby granted, free of charge, to any person
6
    obtaining a copy of this software and associated documentation
7
    files (the "Software"), to deal in the Software without
8
    restriction, including without limitation the rights to use, copy,
9
    modify, merge, publish, distribute, sublicense, and/or sell copies
10
    of the Software, and to permit persons to whom the Software is
11
    furnished to do so, subject to the following conditions:
12

13
    The above copyright notice and this permission notice shall be
14
    included in all copies or substantial portions of the Software.
15

16
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
20
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
21
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
*/
24
25 31:2c175adf8736 Chris
#include "CepstralPitchTracker.h"
26 3:9366c8a58778 Chris
27 26:13568f1ccff0 Chris
#include "vamp-sdk/FFT.h"
28
29 3:9366c8a58778 Chris
#include <vector>
30
#include <algorithm>
31
32
#include <cstdio>
33
#include <cmath>
34
#include <complex>
35
36
using std::string;
37 7:32defdb2f9d9 Chris
using std::vector;
38 16:d717911aca3c Chris
using Vamp::RealTime;
39 7:32defdb2f9d9 Chris
40 16:d717911aca3c Chris
41 31:2c175adf8736 Chris
CepstralPitchTracker::CepstralPitchTracker(float inputSampleRate) :
42 3:9366c8a58778 Chris
    Plugin(inputSampleRate),
43
    m_channels(0),
44
    m_stepSize(256),
45
    m_blockSize(1024),
46
    m_fmin(50),
47 25:9aee1a0e6223 Chris
    m_fmax(900),
48 18:131b1c40be1a Chris
    m_vflen(1),
49 3:9366c8a58778 Chris
    m_binFrom(0),
50
    m_binTo(0),
51 15:bd7fb10646fc Chris
    m_bins(0)
52 3:9366c8a58778 Chris
{
53
}
54
55 31:2c175adf8736 Chris
CepstralPitchTracker::~CepstralPitchTracker()
56 3:9366c8a58778 Chris
{
57
}
58
59
string
60 31:2c175adf8736 Chris
CepstralPitchTracker::getIdentifier() const
61 3:9366c8a58778 Chris
{
62 39:822cf7b8e070 Chris
    return "cepstral-pitchtracker";
63 3:9366c8a58778 Chris
}
64
65
string
66 31:2c175adf8736 Chris
CepstralPitchTracker::getName() const
67 3:9366c8a58778 Chris
{
68 39:822cf7b8e070 Chris
    return "Cepstral Pitch Tracker";
69 3:9366c8a58778 Chris
}
70
71
string
72 31:2c175adf8736 Chris
CepstralPitchTracker::getDescription() const
73 3:9366c8a58778 Chris
{
74
    return "Estimate f0 of monophonic material using a cepstrum method.";
75
}
76
77
string
78 31:2c175adf8736 Chris
CepstralPitchTracker::getMaker() const
79 3:9366c8a58778 Chris
{
80
    return "Chris Cannam";
81
}
82
83
int
84 31:2c175adf8736 Chris
CepstralPitchTracker::getPluginVersion() const
85 3:9366c8a58778 Chris
{
86
    // Increment this each time you release a version that behaves
87
    // differently from the previous one
88
    return 1;
89
}
90
91
string
92 31:2c175adf8736 Chris
CepstralPitchTracker::getCopyright() const
93 3:9366c8a58778 Chris
{
94
    return "Freely redistributable (BSD license)";
95
}
96
97 31:2c175adf8736 Chris
CepstralPitchTracker::InputDomain
98
CepstralPitchTracker::getInputDomain() const
99 3:9366c8a58778 Chris
{
100
    return FrequencyDomain;
101
}
102
103
size_t
104 31:2c175adf8736 Chris
CepstralPitchTracker::getPreferredBlockSize() const
105 3:9366c8a58778 Chris
{
106
    return 1024;
107
}
108
109
size_t
110 31:2c175adf8736 Chris
CepstralPitchTracker::getPreferredStepSize() const
111 3:9366c8a58778 Chris
{
112
    return 256;
113
}
114
115
size_t
116 31:2c175adf8736 Chris
CepstralPitchTracker::getMinChannelCount() const
117 3:9366c8a58778 Chris
{
118
    return 1;
119
}
120
121
size_t
122 31:2c175adf8736 Chris
CepstralPitchTracker::getMaxChannelCount() const
123 3:9366c8a58778 Chris
{
124
    return 1;
125
}
126
127 31:2c175adf8736 Chris
CepstralPitchTracker::ParameterList
128
CepstralPitchTracker::getParameterDescriptors() const
129 3:9366c8a58778 Chris
{
130
    ParameterList list;
131
    return list;
132
}
133
134
float
135 31:2c175adf8736 Chris
CepstralPitchTracker::getParameter(string identifier) const
136 3:9366c8a58778 Chris
{
137
    return 0.f;
138
}
139
140
void
141 31:2c175adf8736 Chris
CepstralPitchTracker::setParameter(string identifier, float value)
142 3:9366c8a58778 Chris
{
143
}
144
145 31:2c175adf8736 Chris
CepstralPitchTracker::ProgramList
146
CepstralPitchTracker::getPrograms() const
147 3:9366c8a58778 Chris
{
148
    ProgramList list;
149
    return list;
150
}
151
152
string
153 31:2c175adf8736 Chris
CepstralPitchTracker::getCurrentProgram() const
154 3:9366c8a58778 Chris
{
155
    return ""; // no programs
156
}
157
158
void
159 31:2c175adf8736 Chris
CepstralPitchTracker::selectProgram(string name)
160 3:9366c8a58778 Chris
{
161
}
162
163 31:2c175adf8736 Chris
CepstralPitchTracker::OutputList
164
CepstralPitchTracker::getOutputDescriptors() const
165 3:9366c8a58778 Chris
{
166
    OutputList outputs;
167
168
    OutputDescriptor d;
169
170
    d.identifier = "f0";
171
    d.name = "Estimated f0";
172
    d.description = "Estimated fundamental frequency";
173
    d.unit = "Hz";
174
    d.hasFixedBinCount = true;
175
    d.binCount = 1;
176
    d.hasKnownExtents = true;
177
    d.minValue = m_fmin;
178
    d.maxValue = m_fmax;
179
    d.isQuantized = false;
180
    d.sampleType = OutputDescriptor::FixedSampleRate;
181
    d.sampleRate = (m_inputSampleRate / m_stepSize);
182
    d.hasDuration = false;
183
    outputs.push_back(d);
184
185 16:d717911aca3c Chris
    d.identifier = "notes";
186
    d.name = "Notes";
187
    d.description = "Derived fixed-pitch note frequencies";
188
    d.unit = "Hz";
189
    d.hasFixedBinCount = true;
190
    d.binCount = 1;
191
    d.hasKnownExtents = true;
192
    d.minValue = m_fmin;
193
    d.maxValue = m_fmax;
194
    d.isQuantized = false;
195
    d.sampleType = OutputDescriptor::FixedSampleRate;
196
    d.sampleRate = (m_inputSampleRate / m_stepSize);
197
    d.hasDuration = true;
198
    outputs.push_back(d);
199
200 3:9366c8a58778 Chris
    return outputs;
201
}
202
203
bool
204 31:2c175adf8736 Chris
CepstralPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
205 3:9366c8a58778 Chris
{
206
    if (channels < getMinChannelCount() ||
207
        channels > getMaxChannelCount()) return false;
208
209 31:2c175adf8736 Chris
//    std::cerr << "CepstralPitchTracker::initialise: channels = " << channels
210 3:9366c8a58778 Chris
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
211
//              << std::endl;
212
213
    m_channels = channels;
214
    m_stepSize = stepSize;
215
    m_blockSize = blockSize;
216
217
    m_binFrom = int(m_inputSampleRate / m_fmax);
218
    m_binTo = int(m_inputSampleRate / m_fmin);
219
220
    if (m_binTo >= (int)m_blockSize / 2) {
221
        m_binTo = m_blockSize / 2 - 1;
222
    }
223
224
    m_bins = (m_binTo - m_binFrom) + 1;
225
226
    reset();
227
228
    return true;
229
}
230
231
void
232 31:2c175adf8736 Chris
CepstralPitchTracker::reset()
233 3:9366c8a58778 Chris
{
234
}
235
236
void
237 35:2f5b169e4a3b Chris
CepstralPitchTracker::addFeaturesFrom(NoteHypothesis h, FeatureSet &fs)
238 30:2554aab152a5 Chris
{
239 35:2f5b169e4a3b Chris
    NoteHypothesis::Estimates es = h.getAcceptedEstimates();
240 30:2554aab152a5 Chris
241 35:2f5b169e4a3b Chris
    for (int i = 0; i < (int)es.size(); ++i) {
242 30:2554aab152a5 Chris
        Feature f;
243
        f.hasTimestamp = true;
244
        f.timestamp = es[i].time;
245
        f.values.push_back(es[i].freq);
246
        fs[0].push_back(f);
247
    }
248
249
    Feature nf;
250
    nf.hasTimestamp = true;
251
    nf.hasDuration = true;
252 35:2f5b169e4a3b Chris
    NoteHypothesis::Note n = h.getAveragedNote();
253 30:2554aab152a5 Chris
    nf.timestamp = n.time;
254
    nf.duration = n.duration;
255
    nf.values.push_back(n.freq);
256
    fs[1].push_back(nf);
257
}
258
259
void
260 31:2c175adf8736 Chris
CepstralPitchTracker::filter(const double *cep, double *data)
261 3:9366c8a58778 Chris
{
262
    for (int i = 0; i < m_bins; ++i) {
263 5:383c5b497f4a Chris
        double v = 0;
264
        int n = 0;
265
        // average according to the vertical filter length
266
        for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
267
            int ix = i + m_binFrom + j;
268 35:2f5b169e4a3b Chris
            if (ix >= 0 && ix < (int)m_blockSize) {
269 5:383c5b497f4a Chris
                v += cep[ix];
270
                ++n;
271
            }
272
        }
273 15:bd7fb10646fc Chris
        data[i] = v / n;
274 3:9366c8a58778 Chris
    }
275 6:291c75f6e837 Chris
}
276
277 18:131b1c40be1a Chris
double
278 31:2c175adf8736 Chris
CepstralPitchTracker::cubicInterpolate(const double y[4], double x)
279 18:131b1c40be1a Chris
{
280
    double a0 = y[3] - y[2] - y[0] + y[1];
281
    double a1 = y[0] - y[1] - a0;
282
    double a2 = y[2] - y[0];
283
    double a3 = y[1];
284
    return
285
        a0 * x * x * x +
286
        a1 * x * x +
287
        a2 * x +
288
        a3;
289
}
290
291
double
292 31:2c175adf8736 Chris
CepstralPitchTracker::findInterpolatedPeak(const double *in, int maxbin)
293 18:131b1c40be1a Chris
{
294
    if (maxbin < 2 || maxbin > m_bins - 3) {
295
        return maxbin;
296
    }
297
298
    double maxval = 0.0;
299
    double maxidx = maxbin;
300
301
    const int divisions = 10;
302
    double y[4];
303
304
    y[0] = in[maxbin-1];
305
    y[1] = in[maxbin];
306
    y[2] = in[maxbin+1];
307
    y[3] = in[maxbin+2];
308
    for (int i = 0; i < divisions; ++i) {
309
        double probe = double(i) / double(divisions);
310
        double value = cubicInterpolate(y, probe);
311
        if (value > maxval) {
312
            maxval = value;
313
            maxidx = maxbin + probe;
314
        }
315
    }
316
317
    y[3] = y[2];
318
    y[2] = y[1];
319
    y[1] = y[0];
320
    y[0] = in[maxbin-2];
321
    for (int i = 0; i < divisions; ++i) {
322
        double probe = double(i) / double(divisions);
323
        double value = cubicInterpolate(y, probe);
324
        if (value > maxval) {
325
            maxval = value;
326
            maxidx = maxbin - 1 + probe;
327
        }
328
    }
329
330
/*
331
    std::cerr << "centre = " << maxbin << ": ["
332
              << in[maxbin-2] << ","
333
              << in[maxbin-1] << ","
334
              << in[maxbin] << ","
335
              << in[maxbin+1] << ","
336
              << in[maxbin+2] << "] -> " << maxidx << std::endl;
337
*/
338
339
    return maxidx;
340
}
341
342 31:2c175adf8736 Chris
CepstralPitchTracker::FeatureSet
343
CepstralPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
344 3:9366c8a58778 Chris
{
345
    FeatureSet fs;
346
347
    int bs = m_blockSize;
348
    int hs = m_blockSize/2 + 1;
349
350
    double *rawcep = new double[bs];
351
    double *io = new double[bs];
352
    double *logmag = new double[bs];
353
354 4:c74846514b09 Chris
    // The "inverse symmetric" method. Seems to be the most reliable
355 3:9366c8a58778 Chris
356 25:9aee1a0e6223 Chris
    double magmean = 0.0;
357
358 3:9366c8a58778 Chris
    for (int i = 0; i < hs; ++i) {
359
360
        double power =
361
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
362
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
363
        double mag = sqrt(power);
364 25:9aee1a0e6223 Chris
365
        magmean += mag;
366
367 3:9366c8a58778 Chris
        double lm = log(mag + 0.00000001);
368
369 4:c74846514b09 Chris
        logmag[i] = lm;
370
        if (i > 0) logmag[bs - i] = lm;
371 3:9366c8a58778 Chris
    }
372
373 25:9aee1a0e6223 Chris
    magmean /= hs;
374
    double threshold = 0.1; // for magmean
375
376 26:13568f1ccff0 Chris
    Vamp::FFT::inverse(bs, logmag, 0, rawcep, io);
377 3:9366c8a58778 Chris
378
    delete[] logmag;
379
    delete[] io;
380
381
    int n = m_bins;
382
    double *data = new double[n];
383
    filter(rawcep, data);
384
    delete[] rawcep;
385
386
    double maxval = 0.0;
387 6:291c75f6e837 Chris
    int maxbin = -1;
388 3:9366c8a58778 Chris
389
    for (int i = 0; i < n; ++i) {
390
        if (data[i] > maxval) {
391
            maxval = data[i];
392
            maxbin = i;
393
        }
394
    }
395
396 15:bd7fb10646fc Chris
    if (maxbin < 0) {
397
        delete[] data;
398
        return fs;
399
    }
400
401
    double nextPeakVal = 0.0;
402
    for (int i = 1; i+1 < n; ++i) {
403
        if (data[i] > data[i-1] &&
404
            data[i] > data[i+1] &&
405
            i != maxbin &&
406
            data[i] > nextPeakVal) {
407
            nextPeakVal = data[i];
408
        }
409
    }
410 8:e9d86e129467 Chris
411 18:131b1c40be1a Chris
    double cimax = findInterpolatedPeak(data, maxbin);
412
    double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
413 15:bd7fb10646fc Chris
414
    double confidence = 0.0;
415
    if (nextPeakVal != 0.0) {
416 27:e358f133e670 Chris
        confidence = (maxval - nextPeakVal) * 10.0;
417 25:9aee1a0e6223 Chris
        if (magmean < threshold) confidence = 0.0;
418 39:822cf7b8e070 Chris
//        std::cerr << "magmean = " << magmean << ", confidence = " << confidence << std::endl;
419 15:bd7fb10646fc Chris
    }
420
421 35:2f5b169e4a3b Chris
    NoteHypothesis::Estimate e;
422 8:e9d86e129467 Chris
    e.freq = peakfreq;
423
    e.time = timestamp;
424 15:bd7fb10646fc Chris
    e.confidence = confidence;
425 8:e9d86e129467 Chris
426 28:7927e7afbe07 Chris
    if (!m_good.accept(e)) {
427 13:6f73de098d35 Chris
428 11:c938c60de2de Chris
        int candidate = -1;
429 13:6f73de098d35 Chris
        bool accepted = false;
430
431 35:2f5b169e4a3b Chris
        for (int i = 0; i < (int)m_possible.size(); ++i) {
432 28:7927e7afbe07 Chris
            if (m_possible[i].accept(e)) {
433 35:2f5b169e4a3b Chris
                if (m_possible[i].getState() == NoteHypothesis::Satisfied) {
434 28:7927e7afbe07 Chris
                    accepted = true;
435 11:c938c60de2de Chris
                    candidate = i;
436
                }
437
                break;
438
            }
439
        }
440 12:1daaf43a5459 Chris
441 13:6f73de098d35 Chris
        if (!accepted) {
442 35:2f5b169e4a3b Chris
            NoteHypothesis h;
443 28:7927e7afbe07 Chris
            h.accept(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
444 13:6f73de098d35 Chris
            m_possible.push_back(h);
445
        }
446
447 35:2f5b169e4a3b Chris
        if (m_good.getState() == NoteHypothesis::Expired) {
448 30:2554aab152a5 Chris
            addFeaturesFrom(m_good, fs);
449 12:1daaf43a5459 Chris
        }
450
451 35:2f5b169e4a3b Chris
        if (m_good.getState() == NoteHypothesis::Expired ||
452
            m_good.getState() == NoteHypothesis::Rejected) {
453 11:c938c60de2de Chris
            if (candidate >= 0) {
454 28:7927e7afbe07 Chris
                m_good = m_possible[candidate];
455 11:c938c60de2de Chris
            } else {
456 35:2f5b169e4a3b Chris
                m_good = NoteHypothesis();
457 11:c938c60de2de Chris
            }
458
        }
459 8:e9d86e129467 Chris
460 14:98256077e2a2 Chris
        // reap rejected/expired hypotheses from possible list
461
        Hypotheses toReap = m_possible;
462
        m_possible.clear();
463 35:2f5b169e4a3b Chris
        for (int i = 0; i < (int)toReap.size(); ++i) {
464
            NoteHypothesis h = toReap[i];
465
            if (h.getState() != NoteHypothesis::Rejected &&
466
                h.getState() != NoteHypothesis::Expired) {
467 14:98256077e2a2 Chris
                m_possible.push_back(h);
468
            }
469
        }
470
    }
471
472 3:9366c8a58778 Chris
    delete[] data;
473
    return fs;
474
}
475
476 31:2c175adf8736 Chris
CepstralPitchTracker::FeatureSet
477
CepstralPitchTracker::getRemainingFeatures()
478 3:9366c8a58778 Chris
{
479
    FeatureSet fs;
480 35:2f5b169e4a3b Chris
    if (m_good.getState() == NoteHypothesis::Satisfied) {
481 30:2554aab152a5 Chris
        addFeaturesFrom(m_good, fs);
482 11:c938c60de2de Chris
    }
483 3:9366c8a58778 Chris
    return fs;
484
}