To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstralPitchTracker.cpp @ 67:dd5ab48fd58a

History | View | Annotate | Download (11 KB)

1 3:9366c8a58778 Chris
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3 31:2c175adf8736 Chris
    This file is Copyright (c) 2012 Chris Cannam
4

5 3:9366c8a58778 Chris
    Permission is hereby granted, free of charge, to any person
6
    obtaining a copy of this software and associated documentation
7
    files (the "Software"), to deal in the Software without
8
    restriction, including without limitation the rights to use, copy,
9
    modify, merge, publish, distribute, sublicense, and/or sell copies
10
    of the Software, and to permit persons to whom the Software is
11
    furnished to do so, subject to the following conditions:
12

13
    The above copyright notice and this permission notice shall be
14
    included in all copies or substantial portions of the Software.
15

16
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
20
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
21
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
*/
24
25 31:2c175adf8736 Chris
#include "CepstralPitchTracker.h"
26 51:0997774f5fdc Chris
#include "Cepstrum.h"
27 47:f72a470fe4b5 Chris
#include "MeanFilter.h"
28 50:d84049e20c61 Chris
#include "PeakInterpolator.h"
29 56:d8eeba570d70 Chris
#include "AgentFeeder.h"
30 3:9366c8a58778 Chris
31 26:13568f1ccff0 Chris
#include "vamp-sdk/FFT.h"
32
33 3:9366c8a58778 Chris
#include <vector>
34
#include <algorithm>
35
36
#include <cstdio>
37
#include <cmath>
38
#include <complex>
39
40
using std::string;
41 7:32defdb2f9d9 Chris
using std::vector;
42 16:d717911aca3c Chris
using Vamp::RealTime;
43 7:32defdb2f9d9 Chris
44 16:d717911aca3c Chris
45 31:2c175adf8736 Chris
/**
 * Construct the plugin for input at the given sample rate.
 *
 * Only default values are established here: a 256-sample step, a
 * 1024-sample block, a 50-900 Hz search range and the default values
 * of the user-adjustable parameters. The cepstral bin range stays at
 * zero until initialise() computes it, and the agent feeder stays
 * null until reset() creates it.
 */
CepstralPitchTracker::CepstralPitchTracker(float inputSampleRate)
    : Plugin(inputSampleRate)
    // processing configuration, overwritten by initialise()
    , m_channels(0)
    , m_stepSize(256)
    , m_blockSize(1024)
    // frequency search range
    , m_fmin(50)
    , m_fmax(900)
    // length passed to MeanFilter in process()
    , m_vflen(1)
    // user-adjustable parameters (see getParameterDescriptors())
    , m_slack(40)
    , m_sensitivity(10)
    , m_threshold(0.1)
    , m_fillGaps(false)
    // derived state
    , m_binFrom(0)
    , m_binTo(0)
    , m_bins(0)
    , m_nAccepted(0)
    , m_feeder(0)
{
}
64
65 31:2c175adf8736 Chris
/**
 * Destroy the plugin, releasing the agent feeder if one was created.
 */
CepstralPitchTracker::~CepstralPitchTracker()
{
    // Deleting a null pointer is a no-op, so no check is needed for
    // the case where reset() was never called.
    AgentFeeder *feeder = m_feeder;
    m_feeder = 0;
    delete feeder;
}
69
70
/** Return the machine-readable identifier of this plugin. */
string
CepstralPitchTracker::getIdentifier() const
{
    const string identifier("cepstral-pitchtracker");
    return identifier;
}
75
76
/** Return the human-readable name of this plugin. */
string
CepstralPitchTracker::getName() const
{
    const string name("Cepstral Pitch Tracker");
    return name;
}
81
82
/** Return a one-line description of what this plugin does. */
string
CepstralPitchTracker::getDescription() const
{
    const string description
        ("Estimate f0 of monophonic material using a cepstrum method.");
    return description;
}
87
88
/** Return the name of the plugin's author. */
string
CepstralPitchTracker::getMaker() const
{
    const string maker("Chris Cannam");
    return maker;
}
93
94
int
95 31:2c175adf8736 Chris
CepstralPitchTracker::getPluginVersion() const
96 3:9366c8a58778 Chris
{
97
    // Increment this each time you release a version that behaves
98
    // differently from the previous one
99 66:7ad142c710c6 Chris
    return 2;
100 3:9366c8a58778 Chris
}
101
102
/** Return the licensing summary for this plugin. */
string
CepstralPitchTracker::getCopyright() const
{
    const string copyright("Freely redistributable (BSD license)");
    return copyright;
}
107
108 31:2c175adf8736 Chris
/** Report that this plugin takes frequency-domain input. */
CepstralPitchTracker::InputDomain
CepstralPitchTracker::getInputDomain() const
{
    const InputDomain domain = FrequencyDomain;
    return domain;
}
113
114
size_t
115 31:2c175adf8736 Chris
CepstralPitchTracker::getPreferredBlockSize() const
116 3:9366c8a58778 Chris
{
117
    return 1024;
118
}
119
120
size_t
121 31:2c175adf8736 Chris
CepstralPitchTracker::getPreferredStepSize() const
122 3:9366c8a58778 Chris
{
123
    return 256;
124
}
125
126
size_t
127 31:2c175adf8736 Chris
CepstralPitchTracker::getMinChannelCount() const
128 3:9366c8a58778 Chris
{
129
    return 1;
130
}
131
132
size_t
133 31:2c175adf8736 Chris
CepstralPitchTracker::getMaxChannelCount() const
134 3:9366c8a58778 Chris
{
135
    return 1;
136
}
137
138 31:2c175adf8736 Chris
/**
 * Describe the user-adjustable parameters of the plugin.
 *
 * Four parameters are returned, in this order: "sensitivity",
 * "slack", "threshold" and "fill". A single descriptor object is
 * reused for all of them, so any field not reassigned for a given
 * parameter keeps the value set for the previous one.
 */
CepstralPitchTracker::ParameterList
CepstralPitchTracker::getParameterDescriptors() const
{
    ParameterList params;
    ParameterDescriptor desc;

    // Voicing detector sensitivity: integer steps in 0..100
    desc.identifier = "sensitivity";
    desc.name = "Sensitivity";
    desc.description = "Sensitivity of the voicing detector";
    desc.unit = "";
    desc.isQuantized = true;
    desc.quantizeStep = 1;
    desc.minValue = 0;
    desc.maxValue = 100;
    desc.defaultValue = 10;
    params.push_back(desc);

    // Permitted voicing gap: integer milliseconds in 0..200
    desc.identifier = "slack";
    desc.name = "Slack";
    desc.description = "Maximum permissible length of voicing gap for a continuous note";
    desc.unit = "ms";
    desc.isQuantized = true;
    desc.quantizeStep = 1;
    desc.minValue = 0;
    desc.maxValue = 200;
    desc.defaultValue = 40;
    params.push_back(desc);

    // Silence threshold: continuous in 0..0.5
    desc.identifier = "threshold";
    desc.name = "Silence threshold";
    desc.description = "Threshold for silence detection";
    desc.unit = ""; //!!! todo: convert this threshold to a meaningful unit!
    desc.isQuantized = false;
    desc.minValue = 0;
    desc.maxValue = 0.5;
    desc.defaultValue = 0.1;
    params.push_back(desc);

    // Gap filling: an on/off switch expressed as a 0/1 value
    desc.identifier = "fill";
    desc.name = "Fill f0 gaps within a note";
    desc.description = "Return an f0 value for every frame within each discovered note, interpolating results into any gaps in the measurement";
    desc.unit = "";
    desc.isQuantized = true;
    desc.quantizeStep = 1;
    desc.minValue = 0;
    desc.maxValue = 1;
    desc.defaultValue = 0;
    params.push_back(desc);

    return params;
}
189
190
float
191 31:2c175adf8736 Chris
CepstralPitchTracker::getParameter(string identifier) const
192 3:9366c8a58778 Chris
{
193 66:7ad142c710c6 Chris
    if (identifier == "sensitivity") return m_sensitivity;
194
    else if (identifier == "slack") return m_slack;
195
    else if (identifier == "threshold") return m_threshold;
196 67:dd5ab48fd58a Chris
    else if (identifier == "fill") return (m_fillGaps ? 1 : 0);
197 3:9366c8a58778 Chris
    return 0.f;
198
}
199
200
void
201 31:2c175adf8736 Chris
CepstralPitchTracker::setParameter(string identifier, float value)
202 3:9366c8a58778 Chris
{
203 66:7ad142c710c6 Chris
    if (identifier == "sensitivity") m_sensitivity = value;
204
    else if (identifier == "slack") m_slack = value;
205
    else if (identifier == "threshold") m_threshold = value;
206 67:dd5ab48fd58a Chris
    else if (identifier == "fill") m_fillGaps = (value > 0.5);
207 3:9366c8a58778 Chris
}
208
209 31:2c175adf8736 Chris
/** Return the list of available programs; this plugin has none. */
CepstralPitchTracker::ProgramList
CepstralPitchTracker::getPrograms() const
{
    return ProgramList();
}
215
216
/** Return the current program name; always empty, as there are no programs. */
string
CepstralPitchTracker::getCurrentProgram() const
{
    const string noProgram("");
    return noProgram;
}
221
222
void
223 31:2c175adf8736 Chris
CepstralPitchTracker::selectProgram(string name)
224 3:9366c8a58778 Chris
{
225
}
226
227 31:2c175adf8736 Chris
/**
 * Describe the outputs of the plugin.
 *
 * Three single-bin outputs in Hz are returned, in this order (the
 * position is the index used when features are added to a
 * FeatureSet elsewhere in this file):
 *   0 "f0"    - estimated fundamental frequency
 *   1 "notes" - note frequencies, with durations
 *   2 "raw"   - raw cepstral peak frequency, one value per step
 *
 * A single descriptor object is reused, so fields common to all
 * outputs are assigned once and only the differing fields are
 * changed between outputs.
 */
CepstralPitchTracker::OutputList
CepstralPitchTracker::getOutputDescriptors() const
{
    OutputList outputs;
    OutputDescriptor desc;

    // Settings shared by every output
    desc.unit = "Hz";
    desc.hasFixedBinCount = true;
    desc.binCount = 1;
    desc.hasKnownExtents = true;
    desc.minValue = m_fmin;
    desc.maxValue = m_fmax;
    desc.isQuantized = false;

    // Output 0: f0 track
    desc.identifier = "f0";
    desc.name = "Estimated f0";
    desc.description = "Estimated fundamental frequency";
    desc.sampleType = OutputDescriptor::FixedSampleRate;
    desc.sampleRate = (m_inputSampleRate / m_stepSize);
    desc.hasDuration = false;
    outputs.push_back(desc);

    // Output 1: notes; same timing as f0 but features carry durations
    desc.identifier = "notes";
    desc.name = "Notes";
    desc.description = "Derived fixed-pitch note frequencies";
    desc.sampleType = OutputDescriptor::FixedSampleRate;
    desc.sampleRate = (m_inputSampleRate / m_stepSize);
    desc.hasDuration = true;
    outputs.push_back(desc);

    // Output 2: raw peak frequency, one feature per processing step
    desc.identifier = "raw";
    desc.name = "Raw frequencies";
    desc.description = "Raw peak frequencies from cepstrum, including unvoiced segments";
    desc.sampleType = OutputDescriptor::OneSamplePerStep;
    desc.hasDuration = false;
    outputs.push_back(desc);

    return outputs;
}
280
281
bool
282 31:2c175adf8736 Chris
CepstralPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
283 3:9366c8a58778 Chris
{
284
    if (channels < getMinChannelCount() ||
285
        channels > getMaxChannelCount()) return false;
286
287 31:2c175adf8736 Chris
//    std::cerr << "CepstralPitchTracker::initialise: channels = " << channels
288 3:9366c8a58778 Chris
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
289
//              << std::endl;
290
291
    m_channels = channels;
292
    m_stepSize = stepSize;
293
    m_blockSize = blockSize;
294
295
    m_binFrom = int(m_inputSampleRate / m_fmax);
296
    m_binTo = int(m_inputSampleRate / m_fmin);
297
298
    if (m_binTo >= (int)m_blockSize / 2) {
299
        m_binTo = m_blockSize / 2 - 1;
300
    }
301 56:d8eeba570d70 Chris
    if (m_binFrom >= m_binTo) {
302
        // shouldn't happen except for degenerate samplerate / blocksize combos
303
        m_binFrom = m_binTo - 1;
304
    }
305 3:9366c8a58778 Chris
306
    m_bins = (m_binTo - m_binFrom) + 1;
307
308
    reset();
309
310
    return true;
311
}
312
313
void
314 31:2c175adf8736 Chris
CepstralPitchTracker::reset()
315 3:9366c8a58778 Chris
{
316 56:d8eeba570d70 Chris
    delete m_feeder;
317 66:7ad142c710c6 Chris
    m_feeder = new AgentFeeder(m_slack);
318 57:82b3cdf6ca6b Chris
    m_nAccepted = 0;
319 3:9366c8a58778 Chris
}
320
321
void
322 35:2f5b169e4a3b Chris
CepstralPitchTracker::addFeaturesFrom(NoteHypothesis h, FeatureSet &fs)
323 30:2554aab152a5 Chris
{
324 35:2f5b169e4a3b Chris
    NoteHypothesis::Estimates es = h.getAcceptedEstimates();
325 67:dd5ab48fd58a Chris
    NoteHypothesis::Note n = h.getAveragedNote();
326 30:2554aab152a5 Chris
327 67:dd5ab48fd58a Chris
    if (!m_fillGaps) {
328
329
        for (int i = 0; i < (int)es.size(); ++i) {
330
            Feature f;
331
            f.hasTimestamp = true;
332
            f.timestamp = es[i].time;
333
            f.values.push_back(es[i].freq);
334
            fs[0].push_back(f);
335
        }
336
337
    } else {
338
339
        int ix = 0;
340
        RealTime increment = RealTime::frame2RealTime
341
            (m_stepSize, m_inputSampleRate);
342
343
        float freq = 0;
344
345
        for (RealTime t = n.time; t < n.time + n.duration; t = t + increment) {
346
            if (ix < (int)es.size() && t >= es[ix].time) {
347
                freq = es[ix].freq;
348
                ++ix;
349
            }
350
            if (freq > 0) {
351
                Feature f;
352
                f.hasTimestamp = true;
353
                f.timestamp = t;
354
                f.values.push_back(freq);
355
                fs[0].push_back(f);
356
            }
357
        }
358 30:2554aab152a5 Chris
    }
359
360
    Feature nf;
361
    nf.hasTimestamp = true;
362
    nf.hasDuration = true;
363
    nf.timestamp = n.time;
364
    nf.duration = n.duration;
365
    nf.values.push_back(n.freq);
366
    fs[1].push_back(nf);
367
}
368
369 57:82b3cdf6ca6b Chris
void
370
CepstralPitchTracker::addNewFeatures(FeatureSet &fs)
371
{
372
    int n = m_feeder->getAcceptedHypotheses().size();
373
    if (n == m_nAccepted) return;
374
375
    AgentFeeder::Hypotheses accepted = m_feeder->getAcceptedHypotheses();
376
377
    for (int i = m_nAccepted; i < n; ++i) {
378
        addFeaturesFrom(accepted[i], fs);
379
    }
380
381
    m_nAccepted = n;
382
}
383
384 31:2c175adf8736 Chris
/**
 * Process one block of input and return any features now available.
 *
 * Steps:
 *  1. compute the cepstrum of the block and mean-filter the bin range
 *     [m_binFrom, m_binFrom + m_bins) into a working buffer;
 *  2. find the largest positive value in that range; if there is none,
 *     return an empty feature set;
 *  3. find the largest other local maximum, interpolate the main peak
 *     location and convert it to a frequency;
 *  4. emit the raw frequency on output 2;
 *  5. derive a confidence from the difference between the two peaks
 *     scaled by the sensitivity, forced to zero when the block's mean
 *     magnitude is below the silence threshold or when no second peak
 *     was found;
 *  6. feed the estimate to the agent feeder and append features for
 *     any newly accepted note hypotheses.
 *
 * @param inputBuffers per-channel input data; only channel 0 is read
 * @param timestamp    time of this block, recorded in the estimate
 * @return features for outputs 0 and 1 (when hypotheses were newly
 *         accepted) and output 2 (raw peak frequency)
 */
CepstralPitchTracker::FeatureSet
CepstralPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
{
    const int n = m_bins;

    // Nothing useful can be done before initialise() has established a
    // bin range and a feeder; with no bins no peak could be found anyway
    if (!m_feeder || n <= 0) {
        return FeatureSet();
    }

    // Vectors own the working buffers so that they are released on
    // every exit path, including exceptions thrown by the helpers
    vector<double> data(n);
    double magmean = 0.0;

    {
        // The full cepstrum is only needed while filtering
        vector<double> rawcep(m_blockSize);
        magmean = Cepstrum(m_blockSize).process(inputBuffers[0], rawcep.data());
        MeanFilter(m_vflen).filterSubsequence
            (rawcep.data(), data.data(), m_blockSize, n, m_binFrom);
    }

    // Highest strictly positive value in the range
    double maxval = 0.0;
    int maxbin = -1;

    for (int i = 0; i < n; ++i) {
        if (data[i] > maxval) {
            maxval = data[i];
            maxbin = i;
        }
    }

    if (maxbin < 0) {
        return FeatureSet();
    }

    // Highest local maximum other than the main peak
    double nextPeakVal = 0.0;
    for (int i = 1; i+1 < n; ++i) {
        if (data[i] > data[i-1] &&
            data[i] > data[i+1] &&
            i != maxbin &&
            data[i] > nextPeakVal) {
            nextPeakVal = data[i];
        }
    }

    PeakInterpolator pi;
    double cimax = pi.findPeakLocation(data.data(), m_bins, maxbin);

    // Bin positions in data are relative to m_binFrom
    double peakfreq = m_inputSampleRate / (cimax + m_binFrom);

    FeatureSet fs;
    Feature rawf;
    rawf.hasTimestamp = false;
    rawf.hasDuration = false;
    rawf.values.push_back(peakfreq);
    fs[2].push_back(rawf);

    double confidence = 0.0;

    if (nextPeakVal != 0.0) {
        confidence = (maxval - nextPeakVal) * m_sensitivity;
        if (magmean < m_threshold) confidence = 0.0;
    }

    NoteHypothesis::Estimate e;
    e.freq = peakfreq;
    e.time = timestamp;
    e.confidence = confidence;

    m_feeder->feed(e);

    addNewFeatures(fs);
    return fs;
}
452
453 31:2c175adf8736 Chris
/**
 * Finish processing and return any features not yet reported.
 *
 * Tells the agent feeder that input has ended, then appends features
 * for any hypotheses accepted as a result.
 *
 * @return the remaining features; empty if the plugin was never
 *         initialised
 */
CepstralPitchTracker::FeatureSet
CepstralPitchTracker::getRemainingFeatures()
{
    FeatureSet fs;

    // The feeder only exists once reset() has run (via initialise());
    // without it there is nothing to finish or report
    if (!m_feeder) return fs;

    m_feeder->finish();
    addNewFeatures(fs);
    return fs;
}