To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstralPitchTracker.cpp @ 66:7ad142c710c6

History | View | Annotate | Download (9.8 KB)

1 3:9366c8a58778 Chris
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3 31:2c175adf8736 Chris
    This file is Copyright (c) 2012 Chris Cannam
4

5 3:9366c8a58778 Chris
    Permission is hereby granted, free of charge, to any person
6
    obtaining a copy of this software and associated documentation
7
    files (the "Software"), to deal in the Software without
8
    restriction, including without limitation the rights to use, copy,
9
    modify, merge, publish, distribute, sublicense, and/or sell copies
10
    of the Software, and to permit persons to whom the Software is
11
    furnished to do so, subject to the following conditions:
12

13
    The above copyright notice and this permission notice shall be
14
    included in all copies or substantial portions of the Software.
15

16
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
20
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
21
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
*/
24
25 31:2c175adf8736 Chris
#include "CepstralPitchTracker.h"
26 51:0997774f5fdc Chris
#include "Cepstrum.h"
27 47:f72a470fe4b5 Chris
#include "MeanFilter.h"
28 50:d84049e20c61 Chris
#include "PeakInterpolator.h"
29 56:d8eeba570d70 Chris
#include "AgentFeeder.h"
30 3:9366c8a58778 Chris
31 26:13568f1ccff0 Chris
#include "vamp-sdk/FFT.h"
32
33 3:9366c8a58778 Chris
#include <vector>
34
#include <algorithm>
35
36
#include <cstdio>
37
#include <cmath>
38
#include <complex>
39
40
using std::string;
41 7:32defdb2f9d9 Chris
using std::vector;
42 16:d717911aca3c Chris
using Vamp::RealTime;
43 7:32defdb2f9d9 Chris
44 16:d717911aca3c Chris
45 31:2c175adf8736 Chris
// Construct the plugin for the given input sample rate, with default
// analysis settings. The bin range (m_binFrom, m_binTo, m_bins) and the
// feeder are left empty here; they are set up later by initialise() and
// reset().
CepstralPitchTracker::CepstralPitchTracker(float inputSampleRate)
    : Plugin(inputSampleRate)
    , m_channels(0)
    , m_stepSize(256)     // same value as getPreferredStepSize()
    , m_blockSize(1024)   // same value as getPreferredBlockSize()
    , m_fmin(50)          // lower extent reported for the outputs (Hz)
    , m_fmax(900)         // upper extent reported for the outputs (Hz)
    , m_vflen(1)          // length handed to MeanFilter in process()
    , m_slack(40)         // default of the "slack" parameter (ms)
    , m_sensitivity(10)   // default of the "sensitivity" parameter
    , m_threshold(0.1)    // default of the "threshold" parameter
    , m_binFrom(0)
    , m_binTo(0)
    , m_bins(0)
    , m_nAccepted(0)
    , m_feeder(0)         // allocated by reset()
{
}
63
64 31:2c175adf8736 Chris
// Release the feeder allocated by reset(). Deleting a null pointer is a
// no-op, so this is safe even if reset() never ran.
CepstralPitchTracker::~CepstralPitchTracker()
{
    delete m_feeder;
}
68
69
// Return the fixed identifier string of this plugin.
string CepstralPitchTracker::getIdentifier() const
{
    return string("cepstral-pitchtracker");
}
74
75
// Return the human-readable name of this plugin.
string CepstralPitchTracker::getName() const
{
    return string("Cepstral Pitch Tracker");
}
80
81
// Return a one-sentence description of what the plugin does.
string CepstralPitchTracker::getDescription() const
{
    return string("Estimate f0 of monophonic material using a cepstrum method.");
}
86
87
// Return the name of the plugin's author.
string CepstralPitchTracker::getMaker() const
{
    return string("Chris Cannam");
}
92
93
int
94 31:2c175adf8736 Chris
CepstralPitchTracker::getPluginVersion() const
95 3:9366c8a58778 Chris
{
96
    // Increment this each time you release a version that behaves
97
    // differently from the previous one
98 66:7ad142c710c6 Chris
    return 2;
99 3:9366c8a58778 Chris
}
100
101
// Return the licensing summary string for this plugin.
string CepstralPitchTracker::getCopyright() const
{
    return string("Freely redistributable (BSD license)");
}
106
107 31:2c175adf8736 Chris
// Report the input domain this plugin requests: FrequencyDomain.
CepstralPitchTracker::InputDomain CepstralPitchTracker::getInputDomain() const
{
    const InputDomain domain = FrequencyDomain;
    return domain;
}
112
113
size_t
114 31:2c175adf8736 Chris
CepstralPitchTracker::getPreferredBlockSize() const
115 3:9366c8a58778 Chris
{
116
    return 1024;
117
}
118
119
size_t
120 31:2c175adf8736 Chris
CepstralPitchTracker::getPreferredStepSize() const
121 3:9366c8a58778 Chris
{
122
    return 256;
123
}
124
125
size_t
126 31:2c175adf8736 Chris
CepstralPitchTracker::getMinChannelCount() const
127 3:9366c8a58778 Chris
{
128
    return 1;
129
}
130
131
size_t
132 31:2c175adf8736 Chris
CepstralPitchTracker::getMaxChannelCount() const
133 3:9366c8a58778 Chris
{
134
    return 1;
135
}
136
137 31:2c175adf8736 Chris
// Describe the three adjustable parameters: "sensitivity", "slack" and
// "threshold". The defaults listed here equal the values the constructor
// gives m_sensitivity, m_slack and m_threshold.
//
// A single descriptor object is filled in, appended, and then reused for
// the next parameter, so any field that is not reassigned keeps the value
// it had for the previous parameter.
CepstralPitchTracker::ParameterList
CepstralPitchTracker::getParameterDescriptors() const
{
    ParameterList params;
    ParameterDescriptor pd;

    // Voicing detector sensitivity: integer steps in 0..100
    pd.identifier = "sensitivity";
    pd.name = "Sensitivity";
    pd.description = "Sensitivity of the voicing detector";
    pd.unit = "";
    pd.minValue = 0;
    pd.maxValue = 100;
    pd.defaultValue = 10;
    pd.isQuantized = true;
    pd.quantizeStep = 1;
    params.push_back(pd);

    // Permitted voicing gap within a note: integer milliseconds in 0..200
    pd.identifier = "slack";
    pd.name = "Slack";
    pd.description = "Maximum permissible length of voicing gap for a continuous note";
    pd.unit = "ms";
    pd.minValue = 0;
    pd.maxValue = 200;
    pd.defaultValue = 40;
    pd.isQuantized = true;
    pd.quantizeStep = 1;
    params.push_back(pd);

    // Silence threshold: continuous in 0..0.5
    pd.identifier = "threshold";
    pd.name = "Silence threshold";
    pd.description = "Threshold for silence detection";
    pd.unit = ""; //!!! todo: convert this threshold to a meaningful unit!
    pd.minValue = 0;
    pd.maxValue = 0.5;
    pd.defaultValue = 0.1;
    pd.isQuantized = false;
    params.push_back(pd);

    return params;
}
177
178
float
179 31:2c175adf8736 Chris
CepstralPitchTracker::getParameter(string identifier) const
180 3:9366c8a58778 Chris
{
181 66:7ad142c710c6 Chris
    if (identifier == "sensitivity") return m_sensitivity;
182
    else if (identifier == "slack") return m_slack;
183
    else if (identifier == "threshold") return m_threshold;
184 3:9366c8a58778 Chris
    return 0.f;
185
}
186
187
void
188 31:2c175adf8736 Chris
CepstralPitchTracker::setParameter(string identifier, float value)
189 3:9366c8a58778 Chris
{
190 66:7ad142c710c6 Chris
    if (identifier == "sensitivity") m_sensitivity = value;
191
    else if (identifier == "slack") m_slack = value;
192
    else if (identifier == "threshold") m_threshold = value;
193 3:9366c8a58778 Chris
}
194
195 31:2c175adf8736 Chris
// Return the list of available programs; this plugin has none, so the
// list is always empty.
CepstralPitchTracker::ProgramList CepstralPitchTracker::getPrograms() const
{
    return ProgramList();
}
201
202
// Return the name of the current program. There are no programs, so this
// is always the empty string.
string CepstralPitchTracker::getCurrentProgram() const
{
    return string("");
}
207
208
void
209 31:2c175adf8736 Chris
CepstralPitchTracker::selectProgram(string name)
210 3:9366c8a58778 Chris
{
211
}
212
213 31:2c175adf8736 Chris
// Describe the plugin's three outputs, in the order in which process()
// and addFeaturesFrom() index the FeatureSet:
//
//   0  "f0"     frequency estimates from accepted note hypotheses
//   1  "notes"  one averaged note, with a duration, per accepted hypothesis
//   2  "raw"    raw cepstral peak frequency, one value per step
//
// One descriptor object is reused for all three outputs, so any field
// that is not reassigned carries over from the previous output.
CepstralPitchTracker::OutputList
CepstralPitchTracker::getOutputDescriptors() const
{
    OutputList result;
    OutputDescriptor desc;

    // Output 0: f0 estimates
    desc.identifier = "f0";
    desc.name = "Estimated f0";
    desc.description = "Estimated fundamental frequency";
    desc.unit = "Hz";
    desc.hasFixedBinCount = true;
    desc.binCount = 1;
    desc.hasKnownExtents = true;
    desc.minValue = m_fmin;
    desc.maxValue = m_fmax;
    desc.isQuantized = false;
    desc.sampleType = OutputDescriptor::FixedSampleRate;
    desc.sampleRate = (m_inputSampleRate / m_stepSize);
    desc.hasDuration = false;
    result.push_back(desc);

    // Output 1: notes (features carry a duration)
    desc.identifier = "notes";
    desc.name = "Notes";
    desc.description = "Derived fixed-pitch note frequencies";
    desc.unit = "Hz";
    desc.hasFixedBinCount = true;
    desc.binCount = 1;
    desc.hasKnownExtents = true;
    desc.minValue = m_fmin;
    desc.maxValue = m_fmax;
    desc.isQuantized = false;
    desc.sampleType = OutputDescriptor::FixedSampleRate;
    desc.sampleRate = (m_inputSampleRate / m_stepSize);
    desc.hasDuration = true;
    result.push_back(desc);

    // Output 2: raw peak frequencies, one per processing step
    desc.identifier = "raw";
    desc.name = "Raw frequencies";
    desc.description = "Raw peak frequencies from cepstrum, including unvoiced segments";
    desc.unit = "Hz";
    desc.hasFixedBinCount = true;
    desc.binCount = 1;
    desc.hasKnownExtents = true;
    desc.minValue = m_fmin;
    desc.maxValue = m_fmax;
    desc.isQuantized = false;
    desc.sampleType = OutputDescriptor::OneSamplePerStep;
    desc.hasDuration = false;
    result.push_back(desc);

    return result;
}
266
267
bool
268 31:2c175adf8736 Chris
CepstralPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
269 3:9366c8a58778 Chris
{
270
    if (channels < getMinChannelCount() ||
271
        channels > getMaxChannelCount()) return false;
272
273 31:2c175adf8736 Chris
//    std::cerr << "CepstralPitchTracker::initialise: channels = " << channels
274 3:9366c8a58778 Chris
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
275
//              << std::endl;
276
277
    m_channels = channels;
278
    m_stepSize = stepSize;
279
    m_blockSize = blockSize;
280
281
    m_binFrom = int(m_inputSampleRate / m_fmax);
282
    m_binTo = int(m_inputSampleRate / m_fmin);
283
284
    if (m_binTo >= (int)m_blockSize / 2) {
285
        m_binTo = m_blockSize / 2 - 1;
286
    }
287 56:d8eeba570d70 Chris
    if (m_binFrom >= m_binTo) {
288
        // shouldn't happen except for degenerate samplerate / blocksize combos
289
        m_binFrom = m_binTo - 1;
290
    }
291 3:9366c8a58778 Chris
292
    m_bins = (m_binTo - m_binFrom) + 1;
293
294
    reset();
295
296
    return true;
297
}
298
299
void
300 31:2c175adf8736 Chris
CepstralPitchTracker::reset()
301 3:9366c8a58778 Chris
{
302 56:d8eeba570d70 Chris
    delete m_feeder;
303 66:7ad142c710c6 Chris
    m_feeder = new AgentFeeder(m_slack);
304 57:82b3cdf6ca6b Chris
    m_nAccepted = 0;
305 3:9366c8a58778 Chris
}
306
307
void
308 35:2f5b169e4a3b Chris
CepstralPitchTracker::addFeaturesFrom(NoteHypothesis h, FeatureSet &fs)
309 30:2554aab152a5 Chris
{
310 35:2f5b169e4a3b Chris
    NoteHypothesis::Estimates es = h.getAcceptedEstimates();
311 30:2554aab152a5 Chris
312 35:2f5b169e4a3b Chris
    for (int i = 0; i < (int)es.size(); ++i) {
313 30:2554aab152a5 Chris
        Feature f;
314
        f.hasTimestamp = true;
315
        f.timestamp = es[i].time;
316
        f.values.push_back(es[i].freq);
317
        fs[0].push_back(f);
318
    }
319
320
    Feature nf;
321
    nf.hasTimestamp = true;
322
    nf.hasDuration = true;
323 35:2f5b169e4a3b Chris
    NoteHypothesis::Note n = h.getAveragedNote();
324 30:2554aab152a5 Chris
    nf.timestamp = n.time;
325
    nf.duration = n.duration;
326
    nf.values.push_back(n.freq);
327
    fs[1].push_back(nf);
328
}
329
330 57:82b3cdf6ca6b Chris
void
331
CepstralPitchTracker::addNewFeatures(FeatureSet &fs)
332
{
333
    int n = m_feeder->getAcceptedHypotheses().size();
334
    if (n == m_nAccepted) return;
335
336
    AgentFeeder::Hypotheses accepted = m_feeder->getAcceptedHypotheses();
337
338
    for (int i = m_nAccepted; i < n; ++i) {
339
        addFeaturesFrom(accepted[i], fs);
340
    }
341
342
    m_nAccepted = n;
343
}
344
345 31:2c175adf8736 Chris
// Analyse one block of input and return any features that became
// available as a result.
//
// Steps:
//   1. Compute the cepstrum of inputBuffers[0] and mean-filter the bins
//      in the range set up by initialise() (m_bins values starting at
//      m_binFrom).
//   2. Find the largest positive value in that range. If there is none,
//      return an empty FeatureSet without feeding the tracker.
//   3. Find the largest other strict local maximum, interpolate the main
//      peak's location and convert it to a frequency.
//   4. Emit the raw frequency on output 2, hand an estimate (frequency,
//      timestamp, confidence) to the feeder, and append features for any
//      hypotheses that have newly been accepted.
//
// Returns an empty FeatureSet if the plugin has not been successfully
// initialised.
CepstralPitchTracker::FeatureSet
CepstralPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
{
    const int n = m_bins;

    // No bin range, no block, or no feeder means initialise() has not
    // run (or failed): there is nothing to analyse and nothing to feed.
    if (n <= 0 || m_blockSize == 0 || !m_feeder) {
        return FeatureSet();
    }

    // Working buffers are vectors so they are released on every exit
    // path, including an exception thrown by one of the helpers.
    vector<double> data(n);
    double magmean = 0.0;
    {
        vector<double> rawcep(m_blockSize);
        // NOTE(review): magmean is presumably the mean spectral
        // magnitude of the block -- confirm against Cepstrum::process.
        magmean = Cepstrum(m_blockSize).process(inputBuffers[0], &rawcep[0]);
        MeanFilter(m_vflen).filterSubsequence
            (&rawcep[0], &data[0], m_blockSize, n, m_binFrom);
    }

    // Highest strictly positive value in the analysed range
    double maxval = 0.0;
    int maxbin = -1;

    for (int i = 0; i < n; ++i) {
        if (data[i] > maxval) {
            maxval = data[i];
            maxbin = i;
        }
    }

    if (maxbin < 0) {
        return FeatureSet();
    }

    // Highest strict local maximum other than the main peak; stays at
    // zero if no such positive peak exists.
    double nextPeakVal = 0.0;
    for (int i = 1; i+1 < n; ++i) {
        if (data[i] > data[i-1] &&
            data[i] > data[i+1] &&
            i != maxbin &&
            data[i] > nextPeakVal) {
            nextPeakVal = data[i];
        }
    }

    // Refine the peak position, then convert it to a frequency. The
    // position is relative to the analysed range, so m_binFrom is added
    // back before dividing the sample rate by it.
    PeakInterpolator pi;
    double cimax = pi.findPeakLocation(&data[0], n, maxbin);
    double peakfreq = m_inputSampleRate / (cimax + m_binFrom);

    FeatureSet fs;
    Feature rawf;
    rawf.hasTimestamp = false;
    rawf.hasDuration = false;
    rawf.values.push_back(peakfreq);
    fs[2].push_back(rawf);

    // Confidence is the margin of the main peak over the runner-up,
    // scaled by the sensitivity parameter. It is zero when there is no
    // runner-up peak or when magmean is below the silence threshold.
    double confidence = 0.0;
    if (nextPeakVal != 0.0) {
        confidence = (maxval - nextPeakVal) * m_sensitivity;
        if (magmean < m_threshold) confidence = 0.0;
    }

    NoteHypothesis::Estimate e;
    e.freq = peakfreq;
    e.time = timestamp;
    e.confidence = confidence;

    m_feeder->feed(e);

    addNewFeatures(fs);
    return fs;
}
413
414 31:2c175adf8736 Chris
// Tell the feeder that the input has ended, then return the features of
// any hypotheses accepted since the last report. Returns an empty
// FeatureSet if the plugin was never successfully initialised.
CepstralPitchTracker::FeatureSet
CepstralPitchTracker::getRemainingFeatures()
{
    FeatureSet fs;

    // m_feeder is only created by reset(), which initialise() calls on
    // success; without it there is nothing to finish or report.
    if (!m_feeder) return fs;

    m_feeder->finish();
    addNewFeatures(fs);
    return fs;
}