To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / CepstralPitchTracker.cpp @ 47:f72a470fe4b5

History | View | Annotate | Download (10.8 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
/*
3
    This file is Copyright (c) 2012 Chris Cannam
4
  
5
    Permission is hereby granted, free of charge, to any person
6
    obtaining a copy of this software and associated documentation
7
    files (the "Software"), to deal in the Software without
8
    restriction, including without limitation the rights to use, copy,
9
    modify, merge, publish, distribute, sublicense, and/or sell copies
10
    of the Software, and to permit persons to whom the Software is
11
    furnished to do so, subject to the following conditions:
12

13
    The above copyright notice and this permission notice shall be
14
    included in all copies or substantial portions of the Software.
15

16
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
20
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
21
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
*/
24

    
25
#include "CepstralPitchTracker.h"
26
#include "MeanFilter.h"
27

    
28
#include "vamp-sdk/FFT.h"
29

    
30
#include <vector>
31
#include <algorithm>
32

    
33
#include <cstdio>
34
#include <cmath>
35
#include <complex>
36

    
37
using std::string;
38
using std::vector;
39
using Vamp::RealTime;
40

    
41

    
42
CepstralPitchTracker::CepstralPitchTracker(float inputSampleRate) :
43
    Plugin(inputSampleRate),
44
    m_channels(0),
45
    m_stepSize(256),
46
    m_blockSize(1024),
47
    m_fmin(50),
48
    m_fmax(900),
49
    m_vflen(1),
50
    m_binFrom(0),
51
    m_binTo(0),
52
    m_bins(0)
53
{
54
}
55

    
56
CepstralPitchTracker::~CepstralPitchTracker()
57
{
58
}
59

    
60
string
61
CepstralPitchTracker::getIdentifier() const
62
{
63
    return "cepstral-pitchtracker";
64
}
65

    
66
string
67
CepstralPitchTracker::getName() const
68
{
69
    return "Cepstral Pitch Tracker";
70
}
71

    
72
string
73
CepstralPitchTracker::getDescription() const
74
{
75
    return "Estimate f0 of monophonic material using a cepstrum method.";
76
}
77

    
78
string
79
CepstralPitchTracker::getMaker() const
80
{
81
    return "Chris Cannam";
82
}
83

    
84
int
85
CepstralPitchTracker::getPluginVersion() const
86
{
87
    // Increment this each time you release a version that behaves
88
    // differently from the previous one
89
    return 1;
90
}
91

    
92
string
93
CepstralPitchTracker::getCopyright() const
94
{
95
    return "Freely redistributable (BSD license)";
96
}
97

    
98
CepstralPitchTracker::InputDomain
99
CepstralPitchTracker::getInputDomain() const
100
{
101
    return FrequencyDomain;
102
}
103

    
104
size_t
105
CepstralPitchTracker::getPreferredBlockSize() const
106
{
107
    return 1024;
108
}
109

    
110
size_t 
111
CepstralPitchTracker::getPreferredStepSize() const
112
{
113
    return 256;
114
}
115

    
116
size_t
117
CepstralPitchTracker::getMinChannelCount() const
118
{
119
    return 1;
120
}
121

    
122
size_t
123
CepstralPitchTracker::getMaxChannelCount() const
124
{
125
    return 1;
126
}
127

    
128
CepstralPitchTracker::ParameterList
129
CepstralPitchTracker::getParameterDescriptors() const
130
{
131
    ParameterList list;
132
    return list;
133
}
134

    
135
float
136
CepstralPitchTracker::getParameter(string identifier) const
137
{
138
    return 0.f;
139
}
140

    
141
void
142
CepstralPitchTracker::setParameter(string identifier, float value) 
143
{
144
}
145

    
146
CepstralPitchTracker::ProgramList
147
CepstralPitchTracker::getPrograms() const
148
{
149
    ProgramList list;
150
    return list;
151
}
152

    
153
string
154
CepstralPitchTracker::getCurrentProgram() const
155
{
156
    return ""; // no programs
157
}
158

    
159
void
160
CepstralPitchTracker::selectProgram(string name)
161
{
162
}
163

    
164
CepstralPitchTracker::OutputList
165
CepstralPitchTracker::getOutputDescriptors() const
166
{
167
    OutputList outputs;
168

    
169
    OutputDescriptor d;
170

    
171
    d.identifier = "f0";
172
    d.name = "Estimated f0";
173
    d.description = "Estimated fundamental frequency";
174
    d.unit = "Hz";
175
    d.hasFixedBinCount = true;
176
    d.binCount = 1;
177
    d.hasKnownExtents = true;
178
    d.minValue = m_fmin;
179
    d.maxValue = m_fmax;
180
    d.isQuantized = false;
181
    d.sampleType = OutputDescriptor::FixedSampleRate;
182
    d.sampleRate = (m_inputSampleRate / m_stepSize);
183
    d.hasDuration = false;
184
    outputs.push_back(d);
185

    
186
    d.identifier = "notes";
187
    d.name = "Notes";
188
    d.description = "Derived fixed-pitch note frequencies";
189
    d.unit = "Hz";
190
    d.hasFixedBinCount = true;
191
    d.binCount = 1;
192
    d.hasKnownExtents = true;
193
    d.minValue = m_fmin;
194
    d.maxValue = m_fmax;
195
    d.isQuantized = false;
196
    d.sampleType = OutputDescriptor::FixedSampleRate;
197
    d.sampleRate = (m_inputSampleRate / m_stepSize);
198
    d.hasDuration = true;
199
    outputs.push_back(d);
200

    
201
    return outputs;
202
}
203

    
204
bool
205
CepstralPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
206
{
207
    if (channels < getMinChannelCount() ||
208
        channels > getMaxChannelCount()) return false;
209

    
210
//    std::cerr << "CepstralPitchTracker::initialise: channels = " << channels
211
//              << ", stepSize = " << stepSize << ", blockSize = " << blockSize
212
//              << std::endl;
213

    
214
    m_channels = channels;
215
    m_stepSize = stepSize;
216
    m_blockSize = blockSize;
217

    
218
    m_binFrom = int(m_inputSampleRate / m_fmax);
219
    m_binTo = int(m_inputSampleRate / m_fmin); 
220

    
221
    if (m_binTo >= (int)m_blockSize / 2) {
222
        m_binTo = m_blockSize / 2 - 1;
223
    }
224

    
225
    m_bins = (m_binTo - m_binFrom) + 1;
226

    
227
    reset();
228

    
229
    return true;
230
}
231

    
232
void
233
CepstralPitchTracker::reset()
234
{
235
}
236

    
237
void
238
CepstralPitchTracker::addFeaturesFrom(NoteHypothesis h, FeatureSet &fs)
239
{
240
    NoteHypothesis::Estimates es = h.getAcceptedEstimates();
241

    
242
    for (int i = 0; i < (int)es.size(); ++i) {
243
        Feature f;
244
        f.hasTimestamp = true;
245
        f.timestamp = es[i].time;
246
        f.values.push_back(es[i].freq);
247
        fs[0].push_back(f);
248
    }
249

    
250
    Feature nf;
251
    nf.hasTimestamp = true;
252
    nf.hasDuration = true;
253
    NoteHypothesis::Note n = h.getAveragedNote();
254
    nf.timestamp = n.time;
255
    nf.duration = n.duration;
256
    nf.values.push_back(n.freq);
257
    fs[1].push_back(nf);
258
}
259

    
260
double
261
CepstralPitchTracker::cubicInterpolate(const double y[4], double x)
262
{
263
    double a0 = y[3] - y[2] - y[0] + y[1];
264
    double a1 = y[0] - y[1] - a0;
265
    double a2 = y[2] - y[0];
266
    double a3 = y[1];
267
    return
268
        a0 * x * x * x +
269
        a1 * x * x +
270
        a2 * x +
271
        a3;
272
}
273

    
274
double
275
CepstralPitchTracker::findInterpolatedPeak(const double *in, int maxbin)
276
{
277
    if (maxbin < 2 || maxbin > m_bins - 3) {
278
        return maxbin;
279
    }
280

    
281
    double maxval = 0.0;
282
    double maxidx = maxbin;
283

    
284
    const int divisions = 10;
285
    double y[4];
286

    
287
    y[0] = in[maxbin-1];
288
    y[1] = in[maxbin];
289
    y[2] = in[maxbin+1];
290
    y[3] = in[maxbin+2];
291
    for (int i = 0; i < divisions; ++i) {
292
        double probe = double(i) / double(divisions);
293
        double value = cubicInterpolate(y, probe);
294
        if (value > maxval) {
295
            maxval = value; 
296
            maxidx = maxbin + probe;
297
        }
298
    }
299

    
300
    y[3] = y[2];
301
    y[2] = y[1];
302
    y[1] = y[0];
303
    y[0] = in[maxbin-2];
304
    for (int i = 0; i < divisions; ++i) {
305
        double probe = double(i) / double(divisions);
306
        double value = cubicInterpolate(y, probe);
307
        if (value > maxval) {
308
            maxval = value; 
309
            maxidx = maxbin - 1 + probe;
310
        }
311
    }
312

    
313
/*
314
    std::cerr << "centre = " << maxbin << ": ["
315
              << in[maxbin-2] << ","
316
              << in[maxbin-1] << ","
317
              << in[maxbin] << ","
318
              << in[maxbin+1] << ","
319
              << in[maxbin+2] << "] -> " << maxidx << std::endl;
320
*/
321

    
322
    return maxidx;
323
}
324

    
325
CepstralPitchTracker::FeatureSet
326
CepstralPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
327
{
328
    FeatureSet fs;
329

    
330
    int bs = m_blockSize;
331
    int hs = m_blockSize/2 + 1;
332

    
333
    double *rawcep = new double[bs];
334
    double *io = new double[bs];
335
    double *logmag = new double[bs];
336

    
337
    // The "inverse symmetric" method. Seems to be the most reliable
338
        
339
    double magmean = 0.0;
340

    
341
    for (int i = 0; i < hs; ++i) {
342

    
343
        double power =
344
            inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
345
            inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
346
        double mag = sqrt(power);
347

    
348
        magmean += mag;
349

    
350
        double lm = log(mag + 0.00000001);
351
        
352
        logmag[i] = lm;
353
        if (i > 0) logmag[bs - i] = lm;
354
    }
355

    
356
    magmean /= hs;
357
    double threshold = 0.1; // for magmean
358
    
359
    Vamp::FFT::inverse(bs, logmag, 0, rawcep, io);
360
    
361
    delete[] logmag;
362
    delete[] io;
363

    
364
    int n = m_bins;
365
    double *data = new double[n];
366
    MeanFilter(m_vflen).filterSubsequence(rawcep, data, m_blockSize, n, m_binFrom);
367
    delete[] rawcep;
368

    
369
    double maxval = 0.0;
370
    int maxbin = -1;
371

    
372
    for (int i = 0; i < n; ++i) {
373
        if (data[i] > maxval) {
374
            maxval = data[i];
375
            maxbin = i;
376
        }
377
    }
378

    
379
    if (maxbin < 0) {
380
        delete[] data;
381
        return fs;
382
    }
383

    
384
    double nextPeakVal = 0.0;
385
    for (int i = 1; i+1 < n; ++i) {
386
        if (data[i] > data[i-1] &&
387
            data[i] > data[i+1] &&
388
            i != maxbin &&
389
            data[i] > nextPeakVal) {
390
            nextPeakVal = data[i];
391
        }
392
    }
393

    
394
    double cimax = findInterpolatedPeak(data, maxbin);
395
    double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
396

    
397
    double confidence = 0.0;
398
    if (nextPeakVal != 0.0) {
399
        confidence = (maxval - nextPeakVal) * 10.0;
400
        if (magmean < threshold) confidence = 0.0;
401
//        std::cerr << "magmean = " << magmean << ", confidence = " << confidence << std::endl;
402
    }
403

    
404
    NoteHypothesis::Estimate e;
405
    e.freq = peakfreq;
406
    e.time = timestamp;
407
    e.confidence = confidence;
408

    
409
    if (!m_good.accept(e)) {
410

    
411
        int candidate = -1;
412
        bool accepted = false;
413

    
414
        for (int i = 0; i < (int)m_possible.size(); ++i) {
415
            if (m_possible[i].accept(e)) {
416
                if (m_possible[i].getState() == NoteHypothesis::Satisfied) {
417
                    accepted = true;
418
                    candidate = i;
419
                }
420
                break;
421
            }
422
        }
423

    
424
        if (!accepted) {
425
            NoteHypothesis h;
426
            h.accept(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
427
            m_possible.push_back(h);
428
        }
429

    
430
        if (m_good.getState() == NoteHypothesis::Expired) {
431
            addFeaturesFrom(m_good, fs);
432
        }
433
        
434
        if (m_good.getState() == NoteHypothesis::Expired ||
435
            m_good.getState() == NoteHypothesis::Rejected) {
436
            if (candidate >= 0) {
437
                m_good = m_possible[candidate];
438
            } else {
439
                m_good = NoteHypothesis();
440
            }
441
        }
442

    
443
        // reap rejected/expired hypotheses from possible list
444
        Hypotheses toReap = m_possible;
445
        m_possible.clear();
446
        for (int i = 0; i < (int)toReap.size(); ++i) {
447
            NoteHypothesis h = toReap[i];
448
            if (h.getState() != NoteHypothesis::Rejected && 
449
                h.getState() != NoteHypothesis::Expired) {
450
                m_possible.push_back(h);
451
            }
452
        }
453
    }  
454

    
455
    delete[] data;
456
    return fs;
457
}
458

    
459
CepstralPitchTracker::FeatureSet
460
CepstralPitchTracker::getRemainingFeatures()
461
{
462
    FeatureSet fs;
463
    if (m_good.getState() == NoteHypothesis::Satisfied) {
464
        addFeaturesFrom(m_good, fs);
465
    }
466
    return fs;
467
}