comparison CepstrumPitchTracker.cpp @ 8:10dfd77951bf track

Experimental tracker (on branch)
author Chris Cannam
date Mon, 25 Jun 2012 15:28:51 +0100
parents
children 0510372cb340
comparison
equal deleted inserted replaced
7:47355877a58d 8:10dfd77951bf
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2 /*
3 Permission is hereby granted, free of charge, to any person
4 obtaining a copy of this software and associated documentation
5 files (the "Software"), to deal in the Software without
6 restriction, including without limitation the rights to use, copy,
7 modify, merge, publish, distribute, sublicense, and/or sell copies
8 of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10
11 The above copyright notice and this permission notice shall be
12 included in all copies or substantial portions of the Software.
13
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "CepstrumPitchTracker.h"
24
25 #include <vector>
26 #include <algorithm>
27
28 #include <cstdio>
29 #include <cmath>
30 #include <complex>
31
32 using std::string;
33
34 CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
35 Plugin(inputSampleRate),
36 m_channels(0),
37 m_stepSize(256),
38 m_blockSize(1024),
39 m_fmin(50),
40 m_fmax(1000),
41 m_histlen(3),
42 m_binFrom(0),
43 m_binTo(0),
44 m_bins(0),
45 m_history(0)
46 {
47 }
48
49 CepstrumPitchTracker::~CepstrumPitchTracker()
50 {
51 if (m_history) {
52 for (int i = 0; i < m_histlen; ++i) {
53 delete[] m_history[i];
54 }
55 delete[] m_history;
56 }
57 }
58
59 string
60 CepstrumPitchTracker::getIdentifier() const
61 {
62 return "cepstrum-pitch";
63 }
64
65 string
66 CepstrumPitchTracker::getName() const
67 {
68 return "Cepstrum Pitch Tracker";
69 }
70
71 string
72 CepstrumPitchTracker::getDescription() const
73 {
74 return "Estimate f0 of monophonic material using a cepstrum method.";
75 }
76
77 string
78 CepstrumPitchTracker::getMaker() const
79 {
80 return "Chris Cannam";
81 }
82
83 int
84 CepstrumPitchTracker::getPluginVersion() const
85 {
86 // Increment this each time you release a version that behaves
87 // differently from the previous one
88 return 1;
89 }
90
91 string
92 CepstrumPitchTracker::getCopyright() const
93 {
94 return "Freely redistributable (BSD license)";
95 }
96
97 CepstrumPitchTracker::InputDomain
98 CepstrumPitchTracker::getInputDomain() const
99 {
100 return FrequencyDomain;
101 }
102
103 size_t
104 CepstrumPitchTracker::getPreferredBlockSize() const
105 {
106 return 1024;
107 }
108
109 size_t
110 CepstrumPitchTracker::getPreferredStepSize() const
111 {
112 return 256;
113 }
114
115 size_t
116 CepstrumPitchTracker::getMinChannelCount() const
117 {
118 return 1;
119 }
120
121 size_t
122 CepstrumPitchTracker::getMaxChannelCount() const
123 {
124 return 1;
125 }
126
127 CepstrumPitchTracker::ParameterList
128 CepstrumPitchTracker::getParameterDescriptors() const
129 {
130 ParameterList list;
131 return list;
132 }
133
134 float
135 CepstrumPitchTracker::getParameter(string identifier) const
136 {
137 return 0.f;
138 }
139
140 void
141 CepstrumPitchTracker::setParameter(string identifier, float value)
142 {
143 }
144
145 CepstrumPitchTracker::ProgramList
146 CepstrumPitchTracker::getPrograms() const
147 {
148 ProgramList list;
149 return list;
150 }
151
152 string
153 CepstrumPitchTracker::getCurrentProgram() const
154 {
155 return ""; // no programs
156 }
157
158 void
159 CepstrumPitchTracker::selectProgram(string name)
160 {
161 }
162
163 CepstrumPitchTracker::OutputList
164 CepstrumPitchTracker::getOutputDescriptors() const
165 {
166 OutputList outputs;
167
168 int n = 0;
169
170 OutputDescriptor d;
171
172 d.identifier = "f0";
173 d.name = "Estimated f0";
174 d.description = "Estimated fundamental frequency";
175 d.unit = "Hz";
176 d.hasFixedBinCount = true;
177 d.binCount = 1;
178 d.hasKnownExtents = true;
179 d.minValue = m_fmin;
180 d.maxValue = m_fmax;
181 d.isQuantized = false;
182 d.sampleType = OutputDescriptor::FixedSampleRate;
183 d.sampleRate = (m_inputSampleRate / m_stepSize);
184 d.hasDuration = false;
185 outputs.push_back(d);
186
187 return outputs;
188 }
189
190 bool
191 CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
192 {
193 if (channels < getMinChannelCount() ||
194 channels > getMaxChannelCount()) return false;
195
196 // std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
197 // << ", stepSize = " << stepSize << ", blockSize = " << blockSize
198 // << std::endl;
199
200 m_channels = channels;
201 m_stepSize = stepSize;
202 m_blockSize = blockSize;
203
204 m_binFrom = int(m_inputSampleRate / m_fmax);
205 m_binTo = int(m_inputSampleRate / m_fmin);
206
207 if (m_binTo >= (int)m_blockSize / 2) {
208 m_binTo = m_blockSize / 2 - 1;
209 }
210
211 m_bins = (m_binTo - m_binFrom) + 1;
212
213 m_history = new double *[m_histlen];
214 for (int i = 0; i < m_histlen; ++i) {
215 m_history[i] = new double[m_bins];
216 }
217
218 reset();
219
220 return true;
221 }
222
223 void
224 CepstrumPitchTracker::reset()
225 {
226 for (int i = 0; i < m_histlen; ++i) {
227 for (int j = 0; j < m_bins; ++j) {
228 m_history[i][j] = 0.0;
229 }
230 }
231 }
232
233 void
234 CepstrumPitchTracker::filter(const double *cep, double *result)
235 {
236 int hix = m_histlen - 1; // current history index
237
238 // roll back the history
239 if (m_histlen > 1) {
240 double *oldest = m_history[0];
241 for (int i = 1; i < m_histlen; ++i) {
242 m_history[i-1] = m_history[i];
243 }
244 // and stick this back in the newest spot, to recycle
245 m_history[hix] = oldest;
246 }
247
248 for (int i = 0; i < m_bins; ++i) {
249 m_history[hix][i] = cep[i + m_binFrom];
250 }
251
252 for (int i = 0; i < m_bins; ++i) {
253 double mean = 0.0;
254 for (int j = 0; j < m_histlen; ++j) {
255 mean += m_history[j][i];
256 }
257 mean /= m_histlen;
258 result[i] = mean;
259 }
260 }
261
262 CepstrumPitchTracker::FeatureSet
263 CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
264 {
265 FeatureSet fs;
266
267 int bs = m_blockSize;
268 int hs = m_blockSize/2 + 1;
269
270 double *rawcep = new double[bs];
271 double *io = new double[bs];
272 double *logmag = new double[bs];
273
274 // The "forward difference" method
275
276 for (int i = 0; i < hs; ++i) {
277
278 double power =
279 inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] +
280 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
281 double mag = sqrt(power);
282
283 double lm = log(mag + 0.00000001);
284
285 logmag[bs/2 + i - 1] = lm;
286 if (i < hs-1) {
287 logmag[bs/2 - i - 1] = lm;
288 }
289 }
290
291 fft(bs, false, logmag, 0, rawcep, io);
292
293 for (int i = 0; i < hs; ++i) {
294 rawcep[i] = fabs(io[i]) - fabs(rawcep[i]);
295 }
296
297 delete[] logmag;
298 delete[] io;
299
300 int n = m_bins;
301 double *data = new double[n];
302 filter(rawcep, data);
303 delete[] rawcep;
304
305 double maxval = 0.0;
306 int maxbin = 0;
307 double abstot = 0.0;
308
309 for (int i = 0; i < n; ++i) {
310 if (data[i] > maxval) {
311 maxval = data[i];
312 maxbin = i;
313 }
314 abstot += fabs(data[i]);
315 }
316
317 double aroundPeak = 0.0;
318 double peakProportion = 0.0;
319 if (maxval > 0.0) {
320 aroundPeak += fabs(maxval);
321 int i = maxbin - 1;
322 while (i > 0 && data[i] <= data[i+1]) {
323 aroundPeak += fabs(data[i]);
324 --i;
325 }
326 i = maxbin + 1;
327 while (i < n && data[i] <= data[i-1]) {
328 aroundPeak += fabs(data[i]);
329 ++i;
330 }
331 }
332 peakProportion = aroundPeak / abstot;
333
334 // std::cerr << "peakProportion = " << peakProportion << std::endl;
335 // std::cerr << "peak = " << m_inputSampleRate / (maxbin + m_binFrom) << std::endl;
336
337 if (peakProportion >= 0.03) {
338 Feature f;
339 f.hasTimestamp = true;
340 f.timestamp = timestamp;
341 f.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
342 fs[0].push_back(f);
343 }
344
345 delete[] data;
346 return fs;
347 }
348
349 CepstrumPitchTracker::FeatureSet
350 CepstrumPitchTracker::getRemainingFeatures()
351 {
352 FeatureSet fs;
353 return fs;
354 }
355
356 void
357 CepstrumPitchTracker::fft(unsigned int n, bool inverse,
358 double *ri, double *ii, double *ro, double *io)
359 {
360 if (!ri || !ro || !io) return;
361
362 unsigned int bits;
363 unsigned int i, j, k, m;
364 unsigned int blockSize, blockEnd;
365
366 double tr, ti;
367
368 if (n < 2) return;
369 if (n & (n-1)) return;
370
371 double angle = 2.0 * M_PI;
372 if (inverse) angle = -angle;
373
374 for (i = 0; ; ++i) {
375 if (n & (1 << i)) {
376 bits = i;
377 break;
378 }
379 }
380
381 static unsigned int tableSize = 0;
382 static int *table = 0;
383
384 if (tableSize != n) {
385
386 delete[] table;
387
388 table = new int[n];
389
390 for (i = 0; i < n; ++i) {
391
392 m = i;
393
394 for (j = k = 0; j < bits; ++j) {
395 k = (k << 1) | (m & 1);
396 m >>= 1;
397 }
398
399 table[i] = k;
400 }
401
402 tableSize = n;
403 }
404
405 if (ii) {
406 for (i = 0; i < n; ++i) {
407 ro[table[i]] = ri[i];
408 io[table[i]] = ii[i];
409 }
410 } else {
411 for (i = 0; i < n; ++i) {
412 ro[table[i]] = ri[i];
413 io[table[i]] = 0.0;
414 }
415 }
416
417 blockEnd = 1;
418
419 for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
420
421 double delta = angle / (double)blockSize;
422 double sm2 = -sin(-2 * delta);
423 double sm1 = -sin(-delta);
424 double cm2 = cos(-2 * delta);
425 double cm1 = cos(-delta);
426 double w = 2 * cm1;
427 double ar[3], ai[3];
428
429 for (i = 0; i < n; i += blockSize) {
430
431 ar[2] = cm2;
432 ar[1] = cm1;
433
434 ai[2] = sm2;
435 ai[1] = sm1;
436
437 for (j = i, m = 0; m < blockEnd; j++, m++) {
438
439 ar[0] = w * ar[1] - ar[2];
440 ar[2] = ar[1];
441 ar[1] = ar[0];
442
443 ai[0] = w * ai[1] - ai[2];
444 ai[2] = ai[1];
445 ai[1] = ai[0];
446
447 k = j + blockEnd;
448 tr = ar[0] * ro[k] - ai[0] * io[k];
449 ti = ar[0] * io[k] + ai[0] * ro[k];
450
451 ro[k] = ro[j] - tr;
452 io[k] = io[j] - ti;
453
454 ro[j] += tr;
455 io[j] += ti;
456 }
457 }
458
459 blockEnd = blockSize;
460 }
461 }
462
463