Mercurial > hg > vamp-simple-cepstrum
comparison CepstrumPitchTracker.cpp @ 8:10dfd77951bf track
Experimental tracker (on branch)
author | Chris Cannam |
---|---|
date | Mon, 25 Jun 2012 15:28:51 +0100 |
parents | |
children | 0510372cb340 |
comparison
equal
deleted
inserted
replaced
7:47355877a58d | 8:10dfd77951bf |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 /* | |
3 Permission is hereby granted, free of charge, to any person | |
4 obtaining a copy of this software and associated documentation | |
5 files (the "Software"), to deal in the Software without | |
6 restriction, including without limitation the rights to use, copy, | |
7 modify, merge, publish, distribute, sublicense, and/or sell copies | |
8 of the Software, and to permit persons to whom the Software is | |
9 furnished to do so, subject to the following conditions: | |
10 | |
11 The above copyright notice and this permission notice shall be | |
12 included in all copies or substantial portions of the Software. | |
13 | |
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
15 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
17 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR | |
18 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF | |
19 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION | |
20 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
21 */ | |
22 | |
23 #include "CepstrumPitchTracker.h" | |
24 | |
25 #include <vector> | |
26 #include <algorithm> | |
27 | |
28 #include <cstdio> | |
29 #include <cmath> | |
30 #include <complex> | |
31 | |
32 using std::string; | |
33 | |
34 CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) : | |
35 Plugin(inputSampleRate), | |
36 m_channels(0), | |
37 m_stepSize(256), | |
38 m_blockSize(1024), | |
39 m_fmin(50), | |
40 m_fmax(1000), | |
41 m_histlen(3), | |
42 m_binFrom(0), | |
43 m_binTo(0), | |
44 m_bins(0), | |
45 m_history(0) | |
46 { | |
47 } | |
48 | |
49 CepstrumPitchTracker::~CepstrumPitchTracker() | |
50 { | |
51 if (m_history) { | |
52 for (int i = 0; i < m_histlen; ++i) { | |
53 delete[] m_history[i]; | |
54 } | |
55 delete[] m_history; | |
56 } | |
57 } | |
58 | |
59 string | |
60 CepstrumPitchTracker::getIdentifier() const | |
61 { | |
62 return "cepstrum-pitch"; | |
63 } | |
64 | |
65 string | |
66 CepstrumPitchTracker::getName() const | |
67 { | |
68 return "Cepstrum Pitch Tracker"; | |
69 } | |
70 | |
71 string | |
72 CepstrumPitchTracker::getDescription() const | |
73 { | |
74 return "Estimate f0 of monophonic material using a cepstrum method."; | |
75 } | |
76 | |
77 string | |
78 CepstrumPitchTracker::getMaker() const | |
79 { | |
80 return "Chris Cannam"; | |
81 } | |
82 | |
83 int | |
84 CepstrumPitchTracker::getPluginVersion() const | |
85 { | |
86 // Increment this each time you release a version that behaves | |
87 // differently from the previous one | |
88 return 1; | |
89 } | |
90 | |
91 string | |
92 CepstrumPitchTracker::getCopyright() const | |
93 { | |
94 return "Freely redistributable (BSD license)"; | |
95 } | |
96 | |
97 CepstrumPitchTracker::InputDomain | |
98 CepstrumPitchTracker::getInputDomain() const | |
99 { | |
100 return FrequencyDomain; | |
101 } | |
102 | |
103 size_t | |
104 CepstrumPitchTracker::getPreferredBlockSize() const | |
105 { | |
106 return 1024; | |
107 } | |
108 | |
109 size_t | |
110 CepstrumPitchTracker::getPreferredStepSize() const | |
111 { | |
112 return 256; | |
113 } | |
114 | |
115 size_t | |
116 CepstrumPitchTracker::getMinChannelCount() const | |
117 { | |
118 return 1; | |
119 } | |
120 | |
121 size_t | |
122 CepstrumPitchTracker::getMaxChannelCount() const | |
123 { | |
124 return 1; | |
125 } | |
126 | |
127 CepstrumPitchTracker::ParameterList | |
128 CepstrumPitchTracker::getParameterDescriptors() const | |
129 { | |
130 ParameterList list; | |
131 return list; | |
132 } | |
133 | |
134 float | |
135 CepstrumPitchTracker::getParameter(string identifier) const | |
136 { | |
137 return 0.f; | |
138 } | |
139 | |
140 void | |
141 CepstrumPitchTracker::setParameter(string identifier, float value) | |
142 { | |
143 } | |
144 | |
145 CepstrumPitchTracker::ProgramList | |
146 CepstrumPitchTracker::getPrograms() const | |
147 { | |
148 ProgramList list; | |
149 return list; | |
150 } | |
151 | |
152 string | |
153 CepstrumPitchTracker::getCurrentProgram() const | |
154 { | |
155 return ""; // no programs | |
156 } | |
157 | |
158 void | |
159 CepstrumPitchTracker::selectProgram(string name) | |
160 { | |
161 } | |
162 | |
163 CepstrumPitchTracker::OutputList | |
164 CepstrumPitchTracker::getOutputDescriptors() const | |
165 { | |
166 OutputList outputs; | |
167 | |
168 int n = 0; | |
169 | |
170 OutputDescriptor d; | |
171 | |
172 d.identifier = "f0"; | |
173 d.name = "Estimated f0"; | |
174 d.description = "Estimated fundamental frequency"; | |
175 d.unit = "Hz"; | |
176 d.hasFixedBinCount = true; | |
177 d.binCount = 1; | |
178 d.hasKnownExtents = true; | |
179 d.minValue = m_fmin; | |
180 d.maxValue = m_fmax; | |
181 d.isQuantized = false; | |
182 d.sampleType = OutputDescriptor::FixedSampleRate; | |
183 d.sampleRate = (m_inputSampleRate / m_stepSize); | |
184 d.hasDuration = false; | |
185 outputs.push_back(d); | |
186 | |
187 return outputs; | |
188 } | |
189 | |
190 bool | |
191 CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize) | |
192 { | |
193 if (channels < getMinChannelCount() || | |
194 channels > getMaxChannelCount()) return false; | |
195 | |
196 // std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels | |
197 // << ", stepSize = " << stepSize << ", blockSize = " << blockSize | |
198 // << std::endl; | |
199 | |
200 m_channels = channels; | |
201 m_stepSize = stepSize; | |
202 m_blockSize = blockSize; | |
203 | |
204 m_binFrom = int(m_inputSampleRate / m_fmax); | |
205 m_binTo = int(m_inputSampleRate / m_fmin); | |
206 | |
207 if (m_binTo >= (int)m_blockSize / 2) { | |
208 m_binTo = m_blockSize / 2 - 1; | |
209 } | |
210 | |
211 m_bins = (m_binTo - m_binFrom) + 1; | |
212 | |
213 m_history = new double *[m_histlen]; | |
214 for (int i = 0; i < m_histlen; ++i) { | |
215 m_history[i] = new double[m_bins]; | |
216 } | |
217 | |
218 reset(); | |
219 | |
220 return true; | |
221 } | |
222 | |
223 void | |
224 CepstrumPitchTracker::reset() | |
225 { | |
226 for (int i = 0; i < m_histlen; ++i) { | |
227 for (int j = 0; j < m_bins; ++j) { | |
228 m_history[i][j] = 0.0; | |
229 } | |
230 } | |
231 } | |
232 | |
233 void | |
234 CepstrumPitchTracker::filter(const double *cep, double *result) | |
235 { | |
236 int hix = m_histlen - 1; // current history index | |
237 | |
238 // roll back the history | |
239 if (m_histlen > 1) { | |
240 double *oldest = m_history[0]; | |
241 for (int i = 1; i < m_histlen; ++i) { | |
242 m_history[i-1] = m_history[i]; | |
243 } | |
244 // and stick this back in the newest spot, to recycle | |
245 m_history[hix] = oldest; | |
246 } | |
247 | |
248 for (int i = 0; i < m_bins; ++i) { | |
249 m_history[hix][i] = cep[i + m_binFrom]; | |
250 } | |
251 | |
252 for (int i = 0; i < m_bins; ++i) { | |
253 double mean = 0.0; | |
254 for (int j = 0; j < m_histlen; ++j) { | |
255 mean += m_history[j][i]; | |
256 } | |
257 mean /= m_histlen; | |
258 result[i] = mean; | |
259 } | |
260 } | |
261 | |
262 CepstrumPitchTracker::FeatureSet | |
263 CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp) | |
264 { | |
265 FeatureSet fs; | |
266 | |
267 int bs = m_blockSize; | |
268 int hs = m_blockSize/2 + 1; | |
269 | |
270 double *rawcep = new double[bs]; | |
271 double *io = new double[bs]; | |
272 double *logmag = new double[bs]; | |
273 | |
274 // The "forward difference" method | |
275 | |
276 for (int i = 0; i < hs; ++i) { | |
277 | |
278 double power = | |
279 inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] + | |
280 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1]; | |
281 double mag = sqrt(power); | |
282 | |
283 double lm = log(mag + 0.00000001); | |
284 | |
285 logmag[bs/2 + i - 1] = lm; | |
286 if (i < hs-1) { | |
287 logmag[bs/2 - i - 1] = lm; | |
288 } | |
289 } | |
290 | |
291 fft(bs, false, logmag, 0, rawcep, io); | |
292 | |
293 for (int i = 0; i < hs; ++i) { | |
294 rawcep[i] = fabs(io[i]) - fabs(rawcep[i]); | |
295 } | |
296 | |
297 delete[] logmag; | |
298 delete[] io; | |
299 | |
300 int n = m_bins; | |
301 double *data = new double[n]; | |
302 filter(rawcep, data); | |
303 delete[] rawcep; | |
304 | |
305 double maxval = 0.0; | |
306 int maxbin = 0; | |
307 double abstot = 0.0; | |
308 | |
309 for (int i = 0; i < n; ++i) { | |
310 if (data[i] > maxval) { | |
311 maxval = data[i]; | |
312 maxbin = i; | |
313 } | |
314 abstot += fabs(data[i]); | |
315 } | |
316 | |
317 double aroundPeak = 0.0; | |
318 double peakProportion = 0.0; | |
319 if (maxval > 0.0) { | |
320 aroundPeak += fabs(maxval); | |
321 int i = maxbin - 1; | |
322 while (i > 0 && data[i] <= data[i+1]) { | |
323 aroundPeak += fabs(data[i]); | |
324 --i; | |
325 } | |
326 i = maxbin + 1; | |
327 while (i < n && data[i] <= data[i-1]) { | |
328 aroundPeak += fabs(data[i]); | |
329 ++i; | |
330 } | |
331 } | |
332 peakProportion = aroundPeak / abstot; | |
333 | |
334 // std::cerr << "peakProportion = " << peakProportion << std::endl; | |
335 // std::cerr << "peak = " << m_inputSampleRate / (maxbin + m_binFrom) << std::endl; | |
336 | |
337 if (peakProportion >= 0.03) { | |
338 Feature f; | |
339 f.hasTimestamp = true; | |
340 f.timestamp = timestamp; | |
341 f.values.push_back(m_inputSampleRate / (maxbin + m_binFrom)); | |
342 fs[0].push_back(f); | |
343 } | |
344 | |
345 delete[] data; | |
346 return fs; | |
347 } | |
348 | |
349 CepstrumPitchTracker::FeatureSet | |
350 CepstrumPitchTracker::getRemainingFeatures() | |
351 { | |
352 FeatureSet fs; | |
353 return fs; | |
354 } | |
355 | |
356 void | |
357 CepstrumPitchTracker::fft(unsigned int n, bool inverse, | |
358 double *ri, double *ii, double *ro, double *io) | |
359 { | |
360 if (!ri || !ro || !io) return; | |
361 | |
362 unsigned int bits; | |
363 unsigned int i, j, k, m; | |
364 unsigned int blockSize, blockEnd; | |
365 | |
366 double tr, ti; | |
367 | |
368 if (n < 2) return; | |
369 if (n & (n-1)) return; | |
370 | |
371 double angle = 2.0 * M_PI; | |
372 if (inverse) angle = -angle; | |
373 | |
374 for (i = 0; ; ++i) { | |
375 if (n & (1 << i)) { | |
376 bits = i; | |
377 break; | |
378 } | |
379 } | |
380 | |
381 static unsigned int tableSize = 0; | |
382 static int *table = 0; | |
383 | |
384 if (tableSize != n) { | |
385 | |
386 delete[] table; | |
387 | |
388 table = new int[n]; | |
389 | |
390 for (i = 0; i < n; ++i) { | |
391 | |
392 m = i; | |
393 | |
394 for (j = k = 0; j < bits; ++j) { | |
395 k = (k << 1) | (m & 1); | |
396 m >>= 1; | |
397 } | |
398 | |
399 table[i] = k; | |
400 } | |
401 | |
402 tableSize = n; | |
403 } | |
404 | |
405 if (ii) { | |
406 for (i = 0; i < n; ++i) { | |
407 ro[table[i]] = ri[i]; | |
408 io[table[i]] = ii[i]; | |
409 } | |
410 } else { | |
411 for (i = 0; i < n; ++i) { | |
412 ro[table[i]] = ri[i]; | |
413 io[table[i]] = 0.0; | |
414 } | |
415 } | |
416 | |
417 blockEnd = 1; | |
418 | |
419 for (blockSize = 2; blockSize <= n; blockSize <<= 1) { | |
420 | |
421 double delta = angle / (double)blockSize; | |
422 double sm2 = -sin(-2 * delta); | |
423 double sm1 = -sin(-delta); | |
424 double cm2 = cos(-2 * delta); | |
425 double cm1 = cos(-delta); | |
426 double w = 2 * cm1; | |
427 double ar[3], ai[3]; | |
428 | |
429 for (i = 0; i < n; i += blockSize) { | |
430 | |
431 ar[2] = cm2; | |
432 ar[1] = cm1; | |
433 | |
434 ai[2] = sm2; | |
435 ai[1] = sm1; | |
436 | |
437 for (j = i, m = 0; m < blockEnd; j++, m++) { | |
438 | |
439 ar[0] = w * ar[1] - ar[2]; | |
440 ar[2] = ar[1]; | |
441 ar[1] = ar[0]; | |
442 | |
443 ai[0] = w * ai[1] - ai[2]; | |
444 ai[2] = ai[1]; | |
445 ai[1] = ai[0]; | |
446 | |
447 k = j + blockEnd; | |
448 tr = ar[0] * ro[k] - ai[0] * io[k]; | |
449 ti = ar[0] * io[k] + ai[0] * ro[k]; | |
450 | |
451 ro[k] = ro[j] - tr; | |
452 io[k] = io[j] - ti; | |
453 | |
454 ro[j] += tr; | |
455 io[j] += ti; | |
456 } | |
457 } | |
458 | |
459 blockEnd = blockSize; | |
460 } | |
461 } | |
462 | |
463 |