Mercurial > hg > vamp-simple-cepstrum
comparison CepstrumPitchTracker.cpp @ 23:1ae8041ae31b
Merge from branch "track"
author | Chris Cannam |
---|---|
date | Thu, 05 Jul 2012 08:29:20 +0100 |
parents | a949c0278d7d |
children | a15d8c89a36e |
comparison
equal
deleted
inserted
replaced
7:47355877a58d | 23:1ae8041ae31b |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 /* | |
3 Permission is hereby granted, free of charge, to any person | |
4 obtaining a copy of this software and associated documentation | |
5 files (the "Software"), to deal in the Software without | |
6 restriction, including without limitation the rights to use, copy, | |
7 modify, merge, publish, distribute, sublicense, and/or sell copies | |
8 of the Software, and to permit persons to whom the Software is | |
9 furnished to do so, subject to the following conditions: | |
10 | |
11 The above copyright notice and this permission notice shall be | |
12 included in all copies or substantial portions of the Software. | |
13 | |
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
15 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
17 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR | |
18 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF | |
19 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION | |
20 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
21 */ | |
22 | |
23 #include "CepstrumPitchTracker.h" | |
24 | |
25 #include <vector> | |
26 #include <algorithm> | |
27 | |
28 #include <cstdio> | |
29 #include <cmath> | |
30 #include <complex> | |
31 | |
32 using std::string; | |
33 using std::vector; | |
34 using Vamp::RealTime; | |
35 | |
36 CepstrumPitchTracker::Hypothesis::Hypothesis() | |
37 { | |
38 m_state = New; | |
39 m_age = 0; | |
40 } | |
41 | |
42 CepstrumPitchTracker::Hypothesis::~Hypothesis() | |
43 { | |
44 } | |
45 | |
46 bool | |
47 CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s) | |
48 { | |
49 if (m_pending.empty()) { | |
50 return true; | |
51 } | |
52 | |
53 // check we are within a relatively close tolerance of the last | |
54 // candidate | |
55 Estimate last = m_pending[m_pending.size()-1]; | |
56 double r = s.freq / last.freq; | |
57 int cents = lrint(1200.0 * (log(r) / log(2.0))); | |
58 if (cents < -60 || cents > 60) return false; | |
59 | |
60 // and within a slightly bigger tolerance of the current mean | |
61 double meanFreq = getMeanFrequency(); | |
62 r = s.freq / meanFreq; | |
63 cents = lrint(1200.0 * (log(r) / log(2.0))); | |
64 if (cents < -80 || cents > 80) return false; | |
65 | |
66 return true; | |
67 } | |
68 | |
69 bool | |
70 CepstrumPitchTracker::Hypothesis::isSatisfied() | |
71 { | |
72 if (m_pending.empty()) return false; | |
73 | |
74 double meanConfidence = 0.0; | |
75 for (int i = 0; i < m_pending.size(); ++i) { | |
76 meanConfidence += m_pending[i].confidence; | |
77 } | |
78 meanConfidence /= m_pending.size(); | |
79 | |
80 int lengthRequired = int(2.0 / meanConfidence + 0.5); | |
81 std::cerr << "meanConfidence = " << meanConfidence << ", lengthRequired = " << lengthRequired << ", length = " << m_pending.size() << std::endl; | |
82 | |
83 return (m_pending.size() > lengthRequired); | |
84 } | |
85 | |
86 void | |
87 CepstrumPitchTracker::Hypothesis::advanceTime() | |
88 { | |
89 ++m_age; | |
90 } | |
91 | |
92 bool | |
93 CepstrumPitchTracker::Hypothesis::test(Estimate s) | |
94 { | |
95 bool accept = false; | |
96 | |
97 switch (m_state) { | |
98 | |
99 case New: | |
100 m_state = Provisional; | |
101 accept = true; | |
102 break; | |
103 | |
104 case Provisional: | |
105 if (m_age > 3) { | |
106 m_state = Rejected; | |
107 } else if (isWithinTolerance(s)) { | |
108 accept = true; | |
109 } | |
110 break; | |
111 | |
112 case Satisfied: | |
113 if (m_age > 3) { | |
114 m_state = Expired; | |
115 } else if (isWithinTolerance(s)) { | |
116 accept = true; | |
117 } | |
118 break; | |
119 | |
120 case Rejected: | |
121 break; | |
122 | |
123 case Expired: | |
124 break; | |
125 } | |
126 | |
127 if (accept) { | |
128 m_pending.push_back(s); | |
129 m_age = 0; | |
130 if (m_state == Provisional && isSatisfied()) { | |
131 m_state = Satisfied; | |
132 } | |
133 } | |
134 | |
135 return accept && (m_state == Satisfied); | |
136 } | |
137 | |
138 CepstrumPitchTracker::Hypothesis::State | |
139 CepstrumPitchTracker::Hypothesis::getState() | |
140 { | |
141 return m_state; | |
142 } | |
143 | |
144 int | |
145 CepstrumPitchTracker::Hypothesis::getPendingLength() | |
146 { | |
147 return m_pending.size(); | |
148 } | |
149 | |
150 CepstrumPitchTracker::Hypothesis::Estimates | |
151 CepstrumPitchTracker::Hypothesis::getAcceptedEstimates() | |
152 { | |
153 if (m_state == Satisfied || m_state == Expired) { | |
154 return m_pending; | |
155 } else { | |
156 return Estimates(); | |
157 } | |
158 } | |
159 | |
160 double | |
161 CepstrumPitchTracker::Hypothesis::getMeanFrequency() | |
162 { | |
163 double acc = 0.0; | |
164 for (int i = 0; i < m_pending.size(); ++i) { | |
165 acc += m_pending[i].freq; | |
166 } | |
167 acc /= m_pending.size(); | |
168 return acc; | |
169 } | |
170 | |
171 CepstrumPitchTracker::Hypothesis::Note | |
172 CepstrumPitchTracker::Hypothesis::getAveragedNote() | |
173 { | |
174 Note n; | |
175 | |
176 if (!(m_state == Satisfied || m_state == Expired)) { | |
177 n.freq = 0.0; | |
178 n.time = RealTime::zeroTime; | |
179 n.duration = RealTime::zeroTime; | |
180 return n; | |
181 } | |
182 | |
183 n.time = m_pending.begin()->time; | |
184 | |
185 Estimates::iterator i = m_pending.end(); | |
186 --i; | |
187 n.duration = i->time - n.time; | |
188 | |
189 // just mean frequency for now, but this isn't at all right perceptually | |
190 n.freq = getMeanFrequency(); | |
191 | |
192 return n; | |
193 } | |
194 | |
195 void | |
196 CepstrumPitchTracker::Hypothesis::addFeatures(FeatureSet &fs) | |
197 { | |
198 for (int i = 0; i < m_pending.size(); ++i) { | |
199 Feature f; | |
200 f.hasTimestamp = true; | |
201 f.timestamp = m_pending[i].time; | |
202 f.values.push_back(m_pending[i].freq); | |
203 fs[0].push_back(f); | |
204 } | |
205 | |
206 Feature nf; | |
207 nf.hasTimestamp = true; | |
208 nf.hasDuration = true; | |
209 Note n = getAveragedNote(); | |
210 nf.timestamp = n.time; | |
211 nf.duration = n.duration; | |
212 nf.values.push_back(n.freq); | |
213 fs[1].push_back(nf); | |
214 } | |
215 | |
216 CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) : | |
217 Plugin(inputSampleRate), | |
218 m_channels(0), | |
219 m_stepSize(256), | |
220 m_blockSize(1024), | |
221 m_fmin(50), | |
222 m_fmax(1000), | |
223 m_vflen(3), | |
224 m_binFrom(0), | |
225 m_binTo(0), | |
226 m_bins(0) | |
227 { | |
228 } | |
229 | |
230 CepstrumPitchTracker::~CepstrumPitchTracker() | |
231 { | |
232 } | |
233 | |
234 string | |
235 CepstrumPitchTracker::getIdentifier() const | |
236 { | |
237 return "cepstrum-pitch"; | |
238 } | |
239 | |
240 string | |
241 CepstrumPitchTracker::getName() const | |
242 { | |
243 return "Cepstrum Pitch Tracker"; | |
244 } | |
245 | |
246 string | |
247 CepstrumPitchTracker::getDescription() const | |
248 { | |
249 return "Estimate f0 of monophonic material using a cepstrum method."; | |
250 } | |
251 | |
252 string | |
253 CepstrumPitchTracker::getMaker() const | |
254 { | |
255 return "Chris Cannam"; | |
256 } | |
257 | |
258 int | |
259 CepstrumPitchTracker::getPluginVersion() const | |
260 { | |
261 // Increment this each time you release a version that behaves | |
262 // differently from the previous one | |
263 return 1; | |
264 } | |
265 | |
266 string | |
267 CepstrumPitchTracker::getCopyright() const | |
268 { | |
269 return "Freely redistributable (BSD license)"; | |
270 } | |
271 | |
272 CepstrumPitchTracker::InputDomain | |
273 CepstrumPitchTracker::getInputDomain() const | |
274 { | |
275 return FrequencyDomain; | |
276 } | |
277 | |
278 size_t | |
279 CepstrumPitchTracker::getPreferredBlockSize() const | |
280 { | |
281 return 1024; | |
282 } | |
283 | |
284 size_t | |
285 CepstrumPitchTracker::getPreferredStepSize() const | |
286 { | |
287 return 256; | |
288 } | |
289 | |
290 size_t | |
291 CepstrumPitchTracker::getMinChannelCount() const | |
292 { | |
293 return 1; | |
294 } | |
295 | |
296 size_t | |
297 CepstrumPitchTracker::getMaxChannelCount() const | |
298 { | |
299 return 1; | |
300 } | |
301 | |
302 CepstrumPitchTracker::ParameterList | |
303 CepstrumPitchTracker::getParameterDescriptors() const | |
304 { | |
305 ParameterList list; | |
306 return list; | |
307 } | |
308 | |
309 float | |
310 CepstrumPitchTracker::getParameter(string identifier) const | |
311 { | |
312 return 0.f; | |
313 } | |
314 | |
315 void | |
316 CepstrumPitchTracker::setParameter(string identifier, float value) | |
317 { | |
318 } | |
319 | |
320 CepstrumPitchTracker::ProgramList | |
321 CepstrumPitchTracker::getPrograms() const | |
322 { | |
323 ProgramList list; | |
324 return list; | |
325 } | |
326 | |
327 string | |
328 CepstrumPitchTracker::getCurrentProgram() const | |
329 { | |
330 return ""; // no programs | |
331 } | |
332 | |
333 void | |
334 CepstrumPitchTracker::selectProgram(string name) | |
335 { | |
336 } | |
337 | |
338 CepstrumPitchTracker::OutputList | |
339 CepstrumPitchTracker::getOutputDescriptors() const | |
340 { | |
341 OutputList outputs; | |
342 | |
343 int n = 0; | |
344 | |
345 OutputDescriptor d; | |
346 | |
347 d.identifier = "f0"; | |
348 d.name = "Estimated f0"; | |
349 d.description = "Estimated fundamental frequency"; | |
350 d.unit = "Hz"; | |
351 d.hasFixedBinCount = true; | |
352 d.binCount = 1; | |
353 d.hasKnownExtents = true; | |
354 d.minValue = m_fmin; | |
355 d.maxValue = m_fmax; | |
356 d.isQuantized = false; | |
357 d.sampleType = OutputDescriptor::FixedSampleRate; | |
358 d.sampleRate = (m_inputSampleRate / m_stepSize); | |
359 d.hasDuration = false; | |
360 outputs.push_back(d); | |
361 | |
362 d.identifier = "notes"; | |
363 d.name = "Notes"; | |
364 d.description = "Derived fixed-pitch note frequencies"; | |
365 d.unit = "Hz"; | |
366 d.hasFixedBinCount = true; | |
367 d.binCount = 1; | |
368 d.hasKnownExtents = true; | |
369 d.minValue = m_fmin; | |
370 d.maxValue = m_fmax; | |
371 d.isQuantized = false; | |
372 d.sampleType = OutputDescriptor::FixedSampleRate; | |
373 d.sampleRate = (m_inputSampleRate / m_stepSize); | |
374 d.hasDuration = true; | |
375 outputs.push_back(d); | |
376 | |
377 return outputs; | |
378 } | |
379 | |
380 bool | |
381 CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize) | |
382 { | |
383 if (channels < getMinChannelCount() || | |
384 channels > getMaxChannelCount()) return false; | |
385 | |
386 // std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels | |
387 // << ", stepSize = " << stepSize << ", blockSize = " << blockSize | |
388 // << std::endl; | |
389 | |
390 m_channels = channels; | |
391 m_stepSize = stepSize; | |
392 m_blockSize = blockSize; | |
393 | |
394 m_binFrom = int(m_inputSampleRate / m_fmax); | |
395 m_binTo = int(m_inputSampleRate / m_fmin); | |
396 | |
397 if (m_binTo >= (int)m_blockSize / 2) { | |
398 m_binTo = m_blockSize / 2 - 1; | |
399 } | |
400 | |
401 m_bins = (m_binTo - m_binFrom) + 1; | |
402 | |
403 reset(); | |
404 | |
405 return true; | |
406 } | |
407 | |
408 void | |
409 CepstrumPitchTracker::reset() | |
410 { | |
411 } | |
412 | |
413 void | |
414 CepstrumPitchTracker::filter(const double *cep, double *data) | |
415 { | |
416 for (int i = 0; i < m_bins; ++i) { | |
417 double v = 0; | |
418 int n = 0; | |
419 // average according to the vertical filter length | |
420 for (int j = -m_vflen/2; j <= m_vflen/2; ++j) { | |
421 int ix = i + m_binFrom + j; | |
422 if (ix >= 0 && ix < m_blockSize) { | |
423 v += cep[ix]; | |
424 ++n; | |
425 } | |
426 } | |
427 data[i] = v / n; | |
428 } | |
429 } | |
430 | |
431 CepstrumPitchTracker::FeatureSet | |
432 CepstrumPitchTracker::process(const float *const *inputBuffers, RealTime timestamp) | |
433 { | |
434 FeatureSet fs; | |
435 | |
436 int bs = m_blockSize; | |
437 int hs = m_blockSize/2 + 1; | |
438 | |
439 double *rawcep = new double[bs]; | |
440 double *io = new double[bs]; | |
441 double *logmag = new double[bs]; | |
442 | |
443 // The "inverse symmetric" method. Seems to be the most reliable | |
444 | |
445 for (int i = 0; i < hs; ++i) { | |
446 | |
447 double power = | |
448 inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] + | |
449 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1]; | |
450 double mag = sqrt(power); | |
451 | |
452 double lm = log(mag + 0.00000001); | |
453 | |
454 logmag[i] = lm; | |
455 if (i > 0) logmag[bs - i] = lm; | |
456 } | |
457 | |
458 fft(bs, true, logmag, 0, rawcep, io); | |
459 | |
460 delete[] logmag; | |
461 delete[] io; | |
462 | |
463 int n = m_bins; | |
464 double *data = new double[n]; | |
465 filter(rawcep, data); | |
466 delete[] rawcep; | |
467 | |
468 double abstot = 0.0; | |
469 | |
470 for (int i = 0; i < n; ++i) { | |
471 abstot += fabs(data[i]); | |
472 } | |
473 | |
474 double maxval = 0.0; | |
475 int maxbin = -1; | |
476 | |
477 for (int i = 0; i < n; ++i) { | |
478 if (data[i] > maxval) { | |
479 maxval = data[i]; | |
480 maxbin = i; | |
481 } | |
482 } | |
483 | |
484 if (maxbin < 0) { | |
485 delete[] data; | |
486 return fs; | |
487 } | |
488 | |
489 double nextPeakVal = 0.0; | |
490 for (int i = 1; i+1 < n; ++i) { | |
491 if (data[i] > data[i-1] && | |
492 data[i] > data[i+1] && | |
493 i != maxbin && | |
494 data[i] > nextPeakVal) { | |
495 nextPeakVal = data[i]; | |
496 } | |
497 } | |
498 | |
499 double peakfreq = m_inputSampleRate / (maxbin + m_binFrom); | |
500 | |
501 double confidence = 0.0; | |
502 if (nextPeakVal != 0.0) { | |
503 confidence = ((maxval / nextPeakVal) - 1.0) / 4.0; | |
504 if (confidence > 1.0) confidence = 1.0; | |
505 } | |
506 | |
507 Hypothesis::Estimate e; | |
508 e.freq = peakfreq; | |
509 e.time = timestamp; | |
510 e.confidence = confidence; | |
511 | |
512 m_accepted.advanceTime(); | |
513 | |
514 for (int i = 0; i < m_possible.size(); ++i) { | |
515 m_possible[i].advanceTime(); | |
516 } | |
517 | |
518 if (!m_accepted.test(e)) { | |
519 | |
520 int candidate = -1; | |
521 bool accepted = false; | |
522 | |
523 for (int i = 0; i < m_possible.size(); ++i) { | |
524 if (m_possible[i].test(e)) { | |
525 accepted = true; | |
526 if (m_possible[i].getState() == Hypothesis::Satisfied) { | |
527 candidate = i; | |
528 } | |
529 break; | |
530 } | |
531 } | |
532 | |
533 if (!accepted) { | |
534 Hypothesis h; | |
535 h.test(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this | |
536 m_possible.push_back(h); | |
537 } | |
538 | |
539 if (m_accepted.getState() == Hypothesis::Expired) { | |
540 m_accepted.addFeatures(fs); | |
541 } | |
542 | |
543 if (m_accepted.getState() == Hypothesis::Expired || | |
544 m_accepted.getState() == Hypothesis::Rejected) { | |
545 if (candidate >= 0) { | |
546 m_accepted = m_possible[candidate]; | |
547 } else { | |
548 m_accepted = Hypothesis(); | |
549 } | |
550 } | |
551 | |
552 // reap rejected/expired hypotheses from possible list | |
553 Hypotheses toReap = m_possible; | |
554 m_possible.clear(); | |
555 for (int i = 0; i < toReap.size(); ++i) { | |
556 Hypothesis h = toReap[i]; | |
557 if (h.getState() != Hypothesis::Rejected && | |
558 h.getState() != Hypothesis::Expired) { | |
559 m_possible.push_back(h); | |
560 } | |
561 } | |
562 } | |
563 | |
564 std::cerr << "accepted length = " << m_accepted.getPendingLength() | |
565 << ", state = " << m_accepted.getState() | |
566 << ", hypothesis count = " << m_possible.size() << std::endl; | |
567 | |
568 delete[] data; | |
569 return fs; | |
570 } | |
571 | |
572 CepstrumPitchTracker::FeatureSet | |
573 CepstrumPitchTracker::getRemainingFeatures() | |
574 { | |
575 FeatureSet fs; | |
576 if (m_accepted.getState() == Hypothesis::Satisfied) { | |
577 m_accepted.addFeatures(fs); | |
578 } | |
579 return fs; | |
580 } | |
581 | |
582 void | |
583 CepstrumPitchTracker::fft(unsigned int n, bool inverse, | |
584 double *ri, double *ii, double *ro, double *io) | |
585 { | |
586 if (!ri || !ro || !io) return; | |
587 | |
588 unsigned int bits; | |
589 unsigned int i, j, k, m; | |
590 unsigned int blockSize, blockEnd; | |
591 | |
592 double tr, ti; | |
593 | |
594 if (n < 2) return; | |
595 if (n & (n-1)) return; | |
596 | |
597 double angle = 2.0 * M_PI; | |
598 if (inverse) angle = -angle; | |
599 | |
600 for (i = 0; ; ++i) { | |
601 if (n & (1 << i)) { | |
602 bits = i; | |
603 break; | |
604 } | |
605 } | |
606 | |
607 static unsigned int tableSize = 0; | |
608 static int *table = 0; | |
609 | |
610 if (tableSize != n) { | |
611 | |
612 delete[] table; | |
613 | |
614 table = new int[n]; | |
615 | |
616 for (i = 0; i < n; ++i) { | |
617 | |
618 m = i; | |
619 | |
620 for (j = k = 0; j < bits; ++j) { | |
621 k = (k << 1) | (m & 1); | |
622 m >>= 1; | |
623 } | |
624 | |
625 table[i] = k; | |
626 } | |
627 | |
628 tableSize = n; | |
629 } | |
630 | |
631 if (ii) { | |
632 for (i = 0; i < n; ++i) { | |
633 ro[table[i]] = ri[i]; | |
634 io[table[i]] = ii[i]; | |
635 } | |
636 } else { | |
637 for (i = 0; i < n; ++i) { | |
638 ro[table[i]] = ri[i]; | |
639 io[table[i]] = 0.0; | |
640 } | |
641 } | |
642 | |
643 blockEnd = 1; | |
644 | |
645 for (blockSize = 2; blockSize <= n; blockSize <<= 1) { | |
646 | |
647 double delta = angle / (double)blockSize; | |
648 double sm2 = -sin(-2 * delta); | |
649 double sm1 = -sin(-delta); | |
650 double cm2 = cos(-2 * delta); | |
651 double cm1 = cos(-delta); | |
652 double w = 2 * cm1; | |
653 double ar[3], ai[3]; | |
654 | |
655 for (i = 0; i < n; i += blockSize) { | |
656 | |
657 ar[2] = cm2; | |
658 ar[1] = cm1; | |
659 | |
660 ai[2] = sm2; | |
661 ai[1] = sm1; | |
662 | |
663 for (j = i, m = 0; m < blockEnd; j++, m++) { | |
664 | |
665 ar[0] = w * ar[1] - ar[2]; | |
666 ar[2] = ar[1]; | |
667 ar[1] = ar[0]; | |
668 | |
669 ai[0] = w * ai[1] - ai[2]; | |
670 ai[2] = ai[1]; | |
671 ai[1] = ai[0]; | |
672 | |
673 k = j + blockEnd; | |
674 tr = ar[0] * ro[k] - ai[0] * io[k]; | |
675 ti = ar[0] * io[k] + ai[0] * ro[k]; | |
676 | |
677 ro[k] = ro[j] - tr; | |
678 io[k] = io[j] - ti; | |
679 | |
680 ro[j] += tr; | |
681 io[j] += ti; | |
682 } | |
683 } | |
684 | |
685 blockEnd = blockSize; | |
686 } | |
687 } | |
688 | |
689 |