comparison CepstrumPitchTracker.cpp @ 23:1ae8041ae31b

Merge from branch "track"
author Chris Cannam
date Thu, 05 Jul 2012 08:29:20 +0100
parents a949c0278d7d
children a15d8c89a36e
comparison
equal deleted inserted replaced
7:47355877a58d 23:1ae8041ae31b
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2 /*
3 Permission is hereby granted, free of charge, to any person
4 obtaining a copy of this software and associated documentation
5 files (the "Software"), to deal in the Software without
6 restriction, including without limitation the rights to use, copy,
7 modify, merge, publish, distribute, sublicense, and/or sell copies
8 of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10
11 The above copyright notice and this permission notice shall be
12 included in all copies or substantial portions of the Software.
13
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
18 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "CepstrumPitchTracker.h"
24
25 #include <vector>
26 #include <algorithm>
27
28 #include <cstdio>
29 #include <cmath>
30 #include <complex>
31
32 using std::string;
33 using std::vector;
34 using Vamp::RealTime;
35
36 CepstrumPitchTracker::Hypothesis::Hypothesis()
37 {
38 m_state = New;
39 m_age = 0;
40 }
41
42 CepstrumPitchTracker::Hypothesis::~Hypothesis()
43 {
44 }
45
46 bool
47 CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s)
48 {
49 if (m_pending.empty()) {
50 return true;
51 }
52
53 // check we are within a relatively close tolerance of the last
54 // candidate
55 Estimate last = m_pending[m_pending.size()-1];
56 double r = s.freq / last.freq;
57 int cents = lrint(1200.0 * (log(r) / log(2.0)));
58 if (cents < -60 || cents > 60) return false;
59
60 // and within a slightly bigger tolerance of the current mean
61 double meanFreq = getMeanFrequency();
62 r = s.freq / meanFreq;
63 cents = lrint(1200.0 * (log(r) / log(2.0)));
64 if (cents < -80 || cents > 80) return false;
65
66 return true;
67 }
68
69 bool
70 CepstrumPitchTracker::Hypothesis::isSatisfied()
71 {
72 if (m_pending.empty()) return false;
73
74 double meanConfidence = 0.0;
75 for (int i = 0; i < m_pending.size(); ++i) {
76 meanConfidence += m_pending[i].confidence;
77 }
78 meanConfidence /= m_pending.size();
79
80 int lengthRequired = int(2.0 / meanConfidence + 0.5);
81 std::cerr << "meanConfidence = " << meanConfidence << ", lengthRequired = " << lengthRequired << ", length = " << m_pending.size() << std::endl;
82
83 return (m_pending.size() > lengthRequired);
84 }
85
86 void
87 CepstrumPitchTracker::Hypothesis::advanceTime()
88 {
89 ++m_age;
90 }
91
92 bool
93 CepstrumPitchTracker::Hypothesis::test(Estimate s)
94 {
95 bool accept = false;
96
97 switch (m_state) {
98
99 case New:
100 m_state = Provisional;
101 accept = true;
102 break;
103
104 case Provisional:
105 if (m_age > 3) {
106 m_state = Rejected;
107 } else if (isWithinTolerance(s)) {
108 accept = true;
109 }
110 break;
111
112 case Satisfied:
113 if (m_age > 3) {
114 m_state = Expired;
115 } else if (isWithinTolerance(s)) {
116 accept = true;
117 }
118 break;
119
120 case Rejected:
121 break;
122
123 case Expired:
124 break;
125 }
126
127 if (accept) {
128 m_pending.push_back(s);
129 m_age = 0;
130 if (m_state == Provisional && isSatisfied()) {
131 m_state = Satisfied;
132 }
133 }
134
135 return accept && (m_state == Satisfied);
136 }
137
138 CepstrumPitchTracker::Hypothesis::State
139 CepstrumPitchTracker::Hypothesis::getState()
140 {
141 return m_state;
142 }
143
144 int
145 CepstrumPitchTracker::Hypothesis::getPendingLength()
146 {
147 return m_pending.size();
148 }
149
150 CepstrumPitchTracker::Hypothesis::Estimates
151 CepstrumPitchTracker::Hypothesis::getAcceptedEstimates()
152 {
153 if (m_state == Satisfied || m_state == Expired) {
154 return m_pending;
155 } else {
156 return Estimates();
157 }
158 }
159
160 double
161 CepstrumPitchTracker::Hypothesis::getMeanFrequency()
162 {
163 double acc = 0.0;
164 for (int i = 0; i < m_pending.size(); ++i) {
165 acc += m_pending[i].freq;
166 }
167 acc /= m_pending.size();
168 return acc;
169 }
170
171 CepstrumPitchTracker::Hypothesis::Note
172 CepstrumPitchTracker::Hypothesis::getAveragedNote()
173 {
174 Note n;
175
176 if (!(m_state == Satisfied || m_state == Expired)) {
177 n.freq = 0.0;
178 n.time = RealTime::zeroTime;
179 n.duration = RealTime::zeroTime;
180 return n;
181 }
182
183 n.time = m_pending.begin()->time;
184
185 Estimates::iterator i = m_pending.end();
186 --i;
187 n.duration = i->time - n.time;
188
189 // just mean frequency for now, but this isn't at all right perceptually
190 n.freq = getMeanFrequency();
191
192 return n;
193 }
194
195 void
196 CepstrumPitchTracker::Hypothesis::addFeatures(FeatureSet &fs)
197 {
198 for (int i = 0; i < m_pending.size(); ++i) {
199 Feature f;
200 f.hasTimestamp = true;
201 f.timestamp = m_pending[i].time;
202 f.values.push_back(m_pending[i].freq);
203 fs[0].push_back(f);
204 }
205
206 Feature nf;
207 nf.hasTimestamp = true;
208 nf.hasDuration = true;
209 Note n = getAveragedNote();
210 nf.timestamp = n.time;
211 nf.duration = n.duration;
212 nf.values.push_back(n.freq);
213 fs[1].push_back(nf);
214 }
215
216 CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
217 Plugin(inputSampleRate),
218 m_channels(0),
219 m_stepSize(256),
220 m_blockSize(1024),
221 m_fmin(50),
222 m_fmax(1000),
223 m_vflen(3),
224 m_binFrom(0),
225 m_binTo(0),
226 m_bins(0)
227 {
228 }
229
230 CepstrumPitchTracker::~CepstrumPitchTracker()
231 {
232 }
233
234 string
235 CepstrumPitchTracker::getIdentifier() const
236 {
237 return "cepstrum-pitch";
238 }
239
240 string
241 CepstrumPitchTracker::getName() const
242 {
243 return "Cepstrum Pitch Tracker";
244 }
245
246 string
247 CepstrumPitchTracker::getDescription() const
248 {
249 return "Estimate f0 of monophonic material using a cepstrum method.";
250 }
251
252 string
253 CepstrumPitchTracker::getMaker() const
254 {
255 return "Chris Cannam";
256 }
257
258 int
259 CepstrumPitchTracker::getPluginVersion() const
260 {
261 // Increment this each time you release a version that behaves
262 // differently from the previous one
263 return 1;
264 }
265
266 string
267 CepstrumPitchTracker::getCopyright() const
268 {
269 return "Freely redistributable (BSD license)";
270 }
271
272 CepstrumPitchTracker::InputDomain
273 CepstrumPitchTracker::getInputDomain() const
274 {
275 return FrequencyDomain;
276 }
277
278 size_t
279 CepstrumPitchTracker::getPreferredBlockSize() const
280 {
281 return 1024;
282 }
283
284 size_t
285 CepstrumPitchTracker::getPreferredStepSize() const
286 {
287 return 256;
288 }
289
290 size_t
291 CepstrumPitchTracker::getMinChannelCount() const
292 {
293 return 1;
294 }
295
296 size_t
297 CepstrumPitchTracker::getMaxChannelCount() const
298 {
299 return 1;
300 }
301
302 CepstrumPitchTracker::ParameterList
303 CepstrumPitchTracker::getParameterDescriptors() const
304 {
305 ParameterList list;
306 return list;
307 }
308
309 float
310 CepstrumPitchTracker::getParameter(string identifier) const
311 {
312 return 0.f;
313 }
314
315 void
316 CepstrumPitchTracker::setParameter(string identifier, float value)
317 {
318 }
319
320 CepstrumPitchTracker::ProgramList
321 CepstrumPitchTracker::getPrograms() const
322 {
323 ProgramList list;
324 return list;
325 }
326
327 string
328 CepstrumPitchTracker::getCurrentProgram() const
329 {
330 return ""; // no programs
331 }
332
333 void
334 CepstrumPitchTracker::selectProgram(string name)
335 {
336 }
337
338 CepstrumPitchTracker::OutputList
339 CepstrumPitchTracker::getOutputDescriptors() const
340 {
341 OutputList outputs;
342
343 int n = 0;
344
345 OutputDescriptor d;
346
347 d.identifier = "f0";
348 d.name = "Estimated f0";
349 d.description = "Estimated fundamental frequency";
350 d.unit = "Hz";
351 d.hasFixedBinCount = true;
352 d.binCount = 1;
353 d.hasKnownExtents = true;
354 d.minValue = m_fmin;
355 d.maxValue = m_fmax;
356 d.isQuantized = false;
357 d.sampleType = OutputDescriptor::FixedSampleRate;
358 d.sampleRate = (m_inputSampleRate / m_stepSize);
359 d.hasDuration = false;
360 outputs.push_back(d);
361
362 d.identifier = "notes";
363 d.name = "Notes";
364 d.description = "Derived fixed-pitch note frequencies";
365 d.unit = "Hz";
366 d.hasFixedBinCount = true;
367 d.binCount = 1;
368 d.hasKnownExtents = true;
369 d.minValue = m_fmin;
370 d.maxValue = m_fmax;
371 d.isQuantized = false;
372 d.sampleType = OutputDescriptor::FixedSampleRate;
373 d.sampleRate = (m_inputSampleRate / m_stepSize);
374 d.hasDuration = true;
375 outputs.push_back(d);
376
377 return outputs;
378 }
379
380 bool
381 CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
382 {
383 if (channels < getMinChannelCount() ||
384 channels > getMaxChannelCount()) return false;
385
386 // std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
387 // << ", stepSize = " << stepSize << ", blockSize = " << blockSize
388 // << std::endl;
389
390 m_channels = channels;
391 m_stepSize = stepSize;
392 m_blockSize = blockSize;
393
394 m_binFrom = int(m_inputSampleRate / m_fmax);
395 m_binTo = int(m_inputSampleRate / m_fmin);
396
397 if (m_binTo >= (int)m_blockSize / 2) {
398 m_binTo = m_blockSize / 2 - 1;
399 }
400
401 m_bins = (m_binTo - m_binFrom) + 1;
402
403 reset();
404
405 return true;
406 }
407
408 void
409 CepstrumPitchTracker::reset()
410 {
411 }
412
413 void
414 CepstrumPitchTracker::filter(const double *cep, double *data)
415 {
416 for (int i = 0; i < m_bins; ++i) {
417 double v = 0;
418 int n = 0;
419 // average according to the vertical filter length
420 for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
421 int ix = i + m_binFrom + j;
422 if (ix >= 0 && ix < m_blockSize) {
423 v += cep[ix];
424 ++n;
425 }
426 }
427 data[i] = v / n;
428 }
429 }
430
431 CepstrumPitchTracker::FeatureSet
432 CepstrumPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
433 {
434 FeatureSet fs;
435
436 int bs = m_blockSize;
437 int hs = m_blockSize/2 + 1;
438
439 double *rawcep = new double[bs];
440 double *io = new double[bs];
441 double *logmag = new double[bs];
442
443 // The "inverse symmetric" method. Seems to be the most reliable
444
445 for (int i = 0; i < hs; ++i) {
446
447 double power =
448 inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] +
449 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
450 double mag = sqrt(power);
451
452 double lm = log(mag + 0.00000001);
453
454 logmag[i] = lm;
455 if (i > 0) logmag[bs - i] = lm;
456 }
457
458 fft(bs, true, logmag, 0, rawcep, io);
459
460 delete[] logmag;
461 delete[] io;
462
463 int n = m_bins;
464 double *data = new double[n];
465 filter(rawcep, data);
466 delete[] rawcep;
467
468 double abstot = 0.0;
469
470 for (int i = 0; i < n; ++i) {
471 abstot += fabs(data[i]);
472 }
473
474 double maxval = 0.0;
475 int maxbin = -1;
476
477 for (int i = 0; i < n; ++i) {
478 if (data[i] > maxval) {
479 maxval = data[i];
480 maxbin = i;
481 }
482 }
483
484 if (maxbin < 0) {
485 delete[] data;
486 return fs;
487 }
488
489 double nextPeakVal = 0.0;
490 for (int i = 1; i+1 < n; ++i) {
491 if (data[i] > data[i-1] &&
492 data[i] > data[i+1] &&
493 i != maxbin &&
494 data[i] > nextPeakVal) {
495 nextPeakVal = data[i];
496 }
497 }
498
499 double peakfreq = m_inputSampleRate / (maxbin + m_binFrom);
500
501 double confidence = 0.0;
502 if (nextPeakVal != 0.0) {
503 confidence = ((maxval / nextPeakVal) - 1.0) / 4.0;
504 if (confidence > 1.0) confidence = 1.0;
505 }
506
507 Hypothesis::Estimate e;
508 e.freq = peakfreq;
509 e.time = timestamp;
510 e.confidence = confidence;
511
512 m_accepted.advanceTime();
513
514 for (int i = 0; i < m_possible.size(); ++i) {
515 m_possible[i].advanceTime();
516 }
517
518 if (!m_accepted.test(e)) {
519
520 int candidate = -1;
521 bool accepted = false;
522
523 for (int i = 0; i < m_possible.size(); ++i) {
524 if (m_possible[i].test(e)) {
525 accepted = true;
526 if (m_possible[i].getState() == Hypothesis::Satisfied) {
527 candidate = i;
528 }
529 break;
530 }
531 }
532
533 if (!accepted) {
534 Hypothesis h;
535 h.test(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
536 m_possible.push_back(h);
537 }
538
539 if (m_accepted.getState() == Hypothesis::Expired) {
540 m_accepted.addFeatures(fs);
541 }
542
543 if (m_accepted.getState() == Hypothesis::Expired ||
544 m_accepted.getState() == Hypothesis::Rejected) {
545 if (candidate >= 0) {
546 m_accepted = m_possible[candidate];
547 } else {
548 m_accepted = Hypothesis();
549 }
550 }
551
552 // reap rejected/expired hypotheses from possible list
553 Hypotheses toReap = m_possible;
554 m_possible.clear();
555 for (int i = 0; i < toReap.size(); ++i) {
556 Hypothesis h = toReap[i];
557 if (h.getState() != Hypothesis::Rejected &&
558 h.getState() != Hypothesis::Expired) {
559 m_possible.push_back(h);
560 }
561 }
562 }
563
564 std::cerr << "accepted length = " << m_accepted.getPendingLength()
565 << ", state = " << m_accepted.getState()
566 << ", hypothesis count = " << m_possible.size() << std::endl;
567
568 delete[] data;
569 return fs;
570 }
571
572 CepstrumPitchTracker::FeatureSet
573 CepstrumPitchTracker::getRemainingFeatures()
574 {
575 FeatureSet fs;
576 if (m_accepted.getState() == Hypothesis::Satisfied) {
577 m_accepted.addFeatures(fs);
578 }
579 return fs;
580 }
581
582 void
583 CepstrumPitchTracker::fft(unsigned int n, bool inverse,
584 double *ri, double *ii, double *ro, double *io)
585 {
586 if (!ri || !ro || !io) return;
587
588 unsigned int bits;
589 unsigned int i, j, k, m;
590 unsigned int blockSize, blockEnd;
591
592 double tr, ti;
593
594 if (n < 2) return;
595 if (n & (n-1)) return;
596
597 double angle = 2.0 * M_PI;
598 if (inverse) angle = -angle;
599
600 for (i = 0; ; ++i) {
601 if (n & (1 << i)) {
602 bits = i;
603 break;
604 }
605 }
606
607 static unsigned int tableSize = 0;
608 static int *table = 0;
609
610 if (tableSize != n) {
611
612 delete[] table;
613
614 table = new int[n];
615
616 for (i = 0; i < n; ++i) {
617
618 m = i;
619
620 for (j = k = 0; j < bits; ++j) {
621 k = (k << 1) | (m & 1);
622 m >>= 1;
623 }
624
625 table[i] = k;
626 }
627
628 tableSize = n;
629 }
630
631 if (ii) {
632 for (i = 0; i < n; ++i) {
633 ro[table[i]] = ri[i];
634 io[table[i]] = ii[i];
635 }
636 } else {
637 for (i = 0; i < n; ++i) {
638 ro[table[i]] = ri[i];
639 io[table[i]] = 0.0;
640 }
641 }
642
643 blockEnd = 1;
644
645 for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
646
647 double delta = angle / (double)blockSize;
648 double sm2 = -sin(-2 * delta);
649 double sm1 = -sin(-delta);
650 double cm2 = cos(-2 * delta);
651 double cm1 = cos(-delta);
652 double w = 2 * cm1;
653 double ar[3], ai[3];
654
655 for (i = 0; i < n; i += blockSize) {
656
657 ar[2] = cm2;
658 ar[1] = cm1;
659
660 ai[2] = sm2;
661 ai[1] = sm1;
662
663 for (j = i, m = 0; m < blockEnd; j++, m++) {
664
665 ar[0] = w * ar[1] - ar[2];
666 ar[2] = ar[1];
667 ar[1] = ar[0];
668
669 ai[0] = w * ai[1] - ai[2];
670 ai[2] = ai[1];
671 ai[1] = ai[0];
672
673 k = j + blockEnd;
674 tr = ar[0] * ro[k] - ai[0] * io[k];
675 ti = ar[0] * io[k] + ai[0] * ro[k];
676
677 ro[k] = ro[j] - tr;
678 io[k] = io[j] - ti;
679
680 ro[j] += tr;
681 io[j] += ti;
682 }
683 }
684
685 blockEnd = blockSize;
686 }
687 }
688
689