comparison CepstrumPitchTracker.cpp @ 20:7786d595d2f2 track

Introduce peak-to-second-peak ratio, which looks like a reasonable proxy for harmonic-ness. Use it to ascribe a confidence to estimates in the pitch tracker, and rely on that to determine how many similar estimates make a satisfied hypothesis.
author Chris Cannam
date Mon, 02 Jul 2012 21:37:02 +0100
parents c9cac05ef9f2
children df41333abbc9
comparison
equal deleted inserted replaced
19:c9cac05ef9f2 20:7786d595d2f2
55 } 55 }
56 56
57 bool 57 bool
58 CepstrumPitchTracker::Hypothesis::isSatisfied() 58 CepstrumPitchTracker::Hypothesis::isSatisfied()
59 { 59 {
60 return (m_pending.size() > 2); 60 if (m_pending.empty()) return false;
61
62 double meanConfidence = 0.0;
63 for (int i = 0; i < m_pending.size(); ++i) {
64 meanConfidence += m_pending[i].confidence;
65 }
66 meanConfidence /= m_pending.size();
67
68 int lengthRequired = int(2.0 / meanConfidence + 0.5);
69 std::cerr << "meanConfidence = " << meanConfidence << ", lengthRequired = " << lengthRequired << ", length = " << m_pending.size() << std::endl;
70
71 return (m_pending.size() > lengthRequired);
61 } 72 }
62 73
63 void 74 void
64 CepstrumPitchTracker::Hypothesis::advanceTime() 75 CepstrumPitchTracker::Hypothesis::advanceTime()
65 { 76 {
151 m_channels(0), 162 m_channels(0),
152 m_stepSize(256), 163 m_stepSize(256),
153 m_blockSize(1024), 164 m_blockSize(1024),
154 m_fmin(50), 165 m_fmin(50),
155 m_fmax(1000), 166 m_fmax(1000),
156 m_histlen(1),
157 m_vflen(3), 167 m_vflen(3),
158 m_binFrom(0), 168 m_binFrom(0),
159 m_binTo(0), 169 m_binTo(0),
160 m_bins(0), 170 m_bins(0)
161 m_history(0),
162 m_prevpeak(0),
163 m_prevprop(0)
164 { 171 {
165 } 172 }
166 173
167 CepstrumPitchTracker::~CepstrumPitchTracker() 174 CepstrumPitchTracker::~CepstrumPitchTracker()
168 { 175 {
169 if (m_history) {
170 for (int i = 0; i < m_histlen; ++i) {
171 delete[] m_history[i];
172 }
173 delete[] m_history;
174 }
175 } 176 }
176 177
177 string 178 string
178 CepstrumPitchTracker::getIdentifier() const 179 CepstrumPitchTracker::getIdentifier() const
179 { 180 {
326 m_binTo = m_blockSize / 2 - 1; 327 m_binTo = m_blockSize / 2 - 1;
327 } 328 }
328 329
329 m_bins = (m_binTo - m_binFrom) + 1; 330 m_bins = (m_binTo - m_binFrom) + 1;
330 331
331 m_history = new double *[m_histlen];
332 for (int i = 0; i < m_histlen; ++i) {
333 m_history[i] = new double[m_bins];
334 }
335
336 reset(); 332 reset();
337 333
338 return true; 334 return true;
339 } 335 }
340 336
341 void 337 void
342 CepstrumPitchTracker::reset() 338 CepstrumPitchTracker::reset()
343 { 339 {
344 for (int i = 0; i < m_histlen; ++i) {
345 for (int j = 0; j < m_bins; ++j) {
346 m_history[i][j] = 0.0;
347 }
348 }
349 } 340 }
350 341
351 void 342 void
352 CepstrumPitchTracker::filter(const double *cep, double *result) 343 CepstrumPitchTracker::filter(const double *cep, double *data)
353 { 344 {
354 int hix = m_histlen - 1; // current history index
355
356 // roll back the history
357 if (m_histlen > 1) {
358 double *oldest = m_history[0];
359 for (int i = 1; i < m_histlen; ++i) {
360 m_history[i-1] = m_history[i];
361 }
362 // and stick this back in the newest spot, to recycle
363 m_history[hix] = oldest;
364 }
365
366 for (int i = 0; i < m_bins; ++i) { 345 for (int i = 0; i < m_bins; ++i) {
367 double v = 0; 346 double v = 0;
368 int n = 0; 347 int n = 0;
369 // average according to the vertical filter length 348 // average according to the vertical filter length
370 for (int j = -m_vflen/2; j <= m_vflen/2; ++j) { 349 for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
372 if (ix >= 0 && ix < m_blockSize) { 351 if (ix >= 0 && ix < m_blockSize) {
373 v += cep[ix]; 352 v += cep[ix];
374 ++n; 353 ++n;
375 } 354 }
376 } 355 }
377 m_history[hix][i] = v / n; 356 data[i] = v / n;
378 } 357 }
379
380 for (int i = 0; i < m_bins; ++i) {
381 double mean = 0.0;
382 for (int j = 0; j < m_histlen; ++j) {
383 mean += m_history[j][i];
384 }
385 mean /= m_histlen;
386 result[i] = mean;
387 }
388 }
389
390 double
391 CepstrumPitchTracker::calculatePeakProportion(const double *data, double abstot, int n)
392 {
393 double aroundPeak = data[n];
394 double peakProportion = 0.0;
395
396 int i = n - 1;
397 while (i > 0 && data[i] <= data[i+1]) {
398 aroundPeak += fabs(data[i]);
399 --i;
400 }
401 i = n + 1;
402 while (i < m_bins && data[i] <= data[i-1]) {
403 aroundPeak += fabs(data[i]);
404 ++i;
405 }
406 peakProportion = aroundPeak / abstot;
407
408 return peakProportion;
409 }
410
411 bool
412 CepstrumPitchTracker::acceptPeak(int n, double peakProportion)
413 {
414 bool accept = false;
415
416 if (abs(n - m_prevpeak) < 10) { //!!! should depend on bin count
417 accept = true;
418 } else if (peakProportion > m_prevprop * 2) {
419 accept = true;
420 }
421
422 return accept;
423 } 358 }
424 359
425 CepstrumPitchTracker::FeatureSet 360 CepstrumPitchTracker::FeatureSet
426 CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp) 361 CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
427 { 362 {
473 maxval = data[i]; 408 maxval = data[i];
474 maxbin = i; 409 maxbin = i;
475 } 410 }
476 } 411 }
477 412
478 if (maxbin < 0) return fs; 413 if (maxbin < 0) {
414 delete[] data;
415 return fs;
416 }
417
418 double nextPeakVal = 0.0;
419 for (int i = 1; i+1 < n; ++i) {
420 if (data[i] > data[i-1] &&
421 data[i] > data[i+1] &&
422 i != maxbin &&
423 data[i] > nextPeakVal) {
424 nextPeakVal = data[i];
425 }
426 }
479 427
480 double peakfreq = m_inputSampleRate / (maxbin + m_binFrom); 428 double peakfreq = m_inputSampleRate / (maxbin + m_binFrom);
429
430 double confidence = 0.0;
431 if (nextPeakVal != 0.0) {
432 confidence = ((maxval / nextPeakVal) - 1.0) / 4.0;
433 if (confidence > 1.0) confidence = 1.0;
434 }
435
481 Hypothesis::Estimate e; 436 Hypothesis::Estimate e;
482 e.freq = peakfreq; 437 e.freq = peakfreq;
483 e.time = timestamp; 438 e.time = timestamp;
439 e.confidence = confidence;
484 440
485 m_accepted.advanceTime(); 441 m_accepted.advanceTime();
486 442
487 for (int i = 0; i < m_possible.size(); ++i) { 443 for (int i = 0; i < m_possible.size(); ++i) {
488 m_possible[i].advanceTime(); 444 m_possible[i].advanceTime();
532 m_possible.push_back(h); 488 m_possible.push_back(h);
533 } 489 }
534 } 490 }
535 } 491 }
536 492
537 std::cerr << "accepted length = " << m_accepted.getPendingLength() 493 std::cerr << "accepted length = " << m_accepted.getPendingLength()
538 << ", state = " << m_accepted.getState() 494 << ", state = " << m_accepted.getState()
539 << ", hypothesis count = " << m_possible.size() << std::endl; 495 << ", hypothesis count = " << m_possible.size() << std::endl;
540 496
541
542
543 /*
544 bool accepted = false;
545
546 if (maxbin >= 0) {
547 double pp = calculatePeakProportion(data, abstot, maxbin);
548 if (acceptPeak(maxbin, pp)) {
549 accepted = true;
550 } else {
551 // try a secondary peak
552 maxval = 0.0;
553 int secondbin = 0;
554 for (int i = 1; i < n-1; ++i) {
555 if (i != maxbin &&
556 data[i] > data[i-1] &&
557 data[i] > data[i+1] &&
558 data[i] > maxval) {
559 maxval = data[i];
560 secondbin = i;
561 }
562 }
563 double spp = calculatePeakProportion(data, abstot, secondbin);
564 if (acceptPeak(secondbin, spp)) {
565 maxbin = secondbin;
566 pp = spp;
567 accepted = true;
568 }
569 }
570 if (accepted) {
571 m_prevpeak = maxbin;
572 m_prevprop = pp;
573 }
574 }
575 */
576 // std::cerr << "peakProportion = " << peakProportion << std::endl;
577 // std::cerr << "peak = " << m_inputSampleRate / (maxbin + m_binFrom) << std::endl;
578 // std::cerr << "bins = " << m_bins << std::endl;
579
580 // if (peakProportion >= (0.00006 * m_bins)) {
581 /*
582 if (accepted) {
583 Feature f;
584 f.hasTimestamp = true;
585 f.timestamp = timestamp;
586 f.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
587 fs[0].push_back(f);
588 }
589 */
590 delete[] data; 497 delete[] data;
591 return fs; 498 return fs;
592 } 499 }
593 500
594 CepstrumPitchTracker::FeatureSet 501 CepstrumPitchTracker::FeatureSet
595 CepstrumPitchTracker::getRemainingFeatures() 502 CepstrumPitchTracker::getRemainingFeatures()
596 { 503 {
597 FeatureSet fs; 504 FeatureSet fs;
598 if (m_accepted.getState() != Hypothesis::New) { 505 if (m_accepted.getState() == Hypothesis::Satisfied) {
599 m_accepted.addFeatures(fs[0]); 506 m_accepted.addFeatures(fs[0]);
600 } 507 }
601 return fs; 508 return fs;
602 } 509 }
603 510