Mercurial > hg > vamp-simple-cepstrum
comparison CepstrumPitchTracker.cpp @ 20:7786d595d2f2 track
Introduce a peak-to-second-peak ratio, which looks like a reasonable proxy for harmonic-ness. Use it to ascribe a confidence to each estimate in the pitch tracker, and rely on that confidence to determine how many similar estimates are needed to make a satisfied hypothesis.
author | Chris Cannam |
---|---|
date | Mon, 02 Jul 2012 21:37:02 +0100 |
parents | c9cac05ef9f2 |
children | df41333abbc9 |
comparison
equal
deleted
inserted
replaced
19:c9cac05ef9f2 | 20:7786d595d2f2 |
---|---|
55 } | 55 } |
56 | 56 |
57 bool | 57 bool |
58 CepstrumPitchTracker::Hypothesis::isSatisfied() | 58 CepstrumPitchTracker::Hypothesis::isSatisfied() |
59 { | 59 { |
60 return (m_pending.size() > 2); | 60 if (m_pending.empty()) return false; |
61 | |
62 double meanConfidence = 0.0; | |
63 for (int i = 0; i < m_pending.size(); ++i) { | |
64 meanConfidence += m_pending[i].confidence; | |
65 } | |
66 meanConfidence /= m_pending.size(); | |
67 | |
68 int lengthRequired = int(2.0 / meanConfidence + 0.5); | |
69 std::cerr << "meanConfidence = " << meanConfidence << ", lengthRequired = " << lengthRequired << ", length = " << m_pending.size() << std::endl; | |
70 | |
71 return (m_pending.size() > lengthRequired); | |
61 } | 72 } |
62 | 73 |
63 void | 74 void |
64 CepstrumPitchTracker::Hypothesis::advanceTime() | 75 CepstrumPitchTracker::Hypothesis::advanceTime() |
65 { | 76 { |
151 m_channels(0), | 162 m_channels(0), |
152 m_stepSize(256), | 163 m_stepSize(256), |
153 m_blockSize(1024), | 164 m_blockSize(1024), |
154 m_fmin(50), | 165 m_fmin(50), |
155 m_fmax(1000), | 166 m_fmax(1000), |
156 m_histlen(1), | |
157 m_vflen(3), | 167 m_vflen(3), |
158 m_binFrom(0), | 168 m_binFrom(0), |
159 m_binTo(0), | 169 m_binTo(0), |
160 m_bins(0), | 170 m_bins(0) |
161 m_history(0), | |
162 m_prevpeak(0), | |
163 m_prevprop(0) | |
164 { | 171 { |
165 } | 172 } |
166 | 173 |
167 CepstrumPitchTracker::~CepstrumPitchTracker() | 174 CepstrumPitchTracker::~CepstrumPitchTracker() |
168 { | 175 { |
169 if (m_history) { | |
170 for (int i = 0; i < m_histlen; ++i) { | |
171 delete[] m_history[i]; | |
172 } | |
173 delete[] m_history; | |
174 } | |
175 } | 176 } |
176 | 177 |
177 string | 178 string |
178 CepstrumPitchTracker::getIdentifier() const | 179 CepstrumPitchTracker::getIdentifier() const |
179 { | 180 { |
326 m_binTo = m_blockSize / 2 - 1; | 327 m_binTo = m_blockSize / 2 - 1; |
327 } | 328 } |
328 | 329 |
329 m_bins = (m_binTo - m_binFrom) + 1; | 330 m_bins = (m_binTo - m_binFrom) + 1; |
330 | 331 |
331 m_history = new double *[m_histlen]; | |
332 for (int i = 0; i < m_histlen; ++i) { | |
333 m_history[i] = new double[m_bins]; | |
334 } | |
335 | |
336 reset(); | 332 reset(); |
337 | 333 |
338 return true; | 334 return true; |
339 } | 335 } |
340 | 336 |
341 void | 337 void |
342 CepstrumPitchTracker::reset() | 338 CepstrumPitchTracker::reset() |
343 { | 339 { |
344 for (int i = 0; i < m_histlen; ++i) { | |
345 for (int j = 0; j < m_bins; ++j) { | |
346 m_history[i][j] = 0.0; | |
347 } | |
348 } | |
349 } | 340 } |
350 | 341 |
351 void | 342 void |
352 CepstrumPitchTracker::filter(const double *cep, double *result) | 343 CepstrumPitchTracker::filter(const double *cep, double *data) |
353 { | 344 { |
354 int hix = m_histlen - 1; // current history index | |
355 | |
356 // roll back the history | |
357 if (m_histlen > 1) { | |
358 double *oldest = m_history[0]; | |
359 for (int i = 1; i < m_histlen; ++i) { | |
360 m_history[i-1] = m_history[i]; | |
361 } | |
362 // and stick this back in the newest spot, to recycle | |
363 m_history[hix] = oldest; | |
364 } | |
365 | |
366 for (int i = 0; i < m_bins; ++i) { | 345 for (int i = 0; i < m_bins; ++i) { |
367 double v = 0; | 346 double v = 0; |
368 int n = 0; | 347 int n = 0; |
369 // average according to the vertical filter length | 348 // average according to the vertical filter length |
370 for (int j = -m_vflen/2; j <= m_vflen/2; ++j) { | 349 for (int j = -m_vflen/2; j <= m_vflen/2; ++j) { |
372 if (ix >= 0 && ix < m_blockSize) { | 351 if (ix >= 0 && ix < m_blockSize) { |
373 v += cep[ix]; | 352 v += cep[ix]; |
374 ++n; | 353 ++n; |
375 } | 354 } |
376 } | 355 } |
377 m_history[hix][i] = v / n; | 356 data[i] = v / n; |
378 } | 357 } |
379 | |
380 for (int i = 0; i < m_bins; ++i) { | |
381 double mean = 0.0; | |
382 for (int j = 0; j < m_histlen; ++j) { | |
383 mean += m_history[j][i]; | |
384 } | |
385 mean /= m_histlen; | |
386 result[i] = mean; | |
387 } | |
388 } | |
389 | |
390 double | |
391 CepstrumPitchTracker::calculatePeakProportion(const double *data, double abstot, int n) | |
392 { | |
393 double aroundPeak = data[n]; | |
394 double peakProportion = 0.0; | |
395 | |
396 int i = n - 1; | |
397 while (i > 0 && data[i] <= data[i+1]) { | |
398 aroundPeak += fabs(data[i]); | |
399 --i; | |
400 } | |
401 i = n + 1; | |
402 while (i < m_bins && data[i] <= data[i-1]) { | |
403 aroundPeak += fabs(data[i]); | |
404 ++i; | |
405 } | |
406 peakProportion = aroundPeak / abstot; | |
407 | |
408 return peakProportion; | |
409 } | |
410 | |
411 bool | |
412 CepstrumPitchTracker::acceptPeak(int n, double peakProportion) | |
413 { | |
414 bool accept = false; | |
415 | |
416 if (abs(n - m_prevpeak) < 10) { //!!! should depend on bin count | |
417 accept = true; | |
418 } else if (peakProportion > m_prevprop * 2) { | |
419 accept = true; | |
420 } | |
421 | |
422 return accept; | |
423 } | 358 } |
424 | 359 |
425 CepstrumPitchTracker::FeatureSet | 360 CepstrumPitchTracker::FeatureSet |
426 CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp) | 361 CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp) |
427 { | 362 { |
473 maxval = data[i]; | 408 maxval = data[i]; |
474 maxbin = i; | 409 maxbin = i; |
475 } | 410 } |
476 } | 411 } |
477 | 412 |
478 if (maxbin < 0) return fs; | 413 if (maxbin < 0) { |
414 delete[] data; | |
415 return fs; | |
416 } | |
417 | |
418 double nextPeakVal = 0.0; | |
419 for (int i = 1; i+1 < n; ++i) { | |
420 if (data[i] > data[i-1] && | |
421 data[i] > data[i+1] && | |
422 i != maxbin && | |
423 data[i] > nextPeakVal) { | |
424 nextPeakVal = data[i]; | |
425 } | |
426 } | |
479 | 427 |
480 double peakfreq = m_inputSampleRate / (maxbin + m_binFrom); | 428 double peakfreq = m_inputSampleRate / (maxbin + m_binFrom); |
429 | |
430 double confidence = 0.0; | |
431 if (nextPeakVal != 0.0) { | |
432 confidence = ((maxval / nextPeakVal) - 1.0) / 4.0; | |
433 if (confidence > 1.0) confidence = 1.0; | |
434 } | |
435 | |
481 Hypothesis::Estimate e; | 436 Hypothesis::Estimate e; |
482 e.freq = peakfreq; | 437 e.freq = peakfreq; |
483 e.time = timestamp; | 438 e.time = timestamp; |
439 e.confidence = confidence; | |
484 | 440 |
485 m_accepted.advanceTime(); | 441 m_accepted.advanceTime(); |
486 | 442 |
487 for (int i = 0; i < m_possible.size(); ++i) { | 443 for (int i = 0; i < m_possible.size(); ++i) { |
488 m_possible[i].advanceTime(); | 444 m_possible[i].advanceTime(); |
532 m_possible.push_back(h); | 488 m_possible.push_back(h); |
533 } | 489 } |
534 } | 490 } |
535 } | 491 } |
536 | 492 |
537 std::cerr << "accepted length = " << m_accepted.getPendingLength() | 493 std::cerr << "accepted length = " << m_accepted.getPendingLength() |
538 << ", state = " << m_accepted.getState() | 494 << ", state = " << m_accepted.getState() |
539 << ", hypothesis count = " << m_possible.size() << std::endl; | 495 << ", hypothesis count = " << m_possible.size() << std::endl; |
540 | 496 |
541 | |
542 | |
543 /* | |
544 bool accepted = false; | |
545 | |
546 if (maxbin >= 0) { | |
547 double pp = calculatePeakProportion(data, abstot, maxbin); | |
548 if (acceptPeak(maxbin, pp)) { | |
549 accepted = true; | |
550 } else { | |
551 // try a secondary peak | |
552 maxval = 0.0; | |
553 int secondbin = 0; | |
554 for (int i = 1; i < n-1; ++i) { | |
555 if (i != maxbin && | |
556 data[i] > data[i-1] && | |
557 data[i] > data[i+1] && | |
558 data[i] > maxval) { | |
559 maxval = data[i]; | |
560 secondbin = i; | |
561 } | |
562 } | |
563 double spp = calculatePeakProportion(data, abstot, secondbin); | |
564 if (acceptPeak(secondbin, spp)) { | |
565 maxbin = secondbin; | |
566 pp = spp; | |
567 accepted = true; | |
568 } | |
569 } | |
570 if (accepted) { | |
571 m_prevpeak = maxbin; | |
572 m_prevprop = pp; | |
573 } | |
574 } | |
575 */ | |
576 // std::cerr << "peakProportion = " << peakProportion << std::endl; | |
577 // std::cerr << "peak = " << m_inputSampleRate / (maxbin + m_binFrom) << std::endl; | |
578 // std::cerr << "bins = " << m_bins << std::endl; | |
579 | |
580 // if (peakProportion >= (0.00006 * m_bins)) { | |
581 /* | |
582 if (accepted) { | |
583 Feature f; | |
584 f.hasTimestamp = true; | |
585 f.timestamp = timestamp; | |
586 f.values.push_back(m_inputSampleRate / (maxbin + m_binFrom)); | |
587 fs[0].push_back(f); | |
588 } | |
589 */ | |
590 delete[] data; | 497 delete[] data; |
591 return fs; | 498 return fs; |
592 } | 499 } |
593 | 500 |
594 CepstrumPitchTracker::FeatureSet | 501 CepstrumPitchTracker::FeatureSet |
595 CepstrumPitchTracker::getRemainingFeatures() | 502 CepstrumPitchTracker::getRemainingFeatures() |
596 { | 503 { |
597 FeatureSet fs; | 504 FeatureSet fs; |
598 if (m_accepted.getState() != Hypothesis::New) { | 505 if (m_accepted.getState() == Hypothesis::Satisfied) { |
599 m_accepted.addFeatures(fs[0]); | 506 m_accepted.addFeatures(fs[0]); |
600 } | 507 } |
601 return fs; | 508 return fs; |
602 } | 509 } |
603 | 510 |