Mercurial > hg > pyin
comparison PYinVamp.cpp @ 146:8404827a4b02 memory
Avoid calculating a temporary obsprob matrix for note tracking; + some tidying
author | Chris Cannam |
---|---|
date | Wed, 17 May 2017 14:50:10 +0100 |
parents | 0432723faf03 |
children | e3f53fac06c3 |
comparison legend (row colour key in the original page):
equal
deleted
inserted
replaced
145:0432723faf03 | 146:8404827a4b02 |
---|---|
320 d.identifier = "f0candidates"; | 320 d.identifier = "f0candidates"; |
321 d.name = "F0 Candidates"; | 321 d.name = "F0 Candidates"; |
322 d.description = "Estimated fundamental frequency candidates."; | 322 d.description = "Estimated fundamental frequency candidates."; |
323 d.unit = "Hz"; | 323 d.unit = "Hz"; |
324 d.hasFixedBinCount = false; | 324 d.hasFixedBinCount = false; |
325 // d.binCount = 1; | |
326 d.hasKnownExtents = true; | 325 d.hasKnownExtents = true; |
327 d.minValue = m_fmin; | 326 d.minValue = m_fmin; |
328 d.maxValue = 500; | 327 d.maxValue = 500; |
329 d.isQuantized = false; | 328 d.isQuantized = false; |
330 d.sampleType = OutputDescriptor::FixedSampleRate; | 329 d.sampleType = OutputDescriptor::FixedSampleRate; |
333 outputs.push_back(d); | 332 outputs.push_back(d); |
334 m_oF0Candidates = outputNumber++; | 333 m_oF0Candidates = outputNumber++; |
335 | 334 |
336 d.identifier = "f0probs"; | 335 d.identifier = "f0probs"; |
337 d.name = "Candidate Probabilities"; | 336 d.name = "Candidate Probabilities"; |
338 d.description = "Probabilities of estimated fundamental frequency candidates."; | 337 d.description = "Probabilities of estimated fundamental frequency candidates."; |
339 d.unit = ""; | 338 d.unit = ""; |
340 d.hasFixedBinCount = false; | 339 d.hasFixedBinCount = false; |
341 // d.binCount = 1; | |
342 d.hasKnownExtents = true; | 340 d.hasKnownExtents = true; |
343 d.minValue = 0; | 341 d.minValue = 0; |
344 d.maxValue = 1; | 342 d.maxValue = 1; |
345 d.isQuantized = false; | 343 d.isQuantized = false; |
346 d.sampleType = OutputDescriptor::FixedSampleRate; | 344 d.sampleType = OutputDescriptor::FixedSampleRate; |
380 outputs.push_back(d); | 378 outputs.push_back(d); |
381 m_oCandidateSalience = outputNumber++; | 379 m_oCandidateSalience = outputNumber++; |
382 | 380 |
383 d.identifier = "smoothedpitchtrack"; | 381 d.identifier = "smoothedpitchtrack"; |
384 d.name = "Smoothed Pitch Track"; | 382 d.name = "Smoothed Pitch Track"; |
385 d.description = "."; | 383 d.description = "Frame-by-frame pitch estimate after smoothing"; |
386 d.unit = "Hz"; | 384 d.unit = "Hz"; |
387 d.hasFixedBinCount = true; | 385 d.hasFixedBinCount = true; |
388 d.binCount = 1; | 386 d.binCount = 1; |
389 d.hasKnownExtents = false; | 387 d.hasKnownExtents = false; |
390 // d.minValue = 0; | |
391 // d.maxValue = 1; | |
392 d.isQuantized = false; | 388 d.isQuantized = false; |
393 d.sampleType = OutputDescriptor::FixedSampleRate; | 389 d.sampleType = OutputDescriptor::FixedSampleRate; |
394 d.sampleRate = (m_inputSampleRate / m_stepSize); | 390 d.sampleRate = (m_inputSampleRate / m_stepSize); |
395 d.hasDuration = false; | 391 d.hasDuration = false; |
396 outputs.push_back(d); | 392 outputs.push_back(d); |
397 m_oSmoothedPitchTrack = outputNumber++; | 393 m_oSmoothedPitchTrack = outputNumber++; |
398 | 394 |
399 d.identifier = "notes"; | 395 d.identifier = "notes"; |
400 d.name = "Notes"; | 396 d.name = "Notes"; |
401 d.description = "Derived fixed-pitch note frequencies"; | 397 d.description = "Derived fixed-pitch note frequencies"; |
402 // d.unit = "MIDI unit"; | |
403 d.unit = "Hz"; | 398 d.unit = "Hz"; |
404 d.hasFixedBinCount = true; | 399 d.hasFixedBinCount = true; |
405 d.binCount = 1; | 400 d.binCount = 1; |
406 d.hasKnownExtents = false; | 401 d.hasKnownExtents = false; |
407 d.isQuantized = false; | 402 d.isQuantized = false; |
418 PYinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize) | 413 PYinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize) |
419 { | 414 { |
420 if (channels < getMinChannelCount() || | 415 if (channels < getMinChannelCount() || |
421 channels > getMaxChannelCount()) return false; | 416 channels > getMaxChannelCount()) return false; |
422 | 417 |
423 /* | |
424 std::cerr << "PYinVamp::initialise: channels = " << channels | |
425 << ", stepSize = " << stepSize << ", blockSize = " << blockSize | |
426 << std::endl; | |
427 */ | |
428 m_channels = channels; | 418 m_channels = channels; |
429 m_stepSize = stepSize; | 419 m_stepSize = stepSize; |
430 m_blockSize = blockSize; | 420 m_blockSize = blockSize; |
431 | 421 |
432 reset(); | 422 reset(); |
446 | 436 |
447 m_pitchProb.clear(); | 437 m_pitchProb.clear(); |
448 m_timestamp.clear(); | 438 m_timestamp.clear(); |
449 m_level.clear(); | 439 m_level.clear(); |
450 m_pitchTrack.clear(); | 440 m_pitchTrack.clear(); |
451 /* | |
452 std::cerr << "PYinVamp::reset" | |
453 << ", blockSize = " << m_blockSize | |
454 << std::endl; | |
455 */ | |
456 } | 441 } |
457 | 442 |
458 PYinVamp::FeatureSet | 443 PYinVamp::FeatureSet |
459 PYinVamp::process(const float *const *inputBuffers, RealTime timestamp) | 444 PYinVamp::process(const float *const *inputBuffers, RealTime timestamp) |
460 { | 445 { |
461 // std::cerr << timestamp << std::endl; | |
462 int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4; | 446 int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4; |
463 timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, | 447 timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, |
464 lrintf(m_inputSampleRate)); | 448 lrintf(m_inputSampleRate)); |
465 | 449 |
466 FeatureSet fs; | 450 FeatureSet fs; |
540 fs[m_oSmoothedPitchTrack].push_back(f); | 524 fs[m_oSmoothedPitchTrack].push_back(f); |
541 } | 525 } |
542 } | 526 } |
543 } | 527 } |
544 | 528 |
545 | |
546 // F0 CANDIDATES | 529 // F0 CANDIDATES |
547 Feature f; | 530 Feature f; |
548 f.hasTimestamp = true; | 531 f.hasTimestamp = true; |
549 f.timestamp = timestamp; | 532 f.timestamp = timestamp; |
550 for (size_t i = 0; i < yo.freqProb.size(); ++i) | 533 for (size_t i = 0; i < yo.freqProb.size(); ++i) |
582 | 565 |
583 PYinVamp::FeatureSet | 566 PYinVamp::FeatureSet |
584 PYinVamp::getRemainingFeatures() | 567 PYinVamp::getRemainingFeatures() |
585 { | 568 { |
586 FeatureSet fs; | 569 FeatureSet fs; |
570 | |
571 if (m_pitchProb.empty()) { | |
572 return fs; | |
573 } | |
574 | |
587 Feature f; | 575 Feature f; |
588 f.hasTimestamp = true; | 576 f.hasTimestamp = true; |
589 f.hasDuration = false; | 577 f.hasDuration = false; |
590 | |
591 if (m_pitchProb.empty()) { | |
592 return fs; | |
593 } | |
594 | 578 |
595 // ================== P I T C H T R A C K ================================= | 579 // ================== P I T C H T R A C K ================================= |
596 | 580 |
581 // NB we do this even in fixed-lag mode, as we still have the last | |
582 // lag's-worth of pitch probs to consume | |
583 | |
597 vector<int> rawPitchPath = m_pitchHmm.track(); | 584 vector<int> rawPitchPath = m_pitchHmm.track(); |
598 | 585 |
599 for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame) | 586 for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame) |
600 { | 587 { |
601 float freq = m_pitchHmm.nearestFreq(rawPitchPath[iFrame], | 588 float freq = m_pitchHmm.nearestFreq(rawPitchPath[iFrame], |
602 m_pitchProb[iFrame]); | 589 m_pitchProb[iFrame]); |
603 m_pitchTrack.push_back(freq); // for note processing below | 590 m_pitchTrack.push_back(freq); // for note processing below |
604 | 591 |
605 f.timestamp = m_timestamp[iFrame]; | 592 f.timestamp = m_timestamp[iFrame]; |
606 f.values.clear(); | 593 f.values.clear(); |
607 | 594 |
608 // different output modes | 595 // different output modes |
609 if (freq < 0 && (m_outputUnvoiced==0)) continue; | 596 if (freq < 0 && (m_outputUnvoiced==0)) continue; |
610 if (m_outputUnvoiced == 1) | 597 if (m_outputUnvoiced == 1) |
611 { | 598 { |
612 f.values.push_back(fabs(freq)); | 599 f.values.push_back(fabs(freq)); |
613 } else { | 600 } else { |
614 f.values.push_back(freq); | 601 f.values.push_back(freq); |
615 } | 602 } |
616 fs[m_oSmoothedPitchTrack].push_back(f); | 603 fs[m_oSmoothedPitchTrack].push_back(f); |
617 } | 604 } |
618 | 605 |
619 // ======================== N O T E S ====================================== | 606 addNoteFeatures(fs); |
620 MonoNote mn; | 607 |
608 return fs; | |
609 } | |
610 | |
611 void | |
612 PYinVamp::addNoteFeatures(FeatureSet &fs) | |
613 { | |
621 std::vector<std::vector<std::pair<double, double> > > smoothedPitch; | 614 std::vector<std::vector<std::pair<double, double> > > smoothedPitch; |
622 for (size_t iFrame = 0; iFrame < m_pitchTrack.size(); ++iFrame) { | 615 for (size_t iFrame = 0; iFrame < m_pitchTrack.size(); ++iFrame) { |
623 std::vector<std::pair<double, double> > temp; | 616 std::vector<std::pair<double, double> > temp; |
624 if (m_pitchTrack[iFrame] > 0) | 617 if (m_pitchTrack[iFrame] > 0) |
625 { | 618 { |
626 double tempPitch = 12 * | 619 double tempPitch = 12 * |
627 std::log(m_pitchTrack[iFrame]/440)/std::log(2.) + 69; | 620 std::log(m_pitchTrack[iFrame]/440)/std::log(2.) + 69; |
628 temp.push_back(std::pair<double,double>(tempPitch, .9)); | 621 temp.push_back(std::pair<double,double>(tempPitch, .9)); |
629 // std::cerr << "tempPitch: " << tempPitch << std::endl; | |
630 } | 622 } |
631 // std::cerr << "temp size: " << temp.size() << std::endl; | |
632 smoothedPitch.push_back(temp); | 623 smoothedPitch.push_back(temp); |
633 } | 624 } |
634 | 625 |
626 MonoNote mn; | |
635 vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch); | 627 vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch); |
628 | |
636 std::cerr << "mnOut size: " << mnOut.size() << std::endl; | 629 std::cerr << "mnOut size: " << mnOut.size() << std::endl; |
637 std::cerr << "m_pitchTrack size: " << m_pitchTrack.size() << std::endl; | 630 std::cerr << "m_pitchTrack size: " << m_pitchTrack.size() << std::endl; |
638 | 631 |
639 // turning feature into a note feature | 632 // turning feature into a note feature |
633 Feature f; | |
640 f.hasTimestamp = true; | 634 f.hasTimestamp = true; |
641 f.hasDuration = true; | 635 f.hasDuration = true; |
642 f.values.clear(); | 636 f.values.clear(); |
643 | 637 |
644 int onsetFrame = 0; | 638 int onsetFrame = 0; |
700 notePitchTrack.clear(); | 694 notePitchTrack.clear(); |
701 } | 695 } |
702 } | 696 } |
703 oldIsVoiced = isVoiced; | 697 oldIsVoiced = isVoiced; |
704 } | 698 } |
705 return fs; | 699 } |
706 } |