comparison PYinVamp.cpp @ 146:8404827a4b02 memory

Avoid calculating a temporary obsprob matrix for note tracking; + some tidying
author Chris Cannam
date Wed, 17 May 2017 14:50:10 +0100
parents 0432723faf03
children e3f53fac06c3
comparison
equal deleted inserted replaced
145:0432723faf03 146:8404827a4b02
320 d.identifier = "f0candidates"; 320 d.identifier = "f0candidates";
321 d.name = "F0 Candidates"; 321 d.name = "F0 Candidates";
322 d.description = "Estimated fundamental frequency candidates."; 322 d.description = "Estimated fundamental frequency candidates.";
323 d.unit = "Hz"; 323 d.unit = "Hz";
324 d.hasFixedBinCount = false; 324 d.hasFixedBinCount = false;
325 // d.binCount = 1;
326 d.hasKnownExtents = true; 325 d.hasKnownExtents = true;
327 d.minValue = m_fmin; 326 d.minValue = m_fmin;
328 d.maxValue = 500; 327 d.maxValue = 500;
329 d.isQuantized = false; 328 d.isQuantized = false;
330 d.sampleType = OutputDescriptor::FixedSampleRate; 329 d.sampleType = OutputDescriptor::FixedSampleRate;
333 outputs.push_back(d); 332 outputs.push_back(d);
334 m_oF0Candidates = outputNumber++; 333 m_oF0Candidates = outputNumber++;
335 334
336 d.identifier = "f0probs"; 335 d.identifier = "f0probs";
337 d.name = "Candidate Probabilities"; 336 d.name = "Candidate Probabilities";
338 d.description = "Probabilities of estimated fundamental frequency candidates."; 337 d.description = "Probabilities of estimated fundamental frequency candidates.";
339 d.unit = ""; 338 d.unit = "";
340 d.hasFixedBinCount = false; 339 d.hasFixedBinCount = false;
341 // d.binCount = 1;
342 d.hasKnownExtents = true; 340 d.hasKnownExtents = true;
343 d.minValue = 0; 341 d.minValue = 0;
344 d.maxValue = 1; 342 d.maxValue = 1;
345 d.isQuantized = false; 343 d.isQuantized = false;
346 d.sampleType = OutputDescriptor::FixedSampleRate; 344 d.sampleType = OutputDescriptor::FixedSampleRate;
380 outputs.push_back(d); 378 outputs.push_back(d);
381 m_oCandidateSalience = outputNumber++; 379 m_oCandidateSalience = outputNumber++;
382 380
383 d.identifier = "smoothedpitchtrack"; 381 d.identifier = "smoothedpitchtrack";
384 d.name = "Smoothed Pitch Track"; 382 d.name = "Smoothed Pitch Track";
385 d.description = "."; 383 d.description = "Frame-by-frame pitch estimate after smoothing";
386 d.unit = "Hz"; 384 d.unit = "Hz";
387 d.hasFixedBinCount = true; 385 d.hasFixedBinCount = true;
388 d.binCount = 1; 386 d.binCount = 1;
389 d.hasKnownExtents = false; 387 d.hasKnownExtents = false;
390 // d.minValue = 0;
391 // d.maxValue = 1;
392 d.isQuantized = false; 388 d.isQuantized = false;
393 d.sampleType = OutputDescriptor::FixedSampleRate; 389 d.sampleType = OutputDescriptor::FixedSampleRate;
394 d.sampleRate = (m_inputSampleRate / m_stepSize); 390 d.sampleRate = (m_inputSampleRate / m_stepSize);
395 d.hasDuration = false; 391 d.hasDuration = false;
396 outputs.push_back(d); 392 outputs.push_back(d);
397 m_oSmoothedPitchTrack = outputNumber++; 393 m_oSmoothedPitchTrack = outputNumber++;
398 394
399 d.identifier = "notes"; 395 d.identifier = "notes";
400 d.name = "Notes"; 396 d.name = "Notes";
401 d.description = "Derived fixed-pitch note frequencies"; 397 d.description = "Derived fixed-pitch note frequencies";
402 // d.unit = "MIDI unit";
403 d.unit = "Hz"; 398 d.unit = "Hz";
404 d.hasFixedBinCount = true; 399 d.hasFixedBinCount = true;
405 d.binCount = 1; 400 d.binCount = 1;
406 d.hasKnownExtents = false; 401 d.hasKnownExtents = false;
407 d.isQuantized = false; 402 d.isQuantized = false;
418 PYinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize) 413 PYinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize)
419 { 414 {
420 if (channels < getMinChannelCount() || 415 if (channels < getMinChannelCount() ||
421 channels > getMaxChannelCount()) return false; 416 channels > getMaxChannelCount()) return false;
422 417
423 /*
424 std::cerr << "PYinVamp::initialise: channels = " << channels
425 << ", stepSize = " << stepSize << ", blockSize = " << blockSize
426 << std::endl;
427 */
428 m_channels = channels; 418 m_channels = channels;
429 m_stepSize = stepSize; 419 m_stepSize = stepSize;
430 m_blockSize = blockSize; 420 m_blockSize = blockSize;
431 421
432 reset(); 422 reset();
446 436
447 m_pitchProb.clear(); 437 m_pitchProb.clear();
448 m_timestamp.clear(); 438 m_timestamp.clear();
449 m_level.clear(); 439 m_level.clear();
450 m_pitchTrack.clear(); 440 m_pitchTrack.clear();
451 /*
452 std::cerr << "PYinVamp::reset"
453 << ", blockSize = " << m_blockSize
454 << std::endl;
455 */
456 } 441 }
457 442
458 PYinVamp::FeatureSet 443 PYinVamp::FeatureSet
459 PYinVamp::process(const float *const *inputBuffers, RealTime timestamp) 444 PYinVamp::process(const float *const *inputBuffers, RealTime timestamp)
460 { 445 {
461 // std::cerr << timestamp << std::endl;
462 int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4; 446 int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
463 timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, 447 timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset,
464 lrintf(m_inputSampleRate)); 448 lrintf(m_inputSampleRate));
465 449
466 FeatureSet fs; 450 FeatureSet fs;
540 fs[m_oSmoothedPitchTrack].push_back(f); 524 fs[m_oSmoothedPitchTrack].push_back(f);
541 } 525 }
542 } 526 }
543 } 527 }
544 528
545
546 // F0 CANDIDATES 529 // F0 CANDIDATES
547 Feature f; 530 Feature f;
548 f.hasTimestamp = true; 531 f.hasTimestamp = true;
549 f.timestamp = timestamp; 532 f.timestamp = timestamp;
550 for (size_t i = 0; i < yo.freqProb.size(); ++i) 533 for (size_t i = 0; i < yo.freqProb.size(); ++i)
582 565
583 PYinVamp::FeatureSet 566 PYinVamp::FeatureSet
584 PYinVamp::getRemainingFeatures() 567 PYinVamp::getRemainingFeatures()
585 { 568 {
586 FeatureSet fs; 569 FeatureSet fs;
570
571 if (m_pitchProb.empty()) {
572 return fs;
573 }
574
587 Feature f; 575 Feature f;
588 f.hasTimestamp = true; 576 f.hasTimestamp = true;
589 f.hasDuration = false; 577 f.hasDuration = false;
590
591 if (m_pitchProb.empty()) {
592 return fs;
593 }
594 578
595 // ================== P I T C H T R A C K ================================= 579 // ================== P I T C H T R A C K =================================
596 580
581 // NB we do this even in fixed-lag mode, as we still have the last
582 // lag's-worth of pitch probs to consume
583
597 vector<int> rawPitchPath = m_pitchHmm.track(); 584 vector<int> rawPitchPath = m_pitchHmm.track();
598 585
599 for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame) 586 for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame)
600 { 587 {
601 float freq = m_pitchHmm.nearestFreq(rawPitchPath[iFrame], 588 float freq = m_pitchHmm.nearestFreq(rawPitchPath[iFrame],
602 m_pitchProb[iFrame]); 589 m_pitchProb[iFrame]);
603 m_pitchTrack.push_back(freq); // for note processing below 590 m_pitchTrack.push_back(freq); // for note processing below
604 591
605 f.timestamp = m_timestamp[iFrame]; 592 f.timestamp = m_timestamp[iFrame];
606 f.values.clear(); 593 f.values.clear();
607 594
608 // different output modes 595 // different output modes
609 if (freq < 0 && (m_outputUnvoiced==0)) continue; 596 if (freq < 0 && (m_outputUnvoiced==0)) continue;
610 if (m_outputUnvoiced == 1) 597 if (m_outputUnvoiced == 1)
611 { 598 {
612 f.values.push_back(fabs(freq)); 599 f.values.push_back(fabs(freq));
613 } else { 600 } else {
614 f.values.push_back(freq); 601 f.values.push_back(freq);
615 } 602 }
616 fs[m_oSmoothedPitchTrack].push_back(f); 603 fs[m_oSmoothedPitchTrack].push_back(f);
617 } 604 }
618 605
619 // ======================== N O T E S ====================================== 606 addNoteFeatures(fs);
620 MonoNote mn; 607
608 return fs;
609 }
610
611 void
612 PYinVamp::addNoteFeatures(FeatureSet &fs)
613 {
621 std::vector<std::vector<std::pair<double, double> > > smoothedPitch; 614 std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
622 for (size_t iFrame = 0; iFrame < m_pitchTrack.size(); ++iFrame) { 615 for (size_t iFrame = 0; iFrame < m_pitchTrack.size(); ++iFrame) {
623 std::vector<std::pair<double, double> > temp; 616 std::vector<std::pair<double, double> > temp;
624 if (m_pitchTrack[iFrame] > 0) 617 if (m_pitchTrack[iFrame] > 0)
625 { 618 {
626 double tempPitch = 12 * 619 double tempPitch = 12 *
627 std::log(m_pitchTrack[iFrame]/440)/std::log(2.) + 69; 620 std::log(m_pitchTrack[iFrame]/440)/std::log(2.) + 69;
628 temp.push_back(std::pair<double,double>(tempPitch, .9)); 621 temp.push_back(std::pair<double,double>(tempPitch, .9));
629 // std::cerr << "tempPitch: " << tempPitch << std::endl;
630 } 622 }
631 // std::cerr << "temp size: " << temp.size() << std::endl;
632 smoothedPitch.push_back(temp); 623 smoothedPitch.push_back(temp);
633 } 624 }
634 625
626 MonoNote mn;
635 vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch); 627 vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
628
636 std::cerr << "mnOut size: " << mnOut.size() << std::endl; 629 std::cerr << "mnOut size: " << mnOut.size() << std::endl;
637 std::cerr << "m_pitchTrack size: " << m_pitchTrack.size() << std::endl; 630 std::cerr << "m_pitchTrack size: " << m_pitchTrack.size() << std::endl;
638 631
639 // turning feature into a note feature 632 // turning feature into a note feature
633 Feature f;
640 f.hasTimestamp = true; 634 f.hasTimestamp = true;
641 f.hasDuration = true; 635 f.hasDuration = true;
642 f.values.clear(); 636 f.values.clear();
643 637
644 int onsetFrame = 0; 638 int onsetFrame = 0;
700 notePitchTrack.clear(); 694 notePitchTrack.clear();
701 } 695 }
702 } 696 }
703 oldIsVoiced = isVoiced; 697 oldIsVoiced = isVoiced;
704 } 698 }
705 return fs; 699 }
706 }