Mercurial > hg > match-vamp
comparison src/SubsequenceMatchVampPlugin.cpp @ 246:aac9ad4064ea subsequence tip
Fix incorrect handling of silent tail in the non-subsequence MATCH phase; some debug output changes
author | Chris Cannam |
---|---|
date | Fri, 24 Jul 2020 14:29:55 +0100 |
parents | f68277668ad4 |
children |
comparison
equal
deleted
inserted
replaced
245:91796976e8c6 | 246:aac9ad4064ea |
---|---|
29 using std::vector; | 29 using std::vector; |
30 using std::cerr; | 30 using std::cerr; |
31 using std::cout; | 31 using std::cout; |
32 using std::endl; | 32 using std::endl; |
33 | 33 |
34 //#define DEBUG_SUBSEQUENCE_MATCH 1 | |
35 | |
34 // We want to ensure our freq map / crossover bin are always valid | 36 // We want to ensure our freq map / crossover bin are always valid |
35 // with a fixed FFT length in seconds, so must reject low sample rates | 37 // with a fixed FFT length in seconds, so must reject low sample rates |
36 static float sampleRateMin = 5000.f; | 38 static float sampleRateMin = 5000.f; |
37 | 39 |
38 static float defaultStepTime = 0.020f; | 40 static float defaultStepTime = 0.020f; |
44 Plugin(inputSampleRate), | 46 Plugin(inputSampleRate), |
45 m_stepSize(int(inputSampleRate * defaultStepTime + 0.001)), | 47 m_stepSize(int(inputSampleRate * defaultStepTime + 0.001)), |
46 m_stepTime(defaultStepTime), | 48 m_stepTime(defaultStepTime), |
47 m_blockSize(2048), | 49 m_blockSize(2048), |
48 m_coarseDownsample(defaultCoarseDownsample), | 50 m_coarseDownsample(defaultCoarseDownsample), |
51 m_downsamplePeaks(false), | |
49 m_serialise(false), | 52 m_serialise(false), |
50 m_smooth(false), | 53 m_smooth(false), |
51 m_channelCount(0), | 54 m_channelCount(0), |
52 m_params(defaultStepTime), | 55 m_params(defaultStepTime), |
53 m_defaultParams(defaultStepTime), | 56 m_defaultParams(defaultStepTime), |
136 desc.unit = "Hz"; | 139 desc.unit = "Hz"; |
137 list.push_back(desc); | 140 list.push_back(desc); |
138 | 141 |
139 desc.identifier = "freq2"; | 142 desc.identifier = "freq2"; |
140 desc.name = "Tuning frequency of second input"; | 143 desc.name = "Tuning frequency of second input"; |
141 desc.description = "Tuning frequency (concert A) for the other audio"; | 144 desc.description = "Tuning frequency (concert A) for the other audio."; |
142 desc.minValue = 220.0; | 145 desc.minValue = 220.0; |
143 desc.maxValue = 880.0; | 146 desc.maxValue = 880.0; |
144 desc.defaultValue = float(m_defaultFeParams.referenceFrequency); | 147 desc.defaultValue = float(m_defaultFeParams.referenceFrequency); |
145 desc.isQuantized = false; | 148 desc.isQuantized = false; |
146 desc.unit = "Hz"; | 149 desc.unit = "Hz"; |
176 desc.defaultValue = float(defaultCoarseDownsample); | 179 desc.defaultValue = float(defaultCoarseDownsample); |
177 desc.isQuantized = true; | 180 desc.isQuantized = true; |
178 desc.quantizeStep = 1; | 181 desc.quantizeStep = 1; |
179 list.push_back(desc); | 182 list.push_back(desc); |
180 | 183 |
184 desc.identifier = "downsamplemethod"; | |
185 desc.name = "Coarse alignment downsample method"; | |
186 desc.description = "Downsample method for features used in first coarse subsequence-alignment step"; | |
187 desc.minValue = 0; | |
188 desc.maxValue = 1; | |
189 desc.defaultValue = 0; | |
190 desc.isQuantized = true; | |
191 desc.quantizeStep = 1; | |
192 desc.valueNames.clear(); | |
193 desc.valueNames.push_back("Average"); | |
194 desc.valueNames.push_back("Peak"); | |
195 list.push_back(desc); | |
196 | |
181 desc.identifier = "usechroma"; | 197 desc.identifier = "usechroma"; |
182 desc.name = "Feature type"; | 198 desc.name = "Feature type"; |
183 desc.description = "Whether to use warped spectrogram or chroma frequency map"; | 199 desc.description = "Whether to use warped spectrogram or chroma frequency map"; |
184 desc.minValue = 0; | 200 desc.minValue = 0; |
185 desc.maxValue = 1; | 201 desc.maxValue = 1; |
193 | 209 |
194 desc.valueNames.clear(); | 210 desc.valueNames.clear(); |
195 | 211 |
196 desc.identifier = "usespecdiff"; | 212 desc.identifier = "usespecdiff"; |
197 desc.name = "Use feature difference"; | 213 desc.name = "Use feature difference"; |
198 desc.description = "Whether to use half-wave rectified feature-to-feature difference instead of straight spectral or chroma feature"; | 214 desc.description = "Whether to use half-wave rectified feature-to-feature difference instead of straight spectral or chroma feature (does not apply to downsampled features)"; |
199 desc.minValue = 0; | 215 desc.minValue = 0; |
200 desc.maxValue = 1; | 216 desc.maxValue = 1; |
201 desc.defaultValue = float(m_defaultFcParams.order); | 217 desc.defaultValue = float(m_defaultFcParams.order); |
202 desc.isQuantized = true; | 218 desc.isQuantized = true; |
203 desc.quantizeStep = 1; | 219 desc.quantizeStep = 1; |
383 return float(m_feParams.minFrequency); | 399 return float(m_feParams.minFrequency); |
384 } else if (name == "maxfreq") { | 400 } else if (name == "maxfreq") { |
385 return float(m_feParams.maxFrequency); | 401 return float(m_feParams.maxFrequency); |
386 } else if (name == "coarsedownsample") { | 402 } else if (name == "coarsedownsample") { |
387 return float(m_coarseDownsample); | 403 return float(m_coarseDownsample); |
404 } else if (name == "downsamplemethod") { | |
405 return m_downsamplePeaks ? 1.0 : 0.0; | |
388 } | 406 } |
389 | 407 |
390 return 0.0; | 408 return 0.0; |
391 } | 409 } |
392 | 410 |
429 m_feParams.minFrequency = value; | 447 m_feParams.minFrequency = value; |
430 } else if (name == "maxfreq") { | 448 } else if (name == "maxfreq") { |
431 m_feParams.maxFrequency = value; | 449 m_feParams.maxFrequency = value; |
432 } else if (name == "coarsedownsample") { | 450 } else if (name == "coarsedownsample") { |
433 m_coarseDownsample = int(value + 0.1); | 451 m_coarseDownsample = int(value + 0.1); |
434 } | 452 } else if (name == "downsamplemethod") { |
453 m_downsamplePeaks = (value > 0.5); | |
454 } | |
455 } | |
456 | |
457 SubsequenceMatchVampPlugin::InputDomain | |
458 SubsequenceMatchVampPlugin::getInputDomain() const | |
459 { | |
460 return FrequencyDomain; | |
435 } | 461 } |
436 | 462 |
437 size_t | 463 size_t |
438 SubsequenceMatchVampPlugin::getPreferredStepSize() const | 464 SubsequenceMatchVampPlugin::getPreferredStepSize() const |
439 { | 465 { |
442 | 468 |
443 size_t | 469 size_t |
444 SubsequenceMatchVampPlugin::getPreferredBlockSize() const | 470 SubsequenceMatchVampPlugin::getPreferredBlockSize() const |
445 { | 471 { |
446 return m_defaultFeParams.fftSize; | 472 return m_defaultFeParams.fftSize; |
473 } | |
474 | |
475 size_t | |
476 SubsequenceMatchVampPlugin::getMinChannelCount() const | |
477 { | |
478 return 2; | |
479 } | |
480 | |
481 size_t | |
482 SubsequenceMatchVampPlugin::getMaxChannelCount() const | |
483 { | |
484 return 2; | |
447 } | 485 } |
448 | 486 |
449 bool | 487 bool |
450 SubsequenceMatchVampPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize) | 488 SubsequenceMatchVampPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize) |
451 { | 489 { |
567 } | 605 } |
568 | 606 |
569 return {}; | 607 return {}; |
570 } | 608 } |
571 | 609 |
572 featureseq_t | 610 size_t |
573 SubsequenceMatchVampPlugin::downsample(const featureseq_t &ff) | 611 SubsequenceMatchVampPlugin::findNonEmptyLength(const featureseq_t &ff) |
574 { | 612 { |
575 if (ff.empty()) { | 613 bool haveNonEmpty = false; |
576 return ff; | |
577 } | |
578 | |
579 size_t lastNonEmpty = 0; | 614 size_t lastNonEmpty = 0; |
580 for (size_t i = ff.size(); i > 0; ) { | 615 for (size_t i = ff.size(); i > 0; ) { |
581 --i; | 616 --i; |
582 if (MatchPipeline::isAboveEndingThreshold(ff[i])) { | 617 if (MatchPipeline::isAboveEndingThreshold(ff[i])) { |
618 haveNonEmpty = true; | |
583 lastNonEmpty = i; | 619 lastNonEmpty = i; |
584 break; | 620 break; |
585 } | 621 } |
622 } | |
623 if (haveNonEmpty) { | |
624 return lastNonEmpty + 1; | |
625 } else { | |
626 return 0; | |
627 } | |
628 } | |
629 | |
630 featureseq_t | |
631 SubsequenceMatchVampPlugin::downsample(const featureseq_t &ff, | |
632 size_t inLength) | |
633 { | |
634 if (ff.empty()) { | |
635 return ff; | |
586 } | 636 } |
587 | 637 |
588 FeatureConditioner::Parameters fcParams(m_fcParams); | 638 FeatureConditioner::Parameters fcParams(m_fcParams); |
589 fcParams.order = FeatureConditioner::OutputFeatures; // not the difference | 639 fcParams.order = FeatureConditioner::OutputFeatures; // not the difference |
590 FeatureConditioner fc(fcParams); | 640 FeatureConditioner fc(fcParams); |
592 int featureSize = m_featureExtractors[0].getFeatureSize(); | 642 int featureSize = m_featureExtractors[0].getFeatureSize(); |
593 | 643 |
594 featureseq_t d; | 644 featureseq_t d; |
595 | 645 |
596 size_t i = 0; | 646 size_t i = 0; |
597 while (i < lastNonEmpty) { | 647 while (i < inLength) { |
598 feature_t acc(featureSize, 0); | 648 feature_t acc(featureSize, 0); |
599 int j = 0; | 649 int j = 0; |
600 while (j < m_coarseDownsample) { | 650 while (j < m_coarseDownsample) { |
601 if (i >= ff.size()) break; | 651 if (i >= ff.size()) break; |
602 feature_t feature = fc.process(ff[i]); | 652 feature_t feature = fc.process(ff[i]); |
603 for (int k = 0; k < featureSize; ++k) { | 653 if (m_downsamplePeaks) { |
604 acc[k] += feature[k]; | 654 for (int k = 0; k < featureSize; ++k) { |
655 if (feature[k] > acc[k]) { | |
656 acc[k] = feature[k]; | |
657 } | |
658 } | |
659 } else { | |
660 for (int k = 0; k < featureSize; ++k) { | |
661 acc[k] += feature[k]; | |
662 } | |
605 } | 663 } |
606 ++i; | 664 ++i; |
607 ++j; | 665 ++j; |
608 } | 666 } |
609 if (j > 0) { | 667 if (!m_downsamplePeaks && j > 0) { |
610 for (int k = 0; k < featureSize; ++k) { | 668 for (int k = 0; k < featureSize; ++k) { |
611 acc[k] /= float(j); | 669 acc[k] /= float(j); |
612 } | 670 } |
613 } | 671 } |
614 d.push_back(acc); | 672 d.push_back(acc); |
662 } | 720 } |
663 | 721 |
664 SubsequenceMatchVampPlugin::FeatureSet | 722 SubsequenceMatchVampPlugin::FeatureSet |
665 SubsequenceMatchVampPlugin::performAlignment() | 723 SubsequenceMatchVampPlugin::performAlignment() |
666 { | 724 { |
667 featureseq_t downsampledRef = downsample(m_features[0]); | 725 size_t refLength = findNonEmptyLength(m_features[0]); |
668 | 726 featureseq_t downsampledRef = downsample(m_features[0], refLength); |
669 cerr << "SubsequenceMatchVampPlugin: reference downsampled sequence length = " << downsampledRef.size() << endl; | 727 |
728 #ifdef DEBUG_SUBSEQUENCE_MATCH | |
729 cerr << "SubsequenceMatchVampPlugin: reference downsampled sequence length = " << downsampledRef.size() << " (from " << refLength << " non-empty of " << m_features[0].size() << " total)" << endl; | |
730 #endif | |
670 | 731 |
671 FullDTW dtw(m_fdParams, m_dParams); | 732 FullDTW dtw(m_fdParams, m_dParams); |
672 | 733 |
673 FeatureSet returnFeatures; | 734 FeatureSet returnFeatures; |
674 int featureSize = m_featureExtractors[0].getFeatureSize(); | 735 int featureSize = m_featureExtractors[0].getFeatureSize(); |
675 | 736 |
676 int rate = int(m_inputSampleRate + 0.5); | 737 int rate = int(m_inputSampleRate + 0.5); |
677 | 738 |
678 for (size_t c = 1; c < m_channelCount; ++c) { | 739 for (size_t c = 1; c < m_channelCount; ++c) { |
679 | 740 |
680 featureseq_t downsampledOther = downsample(m_features[c]); | 741 size_t otherLength = findNonEmptyLength(m_features[c]); |
681 | 742 featureseq_t downsampledOther = downsample(m_features[c], otherLength); |
682 cerr << "SubsequenceMatchVampPlugin: other downsampled sequence length = " << downsampledOther.size() << endl; | 743 |
744 #ifdef DEBUG_SUBSEQUENCE_MATCH | |
745 cerr << "SubsequenceMatchVampPlugin: other downsampled sequence length = " << downsampledOther.size() << " (from " << otherLength << " non-empty of " << m_features[c].size() << " total)" << endl; | |
746 #endif | |
683 | 747 |
684 vector<size_t> subsequenceAlignment = dtw.align(downsampledRef, | 748 vector<size_t> subsequenceAlignment = dtw.align(downsampledRef, |
685 downsampledOther); | 749 downsampledOther); |
686 | 750 |
687 if (subsequenceAlignment.empty()) { | 751 if (subsequenceAlignment.empty()) { |
689 continue; | 753 continue; |
690 } | 754 } |
691 | 755 |
692 int64_t first = subsequenceAlignment[0]; | 756 int64_t first = subsequenceAlignment[0]; |
693 int64_t last = subsequenceAlignment[subsequenceAlignment.size()-1]; | 757 int64_t last = subsequenceAlignment[subsequenceAlignment.size()-1]; |
694 cerr << "Subsequence alignment span: " << first << " to " << last << endl; | 758 |
695 | 759 #ifdef DEBUG_SUBSEQUENCE_MATCH |
760 cerr << "Subsequence alignment maps 0 -> " << subsequenceAlignment.size()-1 << " to " << first << " -> " << last << endl; | |
761 #endif | |
696 | 762 |
697 if (last <= first) { | 763 if (last <= first) { |
698 cerr << "NOTE: Invalid span (" << first << " to " << last | 764 cerr << "NOTE: Invalid span (" << first << " to " << last |
699 << "), reverting to aligning against whole of reference" | 765 << "), reverting to aligning against whole of reference" |
700 << endl; | 766 << endl; |
727 | 793 |
728 featureseq_t referenceSubsequence | 794 featureseq_t referenceSubsequence |
729 (m_features[0].begin() + firstAtOriginalRate, | 795 (m_features[0].begin() + firstAtOriginalRate, |
730 m_features[0].begin() + lastAtOriginalRate); | 796 m_features[0].begin() + lastAtOriginalRate); |
731 | 797 |
798 #ifdef DEBUG_SUBSEQUENCE_MATCH | |
799 cerr << "Reference subsequence length = " << referenceSubsequence.size() | |
800 << endl; | |
801 cerr << "Other sequence length = " << otherLength << endl; | |
802 #endif | |
803 | |
732 MatchPipeline pipeline(m_feParams, | 804 MatchPipeline pipeline(m_feParams, |
733 m_fcParams, | 805 m_fcParams, |
734 m_dParams, | 806 m_dParams, |
735 m_params, | 807 m_params, |
736 m_secondReferenceFrequency); | 808 m_secondReferenceFrequency); |
737 | 809 |
738 for (size_t i = 0; i < referenceSubsequence.size() && | 810 size_t sequenceLength = std::max(referenceSubsequence.size(), |
739 i < m_features[c].size(); ++i) { | 811 otherLength); |
812 | |
813 #ifdef DEBUG_SUBSEQUENCE_MATCH | |
814 cerr << "MATCH input sequences have length " << sequenceLength << endl; | |
815 #endif | |
816 | |
817 for (size_t i = 0; i < sequenceLength; ++i) { | |
740 feature_t f1(featureSize, 0); | 818 feature_t f1(featureSize, 0); |
741 feature_t f2(featureSize, 0); | 819 feature_t f2(featureSize, 0); |
742 if (i < referenceSubsequence.size()) { | 820 if (i < referenceSubsequence.size()) { |
743 f1 = referenceSubsequence[i]; | 821 f1 = referenceSubsequence[i]; |
744 } | 822 } |
745 if (i < m_features[c].size()) { | 823 if (i < otherLength) { |
746 f2 = m_features[c][i]; | 824 f2 = m_features[c][i]; |
747 } | 825 } |
748 pipeline.feedFeatures(f1, f2); | 826 pipeline.feedFeatures(f1, f2); |
749 } | 827 } |
750 | 828 |
753 vector<int> pathx; | 831 vector<int> pathx; |
754 vector<int> pathy; | 832 vector<int> pathy; |
755 int len = pipeline.retrievePath(m_smooth, pathx, pathy); | 833 int len = pipeline.retrievePath(m_smooth, pathx, pathy); |
756 | 834 |
757 int prevy = 0; | 835 int prevy = 0; |
836 | |
837 #ifdef DEBUG_SUBSEQUENCE_MATCH | |
838 cerr << "MATCH path has length " << len; | |
839 if (len > 0) { | |
840 cerr << " and goes from (" | |
841 << pathx[0] << ", " << pathy[0] << ") to (" | |
842 << pathx[len-1] << ", " << pathy[len-1] << ")"; | |
843 if (len > 2) { | |
844 cerr << " with penultimate point at (" | |
845 << pathx[len-2] << ", " << pathy[len-2] << ")"; | |
846 } | |
847 cerr << endl; | |
848 } else { | |
849 cerr << endl; | |
850 } | |
851 #endif | |
758 | 852 |
759 for (int i = 0; i < len; ++i) { | 853 for (int i = 0; i < len; ++i) { |
760 | 854 |
761 int x = pathx[i]; | 855 int x = pathx[i]; |
762 int y = pathy[i] + int(first * m_coarseDownsample); | 856 int y = pathy[i] + int(first * m_coarseDownsample); |
763 | 857 |
764 Vamp::RealTime xt = Vamp::RealTime::frame2RealTime | 858 Vamp::RealTime xt = Vamp::RealTime::frame2RealTime |
765 (x * m_stepSize, rate) + m_startTime; | 859 (x * m_stepSize, rate) + m_startTime; |
766 Vamp::RealTime yt = Vamp::RealTime::frame2RealTime | 860 Vamp::RealTime yt = Vamp::RealTime::frame2RealTime |
767 (y * m_stepSize, rate) + m_startTime; | 861 (y * m_stepSize, rate) + m_startTime; |
768 | 862 |
783 | 877 |
784 prevy = y; | 878 prevy = y; |
785 } | 879 } |
786 } | 880 } |
787 | 881 |
882 #ifdef DEBUG_SUBSEQUENCE_MATCH | |
883 cerr << endl; | |
884 #endif | |
885 | |
788 return returnFeatures; | 886 return returnFeatures; |
789 } | 887 } |