match-vamp: src/SubsequenceMatchVampPlugin.cpp comparison

comparison src/SubsequenceMatchVampPlugin.cpp @ 246:aac9ad4064ea subsequence tip

Fix incorrect handling of silent tail in the non-subsequence MATCH phase; some debug output changes

author	Chris Cannam
date	Fri, 24 Jul 2020 14:29:55 +0100
parents	f68277668ad4
children

comparison

Legend: equal, deleted, inserted, replaced

-:91796976e8c6
+:aac9ad4064ea
 using std::vector;
 using std::cerr;
 using std::cout;
 using std::endl;
+//#define DEBUG_SUBSEQUENCE_MATCH 1
 // We want to ensure our freq map / crossover bin are always valid
 // with a fixed FFT length in seconds, so must reject low sample rates
 static float sampleRateMin = 5000.f;
 static float defaultStepTime = 0.020f;
 Plugin(inputSampleRate),
 m_stepSize(int(inputSampleRate * defaultStepTime + 0.001)),
 m_stepTime(defaultStepTime),
 m_blockSize(2048),
 m_coarseDownsample(defaultCoarseDownsample),
+m_downsamplePeaks(false),
 m_serialise(false),
 m_smooth(false),
 m_channelCount(0),
 m_params(defaultStepTime),
 m_defaultParams(defaultStepTime),
 desc.unit = "Hz";
 list.push_back(desc);
 desc.identifier = "freq2";
 desc.name = "Tuning frequency of second input";
-desc.description = "Tuning frequency (concert A) for the other audio";
+desc.description = "Tuning frequency (concert A) for the other audio.";
 desc.minValue = 220.0;
 desc.maxValue = 880.0;
 desc.defaultValue = float(m_defaultFeParams.referenceFrequency);
 desc.isQuantized = false;
 desc.unit = "Hz";
 desc.defaultValue = float(defaultCoarseDownsample);
 desc.isQuantized = true;
 desc.quantizeStep = 1;
 list.push_back(desc);
+desc.identifier = "downsamplemethod";
+desc.name = "Coarse alignment downsample method";
+desc.description = "Downsample method for features used in first coarse subsequence-alignment step";
+desc.minValue = 0;
+desc.maxValue = 1;
+desc.defaultValue = 0;
+desc.isQuantized = true;
+desc.quantizeStep = 1;
+desc.valueNames.clear();
+desc.valueNames.push_back("Average");
+desc.valueNames.push_back("Peak");
+list.push_back(desc);
 desc.identifier = "usechroma";
 desc.name = "Feature type";
 desc.description = "Whether to use warped spectrogram or chroma frequency map";
 desc.minValue = 0;
 desc.maxValue = 1;
 desc.valueNames.clear();
 desc.identifier = "usespecdiff";
 desc.name = "Use feature difference";
-desc.description = "Whether to use half-wave rectified feature-to-feature difference instead of straight spectral or chroma feature";
+desc.description = "Whether to use half-wave rectified feature-to-feature difference instead of straight spectral or chroma feature (does not apply to downsampled features)";
 desc.minValue = 0;
 desc.maxValue = 1;
 desc.defaultValue = float(m_defaultFcParams.order);
 desc.isQuantized = true;
 desc.quantizeStep = 1;
 return float(m_feParams.minFrequency);
 } else if (name == "maxfreq") {
 return float(m_feParams.maxFrequency);
 } else if (name == "coarsedownsample") {
 return float(m_coarseDownsample);
+} else if (name == "downsamplemethod") {
+return m_downsamplePeaks ? 1.0 : 0.0;
 }
 return 0.0;
 }
 m_feParams.minFrequency = value;
 } else if (name == "maxfreq") {
 m_feParams.maxFrequency = value;
 } else if (name == "coarsedownsample") {
 m_coarseDownsample = int(value + 0.1);
-}
+} else if (name == "downsamplemethod") {
+m_downsamplePeaks = (value > 0.5);
+}
+}
+SubsequenceMatchVampPlugin::InputDomain
+SubsequenceMatchVampPlugin::getInputDomain() const
+{
+return FrequencyDomain;
 }
 size_t
 SubsequenceMatchVampPlugin::getPreferredStepSize() const
 {
 size_t
 SubsequenceMatchVampPlugin::getPreferredBlockSize() const
 {
 return m_defaultFeParams.fftSize;
+}
+size_t
+SubsequenceMatchVampPlugin::getMinChannelCount() const
+{
+return 2;
+}
+size_t
+SubsequenceMatchVampPlugin::getMaxChannelCount() const
+{
+return 2;
 }
 bool
 SubsequenceMatchVampPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
 {
 }
 return {};
 }
-featureseq_t
+size_t
-SubsequenceMatchVampPlugin::downsample(const featureseq_t &ff)
+SubsequenceMatchVampPlugin::findNonEmptyLength(const featureseq_t &ff)
 {
-if (ff.empty()) {
+bool haveNonEmpty = false;
-return ff;
-}
 size_t lastNonEmpty = 0;
 for (size_t i = ff.size(); i > 0; ) {
 --i;
 if (MatchPipeline::isAboveEndingThreshold(ff[i])) {
+haveNonEmpty = true;
 lastNonEmpty = i;
 break;
 }
+}
+if (haveNonEmpty) {
+return lastNonEmpty + 1;
+} else {
+return 0;
+}
+}
+featureseq_t
+SubsequenceMatchVampPlugin::downsample(const featureseq_t &ff,
+size_t inLength)
+{
+if (ff.empty()) {
+return ff;
 }
 FeatureConditioner::Parameters fcParams(m_fcParams);
 fcParams.order = FeatureConditioner::OutputFeatures; // not the difference
 FeatureConditioner fc(fcParams);
 int featureSize = m_featureExtractors[0].getFeatureSize();
 featureseq_t d;
 size_t i = 0;
-while (i < lastNonEmpty) {
+while (i < inLength) {
 feature_t acc(featureSize, 0);
 int j = 0;
 while (j < m_coarseDownsample) {
 if (i >= ff.size()) break;
 feature_t feature = fc.process(ff[i]);
-for (int k = 0; k < featureSize; ++k) {
+if (m_downsamplePeaks) {
-acc[k] += feature[k];
+for (int k = 0; k < featureSize; ++k) {
+if (feature[k] > acc[k]) {
+acc[k] = feature[k];
+}
+}
+} else {
+for (int k = 0; k < featureSize; ++k) {
+acc[k] += feature[k];
+}
 }
 ++i;
 ++j;
 }
-if (j > 0) {
+if (!m_downsamplePeaks && j > 0) {
 for (int k = 0; k < featureSize; ++k) {
 acc[k] /= float(j);
 }
 }
 d.push_back(acc);
 }
 SubsequenceMatchVampPlugin::FeatureSet
 SubsequenceMatchVampPlugin::performAlignment()
 {
-featureseq_t downsampledRef = downsample(m_features[0]);
+size_t refLength = findNonEmptyLength(m_features[0]);
+featureseq_t downsampledRef = downsample(m_features[0], refLength);
-cerr << "SubsequenceMatchVampPlugin: reference downsampled sequence length = " << downsampledRef.size() << endl;
+#ifdef DEBUG_SUBSEQUENCE_MATCH
+cerr << "SubsequenceMatchVampPlugin: reference downsampled sequence length = " << downsampledRef.size() << " (from " << refLength << " non-empty of " << m_features[0].size() << " total)" << endl;
+#endif
 FullDTW dtw(m_fdParams, m_dParams);
 FeatureSet returnFeatures;
 int featureSize = m_featureExtractors[0].getFeatureSize();
 int rate = int(m_inputSampleRate + 0.5);
 for (size_t c = 1; c < m_channelCount; ++c) {
-featureseq_t downsampledOther = downsample(m_features[c]);
+size_t otherLength = findNonEmptyLength(m_features[c]);
+featureseq_t downsampledOther = downsample(m_features[c], otherLength);
-cerr << "SubsequenceMatchVampPlugin: other downsampled sequence length = " << downsampledOther.size() << endl;
+#ifdef DEBUG_SUBSEQUENCE_MATCH
+cerr << "SubsequenceMatchVampPlugin: other downsampled sequence length = " << downsampledOther.size() << " (from " << otherLength << " non-empty of " << m_features[c].size() << " total)" << endl;
+#endif
 vector<size_t> subsequenceAlignment = dtw.align(downsampledRef,
 downsampledOther);
 if (subsequenceAlignment.empty()) {
 continue;
 }
 int64_t first = subsequenceAlignment[0];
 int64_t last = subsequenceAlignment[subsequenceAlignment.size()-1];
-cerr << "Subsequence alignment span: " << first << " to " << last << endl;
+#ifdef DEBUG_SUBSEQUENCE_MATCH
+cerr << "Subsequence alignment maps 0 -> " << subsequenceAlignment.size()-1 << " to " << first << " -> " << last << endl;
+#endif
 if (last <= first) {
 cerr << "NOTE: Invalid span (" << first << " to " << last
 << "), reverting to aligning against whole of reference"
 << endl;
 featureseq_t referenceSubsequence
 (m_features[0].begin() + firstAtOriginalRate,
 m_features[0].begin() + lastAtOriginalRate);
+#ifdef DEBUG_SUBSEQUENCE_MATCH
+cerr << "Reference subsequence length = " << referenceSubsequence.size()
+<< endl;
+cerr << "Other sequence length = " << otherLength << endl;
+#endif
 MatchPipeline pipeline(m_feParams,
 m_fcParams,
 m_dParams,
 m_params,
 m_secondReferenceFrequency);
-for (size_t i = 0; i < referenceSubsequence.size() &&
+size_t sequenceLength = std::max(referenceSubsequence.size(),
-i < m_features[c].size(); ++i) {
+otherLength);
+#ifdef DEBUG_SUBSEQUENCE_MATCH
+cerr << "MATCH input sequences have length " << sequenceLength << endl;
+#endif
+for (size_t i = 0; i < sequenceLength; ++i) {
 feature_t f1(featureSize, 0);
 feature_t f2(featureSize, 0);
 if (i < referenceSubsequence.size()) {
 f1 = referenceSubsequence[i];
 }
-if (i < m_features[c].size()) {
+if (i < otherLength) {
 f2 = m_features[c][i];
 }
 pipeline.feedFeatures(f1, f2);
 }
 vector<int> pathx;
 vector<int> pathy;
 int len = pipeline.retrievePath(m_smooth, pathx, pathy);
 int prevy = 0;
+#ifdef DEBUG_SUBSEQUENCE_MATCH
+cerr << "MATCH path has length " << len;
+if (len > 0) {
+cerr << " and goes from ("
+<< pathx[0] << ", " << pathy[0] << ") to ("
+<< pathx[len-1] << ", " << pathy[len-1] << ")";
+if (len > 2) {
+cerr << " with penultimate point at ("
+<< pathx[len-2] << ", " << pathy[len-2] << ")";
+}
+cerr << endl;
+} else {
+cerr << endl;
+}
+#endif
 for (int i = 0; i < len; ++i) {
 int x = pathx[i];
 int y = pathy[i] + int(first * m_coarseDownsample);
 Vamp::RealTime xt = Vamp::RealTime::frame2RealTime
 (x * m_stepSize, rate) + m_startTime;
 Vamp::RealTime yt = Vamp::RealTime::frame2RealTime
 (y * m_stepSize, rate) + m_startTime;
 prevy = y;
 }
 }
+#ifdef DEBUG_SUBSEQUENCE_MATCH
+cerr << endl;
+#endif
 return returnFeatures;
 }

Mercurial > hg > match-vamp

comparison src/SubsequenceMatchVampPlugin.cpp @ 246:aac9ad4064ea subsequence tip