changeset 246:aac9ad4064ea subsequence tip

Fix incorrect handling of silent tail in the non-subsequence MATCH phase; some debug output changes
author Chris Cannam
date Fri, 24 Jul 2020 14:29:55 +0100
parents 91796976e8c6
children
files src/FullDTW.cpp src/SubsequenceMatchVampPlugin.cpp src/SubsequenceMatchVampPlugin.h
diffstat 3 files changed, 131 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/src/FullDTW.cpp	Fri Jul 17 18:15:27 2020 +0100
+++ b/src/FullDTW.cpp	Fri Jul 24 14:29:55 2020 +0100
@@ -176,7 +176,7 @@
     }
 
 #ifdef DEBUG_DTW
-    std::cerr << "Alignment:" << std::endl;
+    std::cerr << "Costed path:" << std::endl;
     pathcost_t prevcost = 0;
     int indent = 0;
     size_t prevj = 0;
@@ -201,6 +201,11 @@
         if (prevcost == 0) prevcost = 1;
     }
     std::cerr << "\n";
+
+    std::cerr << "Alignment:" << std::endl;
+    for (size_t i = 0; i < alignment.size(); ++i) {
+        std::cerr << i << " -> " << alignment[i] << "\n";
+    }
 #endif
     
     return alignment;
--- a/src/SubsequenceMatchVampPlugin.cpp	Fri Jul 17 18:15:27 2020 +0100
+++ b/src/SubsequenceMatchVampPlugin.cpp	Fri Jul 24 14:29:55 2020 +0100
@@ -31,6 +31,8 @@
 using std::cout;
 using std::endl;
 
+//#define DEBUG_SUBSEQUENCE_MATCH 1
+
 // We want to ensure our freq map / crossover bin are always valid
 // with a fixed FFT length in seconds, so must reject low sample rates
 static float sampleRateMin = 5000.f;
@@ -46,6 +48,7 @@
     m_stepTime(defaultStepTime),
     m_blockSize(2048),
     m_coarseDownsample(defaultCoarseDownsample),
+    m_downsamplePeaks(false),
     m_serialise(false),
     m_smooth(false),
     m_channelCount(0),
@@ -138,7 +141,7 @@
 
     desc.identifier = "freq2";
     desc.name = "Tuning frequency of second input";
-    desc.description = "Tuning frequency (concert A) for the other audio";
+    desc.description = "Tuning frequency (concert A) for the other audio.";
     desc.minValue = 220.0;
     desc.maxValue = 880.0;
     desc.defaultValue = float(m_defaultFeParams.referenceFrequency);
@@ -178,6 +181,19 @@
     desc.quantizeStep = 1;
     list.push_back(desc);
     
+    desc.identifier = "downsamplemethod";
+    desc.name = "Coarse alignment downsample method";
+    desc.description = "Downsample method for features used in first coarse subsequence-alignment step";
+    desc.minValue = 0;
+    desc.maxValue = 1;
+    desc.defaultValue = 0;
+    desc.isQuantized = true;
+    desc.quantizeStep = 1;
+    desc.valueNames.clear();
+    desc.valueNames.push_back("Average");
+    desc.valueNames.push_back("Peak");
+    list.push_back(desc);
+    
     desc.identifier = "usechroma";
     desc.name = "Feature type";
     desc.description = "Whether to use warped spectrogram or chroma frequency map";
@@ -195,7 +211,7 @@
 
     desc.identifier = "usespecdiff";
     desc.name = "Use feature difference";
-    desc.description = "Whether to use half-wave rectified feature-to-feature difference instead of straight spectral or chroma feature";
+    desc.description = "Whether to use half-wave rectified feature-to-feature difference instead of straight spectral or chroma feature (does not apply to downsampled features)";
     desc.minValue = 0;
     desc.maxValue = 1;
     desc.defaultValue = float(m_defaultFcParams.order);
@@ -385,6 +401,8 @@
         return float(m_feParams.maxFrequency);
     } else if (name == "coarsedownsample") {
         return float(m_coarseDownsample);
+    } else if (name == "downsamplemethod") {
+        return m_downsamplePeaks ? 1.0 : 0.0;
     }
     
     return 0.0;
@@ -431,9 +449,17 @@
         m_feParams.maxFrequency = value;
     } else if (name == "coarsedownsample") {
         m_coarseDownsample = int(value + 0.1);
+    } else if (name == "downsamplemethod") {
+        m_downsamplePeaks = (value > 0.5);
     }
 }
 
+SubsequenceMatchVampPlugin::InputDomain
+SubsequenceMatchVampPlugin::getInputDomain() const
+{
+    return FrequencyDomain;
+}
+
 size_t
 SubsequenceMatchVampPlugin::getPreferredStepSize() const
 {
@@ -446,6 +472,18 @@
     return m_defaultFeParams.fftSize;
 }
 
+size_t
+SubsequenceMatchVampPlugin::getMinChannelCount() const
+{
+    return 2;
+}
+
+size_t
+SubsequenceMatchVampPlugin::getMaxChannelCount() const
+{
+    return 2;
+}
+
 bool
 SubsequenceMatchVampPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
 {
@@ -569,21 +607,33 @@
     return {};
 }
 
-featureseq_t
-SubsequenceMatchVampPlugin::downsample(const featureseq_t &ff)
+size_t
+SubsequenceMatchVampPlugin::findNonEmptyLength(const featureseq_t &ff)
 {
-    if (ff.empty()) {
-        return ff;
-    }
-
+    bool haveNonEmpty = false;
     size_t lastNonEmpty = 0;
     for (size_t i = ff.size(); i > 0; ) {
         --i;
         if (MatchPipeline::isAboveEndingThreshold(ff[i])) {
+            haveNonEmpty = true;
             lastNonEmpty = i;
             break;
         }
     }
+    if (haveNonEmpty) {
+        return lastNonEmpty + 1;
+    } else {
+        return 0;
+    }
+}
+
+featureseq_t
+SubsequenceMatchVampPlugin::downsample(const featureseq_t &ff,
+                                       size_t inLength)
+{
+    if (ff.empty()) {
+        return ff;
+    }
 
     FeatureConditioner::Parameters fcParams(m_fcParams);
     fcParams.order = FeatureConditioner::OutputFeatures; // not the difference
@@ -594,19 +644,27 @@
     featureseq_t d;
 
     size_t i = 0;
-    while (i < lastNonEmpty) {
+    while (i < inLength) {
         feature_t acc(featureSize, 0);
         int j = 0;
         while (j < m_coarseDownsample) {
             if (i >= ff.size()) break;
             feature_t feature = fc.process(ff[i]);
-            for (int k = 0; k < featureSize; ++k) {
-                acc[k] += feature[k];
+            if (m_downsamplePeaks) {
+                for (int k = 0; k < featureSize; ++k) {
+                    if (feature[k] > acc[k]) {
+                        acc[k] = feature[k];
+                    }
+                }
+            } else {
+                for (int k = 0; k < featureSize; ++k) {
+                    acc[k] += feature[k];
+                }
             }
             ++i;
             ++j;
         }
-        if (j > 0) {
+        if (!m_downsamplePeaks && j > 0) {
             for (int k = 0; k < featureSize; ++k) {
                 acc[k] /= float(j);
             }
@@ -664,9 +722,12 @@
 SubsequenceMatchVampPlugin::FeatureSet
 SubsequenceMatchVampPlugin::performAlignment()
 {
-    featureseq_t downsampledRef = downsample(m_features[0]);
+    size_t refLength = findNonEmptyLength(m_features[0]);
+    featureseq_t downsampledRef = downsample(m_features[0], refLength);
 
-    cerr << "SubsequenceMatchVampPlugin: reference downsampled sequence length = " << downsampledRef.size() << endl;
+#ifdef DEBUG_SUBSEQUENCE_MATCH
+    cerr << "SubsequenceMatchVampPlugin: reference downsampled sequence length = " << downsampledRef.size() << " (from " << refLength << " non-empty of " << m_features[0].size() << " total)" << endl;
+#endif
     
     FullDTW dtw(m_fdParams, m_dParams);
     
@@ -677,9 +738,12 @@
     
     for (size_t c = 1; c < m_channelCount; ++c) {
 
-        featureseq_t downsampledOther = downsample(m_features[c]);
+        size_t otherLength = findNonEmptyLength(m_features[c]);
+        featureseq_t downsampledOther = downsample(m_features[c], otherLength);
 
-        cerr << "SubsequenceMatchVampPlugin: other downsampled sequence length = " << downsampledOther.size() << endl;
+#ifdef DEBUG_SUBSEQUENCE_MATCH
+        cerr << "SubsequenceMatchVampPlugin: other downsampled sequence length = " << downsampledOther.size() << " (from " << otherLength << " non-empty of " << m_features[c].size() << " total)" << endl;
+#endif
 
         vector<size_t> subsequenceAlignment = dtw.align(downsampledRef,
                                                         downsampledOther);
@@ -691,8 +755,10 @@
         
         int64_t first = subsequenceAlignment[0];
         int64_t last = subsequenceAlignment[subsequenceAlignment.size()-1];
-        cerr << "Subsequence alignment span: " << first << " to " << last << endl;
 
+#ifdef DEBUG_SUBSEQUENCE_MATCH
+        cerr << "Subsequence alignment maps 0 -> " << subsequenceAlignment.size()-1 << " to " << first << " -> " << last << endl;
+#endif
 
         if (last <= first) {
             cerr << "NOTE: Invalid span (" << first << " to " << last
@@ -729,20 +795,32 @@
             (m_features[0].begin() + firstAtOriginalRate,
              m_features[0].begin() + lastAtOriginalRate);
 
+#ifdef DEBUG_SUBSEQUENCE_MATCH
+        cerr << "Reference subsequence length = " << referenceSubsequence.size()
+             << endl;
+        cerr << "Other sequence length = " << otherLength << endl;
+#endif
+
         MatchPipeline pipeline(m_feParams,
                                m_fcParams,
                                m_dParams,
                                m_params,
                                m_secondReferenceFrequency);
 
-        for (size_t i = 0; i < referenceSubsequence.size() &&
-                 i < m_features[c].size(); ++i) {
+        size_t sequenceLength = std::max(referenceSubsequence.size(),
+                                         otherLength);
+        
+#ifdef DEBUG_SUBSEQUENCE_MATCH
+        cerr << "MATCH input sequences have length " << sequenceLength << endl;
+#endif
+    
+        for (size_t i = 0; i < sequenceLength; ++i) {
             feature_t f1(featureSize, 0);
             feature_t f2(featureSize, 0);
             if (i < referenceSubsequence.size()) {
                 f1 = referenceSubsequence[i];
             }
-            if (i < m_features[c].size()) {
+            if (i < otherLength) {
                 f2 = m_features[c][i];
             }
             pipeline.feedFeatures(f1, f2);
@@ -755,12 +833,28 @@
         int len = pipeline.retrievePath(m_smooth, pathx, pathy);
 
         int prevy = 0;
+
+#ifdef DEBUG_SUBSEQUENCE_MATCH
+        cerr << "MATCH path has length " << len;
+        if (len > 0) {
+            cerr << " and goes from ("
+                 << pathx[0] << ", " << pathy[0] << ") to ("
+                 << pathx[len-1] << ", " << pathy[len-1] << ")";
+            if (len > 2) {
+                cerr << " with penultimate point at ("
+                     << pathx[len-2] << ", " << pathy[len-2] << ")";
+            }
+            cerr << endl;
+        } else {
+            cerr << endl;
+        }
+#endif
     
         for (int i = 0; i < len; ++i) {
 
             int x = pathx[i];
             int y = pathy[i] + int(first * m_coarseDownsample);
-
+            
             Vamp::RealTime xt = Vamp::RealTime::frame2RealTime
                 (x * m_stepSize, rate) + m_startTime;
             Vamp::RealTime yt = Vamp::RealTime::frame2RealTime
@@ -785,5 +879,9 @@
         }
     }
 
+#ifdef DEBUG_SUBSEQUENCE_MATCH
+    cerr << endl;
+#endif
+    
     return returnFeatures;
 }
--- a/src/SubsequenceMatchVampPlugin.h	Fri Jul 17 18:15:27 2020 +0100
+++ b/src/SubsequenceMatchVampPlugin.h	Fri Jul 24 14:29:55 2020 +0100
@@ -41,13 +41,13 @@
     bool initialise(size_t channels, size_t stepSize, size_t blockSize);
     void reset();
 
-    InputDomain getInputDomain() const { return FrequencyDomain; }
+    InputDomain getInputDomain() const;
 
     size_t getPreferredStepSize() const;
     size_t getPreferredBlockSize() const;
 
-    size_t getMinChannelCount() const { return 2; }
-    size_t getMaxChannelCount() const { return 2; }
+    size_t getMinChannelCount() const;
+    size_t getMaxChannelCount() const;
 
     std::string getIdentifier() const;
     std::string getName() const;
@@ -72,6 +72,7 @@
     float m_stepTime;
     int m_blockSize;
     int m_coarseDownsample;
+    bool m_downsamplePeaks; // use peaks as opposed to averaging for downsample
     bool m_serialise;
     bool m_smooth;
 
@@ -98,7 +99,8 @@
     std::vector<featureseq_t> m_features; // unconditioned features
 
     FeatureSet performAlignment();
-    featureseq_t downsample(const featureseq_t &);
+    size_t findNonEmptyLength(const featureseq_t &);
+    featureseq_t downsample(const featureseq_t &, size_t inLength);
     
     mutable int m_pathOutNo;
     mutable int m_baOutNo;