changeset 237:2f3ecf5d2651 subsequence

Actually implement the subsequence match plugin
author Chris Cannam
date Fri, 03 Jul 2020 17:32:38 +0100
parents 39fe8728e1ca
children b9defaa87d96
files src/MatchPipeline.cpp src/MatchPipeline.h src/SubsequenceMatchVampPlugin.cpp src/SubsequenceMatchVampPlugin.h
diffstat 4 files changed, 268 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/src/MatchPipeline.cpp	Thu Jul 02 17:26:58 2020 +0100
+++ b/src/MatchPipeline.cpp	Fri Jul 03 17:32:38 2020 +0100
@@ -92,8 +92,8 @@
     
     m_feeder.feed(c1, c2);
 
-    if (aboveThreshold(c1)) m_lastFrameIn1 = m_frameNo;
-    if (aboveThreshold(c2)) m_lastFrameIn2 = m_frameNo;
+    if (isAboveEndingThreshold(c1)) m_lastFrameIn1 = m_frameNo;
+    if (isAboveEndingThreshold(c2)) m_lastFrameIn2 = m_frameNo;
 
 #ifdef DEBUG_MATCH_PIPELINE
     cerr << "last frames are " << m_lastFrameIn1 << ", " << m_lastFrameIn2
@@ -118,20 +118,16 @@
 }
 
 bool
-MatchPipeline::aboveThreshold(const feature_t &f)
+MatchPipeline::isAboveEndingThreshold(const feature_t &f)
 {
-    // This threshold is used only to determine when either of the
-    // input streams has ended -- the last frame for a stream is
-    // considered to be the last one that was above the
-    // threshold. This is different from the silence threshold in
-    // FeatureConditioner.
     double threshold = 1e-4f;
     double sum = 0.f;
     for (int i = 0; i < int(f.size()); ++i) {
         sum += f[i] * f[i];
     }
 #ifdef DEBUG_MATCH_PIPELINE
-    cerr << "aboveThreshold: sum " << sum << ", threshold " << threshold
+    cerr << "isAboveEndingThreshold: sum " << sum
+         << ", threshold " << threshold
          << ", returning " << (sum >= threshold) << endl;
 #endif
     return (sum >= threshold);
--- a/src/MatchPipeline.h	Thu Jul 02 17:26:58 2020 +0100
+++ b/src/MatchPipeline.h	Fri Jul 03 17:32:38 2020 +0100
@@ -120,6 +120,20 @@
      */
     double getOverallCost();
 
+    /**
+     * Return true if the feature's level is above a low threshold,
+     * intended to determine when either of the input streams has
+     * ended (the last frame for a stream is considered to be the last
+     * one that was above the threshold). This is different from the
+     * silence threshold in FeatureConditioner.
+     *
+     * Users of this class do not normally need to call this function
+     * explicitly: it's used internally when processing the
+     * streams. It is exposed here in case other code wants to perform
+     * a similar test in a consistent way.
+     */
+    static bool isAboveEndingThreshold(const feature_t &f);
+    
 private:
     FeatureExtractor m_fe1;
     FeatureExtractor m_fe2;
--- a/src/SubsequenceMatchVampPlugin.cpp	Thu Jul 02 17:26:58 2020 +0100
+++ b/src/SubsequenceMatchVampPlugin.cpp	Fri Jul 03 17:32:38 2020 +0100
@@ -16,6 +16,8 @@
 */
 
 #include "SubsequenceMatchVampPlugin.h"
+#include "FullDTW.h"
+#include "MatchPipeline.h"
 
 #include <vamp/vamp.h>
 #include <vamp-sdk/RealTime.h>
@@ -24,6 +26,10 @@
 #include <algorithm>
 
 using std::string;
+using std::vector;
+using std::cerr;
+using std::cout;
+using std::endl;
 
 // We want to ensure our freq map / crossover bin are always valid
 // with a fixed FFT length in seconds, so must reject low sample rates
@@ -39,21 +45,21 @@
     m_coarseDownsample(50),
     m_serialise(false),
     m_smooth(false),
+    m_secondReferenceFrequency(m_defaultFeParams.referenceFrequency),
+    m_channelCount(0),
     m_params(defaultStepTime),
     m_defaultParams(defaultStepTime),
     m_feParams(inputSampleRate),
     m_defaultFeParams(44100), // parameter descriptors can't depend on samplerate
-    m_secondReferenceFrequency(m_defaultFeParams.referenceFrequency),
     m_fcParams(),
     m_defaultFcParams(),
     m_dParams(),
     m_defaultDParams()
 {
     if (inputSampleRate < sampleRateMin) {
-        std::cerr << "SubsequenceMatchVampPlugin::SubsequenceMatchVampPlugin: input sample rate "
-                  << inputSampleRate << " < min supported rate "
-                  << sampleRateMin << ", plugin will refuse to initialise"
-                  << std::endl;
+        cerr << "SubsequenceMatchVampPlugin::SubsequenceMatchVampPlugin: input sample rate "
+             << inputSampleRate << " < min supported rate "
+             << sampleRateMin << ", plugin will refuse to initialise" << endl;
     }
 }
 
@@ -64,7 +70,7 @@
 string
 SubsequenceMatchVampPlugin::getIdentifier() const
 {
-    return "match_subsequence";
+    return "match-subsequence";
 }
 
 string
@@ -403,15 +409,19 @@
 SubsequenceMatchVampPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
 {
     if (m_inputSampleRate < sampleRateMin) {
-        std::cerr << "SubsequenceMatchVampPlugin::SubsequenceMatchVampPlugin: input sample rate "
-                  << m_inputSampleRate << " < min supported rate "
-                  << sampleRateMin << std::endl;
+        cerr << "SubsequenceMatchVampPlugin::SubsequenceMatchVampPlugin: input sample rate "
+             << m_inputSampleRate << " < min supported rate "
+             << sampleRateMin << endl;
         return false;
     }
     if (channels < getMinChannelCount() ||
-	channels > getMaxChannelCount()) return false;
+	channels > getMaxChannelCount()) {
+        return false;
+    }
     if (stepSize > blockSize/2 ||
-        blockSize != getPreferredBlockSize()) return false;
+        blockSize != getPreferredBlockSize()) {
+        return false;
+    }
 
     m_stepSize = int(stepSize);
     m_stepTime = float(stepSize) / m_inputSampleRate;
@@ -420,13 +430,29 @@
     m_params.hopTime = m_stepTime;
     m_feParams.fftSize = m_blockSize;
 
+    m_channelCount = channels;
+
+    reset();
+        
     return true;
 }
 
 void
 SubsequenceMatchVampPlugin::reset()
 {
-    //!!!
+    m_featureExtractors.clear();
+    m_features.clear();
+    m_startTime = Vamp::RealTime::zeroTime;
+
+    FeatureExtractor::Parameters feParams(m_feParams);
+    
+    for (size_t c = 0; c < m_channelCount; ++c) {
+        if (c > 0) {
+            feParams.referenceFrequency = m_secondReferenceFrequency;
+        }
+        m_featureExtractors.push_back(FeatureExtractor(feParams));
+        m_features.push_back(featureseq_t());
+    }
 }
 
 SubsequenceMatchVampPlugin::OutputList
@@ -451,7 +477,6 @@
     m_pathOutNo = int(list.size());
     list.push_back(desc);
 
-
     desc.identifier = "b_a";
     desc.name = "B-A Timeline";
     desc.description = "Timing in performance A corresponding to moments in performance B";
@@ -464,6 +489,20 @@
     desc.sampleRate = outRate;
     m_baOutNo = int(list.size());
     list.push_back(desc);
+
+    desc.identifier = "span";
+    desc.name = "Subsequence Span";
+    desc.description = "Region in performance A corresponding to the whole of performance B";
+    desc.unit = "";
+    desc.hasFixedBinCount = true;
+    desc.binCount = 0;
+    desc.hasKnownExtents = false;
+    desc.isQuantized = false;
+    desc.sampleType = OutputDescriptor::VariableSampleRate;
+    desc.sampleRate = outRate;
+    desc.hasDuration = true;
+    m_spanOutNo = int(list.size());
+    list.push_back(desc);
     
     return list;
 }
@@ -472,21 +511,95 @@
 SubsequenceMatchVampPlugin::process(const float *const *inputBuffers,
                                     Vamp::RealTime timestamp)
 {
-    FeatureSet returnFeatures;
+    if (m_featureExtractors.empty()) {
+        cerr << "SubsequenceMatchVampPlugin::process: Plugin has not been (properly?) initialised" << endl;
+        return {};
+    }
+
+    if (m_features[0].empty()) {
+        m_startTime = timestamp;
+    }
     
-    return returnFeatures;
+    for (size_t c = 0; c < m_featureExtractors.size(); ++c) {
+        m_features[c].push_back(m_featureExtractors[c].process
+                                (inputBuffers[c]));
+    }        
+
+    return {};
+}
+
+featureseq_t
+SubsequenceMatchVampPlugin::downsample(const featureseq_t &ff)
+{
+    if (ff.empty()) {
+        return ff;
+    }
+
+    size_t lastNonEmpty = 0;
+    for (size_t i = ff.size(); i > 0; ) {
+        --i;
+        if (MatchPipeline::isAboveEndingThreshold(ff[i])) {
+            lastNonEmpty = i;
+            break;
+        }
+    }
+
+    FeatureConditioner::Parameters fcParams(m_fcParams);
+    fcParams.order = FeatureConditioner::OutputFeatures; // not the difference
+    FeatureConditioner fc(fcParams);
+    
+    int featureSize = m_featureExtractors[0].getFeatureSize();
+    
+    featureseq_t d;
+
+    size_t i = 0;
+    while (i < lastNonEmpty) {
+        feature_t acc(featureSize, 0);
+        int j = 0;
+        while (j < m_coarseDownsample) {
+            if (i >= ff.size()) break;
+            feature_t feature = fc.process(ff[i]);
+            for (int k = 0; k < featureSize; ++k) {
+                acc[k] += feature[k];
+            }
+            ++i;
+            ++j;
+        }
+        if (j > 0) {
+            for (int k = 0; k < featureSize; ++k) {
+                acc[k] /= float(j);
+            }
+        }
+        d.push_back(acc);
+    }
+
+    return d;
 }
 
 SubsequenceMatchVampPlugin::FeatureSet
 SubsequenceMatchVampPlugin::getRemainingFeatures()
 {
+    if (m_featureExtractors.empty()) {
+        cerr << "SubsequenceMatchVampPlugin::getRemainingFeatures: Plugin has not been (properly?) initialised" << endl;
+        return {};
+    }
+    
 #ifdef _WIN32
-    HANDLE mutex;
+    static HANDLE mutex;
 #else
-    pthread_mutex_t mutex;
+    static pthread_mutex_t mutex;
 #endif
+    static bool mutexInitialised = false;
 
     if (m_serialise) {
+        if (!mutexInitialised) {
+#ifdef _WIN32
+            mutex = CreateMutex(NULL, FALSE, NULL);
+#else
+            pthread_mutex_init(&mutex, 0);
+#endif
+            mutexInitialised = true;
+        }
 #ifdef _WIN32
         WaitForSingleObject(mutex, INFINITE);
 #else
@@ -494,8 +607,117 @@
 #endif
     }
 
+    featureseq_t downsampledRef = downsample(m_features[0]);
+
+    cerr << "SubsequenceMatchVampPlugin: reference downsampled sequence length = " << downsampledRef.size() << endl;
+    
+    FullDTW::Parameters dtwParams(m_stepTime);
+    dtwParams.diagonalWeight = m_params.diagonalWeight;
+    dtwParams.subsequence = true;
+    FullDTW dtw(dtwParams, m_dParams);
+    
     FeatureSet returnFeatures;
+    int featureSize = m_featureExtractors[0].getFeatureSize();
 
+    int rate = int(m_inputSampleRate + 0.5);
+    
+    for (size_t c = 1; c < m_channelCount; ++c) {
+
+        featureseq_t downsampledOther = downsample(m_features[c]);
+
+        cerr << "SubsequenceMatchVampPlugin: other downsampled sequence length = " << downsampledOther.size() << endl;
+
+        vector<size_t> subsequenceAlignment = dtw.align(downsampledRef,
+                                                        downsampledOther);
+
+        if (subsequenceAlignment.empty()) {
+            cerr << "No subsequenceAlignment??" << endl;
+            continue;
+        }
+        
+        int64_t first = subsequenceAlignment[0];
+        int64_t last = subsequenceAlignment[subsequenceAlignment.size()-1];
+        cerr << "Subsequence alignment span: " << first << " -> " << last << endl;
+
+
+        if (last <= first) {
+            cerr << "Invalid span" << endl;
+            continue;
+        }
+        if (first < 0 || last >= long(downsampledRef.size())) {
+            cerr << "Span end points out of range" << endl;
+            continue;
+        }
+            
+        Feature span;
+        span.hasTimestamp = true;
+        span.timestamp = Vamp::RealTime::frame2RealTime
+            (first * m_coarseDownsample * m_stepSize, rate);
+        span.hasDuration = true;
+        span.duration = Vamp::RealTime::frame2RealTime
+            ((last - first) * m_coarseDownsample * m_stepSize, rate);
+        returnFeatures[m_spanOutNo].push_back(span);
+
+        featureseq_t referenceSubsequence
+            (m_features[0].begin() + first * m_coarseDownsample,
+             m_features[0].begin() + last * m_coarseDownsample);
+
+        MatchPipeline pipeline(m_feParams,
+                               m_fcParams,
+                               m_dParams,
+                               m_params,
+                               m_secondReferenceFrequency);
+
+        for (size_t i = 0; i < referenceSubsequence.size() &&
+                 i < m_features[c].size(); ++i) {
+            feature_t f1(featureSize, 0);
+            feature_t f2(featureSize, 0);
+            if (i < referenceSubsequence.size()) {
+                f1 = referenceSubsequence[i];
+            }
+            if (i < m_features[c].size()) {
+                f2 = m_features[c][i];
+            }
+            pipeline.feedFeatures(f1, f2);
+        }
+
+        pipeline.finish();
+            
+        vector<int> pathx;
+        vector<int> pathy;
+        int len = pipeline.retrievePath(m_smooth, pathx, pathy);
+
+        int prevy = 0;
+    
+        for (int i = 0; i < len; ++i) {
+
+            int x = pathx[i];
+            int y = pathy[i] + int(first * m_coarseDownsample);
+
+            Vamp::RealTime xt = Vamp::RealTime::frame2RealTime
+                (x * m_stepSize, rate) + m_startTime;
+            Vamp::RealTime yt = Vamp::RealTime::frame2RealTime
+                (y * m_stepSize, rate) + m_startTime;
+
+            Feature feature;
+            feature.hasTimestamp = true;
+            feature.timestamp = xt;
+            feature.values.clear();
+            feature.values.push_back(float(yt.sec + double(yt.nsec)/1.0e9));
+            returnFeatures[m_pathOutNo].push_back(feature);
+        
+            if (y != prevy) {
+                feature.hasTimestamp = true;
+                feature.timestamp = yt;
+                feature.values.clear();
+                feature.values.push_back(float(xt.sec + xt.msec()/1000.0));
+                returnFeatures[m_baOutNo].push_back(feature);
+            }
+
+            prevy = y;
+        }
+    }
+    
     if (m_serialise) {
 #ifdef _WIN32
         ReleaseMutex(mutex);
--- a/src/SubsequenceMatchVampPlugin.h	Thu Jul 02 17:26:58 2020 +0100
+++ b/src/SubsequenceMatchVampPlugin.h	Fri Jul 03 17:32:38 2020 +0100
@@ -73,13 +73,16 @@
     int m_coarseDownsample;
     bool m_serialise;
     bool m_smooth;
+    double m_secondReferenceFrequency;
+
+    size_t m_channelCount;
+    Vamp::RealTime m_startTime;
     
     Matcher::Parameters m_params;
     Matcher::Parameters m_defaultParams;
 
     FeatureExtractor::Parameters m_feParams;
     FeatureExtractor::Parameters m_defaultFeParams;
-    double m_secondReferenceFrequency;
 
     FeatureConditioner::Parameters m_fcParams;
     FeatureConditioner::Parameters m_defaultFcParams;
@@ -87,8 +90,14 @@
     DistanceMetric::Parameters m_dParams;
     DistanceMetric::Parameters m_defaultDParams;
 
+    std::vector<FeatureExtractor> m_featureExtractors;
+    std::vector<featureseq_t> m_features; // unconditioned features
+
+    featureseq_t downsample(const featureseq_t &);
+    
     mutable int m_pathOutNo;
     mutable int m_baOutNo;
+    mutable int m_spanOutNo;
 };
 
 #endif