changeset 180:9a58bd07aa4d

* Part way to providing support for continuous-time averaging summaries
author cannam
date Wed, 03 Sep 2008 15:59:09 +0000
parents 72bf540da84f
children cd16cbf80c87
files vamp-sdk/hostext/PluginSummarisingAdapter.cpp vamp-sdk/hostext/PluginSummarisingAdapter.h
diffstat 2 files changed, 184 insertions(+), 52 deletions(-) [+]
line wrap: on
line diff
--- a/vamp-sdk/hostext/PluginSummarisingAdapter.cpp	Thu Aug 07 16:34:23 2008 +0000
+++ b/vamp-sdk/hostext/PluginSummarisingAdapter.cpp	Wed Sep 03 15:59:09 2008 +0000
@@ -54,8 +54,12 @@
 
     void setSummarySegmentBoundaries(const SegmentBoundaries &);
 
-    FeatureList getSummaryForOutput(int output, SummaryType type);
-    FeatureSet getSummaryForAllOutputs(SummaryType type);
+    FeatureList getSummaryForOutput(int output,
+                                    SummaryType type,
+                                    AveragingMethod avg);
+
+    FeatureSet getSummaryForAllOutputs(SummaryType type,
+                                       AveragingMethod avg);
 
 protected:
     Plugin *m_plugin;
@@ -64,24 +68,40 @@
     
     typedef std::vector<float> ValueList;
     typedef std::map<int, ValueList> BinValueMap;
+    typedef std::vector<RealTime> DurationList;
     
     struct OutputAccumulator {
         int count;
-        BinValueMap values;
-        OutputAccumulator() : count(0), values() { }
+        BinValueMap values; // bin number -> values ordered by time
+        DurationList durations;
+        OutputAccumulator() : count(0), values(), durations() { }
     };
 
     typedef std::map<int, OutputAccumulator> OutputAccumulatorMap;
-    OutputAccumulatorMap m_accumulators;
+    OutputAccumulatorMap m_accumulators; // output number -> accumulator
+
+    typedef std::map<int, RealTime> OutputTimestampMap;
+    OutputTimestampMap m_prevTimestamps; // output number -> timestamp
 
     struct OutputBinSummary {
+
+        int count;
+
+        // extents
         float minimum;
         float maximum;
+        float sum;
+
+        // sample-average results
         float median;
         float mode;
-        float sum;
         float variance;
-        int count;
+
+        // continuous-time average results
+        float median_c;
+        float mode_c;
+        float mean_c;
+        float variance_c;
     };
 
     typedef std::map<int, OutputBinSummary> OutputSummary;
@@ -91,9 +111,10 @@
     OutputSummarySegmentMap m_summaries;
 
     RealTime m_lastTimestamp;
+    RealTime m_prevDuration;
 
-    void accumulate(const FeatureSet &fs, RealTime);
-    void accumulate(int output, const Feature &f, RealTime);
+    void accumulate(const FeatureSet &fs, RealTime, bool final);
+    void accumulate(int output, const Feature &f, RealTime, bool final);
     void reduce();
 };
     
@@ -121,15 +142,18 @@
 }
 
 Plugin::FeatureList
-PluginSummarisingAdapter::getSummaryForOutput(int output, SummaryType type)
+PluginSummarisingAdapter::getSummaryForOutput(int output,
+                                              SummaryType type,
+                                              AveragingMethod avg)
 {
-    return m_impl->getSummaryForOutput(output, type);
+    return m_impl->getSummaryForOutput(output, type, avg);
 }
 
 Plugin::FeatureSet
-PluginSummarisingAdapter::getSummaryForAllOutputs(SummaryType type)
+PluginSummarisingAdapter::getSummaryForAllOutputs(SummaryType type,
+                                                  AveragingMethod avg)
 {
-    return m_impl->getSummaryForAllOutputs(type);
+    return m_impl->getSummaryForAllOutputs(type, avg);
 }
 
 PluginSummarisingAdapter::Impl::Impl(Plugin *plugin, float inputSampleRate) :
@@ -145,7 +169,7 @@
 PluginSummarisingAdapter::Impl::process(const float *const *inputBuffers, RealTime timestamp)
 {
     FeatureSet fs = m_plugin->process(inputBuffers, timestamp);
-    accumulate(fs, timestamp);
+    accumulate(fs, timestamp, false);
     m_lastTimestamp = timestamp;
     return fs;
 }
@@ -154,14 +178,18 @@
 PluginSummarisingAdapter::Impl::getRemainingFeatures()
 {
     FeatureSet fs = m_plugin->getRemainingFeatures();
-    accumulate(fs, m_lastTimestamp);
+    accumulate(fs, m_lastTimestamp, true);
     reduce();
     return fs;
 }
 
 Plugin::FeatureList
-PluginSummarisingAdapter::Impl::getSummaryForOutput(int output, SummaryType type)
+PluginSummarisingAdapter::Impl::getSummaryForOutput(int output,
+                                                    SummaryType type,
+                                                    AveragingMethod avg)
 {
+    bool continuous = (avg == ContinuousTimeAverage);
+
     //!!! need to ensure that this is only called after processing is
     //!!! complete (at the moment processing is "completed" in the
     //!!! call to getRemainingFeatures, but we don't want to require
@@ -199,17 +227,21 @@
                 break;
 
             case Mean:
-                if (summary.count) {
+                if (continuous) {
+                    result = summary.mean_c;
+                } else if (summary.count) {
                     result = summary.sum / summary.count;
                 }
                 break;
 
             case Median:
-                result = summary.median;
+                if (continuous) result = summary.median_c;
+                else result = summary.median;
                 break;
 
             case Mode:
-                result = summary.mode;
+                if (continuous) result = summary.mode_c;
+                else result = summary.mode;
                 break;
 
             case Sum:
@@ -217,16 +249,21 @@
                 break;
 
             case Variance:
-                result = summary.variance;
+                if (continuous) result = summary.variance_c;
+                else result = summary.variance;
                 break;
 
             case StandardDeviation:
-                result = sqrtf(summary.variance);
+                if (continuous) result = sqrtf(summary.variance_c);
+                else result = sqrtf(summary.variance);
                 break;
 
             case Count:
                 result = summary.count;
                 break;
+
+            default:
+                break;
             }
             
             f.values.push_back(result);
@@ -238,24 +275,26 @@
 }
 
 Plugin::FeatureSet
-PluginSummarisingAdapter::Impl::getSummaryForAllOutputs(SummaryType type)
+PluginSummarisingAdapter::Impl::getSummaryForAllOutputs(SummaryType type,
+                                                        AveragingMethod avg)
 {
     FeatureSet fs;
     for (OutputSummarySegmentMap::const_iterator i = m_summaries.begin();
          i != m_summaries.end(); ++i) {
-        fs[i->first] = getSummaryForOutput(i->first, type);
+        fs[i->first] = getSummaryForOutput(i->first, type, avg);
     }
     return fs;
 }
 
 void
 PluginSummarisingAdapter::Impl::accumulate(const FeatureSet &fs,
-                                           RealTime timestamp)
+                                           RealTime timestamp, 
+                                           bool final)
 {
     for (FeatureSet::const_iterator i = fs.begin(); i != fs.end(); ++i) {
         for (FeatureList::const_iterator j = i->second.begin();
              j != i->second.end(); ++j) {
-            accumulate(i->first, *j, timestamp);
+            accumulate(i->first, *j, timestamp, final);
         }
     }
 }
@@ -263,28 +302,45 @@
 void
 PluginSummarisingAdapter::Impl::accumulate(int output,
                                            const Feature &f,
-                                           RealTime timestamp)
+                                           RealTime timestamp,
+                                           bool final)
 {
-//!!! use timestamp to determine which segment we're on
+//!!! to do: use timestamp to determine which segment we're on
+
     m_accumulators[output].count++;
+
+    if (m_prevDuration == RealTime::zeroTime) {
+        if (m_prevTimestamps.find(output) != m_prevTimestamps.end()) {
+            m_prevDuration = timestamp - m_prevTimestamps[output];
+        }
+    }
+    if (m_prevDuration != RealTime::zeroTime ||
+        !m_accumulators[output].durations.empty()) {
+        // ... i.e. if not first result.  We don't push a duration
+        // when we process the first result; then the duration we push
+        // each time is that for the result before the one we're
+        // processing, and we push an extra one at the end.  This
+        // permits handling the case where the feature itself doesn't
+        // have a duration field, and we have to calculate it from the
+        // time to the following feature.  The net effect is simply
+        // that values[n] and durations[n] refer to the same result.
+        m_accumulators[output].durations.push_back(m_prevDuration);
+    }
+
+    m_prevTimestamps[output] = timestamp;
+
     for (int i = 0; i < int(f.values.size()); ++i) {
-
-
-        //!!! we really want to associate this occurrence of this
-        //!!! value with the duration it covers.
-        
-        //!!! for dense values, the duration can be 1 or the sample
-        //!!! rate or whatever -- doesn't matter so long as it's the
-        //!!! same for every value.
-
-        //!!! for sparse values, the duration should be that from this
-        //!!! feature to the next.
-
-        //!!! if the feature has a duration, should be using that
-        //!!! instead.
-
         m_accumulators[output].values[i].push_back(f.values[i]);
     }
+
+    if (final) {
+        RealTime finalDuration;
+        if (f.hasDuration) finalDuration = f.duration;
+        m_accumulators[output].durations.push_back(finalDuration);
+    }
+
+    if (f.hasDuration) m_prevDuration = f.duration;
+    else m_prevDuration = RealTime::zeroTime;
 }
 
 void
@@ -298,25 +354,52 @@
         int output = i->first;
         OutputAccumulator &accumulator = i->second;
 
+        RealTime totalDuration;
+        size_t dindex = 0;
+
+        while (dindex < accumulator.durations.size()) {
+            totalDuration = totalDuration + accumulator.durations[dindex++];
+        }
+
+        dindex = 0;
+
         for (BinValueMap::iterator j = accumulator.values.begin();
              j != accumulator.values.end(); ++j) {
 
+            // work on all values over time for a single bin
+
             int bin = j->first;
             ValueList &values = j->second;
+            const DurationList &durations = accumulator.durations;
 
             OutputBinSummary summary;
+
+            summary.count = accumulator.count;
+
             summary.minimum = 0.f;
             summary.maximum = 0.f;
+
             summary.median = 0.f;
             summary.mode = 0.f;
             summary.sum = 0.f;
             summary.variance = 0.f;
-            summary.count = accumulator.count;
+
+            summary.median_c = 0.f;
+            summary.mode_c = 0.f;
+            summary.mean_c = 0.f;
+            summary.variance_c = 0.f;
+
             if (summary.count == 0 || values.empty()) continue;
 
             std::sort(values.begin(), values.end());
             int sz = values.size();
 
+            if (sz != durations.size()) {
+                //!!! is this reasonable?
+                std::cerr << "WARNING: sz " << sz << " != durations.size() "
+                          << durations.size() << std::endl;
+            }
+
             summary.minimum = values[0];
             summary.maximum = values[sz-1];
 
@@ -330,17 +413,11 @@
 
             for (int k = 0; k < sz; ++k) {
                 summary.sum += values[k];
-                ++distribution[values[k]];
+                distribution[values[k]] += 1;
             }
 
             int md = 0;
 
-            //!!! I don't like this.  Really the mode should be the
-            //!!! value that spans the longest period of time, not the
-            //!!! one that appears in the largest number of distinct
-            //!!! features.  I suppose that a median by time rather
-            //!!! than number of features would also be useful.
-            
             for (std::map<float, int>::iterator di = distribution.begin();
                  di != distribution.end(); ++di) {
                 if (di->second > md) {
@@ -351,6 +428,30 @@
 
             distribution.clear();
 
+            //!!! we want to omit this bit if the features all have
+            //!!! equal duration (and set mode_c equal to mode instead)
+            
+            std::map<float, RealTime> distribution_c;
+
+            for (int k = 0; k < sz; ++k) {
+                distribution_c[values[k]] =
+                    distribution_c[values[k]] + durations[k];
+            }
+
+            RealTime mrd = RealTime::zeroTime;
+
+            for (std::map<float, RealTime>::iterator di = distribution_c.begin();
+                 di != distribution_c.end(); ++di) {
+                if (di->second > mrd) {
+                    mrd = di->second;
+                    summary.mode_c = di->first;
+                }
+            }
+
+            distribution_c.clear();
+
+            //!!! handle mean_c, median_c, variance_c
+
             float mean = summary.sum / summary.count;
 
             for (int k = 0; k < sz; ++k) {
--- a/vamp-sdk/hostext/PluginSummarisingAdapter.h	Thu Aug 07 16:34:23 2008 +0000
+++ b/vamp-sdk/hostext/PluginSummarisingAdapter.h	Wed Sep 03 15:59:09 2008 +0000
@@ -81,8 +81,39 @@
         UnknownSummaryType = 999
     };
 
-    FeatureList getSummaryForOutput(int output, SummaryType type);
-    FeatureSet getSummaryForAllOutputs(SummaryType type);
+    /**
+     * AveragingMethod indicates how the adapter should handle
+     * average-based summaries of features whose results are not
+     * equally spaced in time.
+     *
+     * If SampleAverage is specified, summary types based on averages
+     * will be calculated by treating each result individually without
+     * regard to its time: for example, the mean will be the sum of
+     * all values divided by the number of values.
+     *
+     * If ContinuousTimeAverage is specified, each feature will be
+     * considered to have a duration, either as specified in the
+     * feature's duration field, or until the following feature: thus,
+     * for example, the mean will be the sum of the products of values
+     * and durations, divided by the total duration.
+     *
+     * Although SampleAverage is useful for many types of feature,
+     * ContinuousTimeAverage is essential for some situations, for
+     * example finding the result that spans the largest proportion of
+     * the input given a feature that emits a new result only when the
+     * value changes (the modal value integrated over time).
+     */
+    enum AveragingMethod {
+        SampleAverage         = 0,
+        ContinuousTimeAverage = 1,
+    };
+
+    FeatureList getSummaryForOutput(int output,
+                                    SummaryType type,
+                                    AveragingMethod method = SampleAverage);
+
+    FeatureSet getSummaryForAllOutputs(SummaryType type,
+                                       AveragingMethod method = SampleAverage);
 
 protected:
     class Impl;