changeset 149:65eeb604253f tracks

Add tracks output shape
author Chris Cannam
date Fri, 21 Apr 2017 13:09:41 +0100
parents af5961718456
children 37d2fd57723e
files test/test_collect.py vamp/collect.py
diffstat 2 files changed, 74 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/test/test_collect.py	Wed Nov 09 13:15:59 2016 +0000
+++ b/test/test_collect.py	Fri Apr 21 13:09:41 2017 +0100
@@ -93,11 +93,19 @@
 def test_collect_fixed_sample_rate_2():
     buf = input_data(blocksize * 10)
     rdict = vamp.collect(buf, rate, plugin_key, "curve-fsr-timed")
-    step, results = rdict["vector"]
-    assert abs(float(step) - 0.4) < eps
-    assert len(results) == 10
+    results = rdict["tracks"]
+    assert len(results) == 8
+    expected_starts = [ 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 4.0, 4.0 ]
+    expected_lengths = [ 1, 1, 2, 1, 1, 2, 1, 1 ]
+    expected_values = [ [ 0.0 ], [ 0.1 ], [ 0.2, 0.3 ], [ 0.4 ], [ 0.5 ],
+                        [ 0.6, 0.7 ], [ 0.8 ], [ 0.9 ] ] 
     for i in range(len(results)):
-        assert abs(results[i] - i * 0.1) < eps
+        track = results[i]
+        assert abs(float(track["step"]) - 0.4) < eps
+        assert abs(float(track["start"]) - expected_starts[i]) < eps
+        assert len(track["values"]) == expected_lengths[i]
+        for j in range(expected_lengths[i]):
+            assert abs(track["values"][j] - expected_values[i][j]) < eps
         
 def test_collect_variable_sample_rate():
     buf = input_data(blocksize * 10)
--- a/vamp/collect.py	Wed Nov 09 13:15:59 2016 +0000
+++ b/vamp/collect.py	Fri Apr 21 13:09:41 2017 +0100
@@ -55,7 +55,7 @@
     elif output_desc["sampleType"] == vampyhost.FIXED_SAMPLE_RATE:
         output_rate = output_desc["sampleRate"]
         for f in features:
-            if "has_timestamp" in f:
+            if "timestamp" in f:
                 n = int(f["timestamp"].to_float() * output_rate + 0.5)
             else:
                 n = n + 1
@@ -90,14 +90,50 @@
     return "matrix"
 
 
+def reshape_vector(results, out_step, output_desc):
+
+    output = output_desc["identifier"]
+    tracks = []
+    current_track = []
+    current_start_time = 0
+    out_step_secs = out_step.to_float()
+
+    n = -1
+    
+    for r in results:
+        f = r[output]
+        n = n + 1
+        if output_desc["sampleType"] == vampyhost.FIXED_SAMPLE_RATE:
+            if "timestamp" in f:
+                m = int(round(f["timestamp"].to_float() / out_step_secs))
+                if m != n:
+                    if current_track != []:
+                        tracks.append({ "start": current_start_time,
+                                        "step": out_step,
+                                        "values": np.array(current_track, np.float32) })
+                        current_track = []
+                        n = m
+                    current_start_time = vampyhost.RealTime('seconds', m * out_step_secs)
+        current_track.append(f["values"][0])
+
+    if tracks != []:
+        if current_track != []:
+            tracks.append({ "start": current_start_time,
+                            "step": out_step,
+                            "values": np.array(current_track, np.float32) })
+        return ("tracks", tracks)
+    else:
+        return ("vector", (out_step, np.array(current_track, np.float32)))
+
+    
 def reshape(results, sample_rate, step_size, output_desc, shape):
 
     output = output_desc["identifier"]
     out_step = get_feature_step_time(sample_rate, step_size, output_desc)
+    adjusted_shape = shape
 
     if shape == "vector":
-        rv = ( out_step,
-               np.array([r[output]["values"][0] for r in results], np.float32) )
+        (adjusted_shape, rv) = reshape_vector(results, out_step, output_desc)
     elif shape == "matrix":
         #!!! todo: check that each feature has the right number of bins?
         outseq = [r[output]["values"] for r in results]
@@ -105,11 +141,12 @@
     else:
         rv = list(fill_timestamps(results, sample_rate, step_size, output_desc))
 
-    return rv
+    return (adjusted_shape, rv)
 
         
 def collect(data, sample_rate, plugin_key, output = "", parameters = {}, **kwargs):
     """Process audio data with a Vamp plugin, and make the results from a
+
     single plugin output available as a single structure.
 
     The provided data should be a 1- or 2-dimensional list or NumPy
@@ -127,16 +164,18 @@
 
     The results are returned in a dictionary which will always contain
     exactly one element, whose key is one of the strings "vector",
-    "matrix", or "list". Which one is used depends on the structure of
-    features set out in the output descriptor for the requested plugin
-    output:
+    "matrix", "list", or "tracks".
 
-    * If the plugin output emits single-valued features at a fixed
-    sample-rate, then the "vector" element will be used. It will
-    contain a tuple of step time (the time in seconds between
-    consecutive feature values) and a one-dimensional NumPy array of
-    feature values. An example of such a feature might be a loudness
-    curve against time.
+    Which one is used depends on the structure of features set out in
+    the output descriptor for the requested plugin output, and sometimes
+    on the features themselves:
+
+    * If the plugin output emits single-valued features continuously at
+    a fixed sample-rate starting at the beginning of the input, then the
+    "vector" element will be used. It will contain a tuple of step time
+    (the time in seconds between consecutive feature values) and a
+    one-dimensional NumPy array of feature values. An example of such a
+    feature might be a loudness curve against time.
 
     * If the plugin output emits multiple-valued features, with an
     equal number of bins per feature, at a fixed sample-rate, then
@@ -145,6 +184,14 @@
     values) and a two-dimensional NumPy array of feature values. An
     example of such a feature might be a spectrogram.
 
+    * If the plugin output emits single-valued features at a fixed
+    sample-rate but with gaps between features or a non-zero start time,
+    then the "tracks" element will be used. It will contain a list of
+    dictionaries, each with keys "start" (start time), "step" (step time),
+    and "values" (a one-dimensional NumPy array of contiguous feature
+    values). An example of such a feature might be a pitch tracker that
+    emits values only during pitched sections of the input audio.
+
     * Otherwise, the "list" element will be used, and will contain a
     list of features, where each feature is represented as a
     dictionary containing a timestamp (always) and a duration
@@ -176,8 +223,7 @@
     results = vamp.process.process_with_initialised_plugin(ff, sample_rate, step_size, plugin, [output])
 
     shape = deduce_shape(output_desc)
-    rv = reshape(results, sample_rate, step_size, output_desc, shape)
+    (adjusted_shape, rv) = reshape(results, sample_rate, step_size, output_desc, shape)
 
     plugin.unload()
-    return { shape : rv }
-
+    return { adjusted_shape : rv }