annotate vamp/collect.py @ 151:5a6b8f4be9b9 tracks tip

Docs
author Chris Cannam
date Fri, 21 Apr 2017 14:33:57 +0100
parents 37d2fd57723e
children
rev   line source
Chris@117 1 #!/usr/bin/env python
Chris@117 2
Chris@117 3 # Python Vamp Host
Chris@117 4 # Copyright (c) 2008-2015 Queen Mary, University of London
Chris@117 5 #
Chris@117 6 # Permission is hereby granted, free of charge, to any person
Chris@117 7 # obtaining a copy of this software and associated documentation
Chris@117 8 # files (the "Software"), to deal in the Software without
Chris@117 9 # restriction, including without limitation the rights to use, copy,
Chris@117 10 # modify, merge, publish, distribute, sublicense, and/or sell copies
Chris@117 11 # of the Software, and to permit persons to whom the Software is
Chris@117 12 # furnished to do so, subject to the following conditions:
Chris@117 13 #
Chris@117 14 # The above copyright notice and this permission notice shall be
Chris@117 15 # included in all copies or substantial portions of the Software.
Chris@117 16 #
Chris@117 17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Chris@117 18 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Chris@117 19 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Chris@117 20 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
Chris@117 21 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
Chris@117 22 # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
Chris@117 23 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Chris@117 24 #
Chris@117 25 # Except as contained in this notice, the names of the Centre for
Chris@117 26 # Digital Music and Queen Mary, University of London shall not be
Chris@117 27 # used in advertising or otherwise to promote the sale, use or other
Chris@117 28 # dealings in this Software without prior written authorization.
Chris@117 29
Chris@56 30 '''A high-level interface to the vampyhost extension module, for quickly and easily running Vamp audio analysis plugins on audio files and buffers.'''
Chris@56 31
Chris@56 32 import vampyhost
Chris@112 33 import vamp.load
Chris@112 34 import vamp.process
Chris@112 35 import vamp.frames
Chris@89 36
Chris@93 37 import numpy as np
Chris@93 38
def get_feature_step_time(sample_rate, step_size, output_desc):
    """Return the time step between consecutive features of an output.

    The step depends on the "sampleType" entry of output_desc:

     * ONE_SAMPLE_PER_STEP: the step is step_size frames at
       sample_rate, converted with vampyhost.frame_to_realtime.

     * FIXED_SAMPLE_RATE: the step is the reciprocal of the output's
       own "sampleRate" entry, wrapped in a vampyhost.RealTime.

     * anything else: the plain integer 1 is returned.
       NOTE(review): this fallback is an int, not a RealTime like the
       other two branches -- confirm that callers never need
       to_float() on it.
    """
    sample_type = output_desc["sampleType"]

    if sample_type == vampyhost.ONE_SAMPLE_PER_STEP:
        return vampyhost.frame_to_realtime(step_size, sample_rate)

    if sample_type == vampyhost.FIXED_SAMPLE_RATE:
        return vampyhost.RealTime('seconds', 1.0 / output_desc["sampleRate"])

    return 1
Chris@72 46
def timestamp_features(sample_rate, step_size, output_desc, features):
    """Yield each feature in turn, filling in its "timestamp" entry.

    This is a generator; each feature dictionary is modified in place
    and then yielded. How the timestamp is set depends on the
    "sampleType" entry of output_desc:

     * ONE_SAMPLE_PER_STEP: the i'th feature (counting from zero) is
       stamped with the time of frame i * step_size at sample_rate,
       replacing any timestamp it already had.

     * FIXED_SAMPLE_RATE: a feature that already has a timestamp has it
       rounded to the nearest multiple of the output's sample period;
       a feature without one is placed one period after the previous
       feature (the first such feature lands at index 0).

     * anything else: features are passed through unchanged.
    """
    sample_type = output_desc["sampleType"]

    if sample_type == vampyhost.ONE_SAMPLE_PER_STEP:
        for index, feature in enumerate(features):
            feature["timestamp"] = vampyhost.frame_to_realtime(
                index * step_size, sample_rate)
            yield feature

    elif sample_type == vampyhost.FIXED_SAMPLE_RATE:
        rate = output_desc["sampleRate"]
        index = -1
        for feature in features:
            if "timestamp" in feature:
                # Snap the plugin-supplied time to the output sample grid
                index = int(feature["timestamp"].to_float() * rate + 0.5)
            else:
                # No explicit time: follows on directly from the last one
                index = index + 1
            feature["timestamp"] = vampyhost.RealTime(
                'seconds', float(index) / rate)
            yield feature

    else:
        for feature in features:
            yield feature
Chris@72 67
def fill_timestamps(results, sample_rate, step_size, output_desc):
    """Yield the timestamped features for one output of a result stream.

    Each item of results is indexed by the output's "identifier" to
    obtain a feature, and the resulting features are passed through
    timestamp_features so that their "timestamp" entries are filled in
    according to the output's sample type. This is a generator.
    """
    output_id = output_desc["identifier"]

    features = (result[output_id] for result in results)

    for feature in timestamp_features(sample_rate, step_size,
                                      output_desc, features):
        yield feature
Chris@93 78
def deduce_shape(output_desc):
    """Return the result shape implied by an output descriptor.

    The return value is one of the strings "list", "vector" or
    "matrix":

     * "list" if the output's features have duration, or it has a
       variable sample rate, or it has no fixed bin count, or its bin
       count is zero;

     * "vector" if it has exactly one bin per feature;

     * "matrix" otherwise.
    """
    # The tests are evaluated in this order and short-circuit, so later
    # descriptor entries are only consulted when the earlier ones pass.
    if (output_desc["hasDuration"]
            or output_desc["sampleType"] == vampyhost.VARIABLE_SAMPLE_RATE
            or not output_desc["hasFixedBinCount"]):
        return "list"

    bin_count = output_desc["binCount"]
    if bin_count == 0:
        return "list"

    return "vector" if bin_count == 1 else "matrix"
Chris@93 91
def populate_reshaped_vector(results, out_step, output_desc, return_dict):
    """Collect single-valued features into return_dict.

    Each item of results is indexed by the output's "identifier" to
    obtain a feature, and the first element of that feature's "values"
    is taken as its value.

    On return, return_dict["vector"] holds a tuple of out_step and a
    one-dimensional float32 NumPy array of every value, in order.

    If the output has a fixed sample rate and some feature carries a
    timestamp that does not fall on the next expected step, the values
    are also split into contiguous runs, and return_dict["tracks"] is
    set to a list of dictionaries with the keys "start", "step" and
    "values" (a float32 NumPy array). If no such gap ends a non-empty
    run, "tracks" is not set at all.

    out_step must provide a to_float() method giving the step in
    seconds.

    NOTE(review): a track that begins with the very first feature and
    has no out-of-place timestamp gets the plain integer 0 as its
    "start", whereas later tracks get a vampyhost.RealTime -- confirm
    whether callers rely on that.
    """
    output = output_desc["identifier"]
    out_step_secs = out_step.to_float()

    tracks = []
    whole = []
    current_track = []
    current_start_time = 0

    # Index, in units of out_step, at which the current feature is
    # expected to fall if it follows on directly from the previous one
    n = -1

    for r in results:
        f = r[output]
        value = f["values"][0]
        n = n + 1
        whole.append(value)

        if (output_desc["sampleType"] == vampyhost.FIXED_SAMPLE_RATE
                and "timestamp" in f):
            m = int(round(f["timestamp"].to_float() / out_step_secs))
            if m != n:
                # There is a gap before this feature: close off the
                # run so far (if any) and begin a new one here
                if current_track:
                    tracks.append({
                        "start": current_start_time,
                        "step": out_step,
                        "values": np.array(current_track, np.float32)
                    })
                    current_track = []
                n = m
                current_start_time = vampyhost.RealTime(
                    'seconds', m * out_step_secs)

        current_track.append(value)

    if tracks:
        # At least one gap was found; flush the final run as well
        if current_track:
            tracks.append({
                "start": current_start_time,
                "step": out_step,
                "values": np.array(current_track, np.float32)
            })
        return_dict["tracks"] = tracks

    # Return a NumPy array, as documented by collect() and as the
    # "matrix" and "tracks" results already do, rather than a bare list
    return_dict["vector"] = (out_step, np.array(whole, np.float32))
Chris@150 128
def populate_reshaped_features(results, sample_rate, step_size, output_desc, shape, return_dict):
    """Fill return_dict with the results for one output, in a given shape.

    shape selects how the features found in results are arranged:

     * "vector": handled by populate_reshaped_vector, which sets the
       "vector" key and possibly also "tracks".

     * "matrix": return_dict["matrix"] is set to a tuple of the feature
       step time and a float32 NumPy array built from the "values" of
       every feature.

     * anything else: return_dict[shape] is set to the list of features
       with their timestamps filled in by fill_timestamps.

    The feature step time is obtained from get_feature_step_time.
    """
    output = output_desc["identifier"]
    out_step = get_feature_step_time(sample_rate, step_size, output_desc)

    if shape == "vector":
        populate_reshaped_vector(results, out_step, output_desc, return_dict)
    elif shape == "matrix":
        #!!! todo: check that each feature has the right number of bins?
        outseq = [r[output]["values"] for r in results]
        return_dict[shape] = (out_step, np.array(outseq, np.float32))
    else:
        return_dict[shape] = list(fill_timestamps(results, sample_rate, step_size, output_desc))
Chris@95 143
def collect(data, sample_rate, plugin_key, output = "", parameters = {}, **kwargs):
    """Process audio data with a Vamp plugin, and make the results from a
    single plugin output available as a single structure.

    The provided data should be a 1- or 2-dimensional list or NumPy
    array of floats. If it is 2-dimensional, the first dimension is
    taken to be the channel count.

    The returned results will be those calculated by the plugin with
    the given key and returned through its output with the given
    output identifier. If the requested output is the empty string,
    the first output provided by the plugin will be used.

    If the parameters dict is non-empty, the plugin will be configured
    by setting its parameters according to the (string) key and
    (float) value data found in the dict.

    The results are returned in a dictionary. This will always contain
    exactly one of the keys "vector", "matrix", or "list". In addition
    it may optionally contain the key "tracks". Which of these is used
    depends on the structure of features set out in the output
    descriptor for the requested plugin output, and sometimes on the
    features themselves, as follows:

     * If the plugin output emits single-valued features at a fixed
       sample-rate, then the "vector" element will be used. It will
       contain a tuple of step time (the time in seconds between
       consecutive feature values) and a one-dimensional NumPy array
       of feature values. An example of such a feature might be a
       loudness curve against time.

     * If the above is true but it also happens that the plugin output
       has gaps between some features, so that a single continuous
       vector can't convey all the relevant information, then the
       "tracks" element will additionally be used. It will contain a
       list of dictionaries, one for each set of contiguous points in
       the output, each containing elements "start" (start time in
       seconds), "step" (step time in seconds), and "values" (a
       one-dimensional NumPy array of contiguous feature values). An
       example of such a feature might be the output of a pitch
       tracker that emits values only during pitched sections of the
       input audio.

     * If the plugin output emits multiple-valued features, with an
       equal number of bins per feature, at a fixed sample-rate, then
       the "matrix" element will be used. It will contain a tuple of
       step time (the time in seconds between consecutive feature
       values) and a two-dimensional NumPy array of feature values. An
       example of such a feature might be a spectrogram.

     * Otherwise, the "list" element will be used, and will contain a
       list of features, where each feature is represented as a
       dictionary containing a timestamp (always) and a duration
       (optionally), a label (string), and a 1-dimensional array of
       float values.

    If you wish to override the processing step size, block size, or
    process timestamp method, you may supply them as keyword arguments
    with the keywords step_size (int), block_size (int), and
    process_timestamp_method (choose from vamp.vampyhost.SHIFT_DATA,
    vamp.vampyhost.SHIFT_TIMESTAMP, or vamp.vampyhost.NO_SHIFT).

    If you would prefer to obtain features as they are calculated
    (where the plugin supports this) and with the format in which the
    plugin returns them, via an asynchronous generator function, use
    vamp.process() instead.
    """

    # The parameters dict is only handed on to load_and_configure; this
    # function itself never modifies it.
    plugin, step_size, block_size = vamp.load.load_and_configure(data, sample_rate, plugin_key, parameters, **kwargs)

    # Everything from here on uses the loaded plugin, so make sure it
    # is unloaded again even if processing fails part-way through.
    try:
        if output == "":
            output_desc = plugin.get_output(0)
            output = output_desc["identifier"]
        else:
            output_desc = plugin.get_output(output)

        ff = vamp.frames.frames_from_array(data, step_size, block_size)

        results = vamp.process.process_with_initialised_plugin(ff, sample_rate, step_size, plugin, [output])

        shape = deduce_shape(output_desc)

        # The results must be fully collected into return_dict before
        # the plugin is unloaded.
        return_dict = {}
        populate_reshaped_features(results, sample_rate, step_size, output_desc, shape, return_dict)
    finally:
        plugin.unload()

    return return_dict