annotate vamp/collect.py @ 117:2370b942cd32

Docs, make collect() return a dict so it can be more easily tested for shape, and rationalise some function naming etc
author Chris Cannam
date Wed, 17 Jun 2015 15:31:16 +0100
parents 9343eee50605
children b8fe675f9c3f
rev   line source
Chris@117 1 #!/usr/bin/env python
Chris@117 2
Chris@117 3 # Python Vamp Host
Chris@117 4 # Copyright (c) 2008-2015 Queen Mary, University of London
Chris@117 5 #
Chris@117 6 # Permission is hereby granted, free of charge, to any person
Chris@117 7 # obtaining a copy of this software and associated documentation
Chris@117 8 # files (the "Software"), to deal in the Software without
Chris@117 9 # restriction, including without limitation the rights to use, copy,
Chris@117 10 # modify, merge, publish, distribute, sublicense, and/or sell copies
Chris@117 11 # of the Software, and to permit persons to whom the Software is
Chris@117 12 # furnished to do so, subject to the following conditions:
Chris@117 13 #
Chris@117 14 # The above copyright notice and this permission notice shall be
Chris@117 15 # included in all copies or substantial portions of the Software.
Chris@117 16 #
Chris@117 17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Chris@117 18 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Chris@117 19 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Chris@117 20 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
Chris@117 21 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
Chris@117 22 # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
Chris@117 23 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Chris@117 24 #
Chris@117 25 # Except as contained in this notice, the names of the Centre for
Chris@117 26 # Digital Music and Queen Mary, University of London shall not be
Chris@117 27 # used in advertising or otherwise to promote the sale, use or other
Chris@117 28 # dealings in this Software without prior written authorization.
Chris@117 29
Chris@56 30 '''A high-level interface to the vampyhost extension module, for quickly and easily running Vamp audio analysis plugins on audio files and buffers.'''
Chris@56 31
Chris@56 32 import vampyhost
Chris@112 33 import vamp.load
Chris@112 34 import vamp.process
Chris@112 35 import vamp.frames
Chris@89 36
Chris@93 37 import numpy as np
Chris@93 38
def get_feature_step_time(sample_rate, step_size, output_desc):
    """Return the spacing between consecutive features of an output.

    The result depends on output_desc["sampleType"]:

     * ONE_SAMPLE_PER_STEP: whatever vampyhost.frame_to_realtime()
       returns for step_size frames at sample_rate.

     * FIXED_SAMPLE_RATE: a vampyhost.RealTime built from the
       reciprocal of output_desc["sampleRate"], in seconds.

     * anything else: the plain integer 1, as a placeholder.
    """
    sample_type = output_desc["sampleType"]

    if sample_type == vampyhost.ONE_SAMPLE_PER_STEP:
        return vampyhost.frame_to_realtime(step_size, sample_rate)

    if sample_type == vampyhost.FIXED_SAMPLE_RATE:
        seconds_per_feature = 1.0 / output_desc["sampleRate"]
        return vampyhost.RealTime('seconds', seconds_per_feature)

    return 1
Chris@72 46
def timestamp_features(sample_rate, step_size, output_desc, features):
    """Yield each feature dict in turn, with its "timestamp" filled in.

    This is a generator; the feature dictionaries are modified in
    place and then yielded. Behaviour depends on
    output_desc["sampleType"]:

     * ONE_SAMPLE_PER_STEP: the n'th feature (counting from zero) is
       stamped with vampyhost.frame_to_realtime(n * step_size,
       sample_rate), replacing any timestamp already present.

     * FIXED_SAMPLE_RATE: if the feature already has a "timestamp",
       it is rounded to the nearest multiple of the output sample
       period and counting continues from there; otherwise the
       feature is placed one output sample period after the previous
       one (the first feature landing at zero).

     * anything else: features are yielded untouched.
    """
    sample_type = output_desc["sampleType"]

    if sample_type == vampyhost.ONE_SAMPLE_PER_STEP:
        for n, f in enumerate(features):
            f["timestamp"] = vampyhost.frame_to_realtime(n * step_size, sample_rate)
            yield f

    elif sample_type == vampyhost.FIXED_SAMPLE_RATE:
        output_rate = output_desc["sampleRate"]
        n = -1
        for f in features:
            # Test for the key that is actually read below. The previous
            # check looked for "has_timestamp", which nothing in this
            # module sets, so plugin-supplied timestamps were ignored.
            # NOTE(review): assumes vampyhost only sets "timestamp" on
            # features that carry one -- confirm against the extension.
            if "timestamp" in f:
                # Round to the nearest output sample index
                n = int(f["timestamp"].to_float() * output_rate + 0.5)
            else:
                n = n + 1
            f["timestamp"] = vampyhost.RealTime('seconds', float(n) / output_rate)
            yield f

    else:
        for f in features:
            yield f
Chris@72 67
def fill_timestamps(results, sample_rate, step_size, output_desc):
    """Yield timestamped features for a single output.

    Each item of results is indexed by the output's identifier
    (output_desc["identifier"]) to pick out that output's entry; the
    picked entries are gathered into a list and handed to
    timestamp_features(), whose yielded items are passed straight
    through.
    """
    identifier = output_desc["identifier"]
    per_output = [entry[identifier] for entry in results]

    for feature in timestamp_features(sample_rate, step_size, output_desc, per_output):
        yield feature
Chris@93 78
def deduce_shape(output_desc):
    """Return the result shape name for an output descriptor.

    The result is "list" if the output has durations, uses a variable
    sample rate, lacks a fixed bin count, or has a bin count of zero.
    Otherwise it is "vector" for a bin count of exactly one, and
    "matrix" for any other bin count.
    """
    # The conditions short-circuit, so they are evaluated in the order
    # written and evaluation stops at the first one that holds.
    needs_list = (
        output_desc["hasDuration"]
        or output_desc["sampleType"] == vampyhost.VARIABLE_SAMPLE_RATE
        or not output_desc["hasFixedBinCount"]
        or output_desc["binCount"] == 0
    )
    if needs_list:
        return "list"

    return "vector" if output_desc["binCount"] == 1 else "matrix"
Chris@93 91
Chris@88 92
def reshape(results, sample_rate, step_size, output_desc, shape):
    """Convert an iterable of per-block results into the given shape.

    Each item of results is indexed by output_desc["identifier"] to
    reach the feature for the output of interest.

     * shape "vector": returns a tuple (step time, 1-d float32 NumPy
       array) built from the first value of each feature.

     * shape "matrix": returns a tuple (step time, float32 NumPy
       array) built from the whole "values" sequence of each feature.

     * any other shape: returns a list of the features, passed
       through fill_timestamps().

    The step time comes from get_feature_step_time() and is computed
    in every case, although only the tuple shapes return it.
    """
    identifier = output_desc["identifier"]
    step_time = get_feature_step_time(sample_rate, step_size, output_desc)

    if shape == "vector":
        first_values = [r[identifier]["values"][0] for r in results]
        return (step_time, np.array(first_values, np.float32))

    if shape == "matrix":
        #!!! todo: check that each feature has the right number of bins?
        rows = [r[identifier]["values"] for r in results]
        return (step_time, np.array(rows, np.float32))

    return list(fill_timestamps(results, sample_rate, step_size, output_desc))
Chris@95 109
Chris@95 110
def collect(data, sample_rate, key, output = "", parameters = {}):
    """Process audio data with a Vamp plugin, and make the results from a
    single plugin output available as a single structure.

    The provided data should be a 1- or 2-dimensional list or NumPy
    array of floats. If it is 2-dimensional, the first dimension is
    taken to be the channel count.

    The returned results will be those calculated by the plugin with
    the given key and returned through its output with the given
    output identifier. If the requested output is the empty string,
    the first output provided by the plugin will be used.

    If the parameters dict is non-empty, the plugin will be configured
    by setting its parameters according to the (string) key and
    (float) value data found in the dict. The dict is passed on to
    the plugin loader as-is and is not modified by this function
    itself.

    The results are returned in a dictionary which will always contain
    exactly one element, whose key is one of the strings "vector",
    "matrix", or "list". Which one is used depends on the structure of
    features set out in the output descriptor for the requested plugin
    output:

     * If the plugin output emits single-valued features at a fixed
       sample-rate, then the "vector" element will be used. It will
       contain a tuple of step time (the time between consecutive
       feature values) and a one-dimensional NumPy array of feature
       values. An example of such a feature might be a loudness curve
       against time.

     * If the plugin output emits multiple-valued features, with an
       equal number of bins per feature, at a fixed sample-rate, then
       the "matrix" element will be used. It will contain a tuple of
       step time (the time between consecutive feature values) and a
       two-dimensional NumPy array of feature values. An example of
       such a feature might be a spectrogram.

     * Otherwise, the "list" element will be used, and will contain a
       list of features, where each feature is represented as a
       dictionary containing a timestamp (always) and a duration
       (optionally), a label (string), and a 1-dimensional array of
       float values.

    The plugin is unloaded before this function returns, whether it
    returns normally or an exception propagates out of processing.

    If you would prefer to obtain features as they are calculated
    (where the plugin supports this) and with the format in which the
    plugin returns them, via an asynchronous generator function, use
    vamp.process() instead.
    """

    plugin, step_size, block_size = vamp.load.load_and_configure(data, sample_rate, key, parameters)

    # From here on the plugin is loaded; make sure it is released even
    # if querying the output or processing the audio raises.
    try:
        if output == "":
            output_desc = plugin.get_output(0)
            output = output_desc["identifier"]
        else:
            output_desc = plugin.get_output(output)

        ff = vamp.frames.frames_from_array(data, step_size, block_size)

        results = vamp.process.process_with_initialised_plugin(ff, sample_rate, step_size, plugin, [output])

        shape = deduce_shape(output_desc)

        # reshape() fully consumes the results before returning, so the
        # plugin is no longer needed once it has completed.
        rv = reshape(results, sample_rate, step_size, output_desc, shape)

        return { shape : rv }
    finally:
        plugin.unload()
Chris@93 178