Chris@117
|
1 #!/usr/bin/env python
|
Chris@117
|
2
|
Chris@117
|
3 # Python Vamp Host
|
Chris@117
|
4 # Copyright (c) 2008-2015 Queen Mary, University of London
|
Chris@117
|
5 #
|
Chris@117
|
6 # Permission is hereby granted, free of charge, to any person
|
Chris@117
|
7 # obtaining a copy of this software and associated documentation
|
Chris@117
|
8 # files (the "Software"), to deal in the Software without
|
Chris@117
|
9 # restriction, including without limitation the rights to use, copy,
|
Chris@117
|
10 # modify, merge, publish, distribute, sublicense, and/or sell copies
|
Chris@117
|
11 # of the Software, and to permit persons to whom the Software is
|
Chris@117
|
12 # furnished to do so, subject to the following conditions:
|
Chris@117
|
13 #
|
Chris@117
|
14 # The above copyright notice and this permission notice shall be
|
Chris@117
|
15 # included in all copies or substantial portions of the Software.
|
Chris@117
|
16 #
|
Chris@117
|
17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
Chris@117
|
18 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
Chris@117
|
19 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
Chris@117
|
20 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
Chris@117
|
21 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
|
Chris@117
|
22 # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
Chris@117
|
23 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
Chris@117
|
24 #
|
Chris@117
|
25 # Except as contained in this notice, the names of the Centre for
|
Chris@117
|
26 # Digital Music and Queen Mary, University of London shall not be
|
Chris@117
|
27 # used in advertising or otherwise to promote the sale, use or other
|
Chris@117
|
28 # dealings in this Software without prior written authorization.
|
Chris@117
|
29
|
Chris@56
|
30 '''A high-level interface to the vampyhost extension module, for quickly and easily running Vamp audio analysis plugins on audio files and buffers.'''
|
Chris@56
|
31
|
Chris@56
|
32 import vampyhost
|
Chris@112
|
33 import vamp.load
|
Chris@112
|
34 import vamp.process
|
Chris@112
|
35 import vamp.frames
|
Chris@89
|
36
|
Chris@93
|
37 import numpy as np
|
Chris@93
|
38
|
Chris@94
|
def get_feature_step_time(sample_rate, step_size, output_desc):
    """Return the time between consecutive features on a plugin output.

    For a ONE_SAMPLE_PER_STEP output this is step_size frames converted
    to a time at sample_rate. For a FIXED_SAMPLE_RATE output it is the
    reciprocal of the output descriptor's own "sampleRate", wrapped in
    a vampyhost.RealTime. For any other sample type the plain integer
    1 is returned.
    """
    sample_type = output_desc["sampleType"]

    if sample_type == vampyhost.ONE_SAMPLE_PER_STEP:
        return vampyhost.frame_to_realtime(step_size, sample_rate)

    if sample_type == vampyhost.FIXED_SAMPLE_RATE:
        period = 1.0 / output_desc["sampleRate"]
        return vampyhost.RealTime('seconds', period)

    return 1
|
Chris@72
|
46
|
Chris@85
|
def timestamp_features(sample_rate, step_size, output_desc, features):
    """Yield each feature in turn, filling in its "timestamp" entry.

    The feature dictionaries are updated in place and then yielded.

    * ONE_SAMPLE_PER_STEP outputs: the i-th feature (counting from
      zero) is stamped with the time of frame i * step_size at
      sample_rate.
    * FIXED_SAMPLE_RATE outputs: a feature that already carries a
      timestamp is snapped to the nearest multiple of the output's
      sample period; a feature without one takes the slot following
      the previous feature.
    * Any other sample type: features are passed through untouched.
    """
    sample_type = output_desc["sampleType"]

    if sample_type == vampyhost.ONE_SAMPLE_PER_STEP:
        for index, feature in enumerate(features):
            feature["timestamp"] = vampyhost.frame_to_realtime(
                index * step_size, sample_rate)
            yield feature

    elif sample_type == vampyhost.FIXED_SAMPLE_RATE:
        output_rate = output_desc["sampleRate"]
        slot = -1
        for feature in features:
            if "timestamp" in feature:
                # Adding 0.5 before truncating rounds to the nearest slot
                slot = int(feature["timestamp"].to_float() * output_rate + 0.5)
            else:
                slot += 1
            feature["timestamp"] = vampyhost.RealTime(
                'seconds', float(slot) / output_rate)
            yield feature

    else:
        for feature in features:
            yield feature
|
Chris@72
|
67
|
Chris@93
|
def fill_timestamps(results, sample_rate, step_size, output_desc):
    """Yield the features for one output from results, with timestamps.

    Each item of results is indexed by the output descriptor's
    "identifier" to pick out the feature for that output; the selected
    features are then passed through timestamp_features() and yielded
    one at a time.
    """
    key = output_desc["identifier"]

    for feature in timestamp_features(sample_rate, step_size, output_desc,
                                      (result[key] for result in results)):
        yield feature
|
Chris@93
|
78
|
Chris@93
|
def deduce_shape(output_desc):
    """Choose the return structure name for a plugin output descriptor.

    Returns "list" if the output has durations, has a variable sample
    rate, has no fixed bin count, or has a bin count of zero. Otherwise
    returns "vector" for a bin count of exactly one and "matrix" for
    anything else.
    """
    # Conditions are tested in order and short-circuit, so later keys
    # are only looked up when the earlier tests did not already decide.
    needs_list = (
        output_desc["hasDuration"]
        or output_desc["sampleType"] == vampyhost.VARIABLE_SAMPLE_RATE
        or not output_desc["hasFixedBinCount"]
        or output_desc["binCount"] == 0
    )
    if needs_list:
        return "list"

    return "vector" if output_desc["binCount"] == 1 else "matrix"
|
Chris@93
|
91
|
Chris@150
|
def _make_track(start, step, values):
    """Bundle one contiguous run of feature values into a track dict."""
    return {"start": start,
            "step": step,
            "values": np.array(values, np.float32)}


def populate_reshaped_vector(results, out_step, output_desc, return_dict):
    """Collect single-valued features into return_dict.

    results is iterated once; from each item the feature for this
    output (keyed by the descriptor's "identifier") is taken and the
    first element of its "values" is used.

    On return, return_dict["vector"] is a tuple (out_step, values)
    where values is a one-dimensional float32 NumPy array holding every
    feature value in order.

    For FIXED_SAMPLE_RATE outputs whose features carry timestamps, the
    timestamp of each feature is converted to a step index and compared
    with the running feature count. Whenever they disagree the values
    gathered so far are closed off as a track and a new track is started
    at the feature's time. If at least one track was closed off in this
    way, return_dict["tracks"] is set to the list of track dictionaries
    (each with "start", "step" and "values"); otherwise no "tracks" key
    is added.

    out_step must provide to_float(), returning the step in seconds.
    """
    output = output_desc["identifier"]
    out_step_secs = out_step.to_float()

    tracks = []
    whole = []
    current_track = []
    # NOTE(review): the first track's start stays the plain integer 0
    # unless a timestamp jump precedes it, whereas later starts are
    # vampyhost.RealTime values -- confirm which type callers expect.
    current_start_time = 0

    n = -1
    for r in results:
        f = r[output]
        n += 1
        value = f["values"][0]
        whole.append(value)

        # Check for the timestamp first: it is the cheaper test and most
        # features on a regularly sampled output do not carry one.
        if ("timestamp" in f and
                output_desc["sampleType"] == vampyhost.FIXED_SAMPLE_RATE):
            m = int(round(f["timestamp"].to_float() / out_step_secs))
            if m != n:
                # Gap in the output: close the current run, if any, and
                # resynchronise the counter with the feature's own time
                if current_track:
                    tracks.append(_make_track(current_start_time,
                                              out_step, current_track))
                    current_track = []
                n = m
                current_start_time = vampyhost.RealTime('seconds',
                                                        m * out_step_secs)

        current_track.append(value)

    # Tracks are only reported when a gap was actually seen; a single
    # unbroken run is fully described by the vector alone.
    if tracks:
        if current_track:
            tracks.append(_make_track(current_start_time,
                                      out_step, current_track))
        return_dict["tracks"] = tracks

    # Return a NumPy array, as documented for collect() and as the
    # matrix shape already does, rather than a plain Python list.
    return_dict["vector"] = (out_step, np.array(whole, np.float32))
|
Chris@150
|
128
|
Chris@150
|
def populate_reshaped_features(results, sample_rate, step_size, output_desc, shape, return_dict):
    """Fill return_dict with the features in results, arranged by shape.

    * shape "vector": delegated to populate_reshaped_vector(), which
      sets the "vector" entry and possibly a "tracks" entry.
    * shape "matrix": return_dict["matrix"] is set to a tuple of the
      feature step time and a float32 NumPy array built from the
      "values" of each feature.
    * any other shape: return_dict[shape] is set to a list of the
      features with timestamps filled in by fill_timestamps().

    results is consumed by this call. Nothing is returned; the result
    is delivered through return_dict.
    """
    if shape not in ("vector", "matrix"):
        # The step time is not needed for a plain list of features, so
        # it is not computed here (doing so could fail needlessly, for
        # instance on a descriptor whose sampleRate is zero).
        return_dict[shape] = list(fill_timestamps(results, sample_rate, step_size, output_desc))
        return

    out_step = get_feature_step_time(sample_rate, step_size, output_desc)

    if shape == "vector":
        populate_reshaped_vector(results, out_step, output_desc, return_dict)
    else:
        output = output_desc["identifier"]
        #!!! todo: check that each feature has the right number of bins?
        outseq = [r[output]["values"] for r in results]
        return_dict[shape] = (out_step, np.array(outseq, np.float32))
|
Chris@95
|
143
|
Chris@140
|
def collect(data, sample_rate, plugin_key, output = "", parameters = {}, **kwargs):
    """Process audio data with a Vamp plugin, and make the results from a
    single plugin output available as a single structure.

    The provided data should be a 1- or 2-dimensional list or NumPy
    array of floats. If it is 2-dimensional, the first dimension is
    taken to be the channel count.

    The returned results will be those calculated by the plugin with
    the given key and returned through its output with the given
    output identifier. If the requested output is the empty string,
    the first output provided by the plugin will be used.

    If the parameters dict is non-empty, the plugin will be configured
    by setting its parameters according to the (string) key and
    (float) value data found in the dict.

    The results are returned in a dictionary. This will always contain
    exactly one of the keys "vector", "matrix", or "list". In addition
    it may optionally contain the key "tracks". Which of these is used
    depends on the structure of features set out in the output
    descriptor for the requested plugin output, and sometimes on the
    features themselves, as follows:

    * If the plugin output emits single-valued features at a fixed
    sample-rate, then the "vector" element will be used. It will contain
    a tuple of step time (the time in seconds between consecutive
    feature values) and a one-dimensional NumPy array of feature
    values. An example of such a feature might be a loudness curve
    against time.

    * If the above is true but it also happens that the plugin output
    has gaps between some features, so that a single continuous vector
    can't convey all the relevant information, then the "tracks" element
    will additionally be used. It will contain a list of dictionaries,
    one for each set of contiguous points in the output, each containing
    elements "start" (start time in seconds), "step" (step time in
    seconds), and "values" (a one-dimensional NumPy array of contiguous
    feature values). An example of such a feature might be the output of
    a pitch tracker that emits values only during pitched sections of
    the input audio.

    * If the plugin output emits multiple-valued features, with an
    equal number of bins per feature, at a fixed sample-rate, then
    the "matrix" element will be used. It will contain a tuple of
    step time (the time in seconds between consecutive feature
    values) and a two-dimensional NumPy array of feature values. An
    example of such a feature might be a spectrogram.

    * Otherwise, the "list" element will be used, and will contain a
    list of features, where each feature is represented as a
    dictionary containing a timestamp (always) and a duration
    (optionally), a label (string), and a 1-dimensional array of
    float values.

    If you wish to override the processing step size, block size, or
    process timestamp method, you may supply them as keyword arguments
    with the keywords step_size (int), block_size (int), and
    process_timestamp_method (choose from vamp.vampyhost.SHIFT_DATA,
    vamp.vampyhost.SHIFT_TIMESTAMP, or vamp.vampyhost.NO_SHIFT).

    If you would prefer to obtain features as they are calculated
    (where the plugin supports this) and with the format in which the
    plugin returns them, via an asynchronous generator function, use
    vamp.process() instead.

    The plugin is unloaded before this function returns, whether it
    returns normally or processing raises an exception.
    """

    plugin, step_size, block_size = vamp.load.load_and_configure(data, sample_rate, plugin_key, parameters, **kwargs)

    # From here on the plugin is loaded, so make sure it is released
    # again whatever happens while processing.
    try:
        if output == "":
            output_desc = plugin.get_output(0)
            output = output_desc["identifier"]
        else:
            output_desc = plugin.get_output(output)

        ff = vamp.frames.frames_from_array(data, step_size, block_size)

        results = vamp.process.process_with_initialised_plugin(ff, sample_rate, step_size, plugin, [output])

        shape = deduce_shape(output_desc)

        return_dict = {}
        populate_reshaped_features(results, sample_rate, step_size, output_desc, shape, return_dict)
    finally:
        plugin.unload()

    return return_dict
|