Chris@117
|
1 #!/usr/bin/env python
|
Chris@117
|
2
|
Chris@117
|
3 # Python Vamp Host
|
Chris@117
|
4 # Copyright (c) 2008-2015 Queen Mary, University of London
|
Chris@117
|
5 #
|
Chris@117
|
6 # Permission is hereby granted, free of charge, to any person
|
Chris@117
|
7 # obtaining a copy of this software and associated documentation
|
Chris@117
|
8 # files (the "Software"), to deal in the Software without
|
Chris@117
|
9 # restriction, including without limitation the rights to use, copy,
|
Chris@117
|
10 # modify, merge, publish, distribute, sublicense, and/or sell copies
|
Chris@117
|
11 # of the Software, and to permit persons to whom the Software is
|
Chris@117
|
12 # furnished to do so, subject to the following conditions:
|
Chris@117
|
13 #
|
Chris@117
|
14 # The above copyright notice and this permission notice shall be
|
Chris@117
|
15 # included in all copies or substantial portions of the Software.
|
Chris@117
|
16 #
|
Chris@117
|
17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
Chris@117
|
18 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
Chris@117
|
19 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
Chris@117
|
20 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
Chris@117
|
21 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
|
Chris@117
|
22 # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
Chris@117
|
23 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
Chris@117
|
24 #
|
Chris@117
|
25 # Except as contained in this notice, the names of the Centre for
|
Chris@117
|
26 # Digital Music and Queen Mary, University of London shall not be
|
Chris@117
|
27 # used in advertising or otherwise to promote the sale, use or other
|
Chris@117
|
28 # dealings in this Software without prior written authorization.
|
Chris@117
|
29
|
Chris@56
|
30 '''A high-level interface to the vampyhost extension module, for quickly and easily running Vamp audio analysis plugins on audio files and buffers.'''
|
Chris@56
|
31
|
Chris@56
|
32 import vampyhost
|
Chris@112
|
33 import vamp.frames
|
Chris@112
|
34 import vamp.load
|
Chris@68
|
35
|
Chris@100
|
def process_with_initialised_plugin(ff, sample_rate, step_size, plugin, outputs):
    """Run an already-initialised plugin over a sequence of frames.

    The plugin is reset, then each frame taken from ff is handed to
    plugin.process_block() together with a timestamp computed from the
    running sample offset (which starts at zero and grows by step_size
    per frame). Once ff is exhausted, plugin.get_remaining_features()
    is consulted as well.

    This is a generator. For every feature the plugin returns on one of
    the output identifiers listed in outputs, it yields a one-entry
    dictionary of the form { output_identifier: feature }.
    """
    # Translate each requested output identifier to the numeric index
    # under which the plugin reports that output's features.
    index_of = {}
    for ident in outputs:
        index_of[ident] = plugin.get_output(ident)["output_index"]

    plugin.reset()

    def result_sets():
        # One result dict (output index -> list of feature dicts) per
        # processed frame, followed by the plugin's remaining features.
        offset = 0
        for block in ff:
            when = vampyhost.frame_to_realtime(offset, sample_rate)
            yield plugin.process_block(block, when)
            offset += step_size
        yield plugin.get_remaining_features()

    for returned in result_sets():
        for ident in outputs:
            index = index_of[ident]
            if index not in returned:
                continue
            for feature in returned[index]:
                yield {ident: feature}
|
Chris@89
|
60
|
Chris@89
|
61
|
Chris@140
|
def process_audio(data, sample_rate, plugin_key, output = "", parameters = {}, **kwargs):
    """Process audio data with a Vamp plugin, and make the results from a
    single plugin output available as a generator.

    The provided data should be a 1- or 2-dimensional list or NumPy
    array of floats. If it is 2-dimensional, the first dimension is
    taken to be the channel count.

    The returned results will be those calculated by the plugin with
    the given key and returned through its output with the given
    output identifier. If the requested output is the empty string,
    the first output provided by the plugin will be used.

    If the parameters dict is non-empty, the plugin will be configured
    by setting its parameters according to the (string) key and
    (float) value data found in the dict.

    This function acts as a generator, yielding a sequence of result
    features as it obtains them. Each feature is represented as a
    dictionary containing, optionally, timestamp and duration
    (RealTime objects), label (string), and a 1-dimensional array of
    float values.

    If you wish to override the step size, block size, or process
    timestamp method to be used, you may supply them as keyword
    arguments with keywords step_size (int), block_size (int), and
    process_timestamp_method (choose from vamp.vampyhost.SHIFT_DATA,
    vamp.vampyhost.SHIFT_TIMESTAMP, or vamp.vampyhost.NO_SHIFT).

    The plugin is unloaded when the generator finishes, is closed
    before being exhausted, or terminates with an exception.

    If you would prefer to obtain all features in a single output
    structure, consider using vamp.collect() instead.
    """

    plugin, step_size, block_size = vamp.load.load_and_configure(
        data, sample_rate, plugin_key, parameters, **kwargs)

    # try/finally rather than a trailing unload() call: code placed
    # after the yield loop is never reached if the consumer abandons
    # the generator early or if processing raises.
    try:
        if output == "":
            output = plugin.get_output(0)["identifier"]

        ff = vamp.frames.frames_from_array(data, step_size, block_size)

        for r in process_with_initialised_plugin(ff, sample_rate, step_size,
                                                 plugin, [output]):
            # The helper yields { output_identifier: feature }; unwrap it
            yield r[output]
    finally:
        plugin.unload()
|
Chris@89
|
106
|
Chris@76
|
107
|
Chris@129
|
def process_frames(ff, sample_rate, step_size, plugin_key, output = "", parameters = {}):
    """Process audio data with a Vamp plugin, and make the results from a
    single plugin output available as a generator.

    The provided data should be an enumerable sequence of time-domain
    audio frames, of which each frame is a 2-dimensional array of
    floats. The first dimension is taken to be the channel count, and
    the second dimension the frame or block size; both are read from
    the shape attribute of the first frame, so frames must provide one
    (as NumPy arrays do). The step_size argument gives the increment
    in audio samples from one frame to the next. Each frame must have
    the same size.

    The returned results will be those calculated by the plugin with
    the given key and returned through its output with the given
    output identifier. If the requested output is the empty string,
    the first output provided by the plugin will be used.

    If the parameters dict is non-empty, the plugin will be configured
    by setting its parameters according to the (string) key and
    (float) value data found in the dict.

    This function acts as a generator, yielding a sequence of result
    features as it obtains them. Each feature is represented as a
    dictionary containing, optionally, timestamp and duration
    (RealTime objects), label (string), and a 1-dimensional array of
    float values.

    Raises RuntimeError if the plugin cannot be initialised. The
    plugin is unloaded when the generator finishes, is closed before
    being exhausted, or terminates with an exception.

    If you would prefer to obtain all features in a single output
    structure, consider using vamp.collect() instead.
    """
    plugin = vampyhost.load_plugin(plugin_key, sample_rate,
                                   vampyhost.ADAPT_INPUT_DOMAIN +
                                   vampyhost.ADAPT_BUFFER_SIZE +
                                   vampyhost.ADAPT_CHANNEL_COUNT)

    # try/finally so that the plugin is released even if the consumer
    # stops iterating early or an error occurs part-way through.
    try:
        if output == "":
            out_index = 0
        else:
            out_index = plugin.get_output(output)["output_index"]

        fi = 0
        # Explicit flag rather than testing fi against 0, so that the
        # plugin is initialised exactly once whatever step_size is.
        initialised = False

        for f in ff:

            if not initialised:
                # Initialisation is deferred to the first frame because
                # the channel count and block size come from its shape
                channels = f.shape[0]
                block_size = f.shape[1]
                plugin.set_parameter_values(parameters)
                if not plugin.initialise(channels, step_size, block_size):
                    # Must raise an exception instance: raising a plain
                    # string is itself a TypeError
                    raise RuntimeError("Failed to initialise plugin")
                initialised = True

            timestamp = vampyhost.frame_to_realtime(fi, sample_rate)
            results = plugin.process_block(f, timestamp)
            # results is a dict mapping output number -> list of feature dicts
            if out_index in results:
                for r in results[out_index]:
                    yield r

            fi = fi + step_size

        # Only ask for remaining features if at least one frame was
        # supplied, i.e. the plugin was actually initialised
        if initialised:
            results = plugin.get_remaining_features()
            if out_index in results:
                for r in results[out_index]:
                    yield r
    finally:
        plugin.unload()
|
Chris@95
|
176
|
Chris@95
|
177
|
Chris@140
|
def process_audio_multiple_outputs(data, sample_rate, plugin_key, outputs, parameters = {}, **kwargs):
    """Process audio data with a Vamp plugin, and make the results from a
    set of plugin outputs available as a generator.

    The provided data should be a 1- or 2-dimensional list or NumPy
    array of floats. If it is 2-dimensional, the first dimension is
    taken to be the channel count.

    The returned results will be those calculated by the plugin with
    the given key and returned through its outputs whose identifiers
    are given in the outputs argument.

    If the parameters dict is non-empty, the plugin will be configured
    by setting its parameters according to the (string) key and
    (float) value data found in the dict.

    This function acts as a generator, yielding a sequence of results
    as it obtains them. Each yielded item is a dictionary with a
    single entry, mapping from an output identifier to one feature
    returned through that output. The feature is represented as a
    dictionary containing, optionally, timestamp and duration
    (RealTime objects), label (string), and a 1-dimensional array of
    float values.

    If you wish to override the step size, block size, or process
    timestamp method to be used, you may supply them as keyword
    arguments with keywords step_size (int), block_size (int), and
    process_timestamp_method (choose from vamp.vampyhost.SHIFT_DATA,
    vamp.vampyhost.SHIFT_TIMESTAMP, or vamp.vampyhost.NO_SHIFT).

    The plugin is unloaded when the generator finishes, is closed
    before being exhausted, or terminates with an exception.
    """

    plugin, step_size, block_size = vamp.load.load_and_configure(
        data, sample_rate, plugin_key, parameters, **kwargs)

    # try/finally rather than a trailing unload() call: code placed
    # after the yield loop is never reached if the consumer abandons
    # the generator early or if processing raises.
    try:
        ff = vamp.frames.frames_from_array(data, step_size, block_size)

        for r in process_with_initialised_plugin(ff, sample_rate, step_size,
                                                 plugin, outputs):
            yield r
    finally:
        plugin.unload()
|
Chris@99
|
216
|
Chris@100
|
217
|
Chris@129
|
def process_frames_multiple_outputs(ff, sample_rate, step_size, plugin_key, outputs, parameters = {}):
    """Process audio data with a Vamp plugin, and make the results from a
    set of plugin outputs available as a generator.

    The provided data should be an enumerable sequence of time-domain
    audio frames, of which each frame is a 2-dimensional array of
    floats. The first dimension is taken to be the channel count, and
    the second dimension the frame or block size; both are read from
    the shape attribute of the first frame, so frames must provide one
    (as NumPy arrays do). The step_size argument gives the increment
    in audio samples from one frame to the next. Each frame must have
    the same size.

    The returned results will be those calculated by the plugin with
    the given key and returned through its outputs whose identifiers
    are given in the outputs argument.

    If the parameters dict is non-empty, the plugin will be configured
    by setting its parameters according to the (string) key and
    (float) value data found in the dict.

    This function acts as a generator, yielding a sequence of results
    as it obtains them. Each yielded item is a dictionary with a
    single entry, mapping from an output identifier to one feature
    returned through that output. The feature is represented as a
    dictionary containing, optionally, timestamp and duration
    (RealTime objects), label (string), and a 1-dimensional array of
    float values.

    Raises RuntimeError if the plugin cannot be initialised. The
    plugin is unloaded when the generator finishes, is closed before
    being exhausted, or terminates with an exception.
    """
    plugin = vampyhost.load_plugin(plugin_key, sample_rate,
                                   vampyhost.ADAPT_INPUT_DOMAIN +
                                   vampyhost.ADAPT_BUFFER_SIZE +
                                   vampyhost.ADAPT_CHANNEL_COUNT)

    # try/finally so that the plugin is released even if the consumer
    # stops iterating early or an error occurs part-way through.
    try:
        # Map each requested output identifier to the numeric index
        # under which the plugin reports that output's features
        out_indices = {}
        for ident in outputs:
            out_indices[ident] = plugin.get_output(ident)["output_index"]

        fi = 0
        # Explicit flag rather than testing fi against 0, so that the
        # plugin is initialised exactly once whatever step_size is.
        initialised = False

        for f in ff:

            if not initialised:
                # Initialisation is deferred to the first frame because
                # the channel count and block size come from its shape
                channels = f.shape[0]
                block_size = f.shape[1]
                plugin.set_parameter_values(parameters)
                if not plugin.initialise(channels, step_size, block_size):
                    # Must raise an exception instance: raising a plain
                    # string is itself a TypeError
                    raise RuntimeError("Failed to initialise plugin")
                initialised = True

            timestamp = vampyhost.frame_to_realtime(fi, sample_rate)
            results = plugin.process_block(f, timestamp)
            # results is a dict mapping output number -> list of feature dicts
            for o in outputs:
                ix = out_indices[o]
                if ix in results:
                    for r in results[ix]:
                        yield { o: r }
            fi = fi + step_size

        # Only ask for remaining features if at least one frame was
        # supplied, i.e. the plugin was actually initialised
        if initialised:
            results = plugin.get_remaining_features()
            for o in outputs:
                ix = out_indices[o]
                if ix in results:
                    for r in results[ix]:
                        yield { o: r }
    finally:
        plugin.unload()
|
Chris@100
|
284
|
Chris@100
|
285
|