Chris@56
|
1 '''A high-level interface to the vampyhost extension module, for quickly and easily running Vamp audio analysis plugins on audio files and buffers.'''
|
Chris@56
|
2
|
Chris@56
|
3 import vampyhost
|
Chris@75
|
4 import load
|
Chris@88
|
5 import process
|
Chris@89
|
6 import frames
|
Chris@89
|
7
|
Chris@93
|
8 import numpy as np
|
Chris@93
|
9
|
Chris@94
|
def get_feature_step_time(sample_rate, step_size, output_desc):
    """Return the time step between consecutive features of an output.

    The result depends on output_desc["sampleType"]:

    * ONE_SAMPLE_PER_STEP: step_size frames at sample_rate, converted
      with vampyhost.frame_to_realtime.
    * FIXED_SAMPLE_RATE: a vampyhost.RealTime built from the reciprocal
      of output_desc["sampleRate"], in seconds.
    * anything else: the plain integer 1 (note: not a RealTime).
    """
    sample_type = output_desc["sampleType"]

    if sample_type == vampyhost.ONE_SAMPLE_PER_STEP:
        return vampyhost.frame_to_realtime(step_size, sample_rate)

    if sample_type == vampyhost.FIXED_SAMPLE_RATE:
        return vampyhost.RealTime('seconds', 1.0 / output_desc["sampleRate"])

    # No regular step can be derived for other sample types.
    return 1
|
Chris@72
|
17
|
Chris@85
|
def timestamp_features(sample_rate, step_size, output_desc, features):
    """Yield each feature dict from features, with a timestamp assigned.

    Features are modified in place and yielded in their original order.
    How the "timestamp" entry is set depends on output_desc["sampleType"]:

    * ONE_SAMPLE_PER_STEP: the i-th feature (counting from zero) gets
      the time of frame i * step_size at sample_rate.
    * FIXED_SAMPLE_RATE: each feature gets index / output sample rate,
      where the index is either derived from the feature's existing
      timestamp (rounded to the nearest output sample) or one more than
      the previous feature's index.
    * anything else: features are yielded untouched.
    """
    sample_type = output_desc["sampleType"]

    if sample_type == vampyhost.ONE_SAMPLE_PER_STEP:
        for index, feature in enumerate(features):
            frame = index * step_size
            feature["timestamp"] = vampyhost.frame_to_realtime(frame, sample_rate)
            yield feature
        return

    if sample_type == vampyhost.FIXED_SAMPLE_RATE:
        output_rate = output_desc["sampleRate"]
        # Starts at -1 so the first feature without its own timestamp
        # lands on index 0.
        index = -1
        for feature in features:
            # NOTE(review): presence is tested on "has_timestamp" but the
            # value is read from "timestamp" -- confirm the producer of
            # these dicts sets both keys together.
            if "has_timestamp" in feature:
                # Adding 0.5 before truncation rounds to the nearest index.
                index = int(feature["timestamp"].to_float() * output_rate + 0.5)
            else:
                index = index + 1
            feature["timestamp"] = vampyhost.RealTime('seconds', float(index) / output_rate)
            yield feature
        return

    for feature in features:
        yield feature
|
Chris@72
|
38
|
Chris@93
|
def fill_timestamps(results, sample_rate, step_size, output_desc):
    """Yield the timestamped features of a single output.

    Each item of results is indexed by output_desc["identifier"] to pick
    out that output's feature. All results are gathered into a list
    first, and the list is then passed through timestamp_features,
    whose items are yielded one by one.
    """
    identifier = output_desc["identifier"]

    # Collect eagerly: the whole of results is read before any feature
    # is stamped or yielded.
    features = []
    for result in results:
        features.append(result[identifier])

    for feature in timestamp_features(sample_rate, step_size, output_desc, features):
        yield feature
|
Chris@93
|
49
|
Chris@93
|
def deduce_shape(output_desc):
    """Classify an output descriptor as "individual", "vector" or "matrix".

    Returns "individual" when the output has durations, uses the
    variable sample rate type, has no fixed bin count, or has a bin
    count of zero. Otherwise returns "vector" for a bin count of exactly
    one and "matrix" for any other bin count.
    """
    # The checks short-circuit in this order, so later keys are only
    # read when the earlier tests have not already decided the answer.
    is_individual = (
        output_desc["hasDuration"]
        or output_desc["sampleType"] == vampyhost.VARIABLE_SAMPLE_RATE
        or not output_desc["hasFixedBinCount"]
        or output_desc["binCount"] == 0
    )
    if is_individual:
        return "individual"

    return "vector" if output_desc["binCount"] == 1 else "matrix"
|
Chris@93
|
62
|
Chris@88
|
63
|
Chris@95
|
def reshape(results, sample_rate, step_size, output_desc):
    """Pack the results for one output into a shape-dependent structure.

    The shape is chosen by deduce_shape(output_desc):

    * "vector": returns (step time, 1-d float32 NumPy array) holding
      the first value of every feature.
    * "matrix": returns (step time, float32 NumPy array) built from
      the full value sequence of every feature.
    * otherwise: returns a list of the timestamped features produced
      by fill_timestamps.

    The step time comes from get_feature_step_time and is computed
    before the shape is examined, whichever branch is taken.
    """
    identifier = output_desc["identifier"]
    layout = deduce_shape(output_desc)
    step_time = get_feature_step_time(sample_rate, step_size, output_desc)

    if layout == "vector":
        first_values = [r[identifier]["values"][0] for r in results]
        return (step_time, np.array(first_values, np.float32))

    if layout == "matrix":
        #!!! todo: check that each feature has the right number of bins?
        rows = [r[identifier]["values"] for r in results]
        return (step_time, np.array(rows, np.float32))

    return list(fill_timestamps(results, sample_rate, step_size, output_desc))
|
Chris@95
|
81
|
Chris@95
|
82
|
Chris@96
|
def collect(data, sample_rate, key, output = "", parameters = None):
    """Process audio data with a Vamp plugin, and make the results from a
    single plugin output available as a single structure.

    The provided data should be a 1- or 2-dimensional list or NumPy
    array of floats. If it is 2-dimensional, the first dimension is
    taken to be the channel count.

    The returned results will be those calculated by the plugin with
    the given key and returned through its output with the given
    output identifier. If the requested output is the empty string,
    the first output provided by the plugin will be used.

    If the parameters dict is non-empty, the plugin will be configured
    by setting its parameters according to the (string) key and
    (float) value data found in the dict. Omitting it (or passing
    None) is equivalent to passing an empty dict.

    The structure in which the results are returned depends upon the
    output descriptor for the requested plugin output, as follows:

     * If the plugin output emits single-valued features at a fixed
       sample-rate, then this function will return a tuple of step time
       (the time in seconds between consecutive feature values) and a
       one-dimensional NumPy array of feature values. An example of
       such a feature might be a loudness curve against time.

     * If the plugin output emits multiple-valued features, with an
       equal number of bins per feature, at a fixed sample-rate, then
       this function will return a tuple of step time (the time in
       seconds between consecutive feature values) and a
       two-dimensional NumPy array of feature values. An example of
       such a feature might be a spectrogram.

     * Otherwise this function will return a list of features, where
       each feature is represented as a dictionary containing a
       timestamp (always) and a duration (optionally), a label
       (string), and a 1-dimensional array of float values.

    The plugin is always unloaded before this function returns or
    raises, including when processing fails part-way through.

    If you would prefer to obtain features as they are calculated
    (where the plugin supports this) and with the format in which the
    plugin returns them, via an asynchronous generator function, use
    vamp.process() instead.
    """

    # Use a fresh dict per call rather than a shared mutable default.
    if parameters is None:
        parameters = {}

    plugin, step_size, block_size = load.load_and_configure(data, sample_rate, key, parameters)

    # From here on the plugin is loaded; make sure it is released on
    # every exit path, not only on success.
    try:
        if output == "":
            output_desc = plugin.get_output(0)
            output = output_desc["identifier"]
        else:
            output_desc = plugin.get_output(output)

        ff = frames.frames_from_array(data, step_size, block_size)

        results = process.process_with_initialised_plugin(ff, sample_rate, step_size, plugin, [output])

        # reshape() reads all of results into an array or list, so the
        # plugin must still be loaded while it runs (results may be
        # produced lazily -- TODO confirm against process module).
        return reshape(results, sample_rate, step_size, output_desc)
    finally:
        plugin.unload()
|
Chris@93
|
143
|