#!/usr/bin/env python

# Python Vamp Host
# Copyright (c) 2008-2015 Queen Mary, University of London
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use, copy,
# modify, merge, publish, distribute, sublicense, and/or sell copies
# of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the names of the Centre for
# Digital Music and Queen Mary, University of London shall not be
# used in advertising or otherwise to promote the sale, use or other
# dealings in this Software without prior written authorization.

'''A high-level interface to the vampyhost extension module, for quickly and easily running Vamp audio analysis plugins on audio files and buffers.'''

import vampyhost
import vamp.load
import vamp.process
import vamp.frames

import numpy as np


def get_feature_step_time(sample_rate, step_size, output_desc):
    """Return the time between consecutive features of a plugin output.

    For a one-sample-per-step output this is the processing step
    converted with vampyhost.frame_to_realtime; for a fixed-sample-rate
    output it is a vampyhost.RealTime of one period of the output's own
    "sampleRate". For any other sample type the plain integer 1 is
    returned (note: not a RealTime).
    """
    sample_type = output_desc["sampleType"]
    if sample_type == vampyhost.ONE_SAMPLE_PER_STEP:
        return vampyhost.frame_to_realtime(step_size, sample_rate)
    elif sample_type == vampyhost.FIXED_SAMPLE_RATE:
        return vampyhost.RealTime('seconds', 1.0 / output_desc["sampleRate"])
    else:
        return 1


def timestamp_features(sample_rate, step_size, output_desc, features):
    """Yield each feature from features with its "timestamp" filled in.

    The feature dictionaries are modified in place:

     * One-sample-per-step outputs: the timestamp of the n-th feature
       (counting from zero) is n * step_size frames, overwriting any
       timestamp already present.

     * Fixed-sample-rate outputs: an existing timestamp is rounded to
       the nearest multiple of the output period; a feature without a
       timestamp is placed one period after the previous feature.

     * Any other sample type: features are passed through untouched.
    """
    sample_type = output_desc["sampleType"]

    if sample_type == vampyhost.ONE_SAMPLE_PER_STEP:
        for n, f in enumerate(features):
            f["timestamp"] = vampyhost.frame_to_realtime(n * step_size,
                                                         sample_rate)
            yield f

    elif sample_type == vampyhost.FIXED_SAMPLE_RATE:
        output_rate = output_desc["sampleRate"]
        n = -1
        for f in features:
            if "timestamp" in f:
                # Snap to the nearest output sample index
                n = int(f["timestamp"].to_float() * output_rate + 0.5)
            else:
                n = n + 1
            f["timestamp"] = vampyhost.RealTime('seconds',
                                                float(n) / output_rate)
            yield f

    else:
        for f in features:
            yield f


def fill_timestamps(results, sample_rate, step_size, output_desc):
    """Yield the features of one output from results, with timestamps.

    Each element of results is indexed by the output's "identifier" to
    obtain a feature, which is then passed through timestamp_features.
    """
    output = output_desc["identifier"]
    selected = (r[output] for r in results)
    stamped = timestamp_features(sample_rate, step_size, output_desc, selected)
    for s in stamped:
        yield s


def deduce_shape(output_desc):
    """Return "vector", "matrix" or "list" for an output descriptor.

    "list" is chosen whenever features have a duration, the sample rate
    is variable, the bin count is not fixed, or the bin count is zero.
    Otherwise a bin count of one gives "vector" and anything else gives
    "matrix".
    """
    if output_desc["hasDuration"]:
        return "list"
    if output_desc["sampleType"] == vampyhost.VARIABLE_SAMPLE_RATE:
        return "list"
    if not output_desc["hasFixedBinCount"]:
        return "list"
    if output_desc["binCount"] == 0:
        return "list"
    if output_desc["binCount"] == 1:
        return "vector"
    return "matrix"


def _make_track(start, step, values):
    """Bundle one contiguous run of values into a track dictionary."""
    return {"start": start,
            "step": step,
            "values": np.array(values, np.float32)}


def populate_reshaped_vector(results, out_step, output_desc, return_dict):
    """Store single-valued features in return_dict as a "vector".

    return_dict["vector"] is set to a tuple of out_step and a
    one-dimensional float32 NumPy array holding the first value of
    every feature, in order.

    For fixed-sample-rate outputs, feature timestamps are also checked
    against the expected position in the sequence. If at least one
    feature does not fall where the running count expects it after
    some values have already been gathered, return_dict["tracks"] is
    set to a list of dictionaries with keys "start", "step" and
    "values", one per contiguous run of features.
    """
    output = output_desc["identifier"]
    fixed_rate = (output_desc["sampleType"] == vampyhost.FIXED_SAMPLE_RATE)
    out_step_secs = out_step.to_float()

    tracks = []
    whole = []
    current_track = []
    # NOTE(review): the first track starts at the plain number 0 whereas
    # later tracks start at a vampyhost.RealTime -- kept as in the
    # original; confirm whether callers rely on this.
    current_start_time = 0

    n = -1

    for r in results:
        f = r[output]
        value = f["values"][0]
        n = n + 1
        whole.append(value)
        if fixed_rate and "timestamp" in f:
            m = int(round(f["timestamp"].to_float() / out_step_secs))
            if m != n:
                # Feature is not where the running count expects it:
                # close off the current run and start a new one at m
                if current_track:
                    tracks.append(_make_track(current_start_time,
                                              out_step, current_track))
                    current_track = []
                n = m
                current_start_time = vampyhost.RealTime('seconds',
                                                        m * out_step_secs)
        current_track.append(value)

    # Tracks are only reported when a run was actually closed off above;
    # in that case the run still in progress is flushed as well
    if tracks:
        if current_track:
            tracks.append(_make_track(current_start_time,
                                      out_step, current_track))
        return_dict["tracks"] = tracks

    # Documented contract of collect(): a NumPy array, not a plain list
    return_dict["vector"] = (out_step, np.array(whole, np.float32))


def populate_reshaped_features(results, sample_rate, step_size, output_desc, shape, return_dict):
    """Store results in return_dict in the structure named by shape.

    "vector" delegates to populate_reshaped_vector; "matrix" stores a
    tuple of step time and a two-dimensional float32 NumPy array under
    the key "matrix"; any other shape stores the list of timestamped
    features under return_dict[shape].
    """
    output = output_desc["identifier"]
    out_step = get_feature_step_time(sample_rate, step_size, output_desc)

    if shape == "vector":
        populate_reshaped_vector(results, out_step, output_desc, return_dict)
    elif shape == "matrix":
        # TODO: check that each feature has the right number of bins?
        outseq = [r[output]["values"] for r in results]
        return_dict[shape] = (out_step, np.array(outseq, np.float32))
    else:
        return_dict[shape] = list(fill_timestamps(results, sample_rate,
                                                  step_size, output_desc))


def collect(data, sample_rate, plugin_key, output = "", parameters = None, **kwargs):
    """Process audio data with a Vamp plugin, and make the results from a
    single plugin output available as a single structure.

    The provided data should be a 1- or 2-dimensional list or NumPy
    array of floats. If it is 2-dimensional, the first dimension is
    taken to be the channel count.

    The returned results will be those calculated by the plugin with
    the given key and returned through its output with the given
    output identifier. If the requested output is the empty string,
    the first output provided by the plugin will be used.

    If the parameters dict is non-empty, the plugin will be configured
    by setting its parameters according to the (string) key and
    (float) value data found in the dict. Omitting it (or passing
    None) is the same as passing an empty dict.

    The results are returned in a dictionary. This will always contain
    exactly one of the keys "vector", "matrix", or "list". In addition
    it may optionally contain the key "tracks". Which of these is used
    depends on the structure of features set out in the output
    descriptor for the requested plugin output, and sometimes on the
    features themselves, as follows:

     * If the plugin output emits single-valued features at a fixed
       sample-rate, then the "vector" element will be used. It will
       contain a tuple of step time (the time in seconds between
       consecutive feature values) and a one-dimensional NumPy array
       of feature values. An example of such a feature might be a
       loudness curve against time.

     * If the above is true but it also happens that the plugin output
       has gaps between some features, so that a single continuous
       vector can't convey all the relevant information, then the
       "tracks" element will additionally be used. It will contain a
       list of dictionaries, one for each set of contiguous points in
       the output, each containing elements "start" (start time in
       seconds), "step" (step time in seconds), and "values" (a
       one-dimensional NumPy array of contiguous feature values). An
       example of such a feature might be the output of a pitch
       tracker that emits values only during pitched sections of the
       input audio.

     * If the plugin output emits multiple-valued features, with an
       equal number of bins per feature, at a fixed sample-rate, then
       the "matrix" element will be used. It will contain a tuple of
       step time (the time in seconds between consecutive feature
       values) and a two-dimensional NumPy array of feature values. An
       example of such a feature might be a spectrogram.

     * Otherwise, the "list" element will be used, and will contain a
       list of features, where each feature is represented as a
       dictionary containing a timestamp (always) and a duration
       (optionally), a label (string), and a 1-dimensional array of
       float values.

    If you wish to override the processing step size, block size, or
    process timestamp method, you may supply them as keyword arguments
    with the keywords step_size (int), block_size (int), and
    process_timestamp_method (choose from vamp.vampyhost.SHIFT_DATA,
    vamp.vampyhost.SHIFT_TIMESTAMP, or vamp.vampyhost.NO_SHIFT).

    If you would prefer to obtain features as they are calculated
    (where the plugin supports this) and with the format in which the
    plugin returns them, via an asynchronous generator function, use
    vamp.process() instead.
    """

    # A fresh dict per call, so no state is shared through the default
    if parameters is None:
        parameters = {}

    plugin, step_size, block_size = vamp.load.load_and_configure(data, sample_rate, plugin_key, parameters, **kwargs)

    try:
        if output == "":
            output_desc = plugin.get_output(0)
            output = output_desc["identifier"]
        else:
            output_desc = plugin.get_output(output)

        ff = vamp.frames.frames_from_array(data, step_size, block_size)

        results = vamp.process.process_with_initialised_plugin(ff, sample_rate, step_size, plugin, [output])

        shape = deduce_shape(output_desc)

        # Every branch of populate_reshaped_features consumes results in
        # full, so the plugin is finished with by the time we unload it
        return_dict = {}
        populate_reshaped_features(results, sample_rate, step_size, output_desc, shape, return_dict)
    finally:
        # Release the plugin even if processing or reshaping failed
        plugin.unload()

    return return_dict