Mercurial > hg > vampy-host
view vamp/collect.py @ 151:5a6b8f4be9b9 tracks tip
Docs
author | Chris Cannam |
---|---|
date | Fri, 21 Apr 2017 14:33:57 +0100 |
parents | 37d2fd57723e |
children |
line wrap: on
line source
#!/usr/bin/env python # Python Vamp Host # Copyright (c) 2008-2015 Queen Mary, University of London # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation # files (the "Software"), to deal in the Software without # restriction, including without limitation the rights to use, copy, # modify, merge, publish, distribute, sublicense, and/or sell copies # of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # # Except as contained in this notice, the names of the Centre for # Digital Music and Queen Mary, University of London shall not be # used in advertising or otherwise to promote the sale, use or other # dealings in this Software without prior written authorization. 
'''A high-level interface to the vampyhost extension module, for
quickly and easily running Vamp audio analysis plugins on audio files
and buffers.'''

import vampyhost

import vamp.load
import vamp.process
import vamp.frames

import numpy as np


def get_feature_step_time(sample_rate, step_size, output_desc):
    """Return the nominal time step between consecutive features.

    The value depends on output_desc["sampleType"]:

     * ONE_SAMPLE_PER_STEP: the result of
       vampyhost.frame_to_realtime(step_size, sample_rate).

     * FIXED_SAMPLE_RATE: a vampyhost.RealTime of
       1.0 / output_desc["sampleRate"] seconds.

     * anything else: the plain integer 1. Note that this is not a
       RealTime object; in this module it is only ever computed for
       outputs that end up with the "list" shape, where the step is
       not used.
    """
    sample_type = output_desc["sampleType"]

    if sample_type == vampyhost.ONE_SAMPLE_PER_STEP:
        return vampyhost.frame_to_realtime(step_size, sample_rate)

    if sample_type == vampyhost.FIXED_SAMPLE_RATE:
        return vampyhost.RealTime('seconds', 1.0 / output_desc["sampleRate"])

    return 1


def timestamp_features(sample_rate, step_size, output_desc, features):
    """Yield each feature from features with its "timestamp" filled in.

    Feature dictionaries are modified in place and then yielded, in
    their original order.

     * ONE_SAMPLE_PER_STEP: the n'th feature (counting from zero) is
       stamped with frame_to_realtime(n * step_size, sample_rate),
       replacing any timestamp it already carried.

     * FIXED_SAMPLE_RATE: a feature that already has a timestamp is
       snapped to the nearest multiple of the output sample period; a
       feature without one is placed one period after the previous
       feature (the first such feature goes at time zero).

     * otherwise: features are passed through unchanged.
    """
    sample_type = output_desc["sampleType"]

    if sample_type == vampyhost.ONE_SAMPLE_PER_STEP:
        for n, f in enumerate(features):
            f["timestamp"] = vampyhost.frame_to_realtime(n * step_size,
                                                         sample_rate)
            yield f

    elif sample_type == vampyhost.FIXED_SAMPLE_RATE:
        output_rate = output_desc["sampleRate"]
        # Index of the most recently emitted feature, in units of the
        # output sample period; -1 means nothing has been emitted yet.
        n = -1
        for f in features:
            if "timestamp" in f:
                # Adding 0.5 before truncation rounds to the nearest index
                n = int(f["timestamp"].to_float() * output_rate + 0.5)
            else:
                n = n + 1
            f["timestamp"] = vampyhost.RealTime('seconds',
                                                float(n) / output_rate)
            yield f

    else:
        for f in features:
            yield f


def fill_timestamps(results, sample_rate, step_size, output_desc):
    """Yield the timestamped features of a single output.

    results is an iterable of dictionaries keyed by output identifier.
    The feature stored under output_desc["identifier"] is taken from
    each one and passed through timestamp_features().
    """
    output = output_desc["identifier"]
    selected = (r[output] for r in results)
    stamped = timestamp_features(sample_rate, step_size, output_desc, selected)
    for s in stamped:
        yield s


def deduce_shape(output_desc):
    """Return the result shape to use for the given output descriptor.

    The result is one of the strings "list", "vector" or "matrix":

     * "list" if the features have durations, the output has a
       variable sample rate, the bin count is not fixed, or the bin
       count is zero;

     * "vector" if none of the above applies and there is exactly one
       bin per feature;

     * "matrix" in all remaining cases.
    """
    if output_desc["hasDuration"]:
        return "list"
    if output_desc["sampleType"] == vampyhost.VARIABLE_SAMPLE_RATE:
        return "list"
    # "binCount" is only consulted once we know the bin count is fixed
    if not output_desc["hasFixedBinCount"]:
        return "list"
    if output_desc["binCount"] == 0:
        return "list"
    if output_desc["binCount"] == 1:
        return "vector"
    return "matrix"


def _make_track(start, step, values):
    """Build one "tracks" entry from a run of contiguous values."""
    return {
        "start": start,
        "step": step,
        "values": np.array(values, np.float32),
    }


def populate_reshaped_vector(results, out_step, output_desc, return_dict):
    """Store single-valued features in return_dict as a "vector".

    results is an iterable of dictionaries keyed by output identifier;
    the first value of each feature found under
    output_desc["identifier"] is collected.

    return_dict["vector"] is always set, to a tuple of out_step and a
    one-dimensional float32 NumPy array holding every collected value
    in order.

    return_dict["tracks"] is set only if, for a FIXED_SAMPLE_RATE
    output, some feature after the first carries a timestamp that does
    not fall on the next expected step. In that case it is a list of
    dictionaries, one per run of contiguous values, each with keys
    "start", "step" and "values" (a float32 NumPy array).

    out_step must provide a to_float() method.
    """
    output = output_desc["identifier"]
    fixed_rate = (output_desc["sampleType"] == vampyhost.FIXED_SAMPLE_RATE)
    out_step_secs = out_step.to_float()

    tracks = []
    whole = []
    current_track = []
    # NOTE(review): the first track starts at the plain integer 0 while
    # later tracks start at a RealTime value -- confirm whether callers
    # expect a consistent type here.
    current_start_time = 0

    # Step index at which the current feature is expected to fall
    n = -1

    for r in results:
        f = r[output]
        value = f["values"][0]
        n = n + 1
        whole.append(value)

        if fixed_rate and "timestamp" in f:
            m = int(round(f["timestamp"].to_float() / out_step_secs))
            if m != n:
                # The feature is not where the previous one predicted:
                # close off the current run and start a new one here.
                if current_track:
                    tracks.append(_make_track(current_start_time, out_step,
                                              current_track))
                    current_track = []
                n = m
                current_start_time = vampyhost.RealTime('seconds',
                                                        m * out_step_secs)

        current_track.append(value)

    # Tracks are only reported if at least one run was closed above
    if tracks:
        if current_track:
            tracks.append(_make_track(current_start_time, out_step,
                                      current_track))
        return_dict["tracks"] = tracks

    # Deliver a NumPy array, as documented for collect() and in line
    # with the "tracks" and "matrix" results.
    return_dict["vector"] = (out_step, np.array(whole, np.float32))


def populate_reshaped_features(results, sample_rate, step_size, output_desc, shape, return_dict):
    """Collect results into return_dict using the given shape.

     * "vector": delegates to populate_reshaped_vector(), which sets
       return_dict["vector"] and possibly return_dict["tracks"].

     * "matrix": sets return_dict["matrix"] to a tuple of step time and
       a float32 NumPy array built from the values of every feature.

     * any other shape: sets return_dict[shape] to the list of
       timestamped features produced by fill_timestamps().

    results is consumed completely in every case.
    """
    output = output_desc["identifier"]
    out_step = get_feature_step_time(sample_rate, step_size, output_desc)

    if shape == "vector":
        populate_reshaped_vector(results, out_step, output_desc, return_dict)
    elif shape == "matrix":
        #!!! todo: check that each feature has the right number of bins?
        outseq = [r[output]["values"] for r in results]
        return_dict[shape] = (out_step, np.array(outseq, np.float32))
    else:
        return_dict[shape] = list(fill_timestamps(results, sample_rate,
                                                  step_size, output_desc))


def collect(data, sample_rate, plugin_key, output = "", parameters = None, **kwargs):
    """Process audio data with a Vamp plugin, and make the results from a
    single plugin output available as a single structure.

    The provided data should be a 1- or 2-dimensional list or NumPy
    array of floats. If it is 2-dimensional, the first dimension is
    taken to be the channel count.

    The returned results will be those calculated by the plugin with
    the given key and returned through its output with the given
    output identifier. If the requested output is the empty string,
    the first output provided by the plugin will be used.

    If the parameters dict is non-empty, the plugin will be configured
    by setting its parameters according to the (string) key and
    (float) value data found in the dict. Omitting it, or passing
    None, is equivalent to passing an empty dict.

    The results are returned in a dictionary. This will always contain
    exactly one of the keys "vector", "matrix", or "list". In addition
    it may optionally contain the key "tracks". Which of these is used
    depends on the structure of features set out in the output
    descriptor for the requested plugin output, and sometimes on the
    features themselves, as follows:

    * If the plugin output emits single-valued features at a fixed
      sample-rate, then the "vector" element will be used. It will
      contain a tuple of step time (the time in seconds between
      consecutive feature values) and a one-dimensional NumPy array of
      feature values. An example of such a feature might be a loudness
      curve against time.

    * If the above is true but it also happens that the plugin output
      has gaps between some features, so that a single continuous
      vector can't convey all the relevant information, then the
      "tracks" element will additionally be used. It will contain a
      list of dictionaries, one for each set of contiguous points in
      the output, each containing elements "start" (start time in
      seconds), "step" (step time in seconds), and "values" (a
      one-dimensional NumPy array of contiguous feature values). An
      example of such a feature might be the output of a pitch tracker
      that emits values only during pitched sections of the input
      audio.

    * If the plugin output emits multiple-valued features, with an
      equal number of bins per feature, at a fixed sample-rate, then
      the "matrix" element will be used. It will contain a tuple of
      step time (the time in seconds between consecutive feature
      values) and a two-dimensional NumPy array of feature values. An
      example of such a feature might be a spectrogram.

    * Otherwise, the "list" element will be used, and will contain a
      list of features, where each feature is represented as a
      dictionary containing a timestamp (always) and a duration
      (optionally), a label (string), and a 1-dimensional array of
      float values.

    If you wish to override the processing step size, block size, or
    process timestamp method, you may supply them as keyword arguments
    with the keywords step_size (int), block_size (int), and
    process_timestamp_method (choose from vamp.vampyhost.SHIFT_DATA,
    vamp.vampyhost.SHIFT_TIMESTAMP, or vamp.vampyhost.NO_SHIFT).

    If you would prefer to obtain features as they are calculated
    (where the plugin supports this) and with the format in which the
    plugin returns them, via an asynchronous generator function, use
    vamp.process() instead.

    The plugin is unloaded before this function returns, whether it
    completes normally or raises.
    """
    # Use a fresh dict per call rather than a shared mutable default
    if parameters is None:
        parameters = {}

    plugin, step_size, block_size = vamp.load.load_and_configure(
        data, sample_rate, plugin_key, parameters, **kwargs)

    try:
        if output == "":
            output_desc = plugin.get_output(0)
            output = output_desc["identifier"]
        else:
            output_desc = plugin.get_output(output)

        ff = vamp.frames.frames_from_array(data, step_size, block_size)

        results = vamp.process.process_with_initialised_plugin(
            ff, sample_rate, step_size, plugin, [output])

        shape = deduce_shape(output_desc)

        # The results are consumed in full here, so the plugin is no
        # longer needed once this call has returned.
        return_dict = {}
        populate_reshaped_features(results, sample_rate, step_size,
                                   output_desc, shape, return_dict)
        return return_dict
    finally:
        # Release the plugin even if output lookup or processing failed
        plugin.unload()