Chris@117: #!/usr/bin/env python
Chris@117: 
Chris@117: #   Python Vamp Host
Chris@117: #   Copyright (c) 2008-2015 Queen Mary, University of London
Chris@117: #
Chris@117: #   Permission is hereby granted, free of charge, to any person
Chris@117: #   obtaining a copy of this software and associated documentation
Chris@117: #   files (the "Software"), to deal in the Software without
Chris@117: #   restriction, including without limitation the rights to use, copy,
Chris@117: #   modify, merge, publish, distribute, sublicense, and/or sell copies
Chris@117: #   of the Software, and to permit persons to whom the Software is
Chris@117: #   furnished to do so, subject to the following conditions:
Chris@117: #
Chris@117: #   The above copyright notice and this permission notice shall be
Chris@117: #   included in all copies or substantial portions of the Software.
Chris@117: #
Chris@117: #   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Chris@117: #   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Chris@117: #   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Chris@117: #   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
Chris@117: #   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
Chris@117: #   CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
Chris@117: #   WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Chris@117: #
Chris@117: #   Except as contained in this notice, the names of the Centre for
Chris@117: #   Digital Music and Queen Mary, University of London shall not be
Chris@117: #   used in advertising or otherwise to promote the sale, use or other
Chris@117: #   dealings in this Software without prior written authorization.
Chris@117: 
Chris@56: '''A high-level interface to the vampyhost extension module, for quickly and easily running Vamp audio analysis plugins on audio files and buffers.'''
Chris@56: 
Chris@56: import vampyhost
Chris@112: import vamp.load
Chris@112: import vamp.process
Chris@112: import vamp.frames
Chris@89: 
Chris@93: import numpy as np
Chris@93: 
def get_feature_step_time(sample_rate, step_size, output_desc):
    """Return the spacing in time between consecutive features of an output.

    The result depends on the "sampleType" entry of the output descriptor:

    * vampyhost.ONE_SAMPLE_PER_STEP: the processing step size (in
      frames) converted using vampyhost.frame_to_realtime at the given
      sample rate.

    * vampyhost.FIXED_SAMPLE_RATE: a vampyhost.RealTime holding the
      reciprocal of the descriptor's own "sampleRate".

    * anything else: the plain integer 1 (note: not a RealTime).
    """
    sample_type = output_desc["sampleType"]

    if sample_type == vampyhost.ONE_SAMPLE_PER_STEP:
        return vampyhost.frame_to_realtime(step_size, sample_rate)

    if sample_type == vampyhost.FIXED_SAMPLE_RATE:
        return vampyhost.RealTime('seconds', 1.0 / output_desc["sampleRate"])

    # No regular spacing can be derived for other sample types.
    return 1
Chris@72: 
def timestamp_features(sample_rate, step_size, output_desc, features):
    """Yield each feature in turn after filling in its "timestamp" entry.

    This is a generator; the feature dictionaries are modified in place
    and then yielded in their original order. What is written depends on
    the "sampleType" entry of the output descriptor:

    * vampyhost.ONE_SAMPLE_PER_STEP: the i'th feature (counting from
      zero) is stamped with the time of frame i * step_size at the
      given sample rate, replacing any timestamp it already carried.

    * vampyhost.FIXED_SAMPLE_RATE: a feature that already has a
      timestamp has it rounded to the nearest multiple of the output's
      sample period; a feature without one is placed one period after
      the previous feature.

    * anything else: features are passed through untouched.
    """
    sample_type = output_desc["sampleType"]

    if sample_type == vampyhost.ONE_SAMPLE_PER_STEP:
        for index, feature in enumerate(features):
            feature["timestamp"] = vampyhost.frame_to_realtime(
                index * step_size, sample_rate)
            yield feature

    elif sample_type == vampyhost.FIXED_SAMPLE_RATE:
        rate = output_desc["sampleRate"]
        index = -1
        for feature in features:
            if "timestamp" in feature:
                # Snap the plugin-supplied time onto the output's sample grid.
                index = int(feature["timestamp"].to_float() * rate + 0.5)
            else:
                index += 1
            feature["timestamp"] = vampyhost.RealTime(
                'seconds', float(index) / rate)
            yield feature

    else:
        for feature in features:
            yield feature
Chris@72: 
def fill_timestamps(results, sample_rate, step_size, output_desc):
    """Yield the timestamped features of a single output.

    Each item of results is indexed by the output's "identifier" to
    obtain one feature, and the resulting stream of features is passed
    through timestamp_features. This is a generator: nothing is read
    from results until the caller starts iterating.
    """
    key = output_desc["identifier"]

    per_output = (result[key] for result in results)

    for feature in timestamp_features(sample_rate, step_size,
                                      output_desc, per_output):
        yield feature
Chris@93: 
def deduce_shape(output_desc):
    """Choose the result shape name for an output descriptor.

    Returns "list" if the output has durations, has a variable sample
    rate, lacks a fixed bin count, or has a bin count of zero.
    Otherwise returns "vector" when the bin count is exactly one, and
    "matrix" for any other bin count.
    """
    # The tests short-circuit in this order, so later descriptor entries
    # are only consulted when the earlier ones did not already decide.
    needs_list = (
        output_desc["hasDuration"]
        or output_desc["sampleType"] == vampyhost.VARIABLE_SAMPLE_RATE
        or not output_desc["hasFixedBinCount"]
        or output_desc["binCount"] == 0
    )
    if needs_list:
        return "list"

    return "vector" if output_desc["binCount"] == 1 else "matrix"
Chris@93: 
def _make_track(start, step, values):
    # Package one run of contiguous values in the layout used for the
    # entries of the "tracks" list.
    return {"start": start,
            "step": step,
            "values": np.array(values, np.float32)}


def populate_reshaped_vector(results, out_step, output_desc, return_dict):
    """Collect single-valued features into return_dict["vector"].

    results is an iterable of per-block result dictionaries, each of
    which is indexed by the output's "identifier" to obtain one
    feature. The first element of every feature's "values" is appended
    to one continuous sequence, and return_dict["vector"] is set to a
    tuple of (out_step, one-dimensional float32 NumPy array).

    out_step must provide a to_float() method returning the step
    duration in seconds.

    For outputs whose "sampleType" is vampyhost.FIXED_SAMPLE_RATE,
    feature timestamps (where present) are also compared against the
    running frame count. Whenever they disagree, a gap is assumed: the
    values gathered so far are closed off as one track and a new track
    is started. If at least one track was closed this way,
    return_dict["tracks"] is set to a list of dictionaries with keys
    "start", "step" (always out_step) and "values" (a float32 NumPy
    array). "start" is the integer 0 for a track that began before any
    timestamp mismatch was seen, and a vampyhost.RealTime otherwise.

    Nothing is returned; return_dict is updated in place.
    """
    output = output_desc["identifier"]
    tracks = []
    whole = []
    current_track = []
    current_start_time = 0
    out_step_secs = out_step.to_float()

    # Index the next feature is expected to have if there are no gaps.
    n = -1

    for r in results:
        f = r[output]
        n = n + 1
        value = f["values"][0]
        whole.append(value)
        if (output_desc["sampleType"] == vampyhost.FIXED_SAMPLE_RATE
                and "timestamp" in f):
            # Index this feature actually has according to its timestamp.
            m = int(round(f["timestamp"].to_float() / out_step_secs))
            if m != n:
                if current_track:
                    tracks.append(_make_track(current_start_time,
                                              out_step, current_track))
                    current_track = []
                    # NOTE(review): n is only resynchronised here, when a
                    # track is closed. If the very first feature has a
                    # nonzero timestamp, n keeps lagging behind m and the
                    # following feature closes a one-element track.
                    # Confirm whether that is intended.
                    n = m
                current_start_time = vampyhost.RealTime('seconds',
                                                        m * out_step_secs)
        current_track.append(value)

    # Tracks are only reported when at least one gap was found; the
    # values still pending then form the final track.
    if tracks:
        if current_track:
            tracks.append(_make_track(current_start_time,
                                      out_step, current_track))
        return_dict["tracks"] = tracks

    # Deliver a NumPy array, matching the "matrix" and "tracks" values.
    return_dict["vector"] = (out_step, np.array(whole, np.float32))
Chris@150: 
def populate_reshaped_features(results, sample_rate, step_size, output_desc, shape, return_dict):
    """Store the features of one output in return_dict, laid out per shape.

    * "vector": handled entirely by populate_reshaped_vector.

    * "matrix": return_dict["matrix"] becomes a tuple of (step time,
      float32 NumPy array built from every feature's "values").

    * any other shape: return_dict[shape] becomes the list of features
      produced by fill_timestamps.

    The step time comes from get_feature_step_time. Nothing is
    returned; return_dict is updated in place.
    """
    key = output_desc["identifier"]
    step_time = get_feature_step_time(sample_rate, step_size, output_desc)

    if shape == "vector":
        populate_reshaped_vector(results, step_time, output_desc, return_dict)
        return

    if shape == "matrix":
        # TODO: check that each feature has the right number of bins?
        rows = [result[key]["values"] for result in results]
        return_dict[shape] = (step_time, np.array(rows, np.float32))
        return

    return_dict[shape] = list(
        fill_timestamps(results, sample_rate, step_size, output_desc))
Chris@95:         
def collect(data, sample_rate, plugin_key, output = "", parameters = {}, **kwargs):
    """Process audio data with a Vamp plugin, and make the results from a
    single plugin output available as a single structure.

    The provided data should be a 1- or 2-dimensional list or NumPy
    array of floats. If it is 2-dimensional, the first dimension is
    taken to be the channel count.

    The returned results will be those calculated by the plugin with
    the given key and returned through its output with the given
    output identifier. If the requested output is the empty string,
    the first output provided by the plugin will be used.

    If the parameters dict is non-empty, the plugin will be configured
    by setting its parameters according to the (string) key and
    (float) value data found in the dict.

    The results are returned in a dictionary. This will always contain
    exactly one of the keys "vector", "matrix", or "list". In addition
    it may optionally contain the key "tracks". Which of these is used
    depends on the structure of features set out in the output
    descriptor for the requested plugin output, and sometimes on the
    features themselves, as follows:

    * If the plugin output emits single-valued features at a fixed
    sample-rate, then the "vector" element will be used. It will contain
    a tuple of step time (the time in seconds between consecutive
    feature values) and a one-dimensional NumPy array of feature
    values. An example of such a feature might be a loudness curve
    against time.

    * If the above is true but it also happens that the plugin output
    has gaps between some features, so that a single continuous vector
    can't convey all the relevant information, then the "tracks" element
    will additionally be used. It will contain a list of dictionaries,
    one for each set of contiguous points in the output, each containing
    elements "start" (start time in seconds), "step" (step time in
    seconds), and "values" (a one-dimensional NumPy array of contiguous
    feature values). An example of such a feature might be the output of
    a pitch tracker that emits values only during pitched sections of
    the input audio.

    * If the plugin output emits multiple-valued features, with an
    equal number of bins per feature, at a fixed sample-rate, then
    the "matrix" element will be used. It will contain a tuple of
    step time (the time in seconds between consecutive feature
    values) and a two-dimensional NumPy array of feature values. An
    example of such a feature might be a spectrogram.

    * Otherwise, the "list" element will be used, and will contain a
    list of features, where each feature is represented as a
    dictionary containing a timestamp (always) and a duration
    (optionally), a label (string), and a 1-dimensional array of
    float values.

    If you wish to override the processing step size, block size, or
    process timestamp method, you may supply them as keyword arguments
    with the keywords step_size (int), block_size (int), and
    process_timestamp_method (choose from vamp.vampyhost.SHIFT_DATA,
    vamp.vampyhost.SHIFT_TIMESTAMP, or vamp.vampyhost.NO_SHIFT).

    The plugin is unloaded before this function returns, whether it
    completes normally or raises an exception during processing.

    If you would prefer to obtain features as they are calculated
    (where the plugin supports this) and with the format in which the
    plugin returns them, via an asynchronous generator function, use
    vamp.process() instead.
    """

    plugin, step_size, block_size = vamp.load.load_and_configure(data, sample_rate, plugin_key, parameters, **kwargs)

    # From here on the plugin is loaded, so make sure it is released
    # even if output lookup or processing fails.
    try:
        if output == "":
            output_desc = plugin.get_output(0)
            output = output_desc["identifier"]
        else:
            output_desc = plugin.get_output(output)

        ff = vamp.frames.frames_from_array(data, step_size, block_size)

        # results is consumed in full by populate_reshaped_features
        # below, i.e. before the plugin is unloaded.
        results = vamp.process.process_with_initialised_plugin(ff, sample_rate, step_size, plugin, [output])

        shape = deduce_shape(output_desc)

        return_dict = {}
        populate_reshaped_features(results, sample_rate, step_size, output_desc, shape, return_dict)

        return return_dict
    finally:
        plugin.unload()