cannam@0: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ cannam@0: cannam@0: /* cannam@0: Vamp cannam@0: cannam@0: An API for audio analysis and feature extraction plugins. cannam@0: cannam@0: Centre for Digital Music, Queen Mary, University of London. cannam@0: Copyright 2006 Chris Cannam. cannam@0: cannam@0: Permission is hereby granted, free of charge, to any person cannam@0: obtaining a copy of this software and associated documentation cannam@0: files (the "Software"), to deal in the Software without cannam@0: restriction, including without limitation the rights to use, copy, cannam@0: modify, merge, publish, distribute, sublicense, and/or sell copies cannam@0: of the Software, and to permit persons to whom the Software is cannam@0: furnished to do so, subject to the following conditions: cannam@0: cannam@0: The above copyright notice and this permission notice shall be cannam@0: included in all copies or substantial portions of the Software. cannam@0: cannam@0: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, cannam@0: EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF cannam@0: MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND cannam@0: NONINFRINGEMENT. IN NO EVENT SHALL THE X CONSORTIUM BE LIABLE FOR cannam@0: ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF cannam@0: CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION cannam@0: WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. cannam@0: cannam@0: Except as contained in this notice, the names of the Centre for cannam@0: Digital Music; Queen Mary, University of London; and Chris Cannam cannam@0: shall not be used in advertising or otherwise to promote the sale, cannam@0: use or other dealings in this Software without prior written cannam@0: authorization. cannam@0: */ cannam@0: cannam@0: #ifndef _VAMP_PLUGIN_H_ cannam@0: #define _VAMP_PLUGIN_H_ cannam@0: cannam@0: #include "PluginBase.h" cannam@0: #include "RealTime.h" cannam@0: cannam@0: #include cannam@0: #include cannam@0: #include cannam@0: cannam@0: namespace Vamp { cannam@0: cannam@0: /** cannam@0: * Vamp::Plugin is a base class for plugin instance classes cannam@0: * that provide feature extraction from audio or related data. cannam@0: * cannam@0: * In most cases, the input will be audio and the output will be a cannam@0: * stream of derived data at a lower sampling resolution than the cannam@0: * input. cannam@0: * cannam@0: * Note that this class inherits several abstract methods from cannam@0: * PluginBase, that must be implemented by the subclass. cannam@0: */ cannam@0: cannam@0: /** cannam@0: * Plugin Lifecycle cannam@0: * ================ cannam@0: * cannam@0: * Feature extraction plugins are managed differently from real-time cannam@0: * plugins (such as VST effects). The main difference is that the cannam@0: * parameters for a feature extraction plugin are configured before cannam@0: * the plugin is used, and do not change during use. cannam@0: * cannam@0: * 1. Host constructs the plugin, passing it the input sample rate. cannam@0: * The plugin may do basic initialisation, but should not do anything cannam@0: * computationally expensive at this point. cannam@0: * cannam@0: * 2. Host may query the plugin's available outputs. cannam@0: * cannam@0: * 3. Host queries programs and parameter descriptors, and may set cannam@0: * some or all of them. Parameters that are not explicitly set should cannam@0: * take their default values as specified in the parameter descriptor. cannam@0: * When a program is set, the parameter values may change and the host cannam@0: * will re-query them to check. cannam@0: * cannam@0: * 4. Host queries the preferred step size, block size, number of cannam@0: * channels, and the number of values per feature for the plugin's cannam@0: * outputs. These may all vary depending on the parameter values. cannam@0: * (Note however that you cannot make the number of distinct outputs cannam@0: * dependent on parameter values; nor can you make any of these depend cannam@0: * on the number of input channels.) cannam@0: * cannam@0: * 5. Plugin is properly initialised with a call to initialise. This cannam@0: * fixes the step size, block size, and number of channels, as well as cannam@0: * all of the parameter and program settings. If the values passed in cannam@0: * to initialise do not match the plugin's advertised preferred values cannam@0: * from step 4, the plugin may refuse to initialise and return false cannam@0: * (although if possible it should accept the new values). cannam@0: * cannam@0: * 6. Host will repeatedly call the process method to pass in blocks cannam@0: * of input data. This method may return features extracted from that cannam@0: * data (if the plugin is causal). cannam@0: * cannam@0: * 7. Host will call getRemainingFeatures exactly once, after all the cannam@0: * input data has been processed. This may return any non-causal or cannam@0: * leftover features. cannam@0: * cannam@0: * 8. At any point after initialise was called, the host may cannam@0: * optionally call the reset method and restart processing. (This cannam@0: * does not mean it can change the parameters, which are fixed from cannam@0: * initialise until destruction.) cannam@0: * cannam@0: * A plugin does not need to handle the case where setParameter or cannam@0: * selectProgram is called after initialise has been called. It's the cannam@0: * host's responsibility not to do that. cannam@0: */ cannam@0: cannam@0: class Plugin : public PluginBase cannam@0: { cannam@0: public: cannam@0: /** cannam@0: * Initialise a plugin to prepare it for use with the given number cannam@0: * of input channels, step size (window increment, in sample cannam@0: * frames) and block size (window size, in sample frames). cannam@0: * cannam@0: * The input sample rate should have been already specified at cannam@0: * construction time. cannam@0: * cannam@0: * Return true for successful initialisation, false if the number cannam@0: * of input channels, step size and/or block size cannot be cannam@0: * supported. cannam@0: */ cannam@0: virtual bool initialise(size_t inputChannels, cannam@0: size_t stepSize, cannam@0: size_t blockSize) = 0; cannam@0: cannam@0: /** cannam@0: * Reset the plugin after use, to prepare it for another clean cannam@0: * run. Not called for the first initialisation (i.e. initialise cannam@0: * must also do a reset). cannam@0: */ cannam@0: virtual void reset() = 0; cannam@0: cannam@0: enum InputDomain { TimeDomain, FrequencyDomain }; cannam@0: cannam@0: /** cannam@0: * Get the plugin's required input domain. If this is TimeDomain, cannam@0: * the samples provided to the process() function (below) will be cannam@0: * in the time domain, as for a traditional audio processing cannam@0: * plugin. If this is FrequencyDomain, the host will carry out a cannam@0: * windowed FFT of size equal to the negotiated block size on the cannam@0: * data before passing the frequency bin data in to process(). cannam@0: * The plugin does not get to choose the window type -- the host cannam@0: * will either let the user do so, or will use a Hanning window. cannam@0: */ cannam@0: virtual InputDomain getInputDomain() const = 0; cannam@0: cannam@0: /** cannam@0: * Get the preferred step size (window increment -- the distance cannam@0: * in sample frames between the start frames of consecutive blocks cannam@0: * passed to the process() function) for the plugin. This should cannam@0: * be called before initialise(). cannam@0: */ cannam@0: virtual size_t getPreferredStepSize() const = 0; cannam@0: cannam@0: /** cannam@0: * Get the preferred block size (window size -- the number of cannam@0: * sample frames passed in each block to the process() function). cannam@0: * This should be called before initialise(). cannam@0: */ cannam@0: virtual size_t getPreferredBlockSize() const { return getPreferredStepSize(); } cannam@0: cannam@0: /** cannam@0: * Get the minimum supported number of input channels. cannam@0: */ cannam@0: virtual size_t getMinChannelCount() const { return 1; } cannam@0: cannam@0: /** cannam@0: * Get the maximum supported number of input channels. cannam@0: */ cannam@0: virtual size_t getMaxChannelCount() const { return 1; } cannam@0: cannam@0: struct OutputDescriptor cannam@0: { cannam@0: /** cannam@0: * The name of the output, in computer-usable form. Should be cannam@0: * reasonably short and without whitespace or punctuation. cannam@0: */ cannam@0: std::string name; cannam@0: cannam@0: /** cannam@0: * The human-readable name of the output. cannam@0: */ cannam@0: std::string description; cannam@0: cannam@0: /** cannam@0: * The unit of the output, in human-readable form. cannam@0: */ cannam@0: std::string unit; cannam@0: cannam@0: /** cannam@0: * True if the output has the same number of values per result cannam@0: * for every output result. Outputs for which this is false cannam@0: * are unlikely to be very useful in a general-purpose host. cannam@0: */ cannam@0: bool hasFixedValueCount; cannam@0: cannam@0: /** cannam@0: * The number of values per result of the output. Undefined cannam@0: * if hasFixedValueCount is false. If this is zero, the output cannam@0: * is point data (i.e. only the time of each output is of cannam@0: * interest, the value list will be empty). cannam@0: * cannam@0: * Note that this gives the number of values of a single cannam@0: * output result, not of the output stream (which has one more cannam@0: * dimension: time). cannam@0: */ cannam@0: size_t valueCount; cannam@0: cannam@0: /** cannam@0: * The names of each of the values, if appropriate. This is cannam@0: * always optional. cannam@0: */ cannam@0: std::vector valueNames; cannam@0: cannam@0: /** cannam@0: * True if the results in the output have a fixed numeric cannam@0: * range (minimum and maximum values). Undefined if cannam@0: * valueCount is zero. cannam@0: */ cannam@0: bool hasKnownExtents; cannam@0: cannam@0: /** cannam@0: * Minimum value of the results in the output. Undefined if cannam@0: * hasKnownExtents is false or valueCount is zero. cannam@0: */ cannam@0: float minValue; cannam@0: cannam@0: /** cannam@0: * Maximum value of the results in the output. Undefined if cannam@0: * hasKnownExtents is false or valueCount is zero. cannam@0: */ cannam@0: float maxValue; cannam@0: cannam@0: /** cannam@0: * True if the output values are quantized to a particular cannam@0: * resolution. Undefined if valueCount is zero. cannam@0: */ cannam@0: bool isQuantized; cannam@0: cannam@0: /** cannam@0: * Quantization resolution of the output values (e.g. 1.0 if cannam@0: * they are all integers). Undefined if isQuantized is false cannam@0: * or valueCount is zero. cannam@0: */ cannam@0: float quantizeStep; cannam@0: cannam@0: enum SampleType { cannam@0: cannam@0: /// Results from each process() align with that call's block start cannam@0: OneSamplePerStep, cannam@0: cannam@0: /// Results are evenly spaced in time (sampleRate specified below) cannam@0: FixedSampleRate, cannam@0: cannam@0: /// Results are unevenly spaced and have individual timestamps cannam@0: VariableSampleRate cannam@0: }; cannam@0: cannam@0: /** cannam@0: * Positioning in time of the output results. cannam@0: */ cannam@0: SampleType sampleType; cannam@0: cannam@0: /** cannam@0: * Sample rate of the output results. Undefined if sampleType cannam@0: * is OneSamplePerStep. cannam@0: * cannam@0: * If sampleType is VariableSampleRate and this value is cannam@0: * non-zero, then it may be used to calculate a resolution for cannam@0: * the output (i.e. the "duration" of each value, in time). cannam@0: * It's recommended to set this to zero if that behaviour is cannam@0: * not desired. cannam@0: */ cannam@0: float sampleRate; cannam@0: }; cannam@0: cannam@0: typedef std::vector OutputList; cannam@0: cannam@0: /** cannam@0: * Get the outputs of this plugin. An output's index in this list cannam@0: * is used as its numeric index when looking it up in the cannam@0: * FeatureSet returned from the process() call. cannam@0: */ cannam@0: virtual OutputList getOutputDescriptors() const = 0; cannam@0: cannam@0: struct Feature cannam@0: { cannam@0: /** cannam@0: * True if an output feature has its own timestamp. This is cannam@0: * mandatory if the output has VariableSampleRate, and is cannam@0: * likely to be disregarded otherwise. cannam@0: */ cannam@0: bool hasTimestamp; cannam@0: cannam@0: /** cannam@0: * Timestamp of the output feature. This is mandatory if the cannam@0: * output has VariableSampleRate, and is likely to be cannam@0: * disregarded otherwise. Undefined if hasTimestamp is false. cannam@0: */ cannam@0: RealTime timestamp; cannam@0: cannam@0: /** cannam@0: * Results for a single sample of this feature. If the output cannam@0: * hasFixedValueCount, there must be the same number of values cannam@0: * as the output's valueCount count. cannam@0: */ cannam@0: std::vector values; cannam@0: cannam@0: /** cannam@0: * Label for the sample of this feature. cannam@0: */ cannam@0: std::string label; cannam@0: }; cannam@0: cannam@0: typedef std::vector FeatureList; cannam@0: typedef std::map FeatureSet; // key is output no cannam@0: cannam@0: /** cannam@0: * Process a single block of input data. cannam@0: * cannam@0: * If the plugin's inputDomain is TimeDomain, inputBuffers will cannam@0: * point to one array of floats per input channel, and each of cannam@0: * these arrays will contain blockSize consecutive audio samples cannam@0: * (the host will zero-pad as necessary). cannam@0: * cannam@0: * If the plugin's inputDomain is FrequencyDomain, inputBuffers cannam@0: * will point to one array of floats per input channel, and each cannam@0: * of these arrays will contain blockSize/2 consecutive pairs of cannam@0: * real and imaginary component floats corresponding to bins cannam@0: * 0..(blockSize/2-1) of the FFT output. cannam@0: * cannam@0: * The timestamp is the real time in seconds of the start of the cannam@0: * supplied block of samples. cannam@0: * cannam@0: * Return any features that have become available after this cannam@0: * process call. (These do not necessarily have to fall within cannam@0: * the process block, except for OneSamplePerStep outputs.) cannam@0: */ cannam@0: virtual FeatureSet process(float **inputBuffers, cannam@0: RealTime timestamp) = 0; cannam@0: cannam@0: /** cannam@0: * After all blocks have been processed, calculate and return any cannam@0: * remaining features derived from the complete input. cannam@0: */ cannam@0: virtual FeatureSet getRemainingFeatures() = 0; cannam@0: cannam@0: virtual std::string getType() const { return "Feature Extraction Plugin"; } cannam@0: cannam@0: protected: cannam@0: Plugin(float inputSampleRate) : cannam@0: m_inputSampleRate(inputSampleRate) { } cannam@0: cannam@0: float m_inputSampleRate; cannam@0: }; cannam@0: cannam@0: } cannam@0: cannam@0: #endif cannam@0: cannam@0: cannam@0: