Chris@23: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@23: Chris@23: /* Chris@23: Vamp Chris@23: Chris@23: An API for audio analysis and feature extraction plugins. Chris@23: Chris@23: Centre for Digital Music, Queen Mary, University of London. Chris@23: Copyright 2006 Chris Cannam. Chris@23: Chris@23: Permission is hereby granted, free of charge, to any person Chris@23: obtaining a copy of this software and associated documentation Chris@23: files (the "Software"), to deal in the Software without Chris@23: restriction, including without limitation the rights to use, copy, Chris@23: modify, merge, publish, distribute, sublicense, and/or sell copies Chris@23: of the Software, and to permit persons to whom the Software is Chris@23: furnished to do so, subject to the following conditions: Chris@23: Chris@23: The above copyright notice and this permission notice shall be Chris@23: included in all copies or substantial portions of the Software. Chris@23: Chris@23: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, Chris@23: EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF Chris@23: MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND Chris@23: NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR Chris@23: ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF Chris@23: CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION Chris@23: WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Chris@23: Chris@23: Except as contained in this notice, the names of the Centre for Chris@23: Digital Music; Queen Mary, University of London; and Chris Cannam Chris@23: shall not be used in advertising or otherwise to promote the sale, Chris@23: use or other dealings in this Software without prior written Chris@23: authorization. Chris@23: */ Chris@23: Chris@23: #ifndef _VAMP_SDK_PLUGIN_H_ Chris@23: #define _VAMP_SDK_PLUGIN_H_ Chris@23: Chris@23: #include Chris@23: #include Chris@23: #include Chris@23: Chris@23: #include "PluginBase.h" Chris@23: #include "RealTime.h" Chris@23: Chris@23: #include "plugguard.h" Chris@23: _VAMP_SDK_PLUGSPACE_BEGIN(Plugin.h) Chris@23: Chris@23: namespace Vamp { Chris@23: Chris@23: /** Chris@23: * \class Plugin Plugin.h Chris@23: * Chris@23: * Vamp::Plugin is a base class for plugin instance classes Chris@23: * that provide feature extraction from audio or related data. Chris@23: * Chris@23: * In most cases, the input will be audio and the output will be a Chris@23: * stream of derived data at a lower sampling resolution than the Chris@23: * input. Chris@23: * Chris@23: * Note that this class inherits several abstract methods from Chris@23: * PluginBase. These must be implemented by the subclass. Chris@23: * Chris@23: * Chris@23: * PLUGIN LIFECYCLE Chris@23: * Chris@23: * Feature extraction plugins are managed differently from real-time Chris@23: * plugins (such as VST effects). The main difference is that the Chris@23: * parameters for a feature extraction plugin are configured before Chris@23: * the plugin is used, and do not change during use. Chris@23: * Chris@23: * 1. Host constructs the plugin, passing it the input sample rate. Chris@23: * The plugin may do basic initialisation, but should not do anything Chris@23: * computationally expensive at this point. You must make sure your Chris@23: * plugin is cheap to construct, otherwise you'll seriously affect the Chris@23: * startup performance of almost all hosts. If you have serious Chris@23: * initialisation to do, the proper place is in initialise() (step 5). Chris@23: * Chris@23: * 2. Host may query the plugin's available outputs. Chris@23: * Chris@23: * 3. Host queries programs and parameter descriptors, and may set Chris@23: * some or all of them. Parameters that are not explicitly set should Chris@23: * take their default values as specified in the parameter descriptor. Chris@23: * When a program is set, the parameter values may change and the host Chris@23: * will re-query them to check. Chris@23: * Chris@23: * 4. Host queries the preferred step size, block size and number of Chris@23: * channels. These may all vary depending on the parameter values. Chris@23: * (Note however that you cannot make the number of distinct outputs Chris@23: * dependent on parameter values.) Chris@23: * Chris@23: * 5. Plugin is properly initialised with a call to initialise. This Chris@23: * fixes the step size, block size, and number of channels, as well as Chris@23: * all of the parameter and program settings. If the values passed in Chris@23: * to initialise do not match the plugin's advertised preferred values Chris@23: * from step 4, the plugin may refuse to initialise and return false Chris@23: * (although if possible it should accept the new values). Any Chris@23: * computationally expensive setup code should take place here. Chris@23: * Chris@23: * 6. Host finally checks the number of values, resolution, extents Chris@23: * etc per output (which may vary depending on the number of channels, Chris@23: * step size and block size as well as the parameter values). Chris@23: * Chris@23: * 7. Host will repeatedly call the process method to pass in blocks Chris@23: * of input data. This method may return features extracted from that Chris@23: * data (if the plugin is causal). Chris@23: * Chris@23: * 8. Host will call getRemainingFeatures exactly once, after all the Chris@23: * input data has been processed. This may return any non-causal or Chris@23: * leftover features. Chris@23: * Chris@23: * 9. At any point after initialise was called, the host may Chris@23: * optionally call the reset method and restart processing. (This Chris@23: * does not mean it can change the parameters, which are fixed from Chris@23: * initialise until destruction.) Chris@23: * Chris@23: * A plugin does not need to handle the case where setParameter or Chris@23: * selectProgram is called after initialise has been called. It's the Chris@23: * host's responsibility not to do that. Similarly, the plugin may Chris@23: * safely assume that initialise is called no more than once. Chris@23: */ Chris@23: Chris@23: class Plugin : public PluginBase Chris@23: { Chris@23: public: Chris@23: virtual ~Plugin() { } Chris@23: Chris@23: /** Chris@23: * Initialise a plugin to prepare it for use with the given number Chris@23: * of input channels, step size (window increment, in sample Chris@23: * frames) and block size (window size, in sample frames). Chris@23: * Chris@23: * The input sample rate should have been already specified at Chris@23: * construction time. Chris@23: * Chris@23: * Return true for successful initialisation, false if the number Chris@23: * of input channels, step size and/or block size cannot be Chris@23: * supported. Chris@23: */ Chris@23: virtual bool initialise(size_t inputChannels, Chris@23: size_t stepSize, Chris@23: size_t blockSize) = 0; Chris@23: Chris@23: /** Chris@23: * Reset the plugin after use, to prepare it for another clean Chris@23: * run. Not called for the first initialisation (i.e. initialise Chris@23: * must also do a reset). Chris@23: */ Chris@23: virtual void reset() = 0; Chris@23: Chris@23: enum InputDomain { TimeDomain, FrequencyDomain }; Chris@23: Chris@23: /** Chris@23: * Get the plugin's required input domain. Chris@23: * Chris@23: * If this is TimeDomain, the samples provided to the process() Chris@23: * function (below) will be in the time domain, as for a Chris@23: * traditional audio processing plugin. Chris@23: * Chris@23: * If this is FrequencyDomain, the host will carry out a windowed Chris@23: * FFT of size equal to the negotiated block size on the data Chris@23: * before passing the frequency bin data in to process(). The Chris@23: * input data for the FFT will be rotated so as to place the Chris@23: * origin in the centre of the block. Chris@23: * The plugin does not get to choose the window type -- the host Chris@23: * will either let the user do so, or will use a Hanning window. Chris@23: */ Chris@23: virtual InputDomain getInputDomain() const = 0; Chris@23: Chris@23: /** Chris@23: * Get the preferred block size (window size -- the number of Chris@23: * sample frames passed in each block to the process() function). Chris@23: * This should be called before initialise(). Chris@23: * Chris@23: * A plugin that can handle any block size may return 0. The Chris@23: * final block size will be set in the initialise() call. Chris@23: */ Chris@23: virtual size_t getPreferredBlockSize() const { return 0; } Chris@23: Chris@23: /** Chris@23: * Get the preferred step size (window increment -- the distance Chris@23: * in sample frames between the start frames of consecutive blocks Chris@23: * passed to the process() function) for the plugin. This should Chris@23: * be called before initialise(). Chris@23: * Chris@23: * A plugin may return 0 if it has no particular interest in the Chris@23: * step size. In this case, the host should make the step size Chris@23: * equal to the block size if the plugin is accepting input in the Chris@23: * time domain. If the plugin is accepting input in the frequency Chris@23: * domain, the host may use any step size. The final step size Chris@23: * will be set in the initialise() call. Chris@23: */ Chris@23: virtual size_t getPreferredStepSize() const { return 0; } Chris@23: Chris@23: /** Chris@23: * Get the minimum supported number of input channels. Chris@23: */ Chris@23: virtual size_t getMinChannelCount() const { return 1; } Chris@23: Chris@23: /** Chris@23: * Get the maximum supported number of input channels. Chris@23: */ Chris@23: virtual size_t getMaxChannelCount() const { return 1; } Chris@23: Chris@23: struct OutputDescriptor Chris@23: { Chris@23: /** Chris@23: * The name of the output, in computer-usable form. Should be Chris@23: * reasonably short and without whitespace or punctuation, using Chris@23: * the characters [a-zA-Z0-9_-] only. Chris@23: * Example: "zero_crossing_count" Chris@23: */ Chris@23: std::string identifier; Chris@23: Chris@23: /** Chris@23: * The human-readable name of the output. Chris@23: * Example: "Zero Crossing Counts" Chris@23: */ Chris@23: std::string name; Chris@23: Chris@23: /** Chris@23: * A human-readable short text describing the output. May be Chris@23: * empty if the name has said it all already. Chris@23: * Example: "The number of zero crossing points per processing block" Chris@23: */ Chris@23: std::string description; Chris@23: Chris@23: /** Chris@23: * The unit of the output, in human-readable form. Chris@23: */ Chris@23: std::string unit; Chris@23: Chris@23: /** Chris@23: * True if the output has the same number of values per sample Chris@23: * for every output sample. Outputs for which this is false Chris@23: * are unlikely to be very useful in a general-purpose host. Chris@23: */ Chris@23: bool hasFixedBinCount; Chris@23: Chris@23: /** Chris@23: * The number of values per result of the output. Undefined Chris@23: * if hasFixedBinCount is false. If this is zero, the output Chris@23: * is point data (i.e. only the time of each output is of Chris@23: * interest, the value list will be empty). Chris@23: */ Chris@23: size_t binCount; Chris@23: Chris@23: /** Chris@23: * The (human-readable) names of each of the bins, if Chris@23: * appropriate. This is always optional. Chris@23: */ Chris@23: std::vector binNames; Chris@23: Chris@23: /** Chris@23: * True if the results in each output bin fall within a fixed Chris@23: * numeric range (minimum and maximum values). Undefined if Chris@23: * binCount is zero. Chris@23: */ Chris@23: bool hasKnownExtents; Chris@23: Chris@23: /** Chris@23: * Minimum value of the results in the output. Undefined if Chris@23: * hasKnownExtents is false or binCount is zero. Chris@23: */ Chris@23: float minValue; Chris@23: Chris@23: /** Chris@23: * Maximum value of the results in the output. Undefined if Chris@23: * hasKnownExtents is false or binCount is zero. Chris@23: */ Chris@23: float maxValue; Chris@23: Chris@23: /** Chris@23: * True if the output values are quantized to a particular Chris@23: * resolution. Undefined if binCount is zero. Chris@23: */ Chris@23: bool isQuantized; Chris@23: Chris@23: /** Chris@23: * Quantization resolution of the output values (e.g. 1.0 if Chris@23: * they are all integers). Undefined if isQuantized is false Chris@23: * or binCount is zero. Chris@23: */ Chris@23: float quantizeStep; Chris@23: Chris@23: enum SampleType { Chris@23: Chris@23: /// Results from each process() align with that call's block start Chris@23: OneSamplePerStep, Chris@23: Chris@23: /// Results are evenly spaced in time (sampleRate specified below) Chris@23: FixedSampleRate, Chris@23: Chris@23: /// Results are unevenly spaced and have individual timestamps Chris@23: VariableSampleRate Chris@23: }; Chris@23: Chris@23: /** Chris@23: * Positioning in time of the output results. Chris@23: */ Chris@23: SampleType sampleType; Chris@23: Chris@23: /** Chris@23: * Sample rate of the output results, as samples per second. Chris@23: * Undefined if sampleType is OneSamplePerStep. Chris@23: * Chris@23: * If sampleType is VariableSampleRate and this value is Chris@23: * non-zero, then it may be used to calculate a resolution for Chris@23: * the output (i.e. the "duration" of each sample, in time, Chris@23: * will be 1/sampleRate seconds). It's recommended to set Chris@23: * this to zero if that behaviour is not desired. Chris@23: */ Chris@23: float sampleRate; Chris@23: Chris@23: /** Chris@23: * True if the returned results for this output are known to Chris@23: * have a duration field. Chris@23: */ Chris@23: bool hasDuration; Chris@23: Chris@23: OutputDescriptor() : // defaults for mandatory non-class-type members Chris@23: hasFixedBinCount(false), hasKnownExtents(false), isQuantized(false), Chris@23: sampleType(OneSamplePerStep), sampleRate(0), hasDuration(false) { } Chris@23: }; Chris@23: Chris@23: typedef std::vector OutputList; Chris@23: Chris@23: /** Chris@23: * Get the outputs of this plugin. An output's index in this list Chris@23: * is used as its numeric index when looking it up in the Chris@23: * FeatureSet returned from the process() call. Chris@23: */ Chris@23: virtual OutputList getOutputDescriptors() const = 0; Chris@23: Chris@23: struct Feature Chris@23: { Chris@23: /** Chris@23: * True if an output feature has its own timestamp. This is Chris@23: * mandatory if the output has VariableSampleRate, optional if Chris@23: * the output has FixedSampleRate, and unused if the output Chris@23: * has OneSamplePerStep. Chris@23: */ Chris@23: bool hasTimestamp; Chris@23: Chris@23: /** Chris@23: * Timestamp of the output feature. This is mandatory if the Chris@23: * output has VariableSampleRate or if the output has Chris@23: * FixedSampleRate and hasTimestamp is true, and unused Chris@23: * otherwise. Chris@23: */ Chris@23: RealTime timestamp; Chris@23: Chris@23: /** Chris@23: * True if an output feature has a specified duration. This Chris@23: * is optional if the output has VariableSampleRate or Chris@23: * FixedSampleRate, and and unused if the output has Chris@23: * OneSamplePerStep. Chris@23: */ Chris@23: bool hasDuration; Chris@23: Chris@23: /** Chris@23: * Duration of the output feature. This is mandatory if the Chris@23: * output has VariableSampleRate or FixedSampleRate and Chris@23: * hasDuration is true, and unused otherwise. Chris@23: */ Chris@23: RealTime duration; Chris@23: Chris@23: /** Chris@23: * Results for a single sample of this feature. If the output Chris@23: * hasFixedBinCount, there must be the same number of values Chris@23: * as the output's binCount count. Chris@23: */ Chris@23: std::vector values; Chris@23: Chris@23: /** Chris@23: * Label for the sample of this feature. Chris@23: */ Chris@23: std::string label; Chris@23: Chris@23: Feature() : // defaults for mandatory non-class-type members Chris@23: hasTimestamp(false), hasDuration(false) { } Chris@23: }; Chris@23: Chris@23: typedef std::vector FeatureList; Chris@23: Chris@23: typedef std::map FeatureSet; // key is output no Chris@23: Chris@23: /** Chris@23: * Process a single block of input data. Chris@23: * Chris@23: * If the plugin's inputDomain is TimeDomain, inputBuffers will Chris@23: * point to one array of floats per input channel, and each of Chris@23: * these arrays will contain blockSize consecutive audio samples Chris@23: * (the host will zero-pad as necessary). The timestamp in this Chris@23: * case will be the real time in seconds of the start of the Chris@23: * supplied block of samples. Chris@23: * Chris@23: * If the plugin's inputDomain is FrequencyDomain, inputBuffers Chris@23: * will point to one array of floats per input channel, and each Chris@23: * of these arrays will contain blockSize/2+1 consecutive pairs of Chris@23: * real and imaginary component floats corresponding to bins Chris@23: * 0..(blockSize/2) of the FFT output. That is, bin 0 (the first Chris@23: * pair of floats) contains the DC output, up to bin blockSize/2 Chris@23: * which contains the Nyquist-frequency output. There will Chris@23: * therefore be blockSize+2 floats per channel in total. The Chris@23: * timestamp will be the real time in seconds of the centre of the Chris@23: * FFT input window (i.e. the very first block passed to process Chris@23: * might contain the FFT of half a block of zero samples and the Chris@23: * first half-block of the actual data, with a timestamp of zero). Chris@23: * Chris@23: * Return any features that have become available after this Chris@23: * process call. (These do not necessarily have to fall within Chris@23: * the process block, except for OneSamplePerStep outputs.) Chris@23: */ Chris@23: virtual FeatureSet process(const float *const *inputBuffers, Chris@23: RealTime timestamp) = 0; Chris@23: Chris@23: /** Chris@23: * After all blocks have been processed, calculate and return any Chris@23: * remaining features derived from the complete input. Chris@23: */ Chris@23: virtual FeatureSet getRemainingFeatures() = 0; Chris@23: Chris@23: /** Chris@23: * Used to distinguish between Vamp::Plugin and other potential Chris@23: * sibling subclasses of PluginBase. Do not reimplement this Chris@23: * function in your subclass. Chris@23: */ Chris@23: virtual std::string getType() const { return "Feature Extraction Plugin"; } Chris@23: Chris@23: protected: Chris@23: Plugin(float inputSampleRate) : Chris@23: m_inputSampleRate(inputSampleRate) { } Chris@23: Chris@23: float m_inputSampleRate; Chris@23: }; Chris@23: Chris@23: } Chris@23: Chris@23: _VAMP_SDK_PLUGSPACE_END(Plugin.h) Chris@23: Chris@23: #endif Chris@23: Chris@23: Chris@23: