annotate vamp-sdk/Plugin.h @ 263:4454843ff384

* OK, we're going to have to place the host stuff in its own namespace too. Otherwise our new SV build on OSX fails to load old plugins because they pull in the host namespace PluginBase and thus report the wrong Vamp version... *sigh*
author cannam
date Thu, 20 Nov 2008 15:01:30 +0000
parents 521734d2b498
children c97e70ed5abc
rev   line source
cannam@3 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@3 2
cannam@3 3 /*
cannam@3 4 Vamp
cannam@3 5
cannam@3 6 An API for audio analysis and feature extraction plugins.
cannam@3 7
cannam@3 8 Centre for Digital Music, Queen Mary, University of London.
cannam@3 9 Copyright 2006 Chris Cannam.
cannam@3 10
cannam@3 11 Permission is hereby granted, free of charge, to any person
cannam@3 12 obtaining a copy of this software and associated documentation
cannam@3 13 files (the "Software"), to deal in the Software without
cannam@3 14 restriction, including without limitation the rights to use, copy,
cannam@3 15 modify, merge, publish, distribute, sublicense, and/or sell copies
cannam@3 16 of the Software, and to permit persons to whom the Software is
cannam@3 17 furnished to do so, subject to the following conditions:
cannam@3 18
cannam@3 19 The above copyright notice and this permission notice shall be
cannam@3 20 included in all copies or substantial portions of the Software.
cannam@3 21
cannam@3 22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
cannam@3 23 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
cannam@3 24 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
cannam@6 25 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
cannam@3 26 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
cannam@3 27 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
cannam@3 28 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
cannam@3 29
cannam@3 30 Except as contained in this notice, the names of the Centre for
cannam@3 31 Digital Music; Queen Mary, University of London; and Chris Cannam
cannam@3 32 shall not be used in advertising or otherwise to promote the sale,
cannam@3 33 use or other dealings in this Software without prior written
cannam@3 34 authorization.
cannam@3 35 */
cannam@3 36
cannam@230 37 #ifndef _VAMP_SDK_PLUGIN_H_
cannam@230 38 #define _VAMP_SDK_PLUGIN_H_
cannam@230 39
cannam@230 40 #include <string>
cannam@230 41 #include <vector>
cannam@230 42 #include <map>
cannam@3 43
cannam@3 44 #include "PluginBase.h"
cannam@3 45 #include "RealTime.h"
cannam@3 46
cannam@230 47 #include "plugguard.h"
cannam@233 48 _VAMP_SDK_PLUGSPACE_BEGIN(Plugin.h)
cannam@3 49
cannam@3 50 namespace Vamp {
cannam@3 51
cannam@3 52 /**
cannam@76 53 * \class Plugin Plugin.h <vamp-sdk/Plugin.h>
cannam@76 54 *
cannam@3 55 * Vamp::Plugin is a base class for plugin instance classes
cannam@3 56 * that provide feature extraction from audio or related data.
cannam@3 57 *
cannam@3 58 * In most cases, the input will be audio and the output will be a
cannam@3 59 * stream of derived data at a lower sampling resolution than the
cannam@3 60 * input.
cannam@3 61 *
cannam@3 62 * Note that this class inherits several abstract methods from
cannam@53 63 * PluginBase. These must be implemented by the subclass.
cannam@53 64 *
cannam@53 65 *
cannam@53 66 * PLUGIN LIFECYCLE
cannam@3 67 *
cannam@3 68 * Feature extraction plugins are managed differently from real-time
cannam@3 69 * plugins (such as VST effects). The main difference is that the
cannam@3 70 * parameters for a feature extraction plugin are configured before
cannam@3 71 * the plugin is used, and do not change during use.
cannam@3 72 *
cannam@3 73 * 1. Host constructs the plugin, passing it the input sample rate.
cannam@3 74 * The plugin may do basic initialisation, but should not do anything
cannam@74 75 * computationally expensive at this point. You must make sure your
cannam@74 76 * plugin is cheap to construct, otherwise you'll seriously affect the
cannam@74 77 * startup performance of almost all hosts. If you have serious
cannam@74 78 * initialisation to do, the proper place is in initialise() (step 5).
cannam@3 79 *
cannam@3 80 * 2. Host may query the plugin's available outputs.
cannam@3 81 *
cannam@3 82 * 3. Host queries programs and parameter descriptors, and may set
cannam@3 83 * some or all of them. Parameters that are not explicitly set should
cannam@3 84 * take their default values as specified in the parameter descriptor.
cannam@3 85 * When a program is set, the parameter values may change and the host
cannam@3 86 * will re-query them to check.
cannam@3 87 *
cannam@27 88 * 4. Host queries the preferred step size, block size and number of
cannam@27 89 * channels. These may all vary depending on the parameter values.
cannam@3 90 * (Note however that you cannot make the number of distinct outputs
cannam@27 91 * dependent on parameter values.)
cannam@3 92 *
cannam@3 93 * 5. Plugin is properly initialised with a call to initialise. This
cannam@3 94 * fixes the step size, block size, and number of channels, as well as
cannam@3 95 * all of the parameter and program settings. If the values passed in
cannam@3 96 * to initialise do not match the plugin's advertised preferred values
cannam@3 97 * from step 4, the plugin may refuse to initialise and return false
cannam@35 98 * (although if possible it should accept the new values). Any
cannam@35 99 * computationally expensive setup code should take place here.
cannam@3 100 *
cannam@80 101 * 6. Host finally checks the number of values, resolution, extents
cannam@80 102 * etc per output (which may vary depending on the number of channels,
cannam@80 103 * step size and block size as well as the parameter values).
cannam@27 104 *
cannam@27 105 * 7. Host will repeatedly call the process method to pass in blocks
cannam@3 106 * of input data. This method may return features extracted from that
cannam@3 107 * data (if the plugin is causal).
cannam@3 108 *
cannam@27 109 * 8. Host will call getRemainingFeatures exactly once, after all the
cannam@3 110 * input data has been processed. This may return any non-causal or
cannam@3 111 * leftover features.
cannam@3 112 *
cannam@27 113 * 9. At any point after initialise was called, the host may
cannam@3 114 * optionally call the reset method and restart processing. (This
cannam@3 115 * does not mean it can change the parameters, which are fixed from
cannam@3 116 * initialise until destruction.)
cannam@3 117 *
cannam@3 118 * A plugin does not need to handle the case where setParameter or
cannam@3 119 * selectProgram is called after initialise has been called. It's the
cannam@35 120 * host's responsibility not to do that. Similarly, the plugin may
cannam@35 121 * safely assume that initialise is called no more than once.
cannam@3 122 */
cannam@3 123
cannam@3 124 class Plugin : public PluginBase
cannam@3 125 {
cannam@3 126 public:
cannam@20 127 virtual ~Plugin() { }
cannam@20 128
cannam@3 129 /**
cannam@3 130 * Initialise a plugin to prepare it for use with the given number
cannam@3 131 * of input channels, step size (window increment, in sample
cannam@3 132 * frames) and block size (window size, in sample frames).
cannam@3 133 *
cannam@3 134 * The input sample rate should have been already specified at
cannam@3 135 * construction time.
cannam@3 136 *
cannam@3 137 * Return true for successful initialisation, false if the number
cannam@3 138 * of input channels, step size and/or block size cannot be
cannam@3 139 * supported.
cannam@3 140 */
cannam@3 141 virtual bool initialise(size_t inputChannels,
cannam@3 142 size_t stepSize,
cannam@3 143 size_t blockSize) = 0;
cannam@3 144
cannam@3 145 /**
cannam@3 146 * Reset the plugin after use, to prepare it for another clean
cannam@3 147 * run. Not called for the first initialisation (i.e. initialise
cannam@3 148 * must also do a reset).
cannam@3 149 */
cannam@3 150 virtual void reset() = 0;
cannam@3 151
cannam@3 152 enum InputDomain { TimeDomain, FrequencyDomain };
cannam@3 153
cannam@3 154 /**
cannam@3 155 * Get the plugin's required input domain. If this is TimeDomain,
cannam@3 156 * the samples provided to the process() function (below) will be
cannam@3 157 * in the time domain, as for a traditional audio processing
cannam@3 158 * plugin. If this is FrequencyDomain, the host will carry out a
cannam@3 159 * windowed FFT of size equal to the negotiated block size on the
cannam@3 160 * data before passing the frequency bin data in to process().
cannam@3 161 * The plugin does not get to choose the window type -- the host
cannam@3 162 * will either let the user do so, or will use a Hanning window.
cannam@3 163 */
cannam@3 164 virtual InputDomain getInputDomain() const = 0;
cannam@3 165
cannam@3 166 /**
cannam@8 167 * Get the preferred block size (window size -- the number of
cannam@8 168 * sample frames passed in each block to the process() function).
cannam@8 169 * This should be called before initialise().
cannam@8 170 *
cannam@8 171 * A plugin that can handle any block size may return 0. The
cannam@8 172 * final block size will be set in the initialise() call.
cannam@8 173 */
cannam@8 174 virtual size_t getPreferredBlockSize() const { return 0; }
cannam@8 175
cannam@8 176 /**
cannam@3 177 * Get the preferred step size (window increment -- the distance
cannam@3 178 * in sample frames between the start frames of consecutive blocks
cannam@3 179 * passed to the process() function) for the plugin. This should
cannam@3 180 * be called before initialise().
cannam@8 181 *
cannam@8 182 * A plugin may return 0 if it has no particular interest in the
cannam@8 183 * step size. In this case, the host should make the step size
cannam@8 184 * equal to the block size if the plugin is accepting input in the
cannam@8 185 * time domain. If the plugin is accepting input in the frequency
cannam@8 186 * domain, the host may use any step size. The final step size
cannam@8 187 * will be set in the initialise() call.
cannam@3 188 */
cannam@8 189 virtual size_t getPreferredStepSize() const { return 0; }
cannam@3 190
cannam@3 191 /**
cannam@3 192 * Get the minimum supported number of input channels.
cannam@3 193 */
cannam@3 194 virtual size_t getMinChannelCount() const { return 1; }
cannam@3 195
cannam@3 196 /**
cannam@3 197 * Get the maximum supported number of input channels.
cannam@3 198 */
cannam@3 199 virtual size_t getMaxChannelCount() const { return 1; }
cannam@3 200
cannam@3 201 struct OutputDescriptor
cannam@3 202 {
cannam@3 203 /**
cannam@3 204 * The name of the output, in computer-usable form. Should be
cannam@49 205 * reasonably short and without whitespace or punctuation, using
cannam@134 206 * the characters [a-zA-Z0-9_-] only.
cannam@49 207 * Example: "zero_crossing_count"
cannam@49 208 */
cannam@49 209 std::string identifier;
cannam@49 210
cannam@49 211 /**
cannam@49 212 * The human-readable name of the output.
cannam@49 213 * Example: "Zero Crossing Counts"
cannam@3 214 */
cannam@3 215 std::string name;
cannam@3 216
cannam@3 217 /**
cannam@49 218 * A human-readable short text describing the output. May be
cannam@49 219 * empty if the name has said it all already.
cannam@49 220 * Example: "The number of zero crossing points per processing block"
cannam@3 221 */
cannam@3 222 std::string description;
cannam@3 223
cannam@3 224 /**
cannam@3 225 * The unit of the output, in human-readable form.
cannam@3 226 */
cannam@3 227 std::string unit;
cannam@3 228
cannam@3 229 /**
cannam@9 230 * True if the output has the same number of values per sample
cannam@9 231 * for every output sample. Outputs for which this is false
cannam@3 232 * are unlikely to be very useful in a general-purpose host.
cannam@3 233 */
cannam@9 234 bool hasFixedBinCount;
cannam@3 235
cannam@3 236 /**
cannam@3 237 * The number of values per result of the output. Undefined
cannam@9 238 * if hasFixedBinCount is false. If this is zero, the output
cannam@3 239 * is point data (i.e. only the time of each output is of
cannam@3 240 * interest, the value list will be empty).
cannam@3 241 */
cannam@9 242 size_t binCount;
cannam@3 243
cannam@3 244 /**
cannam@49 245 * The (human-readable) names of each of the bins, if
cannam@49 246 * appropriate. This is always optional.
cannam@3 247 */
cannam@9 248 std::vector<std::string> binNames;
cannam@3 249
cannam@3 250 /**
cannam@9 251 * True if the results in each output bin fall within a fixed
cannam@9 252 * numeric range (minimum and maximum values). Undefined if
cannam@9 253 * binCount is zero.
cannam@3 254 */
cannam@3 255 bool hasKnownExtents;
cannam@3 256
cannam@3 257 /**
cannam@3 258 * Minimum value of the results in the output. Undefined if
cannam@9 259 * hasKnownExtents is false or binCount is zero.
cannam@3 260 */
cannam@3 261 float minValue;
cannam@3 262
cannam@3 263 /**
cannam@3 264 * Maximum value of the results in the output. Undefined if
cannam@9 265 * hasKnownExtents is false or binCount is zero.
cannam@3 266 */
cannam@3 267 float maxValue;
cannam@3 268
cannam@3 269 /**
cannam@3 270 * True if the output values are quantized to a particular
cannam@9 271 * resolution. Undefined if binCount is zero.
cannam@3 272 */
cannam@3 273 bool isQuantized;
cannam@3 274
cannam@3 275 /**
cannam@3 276 * Quantization resolution of the output values (e.g. 1.0 if
cannam@3 277 * they are all integers). Undefined if isQuantized is false
cannam@9 278 * or binCount is zero.
cannam@3 279 */
cannam@3 280 float quantizeStep;
cannam@3 281
cannam@3 282 enum SampleType {
cannam@3 283
cannam@3 284 /// Results from each process() align with that call's block start
cannam@3 285 OneSamplePerStep,
cannam@3 286
cannam@3 287 /// Results are evenly spaced in time (sampleRate specified below)
cannam@3 288 FixedSampleRate,
cannam@3 289
cannam@3 290 /// Results are unevenly spaced and have individual timestamps
cannam@3 291 VariableSampleRate
cannam@3 292 };
cannam@3 293
cannam@3 294 /**
cannam@3 295 * Positioning in time of the output results.
cannam@3 296 */
cannam@3 297 SampleType sampleType;
cannam@3 298
cannam@3 299 /**
cannam@17 300 * Sample rate of the output results, as samples per second.
cannam@17 301 * Undefined if sampleType is OneSamplePerStep.
cannam@3 302 *
cannam@3 303 * If sampleType is VariableSampleRate and this value is
cannam@3 304 * non-zero, then it may be used to calculate a resolution for
cannam@17 305 * the output (i.e. the "duration" of each sample, in time,
cannam@17 306 * will be 1/sampleRate seconds). It's recommended to set
cannam@17 307 * this to zero if that behaviour is not desired.
cannam@3 308 */
cannam@3 309 float sampleRate;
cannam@167 310
cannam@192 311 /**
cannam@192 312 * True if the returned results for this output are known to
cannam@192 313 * have a duration field.
cannam@192 314 */
cannam@192 315 bool hasDuration;
cannam@192 316
cannam@167 317 OutputDescriptor() : // defaults for mandatory non-class-type members
cannam@167 318 hasFixedBinCount(false), hasKnownExtents(false), isQuantized(false),
cannam@192 319 sampleType(OneSamplePerStep), hasDuration(false) { }
cannam@3 320 };
cannam@3 321
cannam@3 322 typedef std::vector<OutputDescriptor> OutputList;
cannam@3 323
cannam@3 324 /**
cannam@3 325 * Get the outputs of this plugin. An output's index in this list
cannam@3 326 * is used as its numeric index when looking it up in the
cannam@3 327 * FeatureSet returned from the process() call.
cannam@3 328 */
cannam@3 329 virtual OutputList getOutputDescriptors() const = 0;
cannam@3 330
cannam@3 331 struct Feature
cannam@3 332 {
cannam@3 333 /**
cannam@3 334 * True if an output feature has its own timestamp. This is
cannam@167 335 * mandatory if the output has VariableSampleRate, optional if
cannam@167 336 * the output has FixedSampleRate, and unused if the output
cannam@167 337 * has OneSamplePerStep.
cannam@3 338 */
cannam@3 339 bool hasTimestamp;
cannam@3 340
cannam@3 341 /**
cannam@3 342 * Timestamp of the output feature. This is mandatory if the
cannam@167 343 * output has VariableSampleRate or if the output has
cannam@167 344 * FixedSampleRate and hasTimestamp is true, and unused
cannam@167 345 * otherwise.
cannam@3 346 */
cannam@3 347 RealTime timestamp;
cannam@167 348
cannam@167 349 /**
cannam@167 350 * True if an output feature has a specified duration. This
cannam@167 351 * is optional if the output has VariableSampleRate or
cannam@167 352 * FixedSampleRate, and and unused if the output has
cannam@167 353 * OneSamplePerStep.
cannam@167 354 */
cannam@167 355 bool hasDuration;
cannam@167 356
cannam@167 357 /**
cannam@167 358 * Duration of the output feature. This is mandatory if the
cannam@167 359 * output has VariableSampleRate or FixedSampleRate and
cannam@167 360 * hasDuration is true, and unused otherwise.
cannam@167 361 */
cannam@167 362 RealTime duration;
cannam@3 363
cannam@3 364 /**
cannam@3 365 * Results for a single sample of this feature. If the output
cannam@9 366 * hasFixedBinCount, there must be the same number of values
cannam@9 367 * as the output's binCount count.
cannam@3 368 */
cannam@3 369 std::vector<float> values;
cannam@3 370
cannam@3 371 /**
cannam@3 372 * Label for the sample of this feature.
cannam@3 373 */
cannam@3 374 std::string label;
cannam@167 375
cannam@167 376 Feature() : // defaults for mandatory non-class-type members
cannam@167 377 hasTimestamp(false), hasDuration(false) { }
cannam@3 378 };
cannam@3 379
cannam@3 380 typedef std::vector<Feature> FeatureList;
cannam@167 381
cannam@3 382 typedef std::map<int, FeatureList> FeatureSet; // key is output no
cannam@3 383
cannam@3 384 /**
cannam@3 385 * Process a single block of input data.
cannam@3 386 *
cannam@3 387 * If the plugin's inputDomain is TimeDomain, inputBuffers will
cannam@3 388 * point to one array of floats per input channel, and each of
cannam@3 389 * these arrays will contain blockSize consecutive audio samples
cannam@190 390 * (the host will zero-pad as necessary). The timestamp in this
cannam@190 391 * case will be the real time in seconds of the start of the
cannam@190 392 * supplied block of samples.
cannam@3 393 *
cannam@3 394 * If the plugin's inputDomain is FrequencyDomain, inputBuffers
cannam@3 395 * will point to one array of floats per input channel, and each
cannam@47 396 * of these arrays will contain blockSize/2+1 consecutive pairs of
cannam@3 397 * real and imaginary component floats corresponding to bins
cannam@78 398 * 0..(blockSize/2) of the FFT output. That is, bin 0 (the first
cannam@78 399 * pair of floats) contains the DC output, up to bin blockSize/2
cannam@78 400 * which contains the Nyquist-frequency output. There will
cannam@78 401 * therefore be blockSize+2 floats per channel in total. The
cannam@78 402 * timestamp will be the real time in seconds of the centre of the
cannam@78 403 * FFT input window (i.e. the very first block passed to process
cannam@78 404 * might contain the FFT of half a block of zero samples and the
cannam@78 405 * first half-block of the actual data, with a timestamp of zero).
cannam@3 406 *
cannam@3 407 * Return any features that have become available after this
cannam@3 408 * process call. (These do not necessarily have to fall within
cannam@3 409 * the process block, except for OneSamplePerStep outputs.)
cannam@3 410 */
cannam@47 411 virtual FeatureSet process(const float *const *inputBuffers,
cannam@3 412 RealTime timestamp) = 0;
cannam@3 413
cannam@3 414 /**
cannam@3 415 * After all blocks have been processed, calculate and return any
cannam@3 416 * remaining features derived from the complete input.
cannam@3 417 */
cannam@3 418 virtual FeatureSet getRemainingFeatures() = 0;
cannam@3 419
cannam@53 420 /**
cannam@53 421 * Used to distinguish between Vamp::Plugin and other potential
cannam@64 422 * sibling subclasses of PluginBase. Do not reimplement this
cannam@53 423 * function in your subclass.
cannam@53 424 */
cannam@3 425 virtual std::string getType() const { return "Feature Extraction Plugin"; }
cannam@3 426
cannam@3 427 protected:
cannam@3 428 Plugin(float inputSampleRate) :
cannam@3 429 m_inputSampleRate(inputSampleRate) { }
cannam@3 430
cannam@3 431 float m_inputSampleRate;
cannam@3 432 };
cannam@3 433
cannam@3 434 }
cannam@3 435
cannam@233 436 _VAMP_SDK_PLUGSPACE_END(Plugin.h)
cannam@230 437
cannam@3 438 #endif
cannam@3 439
cannam@3 440
cannam@3 441