annotate src/vamp-plugin-sdk-2.5/vamp-sdk/Plugin.h @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 619f715526df
children
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@23 2
Chris@23 3 /*
Chris@23 4 Vamp
Chris@23 5
Chris@23 6 An API for audio analysis and feature extraction plugins.
Chris@23 7
Chris@23 8 Centre for Digital Music, Queen Mary, University of London.
Chris@23 9 Copyright 2006 Chris Cannam.
Chris@23 10
Chris@23 11 Permission is hereby granted, free of charge, to any person
Chris@23 12 obtaining a copy of this software and associated documentation
Chris@23 13 files (the "Software"), to deal in the Software without
Chris@23 14 restriction, including without limitation the rights to use, copy,
Chris@23 15 modify, merge, publish, distribute, sublicense, and/or sell copies
Chris@23 16 of the Software, and to permit persons to whom the Software is
Chris@23 17 furnished to do so, subject to the following conditions:
Chris@23 18
Chris@23 19 The above copyright notice and this permission notice shall be
Chris@23 20 included in all copies or substantial portions of the Software.
Chris@23 21
Chris@23 22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Chris@23 23 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Chris@23 24 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Chris@23 25 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
Chris@23 26 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
Chris@23 27 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
Chris@23 28 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Chris@23 29
Chris@23 30 Except as contained in this notice, the names of the Centre for
Chris@23 31 Digital Music; Queen Mary, University of London; and Chris Cannam
Chris@23 32 shall not be used in advertising or otherwise to promote the sale,
Chris@23 33 use or other dealings in this Software without prior written
Chris@23 34 authorization.
Chris@23 35 */
Chris@23 36
Chris@23 37 #ifndef _VAMP_SDK_PLUGIN_H_
Chris@23 38 #define _VAMP_SDK_PLUGIN_H_
Chris@23 39
Chris@23 40 #include <string>
Chris@23 41 #include <vector>
Chris@23 42 #include <map>
Chris@23 43
Chris@23 44 #include "PluginBase.h"
Chris@23 45 #include "RealTime.h"
Chris@23 46
Chris@23 47 #include "plugguard.h"
Chris@23 48 _VAMP_SDK_PLUGSPACE_BEGIN(Plugin.h)
Chris@23 49
Chris@23 50 namespace Vamp {
Chris@23 51
Chris@23 52 /**
Chris@23 53 * \class Plugin Plugin.h <vamp-sdk/Plugin.h>
Chris@23 54 *
Chris@23 55 * Vamp::Plugin is a base class for plugin instance classes
Chris@23 56 * that provide feature extraction from audio or related data.
Chris@23 57 *
Chris@23 58 * In most cases, the input will be audio and the output will be a
Chris@23 59 * stream of derived data at a lower sampling resolution than the
Chris@23 60 * input.
Chris@23 61 *
Chris@23 62 * Note that this class inherits several abstract methods from
Chris@23 63 * PluginBase. These must be implemented by the subclass.
Chris@23 64 *
Chris@23 65 *
Chris@23 66 * PLUGIN LIFECYCLE
Chris@23 67 *
Chris@23 68 * Feature extraction plugins are managed differently from real-time
Chris@23 69 * plugins (such as VST effects). The main difference is that the
Chris@23 70 * parameters for a feature extraction plugin are configured before
Chris@23 71 * the plugin is used, and do not change during use.
Chris@23 72 *
Chris@23 73 * 1. Host constructs the plugin, passing it the input sample rate.
Chris@23 74 * The plugin may do basic initialisation, but should not do anything
Chris@23 75 * computationally expensive at this point. You must make sure your
Chris@23 76 * plugin is cheap to construct, otherwise you'll seriously affect the
Chris@23 77 * startup performance of almost all hosts. If you have serious
Chris@23 78 * initialisation to do, the proper place is in initialise() (step 5).
Chris@23 79 *
Chris@23 80 * 2. Host may query the plugin's available outputs.
Chris@23 81 *
Chris@23 82 * 3. Host queries programs and parameter descriptors, and may set
Chris@23 83 * some or all of them. Parameters that are not explicitly set should
Chris@23 84 * take their default values as specified in the parameter descriptor.
Chris@23 85 * When a program is set, the parameter values may change and the host
Chris@23 86 * will re-query them to check.
Chris@23 87 *
Chris@23 88 * 4. Host queries the preferred step size, block size and number of
Chris@23 89 * channels. These may all vary depending on the parameter values.
Chris@23 90 * (Note however that you cannot make the number of distinct outputs
Chris@23 91 * dependent on parameter values.)
Chris@23 92 *
Chris@23 93 * 5. Plugin is properly initialised with a call to initialise. This
Chris@23 94 * fixes the step size, block size, and number of channels, as well as
Chris@23 95 * all of the parameter and program settings. If the values passed in
Chris@23 96 * to initialise do not match the plugin's advertised preferred values
Chris@23 97 * from step 4, the plugin may refuse to initialise and return false
Chris@23 98 * (although if possible it should accept the new values). Any
Chris@23 99 * computationally expensive setup code should take place here.
Chris@23 100 *
Chris@23 101 * 6. Host finally checks the number of values, resolution, extents
Chris@23 102 * etc per output (which may vary depending on the number of channels,
Chris@23 103 * step size and block size as well as the parameter values).
Chris@23 104 *
Chris@23 105 * 7. Host will repeatedly call the process method to pass in blocks
Chris@23 106 * of input data. This method may return features extracted from that
Chris@23 107 * data (if the plugin is causal).
Chris@23 108 *
Chris@23 109 * 8. Host will call getRemainingFeatures exactly once, after all the
Chris@23 110 * input data has been processed. This may return any non-causal or
Chris@23 111 * leftover features.
Chris@23 112 *
Chris@23 113 * 9. At any point after initialise was called, the host may
Chris@23 114 * optionally call the reset method and restart processing. (This
Chris@23 115 * does not mean it can change the parameters, which are fixed from
Chris@23 116 * initialise until destruction.)
Chris@23 117 *
Chris@23 118 * A plugin does not need to handle the case where setParameter or
Chris@23 119 * selectProgram is called after initialise has been called. It's the
Chris@23 120 * host's responsibility not to do that. Similarly, the plugin may
Chris@23 121 * safely assume that initialise is called no more than once.
Chris@23 122 */
Chris@23 123
Chris@23 124 class Plugin : public PluginBase
Chris@23 125 {
Chris@23 126 public:
Chris@23 127 virtual ~Plugin() { }
Chris@23 128
Chris@23 129 /**
Chris@23 130 * Initialise a plugin to prepare it for use with the given number
Chris@23 131 * of input channels, step size (window increment, in sample
Chris@23 132 * frames) and block size (window size, in sample frames).
Chris@23 133 *
Chris@23 134 * The input sample rate should have been already specified at
Chris@23 135 * construction time.
Chris@23 136 *
Chris@23 137 * Return true for successful initialisation, false if the number
Chris@23 138 * of input channels, step size and/or block size cannot be
Chris@23 139 * supported.
Chris@23 140 */
Chris@23 141 virtual bool initialise(size_t inputChannels,
Chris@23 142 size_t stepSize,
Chris@23 143 size_t blockSize) = 0;
Chris@23 144
Chris@23 145 /**
Chris@23 146 * Reset the plugin after use, to prepare it for another clean
Chris@23 147 * run. Not called for the first initialisation (i.e. initialise
Chris@23 148 * must also do a reset).
Chris@23 149 */
Chris@23 150 virtual void reset() = 0;
Chris@23 151
/// Domain in which the plugin wants its input (see getInputDomain()).
enum InputDomain {
    TimeDomain,
    FrequencyDomain
};
Chris@23 153
Chris@23 154 /**
Chris@23 155 * Get the plugin's required input domain.
Chris@23 156 *
Chris@23 157 * If this is TimeDomain, the samples provided to the process()
Chris@23 158 * function (below) will be in the time domain, as for a
Chris@23 159 * traditional audio processing plugin.
Chris@23 160 *
Chris@23 161 * If this is FrequencyDomain, the host will carry out a windowed
Chris@23 162 * FFT of size equal to the negotiated block size on the data
Chris@23 163 * before passing the frequency bin data in to process(). The
Chris@23 164 * input data for the FFT will be rotated so as to place the
Chris@23 165 * origin in the centre of the block.
Chris@23 166 * The plugin does not get to choose the window type -- the host
Chris@23 167 * will either let the user do so, or will use a Hanning window.
Chris@23 168 */
Chris@23 169 virtual InputDomain getInputDomain() const = 0;
Chris@23 170
Chris@23 171 /**
Chris@23 172 * Get the preferred block size (window size -- the number of
Chris@23 173 * sample frames passed in each block to the process() function).
Chris@23 174 * This should be called before initialise().
Chris@23 175 *
Chris@23 176 * A plugin that can handle any block size may return 0. The
Chris@23 177 * final block size will be set in the initialise() call.
Chris@23 178 */
Chris@23 179 virtual size_t getPreferredBlockSize() const { return 0; }
Chris@23 180
Chris@23 181 /**
Chris@23 182 * Get the preferred step size (window increment -- the distance
Chris@23 183 * in sample frames between the start frames of consecutive blocks
Chris@23 184 * passed to the process() function) for the plugin. This should
Chris@23 185 * be called before initialise().
Chris@23 186 *
Chris@23 187 * A plugin may return 0 if it has no particular interest in the
Chris@23 188 * step size. In this case, the host should make the step size
Chris@23 189 * equal to the block size if the plugin is accepting input in the
Chris@23 190 * time domain. If the plugin is accepting input in the frequency
Chris@23 191 * domain, the host may use any step size. The final step size
Chris@23 192 * will be set in the initialise() call.
Chris@23 193 */
Chris@23 194 virtual size_t getPreferredStepSize() const { return 0; }
Chris@23 195
Chris@23 196 /**
Chris@23 197 * Get the minimum supported number of input channels.
Chris@23 198 */
Chris@23 199 virtual size_t getMinChannelCount() const { return 1; }
Chris@23 200
Chris@23 201 /**
Chris@23 202 * Get the maximum supported number of input channels.
Chris@23 203 */
Chris@23 204 virtual size_t getMaxChannelCount() const { return 1; }
Chris@23 205
/**
 * Description of a single plugin output: its identity, the shape
 * and range of the values it carries, and how its results are
 * positioned in time.
 *
 * A default-constructed descriptor has empty strings and bin names,
 * all boolean flags false, all numeric fields zero, and a sampleType
 * of OneSamplePerStep.
 */
struct OutputDescriptor
{
    /**
     * The name of the output, in computer-usable form.  Should be
     * reasonably short and without whitespace or punctuation, using
     * the characters [a-zA-Z0-9_-] only.
     * Example: "zero_crossing_count"
     */
    std::string identifier;

    /**
     * The human-readable name of the output.
     * Example: "Zero Crossing Counts"
     */
    std::string name;

    /**
     * A human-readable short text describing the output.  May be
     * empty if the name has said it all already.
     * Example: "The number of zero crossing points per processing block"
     */
    std::string description;

    /**
     * The unit of the output, in human-readable form.
     */
    std::string unit;

    /**
     * True if the output has the same number of values per sample
     * for every output sample.  Outputs for which this is false
     * are unlikely to be very useful in a general-purpose host.
     */
    bool hasFixedBinCount;

    /**
     * The number of values per result of the output.  Not
     * meaningful if hasFixedBinCount is false.  If this is zero,
     * the output is point data (i.e. only the time of each output
     * is of interest, the value list will be empty).
     */
    size_t binCount;

    /**
     * The (human-readable) names of each of the bins, if
     * appropriate.  This is always optional.
     */
    std::vector<std::string> binNames;

    /**
     * True if the results in each output bin fall within a fixed
     * numeric range (minimum and maximum values).  Not meaningful
     * if binCount is zero.
     */
    bool hasKnownExtents;

    /**
     * Minimum value of the results in the output.  Not meaningful
     * if hasKnownExtents is false or binCount is zero.
     */
    float minValue;

    /**
     * Maximum value of the results in the output.  Not meaningful
     * if hasKnownExtents is false or binCount is zero.
     */
    float maxValue;

    /**
     * True if the output values are quantized to a particular
     * resolution.  Not meaningful if binCount is zero.
     */
    bool isQuantized;

    /**
     * Quantization resolution of the output values (e.g. 1.0 if
     * they are all integers).  Not meaningful if isQuantized is
     * false or binCount is zero.
     */
    float quantizeStep;

    enum SampleType {

        /// Results from each process() align with that call's block start
        OneSamplePerStep,

        /// Results are evenly spaced in time (sampleRate specified below)
        FixedSampleRate,

        /// Results are unevenly spaced and have individual timestamps
        VariableSampleRate
    };

    /**
     * Positioning in time of the output results.
     */
    SampleType sampleType;

    /**
     * Sample rate of the output results, as samples per second.
     * Not meaningful if sampleType is OneSamplePerStep.
     *
     * If sampleType is VariableSampleRate and this value is
     * non-zero, then it may be used to calculate a resolution for
     * the output (i.e. the "duration" of each sample, in time,
     * will be 1/sampleRate seconds).  It's recommended to set
     * this to zero if that behaviour is not desired.
     */
    float sampleRate;

    /**
     * True if the returned results for this output are known to
     * have a duration field.
     */
    bool hasDuration;

    /**
     * Construct a descriptor with every non-class-type member given
     * a defined value.  binCount, minValue, maxValue and
     * quantizeStep are zeroed as well as the flags: descriptors are
     * routinely copied (e.g. into an OutputList), and copying a
     * member that was never initialised reads an indeterminate
     * value.  Initialisers are listed in declaration order.
     */
    OutputDescriptor() :
        hasFixedBinCount(false),
        binCount(0),
        hasKnownExtents(false),
        minValue(0),
        maxValue(0),
        isQuantized(false),
        quantizeStep(0),
        sampleType(OneSamplePerStep),
        sampleRate(0),
        hasDuration(false) { }
};
Chris@23 326
Chris@23 327 typedef std::vector<OutputDescriptor> OutputList;
Chris@23 328
Chris@23 329 /**
Chris@23 330 * Get the outputs of this plugin. An output's index in this list
Chris@23 331 * is used as its numeric index when looking it up in the
Chris@23 332 * FeatureSet returned from the process() call.
Chris@23 333 */
Chris@23 334 virtual OutputList getOutputDescriptors() const = 0;
Chris@23 335
Chris@23 336 struct Feature
Chris@23 337 {
Chris@23 338 /**
Chris@23 339 * True if an output feature has its own timestamp. This is
Chris@23 340 * mandatory if the output has VariableSampleRate, optional if
Chris@23 341 * the output has FixedSampleRate, and unused if the output
Chris@23 342 * has OneSamplePerStep.
Chris@23 343 */
Chris@23 344 bool hasTimestamp;
Chris@23 345
Chris@23 346 /**
Chris@23 347 * Timestamp of the output feature. This is mandatory if the
Chris@23 348 * output has VariableSampleRate or if the output has
Chris@23 349 * FixedSampleRate and hasTimestamp is true, and unused
Chris@23 350 * otherwise.
Chris@23 351 */
Chris@23 352 RealTime timestamp;
Chris@23 353
Chris@23 354 /**
Chris@23 355 * True if an output feature has a specified duration. This
Chris@23 356 * is optional if the output has VariableSampleRate or
Chris@23 357 * FixedSampleRate, and and unused if the output has
Chris@23 358 * OneSamplePerStep.
Chris@23 359 */
Chris@23 360 bool hasDuration;
Chris@23 361
Chris@23 362 /**
Chris@23 363 * Duration of the output feature. This is mandatory if the
Chris@23 364 * output has VariableSampleRate or FixedSampleRate and
Chris@23 365 * hasDuration is true, and unused otherwise.
Chris@23 366 */
Chris@23 367 RealTime duration;
Chris@23 368
Chris@23 369 /**
Chris@23 370 * Results for a single sample of this feature. If the output
Chris@23 371 * hasFixedBinCount, there must be the same number of values
Chris@23 372 * as the output's binCount count.
Chris@23 373 */
Chris@23 374 std::vector<float> values;
Chris@23 375
Chris@23 376 /**
Chris@23 377 * Label for the sample of this feature.
Chris@23 378 */
Chris@23 379 std::string label;
Chris@23 380
Chris@23 381 Feature() : // defaults for mandatory non-class-type members
Chris@23 382 hasTimestamp(false), hasDuration(false) { }
Chris@23 383 };
Chris@23 384
Chris@23 385 typedef std::vector<Feature> FeatureList;
Chris@23 386
Chris@23 387 typedef std::map<int, FeatureList> FeatureSet; // key is output no
Chris@23 388
Chris@23 389 /**
Chris@23 390 * Process a single block of input data.
Chris@23 391 *
Chris@23 392 * If the plugin's inputDomain is TimeDomain, inputBuffers will
Chris@23 393 * point to one array of floats per input channel, and each of
Chris@23 394 * these arrays will contain blockSize consecutive audio samples
Chris@23 395 * (the host will zero-pad as necessary). The timestamp in this
Chris@23 396 * case will be the real time in seconds of the start of the
Chris@23 397 * supplied block of samples.
Chris@23 398 *
Chris@23 399 * If the plugin's inputDomain is FrequencyDomain, inputBuffers
Chris@23 400 * will point to one array of floats per input channel, and each
Chris@23 401 * of these arrays will contain blockSize/2+1 consecutive pairs of
Chris@23 402 * real and imaginary component floats corresponding to bins
Chris@23 403 * 0..(blockSize/2) of the FFT output. That is, bin 0 (the first
Chris@23 404 * pair of floats) contains the DC output, up to bin blockSize/2
Chris@23 405 * which contains the Nyquist-frequency output. There will
Chris@23 406 * therefore be blockSize+2 floats per channel in total. The
Chris@23 407 * timestamp will be the real time in seconds of the centre of the
Chris@23 408 * FFT input window (i.e. the very first block passed to process
Chris@23 409 * might contain the FFT of half a block of zero samples and the
Chris@23 410 * first half-block of the actual data, with a timestamp of zero).
Chris@23 411 *
Chris@23 412 * Return any features that have become available after this
Chris@23 413 * process call. (These do not necessarily have to fall within
Chris@23 414 * the process block, except for OneSamplePerStep outputs.)
Chris@23 415 */
Chris@23 416 virtual FeatureSet process(const float *const *inputBuffers,
Chris@23 417 RealTime timestamp) = 0;
Chris@23 418
Chris@23 419 /**
Chris@23 420 * After all blocks have been processed, calculate and return any
Chris@23 421 * remaining features derived from the complete input.
Chris@23 422 */
Chris@23 423 virtual FeatureSet getRemainingFeatures() = 0;
Chris@23 424
Chris@23 425 /**
Chris@23 426 * Used to distinguish between Vamp::Plugin and other potential
Chris@23 427 * sibling subclasses of PluginBase. Do not reimplement this
Chris@23 428 * function in your subclass.
Chris@23 429 */
Chris@23 430 virtual std::string getType() const { return "Feature Extraction Plugin"; }
Chris@23 431
Chris@23 432 protected:
Chris@23 433 Plugin(float inputSampleRate) :
Chris@23 434 m_inputSampleRate(inputSampleRate) { }
Chris@23 435
Chris@23 436 float m_inputSampleRate;
Chris@23 437 };
Chris@23 438
Chris@23 439 }
Chris@23 440
Chris@23 441 _VAMP_SDK_PLUGSPACE_END(Plugin.h)
Chris@23 442
Chris@23 443 #endif
Chris@23 444
Chris@23 445
Chris@23 446