comparison sdk/Plugin.h @ 0:6479539d1b32

* Importing first cut of Sonic Visualiser's Vamp plugin format SDK
author cannam
date Fri, 31 Mar 2006 14:21:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:6479539d1b32
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 Vamp
5
6 An API for audio analysis and feature extraction plugins.
7
8 Centre for Digital Music, Queen Mary, University of London.
9 Copyright 2006 Chris Cannam.
10
11 Permission is hereby granted, free of charge, to any person
12 obtaining a copy of this software and associated documentation
13 files (the "Software"), to deal in the Software without
14 restriction, including without limitation the rights to use, copy,
15 modify, merge, publish, distribute, sublicense, and/or sell copies
16 of the Software, and to permit persons to whom the Software is
17 furnished to do so, subject to the following conditions:
18
19 The above copyright notice and this permission notice shall be
20 included in all copies or substantial portions of the Software.
21
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 NONINFRINGEMENT. IN NO EVENT SHALL THE X CONSORTIUM BE LIABLE FOR
26 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
27 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29
30 Except as contained in this notice, the names of the Centre for
31 Digital Music; Queen Mary, University of London; and Chris Cannam
32 shall not be used in advertising or otherwise to promote the sale,
33 use or other dealings in this Software without prior written
34 authorization.
35 */
36
37 #ifndef _VAMP_PLUGIN_H_
38 #define _VAMP_PLUGIN_H_
39
40 #include "PluginBase.h"
41 #include "RealTime.h"
42
43 #include <string>
44 #include <vector>
45 #include <map>
46
47 namespace Vamp {
48
49 /**
50 * Vamp::Plugin is a base class for plugin instance classes
51 * that provide feature extraction from audio or related data.
52 *
53 * In most cases, the input will be audio and the output will be a
54 * stream of derived data at a lower sampling resolution than the
55 * input.
56 *
57 * Note that this class inherits several abstract methods from
58 * PluginBase, that must be implemented by the subclass.
59 */
60
61 /**
62 * Plugin Lifecycle
63 * ================
64 *
65 * Feature extraction plugins are managed differently from real-time
66 * plugins (such as VST effects). The main difference is that the
67 * parameters for a feature extraction plugin are configured before
68 * the plugin is used, and do not change during use.
69 *
70 * 1. Host constructs the plugin, passing it the input sample rate.
71 * The plugin may do basic initialisation, but should not do anything
72 * computationally expensive at this point.
73 *
74 * 2. Host may query the plugin's available outputs.
75 *
76 * 3. Host queries programs and parameter descriptors, and may set
77 * some or all of them. Parameters that are not explicitly set should
78 * take their default values as specified in the parameter descriptor.
79 * When a program is set, the parameter values may change and the host
80 * will re-query them to check.
81 *
82 * 4. Host queries the preferred step size, block size, number of
83 * channels, and the number of values per feature for the plugin's
84 * outputs. These may all vary depending on the parameter values.
85 * (Note however that you cannot make the number of distinct outputs
86 * dependent on parameter values; nor can you make any of these depend
87 * on the number of input channels.)
88 *
89 * 5. Plugin is properly initialised with a call to initialise. This
90 * fixes the step size, block size, and number of channels, as well as
91 * all of the parameter and program settings. If the values passed in
92 * to initialise do not match the plugin's advertised preferred values
93 * from step 4, the plugin may refuse to initialise and return false
94 * (although if possible it should accept the new values).
95 *
96 * 6. Host will repeatedly call the process method to pass in blocks
97 * of input data. This method may return features extracted from that
98 * data (if the plugin is causal).
99 *
100 * 7. Host will call getRemainingFeatures exactly once, after all the
101 * input data has been processed. This may return any non-causal or
102 * leftover features.
103 *
104 * 8. At any point after initialise was called, the host may
105 * optionally call the reset method and restart processing. (This
106 * does not mean it can change the parameters, which are fixed from
107 * initialise until destruction.)
108 *
109 * A plugin does not need to handle the case where setParameter or
110 * selectProgram is called after initialise has been called. It's the
111 * host's responsibility not to do that.
112 */
113
114 class Plugin : public PluginBase
115 {
116 public:
117 /**
118 * Initialise a plugin to prepare it for use with the given number
119 * of input channels, step size (window increment, in sample
120 * frames) and block size (window size, in sample frames).
121 *
122 * The input sample rate should have been already specified at
123 * construction time.
124 *
125 * Return true for successful initialisation, false if the number
126 * of input channels, step size and/or block size cannot be
127 * supported.
128 */
129 virtual bool initialise(size_t inputChannels,
130 size_t stepSize,
131 size_t blockSize) = 0;
132
133 /**
134 * Reset the plugin after use, to prepare it for another clean
135 * run. Not called for the first initialisation (i.e. initialise
136 * must also do a reset).
137 */
138 virtual void reset() = 0;
139
140 enum InputDomain { TimeDomain, FrequencyDomain };
141
142 /**
143 * Get the plugin's required input domain. If this is TimeDomain,
144 * the samples provided to the process() function (below) will be
145 * in the time domain, as for a traditional audio processing
146 * plugin. If this is FrequencyDomain, the host will carry out a
147 * windowed FFT of size equal to the negotiated block size on the
148 * data before passing the frequency bin data in to process().
149 * The plugin does not get to choose the window type -- the host
150 * will either let the user do so, or will use a Hanning window.
151 */
152 virtual InputDomain getInputDomain() const = 0;
153
154 /**
155 * Get the preferred step size (window increment -- the distance
156 * in sample frames between the start frames of consecutive blocks
157 * passed to the process() function) for the plugin. This should
158 * be called before initialise().
159 */
160 virtual size_t getPreferredStepSize() const = 0;
161
162 /**
163 * Get the preferred block size (window size -- the number of
164 * sample frames passed in each block to the process() function).
165 * This should be called before initialise().
166 */
167 virtual size_t getPreferredBlockSize() const { return getPreferredStepSize(); }
168
169 /**
170 * Get the minimum supported number of input channels.
171 */
172 virtual size_t getMinChannelCount() const { return 1; }
173
174 /**
175 * Get the maximum supported number of input channels.
176 */
177 virtual size_t getMaxChannelCount() const { return 1; }
178
179 struct OutputDescriptor
180 {
181 /**
182 * The name of the output, in computer-usable form. Should be
183 * reasonably short and without whitespace or punctuation.
184 */
185 std::string name;
186
187 /**
188 * The human-readable name of the output.
189 */
190 std::string description;
191
192 /**
193 * The unit of the output, in human-readable form.
194 */
195 std::string unit;
196
197 /**
198 * True if the output has the same number of values per result
199 * for every output result. Outputs for which this is false
200 * are unlikely to be very useful in a general-purpose host.
201 */
202 bool hasFixedValueCount;
203
204 /**
205 * The number of values per result of the output. Undefined
206 * if hasFixedValueCount is false. If this is zero, the output
207 * is point data (i.e. only the time of each output is of
208 * interest, the value list will be empty).
209 *
210 * Note that this gives the number of values of a single
211 * output result, not of the output stream (which has one more
212 * dimension: time).
213 */
214 size_t valueCount;
215
216 /**
217 * The names of each of the values, if appropriate. This is
218 * always optional.
219 */
220 std::vector<std::string> valueNames;
221
222 /**
223 * True if the results in the output have a fixed numeric
224 * range (minimum and maximum values). Undefined if
225 * valueCount is zero.
226 */
227 bool hasKnownExtents;
228
229 /**
230 * Minimum value of the results in the output. Undefined if
231 * hasKnownExtents is false or valueCount is zero.
232 */
233 float minValue;
234
235 /**
236 * Maximum value of the results in the output. Undefined if
237 * hasKnownExtents is false or valueCount is zero.
238 */
239 float maxValue;
240
241 /**
242 * True if the output values are quantized to a particular
243 * resolution. Undefined if valueCount is zero.
244 */
245 bool isQuantized;
246
247 /**
248 * Quantization resolution of the output values (e.g. 1.0 if
249 * they are all integers). Undefined if isQuantized is false
250 * or valueCount is zero.
251 */
252 float quantizeStep;
253
254 enum SampleType {
255
256 /// Results from each process() align with that call's block start
257 OneSamplePerStep,
258
259 /// Results are evenly spaced in time (sampleRate specified below)
260 FixedSampleRate,
261
262 /// Results are unevenly spaced and have individual timestamps
263 VariableSampleRate
264 };
265
266 /**
267 * Positioning in time of the output results.
268 */
269 SampleType sampleType;
270
271 /**
272 * Sample rate of the output results. Undefined if sampleType
273 * is OneSamplePerStep.
274 *
275 * If sampleType is VariableSampleRate and this value is
276 * non-zero, then it may be used to calculate a resolution for
277 * the output (i.e. the "duration" of each value, in time).
278 * It's recommended to set this to zero if that behaviour is
279 * not desired.
280 */
281 float sampleRate;
282 };
283
284 typedef std::vector<OutputDescriptor> OutputList;
285
286 /**
287 * Get the outputs of this plugin. An output's index in this list
288 * is used as its numeric index when looking it up in the
289 * FeatureSet returned from the process() call.
290 */
291 virtual OutputList getOutputDescriptors() const = 0;
292
293 struct Feature
294 {
295 /**
296 * True if an output feature has its own timestamp. This is
297 * mandatory if the output has VariableSampleRate, and is
298 * likely to be disregarded otherwise.
299 */
300 bool hasTimestamp;
301
302 /**
303 * Timestamp of the output feature. This is mandatory if the
304 * output has VariableSampleRate, and is likely to be
305 * disregarded otherwise. Undefined if hasTimestamp is false.
306 */
307 RealTime timestamp;
308
309 /**
310 * Results for a single sample of this feature. If the output
311 * hasFixedValueCount, there must be the same number of values
312 * as the output's valueCount count.
313 */
314 std::vector<float> values;
315
316 /**
317 * Label for the sample of this feature.
318 */
319 std::string label;
320 };
321
322 typedef std::vector<Feature> FeatureList;
323 typedef std::map<int, FeatureList> FeatureSet; // key is output no
324
325 /**
326 * Process a single block of input data.
327 *
328 * If the plugin's inputDomain is TimeDomain, inputBuffers will
329 * point to one array of floats per input channel, and each of
330 * these arrays will contain blockSize consecutive audio samples
331 * (the host will zero-pad as necessary).
332 *
333 * If the plugin's inputDomain is FrequencyDomain, inputBuffers
334 * will point to one array of floats per input channel, and each
335 * of these arrays will contain blockSize/2 consecutive pairs of
336 * real and imaginary component floats corresponding to bins
337 * 0..(blockSize/2-1) of the FFT output.
338 *
339 * The timestamp is the real time in seconds of the start of the
340 * supplied block of samples.
341 *
342 * Return any features that have become available after this
343 * process call. (These do not necessarily have to fall within
344 * the process block, except for OneSamplePerStep outputs.)
345 */
346 virtual FeatureSet process(float **inputBuffers,
347 RealTime timestamp) = 0;
348
349 /**
350 * After all blocks have been processed, calculate and return any
351 * remaining features derived from the complete input.
352 */
353 virtual FeatureSet getRemainingFeatures() = 0;
354
355 virtual std::string getType() const { return "Feature Extraction Plugin"; }
356
357 protected:
358 Plugin(float inputSampleRate) :
359 m_inputSampleRate(inputSampleRate) { }
360
361 float m_inputSampleRate;
362 };
363
364 }
365
366 #endif
367
368
369