To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / vamp-sdk / Plugin.h

History | View | Annotate | Download (15.6 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2

    
3
/*
    Vamp

    An API for audio analysis and feature extraction plugins.

    Centre for Digital Music, Queen Mary, University of London.
    Copyright 2006 Chris Cannam.

    Permission is hereby granted, free of charge, to any person
    obtaining a copy of this software and associated documentation
    files (the "Software"), to deal in the Software without
    restriction, including without limitation the rights to use, copy,
    modify, merge, publish, distribute, sublicense, and/or sell copies
    of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be
    included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

    Except as contained in this notice, the names of the Centre for
    Digital Music; Queen Mary, University of London; and Chris Cannam
    shall not be used in advertising or otherwise to promote the sale,
    use or other dealings in this Software without prior written
    authorization.
*/
36

    
37
#ifndef _VAMP_SDK_PLUGIN_H_
38
#define _VAMP_SDK_PLUGIN_H_
39

    
40
#include <string>
41
#include <vector>
42
#include <map>
43

    
44
#include "PluginBase.h"
45
#include "RealTime.h"
46

    
47
#include "plugguard.h"
48
_VAMP_SDK_PLUGSPACE_BEGIN(Plugin.h)
49

    
50
namespace Vamp {
51

    
52
/**
53
 * \class Plugin Plugin.h <vamp-sdk/Plugin.h>
54
 * 
55
 * Vamp::Plugin is a base class for plugin instance classes
56
 * that provide feature extraction from audio or related data.
57
 *
58
 * In most cases, the input will be audio and the output will be a
59
 * stream of derived data at a lower sampling resolution than the
60
 * input.
61
 *
62
 * Note that this class inherits several abstract methods from
63
 * PluginBase.  These must be implemented by the subclass.
64
 * 
65
 * 
66
 * PLUGIN LIFECYCLE
67
 *
68
 * Feature extraction plugins are managed differently from real-time
69
 * plugins (such as VST effects).  The main difference is that the
70
 * parameters for a feature extraction plugin are configured before
71
 * the plugin is used, and do not change during use.
72
 *
73
 * 1. Host constructs the plugin, passing it the input sample rate.
74
 * The plugin may do basic initialisation, but should not do anything
75
 * computationally expensive at this point.  You must make sure your
76
 * plugin is cheap to construct, otherwise you'll seriously affect the
77
 * startup performance of almost all hosts.  If you have serious
78
 * initialisation to do, the proper place is in initialise() (step 5).
79
 *
80
 * 2. Host may query the plugin's available outputs.
81
 *
82
 * 3. Host queries programs and parameter descriptors, and may set
83
 * some or all of them.  Parameters that are not explicitly set should
84
 * take their default values as specified in the parameter descriptor.
85
 * When a program is set, the parameter values may change and the host
86
 * will re-query them to check.
87
 *
88
 * 4. Host queries the preferred step size, block size and number of
89
 * channels.  These may all vary depending on the parameter values.
90
 * (Note however that you cannot make the number of distinct outputs
91
 * dependent on parameter values.)
92
 *
93
 * 5. Plugin is properly initialised with a call to initialise.  This
94
 * fixes the step size, block size, and number of channels, as well as
95
 * all of the parameter and program settings.  If the values passed in
96
 * to initialise do not match the plugin's advertised preferred values
97
 * from step 4, the plugin may refuse to initialise and return false
98
 * (although if possible it should accept the new values).  Any
99
 * computationally expensive setup code should take place here.
100
 *
101
 * 6. Host finally checks the number of values, resolution, extents
102
 * etc per output (which may vary depending on the number of channels,
103
 * step size and block size as well as the parameter values).
104
 *
105
 * 7. Host will repeatedly call the process method to pass in blocks
106
 * of input data.  This method may return features extracted from that
107
 * data (if the plugin is causal).
108
 *
109
 * 8. Host will call getRemainingFeatures exactly once, after all the
110
 * input data has been processed.  This may return any non-causal or
111
 * leftover features.
112
 *
113
 * 9. At any point after initialise was called, the host may
114
 * optionally call the reset method and restart processing.  (This
115
 * does not mean it can change the parameters, which are fixed from
116
 * initialise until destruction.)
117
 *
118
 * A plugin does not need to handle the case where setParameter or
119
 * selectProgram is called after initialise has been called.  It's the
120
 * host's responsibility not to do that.  Similarly, the plugin may
121
 * safely assume that initialise is called no more than once.
122
 */
123

    
124
class Plugin : public PluginBase
125
{
126
public:
127
    virtual ~Plugin() { }
128

    
129
    /**
130
     * Initialise a plugin to prepare it for use with the given number
131
     * of input channels, step size (window increment, in sample
132
     * frames) and block size (window size, in sample frames).
133
     *
134
     * The input sample rate should have been already specified at
135
     * construction time.
136
     * 
137
     * Return true for successful initialisation, false if the number
138
     * of input channels, step size and/or block size cannot be
139
     * supported.
140
     */
141
    virtual bool initialise(size_t inputChannels,
142
                            size_t stepSize,
143
                            size_t blockSize) = 0;
144

    
145
    /**
146
     * Reset the plugin after use, to prepare it for another clean
147
     * run.  Not called for the first initialisation (i.e. initialise
148
     * must also do a reset).
149
     */
150
    virtual void reset() = 0;
151

    
152
    enum InputDomain { TimeDomain, FrequencyDomain };
153
    
154
    /**
155
     * Get the plugin's required input domain.
156
     *
157
     * If this is TimeDomain, the samples provided to the process()
158
     * function (below) will be in the time domain, as for a
159
     * traditional audio processing plugin.
160
     *
161
     * If this is FrequencyDomain, the host will carry out a windowed
162
     * FFT of size equal to the negotiated block size on the data
163
     * before passing the frequency bin data in to process().  The
164
     * input data for the FFT will be rotated so as to place the
165
     * origin in the centre of the block.
166
     * The plugin does not get to choose the window type -- the host
167
     * will either let the user do so, or will use a Hanning window.
168
     */
169
    virtual InputDomain getInputDomain() const = 0;
170

    
171
    /**
172
     * Get the preferred block size (window size -- the number of
173
     * sample frames passed in each block to the process() function).
174
     * This should be called before initialise().
175
     *
176
     * A plugin that can handle any block size may return 0.  The
177
     * final block size will be set in the initialise() call.
178
     */
179
    virtual size_t getPreferredBlockSize() const { return 0; }
180

    
181
    /**
182
     * Get the preferred step size (window increment -- the distance
183
     * in sample frames between the start frames of consecutive blocks
184
     * passed to the process() function) for the plugin.  This should
185
     * be called before initialise().
186
     *
187
     * A plugin may return 0 if it has no particular interest in the
188
     * step size.  In this case, the host should make the step size
189
     * equal to the block size if the plugin is accepting input in the
190
     * time domain.  If the plugin is accepting input in the frequency
191
     * domain, the host may use any step size.  The final step size
192
     * will be set in the initialise() call.
193
     */
194
    virtual size_t getPreferredStepSize() const { return 0; }
195

    
196
    /**
197
     * Get the minimum supported number of input channels.
198
     */
199
    virtual size_t getMinChannelCount() const { return 1; }
200

    
201
    /**
202
     * Get the maximum supported number of input channels.
203
     */
204
    virtual size_t getMaxChannelCount() const { return 1; }
205

    
206
    struct OutputDescriptor
207
    {
208
        /**
209
         * The name of the output, in computer-usable form.  Should be
210
         * reasonably short and without whitespace or punctuation, using
211
         * the characters [a-zA-Z0-9_-] only.
212
         * Example: "zero_crossing_count"
213
         */
214
        std::string identifier;
215

    
216
        /**
217
         * The human-readable name of the output.
218
         * Example: "Zero Crossing Counts"
219
         */
220
        std::string name;
221

    
222
        /**
223
         * A human-readable short text describing the output.  May be
224
         * empty if the name has said it all already.
225
         * Example: "The number of zero crossing points per processing block"
226
         */
227
        std::string description;
228

    
229
        /**
230
         * The unit of the output, in human-readable form.
231
         */
232
        std::string unit;
233

    
234
        /**
235
         * True if the output has the same number of values per sample
236
         * for every output sample.  Outputs for which this is false
237
         * are unlikely to be very useful in a general-purpose host.
238
         */
239
        bool hasFixedBinCount;
240

    
241
        /**
242
         * The number of values per result of the output.  Undefined
243
         * if hasFixedBinCount is false.  If this is zero, the output
244
         * is point data (i.e. only the time of each output is of
245
         * interest, the value list will be empty).
246
         */
247
        size_t binCount;
248

    
249
        /**
250
         * The (human-readable) names of each of the bins, if
251
         * appropriate.  This is always optional.
252
         */
253
        std::vector<std::string> binNames;
254

    
255
        /**
256
         * True if the results in each output bin fall within a fixed
257
         * numeric range (minimum and maximum values).  Undefined if
258
         * binCount is zero.
259
         */
260
        bool hasKnownExtents;
261

    
262
        /**
263
         * Minimum value of the results in the output.  Undefined if
264
         * hasKnownExtents is false or binCount is zero.
265
         */
266
        float minValue;
267

    
268
        /**
269
         * Maximum value of the results in the output.  Undefined if
270
         * hasKnownExtents is false or binCount is zero.
271
         */
272
        float maxValue;
273

    
274
        /**
275
         * True if the output values are quantized to a particular
276
         * resolution.  Undefined if binCount is zero.
277
         */
278
        bool isQuantized;
279

    
280
        /**
281
         * Quantization resolution of the output values (e.g. 1.0 if
282
         * they are all integers).  Undefined if isQuantized is false
283
         * or binCount is zero.
284
         */
285
        float quantizeStep;
286

    
287
        enum SampleType {
288

    
289
            /// Results from each process() align with that call's block start
290
            OneSamplePerStep,
291

    
292
            /// Results are evenly spaced in time (sampleRate specified below)
293
            FixedSampleRate,
294

    
295
            /// Results are unevenly spaced and have individual timestamps
296
            VariableSampleRate
297
        };
298

    
299
        /**
300
         * Positioning in time of the output results.
301
         */
302
        SampleType sampleType;
303

    
304
        /**
305
         * Sample rate of the output results, as samples per second.
306
         * Undefined if sampleType is OneSamplePerStep.
307
         *
308
         * If sampleType is VariableSampleRate and this value is
309
         * non-zero, then it may be used to calculate a resolution for
310
         * the output (i.e. the "duration" of each sample, in time,
311
         * will be 1/sampleRate seconds).  It's recommended to set
312
         * this to zero if that behaviour is not desired.
313
         */
314
        float sampleRate;
315

    
316
        /**
317
         * True if the returned results for this output are known to
318
         * have a duration field.
319
         */
320
        bool hasDuration;
321

    
322
        OutputDescriptor() : // defaults for mandatory non-class-type members
323
            hasFixedBinCount(false),
324
            binCount(0),
325
            hasKnownExtents(false),
326
            minValue(0),
327
            maxValue(0),
328
            isQuantized(false),
329
            quantizeStep(0),
330
            sampleType(OneSamplePerStep), 
331
            sampleRate(0), 
332
            hasDuration(false) { }
333
    };
334

    
335
    typedef std::vector<OutputDescriptor> OutputList;
336

    
337
    /**
338
     * Get the outputs of this plugin.  An output's index in this list
339
     * is used as its numeric index when looking it up in the
340
     * FeatureSet returned from the process() call.
341
     */
342
    virtual OutputList getOutputDescriptors() const = 0;
343

    
344
    struct Feature
345
    {
346
        /**
347
         * True if an output feature has its own timestamp.  This is
348
         * mandatory if the output has VariableSampleRate, optional if
349
         * the output has FixedSampleRate, and unused if the output
350
         * has OneSamplePerStep.
351
         */
352
        bool hasTimestamp;
353

    
354
        /**
355
         * Timestamp of the output feature.  This is mandatory if the
356
         * output has VariableSampleRate or if the output has
357
         * FixedSampleRate and hasTimestamp is true, and unused
358
         * otherwise.
359
         */
360
        RealTime timestamp;
361

    
362
        /**
363
         * True if an output feature has a specified duration.  This
364
         * is optional if the output has VariableSampleRate or
365
         * FixedSampleRate, and and unused if the output has
366
         * OneSamplePerStep.
367
         */
368
        bool hasDuration;
369

    
370
        /**
371
         * Duration of the output feature.  This is mandatory if the
372
         * output has VariableSampleRate or FixedSampleRate and
373
         * hasDuration is true, and unused otherwise.
374
         */
375
        RealTime duration;
376
        
377
        /**
378
         * Results for a single sample of this feature.  If the output
379
         * hasFixedBinCount, there must be the same number of values
380
         * as the output's binCount count.
381
         */
382
        std::vector<float> values;
383

    
384
        /**
385
         * Label for the sample of this feature.
386
         */
387
        std::string label;
388

    
389
        Feature() : // defaults for mandatory non-class-type members
390
            hasTimestamp(false), hasDuration(false) { }
391
    };
392

    
393
    typedef std::vector<Feature> FeatureList;
394

    
395
    typedef std::map<int, FeatureList> FeatureSet; // key is output no
396

    
397
    /**
398
     * Process a single block of input data.
399
     * 
400
     * If the plugin's inputDomain is TimeDomain, inputBuffers will
401
     * point to one array of floats per input channel, and each of
402
     * these arrays will contain blockSize consecutive audio samples
403
     * (the host will zero-pad as necessary).  The timestamp in this
404
     * case will be the real time in seconds of the start of the
405
     * supplied block of samples.
406
     *
407
     * If the plugin's inputDomain is FrequencyDomain, inputBuffers
408
     * will point to one array of floats per input channel, and each
409
     * of these arrays will contain blockSize/2+1 consecutive pairs of
410
     * real and imaginary component floats corresponding to bins
411
     * 0..(blockSize/2) of the FFT output.  That is, bin 0 (the first
412
     * pair of floats) contains the DC output, up to bin blockSize/2
413
     * which contains the Nyquist-frequency output.  There will
414
     * therefore be blockSize+2 floats per channel in total.  The
415
     * timestamp will be the real time in seconds of the centre of the
416
     * FFT input window (i.e. the very first block passed to process
417
     * might contain the FFT of half a block of zero samples and the
418
     * first half-block of the actual data, with a timestamp of zero).
419
     *
420
     * Return any features that have become available after this
421
     * process call.  (These do not necessarily have to fall within
422
     * the process block, except for OneSamplePerStep outputs.)
423
     */
424
    virtual FeatureSet process(const float *const *inputBuffers,
425
                               RealTime timestamp) = 0;
426

    
427
    /**
428
     * After all blocks have been processed, calculate and return any
429
     * remaining features derived from the complete input.
430
     */
431
    virtual FeatureSet getRemainingFeatures() = 0;
432

    
433
    /**
434
     * Used to distinguish between Vamp::Plugin and other potential
435
     * sibling subclasses of PluginBase.  Do not reimplement this
436
     * function in your subclass.
437
     */
438
    virtual std::string getType() const { return "Feature Extraction Plugin"; }
439

    
440
    /**
441
     * Retrieve the input sample rate set on construction.
442
     */
443
    float getInputSampleRate() const { return m_inputSampleRate; }
444
    
445
protected:
446
    Plugin(float inputSampleRate) :
447
        m_inputSampleRate(inputSampleRate) { }
448

    
449
    float m_inputSampleRate;
450
};
451

    
452
}
453

    
454
_VAMP_SDK_PLUGSPACE_END(Plugin.h)
455

    
456
#endif
457

    
458

    
459