Mercurial > hg > svcore
comparison plugin/FeatureExtractionPlugin.h @ 0:da6937383da8
initial import
author | Chris Cannam |
---|---|
date | Tue, 10 Jan 2006 16:33:16 +0000 |
parents | |
children | d86891498eef |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:da6937383da8 |
---|---|
1 /* -*- c-basic-offset: 4 -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 A waveform viewer and audio annotation editor. | |
5 Chris Cannam, Queen Mary University of London, 2005 | |
6 | |
7 This is experimental software. Not for distribution. | |
8 */ | |
9 | |
10 #ifndef _FEATURE_EXTRACTION_PLUGIN_H_ | |
11 #define _FEATURE_EXTRACTION_PLUGIN_H_ | |
12 | |
13 /** | |
14 * A base class for feature extraction plugins. | |
15 */ | |
16 | |
17 #include <string> | |
18 #include <vector> | |
19 #include <map> | |
20 | |
21 #include "base/RealTime.h" | |
22 | |
23 /** | |
24 * FeatureExtractionPlugin is a base class for plugin instance classes | |
25 * that provide feature extraction from audio or related data. | |
26 * | |
27 * In most cases, the input will be audio and the output will be a | |
28 * stream of derived data at a lower sampling resolution than the | |
29 * input. | |
30 */ | |
31 | |
32 class FeatureExtractionPlugin | |
33 { | |
34 public: | |
35 /** | |
36 * Initialise a plugin to prepare it for use with the given number | |
37 * of input channels, step size (window increment, in sample | |
38 * frames) and block size (window size, in sample frames). | |
39 * | |
40 * The input sample rate should have been already specified at | |
41 * construction time. | |
42 * | |
43 * Return true for successful initialisation, false if the number | |
44 * of input channels, step size and/or block size cannot be | |
45 * supported. | |
46 */ | |
47 virtual bool initialise(size_t inputChannels, | |
48 size_t stepSize, | |
49 size_t blockSize) = 0; | |
50 | |
51 /** | |
52 * Reset the plugin after use, to prepare it for another clean | |
53 * run. Not called for the first initialisation (i.e. initialise | |
54 * must also do a reset). | |
55 */ | |
56 virtual void reset() = 0; | |
57 | |
58 /** | |
59 * Get the computer-usable name of the plugin. This should be | |
60 * reasonably short and contain no whitespace or punctuation | |
61 * characters. | |
62 */ | |
63 virtual std::string getName() const = 0; | |
64 | |
65 /** | |
66 * Get a human-readable description of the plugin. This should be | |
67 * self-contained, as it may be shown to the user in isolation | |
68 * without also showing the plugin's "name". | |
69 */ | |
70 virtual std::string getDescription() const = 0; | |
71 | |
72 /** | |
73 * Get the name of the author or vendor of the plugin in | |
74 * human-readable form. | |
75 */ | |
76 virtual std::string getMaker() const = 0; | |
77 | |
78 /** | |
79 * Get the version number of the plugin. | |
80 */ | |
81 virtual int getPluginVersion() const = 0; | |
82 | |
83 /** | |
84 * Get the copyright statement or licensing summary of the plugin. | |
85 */ | |
86 virtual std::string getCopyright() const = 0; | |
87 | |
88 /** | |
89 * Get the preferred step size (window increment -- the distance | |
90 * in sample frames between the start frames of consecutive blocks | |
91 * passed to the process() function) for the plugin. This should | |
92 * be called before initialise(). | |
93 */ | |
94 virtual size_t getPreferredStepSize() const = 0; | |
95 | |
96 /** | |
97 * Get the preferred block size (window size -- the number of | |
98 * sample frames passed in each block to the process() function). | |
99 * This should be called before initialise(). | |
100 */ | |
101 virtual size_t getPreferredBlockSize() const { return getPreferredStepSize(); } | |
102 | |
103 /** | |
104 * Get the minimum supported number of input channels. | |
105 */ | |
106 virtual size_t getMinChannelCount() const { return 1; } | |
107 | |
108 /** | |
109 * Get the maximum supported number of input channels. | |
110 */ | |
111 virtual size_t getMaxChannelCount() const { return 1; } | |
112 | |
113 | |
114 struct OutputDescriptor | |
115 { | |
116 /** | |
117 * The name of the output, in computer-usable form. Should be | |
118 * reasonably short and without whitespace or punctuation. | |
119 */ | |
120 std::string name; | |
121 | |
122 /** | |
123 * The human-readable name of the output. | |
124 */ | |
125 std::string description; | |
126 | |
127 /** | |
128 * The unit of the output, in human-readable form. | |
129 */ | |
130 std::string unit; | |
131 | |
132 /** | |
133 * True if the output has the same number of values per result | |
134 * for every output result. Outputs for which this is false | |
135 * are unlikely to be very useful in a general-purpose host. | |
136 */ | |
137 bool hasFixedValueCount; | |
138 | |
139 /** | |
140 * The number of values per result of the output. Undefined | |
141 * if hasFixedValueCount is false. If this is zero, the output | |
142 * is point data (i.e. only the time of each output is of | |
143 * interest, the value list will be empty). | |
144 * | |
145 * Note that this gives the number of values of a single | |
146 * output result, not of the output stream (which has one more | |
147 * dimension: time). | |
148 */ | |
149 size_t valueCount; | |
150 | |
151 /** | |
152 * True if the results in the output have a fixed numeric | |
153 * range (minimum and maximum values). Undefined if | |
154 * valueCount is zero. | |
155 */ | |
156 bool hasKnownExtents; | |
157 | |
158 /** | |
159 * Minimum value of the results in the output. Undefined if | |
160 * hasKnownExtents is false or valueCount is zero. | |
161 */ | |
162 float minValue; | |
163 | |
164 /** | |
165 * Maximum value of the results in the output. Undefined if | |
166 * hasKnownExtents is false or valueCount is zero. | |
167 */ | |
168 float maxValue; | |
169 | |
170 /** | |
171 * True if the output values are quantized to a particular | |
172 * resolution. Undefined if valueCount is zero. | |
173 */ | |
174 bool isQuantized; | |
175 | |
176 /** | |
177 * Quantization resolution of the output values (e.g. 1.0 if | |
178 * they are all integers). Undefined if isQuantized is false | |
179 * or valueCount is zero. | |
180 */ | |
181 float quantizeStep; | |
182 | |
183 enum SampleType { | |
184 | |
185 /// Results from each process() align with that call's block start | |
186 OneSamplePerStep, | |
187 | |
188 /// Results are evenly spaced in time (sampleRate specified below) | |
189 FixedSampleRate, | |
190 | |
191 /// Results are unevenly spaced and have individual timestamps | |
192 VariableSampleRate | |
193 }; | |
194 | |
195 /** | |
196 * Positioning in time of the output results. | |
197 */ | |
198 SampleType sampleType; | |
199 | |
200 /** | |
201 * Sample rate of the output results. Undefined if sampleType | |
202 * is OneSamplePerStep. | |
203 * | |
204 * If sampleType is VariableSampleRate and this value is | |
205 * non-zero, then it may be used to calculate a resolution for | |
206 * the output (i.e. the "duration" of each value, in time). | |
207 * It's recommended to set this to zero if that behaviour is | |
208 * not desired. | |
209 */ | |
210 float sampleRate; | |
211 }; | |
212 | |
213 typedef std::vector<OutputDescriptor> OutputList; | |
214 | |
215 /** | |
216 * Get the outputs of this plugin. An output's index in this list | |
217 * is used as its numeric index when looking it up in the | |
218 * FeatureSet returned from the process() call. | |
219 */ | |
220 virtual OutputList getOutputDescriptors() const = 0; | |
221 | |
222 | |
223 struct ParameterDescriptor | |
224 { | |
225 /** | |
226 * The name of the parameter, in computer-usable form. Should | |
227 * be reasonably short and without whitespace or punctuation. | |
228 */ | |
229 std::string name; | |
230 | |
231 /** | |
232 * The human-readable name of the parameter. | |
233 */ | |
234 std::string description; | |
235 | |
236 /** | |
237 * The unit of the parameter, in human-readable form. | |
238 */ | |
239 std::string unit; | |
240 | |
241 /** | |
242 * The minimum value of the parameter. | |
243 */ | |
244 float minValue; | |
245 | |
246 /** | |
247 * The maximum value of the parameter. | |
248 */ | |
249 float maxValue; | |
250 | |
251 /** | |
252 * The default value of the parameter. | |
253 */ | |
254 float defaultValue; | |
255 | |
256 /** | |
257 * True if the parameter values are quantized to a particular | |
258 * resolution. | |
259 */ | |
260 bool isQuantized; | |
261 | |
262 /** | |
263 * Quantization resolution of the parameter values (e.g. 1.0 | |
264 * if they are all integers). Undefined if isQuantized is | |
265 * false. | |
266 */ | |
267 float quantizeStep; | |
268 }; | |
269 | |
270 typedef std::vector<ParameterDescriptor> ParameterList; | |
271 | |
272 /** | |
273 * Get the controllable parameters of this plugin. | |
274 */ | |
275 virtual ParameterList getParameterDescriptors() const { | |
276 return ParameterList(); | |
277 } | |
278 | |
279 /** | |
280 * Get the value of a named parameter. The argument is the name | |
281 * field from that parameter's descriptor. | |
282 */ | |
283 virtual float getParameter(std::string) const { return 0.0; } | |
284 | |
285 /** | |
286 * Set a named parameter. The first argument is the name field | |
287 * from that parameter's descriptor. | |
288 */ | |
289 virtual void setParameter(std::string, float) { } | |
290 | |
291 struct Feature | |
292 { | |
293 /** | |
294 * True if an output feature has its own timestamp. This is | |
295 * mandatory if the output has VariableSampleRate, and is | |
296 * likely to be disregarded otherwise. | |
297 */ | |
298 bool hasTimestamp; | |
299 | |
300 /** | |
301 * Timestamp of the output feature. This is mandatory if the | |
302 * output has VariableSampleRate, and is likely to be | |
303 * disregarded otherwise. Undefined if hasTimestamp is false. | |
304 */ | |
305 RealTime timestamp; | |
306 | |
307 /** | |
308 * Results for a single sample of this feature. If the output | |
309 * hasFixedValueCount, there must be the same number of values | |
310 * as the output's valueCount. | |
311 */ | |
312 std::vector<float> values; | |
313 | |
314 /** | |
315 * Label for the sample of this feature. | |
316 */ | |
317 std::string label; | |
318 }; | |
319 | |
320 typedef std::vector<Feature> FeatureList; | |
321 typedef std::map<int, FeatureList> FeatureSet; // key is the output's index in the OutputList | |
322 | |
323 /** | |
324 * Process a single block of input data. inputBuffers points to | |
325 * one array of floats per input channel, and each of those arrays | |
326 * contains the blockSize number of samples (the host will | |
327 * zero-pad as necessary). The timestamp is the real time in | |
328 * seconds of the start of the supplied block of samples. | |
329 * | |
330 * Return any features that have become available after this | |
331 * process call. (These do not necessarily have to fall within | |
332 * the process block, except for OneSamplePerStep outputs.) | |
333 */ | |
334 virtual FeatureSet process(float **inputBuffers, | |
335 RealTime timestamp) = 0; | |
336 | |
337 /** | |
338 * After all blocks have been processed, calculate and return any | |
339 * remaining features derived from the complete input. | |
340 */ | |
341 virtual FeatureSet getRemainingFeatures() = 0; | |
342 | |
343 protected: | |
344 FeatureExtractionPlugin(float inputSampleRate) : | |
345 m_inputSampleRate(inputSampleRate) { } | |
346 | |
347 float m_inputSampleRate; | |
348 }; | |
349 | |
350 #endif | |
351 | |
352 | |
353 |