cannam@0
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
cannam@0
|
2
|
cannam@0
|
3 /*
|
cannam@0
|
4 Vamp feature extraction plugins using Jamie Bullock's
|
cannam@0
|
5 libxtract audio feature extraction library.
|
cannam@0
|
6
|
cannam@0
|
7 Centre for Digital Music, Queen Mary, University of London.
|
cannam@0
|
8 This file copyright 2006 Queen Mary, University of London.
|
cannam@0
|
9
|
cannam@0
|
10 This program is free software; you can redistribute it and/or
|
cannam@0
|
11 modify it under the terms of the GNU General Public License as
|
cannam@0
|
12 published by the Free Software Foundation; either version 2 of the
|
cannam@0
|
13 License, or (at your option) any later version. See the file
|
cannam@0
|
14 COPYING included with this distribution for more information.
|
cannam@0
|
15 */
|
cannam@0
|
16
|
cannam@0
|
17 #include "XTractPlugin.h"
|
cannam@0
|
18
|
cannam@0
|
19 #include <cassert>
|
cannam@1
|
20 #include <math.h>
|
cannam@0
|
21
|
cannam@0
|
22
|
cannam@0
|
23 using std::cerr;
|
cannam@0
|
24 using std::endl;
|
cannam@0
|
25 using std::string;
|
cannam@0
|
26
|
cannam@1
|
27 xtract_function_descriptor_t *
|
cannam@1
|
28 XTractPlugin::m_xtDescriptors = 0;
|
cannam@1
|
29
|
cannam@1
|
30 int
|
cannam@1
|
31 XTractPlugin::m_xtDescRefCount = 0;
|
cannam@1
|
32
|
cannam@0
|
33 XTractPlugin::XTractPlugin(unsigned int xtFeature, float inputSampleRate) :
|
cannam@0
|
34 Plugin(inputSampleRate),
|
cannam@0
|
35 m_xtFeature(xtFeature),
|
cannam@0
|
36 m_channels(0),
|
cannam@0
|
37 m_stepSize(0),
|
cannam@0
|
38 m_blockSize(0),
|
cannam@0
|
39 m_resultBuffer(0),
|
cannam@1
|
40 m_peakThreshold(10),
|
cannam@1
|
41 m_rolloffThreshold(90),
|
cannam@1
|
42 m_harmonicThreshold(.1),
|
cannam@0
|
43 m_minFreq(80),
|
cannam@0
|
44 m_maxFreq(18000),
|
cannam@0
|
45 m_coeffCount(20),
|
cannam@0
|
46 m_mfccFilters(0),
|
cannam@1
|
47 m_mfccStyle((int)XTRACT_EQUAL_GAIN),
|
cannam@0
|
48 m_barkBandLimits(0),
|
cannam@0
|
49 m_outputBinCount(0),
|
cannam@0
|
50 m_initialised(false)
|
cannam@0
|
51 {
|
cannam@1
|
52 if (m_xtDescRefCount++ == 0) {
|
cannam@1
|
53 m_xtDescriptors =
|
cannam@1
|
54 (xtract_function_descriptor_t *)xtract_make_descriptors();
|
cannam@1
|
55 }
|
cannam@0
|
56 }
|
cannam@0
|
57
|
cannam@0
|
58 XTractPlugin::~XTractPlugin()
|
cannam@0
|
59 {
|
cannam@0
|
60 if (m_mfccFilters) {
|
cannam@0
|
61 for (size_t i = 0; i < m_coeffCount; ++i) {
|
cannam@0
|
62 delete[] m_mfccFilters[i];
|
cannam@0
|
63 }
|
cannam@0
|
64 delete[] m_mfccFilters;
|
cannam@0
|
65 }
|
cannam@0
|
66 if (m_barkBandLimits) {
|
cannam@0
|
67 delete[] m_barkBandLimits;
|
cannam@0
|
68 }
|
cannam@0
|
69 if (m_resultBuffer) {
|
cannam@0
|
70 delete[] m_resultBuffer;
|
cannam@0
|
71 }
|
cannam@1
|
72
|
cannam@1
|
73 if (--m_xtDescRefCount == 0) {
|
cannam@1
|
74 xtract_free_descriptors(m_xtDescriptors);
|
cannam@1
|
75 }
|
cannam@0
|
76 }
|
cannam@0
|
77
|
cannam@0
|
78 string
|
cannam@2
|
79 XTractPlugin::getIdentifier() const
|
cannam@0
|
80 {
|
cannam@1
|
81 return xtDescriptor()->algo.name;
|
cannam@0
|
82 }
|
cannam@0
|
83
|
cannam@0
|
84 string
|
cannam@2
|
85 XTractPlugin::getName() const
|
cannam@2
|
86 {
|
cannam@2
|
87 return xtDescriptor()->algo.p_name;
|
cannam@2
|
88 }
|
cannam@2
|
89
|
cannam@2
|
90 string
|
cannam@0
|
91 XTractPlugin::getDescription() const
|
cannam@0
|
92 {
|
cannam@2
|
93 return xtDescriptor()->algo.p_desc;
|
cannam@0
|
94 }
|
cannam@1
|
95
|
cannam@0
|
96
|
cannam@0
|
97 string
|
cannam@0
|
98 XTractPlugin::getMaker() const
|
cannam@0
|
99 {
|
cannam@0
|
100 return "libxtract by Jamie Bullock (plugin by Chris Cannam)";
|
cannam@0
|
101 }
|
cannam@0
|
102
|
cannam@0
|
103 int
|
cannam@0
|
104 XTractPlugin::getPluginVersion() const
|
cannam@0
|
105 {
|
cannam@1
|
106 return 2;
|
cannam@0
|
107 }
|
cannam@0
|
108
|
cannam@0
|
109 string
|
cannam@0
|
110 XTractPlugin::getCopyright() const
|
cannam@0
|
111 {
|
cannam@1
|
112 char year[12];
|
cannam@0
|
113 string text = "Copyright 2006 Jamie Bullock, plugin Copyright 2006 Queen Mary, University of London. ";
|
cannam@0
|
114
|
cannam@1
|
115 string method = "";
|
cannam@0
|
116
|
cannam@1
|
117 method += xtDescriptor()->algo.author;
|
cannam@1
|
118 sprintf(year, " (%d)", xtDescriptor()->algo.year);
|
cannam@1
|
119 method += year;
|
cannam@0
|
120
|
cannam@1
|
121 if (method != "") text += "Method from " + method + ". ";
|
cannam@0
|
122 text += "Distributed under the GNU General Public License";
|
cannam@0
|
123 return text;
|
cannam@0
|
124 }
|
cannam@0
|
125
|
cannam@0
|
126 XTractPlugin::InputDomain
|
cannam@0
|
127 XTractPlugin::getInputDomain() const
|
cannam@0
|
128 {
|
cannam@1
|
129
|
cannam@1
|
130 if (xtDescriptor()->data.format == XTRACT_AUDIO_SAMPLES)
|
cannam@1
|
131 return TimeDomain;
|
cannam@1
|
132 else
|
cannam@1
|
133 return FrequencyDomain;
|
cannam@0
|
134 }
|
cannam@1
|
135
|
cannam@1
|
136
|
cannam@0
|
137
|
cannam@0
|
138 bool
|
cannam@0
|
139 XTractPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
cannam@0
|
140 {
|
cannam@1
|
141
|
cannam@1
|
142 int donor = *(xtDescriptor()->argv.donor),
|
cannam@1
|
143 data_format = xtDescriptor()->data.format;
|
cannam@1
|
144
|
cannam@0
|
145 if (channels < getMinChannelCount() ||
|
cannam@0
|
146 channels > getMaxChannelCount()) return false;
|
cannam@0
|
147
|
cannam@0
|
148 m_channels = channels;
|
cannam@0
|
149 m_stepSize = stepSize;
|
cannam@0
|
150 m_blockSize = blockSize;
|
cannam@0
|
151
|
cannam@1
|
152 if (donor == XTRACT_INIT_MFCC) {
|
cannam@0
|
153
|
cannam@0
|
154 m_mfccFilters = new float *[m_coeffCount];
|
cannam@0
|
155 for (size_t i = 0; i < m_coeffCount; ++i) {
|
cannam@0
|
156 m_mfccFilters[i] = new float[m_blockSize];
|
cannam@0
|
157 }
|
cannam@0
|
158
|
cannam@0
|
159 int error = (int)xtract_init_mfcc(m_blockSize, m_inputSampleRate/2,
|
cannam@0
|
160 m_mfccStyle, m_minFreq, m_maxFreq,
|
cannam@0
|
161 m_coeffCount, m_mfccFilters);
|
cannam@1
|
162 if (error != XTRACT_SUCCESS) {
|
cannam@0
|
163 cerr << "XTractPlugin::initialise: ERROR: "
|
cannam@0
|
164 << "xtract_init_mfcc returned error code " << error << endl;
|
cannam@0
|
165 return false;
|
cannam@0
|
166 }
|
cannam@0
|
167
|
cannam@1
|
168 } else if (donor == XTRACT_BARK_COEFFICIENTS ||
|
cannam@7
|
169 donor == XTRACT_INIT_BARK ||
|
cannam@1
|
170 data_format == XTRACT_BARK_COEFFS) {
|
cannam@7
|
171
|
cannam@1
|
172 m_barkBandLimits = new int[XTRACT_BARK_BANDS];
|
cannam@0
|
173
|
cannam@1
|
174 /*int error = *(int)*/xtract_init_bark(m_blockSize, m_inputSampleRate,
|
cannam@0
|
175 m_barkBandLimits);
|
cannam@0
|
176 // if (error != SUCCESS) {
|
cannam@0
|
177 // cerr << "XTractPlugin::initialise: ERROR: "
|
cannam@0
|
178 // << "xtract_init_bark returned error code " << error << endl;
|
cannam@0
|
179 // return false;
|
cannam@0
|
180 // }
|
cannam@0
|
181 }
|
cannam@0
|
182
|
cannam@0
|
183 switch (m_xtFeature) {
|
cannam@1
|
184 case XTRACT_SPECTRUM:
|
cannam@1
|
185 case XTRACT_HARMONIC_SPECTRUM:
|
cannam@1
|
186 case XTRACT_PEAK_SPECTRUM:
|
cannam@1
|
187 m_outputBinCount = m_blockSize / 2; break;
|
cannam@1
|
188 case XTRACT_DCT:
|
cannam@1
|
189 case XTRACT_AUTOCORRELATION_FFT:
|
cannam@1
|
190 case XTRACT_AUTOCORRELATION:
|
cannam@1
|
191 case XTRACT_AMDF:
|
cannam@1
|
192 case XTRACT_ASDF:
|
cannam@1
|
193 m_outputBinCount = m_blockSize; break;
|
cannam@1
|
194 case XTRACT_MFCC:
|
cannam@1
|
195 m_outputBinCount = m_coeffCount; break;
|
cannam@1
|
196 case XTRACT_BARK_COEFFICIENTS:
|
cannam@1
|
197 m_outputBinCount = XTRACT_BARK_BANDS; break;
|
cannam@1
|
198 default:
|
cannam@1
|
199 m_outputBinCount = 1; break;
|
cannam@0
|
200 }
|
cannam@0
|
201
|
cannam@0
|
202 setupOutputDescriptors();
|
cannam@0
|
203
|
cannam@0
|
204 m_initialised = true;
|
cannam@0
|
205
|
cannam@0
|
206 return true;
|
cannam@0
|
207 }
|
cannam@0
|
208
|
cannam@0
|
209 void
|
cannam@0
|
210 XTractPlugin::reset()
|
cannam@0
|
211 {
|
cannam@0
|
212 }
|
cannam@0
|
213
|
cannam@0
|
214 size_t
|
cannam@0
|
215 XTractPlugin::getMinChannelCount() const
|
cannam@0
|
216 {
|
cannam@0
|
217 return 1;
|
cannam@0
|
218 }
|
cannam@0
|
219
|
cannam@0
|
220 size_t
|
cannam@0
|
221 XTractPlugin::getMaxChannelCount() const
|
cannam@0
|
222 {
|
cannam@0
|
223 return 1;
|
cannam@0
|
224 }
|
cannam@0
|
225
|
cannam@0
|
226 size_t
|
cannam@0
|
227 XTractPlugin::getPreferredStepSize() const
|
cannam@0
|
228 {
|
cannam@0
|
229 if (getInputDomain() == FrequencyDomain) {
|
cannam@1
|
230 return getPreferredBlockSize();
|
cannam@1
|
231 } else {
|
cannam@0
|
232 return getPreferredBlockSize() / 2;
|
cannam@0
|
233 }
|
cannam@0
|
234 }
|
cannam@0
|
235
|
cannam@0
|
236 size_t
|
cannam@0
|
237 XTractPlugin::getPreferredBlockSize() const
|
cannam@0
|
238 {
|
cannam@0
|
239 return 1024;
|
cannam@0
|
240 }
|
cannam@0
|
241
|
cannam@0
|
242 XTractPlugin::ParameterList
|
cannam@0
|
243 XTractPlugin::getParameterDescriptors() const
|
cannam@0
|
244 {
|
cannam@0
|
245 ParameterList list;
|
cannam@0
|
246 ParameterDescriptor desc;
|
cannam@0
|
247
|
cannam@1
|
248 if (m_xtFeature == XTRACT_MFCC) {
|
cannam@0
|
249
|
cannam@2
|
250 desc.identifier = "minfreq";
|
cannam@2
|
251 desc.name = "Minimum Frequency";
|
cannam@0
|
252 desc.minValue = 0;
|
cannam@0
|
253 desc.maxValue = m_inputSampleRate / 2;
|
cannam@0
|
254 desc.defaultValue = 80;
|
cannam@0
|
255 desc.isQuantized = false;
|
cannam@0
|
256 desc.unit = "Hz";
|
cannam@0
|
257 list.push_back(desc);
|
cannam@0
|
258
|
cannam@2
|
259 desc.identifier = "maxfreq";
|
cannam@2
|
260 desc.name = "Maximum Frequency";
|
cannam@0
|
261 desc.defaultValue = 18000;
|
cannam@0
|
262 if (desc.defaultValue > m_inputSampleRate * 0.875) {
|
cannam@0
|
263 desc.defaultValue = m_inputSampleRate * 0.875;
|
cannam@0
|
264 }
|
cannam@0
|
265 list.push_back(desc);
|
cannam@0
|
266
|
cannam@2
|
267 desc.identifier = "bands";
|
cannam@2
|
268 desc.name = "Mel Frequency Bands";
|
cannam@0
|
269 desc.minValue = 10;
|
cannam@0
|
270 desc.maxValue = 30;
|
cannam@0
|
271 desc.defaultValue = 20;
|
cannam@0
|
272 desc.unit = "";
|
cannam@0
|
273 desc.isQuantized = true;
|
cannam@0
|
274 desc.quantizeStep = 1;
|
cannam@0
|
275 list.push_back(desc);
|
cannam@0
|
276
|
cannam@2
|
277 desc.identifier = "style";
|
cannam@2
|
278 desc.name = "MFCC Type";
|
cannam@0
|
279 desc.minValue = 0;
|
cannam@0
|
280 desc.maxValue = 1;
|
cannam@0
|
281 desc.defaultValue = 0;
|
cannam@0
|
282 desc.valueNames.push_back("Equal Gain");
|
cannam@0
|
283 desc.valueNames.push_back("Equal Area");
|
cannam@0
|
284 list.push_back(desc);
|
cannam@0
|
285 }
|
cannam@0
|
286
|
cannam@0
|
287 if (needPeakThreshold()) {
|
cannam@0
|
288
|
cannam@2
|
289 desc.identifier = "peak threshold";
|
cannam@2
|
290 desc.name = "Peak Threshold";
|
cannam@0
|
291 desc.minValue = 0;
|
cannam@0
|
292 desc.maxValue = 100;
|
cannam@1
|
293 desc.defaultValue = 10; /* Threshold as % of maximum peak found */
|
cannam@0
|
294 desc.isQuantized = false;
|
cannam@0
|
295 desc.valueNames.clear();
|
cannam@0
|
296 desc.unit = "%";
|
cannam@0
|
297 list.push_back(desc);
|
cannam@0
|
298
|
cannam@1
|
299 }
|
cannam@1
|
300
|
cannam@1
|
301 if (needRolloffThreshold()) {
|
cannam@0
|
302
|
cannam@2
|
303 desc.identifier = "rolloff threshold";
|
cannam@2
|
304 desc.name = "Rolloff Threshold";
|
cannam@0
|
305 desc.minValue = 0;
|
cannam@0
|
306 desc.maxValue = 100;
|
cannam@1
|
307 desc.defaultValue = 90; /* Freq below which 90% of energy is */
|
cannam@0
|
308 desc.isQuantized = false;
|
cannam@0
|
309 desc.valueNames.clear();
|
cannam@0
|
310 desc.unit = "%";
|
cannam@0
|
311 list.push_back(desc);
|
cannam@1
|
312
|
cannam@1
|
313 }
|
cannam@1
|
314
|
cannam@1
|
315 if (needHarmonicThreshold()) {
|
cannam@1
|
316
|
cannam@2
|
317 desc.identifier = "harmonic threshold";
|
cannam@2
|
318 desc.name = "Harmonic Threshold";
|
cannam@1
|
319 desc.minValue = 0;
|
cannam@1
|
320 desc.maxValue = 1.0;
|
cannam@1
|
321 desc.defaultValue = .1; /* Distance from nearesst harmonic number */
|
cannam@1
|
322 desc.isQuantized = false;
|
cannam@1
|
323 desc.valueNames.clear();
|
cannam@1
|
324 desc.unit = "";
|
cannam@1
|
325 list.push_back(desc);
|
cannam@0
|
326 }
|
cannam@0
|
327
|
cannam@0
|
328 return list;
|
cannam@0
|
329 }
|
cannam@0
|
330
|
cannam@0
|
331 float
|
cannam@0
|
332 XTractPlugin::getParameter(string param) const
|
cannam@0
|
333 {
|
cannam@1
|
334 if (m_xtFeature == XTRACT_MFCC) {
|
cannam@0
|
335 if (param == "minfreq") return m_minFreq;
|
cannam@0
|
336 if (param == "maxfreq") return m_maxFreq;
|
cannam@0
|
337 if (param == "bands") return m_coeffCount;
|
cannam@0
|
338 if (param == "style") return m_mfccStyle;
|
cannam@0
|
339 }
|
cannam@0
|
340
|
cannam@1
|
341 if (param == "peak threshold") return m_peakThreshold;
|
cannam@1
|
342 if (param == "rolloff threshold") return m_rolloffThreshold;
|
cannam@1
|
343 if (param == "harmonic threshold") return m_harmonicThreshold;
|
cannam@0
|
344
|
cannam@0
|
345 return 0.f;
|
cannam@0
|
346 }
|
cannam@0
|
347
|
cannam@0
|
348 void
|
cannam@0
|
349 XTractPlugin::setParameter(string param, float value)
|
cannam@0
|
350 {
|
cannam@1
|
351 if (m_xtFeature == XTRACT_MFCC) {
|
cannam@0
|
352 if (param == "minfreq") m_minFreq = value;
|
cannam@0
|
353 else if (param == "maxfreq") m_maxFreq = value;
|
cannam@0
|
354 else if (param == "bands") m_coeffCount = lrintf(value + .1);
|
cannam@0
|
355 else if (param == "style") m_mfccStyle = lrintf(value + .1);
|
cannam@0
|
356 }
|
cannam@0
|
357
|
cannam@1
|
358 if (param == "peak threshold") m_peakThreshold = value;
|
cannam@1
|
359 if (param == "rolloff threshold") m_rolloffThreshold = value;
|
cannam@1
|
360 if (param == "harmonic threshold") m_harmonicThreshold = value;
|
cannam@0
|
361 }
|
cannam@0
|
362
|
cannam@0
|
363 XTractPlugin::OutputList
|
cannam@0
|
364 XTractPlugin::getOutputDescriptors() const
|
cannam@0
|
365 {
|
cannam@0
|
366 if (m_outputDescriptors.empty()) setupOutputDescriptors();
|
cannam@0
|
367 return m_outputDescriptors;
|
cannam@0
|
368 }
|
cannam@0
|
369
|
cannam@0
|
370 void
|
cannam@0
|
371 XTractPlugin::setupOutputDescriptors() const
|
cannam@0
|
372 {
|
cannam@0
|
373 OutputDescriptor d;
|
cannam@1
|
374 const xtract_function_descriptor_t *xtFd = xtDescriptor();
|
cannam@2
|
375 d.identifier = getIdentifier();
|
cannam@2
|
376 d.name = getName();
|
cannam@2
|
377 d.description = getDescription();
|
cannam@0
|
378 d.unit = "";
|
cannam@0
|
379 d.hasFixedBinCount = true;
|
cannam@0
|
380 d.binCount = m_outputBinCount;
|
cannam@0
|
381 d.hasKnownExtents = false;
|
cannam@0
|
382 d.isQuantized = false;
|
cannam@0
|
383 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
cannam@0
|
384
|
cannam@1
|
385 if(xtFd->is_scalar){
|
cannam@1
|
386 switch(xtFd->result.scalar.unit){
|
cannam@1
|
387 case XTRACT_HERTZ: d.unit = "Hz"; break;
|
cannam@1
|
388 case XTRACT_DBFS: d.unit = "dB"; break;
|
cannam@1
|
389 default: d.unit = ""; break;
|
cannam@1
|
390 }
|
cannam@1
|
391 }
|
cannam@1
|
392 else {
|
cannam@1
|
393 if (xtFd->result.vector.format == XTRACT_SPECTRAL){
|
cannam@0
|
394
|
cannam@1
|
395 d.binCount /= 2;
|
cannam@2
|
396 d.identifier = "amplitudes";
|
cannam@2
|
397 d.name = "Peak Amplitudes";
|
cannam@2
|
398 d.description = "";
|
cannam@1
|
399 m_outputDescriptors.push_back(d);
|
cannam@0
|
400
|
cannam@1
|
401 }
|
cannam@1
|
402 }
|
cannam@0
|
403
|
cannam@0
|
404 m_outputDescriptors.push_back(d);
|
cannam@0
|
405 }
|
cannam@0
|
406
|
cannam@0
|
407 bool
|
cannam@0
|
408 XTractPlugin::needPeakThreshold() const
|
cannam@0
|
409 {
|
cannam@1
|
410 const xtract_function_descriptor_t *xtFd = xtDescriptor();
|
cannam@0
|
411
|
cannam@1
|
412 if(m_xtFeature == XTRACT_PEAK_SPECTRUM ||
|
cannam@1
|
413 xtFd->data.format == XTRACT_SPECTRAL_PEAKS ||
|
cannam@1
|
414 xtFd->data.format == XTRACT_SPECTRAL_PEAKS_MAGNITUDES ||
|
cannam@1
|
415 needHarmonicThreshold())
|
cannam@1
|
416 return true;
|
cannam@1
|
417 else return false;
|
cannam@1
|
418 }
|
cannam@1
|
419
|
cannam@1
|
420 bool
|
cannam@1
|
421 XTractPlugin::needHarmonicThreshold() const
|
cannam@1
|
422 {
|
cannam@1
|
423 const xtract_function_descriptor_t *xtFd = xtDescriptor();
|
cannam@1
|
424
|
cannam@1
|
425 if(m_xtFeature == XTRACT_HARMONIC_SPECTRUM ||
|
cannam@1
|
426 xtFd->data.format == XTRACT_SPECTRAL_HARMONICS_FREQUENCIES ||
|
cannam@1
|
427 m_xtFeature == XTRACT_NOISINESS ||
|
cannam@1
|
428 xtFd->data.format == XTRACT_SPECTRAL_HARMONICS_MAGNITUDES)
|
cannam@1
|
429 return true;
|
cannam@1
|
430 else return false;
|
cannam@1
|
431 }
|
cannam@1
|
432
|
cannam@1
|
433 bool
|
cannam@1
|
434 XTractPlugin::needRolloffThreshold() const
|
cannam@1
|
435 {
|
cannam@1
|
436 if(m_xtFeature == XTRACT_ROLLOFF)
|
cannam@1
|
437 return true;
|
cannam@1
|
438 else
|
cannam@1
|
439 return false;
|
cannam@0
|
440 }
|
cannam@0
|
441
|
cannam@0
|
442 XTractPlugin::FeatureSet
|
cannam@0
|
443 XTractPlugin::process(const float *const *inputBuffers,
|
cannam@0
|
444 Vamp::RealTime timestamp)
|
cannam@0
|
445 {
|
cannam@1
|
446
|
cannam@0
|
447 if (m_outputDescriptors.empty()) setupOutputDescriptors();
|
cannam@0
|
448
|
cannam@0
|
449 int rbs = m_outputBinCount > m_blockSize ? m_outputBinCount : m_blockSize;
|
cannam@0
|
450 if (!m_resultBuffer) {
|
cannam@0
|
451 m_resultBuffer = new float[rbs];
|
cannam@0
|
452 }
|
cannam@0
|
453
|
cannam@1
|
454 int i;
|
cannam@1
|
455
|
cannam@1
|
456 for (i = 0; i < rbs; ++i) m_resultBuffer[i] = 0.f;
|
cannam@1
|
457
|
cannam@1
|
458 const float *data = 0;
|
cannam@1
|
459 float *fft_temp = 0, *data_temp = 0;
|
cannam@1
|
460 int N = m_blockSize, M = N >> 1;
|
cannam@0
|
461 void *argv = 0;
|
cannam@1
|
462 bool isSpectral = false;
|
cannam@1
|
463 xtract_function_descriptor_t *xtFd = xtDescriptor();
|
cannam@0
|
464
|
cannam@0
|
465 FeatureSet fs;
|
cannam@0
|
466
|
cannam@1
|
467 switch (xtFd->data.format) {
|
cannam@1
|
468 case XTRACT_AUDIO_SAMPLES:
|
cannam@1
|
469 data = &inputBuffers[0][0];
|
cannam@1
|
470 break;
|
cannam@1
|
471 case XTRACT_SPECTRAL:
|
cannam@1
|
472 default:
|
cannam@1
|
473 // All the rest are derived from the spectrum
|
cannam@1
|
474 // Need same format as would be output by xtract_spectrum
|
cannam@1
|
475 float q = m_inputSampleRate / N;
|
cannam@1
|
476 fft_temp = new float[N];
|
cannam@1
|
477 for (int n = 1; n < N/2; ++n) {
|
cannam@1
|
478 fft_temp[n] = sqrt(inputBuffers[0][n*2] *
|
cannam@1
|
479 inputBuffers[0][n*2] + inputBuffers[0][n*2+1] *
|
cannam@1
|
480 inputBuffers[0][n*2+1]) / N;
|
cannam@1
|
481 fft_temp[N-n] = (N/2 - n) * q;
|
cannam@1
|
482 }
|
cannam@1
|
483 fft_temp[0] = fabs(inputBuffers[0][0]) / N;
|
cannam@1
|
484 fft_temp[N/2] = fabs(inputBuffers[0][N]) / N;
|
cannam@1
|
485 data = &fft_temp[0];
|
cannam@1
|
486 isSpectral = true;
|
cannam@1
|
487 break;
|
cannam@0
|
488 }
|
cannam@0
|
489
|
cannam@0
|
490 assert(m_outputBinCount > 0);
|
cannam@0
|
491
|
cannam@0
|
492 float *result = m_resultBuffer;
|
cannam@0
|
493
|
cannam@1
|
494 float argf[XTRACT_MAXARGS];
|
cannam@0
|
495 argv = &argf[0];
|
cannam@0
|
496
|
cannam@1
|
497 float mean, variance, sd, npartials, nharmonics;
|
cannam@0
|
498
|
cannam@1
|
499 bool needSD, needVariance, needMean, needPeaks,
|
cannam@1
|
500 needBarkCoefficients, needHarmonics, needF0, needSFM, needMax,
|
cannam@1
|
501 needNumPartials, needNumHarmonics;
|
cannam@0
|
502
|
cannam@1
|
503 int donor;
|
cannam@0
|
504
|
cannam@1
|
505 needSD = needVariance = needMean = needPeaks =
|
cannam@1
|
506 needBarkCoefficients = needF0 = needHarmonics = needSFM = needMax =
|
cannam@1
|
507 needNumPartials = needNumHarmonics = 0;
|
cannam@0
|
508
|
cannam@1
|
509 mean = variance = sd = npartials = nharmonics = 0.f;
|
cannam@0
|
510
|
cannam@1
|
511 i = xtFd->argc;
|
cannam@0
|
512
|
cannam@1
|
513 while(i--){
|
cannam@1
|
514 donor = xtFd->argv.donor[i];
|
cannam@1
|
515 switch(donor){
|
cannam@1
|
516 case XTRACT_STANDARD_DEVIATION:
|
cannam@1
|
517 case XTRACT_SPECTRAL_STANDARD_DEVIATION:
|
cannam@1
|
518 needSD = 1;
|
cannam@1
|
519 break;
|
cannam@1
|
520 case XTRACT_VARIANCE:
|
cannam@1
|
521 case XTRACT_SPECTRAL_VARIANCE:
|
cannam@1
|
522 needVariance = 1;
|
cannam@1
|
523 break;
|
cannam@1
|
524 case XTRACT_MEAN:
|
cannam@1
|
525 case XTRACT_SPECTRAL_MEAN:
|
cannam@1
|
526 needMean = 1;
|
cannam@1
|
527 break;
|
cannam@1
|
528 case XTRACT_F0:
|
cannam@1
|
529 case XTRACT_FAILSAFE_F0:
|
cannam@1
|
530 needF0 = 1;
|
cannam@1
|
531 break;
|
cannam@1
|
532 case XTRACT_FLATNESS:
|
cannam@1
|
533 needSFM = 1;
|
cannam@1
|
534 case XTRACT_HIGHEST_VALUE:
|
cannam@1
|
535 needMax = 1;
|
cannam@1
|
536 break;
|
cannam@1
|
537 }
|
cannam@1
|
538 }
|
cannam@1
|
539
|
cannam@1
|
540 if(needHarmonicThreshold() && m_xtFeature != XTRACT_HARMONIC_SPECTRUM)
|
cannam@1
|
541 needHarmonics = needF0 = 1;
|
cannam@1
|
542
|
cannam@1
|
543 if(needPeakThreshold() && m_xtFeature != XTRACT_PEAK_SPECTRUM)
|
cannam@1
|
544 needPeaks = 1;
|
cannam@1
|
545
|
cannam@1
|
546 if(xtFd->data.format == XTRACT_BARK_COEFFS &&
|
cannam@1
|
547 m_xtFeature != XTRACT_BARK_COEFFICIENTS){
|
cannam@1
|
548 needBarkCoefficients = 1;
|
cannam@0
|
549 }
|
cannam@0
|
550
|
cannam@0
|
551 if (needMean) {
|
cannam@1
|
552 if(isSpectral)
|
cannam@1
|
553 xtract_spectral_mean(data, N, 0, result);
|
cannam@1
|
554 else
|
cannam@1
|
555 xtract_mean(data, M, 0, result);
|
cannam@0
|
556 mean = *result;
|
cannam@0
|
557 *result = 0.f;
|
cannam@0
|
558 }
|
cannam@0
|
559
|
cannam@1
|
560 if (needVariance || needSD) {
|
cannam@0
|
561 argf[0] = mean;
|
cannam@1
|
562 if(isSpectral)
|
cannam@1
|
563 xtract_spectral_variance(data, N, argv, result);
|
cannam@1
|
564 else
|
cannam@1
|
565 xtract_variance(data, M, argv, result);
|
cannam@0
|
566 variance = *result;
|
cannam@0
|
567 *result = 0.f;
|
cannam@0
|
568 }
|
cannam@0
|
569
|
cannam@0
|
570 if (needSD) {
|
cannam@0
|
571 argf[0] = variance;
|
cannam@1
|
572 if(isSpectral)
|
cannam@1
|
573 xtract_spectral_standard_deviation(data, N, argv, result);
|
cannam@1
|
574 else
|
cannam@1
|
575 xtract_standard_deviation(data, M, argv, result);
|
cannam@0
|
576 sd = *result;
|
cannam@0
|
577 *result = 0.f;
|
cannam@0
|
578 }
|
cannam@0
|
579
|
cannam@1
|
580 if (needMax) {
|
cannam@1
|
581 xtract_highest_value(data, M, argv, result);
|
cannam@1
|
582 argf[1] = *result;
|
cannam@1
|
583 *result = 0.f;
|
cannam@1
|
584 }
|
cannam@1
|
585
|
cannam@0
|
586 if (needSD) {
|
cannam@0
|
587 argf[0] = mean;
|
cannam@0
|
588 argf[1] = sd;
|
cannam@0
|
589 } else if (needVariance) {
|
cannam@0
|
590 argf[0] = variance;
|
cannam@0
|
591 } else if (needMean) {
|
cannam@0
|
592 argf[0] = mean;
|
cannam@0
|
593 }
|
cannam@0
|
594
|
cannam@0
|
595 // data should be now correct for all except:
|
cannam@1
|
596 // XTRACT_SPECTRAL_CENTROID -- N/2 magnitude peaks and N/2 frequencies
|
cannam@1
|
597 // TONALITY -- SFM
|
cannam@0
|
598 // TRISTIMULUS_1/2/3 -- harmonic spectrum
|
cannam@0
|
599 // ODD_EVEN_RATIO -- harmonic spectrum
|
cannam@0
|
600 // LOUDNESS -- Bark coefficients
|
cannam@1
|
601 // XTRACT_HARMONIC_SPECTRUM -- peak spectrum
|
cannam@0
|
602
|
cannam@0
|
603 // argv should be now correct for all except:
|
cannam@0
|
604 //
|
cannam@1
|
605 // XTRACT_ROLLOFF -- (sr/N), threshold (%)
|
cannam@1
|
606 // XTRACT_PEAK_SPECTRUM -- (sr / N), peak threshold (%)
|
cannam@1
|
607 // XTRACT_HARMONIC_SPECTRUM -- f0, harmonic threshold
|
cannam@1
|
608 // XTRACT_F0 -- samplerate
|
cannam@1
|
609 // XTRACT_MFCC -- Mel filter coefficients
|
cannam@1
|
610 // XTRACT_BARK_COEFFICIENTS -- Bark band limits
|
cannam@1
|
611 // XTRACT_NOISINESS -- npartials, nharmonics.
|
cannam@0
|
612
|
cannam@1
|
613 data_temp = new float[N];
|
cannam@1
|
614
|
cannam@1
|
615 if (m_xtFeature == XTRACT_ROLLOFF ||
|
cannam@1
|
616 m_xtFeature == XTRACT_PEAK_SPECTRUM || needPeaks) {
|
cannam@1
|
617 argf[0] = m_inputSampleRate / N;
|
cannam@1
|
618 if(m_xtFeature == XTRACT_ROLLOFF)
|
cannam@1
|
619 argf[1] = m_rolloffThreshold;
|
cannam@1
|
620 else
|
cannam@1
|
621 argf[1] = m_peakThreshold;
|
cannam@0
|
622 argv = &argf[0];
|
cannam@0
|
623 }
|
cannam@0
|
624
|
cannam@0
|
625 if (needPeaks) {
|
cannam@1
|
626 //We only read in the magnitudes (M)
|
cannam@1
|
627 /*int rv = */ xtract_peak_spectrum(data, M, argv, result);
|
cannam@0
|
628 for (int n = 0; n < N; ++n) {
|
cannam@1
|
629 data_temp[n] = result[n];
|
cannam@0
|
630 result[n] = 0.f;
|
cannam@0
|
631 }
|
cannam@0
|
632 // rv not trustworthy
|
cannam@0
|
633 // if (rv != SUCCESS) {
|
cannam@0
|
634 // cerr << "ERROR: XTractPlugin::process: xtract_peaks failed (error code = " << rv << ")" << endl;
|
cannam@0
|
635 // goto done;
|
cannam@0
|
636 // }
|
cannam@0
|
637 }
|
cannam@0
|
638
|
cannam@1
|
639 if (needNumPartials) {
|
cannam@1
|
640 xtract_nonzero_count(data_temp, M, NULL, &npartials);
|
cannam@1
|
641 }
|
cannam@1
|
642
|
cannam@1
|
643 if (needF0 || m_xtFeature == XTRACT_FAILSAFE_F0 ||
|
cannam@1
|
644 m_xtFeature == XTRACT_F0) {
|
cannam@1
|
645 argf[0] = m_inputSampleRate;
|
cannam@1
|
646 argv = &argf[0];
|
cannam@1
|
647 }
|
cannam@1
|
648
|
cannam@1
|
649 if (needF0) {
|
cannam@1
|
650 xtract_failsafe_f0(&inputBuffers[0][0], N,
|
cannam@1
|
651 (void *)&m_inputSampleRate, result);
|
cannam@1
|
652 argf[0] = *result;
|
cannam@1
|
653 argv = &argf[0];
|
cannam@1
|
654 }
|
cannam@1
|
655
|
cannam@1
|
656 if (needSFM) {
|
cannam@1
|
657 xtract_flatness(data, N >> 1, 0, &argf[0]);
|
cannam@1
|
658 argv = &argf[0];
|
cannam@1
|
659 }
|
cannam@1
|
660
|
cannam@1
|
661 if (needHarmonics || m_xtFeature == XTRACT_HARMONIC_SPECTRUM){
|
cannam@1
|
662 argf[1] = m_harmonicThreshold;
|
cannam@1
|
663 }
|
cannam@1
|
664
|
cannam@1
|
665 if (needHarmonics){
|
cannam@1
|
666 xtract_harmonic_spectrum(data_temp, N, argv, result);
|
cannam@1
|
667 for (int n = 0; n < N; ++n) {
|
cannam@1
|
668 data_temp[n] = result[n];
|
cannam@1
|
669 result[n] = 0.f;
|
cannam@1
|
670 }
|
cannam@1
|
671 }
|
cannam@1
|
672
|
cannam@1
|
673 if (needNumHarmonics) {
|
cannam@1
|
674 xtract_nonzero_count(data_temp, M, NULL, &nharmonics);
|
cannam@1
|
675 }
|
cannam@1
|
676
|
cannam@1
|
677 if (m_xtFeature == XTRACT_NOISINESS) {
|
cannam@1
|
678
|
cannam@1
|
679 argf[0] = nharmonics;
|
cannam@1
|
680 argf[1] = npartials;
|
cannam@1
|
681 argv = &argf[0];
|
cannam@1
|
682
|
cannam@1
|
683 }
|
cannam@1
|
684
|
cannam@1
|
685 if (needBarkCoefficients || m_xtFeature == XTRACT_BARK_COEFFICIENTS) {
|
cannam@1
|
686 argv = &m_barkBandLimits[0];
|
cannam@1
|
687 }
|
cannam@1
|
688
|
cannam@1
|
689 xtract_mel_filter mfccFilterBank;
|
cannam@1
|
690 if (m_xtFeature == XTRACT_MFCC) {
|
cannam@1
|
691 mfccFilterBank.n_filters = m_coeffCount;
|
cannam@1
|
692 mfccFilterBank.filters = m_mfccFilters;
|
cannam@1
|
693 argv = &mfccFilterBank;
|
cannam@1
|
694 }
|
cannam@1
|
695
|
cannam@0
|
696 if (needBarkCoefficients) {
|
cannam@1
|
697
|
cannam@1
|
698 /*int rv = */ xtract_bark_coefficients(data, 0, argv, data_temp);
|
cannam@0
|
699 // if (rv != SUCCESS) {
|
cannam@0
|
700 // cerr << "ERROR: XTractPlugin::process: xtract_bark_coefficients failed (error code = " << rv << ")" << endl;
|
cannam@0
|
701 // goto done;
|
cannam@0
|
702 // }
|
cannam@1
|
703 data = &data_temp[0];
|
cannam@0
|
704 argv = 0;
|
cannam@0
|
705 }
|
cannam@1
|
706
|
cannam@1
|
707 if (xtFd->data.format == XTRACT_SPECTRAL_HARMONICS_FREQUENCIES) {
|
cannam@0
|
708
|
cannam@1
|
709 N = M;
|
cannam@1
|
710 data = &data_temp[N];
|
cannam@0
|
711
|
cannam@1
|
712 } else if (xtFd->data.format == XTRACT_SPECTRAL_HARMONICS_MAGNITUDES) {
|
cannam@0
|
713
|
cannam@1
|
714 N = M;
|
cannam@1
|
715 data = &data_temp[0];
|
cannam@1
|
716
|
cannam@1
|
717 }
|
cannam@0
|
718
|
cannam@1
|
719 // If we only want spectral magnitudes, use first half of the input array
|
cannam@1
|
720 else if(xtFd->data.format == XTRACT_SPECTRAL_MAGNITUDES ||
|
cannam@1
|
721 xtFd->data.format == XTRACT_SPECTRAL_PEAKS_MAGNITUDES ||
|
cannam@1
|
722 xtFd->data.format == XTRACT_ARBITRARY_SERIES) {
|
cannam@1
|
723 N = M;
|
cannam@1
|
724 }
|
cannam@1
|
725
|
cannam@1
|
726 else if(xtFd->data.format == XTRACT_BARK_COEFFS) {
|
cannam@1
|
727
|
cannam@1
|
728 N = XTRACT_BARK_BANDS - 1; /* Because our SR is 44100 (< 54000)*/
|
cannam@1
|
729 }
|
cannam@1
|
730
|
cannam@1
|
731 if (needPeaks && !needHarmonics) {
|
cannam@1
|
732
|
cannam@1
|
733 data = &data_temp[0];
|
cannam@1
|
734
|
cannam@0
|
735 }
|
cannam@0
|
736
|
cannam@0
|
737 // now the main result
|
cannam@0
|
738 xtract[m_xtFeature](data, N, argv, result);
|
cannam@0
|
739
|
cannam@1
|
740 //haveResult:
|
cannam@1
|
741 // {
|
cannam@0
|
742 int index = 0;
|
cannam@0
|
743
|
cannam@0
|
744 for (size_t output = 0; output < m_outputDescriptors.size(); ++output) {
|
cannam@0
|
745
|
cannam@0
|
746 Feature feature;
|
cannam@0
|
747 feature.hasTimestamp = false;
|
cannam@0
|
748 bool good = true;
|
cannam@0
|
749
|
cannam@0
|
750 for (size_t n = 0; n < m_outputDescriptors[output].binCount; ++n) {
|
cannam@0
|
751 float value = m_resultBuffer[index];
|
cannam@0
|
752 if (isnan(value) || isinf(value)) {
|
cannam@0
|
753 good = false;
|
cannam@0
|
754 index += (m_outputDescriptors[output].binCount - n);
|
cannam@0
|
755 break;
|
cannam@0
|
756 }
|
cannam@0
|
757 feature.values.push_back(value);
|
cannam@0
|
758 ++index;
|
cannam@0
|
759 }
|
cannam@0
|
760
|
cannam@0
|
761 if (good) fs[output].push_back(feature);
|
cannam@0
|
762 }
|
cannam@1
|
763 // }
|
cannam@0
|
764
|
cannam@1
|
765 //done:
|
cannam@1
|
766 delete[] fft_temp;
|
cannam@1
|
767 delete[] data_temp;
|
cannam@0
|
768
|
cannam@3
|
769 // cerr << "XTractPlugin::process returning" << endl;
|
cannam@0
|
770
|
cannam@0
|
771 return fs;
|
cannam@0
|
772 }
|
cannam@0
|
773
|
cannam@0
|
774 XTractPlugin::FeatureSet
|
cannam@0
|
775 XTractPlugin::getRemainingFeatures()
|
cannam@0
|
776 {
|
cannam@0
|
777 return FeatureSet();
|
cannam@0
|
778 }
|
cannam@0
|
779
|