comparison transform/FeatureExtractionPluginTransform.cpp @ 0:cd5d7ff8ef38

* Reorganising code base. This revision will not compile.
author Chris Cannam
date Mon, 31 Jul 2006 12:03:45 +0000
parents
children 40116f709d3b
comparison
equal deleted inserted replaced
-1:000000000000 0:cd5d7ff8ef38
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 Sonic Visualiser
5 An audio file viewer and annotation editor.
6 Centre for Digital Music, Queen Mary, University of London.
7 This file copyright 2006 Chris Cannam.
8
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License as
11 published by the Free Software Foundation; either version 2 of the
12 License, or (at your option) any later version. See the file
13 COPYING included with this distribution for more information.
14 */
15
16 #include "FeatureExtractionPluginTransform.h"
17
18 #include "plugin/FeatureExtractionPluginFactory.h"
19 #include "plugin/PluginXml.h"
20 #include "vamp-sdk/Plugin.h"
21
22 #include "base/Model.h"
23 #include "base/Window.h"
24 #include "model/SparseOneDimensionalModel.h"
25 #include "model/SparseTimeValueModel.h"
26 #include "model/DenseThreeDimensionalModel.h"
27 #include "model/DenseTimeValueModel.h"
28 #include "model/NoteModel.h"
29 #include "fileio/FFTFuzzyAdapter.h"
30
31 #include <fftw3.h>
32
33 #include <iostream>
34
35 FeatureExtractionPluginTransform::FeatureExtractionPluginTransform(Model *inputModel,
36 QString pluginId,
37 int channel,
38 QString configurationXml,
39 QString outputName) :
40 Transform(inputModel),
41 m_plugin(0),
42 m_channel(channel),
43 m_stepSize(0),
44 m_blockSize(0),
45 m_descriptor(0),
46 m_outputFeatureNo(0)
47 {
48 // std::cerr << "FeatureExtractionPluginTransform::FeatureExtractionPluginTransform: plugin " << pluginId.toStdString() << ", outputName " << outputName.toStdString() << std::endl;
49
50 FeatureExtractionPluginFactory *factory =
51 FeatureExtractionPluginFactory::instanceFor(pluginId);
52
53 if (!factory) {
54 std::cerr << "FeatureExtractionPluginTransform: No factory available for plugin id \""
55 << pluginId.toStdString() << "\"" << std::endl;
56 return;
57 }
58
59 m_plugin = factory->instantiatePlugin(pluginId, m_input->getSampleRate());
60
61 if (!m_plugin) {
62 std::cerr << "FeatureExtractionPluginTransform: Failed to instantiate plugin \""
63 << pluginId.toStdString() << "\"" << std::endl;
64 return;
65 }
66
67 if (configurationXml != "") {
68 PluginXml(m_plugin).setParametersFromXml(configurationXml);
69 }
70
71 m_blockSize = m_plugin->getPreferredBlockSize();
72 m_stepSize = m_plugin->getPreferredStepSize();
73
74 if (m_blockSize == 0) m_blockSize = 1024; //!!! todo: ask user
75 if (m_stepSize == 0) m_stepSize = m_blockSize; //!!! likewise
76
77 DenseTimeValueModel *input = getInput();
78 if (!input) return;
79
80 size_t channelCount = input->getChannelCount();
81 if (m_plugin->getMaxChannelCount() < channelCount) {
82 channelCount = 1;
83 }
84 if (m_plugin->getMinChannelCount() > channelCount) {
85 std::cerr << "FeatureExtractionPluginTransform:: "
86 << "Can't provide enough channels to plugin (plugin min "
87 << m_plugin->getMinChannelCount() << ", max "
88 << m_plugin->getMaxChannelCount() << ", input model has "
89 << input->getChannelCount() << ")" << std::endl;
90 return;
91 }
92
93 if (!m_plugin->initialise(channelCount, m_stepSize, m_blockSize)) {
94 std::cerr << "FeatureExtractionPluginTransform: Plugin "
95 << m_plugin->getName() << " failed to initialise!" << std::endl;
96 return;
97 }
98
99 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
100
101 if (outputs.empty()) {
102 std::cerr << "FeatureExtractionPluginTransform: Plugin \""
103 << pluginId.toStdString() << "\" has no outputs" << std::endl;
104 return;
105 }
106
107 for (size_t i = 0; i < outputs.size(); ++i) {
108 if (outputName == "" || outputs[i].name == outputName.toStdString()) {
109 m_outputFeatureNo = i;
110 m_descriptor = new Vamp::Plugin::OutputDescriptor
111 (outputs[i]);
112 break;
113 }
114 }
115
116 if (!m_descriptor) {
117 std::cerr << "FeatureExtractionPluginTransform: Plugin \""
118 << pluginId.toStdString() << "\" has no output named \""
119 << outputName.toStdString() << "\"" << std::endl;
120 return;
121 }
122
123 // std::cerr << "FeatureExtractionPluginTransform: output sample type "
124 // << m_descriptor->sampleType << std::endl;
125
126 int binCount = 1;
127 float minValue = 0.0, maxValue = 0.0;
128
129 if (m_descriptor->hasFixedBinCount) {
130 binCount = m_descriptor->binCount;
131 }
132
133 // std::cerr << "FeatureExtractionPluginTransform: output bin count "
134 // << binCount << std::endl;
135
136 if (binCount > 0 && m_descriptor->hasKnownExtents) {
137 minValue = m_descriptor->minValue;
138 maxValue = m_descriptor->maxValue;
139 }
140
141 size_t modelRate = m_input->getSampleRate();
142 size_t modelResolution = 1;
143
144 switch (m_descriptor->sampleType) {
145
146 case Vamp::Plugin::OutputDescriptor::VariableSampleRate:
147 if (m_descriptor->sampleRate != 0.0) {
148 modelResolution = size_t(modelRate / m_descriptor->sampleRate + 0.001);
149 }
150 break;
151
152 case Vamp::Plugin::OutputDescriptor::OneSamplePerStep:
153 modelResolution = m_stepSize;
154 break;
155
156 case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
157 modelRate = size_t(m_descriptor->sampleRate + 0.001);
158 break;
159 }
160
161 if (binCount == 0) {
162
163 m_output = new SparseOneDimensionalModel(modelRate, modelResolution,
164 false);
165
166 } else if (binCount == 1) {
167
168 SparseTimeValueModel *model = new SparseTimeValueModel
169 (modelRate, modelResolution, minValue, maxValue, false);
170 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());
171
172 m_output = model;
173
174 } else if (m_descriptor->sampleType ==
175 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
176
177 // We don't have a sparse 3D model, so interpret this as a
178 // note model. There's nothing to define which values to use
179 // as which parameters of the note -- for the moment let's
180 // treat the first as pitch, second as duration in frames,
181 // third (if present) as velocity. (Our note model doesn't
182 // yet store velocity.)
183 //!!! todo: ask the user!
184
185 NoteModel *model = new NoteModel
186 (modelRate, modelResolution, minValue, maxValue, false);
187 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());
188
189 m_output = model;
190
191 } else {
192
193 m_output = new DenseThreeDimensionalModel(modelRate, modelResolution,
194 binCount, false);
195
196 if (!m_descriptor->binNames.empty()) {
197 std::vector<QString> names;
198 for (size_t i = 0; i < m_descriptor->binNames.size(); ++i) {
199 names.push_back(m_descriptor->binNames[i].c_str());
200 }
201 (dynamic_cast<DenseThreeDimensionalModel *>(m_output))
202 ->setBinNames(names);
203 }
204 }
205 }
206
207 FeatureExtractionPluginTransform::~FeatureExtractionPluginTransform()
208 {
209 delete m_plugin;
210 delete m_descriptor;
211 }
212
213 DenseTimeValueModel *
214 FeatureExtractionPluginTransform::getInput()
215 {
216 DenseTimeValueModel *dtvm =
217 dynamic_cast<DenseTimeValueModel *>(getInputModel());
218 if (!dtvm) {
219 std::cerr << "FeatureExtractionPluginTransform::getInput: WARNING: Input model is not conformable to DenseTimeValueModel" << std::endl;
220 }
221 return dtvm;
222 }
223
224 void
225 FeatureExtractionPluginTransform::run()
226 {
227 DenseTimeValueModel *input = getInput();
228 if (!input) return;
229
230 if (!m_output) return;
231
232 size_t sampleRate = m_input->getSampleRate();
233
234 size_t channelCount = input->getChannelCount();
235 if (m_plugin->getMaxChannelCount() < channelCount) {
236 channelCount = 1;
237 }
238
239 float **buffers = new float*[channelCount];
240 for (size_t ch = 0; ch < channelCount; ++ch) {
241 buffers[ch] = new float[m_blockSize];
242 }
243
244 bool frequencyDomain = (m_plugin->getInputDomain() ==
245 Vamp::Plugin::FrequencyDomain);
246 std::vector<FFTFuzzyAdapter *> fftAdapters;
247
248 if (frequencyDomain) {
249 for (size_t ch = 0; ch < channelCount; ++ch) {
250 fftAdapters.push_back(new FFTFuzzyAdapter
251 (getInput(),
252 channelCount == 1 ? m_channel : ch,
253 HanningWindow,
254 m_blockSize,
255 m_stepSize,
256 m_blockSize,
257 false));
258 }
259 }
260
261 long startFrame = m_input->getStartFrame();
262 long endFrame = m_input->getEndFrame();
263 long blockFrame = startFrame;
264
265 long prevCompletion = 0;
266
267 while (1) {
268
269 if (frequencyDomain) {
270 if (blockFrame - int(m_blockSize)/2 > endFrame) break;
271 } else {
272 if (blockFrame >= endFrame) break;
273 }
274
275 // std::cerr << "FeatureExtractionPluginTransform::run: blockFrame "
276 // << blockFrame << std::endl;
277
278 long completion =
279 (((blockFrame - startFrame) / m_stepSize) * 99) /
280 ( (endFrame - startFrame) / m_stepSize);
281
282 // channelCount is either m_input->channelCount or 1
283
284 for (size_t ch = 0; ch < channelCount; ++ch) {
285 if (frequencyDomain) {
286 int column = (blockFrame - startFrame) / m_stepSize;
287 for (size_t i = 0; i < m_blockSize/2; ++i) {
288 fftAdapters[ch]->getValuesAt
289 (column, i, buffers[ch][i*2], buffers[ch][i*2+1]);
290 }
291 /*!!!
292 float sum = 0.0;
293 for (size_t i = 0; i < m_blockSize/2; ++i) {
294 sum += buffers[ch][i*2];
295 }
296 if (fabs(sum) < 0.0001) {
297 std::cerr << "WARNING: small sum for column " << column << " (sum is " << sum << ")" << std::endl;
298 }
299 */
300 } else {
301 getFrames(ch, channelCount,
302 blockFrame, m_blockSize, buffers[ch]);
303 }
304 }
305
306 Vamp::Plugin::FeatureSet features = m_plugin->process
307 (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate));
308
309 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
310 Vamp::Plugin::Feature feature =
311 features[m_outputFeatureNo][fi];
312 addFeature(blockFrame, feature);
313 }
314
315 if (blockFrame == startFrame || completion > prevCompletion) {
316 setCompletion(completion);
317 prevCompletion = completion;
318 }
319
320 blockFrame += m_stepSize;
321 }
322
323 Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures();
324
325 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
326 Vamp::Plugin::Feature feature =
327 features[m_outputFeatureNo][fi];
328 addFeature(blockFrame, feature);
329 }
330
331 if (frequencyDomain) {
332 for (size_t ch = 0; ch < channelCount; ++ch) {
333 delete fftAdapters[ch];
334 }
335 }
336
337 setCompletion(100);
338 }
339
340 void
341 FeatureExtractionPluginTransform::getFrames(int channel, int channelCount,
342 long startFrame, long size,
343 float *buffer)
344 {
345 long offset = 0;
346
347 if (startFrame < 0) {
348 for (int i = 0; i < size && startFrame + i < 0; ++i) {
349 buffer[i] = 0.0f;
350 }
351 offset = -startFrame;
352 size -= offset;
353 if (size <= 0) return;
354 startFrame = 0;
355 }
356
357 long got = getInput()->getValues
358 ((channelCount == 1 ? m_channel : channel),
359 startFrame, startFrame + size, buffer + offset);
360
361 while (got < size) {
362 buffer[offset + got] = 0.0;
363 ++got;
364 }
365
366 if (m_channel == -1 && channelCount == 1 &&
367 getInput()->getChannelCount() > 1) {
368 // use mean instead of sum, as plugin input
369 int cc = getInput()->getChannelCount();
370 for (long i = 0; i < size; ++i) {
371 buffer[i] /= cc;
372 }
373 }
374 }
375
376 void
377 FeatureExtractionPluginTransform::addFeature(size_t blockFrame,
378 const Vamp::Plugin::Feature &feature)
379 {
380 size_t inputRate = m_input->getSampleRate();
381
382 // std::cerr << "FeatureExtractionPluginTransform::addFeature("
383 // << blockFrame << ")" << std::endl;
384
385 int binCount = 1;
386 if (m_descriptor->hasFixedBinCount) {
387 binCount = m_descriptor->binCount;
388 }
389
390 size_t frame = blockFrame;
391
392 if (m_descriptor->sampleType ==
393 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
394
395 if (!feature.hasTimestamp) {
396 std::cerr
397 << "WARNING: FeatureExtractionPluginTransform::addFeature: "
398 << "Feature has variable sample rate but no timestamp!"
399 << std::endl;
400 return;
401 } else {
402 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate);
403 }
404
405 } else if (m_descriptor->sampleType ==
406 Vamp::Plugin::OutputDescriptor::FixedSampleRate) {
407
408 if (feature.hasTimestamp) {
409 //!!! warning: sampleRate may be non-integral
410 frame = Vamp::RealTime::realTime2Frame(feature.timestamp,
411 m_descriptor->sampleRate);
412 } else {
413 frame = m_output->getEndFrame() + 1;
414 }
415 }
416
417 if (binCount == 0) {
418
419 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>();
420 if (!model) return;
421 model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str()));
422
423 } else if (binCount == 1) {
424
425 float value = 0.0;
426 if (feature.values.size() > 0) value = feature.values[0];
427
428 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>();
429 if (!model) return;
430 model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str()));
431
432 } else if (m_descriptor->sampleType ==
433 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
434
435 float pitch = 0.0;
436 if (feature.values.size() > 0) pitch = feature.values[0];
437
438 float duration = 1;
439 if (feature.values.size() > 1) duration = feature.values[1];
440
441 float velocity = 100;
442 if (feature.values.size() > 2) velocity = feature.values[2];
443
444 NoteModel *model = getOutput<NoteModel>();
445 if (!model) return;
446
447 model->addPoint(NoteModel::Point(frame, pitch, duration, feature.label.c_str()));
448
449 } else {
450
451 DenseThreeDimensionalModel::BinValueSet values = feature.values;
452
453 DenseThreeDimensionalModel *model = getOutput<DenseThreeDimensionalModel>();
454 if (!model) return;
455
456 model->setBinValues(frame, values);
457 }
458 }
459
460 void
461 FeatureExtractionPluginTransform::setCompletion(int completion)
462 {
463 int binCount = 1;
464 if (m_descriptor->hasFixedBinCount) {
465 binCount = m_descriptor->binCount;
466 }
467
468 if (binCount == 0) {
469
470 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>();
471 if (!model) return;
472 model->setCompletion(completion);
473
474 } else if (binCount == 1) {
475
476 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>();
477 if (!model) return;
478 model->setCompletion(completion);
479
480 } else if (m_descriptor->sampleType ==
481 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
482
483 NoteModel *model = getOutput<NoteModel>();
484 if (!model) return;
485 model->setCompletion(completion);
486
487 } else {
488
489 DenseThreeDimensionalModel *model = getOutput<DenseThreeDimensionalModel>();
490 if (!model) return;
491 model->setCompletion(completion);
492 }
493 }
494