comparison sv/transform/FeatureExtractionPluginTransform.cpp @ 0:fc9323a41f5a

start base : Sonic Visualiser sv1-1.0rc1
author lbajardsilogic
date Fri, 11 May 2007 09:08:14 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:fc9323a41f5a
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 Sonic Visualiser
5 An audio file viewer and annotation editor.
6 Centre for Digital Music, Queen Mary, University of London.
7 This file copyright 2006 Chris Cannam and QMUL.
8
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License as
11 published by the Free Software Foundation; either version 2 of the
12 License, or (at your option) any later version. See the file
13 COPYING included with this distribution for more information.
14 */
15
16 #include "FeatureExtractionPluginTransform.h"
17
18 #include "plugin/FeatureExtractionPluginFactory.h"
19 #include "plugin/PluginXml.h"
20 #include "vamp-sdk/Plugin.h"
21
22 #include "data/model/Model.h"
23 #include "base/Window.h"
24 #include "data/model/SparseOneDimensionalModel.h"
25 #include "data/model/SparseTimeValueModel.h"
26 #include "data/model/EditableDenseThreeDimensionalModel.h"
27 #include "data/model/DenseTimeValueModel.h"
28 #include "data/model/NoteModel.h"
29 #include "data/model/FFTModel.h"
30 #include "data/model/WaveFileModel.h"
31
32 #include <QMessageBox>
33
34 #include <iostream>
35
36 FeatureExtractionPluginTransform::FeatureExtractionPluginTransform(Model *inputModel,
37 QString pluginId,
38 const ExecutionContext &context,
39 QString configurationXml,
40 QString outputName) :
41 PluginTransform(inputModel, context),
42 m_plugin(0),
43 m_descriptor(0),
44 m_outputFeatureNo(0)
45 {
46 // std::cerr << "FeatureExtractionPluginTransform::FeatureExtractionPluginTransform: plugin " << pluginId.toStdString() << ", outputName " << outputName.toStdString() << std::endl;
47
48 FeatureExtractionPluginFactory *factory =
49 FeatureExtractionPluginFactory::instanceFor(pluginId);
50
51 if (!factory) {
52 std::cerr << "FeatureExtractionPluginTransform: No factory available for plugin id \""
53 << pluginId.toStdString() << "\"" << std::endl;
54 return;
55 }
56
57 m_plugin = factory->instantiatePlugin(pluginId, m_input->getSampleRate());
58
59 if (!m_plugin) {
60 std::cerr << "FeatureExtractionPluginTransform: Failed to instantiate plugin \""
61 << pluginId.toStdString() << "\"" << std::endl;
62 return;
63 }
64
65 if (configurationXml != "") {
66 PluginXml(m_plugin).setParametersFromXml(configurationXml);
67 }
68
69 DenseTimeValueModel *input = getInput();
70 if (!input) return;
71
72 size_t channelCount = input->getChannelCount();
73 if (m_plugin->getMaxChannelCount() < channelCount) {
74 channelCount = 1;
75 }
76 if (m_plugin->getMinChannelCount() > channelCount) {
77 std::cerr << "FeatureExtractionPluginTransform:: "
78 << "Can't provide enough channels to plugin (plugin min "
79 << m_plugin->getMinChannelCount() << ", max "
80 << m_plugin->getMaxChannelCount() << ", input model has "
81 << input->getChannelCount() << ")" << std::endl;
82 return;
83 }
84
85 std::cerr << "Initialising feature extraction plugin with channels = "
86 << channelCount << ", step = " << m_context.stepSize
87 << ", block = " << m_context.blockSize << std::endl;
88
89 if (!m_plugin->initialise(channelCount,
90 m_context.stepSize,
91 m_context.blockSize)) {
92 std::cerr << "FeatureExtractionPluginTransform: Plugin "
93 << m_plugin->getIdentifier() << " failed to initialise!" << std::endl;
94 return;
95 }
96
97 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
98
99 if (outputs.empty()) {
100 std::cerr << "FeatureExtractionPluginTransform: Plugin \""
101 << pluginId.toStdString() << "\" has no outputs" << std::endl;
102 return;
103 }
104
105 for (size_t i = 0; i < outputs.size(); ++i) {
106 if (outputName == "" || outputs[i].identifier == outputName.toStdString()) {
107 m_outputFeatureNo = i;
108 m_descriptor = new Vamp::Plugin::OutputDescriptor
109 (outputs[i]);
110 break;
111 }
112 }
113
114 if (!m_descriptor) {
115 std::cerr << "FeatureExtractionPluginTransform: Plugin \""
116 << pluginId.toStdString() << "\" has no output named \""
117 << outputName.toStdString() << "\"" << std::endl;
118 return;
119 }
120
121 // std::cerr << "FeatureExtractionPluginTransform: output sample type "
122 // << m_descriptor->sampleType << std::endl;
123
124 int binCount = 1;
125 float minValue = 0.0, maxValue = 0.0;
126 bool haveExtents = false;
127
128 if (m_descriptor->hasFixedBinCount) {
129 binCount = m_descriptor->binCount;
130 }
131
132 // std::cerr << "FeatureExtractionPluginTransform: output bin count "
133 // << binCount << std::endl;
134
135 if (binCount > 0 && m_descriptor->hasKnownExtents) {
136 minValue = m_descriptor->minValue;
137 maxValue = m_descriptor->maxValue;
138 haveExtents = true;
139 }
140
141 size_t modelRate = m_input->getSampleRate();
142 size_t modelResolution = 1;
143
144 switch (m_descriptor->sampleType) {
145
146 case Vamp::Plugin::OutputDescriptor::VariableSampleRate:
147 if (m_descriptor->sampleRate != 0.0) {
148 modelResolution = size_t(modelRate / m_descriptor->sampleRate + 0.001);
149 }
150 break;
151
152 case Vamp::Plugin::OutputDescriptor::OneSamplePerStep:
153 modelResolution = m_context.stepSize;
154 break;
155
156 case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
157 modelRate = size_t(m_descriptor->sampleRate + 0.001);
158 break;
159 }
160
161 if (binCount == 0) {
162
163 m_output = new SparseOneDimensionalModel(modelRate, modelResolution,
164 false);
165
166 } else if (binCount == 1) {
167
168 SparseTimeValueModel *model;
169 if (haveExtents) {
170 model = new SparseTimeValueModel
171 (modelRate, modelResolution, minValue, maxValue, false);
172 } else {
173 model = new SparseTimeValueModel
174 (modelRate, modelResolution, false);
175 }
176 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());
177
178 m_output = model;
179
180 } else if (m_descriptor->sampleType ==
181 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
182
183 // We don't have a sparse 3D model, so interpret this as a
184 // note model. There's nothing to define which values to use
185 // as which parameters of the note -- for the moment let's
186 // treat the first as pitch, second as duration in frames,
187 // third (if present) as velocity. (Our note model doesn't
188 // yet store velocity.)
189 //!!! todo: ask the user!
190
191 NoteModel *model;
192 if (haveExtents) {
193 model = new NoteModel
194 (modelRate, modelResolution, minValue, maxValue, false);
195 } else {
196 model = new NoteModel
197 (modelRate, modelResolution, false);
198 }
199 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());
200
201 m_output = model;
202
203 } else {
204
205 EditableDenseThreeDimensionalModel *model =
206 new EditableDenseThreeDimensionalModel
207 (modelRate, modelResolution, binCount, false);
208
209 if (!m_descriptor->binNames.empty()) {
210 std::vector<QString> names;
211 for (size_t i = 0; i < m_descriptor->binNames.size(); ++i) {
212 names.push_back(m_descriptor->binNames[i].c_str());
213 }
214 model->setBinNames(names);
215 }
216
217 m_output = model;
218 }
219 }
220
221 FeatureExtractionPluginTransform::~FeatureExtractionPluginTransform()
222 {
223 delete m_plugin;
224 delete m_descriptor;
225 }
226
227 DenseTimeValueModel *
228 FeatureExtractionPluginTransform::getInput()
229 {
230 DenseTimeValueModel *dtvm =
231 dynamic_cast<DenseTimeValueModel *>(getInputModel());
232 if (!dtvm) {
233 std::cerr << "FeatureExtractionPluginTransform::getInput: WARNING: Input model is not conformable to DenseTimeValueModel" << std::endl;
234 }
235 return dtvm;
236 }
237
238 void
239 FeatureExtractionPluginTransform::run()
240 {
241 DenseTimeValueModel *input = getInput();
242 if (!input) return;
243
244 while (!input->isReady()) {
245 if (dynamic_cast<WaveFileModel *>(input)) break; // no need to wait
246 std::cerr << "FeatureExtractionPluginTransform::run: Waiting for input model to be ready..." << std::endl;
247 sleep(1);
248 }
249
250 if (!m_output) return;
251
252 size_t sampleRate = m_input->getSampleRate();
253
254 size_t channelCount = input->getChannelCount();
255 if (m_plugin->getMaxChannelCount() < channelCount) {
256 channelCount = 1;
257 }
258
259 float **buffers = new float*[channelCount];
260 for (size_t ch = 0; ch < channelCount; ++ch) {
261 buffers[ch] = new float[m_context.blockSize + 2];
262 }
263
264 bool frequencyDomain = (m_plugin->getInputDomain() ==
265 Vamp::Plugin::FrequencyDomain);
266 std::vector<FFTModel *> fftModels;
267
268 if (frequencyDomain) {
269 for (size_t ch = 0; ch < channelCount; ++ch) {
270 FFTModel *model = new FFTModel
271 (getInput(),
272 channelCount == 1 ? m_context.channel : ch,
273 m_context.windowType,
274 m_context.blockSize,
275 m_context.stepSize,
276 m_context.blockSize,
277 false);
278 if (!model->isOK()) {
279 QMessageBox::critical
280 (0, tr("FFT cache failed"),
281 tr("Failed to create the FFT model for this transform.\n"
282 "There may be insufficient memory or disc space to continue."));
283 delete model;
284 setCompletion(100);
285 return;
286 }
287 model->resume();
288 fftModels.push_back(model);
289 }
290 }
291
292 long startFrame = m_input->getStartFrame();
293 long endFrame = m_input->getEndFrame();
294 long blockFrame = startFrame;
295
296 long prevCompletion = 0;
297
298 while (!m_abandoned) {
299
300 if (frequencyDomain) {
301 if (blockFrame - int(m_context.blockSize)/2 > endFrame) break;
302 } else {
303 if (blockFrame >= endFrame) break;
304 }
305
306 // std::cerr << "FeatureExtractionPluginTransform::run: blockFrame "
307 // << blockFrame << std::endl;
308
309 long completion =
310 (((blockFrame - startFrame) / m_context.stepSize) * 99) /
311 ( (endFrame - startFrame) / m_context.stepSize);
312
313 // channelCount is either m_input->channelCount or 1
314
315 for (size_t ch = 0; ch < channelCount; ++ch) {
316 if (frequencyDomain) {
317 int column = (blockFrame - startFrame) / m_context.stepSize;
318 for (size_t i = 0; i <= m_context.blockSize/2; ++i) {
319 fftModels[ch]->getValuesAt
320 (column, i, buffers[ch][i*2], buffers[ch][i*2+1]);
321 }
322 } else {
323 getFrames(ch, channelCount,
324 blockFrame, m_context.blockSize, buffers[ch]);
325 }
326 }
327
328 Vamp::Plugin::FeatureSet features = m_plugin->process
329 (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate));
330
331 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
332 Vamp::Plugin::Feature feature =
333 features[m_outputFeatureNo][fi];
334 addFeature(blockFrame, feature);
335 }
336
337 if (blockFrame == startFrame || completion > prevCompletion) {
338 setCompletion(completion);
339 prevCompletion = completion;
340 }
341
342 blockFrame += m_context.stepSize;
343 }
344
345 if (m_abandoned) return;
346
347 Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures();
348
349 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
350 Vamp::Plugin::Feature feature =
351 features[m_outputFeatureNo][fi];
352 addFeature(blockFrame, feature);
353 }
354
355 if (frequencyDomain) {
356 for (size_t ch = 0; ch < channelCount; ++ch) {
357 delete fftModels[ch];
358 }
359 }
360
361 setCompletion(100);
362 }
363
364 void
365 FeatureExtractionPluginTransform::getFrames(int channel, int channelCount,
366 long startFrame, long size,
367 float *buffer)
368 {
369 long offset = 0;
370
371 if (startFrame < 0) {
372 for (int i = 0; i < size && startFrame + i < 0; ++i) {
373 buffer[i] = 0.0f;
374 }
375 offset = -startFrame;
376 size -= offset;
377 if (size <= 0) return;
378 startFrame = 0;
379 }
380
381 long got = getInput()->getValues
382 ((channelCount == 1 ? m_context.channel : channel),
383 startFrame, startFrame + size, buffer + offset);
384
385 while (got < size) {
386 buffer[offset + got] = 0.0;
387 ++got;
388 }
389
390 if (m_context.channel == -1 && channelCount == 1 &&
391 getInput()->getChannelCount() > 1) {
392 // use mean instead of sum, as plugin input
393 int cc = getInput()->getChannelCount();
394 for (long i = 0; i < size; ++i) {
395 buffer[i] /= cc;
396 }
397 }
398 }
399
400 void
401 FeatureExtractionPluginTransform::addFeature(size_t blockFrame,
402 const Vamp::Plugin::Feature &feature)
403 {
404 size_t inputRate = m_input->getSampleRate();
405
406 // std::cerr << "FeatureExtractionPluginTransform::addFeature("
407 // << blockFrame << ")" << std::endl;
408
409 int binCount = 1;
410 if (m_descriptor->hasFixedBinCount) {
411 binCount = m_descriptor->binCount;
412 }
413
414 size_t frame = blockFrame;
415
416 if (m_descriptor->sampleType ==
417 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
418
419 if (!feature.hasTimestamp) {
420 std::cerr
421 << "WARNING: FeatureExtractionPluginTransform::addFeature: "
422 << "Feature has variable sample rate but no timestamp!"
423 << std::endl;
424 return;
425 } else {
426 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate);
427 }
428
429 } else if (m_descriptor->sampleType ==
430 Vamp::Plugin::OutputDescriptor::FixedSampleRate) {
431
432 if (feature.hasTimestamp) {
433 //!!! warning: sampleRate may be non-integral
434 frame = Vamp::RealTime::realTime2Frame(feature.timestamp,
435 lrintf(m_descriptor->sampleRate));
436 } else {
437 frame = m_output->getEndFrame();
438 }
439 }
440
441 if (binCount == 0) {
442
443 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>();
444 if (!model) return;
445 model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str()));
446
447 } else if (binCount == 1) {
448
449 float value = 0.0;
450 if (feature.values.size() > 0) value = feature.values[0];
451
452 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>();
453 if (!model) return;
454 model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str()));
455
456 } else if (m_descriptor->sampleType ==
457 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
458
459 float pitch = 0.0;
460 if (feature.values.size() > 0) pitch = feature.values[0];
461
462 float duration = 1;
463 if (feature.values.size() > 1) duration = feature.values[1];
464
465 float velocity = 100;
466 if (feature.values.size() > 2) velocity = feature.values[2];
467
468 NoteModel *model = getOutput<NoteModel>();
469 if (!model) return;
470
471 model->addPoint(NoteModel::Point(frame, pitch,
472 lrintf(duration),
473 feature.label.c_str()));
474
475 } else {
476
477 DenseThreeDimensionalModel::Column values = feature.values;
478
479 EditableDenseThreeDimensionalModel *model =
480 getOutput<EditableDenseThreeDimensionalModel>();
481 if (!model) return;
482
483 model->setColumn(frame / model->getResolution(), values);
484 }
485 }
486
487 void
488 FeatureExtractionPluginTransform::setCompletion(int completion)
489 {
490 int binCount = 1;
491 if (m_descriptor->hasFixedBinCount) {
492 binCount = m_descriptor->binCount;
493 }
494
495 std::cerr << "FeatureExtractionPluginTransform::setCompletion("
496 << completion << ")" << std::endl;
497
498 if (binCount == 0) {
499
500 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>();
501 if (!model) return;
502 model->setCompletion(completion);
503
504 } else if (binCount == 1) {
505
506 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>();
507 if (!model) return;
508 model->setCompletion(completion);
509
510 } else if (m_descriptor->sampleType ==
511 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
512
513 NoteModel *model = getOutput<NoteModel>();
514 if (!model) return;
515 model->setCompletion(completion);
516
517 } else {
518
519 EditableDenseThreeDimensionalModel *model =
520 getOutput<EditableDenseThreeDimensionalModel>();
521 if (!model) return;
522 model->setCompletion(completion);
523 }
524 }
525