comparison plugin/transform/FeatureExtractionPluginTransform.cpp @ 320:32e50b620a6c

* Move some things around to facilitate plundering libraries for other applications without needing to duplicate so much code. sv/osc -> data/osc sv/audioio -> audioio sv/transform -> plugin/transform sv/document -> document (will rename to framework in next commit)
author Chris Cannam
date Wed, 24 Oct 2007 16:34:31 +0000
parents
children
comparison
equal deleted inserted replaced
319:3ff8f571da09 320:32e50b620a6c
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 Sonic Visualiser
5 An audio file viewer and annotation editor.
6 Centre for Digital Music, Queen Mary, University of London.
7 This file copyright 2006 Chris Cannam and QMUL.
8
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License as
11 published by the Free Software Foundation; either version 2 of the
12 License, or (at your option) any later version. See the file
13 COPYING included with this distribution for more information.
14 */
15
16 #include "FeatureExtractionPluginTransform.h"
17
18 #include "plugin/FeatureExtractionPluginFactory.h"
19 #include "plugin/PluginXml.h"
20 #include "vamp-sdk/Plugin.h"
21
22 #include "data/model/Model.h"
23 #include "base/Window.h"
24 #include "data/model/SparseOneDimensionalModel.h"
25 #include "data/model/SparseTimeValueModel.h"
26 #include "data/model/EditableDenseThreeDimensionalModel.h"
27 #include "data/model/DenseTimeValueModel.h"
28 #include "data/model/NoteModel.h"
29 #include "data/model/FFTModel.h"
30 #include "data/model/WaveFileModel.h"
31
32 #include <QMessageBox>
33
34 #include <iostream>
35
36 FeatureExtractionPluginTransform::FeatureExtractionPluginTransform(Model *inputModel,
37 QString pluginId,
38 const ExecutionContext &context,
39 QString configurationXml,
40 QString outputName) :
41 PluginTransform(inputModel, context),
42 m_plugin(0),
43 m_descriptor(0),
44 m_outputFeatureNo(0)
45 {
46 // std::cerr << "FeatureExtractionPluginTransform::FeatureExtractionPluginTransform: plugin " << pluginId.toStdString() << ", outputName " << outputName.toStdString() << std::endl;
47
48 FeatureExtractionPluginFactory *factory =
49 FeatureExtractionPluginFactory::instanceFor(pluginId);
50
51 if (!factory) {
52 std::cerr << "FeatureExtractionPluginTransform: No factory available for plugin id \""
53 << pluginId.toStdString() << "\"" << std::endl;
54 return;
55 }
56
57 m_plugin = factory->instantiatePlugin(pluginId, m_input->getSampleRate());
58
59 if (!m_plugin) {
60 std::cerr << "FeatureExtractionPluginTransform: Failed to instantiate plugin \""
61 << pluginId.toStdString() << "\"" << std::endl;
62 return;
63 }
64
65 if (configurationXml != "") {
66 PluginXml(m_plugin).setParametersFromXml(configurationXml);
67 }
68
69 DenseTimeValueModel *input = getInput();
70 if (!input) return;
71
72 size_t channelCount = input->getChannelCount();
73 if (m_plugin->getMaxChannelCount() < channelCount) {
74 channelCount = 1;
75 }
76 if (m_plugin->getMinChannelCount() > channelCount) {
77 std::cerr << "FeatureExtractionPluginTransform:: "
78 << "Can't provide enough channels to plugin (plugin min "
79 << m_plugin->getMinChannelCount() << ", max "
80 << m_plugin->getMaxChannelCount() << ", input model has "
81 << input->getChannelCount() << ")" << std::endl;
82 return;
83 }
84
85 std::cerr << "Initialising feature extraction plugin with channels = "
86 << channelCount << ", step = " << m_context.stepSize
87 << ", block = " << m_context.blockSize << std::endl;
88
89 if (!m_plugin->initialise(channelCount,
90 m_context.stepSize,
91 m_context.blockSize)) {
92 std::cerr << "FeatureExtractionPluginTransform: Plugin "
93 << m_plugin->getIdentifier() << " failed to initialise!" << std::endl;
94 return;
95 }
96
97 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
98
99 if (outputs.empty()) {
100 std::cerr << "FeatureExtractionPluginTransform: Plugin \""
101 << pluginId.toStdString() << "\" has no outputs" << std::endl;
102 return;
103 }
104
105 for (size_t i = 0; i < outputs.size(); ++i) {
106 if (outputName == "" || outputs[i].identifier == outputName.toStdString()) {
107 m_outputFeatureNo = i;
108 m_descriptor = new Vamp::Plugin::OutputDescriptor
109 (outputs[i]);
110 break;
111 }
112 }
113
114 if (!m_descriptor) {
115 std::cerr << "FeatureExtractionPluginTransform: Plugin \""
116 << pluginId.toStdString() << "\" has no output named \""
117 << outputName.toStdString() << "\"" << std::endl;
118 return;
119 }
120
121 // std::cerr << "FeatureExtractionPluginTransform: output sample type "
122 // << m_descriptor->sampleType << std::endl;
123
124 int binCount = 1;
125 float minValue = 0.0, maxValue = 0.0;
126 bool haveExtents = false;
127
128 if (m_descriptor->hasFixedBinCount) {
129 binCount = m_descriptor->binCount;
130 }
131
132 // std::cerr << "FeatureExtractionPluginTransform: output bin count "
133 // << binCount << std::endl;
134
135 if (binCount > 0 && m_descriptor->hasKnownExtents) {
136 minValue = m_descriptor->minValue;
137 maxValue = m_descriptor->maxValue;
138 haveExtents = true;
139 }
140
141 size_t modelRate = m_input->getSampleRate();
142 size_t modelResolution = 1;
143
144 switch (m_descriptor->sampleType) {
145
146 case Vamp::Plugin::OutputDescriptor::VariableSampleRate:
147 if (m_descriptor->sampleRate != 0.0) {
148 modelResolution = size_t(modelRate / m_descriptor->sampleRate + 0.001);
149 }
150 break;
151
152 case Vamp::Plugin::OutputDescriptor::OneSamplePerStep:
153 modelResolution = m_context.stepSize;
154 break;
155
156 case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
157 modelRate = size_t(m_descriptor->sampleRate + 0.001);
158 break;
159 }
160
161 if (binCount == 0) {
162
163 m_output = new SparseOneDimensionalModel(modelRate, modelResolution,
164 false);
165
166 } else if (binCount == 1) {
167
168 SparseTimeValueModel *model;
169 if (haveExtents) {
170 model = new SparseTimeValueModel
171 (modelRate, modelResolution, minValue, maxValue, false);
172 } else {
173 model = new SparseTimeValueModel
174 (modelRate, modelResolution, false);
175 }
176 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());
177
178 m_output = model;
179
180 } else if (m_descriptor->sampleType ==
181 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
182
183 // We don't have a sparse 3D model, so interpret this as a
184 // note model. There's nothing to define which values to use
185 // as which parameters of the note -- for the moment let's
186 // treat the first as pitch, second as duration in frames,
187 // third (if present) as velocity. (Our note model doesn't
188 // yet store velocity.)
189 //!!! todo: ask the user!
190
191 NoteModel *model;
192 if (haveExtents) {
193 model = new NoteModel
194 (modelRate, modelResolution, minValue, maxValue, false);
195 } else {
196 model = new NoteModel
197 (modelRate, modelResolution, false);
198 }
199 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());
200
201 m_output = model;
202
203 } else {
204
205 EditableDenseThreeDimensionalModel *model =
206 new EditableDenseThreeDimensionalModel
207 (modelRate, modelResolution, binCount, false);
208
209 if (!m_descriptor->binNames.empty()) {
210 std::vector<QString> names;
211 for (size_t i = 0; i < m_descriptor->binNames.size(); ++i) {
212 names.push_back(m_descriptor->binNames[i].c_str());
213 }
214 model->setBinNames(names);
215 }
216
217 m_output = model;
218 }
219 }
220
221 FeatureExtractionPluginTransform::~FeatureExtractionPluginTransform()
222 {
223 std::cerr << "FeatureExtractionPluginTransform::~FeatureExtractionPluginTransform()" << std::endl;
224 delete m_plugin;
225 delete m_descriptor;
226 }
227
228 DenseTimeValueModel *
229 FeatureExtractionPluginTransform::getInput()
230 {
231 DenseTimeValueModel *dtvm =
232 dynamic_cast<DenseTimeValueModel *>(getInputModel());
233 if (!dtvm) {
234 std::cerr << "FeatureExtractionPluginTransform::getInput: WARNING: Input model is not conformable to DenseTimeValueModel" << std::endl;
235 }
236 return dtvm;
237 }
238
239 void
240 FeatureExtractionPluginTransform::run()
241 {
242 DenseTimeValueModel *input = getInput();
243 if (!input) return;
244
245 if (!m_output) return;
246
247 while (!input->isReady()) {
248 /*
249 if (dynamic_cast<WaveFileModel *>(input)) {
250 std::cerr << "FeatureExtractionPluginTransform::run: Model is not ready, but it's not a WaveFileModel (it's a " << typeid(input).name() << "), so that's OK" << std::endl;
251 sleep(2);
252 break; // no need to wait
253 }
254 */
255 std::cerr << "FeatureExtractionPluginTransform::run: Waiting for input model to be ready..." << std::endl;
256 sleep(1);
257 }
258
259 size_t sampleRate = m_input->getSampleRate();
260
261 size_t channelCount = input->getChannelCount();
262 if (m_plugin->getMaxChannelCount() < channelCount) {
263 channelCount = 1;
264 }
265
266 float **buffers = new float*[channelCount];
267 for (size_t ch = 0; ch < channelCount; ++ch) {
268 buffers[ch] = new float[m_context.blockSize + 2];
269 }
270
271 bool frequencyDomain = (m_plugin->getInputDomain() ==
272 Vamp::Plugin::FrequencyDomain);
273 std::vector<FFTModel *> fftModels;
274
275 if (frequencyDomain) {
276 for (size_t ch = 0; ch < channelCount; ++ch) {
277 FFTModel *model = new FFTModel
278 (getInput(),
279 channelCount == 1 ? m_context.channel : ch,
280 m_context.windowType,
281 m_context.blockSize,
282 m_context.stepSize,
283 m_context.blockSize,
284 false);
285 if (!model->isOK()) {
286 QMessageBox::critical
287 (0, tr("FFT cache failed"),
288 tr("Failed to create the FFT model for this transform.\n"
289 "There may be insufficient memory or disc space to continue."));
290 delete model;
291 setCompletion(100);
292 return;
293 }
294 model->resume();
295 fftModels.push_back(model);
296 }
297 }
298
299 long startFrame = m_input->getStartFrame();
300 long endFrame = m_input->getEndFrame();
301
302 long contextStart = m_context.startFrame;
303 long contextDuration = m_context.duration;
304
305 if (contextStart == 0 || contextStart < startFrame) {
306 contextStart = startFrame;
307 }
308
309 if (contextDuration == 0) {
310 contextDuration = endFrame - contextStart;
311 }
312 if (contextStart + contextDuration > endFrame) {
313 contextDuration = endFrame - contextStart;
314 }
315
316 long blockFrame = contextStart;
317
318 long prevCompletion = 0;
319
320 setCompletion(0);
321
322 while (!m_abandoned) {
323
324 if (frequencyDomain) {
325 if (blockFrame - int(m_context.blockSize)/2 >
326 contextStart + contextDuration) break;
327 } else {
328 if (blockFrame >=
329 contextStart + contextDuration) break;
330 }
331
332 // std::cerr << "FeatureExtractionPluginTransform::run: blockFrame "
333 // << blockFrame << ", endFrame " << endFrame << ", blockSize "
334 // << m_context.blockSize << std::endl;
335
336 long completion =
337 (((blockFrame - contextStart) / m_context.stepSize) * 99) /
338 (contextDuration / m_context.stepSize);
339
340 // channelCount is either m_input->channelCount or 1
341
342 for (size_t ch = 0; ch < channelCount; ++ch) {
343 if (frequencyDomain) {
344 int column = (blockFrame - startFrame) / m_context.stepSize;
345 for (size_t i = 0; i <= m_context.blockSize/2; ++i) {
346 fftModels[ch]->getValuesAt
347 (column, i, buffers[ch][i*2], buffers[ch][i*2+1]);
348 }
349 } else {
350 getFrames(ch, channelCount,
351 blockFrame, m_context.blockSize, buffers[ch]);
352 }
353 }
354
355 Vamp::Plugin::FeatureSet features = m_plugin->process
356 (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate));
357
358 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
359 Vamp::Plugin::Feature feature =
360 features[m_outputFeatureNo][fi];
361 addFeature(blockFrame, feature);
362 }
363
364 if (blockFrame == contextStart || completion > prevCompletion) {
365 setCompletion(completion);
366 prevCompletion = completion;
367 }
368
369 blockFrame += m_context.stepSize;
370 }
371
372 if (m_abandoned) return;
373
374 Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures();
375
376 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
377 Vamp::Plugin::Feature feature =
378 features[m_outputFeatureNo][fi];
379 addFeature(blockFrame, feature);
380 }
381
382 if (frequencyDomain) {
383 for (size_t ch = 0; ch < channelCount; ++ch) {
384 delete fftModels[ch];
385 }
386 }
387
388 setCompletion(100);
389 }
390
391 void
392 FeatureExtractionPluginTransform::getFrames(int channel, int channelCount,
393 long startFrame, long size,
394 float *buffer)
395 {
396 long offset = 0;
397
398 if (startFrame < 0) {
399 for (int i = 0; i < size && startFrame + i < 0; ++i) {
400 buffer[i] = 0.0f;
401 }
402 offset = -startFrame;
403 size -= offset;
404 if (size <= 0) return;
405 startFrame = 0;
406 }
407
408 long got = getInput()->getData
409 ((channelCount == 1 ? m_context.channel : channel),
410 startFrame, size, buffer + offset);
411
412 while (got < size) {
413 buffer[offset + got] = 0.0;
414 ++got;
415 }
416
417 if (m_context.channel == -1 && channelCount == 1 &&
418 getInput()->getChannelCount() > 1) {
419 // use mean instead of sum, as plugin input
420 int cc = getInput()->getChannelCount();
421 for (long i = 0; i < size; ++i) {
422 buffer[i] /= cc;
423 }
424 }
425 }
426
427 void
428 FeatureExtractionPluginTransform::addFeature(size_t blockFrame,
429 const Vamp::Plugin::Feature &feature)
430 {
431 size_t inputRate = m_input->getSampleRate();
432
433 // std::cerr << "FeatureExtractionPluginTransform::addFeature("
434 // << blockFrame << ")" << std::endl;
435
436 int binCount = 1;
437 if (m_descriptor->hasFixedBinCount) {
438 binCount = m_descriptor->binCount;
439 }
440
441 size_t frame = blockFrame;
442
443 if (m_descriptor->sampleType ==
444 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
445
446 if (!feature.hasTimestamp) {
447 std::cerr
448 << "WARNING: FeatureExtractionPluginTransform::addFeature: "
449 << "Feature has variable sample rate but no timestamp!"
450 << std::endl;
451 return;
452 } else {
453 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate);
454 }
455
456 } else if (m_descriptor->sampleType ==
457 Vamp::Plugin::OutputDescriptor::FixedSampleRate) {
458
459 if (feature.hasTimestamp) {
460 //!!! warning: sampleRate may be non-integral
461 frame = Vamp::RealTime::realTime2Frame(feature.timestamp,
462 lrintf(m_descriptor->sampleRate));
463 } else {
464 frame = m_output->getEndFrame();
465 }
466 }
467
468 if (binCount == 0) {
469
470 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>();
471 if (!model) return;
472 model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str()));
473
474 } else if (binCount == 1) {
475
476 float value = 0.0;
477 if (feature.values.size() > 0) value = feature.values[0];
478
479 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>();
480 if (!model) return;
481 model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str()));
482 // std::cerr << "SparseTimeValueModel::addPoint(" << frame << ", " << value << "), " << feature.label.c_str() << std::endl;
483
484 } else if (m_descriptor->sampleType ==
485 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
486
487 float pitch = 0.0;
488 if (feature.values.size() > 0) pitch = feature.values[0];
489
490 float duration = 1;
491 if (feature.values.size() > 1) duration = feature.values[1];
492
493 float velocity = 100;
494 if (feature.values.size() > 2) velocity = feature.values[2];
495
496 NoteModel *model = getOutput<NoteModel>();
497 if (!model) return;
498
499 model->addPoint(NoteModel::Point(frame, pitch,
500 lrintf(duration),
501 feature.label.c_str()));
502
503 } else {
504
505 DenseThreeDimensionalModel::Column values = feature.values;
506
507 EditableDenseThreeDimensionalModel *model =
508 getOutput<EditableDenseThreeDimensionalModel>();
509 if (!model) return;
510
511 model->setColumn(frame / model->getResolution(), values);
512 }
513 }
514
515 void
516 FeatureExtractionPluginTransform::setCompletion(int completion)
517 {
518 int binCount = 1;
519 if (m_descriptor->hasFixedBinCount) {
520 binCount = m_descriptor->binCount;
521 }
522
523 // std::cerr << "FeatureExtractionPluginTransform::setCompletion("
524 // << completion << ")" << std::endl;
525
526 if (binCount == 0) {
527
528 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>();
529 if (!model) return;
530 model->setCompletion(completion);
531
532 } else if (binCount == 1) {
533
534 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>();
535 if (!model) return;
536 model->setCompletion(completion);
537
538 } else if (m_descriptor->sampleType ==
539 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
540
541 NoteModel *model = getOutput<NoteModel>();
542 if (!model) return;
543 model->setCompletion(completion);
544
545 } else {
546
547 EditableDenseThreeDimensionalModel *model =
548 getOutput<EditableDenseThreeDimensionalModel>();
549 if (!model) return;
550 model->setCompletion(completion);
551 }
552 }
553