Mercurial > hg > easaier-soundaccess
comparison sv/transform/FeatureExtractionPluginTransform.cpp @ 0:fc9323a41f5a
start base : Sonic Visualiser sv1-1.0rc1
author | lbajardsilogic |
---|---|
date | Fri, 11 May 2007 09:08:14 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:fc9323a41f5a |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 Sonic Visualiser | |
5 An audio file viewer and annotation editor. | |
6 Centre for Digital Music, Queen Mary, University of London. | |
7 This file copyright 2006 Chris Cannam and QMUL. | |
8 | |
9 This program is free software; you can redistribute it and/or | |
10 modify it under the terms of the GNU General Public License as | |
11 published by the Free Software Foundation; either version 2 of the | |
12 License, or (at your option) any later version. See the file | |
13 COPYING included with this distribution for more information. | |
14 */ | |
15 | |
16 #include "FeatureExtractionPluginTransform.h" | |
17 | |
18 #include "plugin/FeatureExtractionPluginFactory.h" | |
19 #include "plugin/PluginXml.h" | |
20 #include "vamp-sdk/Plugin.h" | |
21 | |
22 #include "data/model/Model.h" | |
23 #include "base/Window.h" | |
24 #include "data/model/SparseOneDimensionalModel.h" | |
25 #include "data/model/SparseTimeValueModel.h" | |
26 #include "data/model/EditableDenseThreeDimensionalModel.h" | |
27 #include "data/model/DenseTimeValueModel.h" | |
28 #include "data/model/NoteModel.h" | |
29 #include "data/model/FFTModel.h" | |
30 #include "data/model/WaveFileModel.h" | |
31 | |
32 #include <QMessageBox> | |
33 | |
34 #include <iostream> | |
35 | |
36 FeatureExtractionPluginTransform::FeatureExtractionPluginTransform(Model *inputModel, | |
37 QString pluginId, | |
38 const ExecutionContext &context, | |
39 QString configurationXml, | |
40 QString outputName) : | |
41 PluginTransform(inputModel, context), | |
42 m_plugin(0), | |
43 m_descriptor(0), | |
44 m_outputFeatureNo(0) | |
45 { | |
46 // std::cerr << "FeatureExtractionPluginTransform::FeatureExtractionPluginTransform: plugin " << pluginId.toStdString() << ", outputName " << outputName.toStdString() << std::endl; | |
47 | |
48 FeatureExtractionPluginFactory *factory = | |
49 FeatureExtractionPluginFactory::instanceFor(pluginId); | |
50 | |
51 if (!factory) { | |
52 std::cerr << "FeatureExtractionPluginTransform: No factory available for plugin id \"" | |
53 << pluginId.toStdString() << "\"" << std::endl; | |
54 return; | |
55 } | |
56 | |
57 m_plugin = factory->instantiatePlugin(pluginId, m_input->getSampleRate()); | |
58 | |
59 if (!m_plugin) { | |
60 std::cerr << "FeatureExtractionPluginTransform: Failed to instantiate plugin \"" | |
61 << pluginId.toStdString() << "\"" << std::endl; | |
62 return; | |
63 } | |
64 | |
65 if (configurationXml != "") { | |
66 PluginXml(m_plugin).setParametersFromXml(configurationXml); | |
67 } | |
68 | |
69 DenseTimeValueModel *input = getInput(); | |
70 if (!input) return; | |
71 | |
72 size_t channelCount = input->getChannelCount(); | |
73 if (m_plugin->getMaxChannelCount() < channelCount) { | |
74 channelCount = 1; | |
75 } | |
76 if (m_plugin->getMinChannelCount() > channelCount) { | |
77 std::cerr << "FeatureExtractionPluginTransform:: " | |
78 << "Can't provide enough channels to plugin (plugin min " | |
79 << m_plugin->getMinChannelCount() << ", max " | |
80 << m_plugin->getMaxChannelCount() << ", input model has " | |
81 << input->getChannelCount() << ")" << std::endl; | |
82 return; | |
83 } | |
84 | |
85 std::cerr << "Initialising feature extraction plugin with channels = " | |
86 << channelCount << ", step = " << m_context.stepSize | |
87 << ", block = " << m_context.blockSize << std::endl; | |
88 | |
89 if (!m_plugin->initialise(channelCount, | |
90 m_context.stepSize, | |
91 m_context.blockSize)) { | |
92 std::cerr << "FeatureExtractionPluginTransform: Plugin " | |
93 << m_plugin->getIdentifier() << " failed to initialise!" << std::endl; | |
94 return; | |
95 } | |
96 | |
97 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors(); | |
98 | |
99 if (outputs.empty()) { | |
100 std::cerr << "FeatureExtractionPluginTransform: Plugin \"" | |
101 << pluginId.toStdString() << "\" has no outputs" << std::endl; | |
102 return; | |
103 } | |
104 | |
105 for (size_t i = 0; i < outputs.size(); ++i) { | |
106 if (outputName == "" || outputs[i].identifier == outputName.toStdString()) { | |
107 m_outputFeatureNo = i; | |
108 m_descriptor = new Vamp::Plugin::OutputDescriptor | |
109 (outputs[i]); | |
110 break; | |
111 } | |
112 } | |
113 | |
114 if (!m_descriptor) { | |
115 std::cerr << "FeatureExtractionPluginTransform: Plugin \"" | |
116 << pluginId.toStdString() << "\" has no output named \"" | |
117 << outputName.toStdString() << "\"" << std::endl; | |
118 return; | |
119 } | |
120 | |
121 // std::cerr << "FeatureExtractionPluginTransform: output sample type " | |
122 // << m_descriptor->sampleType << std::endl; | |
123 | |
124 int binCount = 1; | |
125 float minValue = 0.0, maxValue = 0.0; | |
126 bool haveExtents = false; | |
127 | |
128 if (m_descriptor->hasFixedBinCount) { | |
129 binCount = m_descriptor->binCount; | |
130 } | |
131 | |
132 // std::cerr << "FeatureExtractionPluginTransform: output bin count " | |
133 // << binCount << std::endl; | |
134 | |
135 if (binCount > 0 && m_descriptor->hasKnownExtents) { | |
136 minValue = m_descriptor->minValue; | |
137 maxValue = m_descriptor->maxValue; | |
138 haveExtents = true; | |
139 } | |
140 | |
141 size_t modelRate = m_input->getSampleRate(); | |
142 size_t modelResolution = 1; | |
143 | |
144 switch (m_descriptor->sampleType) { | |
145 | |
146 case Vamp::Plugin::OutputDescriptor::VariableSampleRate: | |
147 if (m_descriptor->sampleRate != 0.0) { | |
148 modelResolution = size_t(modelRate / m_descriptor->sampleRate + 0.001); | |
149 } | |
150 break; | |
151 | |
152 case Vamp::Plugin::OutputDescriptor::OneSamplePerStep: | |
153 modelResolution = m_context.stepSize; | |
154 break; | |
155 | |
156 case Vamp::Plugin::OutputDescriptor::FixedSampleRate: | |
157 modelRate = size_t(m_descriptor->sampleRate + 0.001); | |
158 break; | |
159 } | |
160 | |
161 if (binCount == 0) { | |
162 | |
163 m_output = new SparseOneDimensionalModel(modelRate, modelResolution, | |
164 false); | |
165 | |
166 } else if (binCount == 1) { | |
167 | |
168 SparseTimeValueModel *model; | |
169 if (haveExtents) { | |
170 model = new SparseTimeValueModel | |
171 (modelRate, modelResolution, minValue, maxValue, false); | |
172 } else { | |
173 model = new SparseTimeValueModel | |
174 (modelRate, modelResolution, false); | |
175 } | |
176 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str()); | |
177 | |
178 m_output = model; | |
179 | |
180 } else if (m_descriptor->sampleType == | |
181 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
182 | |
183 // We don't have a sparse 3D model, so interpret this as a | |
184 // note model. There's nothing to define which values to use | |
185 // as which parameters of the note -- for the moment let's | |
186 // treat the first as pitch, second as duration in frames, | |
187 // third (if present) as velocity. (Our note model doesn't | |
188 // yet store velocity.) | |
189 //!!! todo: ask the user! | |
190 | |
191 NoteModel *model; | |
192 if (haveExtents) { | |
193 model = new NoteModel | |
194 (modelRate, modelResolution, minValue, maxValue, false); | |
195 } else { | |
196 model = new NoteModel | |
197 (modelRate, modelResolution, false); | |
198 } | |
199 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str()); | |
200 | |
201 m_output = model; | |
202 | |
203 } else { | |
204 | |
205 EditableDenseThreeDimensionalModel *model = | |
206 new EditableDenseThreeDimensionalModel | |
207 (modelRate, modelResolution, binCount, false); | |
208 | |
209 if (!m_descriptor->binNames.empty()) { | |
210 std::vector<QString> names; | |
211 for (size_t i = 0; i < m_descriptor->binNames.size(); ++i) { | |
212 names.push_back(m_descriptor->binNames[i].c_str()); | |
213 } | |
214 model->setBinNames(names); | |
215 } | |
216 | |
217 m_output = model; | |
218 } | |
219 } | |
220 | |
221 FeatureExtractionPluginTransform::~FeatureExtractionPluginTransform() | |
222 { | |
223 delete m_plugin; | |
224 delete m_descriptor; | |
225 } | |
226 | |
227 DenseTimeValueModel * | |
228 FeatureExtractionPluginTransform::getInput() | |
229 { | |
230 DenseTimeValueModel *dtvm = | |
231 dynamic_cast<DenseTimeValueModel *>(getInputModel()); | |
232 if (!dtvm) { | |
233 std::cerr << "FeatureExtractionPluginTransform::getInput: WARNING: Input model is not conformable to DenseTimeValueModel" << std::endl; | |
234 } | |
235 return dtvm; | |
236 } | |
237 | |
238 void | |
239 FeatureExtractionPluginTransform::run() | |
240 { | |
241 DenseTimeValueModel *input = getInput(); | |
242 if (!input) return; | |
243 | |
244 while (!input->isReady()) { | |
245 if (dynamic_cast<WaveFileModel *>(input)) break; // no need to wait | |
246 std::cerr << "FeatureExtractionPluginTransform::run: Waiting for input model to be ready..." << std::endl; | |
247 sleep(1); | |
248 } | |
249 | |
250 if (!m_output) return; | |
251 | |
252 size_t sampleRate = m_input->getSampleRate(); | |
253 | |
254 size_t channelCount = input->getChannelCount(); | |
255 if (m_plugin->getMaxChannelCount() < channelCount) { | |
256 channelCount = 1; | |
257 } | |
258 | |
259 float **buffers = new float*[channelCount]; | |
260 for (size_t ch = 0; ch < channelCount; ++ch) { | |
261 buffers[ch] = new float[m_context.blockSize + 2]; | |
262 } | |
263 | |
264 bool frequencyDomain = (m_plugin->getInputDomain() == | |
265 Vamp::Plugin::FrequencyDomain); | |
266 std::vector<FFTModel *> fftModels; | |
267 | |
268 if (frequencyDomain) { | |
269 for (size_t ch = 0; ch < channelCount; ++ch) { | |
270 FFTModel *model = new FFTModel | |
271 (getInput(), | |
272 channelCount == 1 ? m_context.channel : ch, | |
273 m_context.windowType, | |
274 m_context.blockSize, | |
275 m_context.stepSize, | |
276 m_context.blockSize, | |
277 false); | |
278 if (!model->isOK()) { | |
279 QMessageBox::critical | |
280 (0, tr("FFT cache failed"), | |
281 tr("Failed to create the FFT model for this transform.\n" | |
282 "There may be insufficient memory or disc space to continue.")); | |
283 delete model; | |
284 setCompletion(100); | |
285 return; | |
286 } | |
287 model->resume(); | |
288 fftModels.push_back(model); | |
289 } | |
290 } | |
291 | |
292 long startFrame = m_input->getStartFrame(); | |
293 long endFrame = m_input->getEndFrame(); | |
294 long blockFrame = startFrame; | |
295 | |
296 long prevCompletion = 0; | |
297 | |
298 while (!m_abandoned) { | |
299 | |
300 if (frequencyDomain) { | |
301 if (blockFrame - int(m_context.blockSize)/2 > endFrame) break; | |
302 } else { | |
303 if (blockFrame >= endFrame) break; | |
304 } | |
305 | |
306 // std::cerr << "FeatureExtractionPluginTransform::run: blockFrame " | |
307 // << blockFrame << std::endl; | |
308 | |
309 long completion = | |
310 (((blockFrame - startFrame) / m_context.stepSize) * 99) / | |
311 ( (endFrame - startFrame) / m_context.stepSize); | |
312 | |
313 // channelCount is either m_input->channelCount or 1 | |
314 | |
315 for (size_t ch = 0; ch < channelCount; ++ch) { | |
316 if (frequencyDomain) { | |
317 int column = (blockFrame - startFrame) / m_context.stepSize; | |
318 for (size_t i = 0; i <= m_context.blockSize/2; ++i) { | |
319 fftModels[ch]->getValuesAt | |
320 (column, i, buffers[ch][i*2], buffers[ch][i*2+1]); | |
321 } | |
322 } else { | |
323 getFrames(ch, channelCount, | |
324 blockFrame, m_context.blockSize, buffers[ch]); | |
325 } | |
326 } | |
327 | |
328 Vamp::Plugin::FeatureSet features = m_plugin->process | |
329 (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate)); | |
330 | |
331 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) { | |
332 Vamp::Plugin::Feature feature = | |
333 features[m_outputFeatureNo][fi]; | |
334 addFeature(blockFrame, feature); | |
335 } | |
336 | |
337 if (blockFrame == startFrame || completion > prevCompletion) { | |
338 setCompletion(completion); | |
339 prevCompletion = completion; | |
340 } | |
341 | |
342 blockFrame += m_context.stepSize; | |
343 } | |
344 | |
345 if (m_abandoned) return; | |
346 | |
347 Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures(); | |
348 | |
349 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) { | |
350 Vamp::Plugin::Feature feature = | |
351 features[m_outputFeatureNo][fi]; | |
352 addFeature(blockFrame, feature); | |
353 } | |
354 | |
355 if (frequencyDomain) { | |
356 for (size_t ch = 0; ch < channelCount; ++ch) { | |
357 delete fftModels[ch]; | |
358 } | |
359 } | |
360 | |
361 setCompletion(100); | |
362 } | |
363 | |
364 void | |
365 FeatureExtractionPluginTransform::getFrames(int channel, int channelCount, | |
366 long startFrame, long size, | |
367 float *buffer) | |
368 { | |
369 long offset = 0; | |
370 | |
371 if (startFrame < 0) { | |
372 for (int i = 0; i < size && startFrame + i < 0; ++i) { | |
373 buffer[i] = 0.0f; | |
374 } | |
375 offset = -startFrame; | |
376 size -= offset; | |
377 if (size <= 0) return; | |
378 startFrame = 0; | |
379 } | |
380 | |
381 long got = getInput()->getValues | |
382 ((channelCount == 1 ? m_context.channel : channel), | |
383 startFrame, startFrame + size, buffer + offset); | |
384 | |
385 while (got < size) { | |
386 buffer[offset + got] = 0.0; | |
387 ++got; | |
388 } | |
389 | |
390 if (m_context.channel == -1 && channelCount == 1 && | |
391 getInput()->getChannelCount() > 1) { | |
392 // use mean instead of sum, as plugin input | |
393 int cc = getInput()->getChannelCount(); | |
394 for (long i = 0; i < size; ++i) { | |
395 buffer[i] /= cc; | |
396 } | |
397 } | |
398 } | |
399 | |
400 void | |
401 FeatureExtractionPluginTransform::addFeature(size_t blockFrame, | |
402 const Vamp::Plugin::Feature &feature) | |
403 { | |
404 size_t inputRate = m_input->getSampleRate(); | |
405 | |
406 // std::cerr << "FeatureExtractionPluginTransform::addFeature(" | |
407 // << blockFrame << ")" << std::endl; | |
408 | |
409 int binCount = 1; | |
410 if (m_descriptor->hasFixedBinCount) { | |
411 binCount = m_descriptor->binCount; | |
412 } | |
413 | |
414 size_t frame = blockFrame; | |
415 | |
416 if (m_descriptor->sampleType == | |
417 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
418 | |
419 if (!feature.hasTimestamp) { | |
420 std::cerr | |
421 << "WARNING: FeatureExtractionPluginTransform::addFeature: " | |
422 << "Feature has variable sample rate but no timestamp!" | |
423 << std::endl; | |
424 return; | |
425 } else { | |
426 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate); | |
427 } | |
428 | |
429 } else if (m_descriptor->sampleType == | |
430 Vamp::Plugin::OutputDescriptor::FixedSampleRate) { | |
431 | |
432 if (feature.hasTimestamp) { | |
433 //!!! warning: sampleRate may be non-integral | |
434 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, | |
435 lrintf(m_descriptor->sampleRate)); | |
436 } else { | |
437 frame = m_output->getEndFrame(); | |
438 } | |
439 } | |
440 | |
441 if (binCount == 0) { | |
442 | |
443 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>(); | |
444 if (!model) return; | |
445 model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str())); | |
446 | |
447 } else if (binCount == 1) { | |
448 | |
449 float value = 0.0; | |
450 if (feature.values.size() > 0) value = feature.values[0]; | |
451 | |
452 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>(); | |
453 if (!model) return; | |
454 model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str())); | |
455 | |
456 } else if (m_descriptor->sampleType == | |
457 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
458 | |
459 float pitch = 0.0; | |
460 if (feature.values.size() > 0) pitch = feature.values[0]; | |
461 | |
462 float duration = 1; | |
463 if (feature.values.size() > 1) duration = feature.values[1]; | |
464 | |
465 float velocity = 100; | |
466 if (feature.values.size() > 2) velocity = feature.values[2]; | |
467 | |
468 NoteModel *model = getOutput<NoteModel>(); | |
469 if (!model) return; | |
470 | |
471 model->addPoint(NoteModel::Point(frame, pitch, | |
472 lrintf(duration), | |
473 feature.label.c_str())); | |
474 | |
475 } else { | |
476 | |
477 DenseThreeDimensionalModel::Column values = feature.values; | |
478 | |
479 EditableDenseThreeDimensionalModel *model = | |
480 getOutput<EditableDenseThreeDimensionalModel>(); | |
481 if (!model) return; | |
482 | |
483 model->setColumn(frame / model->getResolution(), values); | |
484 } | |
485 } | |
486 | |
487 void | |
488 FeatureExtractionPluginTransform::setCompletion(int completion) | |
489 { | |
490 int binCount = 1; | |
491 if (m_descriptor->hasFixedBinCount) { | |
492 binCount = m_descriptor->binCount; | |
493 } | |
494 | |
495 std::cerr << "FeatureExtractionPluginTransform::setCompletion(" | |
496 << completion << ")" << std::endl; | |
497 | |
498 if (binCount == 0) { | |
499 | |
500 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>(); | |
501 if (!model) return; | |
502 model->setCompletion(completion); | |
503 | |
504 } else if (binCount == 1) { | |
505 | |
506 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>(); | |
507 if (!model) return; | |
508 model->setCompletion(completion); | |
509 | |
510 } else if (m_descriptor->sampleType == | |
511 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
512 | |
513 NoteModel *model = getOutput<NoteModel>(); | |
514 if (!model) return; | |
515 model->setCompletion(completion); | |
516 | |
517 } else { | |
518 | |
519 EditableDenseThreeDimensionalModel *model = | |
520 getOutput<EditableDenseThreeDimensionalModel>(); | |
521 if (!model) return; | |
522 model->setCompletion(completion); | |
523 } | |
524 } | |
525 |