Mercurial > hg > svcore
comparison plugin/transform/FeatureExtractionModelTransformer.cpp @ 331:f620ce48c950
* Further naming change: Transformer -> ModelTransformer.
The Transform class now describes a thing that can be done, and the
ModelTransformer does it to a Model.
author | Chris Cannam |
---|---|
date | Wed, 07 Nov 2007 12:59:01 +0000 |
parents | plugin/transform/FeatureExtractionPluginTransformer.cpp@21bd032ae791 |
children | 1afaf98dbf11 |
comparison
equal
deleted
inserted
replaced
330:6e9dcf09b7fe | 331:f620ce48c950 |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 Sonic Visualiser | |
5 An audio file viewer and annotation editor. | |
6 Centre for Digital Music, Queen Mary, University of London. | |
7 This file copyright 2006 Chris Cannam and QMUL. | |
8 | |
9 This program is free software; you can redistribute it and/or | |
10 modify it under the terms of the GNU General Public License as | |
11 published by the Free Software Foundation; either version 2 of the | |
12 License, or (at your option) any later version. See the file | |
13 COPYING included with this distribution for more information. | |
14 */ | |
15 | |
16 #include "FeatureExtractionModelTransformer.h" | |
17 | |
18 #include "plugin/FeatureExtractionPluginFactory.h" | |
19 #include "plugin/PluginXml.h" | |
20 #include "vamp-sdk/Plugin.h" | |
21 | |
22 #include "data/model/Model.h" | |
23 #include "base/Window.h" | |
24 #include "data/model/SparseOneDimensionalModel.h" | |
25 #include "data/model/SparseTimeValueModel.h" | |
26 #include "data/model/EditableDenseThreeDimensionalModel.h" | |
27 #include "data/model/DenseTimeValueModel.h" | |
28 #include "data/model/NoteModel.h" | |
29 #include "data/model/FFTModel.h" | |
30 #include "data/model/WaveFileModel.h" | |
31 | |
32 #include <QMessageBox> | |
33 | |
34 #include <iostream> | |
35 | |
36 FeatureExtractionModelTransformer::FeatureExtractionModelTransformer(Model *inputModel, | |
37 QString pluginId, | |
38 const ExecutionContext &context, | |
39 QString configurationXml, | |
40 QString outputName) : | |
41 PluginTransformer(inputModel, context), | |
42 m_plugin(0), | |
43 m_descriptor(0), | |
44 m_outputFeatureNo(0) | |
45 { | |
46 // std::cerr << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: plugin " << pluginId.toStdString() << ", outputName " << outputName.toStdString() << std::endl; | |
47 | |
48 FeatureExtractionPluginFactory *factory = | |
49 FeatureExtractionPluginFactory::instanceFor(pluginId); | |
50 | |
51 if (!factory) { | |
52 std::cerr << "FeatureExtractionModelTransformer: No factory available for plugin id \"" | |
53 << pluginId.toStdString() << "\"" << std::endl; | |
54 return; | |
55 } | |
56 | |
57 m_plugin = factory->instantiatePlugin(pluginId, m_input->getSampleRate()); | |
58 | |
59 if (!m_plugin) { | |
60 std::cerr << "FeatureExtractionModelTransformer: Failed to instantiate plugin \"" | |
61 << pluginId.toStdString() << "\"" << std::endl; | |
62 return; | |
63 } | |
64 | |
65 if (configurationXml != "") { | |
66 PluginXml(m_plugin).setParametersFromXml(configurationXml); | |
67 } | |
68 | |
69 DenseTimeValueModel *input = getInput(); | |
70 if (!input) return; | |
71 | |
72 size_t channelCount = input->getChannelCount(); | |
73 if (m_plugin->getMaxChannelCount() < channelCount) { | |
74 channelCount = 1; | |
75 } | |
76 if (m_plugin->getMinChannelCount() > channelCount) { | |
77 std::cerr << "FeatureExtractionModelTransformer:: " | |
78 << "Can't provide enough channels to plugin (plugin min " | |
79 << m_plugin->getMinChannelCount() << ", max " | |
80 << m_plugin->getMaxChannelCount() << ", input model has " | |
81 << input->getChannelCount() << ")" << std::endl; | |
82 return; | |
83 } | |
84 | |
85 std::cerr << "Initialising feature extraction plugin with channels = " | |
86 << channelCount << ", step = " << m_context.stepSize | |
87 << ", block = " << m_context.blockSize << std::endl; | |
88 | |
89 if (!m_plugin->initialise(channelCount, | |
90 m_context.stepSize, | |
91 m_context.blockSize)) { | |
92 std::cerr << "FeatureExtractionModelTransformer: Plugin " | |
93 << m_plugin->getIdentifier() << " failed to initialise!" << std::endl; | |
94 return; | |
95 } | |
96 | |
97 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors(); | |
98 | |
99 if (outputs.empty()) { | |
100 std::cerr << "FeatureExtractionModelTransformer: Plugin \"" | |
101 << pluginId.toStdString() << "\" has no outputs" << std::endl; | |
102 return; | |
103 } | |
104 | |
105 for (size_t i = 0; i < outputs.size(); ++i) { | |
106 if (outputName == "" || outputs[i].identifier == outputName.toStdString()) { | |
107 m_outputFeatureNo = i; | |
108 m_descriptor = new Vamp::Plugin::OutputDescriptor | |
109 (outputs[i]); | |
110 break; | |
111 } | |
112 } | |
113 | |
114 if (!m_descriptor) { | |
115 std::cerr << "FeatureExtractionModelTransformer: Plugin \"" | |
116 << pluginId.toStdString() << "\" has no output named \"" | |
117 << outputName.toStdString() << "\"" << std::endl; | |
118 return; | |
119 } | |
120 | |
121 // std::cerr << "FeatureExtractionModelTransformer: output sample type " | |
122 // << m_descriptor->sampleType << std::endl; | |
123 | |
124 int binCount = 1; | |
125 float minValue = 0.0, maxValue = 0.0; | |
126 bool haveExtents = false; | |
127 | |
128 if (m_descriptor->hasFixedBinCount) { | |
129 binCount = m_descriptor->binCount; | |
130 } | |
131 | |
132 // std::cerr << "FeatureExtractionModelTransformer: output bin count " | |
133 // << binCount << std::endl; | |
134 | |
135 if (binCount > 0 && m_descriptor->hasKnownExtents) { | |
136 minValue = m_descriptor->minValue; | |
137 maxValue = m_descriptor->maxValue; | |
138 haveExtents = true; | |
139 } | |
140 | |
141 size_t modelRate = m_input->getSampleRate(); | |
142 size_t modelResolution = 1; | |
143 | |
144 switch (m_descriptor->sampleType) { | |
145 | |
146 case Vamp::Plugin::OutputDescriptor::VariableSampleRate: | |
147 if (m_descriptor->sampleRate != 0.0) { | |
148 modelResolution = size_t(modelRate / m_descriptor->sampleRate + 0.001); | |
149 } | |
150 break; | |
151 | |
152 case Vamp::Plugin::OutputDescriptor::OneSamplePerStep: | |
153 modelResolution = m_context.stepSize; | |
154 break; | |
155 | |
156 case Vamp::Plugin::OutputDescriptor::FixedSampleRate: | |
157 modelRate = size_t(m_descriptor->sampleRate + 0.001); | |
158 break; | |
159 } | |
160 | |
161 if (binCount == 0) { | |
162 | |
163 m_output = new SparseOneDimensionalModel(modelRate, modelResolution, | |
164 false); | |
165 | |
166 } else if (binCount == 1) { | |
167 | |
168 SparseTimeValueModel *model; | |
169 if (haveExtents) { | |
170 model = new SparseTimeValueModel | |
171 (modelRate, modelResolution, minValue, maxValue, false); | |
172 } else { | |
173 model = new SparseTimeValueModel | |
174 (modelRate, modelResolution, false); | |
175 } | |
176 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str()); | |
177 | |
178 m_output = model; | |
179 | |
180 } else if (m_descriptor->sampleType == | |
181 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
182 | |
183 // We don't have a sparse 3D model, so interpret this as a | |
184 // note model. There's nothing to define which values to use | |
185 // as which parameters of the note -- for the moment let's | |
186 // treat the first as pitch, second as duration in frames, | |
187 // third (if present) as velocity. (Our note model doesn't | |
188 // yet store velocity.) | |
189 //!!! todo: ask the user! | |
190 | |
191 NoteModel *model; | |
192 if (haveExtents) { | |
193 model = new NoteModel | |
194 (modelRate, modelResolution, minValue, maxValue, false); | |
195 } else { | |
196 model = new NoteModel | |
197 (modelRate, modelResolution, false); | |
198 } | |
199 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str()); | |
200 | |
201 m_output = model; | |
202 | |
203 } else { | |
204 | |
205 EditableDenseThreeDimensionalModel *model = | |
206 new EditableDenseThreeDimensionalModel | |
207 (modelRate, modelResolution, binCount, false); | |
208 | |
209 if (!m_descriptor->binNames.empty()) { | |
210 std::vector<QString> names; | |
211 for (size_t i = 0; i < m_descriptor->binNames.size(); ++i) { | |
212 names.push_back(m_descriptor->binNames[i].c_str()); | |
213 } | |
214 model->setBinNames(names); | |
215 } | |
216 | |
217 m_output = model; | |
218 } | |
219 } | |
220 | |
221 FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer() | |
222 { | |
223 std::cerr << "FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()" << std::endl; | |
224 delete m_plugin; | |
225 delete m_descriptor; | |
226 } | |
227 | |
228 DenseTimeValueModel * | |
229 FeatureExtractionModelTransformer::getInput() | |
230 { | |
231 DenseTimeValueModel *dtvm = | |
232 dynamic_cast<DenseTimeValueModel *>(getInputModel()); | |
233 if (!dtvm) { | |
234 std::cerr << "FeatureExtractionModelTransformer::getInput: WARNING: Input model is not conformable to DenseTimeValueModel" << std::endl; | |
235 } | |
236 return dtvm; | |
237 } | |
238 | |
239 void | |
240 FeatureExtractionModelTransformer::run() | |
241 { | |
242 DenseTimeValueModel *input = getInput(); | |
243 if (!input) return; | |
244 | |
245 if (!m_output) return; | |
246 | |
247 while (!input->isReady()) { | |
248 /* | |
249 if (dynamic_cast<WaveFileModel *>(input)) { | |
250 std::cerr << "FeatureExtractionModelTransformer::run: Model is not ready, but it's not a WaveFileModel (it's a " << typeid(input).name() << "), so that's OK" << std::endl; | |
251 sleep(2); | |
252 break; // no need to wait | |
253 } | |
254 */ | |
255 std::cerr << "FeatureExtractionModelTransformer::run: Waiting for input model to be ready..." << std::endl; | |
256 sleep(1); | |
257 } | |
258 | |
259 size_t sampleRate = m_input->getSampleRate(); | |
260 | |
261 size_t channelCount = input->getChannelCount(); | |
262 if (m_plugin->getMaxChannelCount() < channelCount) { | |
263 channelCount = 1; | |
264 } | |
265 | |
266 float **buffers = new float*[channelCount]; | |
267 for (size_t ch = 0; ch < channelCount; ++ch) { | |
268 buffers[ch] = new float[m_context.blockSize + 2]; | |
269 } | |
270 | |
271 bool frequencyDomain = (m_plugin->getInputDomain() == | |
272 Vamp::Plugin::FrequencyDomain); | |
273 std::vector<FFTModel *> fftModels; | |
274 | |
275 if (frequencyDomain) { | |
276 for (size_t ch = 0; ch < channelCount; ++ch) { | |
277 FFTModel *model = new FFTModel | |
278 (getInput(), | |
279 channelCount == 1 ? m_context.channel : ch, | |
280 m_context.windowType, | |
281 m_context.blockSize, | |
282 m_context.stepSize, | |
283 m_context.blockSize, | |
284 false); | |
285 if (!model->isOK()) { | |
286 QMessageBox::critical | |
287 (0, tr("FFT cache failed"), | |
288 tr("Failed to create the FFT model for this transform.\n" | |
289 "There may be insufficient memory or disc space to continue.")); | |
290 delete model; | |
291 setCompletion(100); | |
292 return; | |
293 } | |
294 model->resume(); | |
295 fftModels.push_back(model); | |
296 } | |
297 } | |
298 | |
299 long startFrame = m_input->getStartFrame(); | |
300 long endFrame = m_input->getEndFrame(); | |
301 | |
302 long contextStart = m_context.startFrame; | |
303 long contextDuration = m_context.duration; | |
304 | |
305 if (contextStart == 0 || contextStart < startFrame) { | |
306 contextStart = startFrame; | |
307 } | |
308 | |
309 if (contextDuration == 0) { | |
310 contextDuration = endFrame - contextStart; | |
311 } | |
312 if (contextStart + contextDuration > endFrame) { | |
313 contextDuration = endFrame - contextStart; | |
314 } | |
315 | |
316 long blockFrame = contextStart; | |
317 | |
318 long prevCompletion = 0; | |
319 | |
320 setCompletion(0); | |
321 | |
322 while (!m_abandoned) { | |
323 | |
324 if (frequencyDomain) { | |
325 if (blockFrame - int(m_context.blockSize)/2 > | |
326 contextStart + contextDuration) break; | |
327 } else { | |
328 if (blockFrame >= | |
329 contextStart + contextDuration) break; | |
330 } | |
331 | |
332 // std::cerr << "FeatureExtractionModelTransformer::run: blockFrame " | |
333 // << blockFrame << ", endFrame " << endFrame << ", blockSize " | |
334 // << m_context.blockSize << std::endl; | |
335 | |
336 long completion = | |
337 (((blockFrame - contextStart) / m_context.stepSize) * 99) / | |
338 (contextDuration / m_context.stepSize); | |
339 | |
340 // channelCount is either m_input->channelCount or 1 | |
341 | |
342 for (size_t ch = 0; ch < channelCount; ++ch) { | |
343 if (frequencyDomain) { | |
344 int column = (blockFrame - startFrame) / m_context.stepSize; | |
345 for (size_t i = 0; i <= m_context.blockSize/2; ++i) { | |
346 fftModels[ch]->getValuesAt | |
347 (column, i, buffers[ch][i*2], buffers[ch][i*2+1]); | |
348 } | |
349 } else { | |
350 getFrames(ch, channelCount, | |
351 blockFrame, m_context.blockSize, buffers[ch]); | |
352 } | |
353 } | |
354 | |
355 Vamp::Plugin::FeatureSet features = m_plugin->process | |
356 (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate)); | |
357 | |
358 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) { | |
359 Vamp::Plugin::Feature feature = | |
360 features[m_outputFeatureNo][fi]; | |
361 addFeature(blockFrame, feature); | |
362 } | |
363 | |
364 if (blockFrame == contextStart || completion > prevCompletion) { | |
365 setCompletion(completion); | |
366 prevCompletion = completion; | |
367 } | |
368 | |
369 blockFrame += m_context.stepSize; | |
370 } | |
371 | |
372 if (m_abandoned) return; | |
373 | |
374 Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures(); | |
375 | |
376 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) { | |
377 Vamp::Plugin::Feature feature = | |
378 features[m_outputFeatureNo][fi]; | |
379 addFeature(blockFrame, feature); | |
380 } | |
381 | |
382 if (frequencyDomain) { | |
383 for (size_t ch = 0; ch < channelCount; ++ch) { | |
384 delete fftModels[ch]; | |
385 } | |
386 } | |
387 | |
388 setCompletion(100); | |
389 } | |
390 | |
391 void | |
392 FeatureExtractionModelTransformer::getFrames(int channel, int channelCount, | |
393 long startFrame, long size, | |
394 float *buffer) | |
395 { | |
396 long offset = 0; | |
397 | |
398 if (startFrame < 0) { | |
399 for (int i = 0; i < size && startFrame + i < 0; ++i) { | |
400 buffer[i] = 0.0f; | |
401 } | |
402 offset = -startFrame; | |
403 size -= offset; | |
404 if (size <= 0) return; | |
405 startFrame = 0; | |
406 } | |
407 | |
408 long got = getInput()->getData | |
409 ((channelCount == 1 ? m_context.channel : channel), | |
410 startFrame, size, buffer + offset); | |
411 | |
412 while (got < size) { | |
413 buffer[offset + got] = 0.0; | |
414 ++got; | |
415 } | |
416 | |
417 if (m_context.channel == -1 && channelCount == 1 && | |
418 getInput()->getChannelCount() > 1) { | |
419 // use mean instead of sum, as plugin input | |
420 int cc = getInput()->getChannelCount(); | |
421 for (long i = 0; i < size; ++i) { | |
422 buffer[i] /= cc; | |
423 } | |
424 } | |
425 } | |
426 | |
427 void | |
428 FeatureExtractionModelTransformer::addFeature(size_t blockFrame, | |
429 const Vamp::Plugin::Feature &feature) | |
430 { | |
431 size_t inputRate = m_input->getSampleRate(); | |
432 | |
433 // std::cerr << "FeatureExtractionModelTransformer::addFeature(" | |
434 // << blockFrame << ")" << std::endl; | |
435 | |
436 int binCount = 1; | |
437 if (m_descriptor->hasFixedBinCount) { | |
438 binCount = m_descriptor->binCount; | |
439 } | |
440 | |
441 size_t frame = blockFrame; | |
442 | |
443 if (m_descriptor->sampleType == | |
444 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
445 | |
446 if (!feature.hasTimestamp) { | |
447 std::cerr | |
448 << "WARNING: FeatureExtractionModelTransformer::addFeature: " | |
449 << "Feature has variable sample rate but no timestamp!" | |
450 << std::endl; | |
451 return; | |
452 } else { | |
453 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate); | |
454 } | |
455 | |
456 } else if (m_descriptor->sampleType == | |
457 Vamp::Plugin::OutputDescriptor::FixedSampleRate) { | |
458 | |
459 if (feature.hasTimestamp) { | |
460 //!!! warning: sampleRate may be non-integral | |
461 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, | |
462 lrintf(m_descriptor->sampleRate)); | |
463 } else { | |
464 frame = m_output->getEndFrame(); | |
465 } | |
466 } | |
467 | |
468 if (binCount == 0) { | |
469 | |
470 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>(); | |
471 if (!model) return; | |
472 model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str())); | |
473 | |
474 } else if (binCount == 1) { | |
475 | |
476 float value = 0.0; | |
477 if (feature.values.size() > 0) value = feature.values[0]; | |
478 | |
479 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>(); | |
480 if (!model) return; | |
481 model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str())); | |
482 // std::cerr << "SparseTimeValueModel::addPoint(" << frame << ", " << value << "), " << feature.label.c_str() << std::endl; | |
483 | |
484 } else if (m_descriptor->sampleType == | |
485 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
486 | |
487 float pitch = 0.0; | |
488 if (feature.values.size() > 0) pitch = feature.values[0]; | |
489 | |
490 float duration = 1; | |
491 if (feature.values.size() > 1) duration = feature.values[1]; | |
492 | |
493 float velocity = 100; | |
494 if (feature.values.size() > 2) velocity = feature.values[2]; | |
495 | |
496 NoteModel *model = getOutput<NoteModel>(); | |
497 if (!model) return; | |
498 | |
499 model->addPoint(NoteModel::Point(frame, pitch, | |
500 lrintf(duration), | |
501 feature.label.c_str())); | |
502 | |
503 } else { | |
504 | |
505 DenseThreeDimensionalModel::Column values = feature.values; | |
506 | |
507 EditableDenseThreeDimensionalModel *model = | |
508 getOutput<EditableDenseThreeDimensionalModel>(); | |
509 if (!model) return; | |
510 | |
511 model->setColumn(frame / model->getResolution(), values); | |
512 } | |
513 } | |
514 | |
515 void | |
516 FeatureExtractionModelTransformer::setCompletion(int completion) | |
517 { | |
518 int binCount = 1; | |
519 if (m_descriptor->hasFixedBinCount) { | |
520 binCount = m_descriptor->binCount; | |
521 } | |
522 | |
523 // std::cerr << "FeatureExtractionModelTransformer::setCompletion(" | |
524 // << completion << ")" << std::endl; | |
525 | |
526 if (binCount == 0) { | |
527 | |
528 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>(); | |
529 if (!model) return; | |
530 model->setCompletion(completion); | |
531 | |
532 } else if (binCount == 1) { | |
533 | |
534 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>(); | |
535 if (!model) return; | |
536 model->setCompletion(completion); | |
537 | |
538 } else if (m_descriptor->sampleType == | |
539 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
540 | |
541 NoteModel *model = getOutput<NoteModel>(); | |
542 if (!model) return; | |
543 model->setCompletion(completion); | |
544 | |
545 } else { | |
546 | |
547 EditableDenseThreeDimensionalModel *model = | |
548 getOutput<EditableDenseThreeDimensionalModel>(); | |
549 if (!model) return; | |
550 model->setCompletion(completion); | |
551 } | |
552 } | |
553 |