Mercurial > hg > sonic-visualiser
comparison transform/FeatureExtractionPluginTransform.cpp @ 0:cd5d7ff8ef38
* Reorganising code base. This revision will not compile.
author | Chris Cannam |
---|---|
date | Mon, 31 Jul 2006 12:03:45 +0000 |
parents | |
children | 40116f709d3b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:cd5d7ff8ef38 |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 Sonic Visualiser | |
5 An audio file viewer and annotation editor. | |
6 Centre for Digital Music, Queen Mary, University of London. | |
7 This file copyright 2006 Chris Cannam. | |
8 | |
9 This program is free software; you can redistribute it and/or | |
10 modify it under the terms of the GNU General Public License as | |
11 published by the Free Software Foundation; either version 2 of the | |
12 License, or (at your option) any later version. See the file | |
13 COPYING included with this distribution for more information. | |
14 */ | |
15 | |
16 #include "FeatureExtractionPluginTransform.h" | |
17 | |
18 #include "plugin/FeatureExtractionPluginFactory.h" | |
19 #include "plugin/PluginXml.h" | |
20 #include "vamp-sdk/Plugin.h" | |
21 | |
22 #include "base/Model.h" | |
23 #include "base/Window.h" | |
24 #include "model/SparseOneDimensionalModel.h" | |
25 #include "model/SparseTimeValueModel.h" | |
26 #include "model/DenseThreeDimensionalModel.h" | |
27 #include "model/DenseTimeValueModel.h" | |
28 #include "model/NoteModel.h" | |
29 #include "fileio/FFTFuzzyAdapter.h" | |
30 | |
31 #include <fftw3.h> | |
32 | |
33 #include <iostream> | |
34 | |
35 FeatureExtractionPluginTransform::FeatureExtractionPluginTransform(Model *inputModel, | |
36 QString pluginId, | |
37 int channel, | |
38 QString configurationXml, | |
39 QString outputName) : | |
40 Transform(inputModel), | |
41 m_plugin(0), | |
42 m_channel(channel), | |
43 m_stepSize(0), | |
44 m_blockSize(0), | |
45 m_descriptor(0), | |
46 m_outputFeatureNo(0) | |
47 { | |
48 // std::cerr << "FeatureExtractionPluginTransform::FeatureExtractionPluginTransform: plugin " << pluginId.toStdString() << ", outputName " << outputName.toStdString() << std::endl; | |
49 | |
50 FeatureExtractionPluginFactory *factory = | |
51 FeatureExtractionPluginFactory::instanceFor(pluginId); | |
52 | |
53 if (!factory) { | |
54 std::cerr << "FeatureExtractionPluginTransform: No factory available for plugin id \"" | |
55 << pluginId.toStdString() << "\"" << std::endl; | |
56 return; | |
57 } | |
58 | |
59 m_plugin = factory->instantiatePlugin(pluginId, m_input->getSampleRate()); | |
60 | |
61 if (!m_plugin) { | |
62 std::cerr << "FeatureExtractionPluginTransform: Failed to instantiate plugin \"" | |
63 << pluginId.toStdString() << "\"" << std::endl; | |
64 return; | |
65 } | |
66 | |
67 if (configurationXml != "") { | |
68 PluginXml(m_plugin).setParametersFromXml(configurationXml); | |
69 } | |
70 | |
71 m_blockSize = m_plugin->getPreferredBlockSize(); | |
72 m_stepSize = m_plugin->getPreferredStepSize(); | |
73 | |
74 if (m_blockSize == 0) m_blockSize = 1024; //!!! todo: ask user | |
75 if (m_stepSize == 0) m_stepSize = m_blockSize; //!!! likewise | |
76 | |
77 DenseTimeValueModel *input = getInput(); | |
78 if (!input) return; | |
79 | |
80 size_t channelCount = input->getChannelCount(); | |
81 if (m_plugin->getMaxChannelCount() < channelCount) { | |
82 channelCount = 1; | |
83 } | |
84 if (m_plugin->getMinChannelCount() > channelCount) { | |
85 std::cerr << "FeatureExtractionPluginTransform:: " | |
86 << "Can't provide enough channels to plugin (plugin min " | |
87 << m_plugin->getMinChannelCount() << ", max " | |
88 << m_plugin->getMaxChannelCount() << ", input model has " | |
89 << input->getChannelCount() << ")" << std::endl; | |
90 return; | |
91 } | |
92 | |
93 if (!m_plugin->initialise(channelCount, m_stepSize, m_blockSize)) { | |
94 std::cerr << "FeatureExtractionPluginTransform: Plugin " | |
95 << m_plugin->getName() << " failed to initialise!" << std::endl; | |
96 return; | |
97 } | |
98 | |
99 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors(); | |
100 | |
101 if (outputs.empty()) { | |
102 std::cerr << "FeatureExtractionPluginTransform: Plugin \"" | |
103 << pluginId.toStdString() << "\" has no outputs" << std::endl; | |
104 return; | |
105 } | |
106 | |
107 for (size_t i = 0; i < outputs.size(); ++i) { | |
108 if (outputName == "" || outputs[i].name == outputName.toStdString()) { | |
109 m_outputFeatureNo = i; | |
110 m_descriptor = new Vamp::Plugin::OutputDescriptor | |
111 (outputs[i]); | |
112 break; | |
113 } | |
114 } | |
115 | |
116 if (!m_descriptor) { | |
117 std::cerr << "FeatureExtractionPluginTransform: Plugin \"" | |
118 << pluginId.toStdString() << "\" has no output named \"" | |
119 << outputName.toStdString() << "\"" << std::endl; | |
120 return; | |
121 } | |
122 | |
123 // std::cerr << "FeatureExtractionPluginTransform: output sample type " | |
124 // << m_descriptor->sampleType << std::endl; | |
125 | |
126 int binCount = 1; | |
127 float minValue = 0.0, maxValue = 0.0; | |
128 | |
129 if (m_descriptor->hasFixedBinCount) { | |
130 binCount = m_descriptor->binCount; | |
131 } | |
132 | |
133 // std::cerr << "FeatureExtractionPluginTransform: output bin count " | |
134 // << binCount << std::endl; | |
135 | |
136 if (binCount > 0 && m_descriptor->hasKnownExtents) { | |
137 minValue = m_descriptor->minValue; | |
138 maxValue = m_descriptor->maxValue; | |
139 } | |
140 | |
141 size_t modelRate = m_input->getSampleRate(); | |
142 size_t modelResolution = 1; | |
143 | |
144 switch (m_descriptor->sampleType) { | |
145 | |
146 case Vamp::Plugin::OutputDescriptor::VariableSampleRate: | |
147 if (m_descriptor->sampleRate != 0.0) { | |
148 modelResolution = size_t(modelRate / m_descriptor->sampleRate + 0.001); | |
149 } | |
150 break; | |
151 | |
152 case Vamp::Plugin::OutputDescriptor::OneSamplePerStep: | |
153 modelResolution = m_stepSize; | |
154 break; | |
155 | |
156 case Vamp::Plugin::OutputDescriptor::FixedSampleRate: | |
157 modelRate = size_t(m_descriptor->sampleRate + 0.001); | |
158 break; | |
159 } | |
160 | |
161 if (binCount == 0) { | |
162 | |
163 m_output = new SparseOneDimensionalModel(modelRate, modelResolution, | |
164 false); | |
165 | |
166 } else if (binCount == 1) { | |
167 | |
168 SparseTimeValueModel *model = new SparseTimeValueModel | |
169 (modelRate, modelResolution, minValue, maxValue, false); | |
170 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str()); | |
171 | |
172 m_output = model; | |
173 | |
174 } else if (m_descriptor->sampleType == | |
175 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
176 | |
177 // We don't have a sparse 3D model, so interpret this as a | |
178 // note model. There's nothing to define which values to use | |
179 // as which parameters of the note -- for the moment let's | |
180 // treat the first as pitch, second as duration in frames, | |
181 // third (if present) as velocity. (Our note model doesn't | |
182 // yet store velocity.) | |
183 //!!! todo: ask the user! | |
184 | |
185 NoteModel *model = new NoteModel | |
186 (modelRate, modelResolution, minValue, maxValue, false); | |
187 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str()); | |
188 | |
189 m_output = model; | |
190 | |
191 } else { | |
192 | |
193 m_output = new DenseThreeDimensionalModel(modelRate, modelResolution, | |
194 binCount, false); | |
195 | |
196 if (!m_descriptor->binNames.empty()) { | |
197 std::vector<QString> names; | |
198 for (size_t i = 0; i < m_descriptor->binNames.size(); ++i) { | |
199 names.push_back(m_descriptor->binNames[i].c_str()); | |
200 } | |
201 (dynamic_cast<DenseThreeDimensionalModel *>(m_output)) | |
202 ->setBinNames(names); | |
203 } | |
204 } | |
205 } | |
206 | |
207 FeatureExtractionPluginTransform::~FeatureExtractionPluginTransform() | |
208 { | |
209 delete m_plugin; | |
210 delete m_descriptor; | |
211 } | |
212 | |
213 DenseTimeValueModel * | |
214 FeatureExtractionPluginTransform::getInput() | |
215 { | |
216 DenseTimeValueModel *dtvm = | |
217 dynamic_cast<DenseTimeValueModel *>(getInputModel()); | |
218 if (!dtvm) { | |
219 std::cerr << "FeatureExtractionPluginTransform::getInput: WARNING: Input model is not conformable to DenseTimeValueModel" << std::endl; | |
220 } | |
221 return dtvm; | |
222 } | |
223 | |
224 void | |
225 FeatureExtractionPluginTransform::run() | |
226 { | |
227 DenseTimeValueModel *input = getInput(); | |
228 if (!input) return; | |
229 | |
230 if (!m_output) return; | |
231 | |
232 size_t sampleRate = m_input->getSampleRate(); | |
233 | |
234 size_t channelCount = input->getChannelCount(); | |
235 if (m_plugin->getMaxChannelCount() < channelCount) { | |
236 channelCount = 1; | |
237 } | |
238 | |
239 float **buffers = new float*[channelCount]; | |
240 for (size_t ch = 0; ch < channelCount; ++ch) { | |
241 buffers[ch] = new float[m_blockSize]; | |
242 } | |
243 | |
244 bool frequencyDomain = (m_plugin->getInputDomain() == | |
245 Vamp::Plugin::FrequencyDomain); | |
246 std::vector<FFTFuzzyAdapter *> fftAdapters; | |
247 | |
248 if (frequencyDomain) { | |
249 for (size_t ch = 0; ch < channelCount; ++ch) { | |
250 fftAdapters.push_back(new FFTFuzzyAdapter | |
251 (getInput(), | |
252 channelCount == 1 ? m_channel : ch, | |
253 HanningWindow, | |
254 m_blockSize, | |
255 m_stepSize, | |
256 m_blockSize, | |
257 false)); | |
258 } | |
259 } | |
260 | |
261 long startFrame = m_input->getStartFrame(); | |
262 long endFrame = m_input->getEndFrame(); | |
263 long blockFrame = startFrame; | |
264 | |
265 long prevCompletion = 0; | |
266 | |
267 while (1) { | |
268 | |
269 if (frequencyDomain) { | |
270 if (blockFrame - int(m_blockSize)/2 > endFrame) break; | |
271 } else { | |
272 if (blockFrame >= endFrame) break; | |
273 } | |
274 | |
275 // std::cerr << "FeatureExtractionPluginTransform::run: blockFrame " | |
276 // << blockFrame << std::endl; | |
277 | |
278 long completion = | |
279 (((blockFrame - startFrame) / m_stepSize) * 99) / | |
280 ( (endFrame - startFrame) / m_stepSize); | |
281 | |
282 // channelCount is either m_input->channelCount or 1 | |
283 | |
284 for (size_t ch = 0; ch < channelCount; ++ch) { | |
285 if (frequencyDomain) { | |
286 int column = (blockFrame - startFrame) / m_stepSize; | |
287 for (size_t i = 0; i < m_blockSize/2; ++i) { | |
288 fftAdapters[ch]->getValuesAt | |
289 (column, i, buffers[ch][i*2], buffers[ch][i*2+1]); | |
290 } | |
291 /*!!! | |
292 float sum = 0.0; | |
293 for (size_t i = 0; i < m_blockSize/2; ++i) { | |
294 sum += buffers[ch][i*2]; | |
295 } | |
296 if (fabs(sum) < 0.0001) { | |
297 std::cerr << "WARNING: small sum for column " << column << " (sum is " << sum << ")" << std::endl; | |
298 } | |
299 */ | |
300 } else { | |
301 getFrames(ch, channelCount, | |
302 blockFrame, m_blockSize, buffers[ch]); | |
303 } | |
304 } | |
305 | |
306 Vamp::Plugin::FeatureSet features = m_plugin->process | |
307 (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate)); | |
308 | |
309 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) { | |
310 Vamp::Plugin::Feature feature = | |
311 features[m_outputFeatureNo][fi]; | |
312 addFeature(blockFrame, feature); | |
313 } | |
314 | |
315 if (blockFrame == startFrame || completion > prevCompletion) { | |
316 setCompletion(completion); | |
317 prevCompletion = completion; | |
318 } | |
319 | |
320 blockFrame += m_stepSize; | |
321 } | |
322 | |
323 Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures(); | |
324 | |
325 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) { | |
326 Vamp::Plugin::Feature feature = | |
327 features[m_outputFeatureNo][fi]; | |
328 addFeature(blockFrame, feature); | |
329 } | |
330 | |
331 if (frequencyDomain) { | |
332 for (size_t ch = 0; ch < channelCount; ++ch) { | |
333 delete fftAdapters[ch]; | |
334 } | |
335 } | |
336 | |
337 setCompletion(100); | |
338 } | |
339 | |
340 void | |
341 FeatureExtractionPluginTransform::getFrames(int channel, int channelCount, | |
342 long startFrame, long size, | |
343 float *buffer) | |
344 { | |
345 long offset = 0; | |
346 | |
347 if (startFrame < 0) { | |
348 for (int i = 0; i < size && startFrame + i < 0; ++i) { | |
349 buffer[i] = 0.0f; | |
350 } | |
351 offset = -startFrame; | |
352 size -= offset; | |
353 if (size <= 0) return; | |
354 startFrame = 0; | |
355 } | |
356 | |
357 long got = getInput()->getValues | |
358 ((channelCount == 1 ? m_channel : channel), | |
359 startFrame, startFrame + size, buffer + offset); | |
360 | |
361 while (got < size) { | |
362 buffer[offset + got] = 0.0; | |
363 ++got; | |
364 } | |
365 | |
366 if (m_channel == -1 && channelCount == 1 && | |
367 getInput()->getChannelCount() > 1) { | |
368 // use mean instead of sum, as plugin input | |
369 int cc = getInput()->getChannelCount(); | |
370 for (long i = 0; i < size; ++i) { | |
371 buffer[i] /= cc; | |
372 } | |
373 } | |
374 } | |
375 | |
376 void | |
377 FeatureExtractionPluginTransform::addFeature(size_t blockFrame, | |
378 const Vamp::Plugin::Feature &feature) | |
379 { | |
380 size_t inputRate = m_input->getSampleRate(); | |
381 | |
382 // std::cerr << "FeatureExtractionPluginTransform::addFeature(" | |
383 // << blockFrame << ")" << std::endl; | |
384 | |
385 int binCount = 1; | |
386 if (m_descriptor->hasFixedBinCount) { | |
387 binCount = m_descriptor->binCount; | |
388 } | |
389 | |
390 size_t frame = blockFrame; | |
391 | |
392 if (m_descriptor->sampleType == | |
393 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
394 | |
395 if (!feature.hasTimestamp) { | |
396 std::cerr | |
397 << "WARNING: FeatureExtractionPluginTransform::addFeature: " | |
398 << "Feature has variable sample rate but no timestamp!" | |
399 << std::endl; | |
400 return; | |
401 } else { | |
402 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate); | |
403 } | |
404 | |
405 } else if (m_descriptor->sampleType == | |
406 Vamp::Plugin::OutputDescriptor::FixedSampleRate) { | |
407 | |
408 if (feature.hasTimestamp) { | |
409 //!!! warning: sampleRate may be non-integral | |
410 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, | |
411 m_descriptor->sampleRate); | |
412 } else { | |
413 frame = m_output->getEndFrame() + 1; | |
414 } | |
415 } | |
416 | |
417 if (binCount == 0) { | |
418 | |
419 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>(); | |
420 if (!model) return; | |
421 model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str())); | |
422 | |
423 } else if (binCount == 1) { | |
424 | |
425 float value = 0.0; | |
426 if (feature.values.size() > 0) value = feature.values[0]; | |
427 | |
428 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>(); | |
429 if (!model) return; | |
430 model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str())); | |
431 | |
432 } else if (m_descriptor->sampleType == | |
433 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
434 | |
435 float pitch = 0.0; | |
436 if (feature.values.size() > 0) pitch = feature.values[0]; | |
437 | |
438 float duration = 1; | |
439 if (feature.values.size() > 1) duration = feature.values[1]; | |
440 | |
441 float velocity = 100; | |
442 if (feature.values.size() > 2) velocity = feature.values[2]; | |
443 | |
444 NoteModel *model = getOutput<NoteModel>(); | |
445 if (!model) return; | |
446 | |
447 model->addPoint(NoteModel::Point(frame, pitch, duration, feature.label.c_str())); | |
448 | |
449 } else { | |
450 | |
451 DenseThreeDimensionalModel::BinValueSet values = feature.values; | |
452 | |
453 DenseThreeDimensionalModel *model = getOutput<DenseThreeDimensionalModel>(); | |
454 if (!model) return; | |
455 | |
456 model->setBinValues(frame, values); | |
457 } | |
458 } | |
459 | |
460 void | |
461 FeatureExtractionPluginTransform::setCompletion(int completion) | |
462 { | |
463 int binCount = 1; | |
464 if (m_descriptor->hasFixedBinCount) { | |
465 binCount = m_descriptor->binCount; | |
466 } | |
467 | |
468 if (binCount == 0) { | |
469 | |
470 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>(); | |
471 if (!model) return; | |
472 model->setCompletion(completion); | |
473 | |
474 } else if (binCount == 1) { | |
475 | |
476 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>(); | |
477 if (!model) return; | |
478 model->setCompletion(completion); | |
479 | |
480 } else if (m_descriptor->sampleType == | |
481 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
482 | |
483 NoteModel *model = getOutput<NoteModel>(); | |
484 if (!model) return; | |
485 model->setCompletion(completion); | |
486 | |
487 } else { | |
488 | |
489 DenseThreeDimensionalModel *model = getOutput<DenseThreeDimensionalModel>(); | |
490 if (!model) return; | |
491 model->setCompletion(completion); | |
492 } | |
493 } | |
494 |