Mercurial > hg > sonic-visualiser
comparison transform/FeatureExtractionPluginTransform.cpp @ 0:cd5d7ff8ef38
* Reorganising code base.  This revision will not compile.
| author | Chris Cannam | 
|---|---|
| date | Mon, 31 Jul 2006 12:03:45 +0000 | 
| parents | |
| children | 40116f709d3b | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:cd5d7ff8ef38 | 
|---|---|
| 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
| 2 | |
| 3 /* | |
| 4 Sonic Visualiser | |
| 5 An audio file viewer and annotation editor. | |
| 6 Centre for Digital Music, Queen Mary, University of London. | |
| 7 This file copyright 2006 Chris Cannam. | |
| 8 | |
| 9 This program is free software; you can redistribute it and/or | |
| 10 modify it under the terms of the GNU General Public License as | |
| 11 published by the Free Software Foundation; either version 2 of the | |
| 12 License, or (at your option) any later version. See the file | |
| 13 COPYING included with this distribution for more information. | |
| 14 */ | |
| 15 | |
| 16 #include "FeatureExtractionPluginTransform.h" | |
| 17 | |
| 18 #include "plugin/FeatureExtractionPluginFactory.h" | |
| 19 #include "plugin/PluginXml.h" | |
| 20 #include "vamp-sdk/Plugin.h" | |
| 21 | |
| 22 #include "base/Model.h" | |
| 23 #include "base/Window.h" | |
| 24 #include "model/SparseOneDimensionalModel.h" | |
| 25 #include "model/SparseTimeValueModel.h" | |
| 26 #include "model/DenseThreeDimensionalModel.h" | |
| 27 #include "model/DenseTimeValueModel.h" | |
| 28 #include "model/NoteModel.h" | |
| 29 #include "fileio/FFTFuzzyAdapter.h" | |
| 30 | |
| 31 #include <fftw3.h> | |
| 32 | |
| 33 #include <iostream> | |
| 34 | |
| 35 FeatureExtractionPluginTransform::FeatureExtractionPluginTransform(Model *inputModel, | |
| 36 QString pluginId, | |
| 37 int channel, | |
| 38 QString configurationXml, | |
| 39 QString outputName) : | |
| 40 Transform(inputModel), | |
| 41 m_plugin(0), | |
| 42 m_channel(channel), | |
| 43 m_stepSize(0), | |
| 44 m_blockSize(0), | |
| 45 m_descriptor(0), | |
| 46 m_outputFeatureNo(0) | |
| 47 { | |
| 48 // std::cerr << "FeatureExtractionPluginTransform::FeatureExtractionPluginTransform: plugin " << pluginId.toStdString() << ", outputName " << outputName.toStdString() << std::endl; | |
| 49 | |
| 50 FeatureExtractionPluginFactory *factory = | |
| 51 FeatureExtractionPluginFactory::instanceFor(pluginId); | |
| 52 | |
| 53 if (!factory) { | |
| 54 std::cerr << "FeatureExtractionPluginTransform: No factory available for plugin id \"" | |
| 55 << pluginId.toStdString() << "\"" << std::endl; | |
| 56 return; | |
| 57 } | |
| 58 | |
| 59 m_plugin = factory->instantiatePlugin(pluginId, m_input->getSampleRate()); | |
| 60 | |
| 61 if (!m_plugin) { | |
| 62 std::cerr << "FeatureExtractionPluginTransform: Failed to instantiate plugin \"" | |
| 63 << pluginId.toStdString() << "\"" << std::endl; | |
| 64 return; | |
| 65 } | |
| 66 | |
| 67 if (configurationXml != "") { | |
| 68 PluginXml(m_plugin).setParametersFromXml(configurationXml); | |
| 69 } | |
| 70 | |
| 71 m_blockSize = m_plugin->getPreferredBlockSize(); | |
| 72 m_stepSize = m_plugin->getPreferredStepSize(); | |
| 73 | |
| 74 if (m_blockSize == 0) m_blockSize = 1024; //!!! todo: ask user | |
| 75 if (m_stepSize == 0) m_stepSize = m_blockSize; //!!! likewise | |
| 76 | |
| 77 DenseTimeValueModel *input = getInput(); | |
| 78 if (!input) return; | |
| 79 | |
| 80 size_t channelCount = input->getChannelCount(); | |
| 81 if (m_plugin->getMaxChannelCount() < channelCount) { | |
| 82 channelCount = 1; | |
| 83 } | |
| 84 if (m_plugin->getMinChannelCount() > channelCount) { | |
| 85 std::cerr << "FeatureExtractionPluginTransform:: " | |
| 86 << "Can't provide enough channels to plugin (plugin min " | |
| 87 << m_plugin->getMinChannelCount() << ", max " | |
| 88 << m_plugin->getMaxChannelCount() << ", input model has " | |
| 89 << input->getChannelCount() << ")" << std::endl; | |
| 90 return; | |
| 91 } | |
| 92 | |
| 93 if (!m_plugin->initialise(channelCount, m_stepSize, m_blockSize)) { | |
| 94 std::cerr << "FeatureExtractionPluginTransform: Plugin " | |
| 95 << m_plugin->getName() << " failed to initialise!" << std::endl; | |
| 96 return; | |
| 97 } | |
| 98 | |
| 99 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors(); | |
| 100 | |
| 101 if (outputs.empty()) { | |
| 102 std::cerr << "FeatureExtractionPluginTransform: Plugin \"" | |
| 103 << pluginId.toStdString() << "\" has no outputs" << std::endl; | |
| 104 return; | |
| 105 } | |
| 106 | |
| 107 for (size_t i = 0; i < outputs.size(); ++i) { | |
| 108 if (outputName == "" || outputs[i].name == outputName.toStdString()) { | |
| 109 m_outputFeatureNo = i; | |
| 110 m_descriptor = new Vamp::Plugin::OutputDescriptor | |
| 111 (outputs[i]); | |
| 112 break; | |
| 113 } | |
| 114 } | |
| 115 | |
| 116 if (!m_descriptor) { | |
| 117 std::cerr << "FeatureExtractionPluginTransform: Plugin \"" | |
| 118 << pluginId.toStdString() << "\" has no output named \"" | |
| 119 << outputName.toStdString() << "\"" << std::endl; | |
| 120 return; | |
| 121 } | |
| 122 | |
| 123 // std::cerr << "FeatureExtractionPluginTransform: output sample type " | |
| 124 // << m_descriptor->sampleType << std::endl; | |
| 125 | |
| 126 int binCount = 1; | |
| 127 float minValue = 0.0, maxValue = 0.0; | |
| 128 | |
| 129 if (m_descriptor->hasFixedBinCount) { | |
| 130 binCount = m_descriptor->binCount; | |
| 131 } | |
| 132 | |
| 133 // std::cerr << "FeatureExtractionPluginTransform: output bin count " | |
| 134 // << binCount << std::endl; | |
| 135 | |
| 136 if (binCount > 0 && m_descriptor->hasKnownExtents) { | |
| 137 minValue = m_descriptor->minValue; | |
| 138 maxValue = m_descriptor->maxValue; | |
| 139 } | |
| 140 | |
| 141 size_t modelRate = m_input->getSampleRate(); | |
| 142 size_t modelResolution = 1; | |
| 143 | |
| 144 switch (m_descriptor->sampleType) { | |
| 145 | |
| 146 case Vamp::Plugin::OutputDescriptor::VariableSampleRate: | |
| 147 if (m_descriptor->sampleRate != 0.0) { | |
| 148 modelResolution = size_t(modelRate / m_descriptor->sampleRate + 0.001); | |
| 149 } | |
| 150 break; | |
| 151 | |
| 152 case Vamp::Plugin::OutputDescriptor::OneSamplePerStep: | |
| 153 modelResolution = m_stepSize; | |
| 154 break; | |
| 155 | |
| 156 case Vamp::Plugin::OutputDescriptor::FixedSampleRate: | |
| 157 modelRate = size_t(m_descriptor->sampleRate + 0.001); | |
| 158 break; | |
| 159 } | |
| 160 | |
| 161 if (binCount == 0) { | |
| 162 | |
| 163 m_output = new SparseOneDimensionalModel(modelRate, modelResolution, | |
| 164 false); | |
| 165 | |
| 166 } else if (binCount == 1) { | |
| 167 | |
| 168 SparseTimeValueModel *model = new SparseTimeValueModel | |
| 169 (modelRate, modelResolution, minValue, maxValue, false); | |
| 170 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str()); | |
| 171 | |
| 172 m_output = model; | |
| 173 | |
| 174 } else if (m_descriptor->sampleType == | |
| 175 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
| 176 | |
| 177 // We don't have a sparse 3D model, so interpret this as a | |
| 178 // note model. There's nothing to define which values to use | |
| 179 // as which parameters of the note -- for the moment let's | |
| 180 // treat the first as pitch, second as duration in frames, | |
| 181 // third (if present) as velocity. (Our note model doesn't | |
| 182 // yet store velocity.) | |
| 183 //!!! todo: ask the user! | |
| 184 | |
| 185 NoteModel *model = new NoteModel | |
| 186 (modelRate, modelResolution, minValue, maxValue, false); | |
| 187 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str()); | |
| 188 | |
| 189 m_output = model; | |
| 190 | |
| 191 } else { | |
| 192 | |
| 193 m_output = new DenseThreeDimensionalModel(modelRate, modelResolution, | |
| 194 binCount, false); | |
| 195 | |
| 196 if (!m_descriptor->binNames.empty()) { | |
| 197 std::vector<QString> names; | |
| 198 for (size_t i = 0; i < m_descriptor->binNames.size(); ++i) { | |
| 199 names.push_back(m_descriptor->binNames[i].c_str()); | |
| 200 } | |
| 201 (dynamic_cast<DenseThreeDimensionalModel *>(m_output)) | |
| 202 ->setBinNames(names); | |
| 203 } | |
| 204 } | |
| 205 } | |
| 206 | |
| 207 FeatureExtractionPluginTransform::~FeatureExtractionPluginTransform() | |
| 208 { | |
| 209 delete m_plugin; | |
| 210 delete m_descriptor; | |
| 211 } | |
| 212 | |
| 213 DenseTimeValueModel * | |
| 214 FeatureExtractionPluginTransform::getInput() | |
| 215 { | |
| 216 DenseTimeValueModel *dtvm = | |
| 217 dynamic_cast<DenseTimeValueModel *>(getInputModel()); | |
| 218 if (!dtvm) { | |
| 219 std::cerr << "FeatureExtractionPluginTransform::getInput: WARNING: Input model is not conformable to DenseTimeValueModel" << std::endl; | |
| 220 } | |
| 221 return dtvm; | |
| 222 } | |
| 223 | |
| 224 void | |
| 225 FeatureExtractionPluginTransform::run() | |
| 226 { | |
| 227 DenseTimeValueModel *input = getInput(); | |
| 228 if (!input) return; | |
| 229 | |
| 230 if (!m_output) return; | |
| 231 | |
| 232 size_t sampleRate = m_input->getSampleRate(); | |
| 233 | |
| 234 size_t channelCount = input->getChannelCount(); | |
| 235 if (m_plugin->getMaxChannelCount() < channelCount) { | |
| 236 channelCount = 1; | |
| 237 } | |
| 238 | |
| 239 float **buffers = new float*[channelCount]; | |
| 240 for (size_t ch = 0; ch < channelCount; ++ch) { | |
| 241 buffers[ch] = new float[m_blockSize]; | |
| 242 } | |
| 243 | |
| 244 bool frequencyDomain = (m_plugin->getInputDomain() == | |
| 245 Vamp::Plugin::FrequencyDomain); | |
| 246 std::vector<FFTFuzzyAdapter *> fftAdapters; | |
| 247 | |
| 248 if (frequencyDomain) { | |
| 249 for (size_t ch = 0; ch < channelCount; ++ch) { | |
| 250 fftAdapters.push_back(new FFTFuzzyAdapter | |
| 251 (getInput(), | |
| 252 channelCount == 1 ? m_channel : ch, | |
| 253 HanningWindow, | |
| 254 m_blockSize, | |
| 255 m_stepSize, | |
| 256 m_blockSize, | |
| 257 false)); | |
| 258 } | |
| 259 } | |
| 260 | |
| 261 long startFrame = m_input->getStartFrame(); | |
| 262 long endFrame = m_input->getEndFrame(); | |
| 263 long blockFrame = startFrame; | |
| 264 | |
| 265 long prevCompletion = 0; | |
| 266 | |
| 267 while (1) { | |
| 268 | |
| 269 if (frequencyDomain) { | |
| 270 if (blockFrame - int(m_blockSize)/2 > endFrame) break; | |
| 271 } else { | |
| 272 if (blockFrame >= endFrame) break; | |
| 273 } | |
| 274 | |
| 275 // std::cerr << "FeatureExtractionPluginTransform::run: blockFrame " | |
| 276 // << blockFrame << std::endl; | |
| 277 | |
| 278 long completion = | |
| 279 (((blockFrame - startFrame) / m_stepSize) * 99) / | |
| 280 ( (endFrame - startFrame) / m_stepSize); | |
| 281 | |
| 282 // channelCount is either m_input->channelCount or 1 | |
| 283 | |
| 284 for (size_t ch = 0; ch < channelCount; ++ch) { | |
| 285 if (frequencyDomain) { | |
| 286 int column = (blockFrame - startFrame) / m_stepSize; | |
| 287 for (size_t i = 0; i < m_blockSize/2; ++i) { | |
| 288 fftAdapters[ch]->getValuesAt | |
| 289 (column, i, buffers[ch][i*2], buffers[ch][i*2+1]); | |
| 290 } | |
| 291 /*!!! | |
| 292 float sum = 0.0; | |
| 293 for (size_t i = 0; i < m_blockSize/2; ++i) { | |
| 294 sum += buffers[ch][i*2]; | |
| 295 } | |
| 296 if (fabs(sum) < 0.0001) { | |
| 297 std::cerr << "WARNING: small sum for column " << column << " (sum is " << sum << ")" << std::endl; | |
| 298 } | |
| 299 */ | |
| 300 } else { | |
| 301 getFrames(ch, channelCount, | |
| 302 blockFrame, m_blockSize, buffers[ch]); | |
| 303 } | |
| 304 } | |
| 305 | |
| 306 Vamp::Plugin::FeatureSet features = m_plugin->process | |
| 307 (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate)); | |
| 308 | |
| 309 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) { | |
| 310 Vamp::Plugin::Feature feature = | |
| 311 features[m_outputFeatureNo][fi]; | |
| 312 addFeature(blockFrame, feature); | |
| 313 } | |
| 314 | |
| 315 if (blockFrame == startFrame || completion > prevCompletion) { | |
| 316 setCompletion(completion); | |
| 317 prevCompletion = completion; | |
| 318 } | |
| 319 | |
| 320 blockFrame += m_stepSize; | |
| 321 } | |
| 322 | |
| 323 Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures(); | |
| 324 | |
| 325 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) { | |
| 326 Vamp::Plugin::Feature feature = | |
| 327 features[m_outputFeatureNo][fi]; | |
| 328 addFeature(blockFrame, feature); | |
| 329 } | |
| 330 | |
| 331 if (frequencyDomain) { | |
| 332 for (size_t ch = 0; ch < channelCount; ++ch) { | |
| 333 delete fftAdapters[ch]; | |
| 334 } | |
| 335 } | |
| 336 | |
| 337 setCompletion(100); | |
| 338 } | |
| 339 | |
| 340 void | |
| 341 FeatureExtractionPluginTransform::getFrames(int channel, int channelCount, | |
| 342 long startFrame, long size, | |
| 343 float *buffer) | |
| 344 { | |
| 345 long offset = 0; | |
| 346 | |
| 347 if (startFrame < 0) { | |
| 348 for (int i = 0; i < size && startFrame + i < 0; ++i) { | |
| 349 buffer[i] = 0.0f; | |
| 350 } | |
| 351 offset = -startFrame; | |
| 352 size -= offset; | |
| 353 if (size <= 0) return; | |
| 354 startFrame = 0; | |
| 355 } | |
| 356 | |
| 357 long got = getInput()->getValues | |
| 358 ((channelCount == 1 ? m_channel : channel), | |
| 359 startFrame, startFrame + size, buffer + offset); | |
| 360 | |
| 361 while (got < size) { | |
| 362 buffer[offset + got] = 0.0; | |
| 363 ++got; | |
| 364 } | |
| 365 | |
| 366 if (m_channel == -1 && channelCount == 1 && | |
| 367 getInput()->getChannelCount() > 1) { | |
| 368 // use mean instead of sum, as plugin input | |
| 369 int cc = getInput()->getChannelCount(); | |
| 370 for (long i = 0; i < size; ++i) { | |
| 371 buffer[i] /= cc; | |
| 372 } | |
| 373 } | |
| 374 } | |
| 375 | |
| 376 void | |
| 377 FeatureExtractionPluginTransform::addFeature(size_t blockFrame, | |
| 378 const Vamp::Plugin::Feature &feature) | |
| 379 { | |
| 380 size_t inputRate = m_input->getSampleRate(); | |
| 381 | |
| 382 // std::cerr << "FeatureExtractionPluginTransform::addFeature(" | |
| 383 // << blockFrame << ")" << std::endl; | |
| 384 | |
| 385 int binCount = 1; | |
| 386 if (m_descriptor->hasFixedBinCount) { | |
| 387 binCount = m_descriptor->binCount; | |
| 388 } | |
| 389 | |
| 390 size_t frame = blockFrame; | |
| 391 | |
| 392 if (m_descriptor->sampleType == | |
| 393 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
| 394 | |
| 395 if (!feature.hasTimestamp) { | |
| 396 std::cerr | |
| 397 << "WARNING: FeatureExtractionPluginTransform::addFeature: " | |
| 398 << "Feature has variable sample rate but no timestamp!" | |
| 399 << std::endl; | |
| 400 return; | |
| 401 } else { | |
| 402 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate); | |
| 403 } | |
| 404 | |
| 405 } else if (m_descriptor->sampleType == | |
| 406 Vamp::Plugin::OutputDescriptor::FixedSampleRate) { | |
| 407 | |
| 408 if (feature.hasTimestamp) { | |
| 409 //!!! warning: sampleRate may be non-integral | |
| 410 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, | |
| 411 m_descriptor->sampleRate); | |
| 412 } else { | |
| 413 frame = m_output->getEndFrame() + 1; | |
| 414 } | |
| 415 } | |
| 416 | |
| 417 if (binCount == 0) { | |
| 418 | |
| 419 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>(); | |
| 420 if (!model) return; | |
| 421 model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str())); | |
| 422 | |
| 423 } else if (binCount == 1) { | |
| 424 | |
| 425 float value = 0.0; | |
| 426 if (feature.values.size() > 0) value = feature.values[0]; | |
| 427 | |
| 428 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>(); | |
| 429 if (!model) return; | |
| 430 model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str())); | |
| 431 | |
| 432 } else if (m_descriptor->sampleType == | |
| 433 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
| 434 | |
| 435 float pitch = 0.0; | |
| 436 if (feature.values.size() > 0) pitch = feature.values[0]; | |
| 437 | |
| 438 float duration = 1; | |
| 439 if (feature.values.size() > 1) duration = feature.values[1]; | |
| 440 | |
| 441 float velocity = 100; | |
| 442 if (feature.values.size() > 2) velocity = feature.values[2]; | |
| 443 | |
| 444 NoteModel *model = getOutput<NoteModel>(); | |
| 445 if (!model) return; | |
| 446 | |
| 447 model->addPoint(NoteModel::Point(frame, pitch, duration, feature.label.c_str())); | |
| 448 | |
| 449 } else { | |
| 450 | |
| 451 DenseThreeDimensionalModel::BinValueSet values = feature.values; | |
| 452 | |
| 453 DenseThreeDimensionalModel *model = getOutput<DenseThreeDimensionalModel>(); | |
| 454 if (!model) return; | |
| 455 | |
| 456 model->setBinValues(frame, values); | |
| 457 } | |
| 458 } | |
| 459 | |
| 460 void | |
| 461 FeatureExtractionPluginTransform::setCompletion(int completion) | |
| 462 { | |
| 463 int binCount = 1; | |
| 464 if (m_descriptor->hasFixedBinCount) { | |
| 465 binCount = m_descriptor->binCount; | |
| 466 } | |
| 467 | |
| 468 if (binCount == 0) { | |
| 469 | |
| 470 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>(); | |
| 471 if (!model) return; | |
| 472 model->setCompletion(completion); | |
| 473 | |
| 474 } else if (binCount == 1) { | |
| 475 | |
| 476 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>(); | |
| 477 if (!model) return; | |
| 478 model->setCompletion(completion); | |
| 479 | |
| 480 } else if (m_descriptor->sampleType == | |
| 481 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
| 482 | |
| 483 NoteModel *model = getOutput<NoteModel>(); | |
| 484 if (!model) return; | |
| 485 model->setCompletion(completion); | |
| 486 | |
| 487 } else { | |
| 488 | |
| 489 DenseThreeDimensionalModel *model = getOutput<DenseThreeDimensionalModel>(); | |
| 490 if (!model) return; | |
| 491 model->setCompletion(completion); | |
| 492 } | |
| 493 } | |
| 494 | 
