annotate transform/FeatureExtractionPluginTransform.cpp @ 76:af2725b5d6fe

* Implement harmonic cursor in spectrogram * Implement layer export. This doesn't quite do the right thing for the SV XML layer export yet -- it doesn't include layer display information, so when imported, it only creates an invisible model. Could also do with fixing CSV file import so as to work correctly for note and text layers.
author Chris Cannam
date Mon, 10 Apr 2006 17:22:59 +0000
parents 47fd14e29813
children 8cd01027502f
rev   line source
Chris@0 1
Chris@49 2 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@0 3
Chris@0 4 /*
Chris@52 5 Sonic Visualiser
Chris@52 6 An audio file viewer and annotation editor.
Chris@52 7 Centre for Digital Music, Queen Mary, University of London.
Chris@52 8 This file copyright 2006 Chris Cannam.
Chris@0 9
Chris@52 10 This program is free software; you can redistribute it and/or
Chris@52 11 modify it under the terms of the GNU General Public License as
Chris@52 12 published by the Free Software Foundation; either version 2 of the
Chris@52 13 License, or (at your option) any later version. See the file
Chris@52 14 COPYING included with this distribution for more information.
Chris@0 15 */
Chris@0 16
Chris@0 17 #include "FeatureExtractionPluginTransform.h"
Chris@0 18
Chris@0 19 #include "plugin/FeatureExtractionPluginFactory.h"
Chris@66 20 #include "plugin/PluginXml.h"
Chris@66 21 #include "vamp-sdk/Plugin.h"
Chris@0 22
Chris@0 23 #include "base/Model.h"
Chris@67 24 #include "base/Window.h"
Chris@0 25 #include "model/SparseOneDimensionalModel.h"
Chris@0 26 #include "model/SparseTimeValueModel.h"
Chris@0 27 #include "model/DenseThreeDimensionalModel.h"
Chris@0 28 #include "model/DenseTimeValueModel.h"
Chris@0 29
Chris@67 30 #include <fftw3.h>
Chris@67 31
Chris@0 32 #include <iostream>
Chris@0 33
Chris@0 34 FeatureExtractionPluginTransform::FeatureExtractionPluginTransform(Model *inputModel,
Chris@0 35 QString pluginId,
Chris@64 36 int channel,
Chris@56 37 QString configurationXml,
Chris@0 38 QString outputName) :
Chris@0 39 Transform(inputModel),
Chris@0 40 m_plugin(0),
Chris@64 41 m_channel(channel),
Chris@68 42 m_stepSize(0),
Chris@68 43 m_blockSize(0),
Chris@0 44 m_descriptor(0),
Chris@0 45 m_outputFeatureNo(0)
Chris@0 46 {
Chris@0 47 std::cerr << "FeatureExtractionPluginTransform::FeatureExtractionPluginTransform: plugin " << pluginId.toStdString() << ", outputName " << outputName.toStdString() << std::endl;
Chris@0 48
Chris@0 49 FeatureExtractionPluginFactory *factory =
Chris@0 50 FeatureExtractionPluginFactory::instanceFor(pluginId);
Chris@0 51
Chris@0 52 if (!factory) {
Chris@0 53 std::cerr << "FeatureExtractionPluginTransform: No factory available for plugin id \""
Chris@0 54 << pluginId.toStdString() << "\"" << std::endl;
Chris@0 55 return;
Chris@0 56 }
Chris@0 57
Chris@0 58 m_plugin = factory->instantiatePlugin(pluginId, m_input->getSampleRate());
Chris@0 59
Chris@0 60 if (!m_plugin) {
Chris@0 61 std::cerr << "FeatureExtractionPluginTransform: Failed to instantiate plugin \""
Chris@0 62 << pluginId.toStdString() << "\"" << std::endl;
Chris@0 63 return;
Chris@0 64 }
Chris@0 65
Chris@56 66 if (configurationXml != "") {
Chris@66 67 PluginXml(m_plugin).setParametersFromXml(configurationXml);
Chris@56 68 }
Chris@56 69
Chris@68 70 m_blockSize = m_plugin->getPreferredBlockSize();
Chris@68 71 m_stepSize = m_plugin->getPreferredStepSize();
Chris@68 72
Chris@68 73 if (m_blockSize == 0) m_blockSize = 1024; //!!! todo: ask user
Chris@68 74 if (m_stepSize == 0) m_stepSize = m_blockSize; //!!! likewise
Chris@68 75
Chris@74 76 //!!! cope with plugins that request non-power-of-2 block sizes in
Chris@74 77 // the frequency domain!
Chris@74 78
Chris@74 79 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
Chris@0 80
Chris@0 81 if (outputs.empty()) {
Chris@0 82 std::cerr << "FeatureExtractionPluginTransform: Plugin \""
Chris@0 83 << pluginId.toStdString() << "\" has no outputs" << std::endl;
Chris@0 84 return;
Chris@0 85 }
Chris@0 86
Chris@0 87 for (size_t i = 0; i < outputs.size(); ++i) {
Chris@0 88 if (outputName == "" || outputs[i].name == outputName.toStdString()) {
Chris@0 89 m_outputFeatureNo = i;
Chris@66 90 m_descriptor = new Vamp::Plugin::OutputDescriptor
Chris@0 91 (outputs[i]);
Chris@0 92 break;
Chris@0 93 }
Chris@0 94 }
Chris@0 95
Chris@0 96 if (!m_descriptor) {
Chris@0 97 std::cerr << "FeatureExtractionPluginTransform: Plugin \""
Chris@0 98 << pluginId.toStdString() << "\" has no output named \""
Chris@0 99 << outputName.toStdString() << "\"" << std::endl;
Chris@0 100 return;
Chris@0 101 }
Chris@0 102
Chris@0 103 std::cerr << "FeatureExtractionPluginTransform: output sample type "
Chris@0 104 << m_descriptor->sampleType << std::endl;
Chris@0 105
Chris@70 106 int binCount = 1;
Chris@0 107 float minValue = 0.0, maxValue = 0.0;
Chris@0 108
Chris@70 109 if (m_descriptor->hasFixedBinCount) {
Chris@70 110 binCount = m_descriptor->binCount;
Chris@0 111 }
Chris@0 112
Chris@70 113 if (binCount > 0 && m_descriptor->hasKnownExtents) {
Chris@0 114 minValue = m_descriptor->minValue;
Chris@0 115 maxValue = m_descriptor->maxValue;
Chris@0 116 }
Chris@0 117
Chris@0 118 size_t modelRate = m_input->getSampleRate();
Chris@0 119 size_t modelResolution = 1;
Chris@0 120
Chris@0 121 switch (m_descriptor->sampleType) {
Chris@0 122
Chris@66 123 case Vamp::Plugin::OutputDescriptor::VariableSampleRate:
Chris@0 124 if (m_descriptor->sampleRate != 0.0) {
Chris@0 125 modelResolution = size_t(modelRate / m_descriptor->sampleRate + 0.001);
Chris@0 126 }
Chris@0 127 break;
Chris@0 128
Chris@66 129 case Vamp::Plugin::OutputDescriptor::OneSamplePerStep:
Chris@72 130 modelResolution = m_stepSize;
Chris@0 131 break;
Chris@0 132
Chris@66 133 case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
Chris@73 134 modelRate = size_t(m_descriptor->sampleRate + 0.001);
Chris@0 135 break;
Chris@0 136 }
Chris@0 137
Chris@70 138 if (binCount == 0) {
Chris@0 139
Chris@20 140 m_output = new SparseOneDimensionalModel(modelRate, modelResolution,
Chris@20 141 false);
Chris@0 142
Chris@70 143 } else if (binCount == 1 ||
Chris@0 144
Chris@0 145 // We don't have a sparse 3D model
Chris@0 146 m_descriptor->sampleType ==
Chris@66 147 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
Chris@0 148
Chris@63 149 SparseTimeValueModel *model = new SparseTimeValueModel
Chris@63 150 (modelRate, modelResolution, minValue, maxValue, false);
Chris@63 151 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());
Chris@63 152
Chris@63 153 m_output = model;
Chris@0 154
Chris@0 155 } else {
Chris@0 156
Chris@0 157 m_output = new DenseThreeDimensionalModel(modelRate, modelResolution,
Chris@70 158 binCount, false);
Chris@20 159
Chris@70 160 if (!m_descriptor->binNames.empty()) {
Chris@20 161 std::vector<QString> names;
Chris@70 162 for (size_t i = 0; i < m_descriptor->binNames.size(); ++i) {
Chris@70 163 names.push_back(m_descriptor->binNames[i].c_str());
Chris@20 164 }
Chris@20 165 (dynamic_cast<DenseThreeDimensionalModel *>(m_output))
Chris@20 166 ->setBinNames(names);
Chris@20 167 }
Chris@0 168 }
Chris@0 169 }
Chris@0 170
Chris@0 171 FeatureExtractionPluginTransform::~FeatureExtractionPluginTransform()
Chris@0 172 {
Chris@0 173 delete m_plugin;
Chris@0 174 delete m_descriptor;
Chris@0 175 }
Chris@0 176
Chris@0 177 DenseTimeValueModel *
Chris@0 178 FeatureExtractionPluginTransform::getInput()
Chris@0 179 {
Chris@0 180 DenseTimeValueModel *dtvm =
Chris@0 181 dynamic_cast<DenseTimeValueModel *>(getInputModel());
Chris@0 182 if (!dtvm) {
Chris@0 183 std::cerr << "FeatureExtractionPluginTransform::getInput: WARNING: Input model is not conformable to DenseTimeValueModel" << std::endl;
Chris@0 184 }
Chris@0 185 return dtvm;
Chris@0 186 }
Chris@0 187
Chris@0 188 void
Chris@0 189 FeatureExtractionPluginTransform::run()
Chris@0 190 {
Chris@0 191 DenseTimeValueModel *input = getInput();
Chris@0 192 if (!input) return;
Chris@0 193
Chris@0 194 if (!m_output) return;
Chris@0 195
Chris@0 196 size_t channelCount = input->getChannelCount();
Chris@0 197 if (m_plugin->getMaxChannelCount() < channelCount) {
Chris@0 198 channelCount = 1;
Chris@0 199 }
Chris@0 200 if (m_plugin->getMinChannelCount() > channelCount) {
Chris@0 201 std::cerr << "FeatureExtractionPluginTransform::run: "
Chris@0 202 << "Can't provide enough channels to plugin (plugin min "
Chris@0 203 << m_plugin->getMinChannelCount() << ", max "
Chris@0 204 << m_plugin->getMaxChannelCount() << ", input model has "
Chris@0 205 << input->getChannelCount() << ")" << std::endl;
Chris@0 206 return;
Chris@0 207 }
Chris@0 208
Chris@0 209 size_t sampleRate = m_input->getSampleRate();
Chris@0 210
Chris@69 211 if (!m_plugin->initialise(channelCount, m_stepSize, m_blockSize)) {
Chris@69 212 std::cerr << "FeatureExtractionPluginTransform::run: Plugin "
Chris@69 213 << m_plugin->getName() << " failed to initialise!" << std::endl;
Chris@69 214 return;
Chris@69 215 }
Chris@0 216
Chris@0 217 float **buffers = new float*[channelCount];
Chris@0 218 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@68 219 buffers[ch] = new float[m_blockSize];
Chris@0 220 }
Chris@0 221
Chris@67 222 double *fftInput = 0;
Chris@67 223 fftw_complex *fftOutput = 0;
Chris@67 224 fftw_plan fftPlan = 0;
Chris@68 225 Window<double> windower(HanningWindow, m_blockSize);
Chris@67 226
Chris@67 227 if (m_plugin->getInputDomain() == Vamp::Plugin::FrequencyDomain) {
Chris@67 228
Chris@68 229 fftInput = (double *)fftw_malloc(m_blockSize * sizeof(double));
Chris@68 230 fftOutput = (fftw_complex *)fftw_malloc(m_blockSize * sizeof(fftw_complex));
Chris@68 231 fftPlan = fftw_plan_dft_r2c_1d(m_blockSize, fftInput, fftOutput,
Chris@67 232 FFTW_ESTIMATE);
Chris@67 233 if (!fftPlan) {
Chris@67 234 std::cerr << "ERROR: FeatureExtractionPluginTransform::run(): fftw_plan failed! Results will be garbage" << std::endl;
Chris@67 235 }
Chris@67 236 }
Chris@67 237
Chris@70 238 long startFrame = m_input->getStartFrame();
Chris@70 239 long endFrame = m_input->getEndFrame();
Chris@70 240 long blockFrame = startFrame;
Chris@0 241
Chris@70 242 long prevCompletion = 0;
Chris@0 243
Chris@70 244 while (1) {
Chris@70 245
Chris@70 246 if (fftPlan) {
Chris@73 247 if (blockFrame - int(m_blockSize)/2 > endFrame) break;
Chris@70 248 } else {
Chris@70 249 if (blockFrame >= endFrame) break;
Chris@70 250 }
Chris@0 251
Chris@0 252 // std::cerr << "FeatureExtractionPluginTransform::run: blockFrame "
Chris@0 253 // << blockFrame << std::endl;
Chris@0 254
Chris@70 255 long completion =
Chris@68 256 (((blockFrame - startFrame) / m_stepSize) * 99) /
Chris@68 257 ( (endFrame - startFrame) / m_stepSize);
Chris@0 258
Chris@0 259 // channelCount is either m_input->channelCount or 1
Chris@0 260
Chris@73 261 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@70 262 if (fftPlan) {
Chris@70 263 getFrames(ch, channelCount,
Chris@70 264 blockFrame - m_blockSize/2, m_blockSize, buffers[ch]);
Chris@70 265 } else {
Chris@70 266 getFrames(ch, channelCount,
Chris@70 267 blockFrame, m_blockSize, buffers[ch]);
Chris@70 268 }
Chris@70 269 }
Chris@70 270
Chris@67 271 if (fftPlan) {
Chris@73 272 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@73 273 for (size_t i = 0; i < m_blockSize; ++i) {
Chris@67 274 fftInput[i] = buffers[ch][i];
Chris@67 275 }
Chris@67 276 windower.cut(fftInput);
Chris@73 277 for (size_t i = 0; i < m_blockSize/2; ++i) {
Chris@67 278 double temp = fftInput[i];
Chris@68 279 fftInput[i] = fftInput[i + m_blockSize/2];
Chris@68 280 fftInput[i + m_blockSize/2] = temp;
Chris@67 281 }
Chris@67 282 fftw_execute(fftPlan);
Chris@73 283 for (size_t i = 0; i < m_blockSize/2; ++i) {
Chris@67 284 buffers[ch][i*2] = fftOutput[i][0];
Chris@67 285 buffers[ch][i*2 + 1] = fftOutput[i][1];
Chris@67 286 }
Chris@67 287 }
Chris@67 288 }
Chris@67 289
Chris@66 290 Vamp::Plugin::FeatureSet features = m_plugin->process
Chris@66 291 (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate));
Chris@0 292
Chris@0 293 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
Chris@66 294 Vamp::Plugin::Feature feature =
Chris@0 295 features[m_outputFeatureNo][fi];
Chris@0 296 addFeature(blockFrame, feature);
Chris@0 297 }
Chris@0 298
Chris@0 299 if (blockFrame == startFrame || completion > prevCompletion) {
Chris@0 300 setCompletion(completion);
Chris@0 301 prevCompletion = completion;
Chris@0 302 }
Chris@0 303
Chris@68 304 blockFrame += m_stepSize;
Chris@0 305 }
Chris@0 306
Chris@67 307 if (fftPlan) {
Chris@67 308 fftw_destroy_plan(fftPlan);
Chris@67 309 fftw_free(fftInput);
Chris@67 310 fftw_free(fftOutput);
Chris@67 311 }
Chris@67 312
Chris@66 313 Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures();
Chris@0 314
Chris@0 315 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
Chris@66 316 Vamp::Plugin::Feature feature =
Chris@0 317 features[m_outputFeatureNo][fi];
Chris@0 318 addFeature(blockFrame, feature);
Chris@0 319 }
Chris@0 320
Chris@0 321 setCompletion(100);
Chris@0 322 }
Chris@0 323
Chris@70 324 void
Chris@70 325 FeatureExtractionPluginTransform::getFrames(int channel, int channelCount,
Chris@70 326 long startFrame, long size,
Chris@70 327 float *buffer)
Chris@70 328 {
Chris@70 329 long offset = 0;
Chris@70 330
Chris@70 331 if (startFrame < 0) {
Chris@70 332 for (int i = 0; i < size && startFrame + i < 0; ++i) {
Chris@70 333 buffer[i] = 0.0f;
Chris@70 334 }
Chris@70 335 offset = -startFrame;
Chris@70 336 size -= offset;
Chris@70 337 if (size <= 0) return;
Chris@70 338 startFrame = 0;
Chris@70 339 }
Chris@70 340
Chris@73 341 long got = getInput()->getValues
Chris@70 342 ((channelCount == 1 ? m_channel : channel),
Chris@70 343 startFrame, startFrame + size, buffer + offset);
Chris@70 344
Chris@70 345 while (got < size) {
Chris@70 346 buffer[offset + got] = 0.0;
Chris@70 347 ++got;
Chris@70 348 }
Chris@74 349
Chris@74 350 if (m_channel == -1 && channelCount == 1 &&
Chris@74 351 getInput()->getChannelCount() > 1) {
Chris@74 352 // use mean instead of sum, as plugin input
Chris@74 353 int cc = getInput()->getChannelCount();
Chris@74 354 for (long i = 0; i < size; ++i) {
Chris@74 355 buffer[i] /= cc;
Chris@74 356 }
Chris@74 357 }
Chris@70 358 }
Chris@0 359
Chris@0 360 void
Chris@0 361 FeatureExtractionPluginTransform::addFeature(size_t blockFrame,
Chris@66 362 const Vamp::Plugin::Feature &feature)
Chris@0 363 {
Chris@0 364 size_t inputRate = m_input->getSampleRate();
Chris@0 365
Chris@0 366 // std::cerr << "FeatureExtractionPluginTransform::addFeature("
Chris@0 367 // << blockFrame << ")" << std::endl;
Chris@0 368
Chris@70 369 int binCount = 1;
Chris@70 370 if (m_descriptor->hasFixedBinCount) {
Chris@70 371 binCount = m_descriptor->binCount;
Chris@0 372 }
Chris@0 373
Chris@0 374 size_t frame = blockFrame;
Chris@0 375
Chris@0 376 if (m_descriptor->sampleType ==
Chris@66 377 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
Chris@0 378
Chris@0 379 if (!feature.hasTimestamp) {
Chris@0 380 std::cerr
Chris@0 381 << "WARNING: FeatureExtractionPluginTransform::addFeature: "
Chris@0 382 << "Feature has variable sample rate but no timestamp!"
Chris@0 383 << std::endl;
Chris@0 384 return;
Chris@0 385 } else {
Chris@66 386 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate);
Chris@0 387 }
Chris@0 388
Chris@0 389 } else if (m_descriptor->sampleType ==
Chris@66 390 Vamp::Plugin::OutputDescriptor::FixedSampleRate) {
Chris@0 391
Chris@0 392 if (feature.hasTimestamp) {
Chris@0 393 //!!! warning: sampleRate may be non-integral
Chris@66 394 frame = Vamp::RealTime::realTime2Frame(feature.timestamp,
Chris@66 395 m_descriptor->sampleRate);
Chris@0 396 } else {
Chris@0 397 frame = m_output->getEndFrame() + 1;
Chris@0 398 }
Chris@0 399 }
Chris@0 400
Chris@70 401 if (binCount == 0) {
Chris@0 402
Chris@0 403 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>();
Chris@0 404 if (!model) return;
Chris@0 405 model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str()));
Chris@0 406
Chris@70 407 } else if (binCount == 1 ||
Chris@0 408 m_descriptor->sampleType ==
Chris@66 409 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
Chris@0 410
Chris@0 411 float value = 0.0;
Chris@0 412 if (feature.values.size() > 0) value = feature.values[0];
Chris@0 413
Chris@0 414 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>();
Chris@0 415 if (!model) return;
Chris@0 416 model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str()));
Chris@0 417
Chris@0 418 } else {
Chris@0 419
Chris@0 420 DenseThreeDimensionalModel::BinValueSet values = feature.values;
Chris@0 421
Chris@0 422 DenseThreeDimensionalModel *model = getOutput<DenseThreeDimensionalModel>();
Chris@0 423 if (!model) return;
Chris@0 424
Chris@0 425 model->setBinValues(frame, values);
Chris@0 426 }
Chris@0 427 }
Chris@0 428
Chris@0 429 void
Chris@0 430 FeatureExtractionPluginTransform::setCompletion(int completion)
Chris@0 431 {
Chris@70 432 int binCount = 1;
Chris@70 433 if (m_descriptor->hasFixedBinCount) {
Chris@70 434 binCount = m_descriptor->binCount;
Chris@0 435 }
Chris@0 436
Chris@70 437 if (binCount == 0) {
Chris@0 438
Chris@0 439 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>();
Chris@0 440 if (!model) return;
Chris@0 441 model->setCompletion(completion);
Chris@0 442
Chris@70 443 } else if (binCount == 1 ||
Chris@0 444 m_descriptor->sampleType ==
Chris@66 445 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
Chris@0 446
Chris@0 447 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>();
Chris@0 448 if (!model) return;
Chris@0 449 model->setCompletion(completion);
Chris@0 450
Chris@0 451 } else {
Chris@0 452
Chris@19 453 DenseThreeDimensionalModel *model = getOutput<DenseThreeDimensionalModel>();
Chris@19 454 if (!model) return;
Chris@19 455 model->setCompletion(completion);
Chris@0 456 }
Chris@0 457 }
Chris@0 458