annotate transform/FeatureExtractionPluginTransform.cpp @ 25:e74f508db18c

* Add setRatio method to the time stretcher, and make it possible to change the ratio without having to construct and replace the time stretcher. This means we can do it seamlessly. Add a lot more ratios to the time stretch control in the main window
author Chris Cannam
date Fri, 15 Sep 2006 15:35:06 +0000
parents e764bbf2b090
children d88d117e0c34
rev   line source
Chris@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@0 2
Chris@0 3 /*
Chris@0 4 Sonic Visualiser
Chris@0 5 An audio file viewer and annotation editor.
Chris@0 6 Centre for Digital Music, Queen Mary, University of London.
Chris@0 7 This file copyright 2006 Chris Cannam.
Chris@0 8
Chris@0 9 This program is free software; you can redistribute it and/or
Chris@0 10 modify it under the terms of the GNU General Public License as
Chris@0 11 published by the Free Software Foundation; either version 2 of the
Chris@0 12 License, or (at your option) any later version. See the file
Chris@0 13 COPYING included with this distribution for more information.
Chris@0 14 */
Chris@0 15
Chris@0 16 #include "FeatureExtractionPluginTransform.h"
Chris@0 17
Chris@0 18 #include "plugin/FeatureExtractionPluginFactory.h"
Chris@0 19 #include "plugin/PluginXml.h"
Chris@0 20 #include "vamp-sdk/Plugin.h"
Chris@0 21
Chris@1 22 #include "data/model/Model.h"
Chris@0 23 #include "base/Window.h"
Chris@1 24 #include "data/model/SparseOneDimensionalModel.h"
Chris@1 25 #include "data/model/SparseTimeValueModel.h"
Chris@3 26 #include "data/model/EditableDenseThreeDimensionalModel.h"
Chris@1 27 #include "data/model/DenseTimeValueModel.h"
Chris@1 28 #include "data/model/NoteModel.h"
Chris@3 29 #include "data/model/FFTModel.h"
Chris@0 30
Chris@0 31 #include <fftw3.h>
Chris@0 32
Chris@0 33 #include <iostream>
Chris@0 34
Chris@0 35 FeatureExtractionPluginTransform::FeatureExtractionPluginTransform(Model *inputModel,
Chris@0 36 QString pluginId,
Chris@0 37 int channel,
Chris@0 38 QString configurationXml,
Chris@0 39 QString outputName) :
Chris@0 40 Transform(inputModel),
Chris@0 41 m_plugin(0),
Chris@0 42 m_channel(channel),
Chris@0 43 m_stepSize(0),
Chris@0 44 m_blockSize(0),
Chris@0 45 m_descriptor(0),
Chris@0 46 m_outputFeatureNo(0)
Chris@0 47 {
Chris@0 48 // std::cerr << "FeatureExtractionPluginTransform::FeatureExtractionPluginTransform: plugin " << pluginId.toStdString() << ", outputName " << outputName.toStdString() << std::endl;
Chris@0 49
Chris@0 50 FeatureExtractionPluginFactory *factory =
Chris@0 51 FeatureExtractionPluginFactory::instanceFor(pluginId);
Chris@0 52
Chris@0 53 if (!factory) {
Chris@0 54 std::cerr << "FeatureExtractionPluginTransform: No factory available for plugin id \""
Chris@0 55 << pluginId.toStdString() << "\"" << std::endl;
Chris@0 56 return;
Chris@0 57 }
Chris@0 58
Chris@0 59 m_plugin = factory->instantiatePlugin(pluginId, m_input->getSampleRate());
Chris@0 60
Chris@0 61 if (!m_plugin) {
Chris@0 62 std::cerr << "FeatureExtractionPluginTransform: Failed to instantiate plugin \""
Chris@0 63 << pluginId.toStdString() << "\"" << std::endl;
Chris@0 64 return;
Chris@0 65 }
Chris@0 66
Chris@0 67 if (configurationXml != "") {
Chris@0 68 PluginXml(m_plugin).setParametersFromXml(configurationXml);
Chris@0 69 }
Chris@0 70
Chris@0 71 m_blockSize = m_plugin->getPreferredBlockSize();
Chris@0 72 m_stepSize = m_plugin->getPreferredStepSize();
Chris@0 73
Chris@0 74 if (m_blockSize == 0) m_blockSize = 1024; //!!! todo: ask user
Chris@0 75 if (m_stepSize == 0) m_stepSize = m_blockSize; //!!! likewise
Chris@0 76
Chris@0 77 DenseTimeValueModel *input = getInput();
Chris@0 78 if (!input) return;
Chris@0 79
Chris@0 80 size_t channelCount = input->getChannelCount();
Chris@0 81 if (m_plugin->getMaxChannelCount() < channelCount) {
Chris@0 82 channelCount = 1;
Chris@0 83 }
Chris@0 84 if (m_plugin->getMinChannelCount() > channelCount) {
Chris@0 85 std::cerr << "FeatureExtractionPluginTransform:: "
Chris@0 86 << "Can't provide enough channels to plugin (plugin min "
Chris@0 87 << m_plugin->getMinChannelCount() << ", max "
Chris@0 88 << m_plugin->getMaxChannelCount() << ", input model has "
Chris@0 89 << input->getChannelCount() << ")" << std::endl;
Chris@0 90 return;
Chris@0 91 }
Chris@0 92
Chris@0 93 if (!m_plugin->initialise(channelCount, m_stepSize, m_blockSize)) {
Chris@0 94 std::cerr << "FeatureExtractionPluginTransform: Plugin "
Chris@0 95 << m_plugin->getName() << " failed to initialise!" << std::endl;
Chris@0 96 return;
Chris@0 97 }
Chris@0 98
Chris@0 99 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
Chris@0 100
Chris@0 101 if (outputs.empty()) {
Chris@0 102 std::cerr << "FeatureExtractionPluginTransform: Plugin \""
Chris@0 103 << pluginId.toStdString() << "\" has no outputs" << std::endl;
Chris@0 104 return;
Chris@0 105 }
Chris@0 106
Chris@0 107 for (size_t i = 0; i < outputs.size(); ++i) {
Chris@0 108 if (outputName == "" || outputs[i].name == outputName.toStdString()) {
Chris@0 109 m_outputFeatureNo = i;
Chris@0 110 m_descriptor = new Vamp::Plugin::OutputDescriptor
Chris@0 111 (outputs[i]);
Chris@0 112 break;
Chris@0 113 }
Chris@0 114 }
Chris@0 115
Chris@0 116 if (!m_descriptor) {
Chris@0 117 std::cerr << "FeatureExtractionPluginTransform: Plugin \""
Chris@0 118 << pluginId.toStdString() << "\" has no output named \""
Chris@0 119 << outputName.toStdString() << "\"" << std::endl;
Chris@0 120 return;
Chris@0 121 }
Chris@0 122
Chris@0 123 // std::cerr << "FeatureExtractionPluginTransform: output sample type "
Chris@0 124 // << m_descriptor->sampleType << std::endl;
Chris@0 125
Chris@0 126 int binCount = 1;
Chris@0 127 float minValue = 0.0, maxValue = 0.0;
Chris@0 128
Chris@0 129 if (m_descriptor->hasFixedBinCount) {
Chris@0 130 binCount = m_descriptor->binCount;
Chris@0 131 }
Chris@0 132
Chris@0 133 // std::cerr << "FeatureExtractionPluginTransform: output bin count "
Chris@0 134 // << binCount << std::endl;
Chris@0 135
Chris@0 136 if (binCount > 0 && m_descriptor->hasKnownExtents) {
Chris@0 137 minValue = m_descriptor->minValue;
Chris@0 138 maxValue = m_descriptor->maxValue;
Chris@0 139 }
Chris@0 140
Chris@0 141 size_t modelRate = m_input->getSampleRate();
Chris@0 142 size_t modelResolution = 1;
Chris@0 143
Chris@0 144 switch (m_descriptor->sampleType) {
Chris@0 145
Chris@0 146 case Vamp::Plugin::OutputDescriptor::VariableSampleRate:
Chris@0 147 if (m_descriptor->sampleRate != 0.0) {
Chris@0 148 modelResolution = size_t(modelRate / m_descriptor->sampleRate + 0.001);
Chris@0 149 }
Chris@0 150 break;
Chris@0 151
Chris@0 152 case Vamp::Plugin::OutputDescriptor::OneSamplePerStep:
Chris@0 153 modelResolution = m_stepSize;
Chris@0 154 break;
Chris@0 155
Chris@0 156 case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
Chris@0 157 modelRate = size_t(m_descriptor->sampleRate + 0.001);
Chris@0 158 break;
Chris@0 159 }
Chris@0 160
Chris@0 161 if (binCount == 0) {
Chris@0 162
Chris@0 163 m_output = new SparseOneDimensionalModel(modelRate, modelResolution,
Chris@0 164 false);
Chris@0 165
Chris@0 166 } else if (binCount == 1) {
Chris@0 167
Chris@0 168 SparseTimeValueModel *model = new SparseTimeValueModel
Chris@0 169 (modelRate, modelResolution, minValue, maxValue, false);
Chris@0 170 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());
Chris@0 171
Chris@0 172 m_output = model;
Chris@0 173
Chris@0 174 } else if (m_descriptor->sampleType ==
Chris@0 175 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
Chris@0 176
Chris@0 177 // We don't have a sparse 3D model, so interpret this as a
Chris@0 178 // note model. There's nothing to define which values to use
Chris@0 179 // as which parameters of the note -- for the moment let's
Chris@0 180 // treat the first as pitch, second as duration in frames,
Chris@0 181 // third (if present) as velocity. (Our note model doesn't
Chris@0 182 // yet store velocity.)
Chris@0 183 //!!! todo: ask the user!
Chris@0 184
Chris@0 185 NoteModel *model = new NoteModel
Chris@0 186 (modelRate, modelResolution, minValue, maxValue, false);
Chris@0 187 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());
Chris@0 188
Chris@0 189 m_output = model;
Chris@0 190
Chris@0 191 } else {
Chris@0 192
Chris@3 193 m_output = new EditableDenseThreeDimensionalModel
Chris@3 194 (modelRate, modelResolution, binCount, false);
Chris@0 195
Chris@0 196 if (!m_descriptor->binNames.empty()) {
Chris@0 197 std::vector<QString> names;
Chris@0 198 for (size_t i = 0; i < m_descriptor->binNames.size(); ++i) {
Chris@0 199 names.push_back(m_descriptor->binNames[i].c_str());
Chris@0 200 }
Chris@3 201 (dynamic_cast<EditableDenseThreeDimensionalModel *>(m_output))
Chris@0 202 ->setBinNames(names);
Chris@0 203 }
Chris@0 204 }
Chris@0 205 }
Chris@0 206
Chris@0 207 FeatureExtractionPluginTransform::~FeatureExtractionPluginTransform()
Chris@0 208 {
Chris@0 209 delete m_plugin;
Chris@0 210 delete m_descriptor;
Chris@0 211 }
Chris@0 212
Chris@0 213 DenseTimeValueModel *
Chris@0 214 FeatureExtractionPluginTransform::getInput()
Chris@0 215 {
Chris@0 216 DenseTimeValueModel *dtvm =
Chris@0 217 dynamic_cast<DenseTimeValueModel *>(getInputModel());
Chris@0 218 if (!dtvm) {
Chris@0 219 std::cerr << "FeatureExtractionPluginTransform::getInput: WARNING: Input model is not conformable to DenseTimeValueModel" << std::endl;
Chris@0 220 }
Chris@0 221 return dtvm;
Chris@0 222 }
Chris@0 223
Chris@0 224 void
Chris@0 225 FeatureExtractionPluginTransform::run()
Chris@0 226 {
Chris@0 227 DenseTimeValueModel *input = getInput();
Chris@0 228 if (!input) return;
Chris@0 229
Chris@0 230 if (!m_output) return;
Chris@0 231
Chris@0 232 size_t sampleRate = m_input->getSampleRate();
Chris@0 233
Chris@0 234 size_t channelCount = input->getChannelCount();
Chris@0 235 if (m_plugin->getMaxChannelCount() < channelCount) {
Chris@0 236 channelCount = 1;
Chris@0 237 }
Chris@0 238
Chris@0 239 float **buffers = new float*[channelCount];
Chris@0 240 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@0 241 buffers[ch] = new float[m_blockSize];
Chris@0 242 }
Chris@0 243
Chris@0 244 bool frequencyDomain = (m_plugin->getInputDomain() ==
Chris@0 245 Vamp::Plugin::FrequencyDomain);
Chris@3 246 std::vector<FFTModel *> fftModels;
Chris@0 247
Chris@0 248 if (frequencyDomain) {
Chris@0 249 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@3 250 fftModels.push_back(new FFTModel
Chris@0 251 (getInput(),
Chris@0 252 channelCount == 1 ? m_channel : ch,
Chris@0 253 HanningWindow,
Chris@0 254 m_blockSize,
Chris@0 255 m_stepSize,
Chris@0 256 m_blockSize,
Chris@0 257 false));
Chris@0 258 }
Chris@0 259 }
Chris@0 260
Chris@0 261 long startFrame = m_input->getStartFrame();
Chris@0 262 long endFrame = m_input->getEndFrame();
Chris@0 263 long blockFrame = startFrame;
Chris@0 264
Chris@0 265 long prevCompletion = 0;
Chris@0 266
Chris@0 267 while (1) {
Chris@0 268
Chris@0 269 if (frequencyDomain) {
Chris@0 270 if (blockFrame - int(m_blockSize)/2 > endFrame) break;
Chris@0 271 } else {
Chris@0 272 if (blockFrame >= endFrame) break;
Chris@0 273 }
Chris@0 274
Chris@0 275 // std::cerr << "FeatureExtractionPluginTransform::run: blockFrame "
Chris@0 276 // << blockFrame << std::endl;
Chris@0 277
Chris@0 278 long completion =
Chris@0 279 (((blockFrame - startFrame) / m_stepSize) * 99) /
Chris@0 280 ( (endFrame - startFrame) / m_stepSize);
Chris@0 281
Chris@0 282 // channelCount is either m_input->channelCount or 1
Chris@0 283
Chris@0 284 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@0 285 if (frequencyDomain) {
Chris@0 286 int column = (blockFrame - startFrame) / m_stepSize;
Chris@0 287 for (size_t i = 0; i < m_blockSize/2; ++i) {
Chris@3 288 fftModels[ch]->getValuesAt
Chris@0 289 (column, i, buffers[ch][i*2], buffers[ch][i*2+1]);
Chris@0 290 }
Chris@0 291 /*!!!
Chris@0 292 float sum = 0.0;
Chris@0 293 for (size_t i = 0; i < m_blockSize/2; ++i) {
Chris@0 294 sum += buffers[ch][i*2];
Chris@0 295 }
Chris@0 296 if (fabs(sum) < 0.0001) {
Chris@0 297 std::cerr << "WARNING: small sum for column " << column << " (sum is " << sum << ")" << std::endl;
Chris@0 298 }
Chris@0 299 */
Chris@0 300 } else {
Chris@0 301 getFrames(ch, channelCount,
Chris@0 302 blockFrame, m_blockSize, buffers[ch]);
Chris@0 303 }
Chris@0 304 }
Chris@0 305
Chris@0 306 Vamp::Plugin::FeatureSet features = m_plugin->process
Chris@0 307 (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate));
Chris@0 308
Chris@0 309 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
Chris@0 310 Vamp::Plugin::Feature feature =
Chris@0 311 features[m_outputFeatureNo][fi];
Chris@0 312 addFeature(blockFrame, feature);
Chris@0 313 }
Chris@0 314
Chris@0 315 if (blockFrame == startFrame || completion > prevCompletion) {
Chris@0 316 setCompletion(completion);
Chris@0 317 prevCompletion = completion;
Chris@0 318 }
Chris@0 319
Chris@0 320 blockFrame += m_stepSize;
Chris@0 321 }
Chris@0 322
Chris@0 323 Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures();
Chris@0 324
Chris@0 325 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
Chris@0 326 Vamp::Plugin::Feature feature =
Chris@0 327 features[m_outputFeatureNo][fi];
Chris@0 328 addFeature(blockFrame, feature);
Chris@0 329 }
Chris@0 330
Chris@0 331 if (frequencyDomain) {
Chris@0 332 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@3 333 delete fftModels[ch];
Chris@0 334 }
Chris@0 335 }
Chris@0 336
Chris@0 337 setCompletion(100);
Chris@0 338 }
Chris@0 339
Chris@0 340 void
Chris@0 341 FeatureExtractionPluginTransform::getFrames(int channel, int channelCount,
Chris@0 342 long startFrame, long size,
Chris@0 343 float *buffer)
Chris@0 344 {
Chris@0 345 long offset = 0;
Chris@0 346
Chris@0 347 if (startFrame < 0) {
Chris@0 348 for (int i = 0; i < size && startFrame + i < 0; ++i) {
Chris@0 349 buffer[i] = 0.0f;
Chris@0 350 }
Chris@0 351 offset = -startFrame;
Chris@0 352 size -= offset;
Chris@0 353 if (size <= 0) return;
Chris@0 354 startFrame = 0;
Chris@0 355 }
Chris@0 356
Chris@0 357 long got = getInput()->getValues
Chris@0 358 ((channelCount == 1 ? m_channel : channel),
Chris@0 359 startFrame, startFrame + size, buffer + offset);
Chris@0 360
Chris@0 361 while (got < size) {
Chris@0 362 buffer[offset + got] = 0.0;
Chris@0 363 ++got;
Chris@0 364 }
Chris@0 365
Chris@0 366 if (m_channel == -1 && channelCount == 1 &&
Chris@0 367 getInput()->getChannelCount() > 1) {
Chris@0 368 // use mean instead of sum, as plugin input
Chris@0 369 int cc = getInput()->getChannelCount();
Chris@0 370 for (long i = 0; i < size; ++i) {
Chris@0 371 buffer[i] /= cc;
Chris@0 372 }
Chris@0 373 }
Chris@0 374 }
Chris@0 375
Chris@0 376 void
Chris@0 377 FeatureExtractionPluginTransform::addFeature(size_t blockFrame,
Chris@0 378 const Vamp::Plugin::Feature &feature)
Chris@0 379 {
Chris@0 380 size_t inputRate = m_input->getSampleRate();
Chris@0 381
Chris@0 382 // std::cerr << "FeatureExtractionPluginTransform::addFeature("
Chris@0 383 // << blockFrame << ")" << std::endl;
Chris@0 384
Chris@0 385 int binCount = 1;
Chris@0 386 if (m_descriptor->hasFixedBinCount) {
Chris@0 387 binCount = m_descriptor->binCount;
Chris@0 388 }
Chris@0 389
Chris@0 390 size_t frame = blockFrame;
Chris@0 391
Chris@0 392 if (m_descriptor->sampleType ==
Chris@0 393 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
Chris@0 394
Chris@0 395 if (!feature.hasTimestamp) {
Chris@0 396 std::cerr
Chris@0 397 << "WARNING: FeatureExtractionPluginTransform::addFeature: "
Chris@0 398 << "Feature has variable sample rate but no timestamp!"
Chris@0 399 << std::endl;
Chris@0 400 return;
Chris@0 401 } else {
Chris@0 402 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate);
Chris@0 403 }
Chris@0 404
Chris@0 405 } else if (m_descriptor->sampleType ==
Chris@0 406 Vamp::Plugin::OutputDescriptor::FixedSampleRate) {
Chris@0 407
Chris@0 408 if (feature.hasTimestamp) {
Chris@0 409 //!!! warning: sampleRate may be non-integral
Chris@0 410 frame = Vamp::RealTime::realTime2Frame(feature.timestamp,
Chris@0 411 m_descriptor->sampleRate);
Chris@0 412 } else {
Chris@0 413 frame = m_output->getEndFrame() + 1;
Chris@0 414 }
Chris@0 415 }
Chris@0 416
Chris@0 417 if (binCount == 0) {
Chris@0 418
Chris@0 419 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>();
Chris@0 420 if (!model) return;
Chris@0 421 model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str()));
Chris@0 422
Chris@0 423 } else if (binCount == 1) {
Chris@0 424
Chris@0 425 float value = 0.0;
Chris@0 426 if (feature.values.size() > 0) value = feature.values[0];
Chris@0 427
Chris@0 428 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>();
Chris@0 429 if (!model) return;
Chris@0 430 model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str()));
Chris@0 431
Chris@0 432 } else if (m_descriptor->sampleType ==
Chris@0 433 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
Chris@0 434
Chris@0 435 float pitch = 0.0;
Chris@0 436 if (feature.values.size() > 0) pitch = feature.values[0];
Chris@0 437
Chris@0 438 float duration = 1;
Chris@0 439 if (feature.values.size() > 1) duration = feature.values[1];
Chris@0 440
Chris@0 441 float velocity = 100;
Chris@0 442 if (feature.values.size() > 2) velocity = feature.values[2];
Chris@0 443
Chris@0 444 NoteModel *model = getOutput<NoteModel>();
Chris@0 445 if (!model) return;
Chris@0 446
Chris@0 447 model->addPoint(NoteModel::Point(frame, pitch, duration, feature.label.c_str()));
Chris@0 448
Chris@0 449 } else {
Chris@0 450
Chris@0 451 DenseThreeDimensionalModel::BinValueSet values = feature.values;
Chris@0 452
Chris@3 453 EditableDenseThreeDimensionalModel *model =
Chris@3 454 getOutput<EditableDenseThreeDimensionalModel>();
Chris@0 455 if (!model) return;
Chris@0 456
Chris@0 457 model->setBinValues(frame, values);
Chris@0 458 }
Chris@0 459 }
Chris@0 460
Chris@0 461 void
Chris@0 462 FeatureExtractionPluginTransform::setCompletion(int completion)
Chris@0 463 {
Chris@0 464 int binCount = 1;
Chris@0 465 if (m_descriptor->hasFixedBinCount) {
Chris@0 466 binCount = m_descriptor->binCount;
Chris@0 467 }
Chris@0 468
Chris@0 469 if (binCount == 0) {
Chris@0 470
Chris@0 471 SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>();
Chris@0 472 if (!model) return;
Chris@0 473 model->setCompletion(completion);
Chris@0 474
Chris@0 475 } else if (binCount == 1) {
Chris@0 476
Chris@0 477 SparseTimeValueModel *model = getOutput<SparseTimeValueModel>();
Chris@0 478 if (!model) return;
Chris@0 479 model->setCompletion(completion);
Chris@0 480
Chris@0 481 } else if (m_descriptor->sampleType ==
Chris@0 482 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
Chris@0 483
Chris@0 484 NoteModel *model = getOutput<NoteModel>();
Chris@0 485 if (!model) return;
Chris@0 486 model->setCompletion(completion);
Chris@0 487
Chris@0 488 } else {
Chris@0 489
Chris@3 490 EditableDenseThreeDimensionalModel *model =
Chris@3 491 getOutput<EditableDenseThreeDimensionalModel>();
Chris@0 492 if (!model) return;
Chris@0 493 model->setCompletion(completion);
Chris@0 494 }
Chris@0 495 }
Chris@0 496