annotate transform/FeatureExtractionModelTransformer.cpp @ 875:3e6ed8a8577b tonioni

Use a sparse time-value model only for outputs with fixed bin count of 1, not for those with unknown bin count. (Precursor to using more than one model for outputs with unknown bin count)
author Chris Cannam
date Tue, 28 Jan 2014 18:52:22 +0000
parents 13803edd513d
children 47aa3aeb687b
rev   line source
Chris@320 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@320 2
Chris@320 3 /*
Chris@320 4 Sonic Visualiser
Chris@320 5 An audio file viewer and annotation editor.
Chris@320 6 Centre for Digital Music, Queen Mary, University of London.
Chris@320 7 This file copyright 2006 Chris Cannam and QMUL.
Chris@320 8
Chris@320 9 This program is free software; you can redistribute it and/or
Chris@320 10 modify it under the terms of the GNU General Public License as
Chris@320 11 published by the Free Software Foundation; either version 2 of the
Chris@320 12 License, or (at your option) any later version. See the file
Chris@320 13 COPYING included with this distribution for more information.
Chris@320 14 */
Chris@320 15
Chris@331 16 #include "FeatureExtractionModelTransformer.h"
Chris@320 17
Chris@320 18 #include "plugin/FeatureExtractionPluginFactory.h"
Chris@320 19 #include "plugin/PluginXml.h"
Chris@475 20 #include <vamp-hostsdk/Plugin.h>
Chris@320 21
Chris@320 22 #include "data/model/Model.h"
Chris@320 23 #include "base/Window.h"
Chris@387 24 #include "base/Exceptions.h"
Chris@320 25 #include "data/model/SparseOneDimensionalModel.h"
Chris@320 26 #include "data/model/SparseTimeValueModel.h"
Chris@320 27 #include "data/model/EditableDenseThreeDimensionalModel.h"
Chris@320 28 #include "data/model/DenseTimeValueModel.h"
Chris@320 29 #include "data/model/NoteModel.h"
gyorgyf@786 30 #include "data/model/FlexiNoteModel.h"
Chris@441 31 #include "data/model/RegionModel.h"
Chris@320 32 #include "data/model/FFTModel.h"
Chris@320 33 #include "data/model/WaveFileModel.h"
Chris@558 34 #include "rdf/PluginRDFDescription.h"
Chris@320 35
Chris@350 36 #include "TransformFactory.h"
Chris@350 37
Chris@320 38 #include <iostream>
Chris@320 39
Chris@859 40 #include <QSettings>
Chris@859 41
Chris@350 42 FeatureExtractionModelTransformer::FeatureExtractionModelTransformer(Input in,
Chris@859 43 const Transform &transform) :
Chris@350 44 ModelTransformer(in, transform),
Chris@859 45 m_plugin(0)
Chris@320 46 {
Chris@690 47 // SVDEBUG << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: plugin " << pluginId << ", outputName " << m_transform.getOutput() << endl;
Chris@350 48
Chris@849 49 initialise();
Chris@849 50 }
Chris@849 51
Chris@849 52 FeatureExtractionModelTransformer::FeatureExtractionModelTransformer(Input in,
Chris@859 53 const Transforms &transforms) :
Chris@849 54 ModelTransformer(in, transforms),
Chris@859 55 m_plugin(0)
Chris@849 56 {
Chris@849 57 // SVDEBUG << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: plugin " << pluginId << ", outputName " << m_transform.getOutput() << endl;
Chris@849 58
Chris@849 59 initialise();
Chris@849 60 }
Chris@849 61
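// "Similar" here means identical in every respect except the choice of
// plugin output: for example (output identifiers purely illustrative),
// two transforms for the same plugin and parameters whose outputs are
// "notes" and "regions" respectively compare as similar under this test.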
Chris@849 62 static bool
Chris@849 63 areTransformsSimilar(const Transform &t1, const Transform &t2)
Chris@849 64 {
Chris@849 65 Transform t2o(t2);
Chris@849 66 t2o.setOutput(t1.getOutput());
Chris@849 67 return t1 == t2o;
Chris@849 68 }
Chris@849 69
Chris@849 70 bool
Chris@849 71 FeatureExtractionModelTransformer::initialise()
Chris@849 72 {
Chris@849 73 // All transforms must use the same plugin, parameters, and
Chris@849 74 // inputs: they can differ only in choice of plugin output. So we
Chris@849 75 // initialise based purely on the first transform in the list (but
Chris@849 76 // first check that they are actually similar as promised)
Chris@849 77
Chris@849 78 for (int j = 1; j < (int)m_transforms.size(); ++j) {
Chris@849 79 if (!areTransformsSimilar(m_transforms[0], m_transforms[j])) {
Chris@849 80 m_message = tr("Transforms supplied to a single FeatureExtractionModelTransformer instance must be similar in every respect except plugin output");
Chris@849 81 return false;
Chris@849 82 }
Chris@849 83 }
Chris@849 84
Chris@849 85 Transform primaryTransform = m_transforms[0];
Chris@849 86
Chris@849 87 QString pluginId = primaryTransform.getPluginIdentifier();
Chris@320 88
Chris@320 89 FeatureExtractionPluginFactory *factory =
Chris@320 90 FeatureExtractionPluginFactory::instanceFor(pluginId);
Chris@320 91
Chris@320 92 if (!factory) {
Chris@361 93 m_message = tr("No factory available for feature extraction plugin id \"%1\" (unknown plugin type, or internal error?)").arg(pluginId);
Chris@849 94 return false;
Chris@320 95 }
Chris@320 96
Chris@350 97 DenseTimeValueModel *input = getConformingInput();
Chris@350 98 if (!input) {
Chris@361 99 m_message = tr("Input model for feature extraction plugin \"%1\" is of wrong type (internal error?)").arg(pluginId);
Chris@849 100 return false;
Chris@350 101 }
Chris@320 102
Chris@350 103 m_plugin = factory->instantiatePlugin(pluginId, input->getSampleRate());
Chris@320 104 if (!m_plugin) {
Chris@361 105 m_message = tr("Failed to instantiate plugin \"%1\"").arg(pluginId);
Chris@849 106 return false;
Chris@320 107 }
Chris@320 108
Chris@350 109 TransformFactory::getInstance()->makeContextConsistentWithPlugin
Chris@849 110 (primaryTransform, m_plugin);
Chris@343 111
Chris@350 112 TransformFactory::getInstance()->setPluginParameters
Chris@849 113 (primaryTransform, m_plugin);
Chris@320 114
Chris@320 115 size_t channelCount = input->getChannelCount();
Chris@320 116 if (m_plugin->getMaxChannelCount() < channelCount) {
Chris@320 117 channelCount = 1;
Chris@320 118 }
Chris@320 119 if (m_plugin->getMinChannelCount() > channelCount) {
Chris@361 120 m_message = tr("Cannot provide enough channels to feature extraction plugin \"%1\" (plugin min is %2, max %3; input model has %4)")
Chris@361 121 .arg(pluginId)
Chris@361 122 .arg(m_plugin->getMinChannelCount())
Chris@361 123 .arg(m_plugin->getMaxChannelCount())
Chris@361 124 .arg(input->getChannelCount());
Chris@849 125 return false;
Chris@320 126 }
Chris@320 127
Chris@690 128 SVDEBUG << "Initialising feature extraction plugin with channels = "
Chris@849 129 << channelCount << ", step = " << primaryTransform.getStepSize()
Chris@849 130 << ", block = " << primaryTransform.getBlockSize() << endl;
Chris@320 131
Chris@320 132 if (!m_plugin->initialise(channelCount,
Chris@849 133 primaryTransform.getStepSize(),
Chris@849 134 primaryTransform.getBlockSize())) {
Chris@361 135
Chris@849 136 size_t pstep = primaryTransform.getStepSize();
Chris@849 137 size_t pblock = primaryTransform.getBlockSize();
Chris@361 138
Chris@850 139 ///!!! hang on, this isn't right -- we're modifying a copy
Chris@849 140 primaryTransform.setStepSize(0);
Chris@849 141 primaryTransform.setBlockSize(0);
Chris@361 142 TransformFactory::getInstance()->makeContextConsistentWithPlugin
Chris@849 143 (primaryTransform, m_plugin);
Chris@361 144
Chris@849 145 if (primaryTransform.getStepSize() != pstep ||
Chris@849 146 primaryTransform.getBlockSize() != pblock) {
Chris@361 147
Chris@361 148 if (!m_plugin->initialise(channelCount,
Chris@849 149 primaryTransform.getStepSize(),
Chris@849 150 primaryTransform.getBlockSize())) {
Chris@361 151
Chris@361 152 m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
Chris@849 153 return false;
Chris@361 154
Chris@361 155 } else {
Chris@361 156
Chris@361 157 m_message = tr("Feature extraction plugin \"%1\" rejected the given step and block sizes (%2 and %3); using plugin defaults (%4 and %5) instead")
Chris@361 158 .arg(pluginId)
Chris@361 159 .arg(pstep)
Chris@361 160 .arg(pblock)
Chris@849 161 .arg(primaryTransform.getStepSize())
Chris@849 162 .arg(primaryTransform.getBlockSize());
Chris@361 163 }
Chris@361 164
Chris@361 165 } else {
Chris@361 166
Chris@361 167 m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
Chris@849 168 return false;
Chris@361 169 }
Chris@320 170 }
Chris@320 171
Chris@849 172 if (primaryTransform.getPluginVersion() != "") {
Chris@366 173 QString pv = QString("%1").arg(m_plugin->getPluginVersion());
Chris@849 174 if (pv != primaryTransform.getPluginVersion()) {
Chris@366 175 QString vm = tr("Transform was configured for version %1 of plugin \"%2\", but the plugin being used is version %3")
Chris@849 176 .arg(primaryTransform.getPluginVersion())
Chris@366 177 .arg(pluginId)
Chris@366 178 .arg(pv);
Chris@366 179 if (m_message != "") {
Chris@366 180 m_message = QString("%1; %2").arg(vm).arg(m_message);
Chris@366 181 } else {
Chris@366 182 m_message = vm;
Chris@366 183 }
Chris@366 184 }
Chris@366 185 }
Chris@366 186
Chris@320 187 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
Chris@320 188
Chris@320 189 if (outputs.empty()) {
Chris@361 190 m_message = tr("Plugin \"%1\" has no outputs").arg(pluginId);
Chris@849 191 return false;
Chris@320 192 }
Chris@320 193
Chris@849 194 for (int j = 0; j < (int)m_transforms.size(); ++j) {
Chris@849 195
Chris@849 196 for (int i = 0; i < (int)outputs.size(); ++i) {
Chris@849 197 // SVDEBUG << "comparing output " << i << " name \"" << outputs[i].identifier << "\" with expected \"" << m_transform.getOutput() << "\"" << endl;
Chris@849 198 if (m_transforms[j].getOutput() == "" ||
Chris@849 199 outputs[i].identifier == m_transforms[j].getOutput().toStdString()) {
Chris@849 200 m_outputNos.push_back(i);
Chris@849 201 m_descriptors.push_back(new Vamp::Plugin::OutputDescriptor(outputs[i]));
Chris@849 202 m_fixedRateFeatureNos.push_back(-1); // we increment before use
Chris@849 203 break;
Chris@849 204 }
Chris@849 205 }
Chris@849 206
Chris@849 207 if ((int)m_descriptors.size() <= j) {
Chris@849 208 m_message = tr("Plugin \"%1\" has no output named \"%2\"")
Chris@849 209 .arg(pluginId)
Chris@849 210 .arg(m_transforms[j].getOutput());
Chris@849 211 return false;
Chris@849 212 }
Chris@320 213 }
Chris@320 214
Chris@849 215 for (int j = 0; j < (int)m_transforms.size(); ++j) {
Chris@849 216 createOutputModel(j);
Chris@849 217 }
Chris@849 218
Chris@849 219 return true;
Chris@558 220 }
Chris@558 221
Chris@558 222 void
Chris@849 223 FeatureExtractionModelTransformer::createOutputModel(int n)
Chris@558 224 {
Chris@558 225 DenseTimeValueModel *input = getConformingInput();
Chris@558 226
Chris@843 227 // cerr << "FeatureExtractionModelTransformer::createOutputModel: sample type " << m_descriptor->sampleType << ", rate " << m_descriptor->sampleRate << endl;
Chris@712 228
Chris@849 229 PluginRDFDescription description(m_transforms[n].getPluginIdentifier());
Chris@849 230 QString outputId = m_transforms[n].getOutput();
Chris@558 231
Chris@320 232 int binCount = 1;
Chris@320 233 float minValue = 0.0, maxValue = 0.0;
Chris@320 234 bool haveExtents = false;
Chris@320 235
Chris@849 236 if (m_descriptors[n]->hasFixedBinCount) {
Chris@849 237 binCount = m_descriptors[n]->binCount;
Chris@320 238 }
Chris@320 239
Chris@843 240 // cerr << "FeatureExtractionModelTransformer: output bin count "
Chris@843 241 // << binCount << endl;
Chris@320 242
Chris@849 243 if (binCount > 0 && m_descriptors[n]->hasKnownExtents) {
Chris@849 244 minValue = m_descriptors[n]->minValue;
Chris@849 245 maxValue = m_descriptors[n]->maxValue;
Chris@320 246 haveExtents = true;
Chris@320 247 }
Chris@320 248
Chris@350 249 size_t modelRate = input->getSampleRate();
Chris@320 250 size_t modelResolution = 1;
Chris@712 251
Chris@849 252 if (m_descriptors[n]->sampleType !=
Chris@785 253 Vamp::Plugin::OutputDescriptor::OneSamplePerStep) {
Chris@849 254 if (m_descriptors[n]->sampleRate > input->getSampleRate()) {
Chris@843 255 cerr << "WARNING: plugin reports output sample rate as "
Chris@849 256 << m_descriptors[n]->sampleRate << " (can't display features with finer resolution than the input rate of " << input->getSampleRate() << ")" << endl;
Chris@785 257 }
Chris@785 258 }
Chris@785 259
Chris@849 260 switch (m_descriptors[n]->sampleType) {
Chris@320 261
Chris@320 262 case Vamp::Plugin::OutputDescriptor::VariableSampleRate:
Chris@849 263 if (m_descriptors[n]->sampleRate != 0.0) {
Chris@849 264 modelResolution = size_t(modelRate / m_descriptors[n]->sampleRate + 0.001);
Chris@320 265 }
Chris@320 266 break;
Chris@320 267
Chris@320 268 case Vamp::Plugin::OutputDescriptor::OneSamplePerStep:
Chris@849 269 modelResolution = m_transforms[n].getStepSize();
Chris@320 270 break;
Chris@320 271
Chris@320 272 case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
Chris@451 273 //!!! SV doesn't actually support display of models that have
Chris@451 274 //!!! different underlying rates together -- so we always set
Chris@451 275 //!!! the model rate to be the input model's rate, and adjust
Chris@451 276 //!!! the resolution appropriately. We can't properly display
Chris@451 277 //!!! data with a higher resolution than the base model at all
Chris@849 278 // modelRate = size_t(m_descriptors[n]->sampleRate + 0.001);
Chris@849 279 if (m_descriptors[n]->sampleRate > input->getSampleRate()) {
Chris@451 280 modelResolution = 1;
Chris@451 281 } else {
Chris@451 282 modelResolution = size_t(input->getSampleRate() /
Chris@849 283 m_descriptors[n]->sampleRate);
Chris@451 284 }
Chris@320 285 break;
Chris@320 286 }
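// Worked example (figures purely illustrative): with a 44100 Hz input
// model, a VariableSampleRate output reporting sampleRate 2.0 gives
// modelResolution = size_t(44100 / 2.0 + 0.001) = 22050; a
// FixedSampleRate output reporting 100 keeps modelRate at the input's
// 44100 and gets modelResolution = 44100 / 100 = 441; OneSamplePerStep
// simply uses the transform's step size.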
Chris@320 287
Chris@441 288 bool preDurationPlugin = (m_plugin->getVampApiVersion() < 2);
Chris@441 289
Chris@849 290 Model *out = 0;
Chris@849 291
Chris@441 292 if (binCount == 0 &&
Chris@849 293 (preDurationPlugin || !m_descriptors[n]->hasDuration)) {
Chris@320 294
Chris@445 295 // Anything with no value and no duration is an instant
Chris@445 296
Chris@849 297 out = new SparseOneDimensionalModel(modelRate, modelResolution, false);
Chris@558 298 QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
Chris@849 299 out->setRDFTypeURI(outputEventTypeURI);
Chris@558 300
Chris@441 301 } else if ((preDurationPlugin && binCount > 1 &&
Chris@849 302 (m_descriptors[n]->sampleType ==
Chris@441 303 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) ||
Chris@849 304 (!preDurationPlugin && m_descriptors[n]->hasDuration)) {
Chris@441 305
Chris@441 306 // For plugins using the old v1 API without explicit duration,
Chris@441 307 // we treat anything that has multiple bins (i.e. that has the
Chris@441 308 // potential to have value and duration) and a variable sample
Chris@441 309 // rate as a note model, taking its values as pitch, duration
Chris@441 310 // and velocity (if present) respectively. This is the same
Chris@441 311 // behaviour as always applied by SV to these plugins in the
Chris@441 312 // past.
Chris@441 313
Chris@441 314 // For plugins with the newer API, we treat anything with
Chris@441 315 // duration as either a note model with pitch and velocity, or
Chris@441 316 // a region model.
Chris@441 317
Chris@441 318 // How do we know whether it's an interval or note model?
Chris@441 319 // What's the essential difference? Is a note model any
Chris@441 320 // interval model using a Hz or "MIDI pitch" scale? There
Chris@441 321 // isn't really a reliable test for "MIDI pitch"... Does a
Chris@441 322 // note model always have velocity? This is a good question
Chris@441 323 // to be addressed by accompanying RDF, but for the moment we
Chris@441 324 // will do the following...
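// In outline, the rules applied below are:
//   binCount > 1                              -> note model
//   unit "Hz", or containing "MIDI" / "midi"  -> note model
//   anything else reaching this branch        -> region model
// and a note model is realised as a FlexiNoteModel rather than a
// NoteModel when the "use-flexi-note-model" setting is enabled.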
Chris@441 325
Chris@441 326 bool isNoteModel = false;
Chris@441 327
Chris@441 328 // Regions have only value (and duration -- we can't extract a
Chris@441 329 // region model from an old-style plugin that doesn't support
Chris@441 330 // duration)
Chris@441 331 if (binCount > 1) isNoteModel = true;
Chris@441 332
Chris@595 333 // Regions do not have units of Hz or MIDI things (a sweeping
Chris@595 334 // assumption!)
Chris@849 335 if (m_descriptors[n]->unit == "Hz" ||
Chris@849 336 m_descriptors[n]->unit.find("MIDI") != std::string::npos ||
Chris@849 337 m_descriptors[n]->unit.find("midi") != std::string::npos) {
Chris@595 338 isNoteModel = true;
Chris@595 339 }
Chris@441 340
Chris@441 341 // If we had a "sparse 3D model", we would have the additional
Chris@441 342 // problem of determining whether to use that here (if bin
Chris@441 343 // count > 1). But we don't.
Chris@441 344
Chris@859 345 QSettings settings;
Chris@859 346 settings.beginGroup("Transformer");
Chris@859 347 bool flexi = settings.value("use-flexi-note-model", false).toBool();
Chris@859 348 settings.endGroup();
Chris@859 349
Chris@859 350 cerr << "flexi = " << flexi << endl;
Chris@859 351
Chris@859 352 if (isNoteModel && !flexi) {
Chris@441 353
Chris@441 354 NoteModel *model;
Chris@441 355 if (haveExtents) {
Chris@859 356 model = new NoteModel
Chris@859 357 (modelRate, modelResolution, minValue, maxValue, false);
Chris@441 358 } else {
Chris@859 359 model = new NoteModel
Chris@859 360 (modelRate, modelResolution, false);
gyorgyf@786 361 }
Chris@849 362 model->setScaleUnits(m_descriptors[n]->unit.c_str());
Chris@849 363 out = model;
gyorgyf@786 364
Chris@859 365 } else if (isNoteModel && flexi) {
gyorgyf@786 366
gyorgyf@786 367 FlexiNoteModel *model;
gyorgyf@786 368 if (haveExtents) {
Chris@859 369 model = new FlexiNoteModel
Chris@859 370 (modelRate, modelResolution, minValue, maxValue, false);
gyorgyf@786 371 } else {
Chris@859 372 model = new FlexiNoteModel
Chris@859 373 (modelRate, modelResolution, false);
Chris@441 374 }
Chris@849 375 model->setScaleUnits(m_descriptors[n]->unit.c_str());
Chris@849 376 out = model;
Chris@441 377
Chris@441 378 } else {
Chris@441 379
Chris@441 380 RegionModel *model;
Chris@441 381 if (haveExtents) {
Chris@441 382 model = new RegionModel
Chris@441 383 (modelRate, modelResolution, minValue, maxValue, false);
Chris@441 384 } else {
Chris@441 385 model = new RegionModel
Chris@441 386 (modelRate, modelResolution, false);
Chris@441 387 }
Chris@849 388 model->setScaleUnits(m_descriptors[n]->unit.c_str());
Chris@849 389 out = model;
Chris@441 390 }
Chris@441 391
Chris@558 392 QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
Chris@849 393 out->setRDFTypeURI(outputEventTypeURI);
Chris@558 394
Chris@875 395 } else if ((binCount == 1 && m_descriptors[n]->hasFixedBinCount) ||
Chris@849 396 (m_descriptors[n]->sampleType ==
Chris@441 397 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) {
Chris@441 398
Chris@441 399 // Anything that is not a 1D, note, or interval model and that
Chris@441 400 // has only one value per result must be a sparse time value
Chris@441 401 // model.
Chris@441 402
Chris@441 403 // Anything that is not a 1D, note, or interval model and that
Chris@441 404 // has a variable sample rate is also treated as a sparse time
Chris@441 405 // value model regardless of its bin count, because we lack a
Chris@441 406 // sparse 3D model.
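// With the test above (the change made in this revision), an output
// declaring a fixed bin count of exactly 1 comes here, as does any
// variable-sample-rate output regardless of bin count; an output with
// an unknown bin count and a fixed or one-sample-per-step rate now
// falls through to the dense three-dimensional branch below instead.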
Chris@320 407
Chris@320 408 SparseTimeValueModel *model;
Chris@320 409 if (haveExtents) {
Chris@320 410 model = new SparseTimeValueModel
Chris@320 411 (modelRate, modelResolution, minValue, maxValue, false);
Chris@320 412 } else {
Chris@320 413 model = new SparseTimeValueModel
Chris@320 414 (modelRate, modelResolution, false);
Chris@320 415 }
Chris@558 416
Chris@558 417 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
Chris@849 418 model->setScaleUnits(outputs[m_outputNos[n]].unit.c_str());
Chris@320 419
Chris@849 420 out = model;
Chris@320 421
Chris@558 422 QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
Chris@849 423 out->setRDFTypeURI(outputEventTypeURI);
Chris@558 424
Chris@441 425 } else {
Chris@320 426
Chris@441 427 // Anything that is not a 1D, note, or interval model and that
Chris@441 428 // has a fixed sample rate and more than one value per result
Chris@441 429 // must be a dense 3D model.
Chris@320 430
Chris@320 431 EditableDenseThreeDimensionalModel *model =
Chris@320 432 new EditableDenseThreeDimensionalModel
Chris@535 433 (modelRate, modelResolution, binCount,
Chris@535 434 EditableDenseThreeDimensionalModel::BasicMultirateCompression,
Chris@535 435 false);
Chris@320 436
Chris@849 437 if (!m_descriptors[n]->binNames.empty()) {
Chris@320 438 std::vector<QString> names;
Chris@849 439 for (size_t i = 0; i < m_descriptors[n]->binNames.size(); ++i) {
Chris@849 440 names.push_back(m_descriptors[n]->binNames[i].c_str());
Chris@320 441 }
Chris@320 442 model->setBinNames(names);
Chris@320 443 }
Chris@320 444
Chris@849 445 out = model;
Chris@558 446
Chris@558 447 QString outputSignalTypeURI = description.getOutputSignalTypeURI(outputId);
Chris@849 448 out->setRDFTypeURI(outputSignalTypeURI);
Chris@320 449 }
Chris@333 450
Chris@849 451 if (out) {
Chris@849 452 out->setSourceModel(input);
Chris@849 453 m_outputs.push_back(out);
Chris@849 454 }
Chris@320 455 }
Chris@320 456
Chris@331 457 FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()
Chris@320 458 {
Chris@690 459 // SVDEBUG << "FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()" << endl;
Chris@320 460 delete m_plugin;
Chris@850 461 for (int j = 0; j < (int)m_descriptors.size(); ++j) {
Chris@850 462 delete m_descriptors[j];
Chris@850 463 }
Chris@320 464 }
Chris@320 465
Chris@320 466 DenseTimeValueModel *
Chris@350 467 FeatureExtractionModelTransformer::getConformingInput()
Chris@320 468 {
Chris@690 469 // SVDEBUG << "FeatureExtractionModelTransformer::getConformingInput: input model is " << getInputModel() << endl;
Chris@408 470
Chris@320 471 DenseTimeValueModel *dtvm =
Chris@320 472 dynamic_cast<DenseTimeValueModel *>(getInputModel());
Chris@320 473 if (!dtvm) {
Chris@690 474 SVDEBUG << "FeatureExtractionModelTransformer::getConformingInput: WARNING: Input model is not conformable to DenseTimeValueModel" << endl;
Chris@320 475 }
Chris@320 476 return dtvm;
Chris@320 477 }
Chris@320 478
Chris@320 479 void
Chris@331 480 FeatureExtractionModelTransformer::run()
Chris@320 481 {
Chris@350 482 DenseTimeValueModel *input = getConformingInput();
Chris@320 483 if (!input) return;
Chris@320 484
Chris@849 485 if (m_outputs.empty()) return;
Chris@320 486
Chris@850 487 Transform primaryTransform = m_transforms[0];
Chris@850 488
Chris@497 489 while (!input->isReady() && !m_abandoned) {
Chris@690 490 SVDEBUG << "FeatureExtractionModelTransformer::run: Waiting for input model to be ready..." << endl;
Chris@497 491 usleep(500000);
Chris@320 492 }
Chris@497 493 if (m_abandoned) return;
Chris@320 494
Chris@350 495 size_t sampleRate = input->getSampleRate();
Chris@320 496
Chris@320 497 size_t channelCount = input->getChannelCount();
Chris@320 498 if (m_plugin->getMaxChannelCount() < channelCount) {
Chris@320 499 channelCount = 1;
Chris@320 500 }
Chris@320 501
Chris@320 502 float **buffers = new float*[channelCount];
Chris@320 503 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@850 504 buffers[ch] = new float[primaryTransform.getBlockSize() + 2];
Chris@320 505 }
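// (The extra two floats per channel let the same buffers carry
// blockSize/2 + 1 interleaved real/imaginary pairs when the plugin
// wants frequency-domain input -- see the FFT readout loop below --
// while time-domain plugins use only the first blockSize samples.)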
Chris@320 506
Chris@850 507 size_t stepSize = primaryTransform.getStepSize();
Chris@850 508 size_t blockSize = primaryTransform.getBlockSize();
Chris@350 509
Chris@320 510 bool frequencyDomain = (m_plugin->getInputDomain() ==
Chris@320 511 Vamp::Plugin::FrequencyDomain);
Chris@320 512 std::vector<FFTModel *> fftModels;
Chris@320 513
Chris@320 514 if (frequencyDomain) {
Chris@320 515 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@320 516 FFTModel *model = new FFTModel
Chris@350 517 (getConformingInput(),
Chris@350 518 channelCount == 1 ? m_input.getChannel() : ch,
Chris@850 519 primaryTransform.getWindowType(),
Chris@350 520 blockSize,
Chris@350 521 stepSize,
Chris@350 522 blockSize,
Chris@334 523 false,
Chris@334 524 StorageAdviser::PrecisionCritical);
Chris@320 525 if (!model->isOK()) {
Chris@320 526 delete model;
Chris@850 527 for (int j = 0; j < (int)m_outputNos.size(); ++j) {
Chris@850 528 setCompletion(j, 100);
Chris@850 529 }
Chris@387 530 //!!! need a better way to handle this -- previously we were using a QMessageBox but that isn't an appropriate thing to do here either
Chris@387 531 throw AllocationFailed("Failed to create the FFT model for this feature extraction model transformer");
Chris@320 532 }
Chris@320 533 model->resume();
Chris@320 534 fftModels.push_back(model);
Chris@320 535 }
Chris@320 536 }
Chris@320 537
Chris@350 538 long startFrame = m_input.getModel()->getStartFrame();
Chris@350 539 long endFrame = m_input.getModel()->getEndFrame();
Chris@320 540
Chris@850 541 RealTime contextStartRT = primaryTransform.getStartTime();
Chris@850 542 RealTime contextDurationRT = primaryTransform.getDuration();
Chris@350 543
Chris@350 544 long contextStart =
Chris@350 545 RealTime::realTime2Frame(contextStartRT, sampleRate);
Chris@350 546
Chris@350 547 long contextDuration =
Chris@350 548 RealTime::realTime2Frame(contextDurationRT, sampleRate);
Chris@320 549
Chris@320 550 if (contextStart == 0 || contextStart < startFrame) {
Chris@320 551 contextStart = startFrame;
Chris@320 552 }
Chris@320 553
Chris@320 554 if (contextDuration == 0) {
Chris@320 555 contextDuration = endFrame - contextStart;
Chris@320 556 }
Chris@320 557 if (contextStart + contextDuration > endFrame) {
Chris@320 558 contextDuration = endFrame - contextStart;
Chris@320 559 }
Chris@320 560
Chris@320 561 long blockFrame = contextStart;
Chris@320 562
Chris@320 563 long prevCompletion = 0;
Chris@320 564
Chris@850 565 for (int j = 0; j < (int)m_outputNos.size(); ++j) {
Chris@850 566 setCompletion(j, 0);
Chris@850 567 }
Chris@320 568
Chris@556 569 float *reals = 0;
Chris@556 570 float *imaginaries = 0;
Chris@556 571 if (frequencyDomain) {
Chris@556 572 reals = new float[blockSize/2 + 1];
Chris@556 573 imaginaries = new float[blockSize/2 + 1];
Chris@556 574 }
Chris@556 575
Chris@678 576 QString error = "";
Chris@678 577
Chris@320 578 while (!m_abandoned) {
Chris@320 579
Chris@320 580 if (frequencyDomain) {
Chris@350 581 if (blockFrame - int(blockSize)/2 >
Chris@320 582 contextStart + contextDuration) break;
Chris@320 583 } else {
Chris@320 584 if (blockFrame >=
Chris@320 585 contextStart + contextDuration) break;
Chris@320 586 }
Chris@320 587
Chris@690 588 // SVDEBUG << "FeatureExtractionModelTransformer::run: blockFrame "
Chris@320 589 // << blockFrame << ", endFrame " << endFrame << ", blockSize "
Chris@687 590 // << blockSize << endl;
Chris@320 591
Chris@320 592 long completion =
Chris@350 593 (((blockFrame - contextStart) / stepSize) * 99) /
Chris@557 594 (contextDuration / stepSize + 1);
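// e.g. (figures purely illustrative) with stepSize 1024, contextStart 0,
// a contextDuration of 441000 frames and blockFrame 220500, this gives
// ((220500/1024) * 99) / (441000/1024 + 1) = (215 * 99) / 431 = 49,
// all in integer arithmetic.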
Chris@320 595
Chris@350 596 // channelCount is either m_input.getModel()->channelCount or 1
Chris@320 597
Chris@363 598 if (frequencyDomain) {
Chris@363 599 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@350 600 int column = (blockFrame - startFrame) / stepSize;
Chris@556 601 fftModels[ch]->getValuesAt(column, reals, imaginaries);
Chris@350 602 for (size_t i = 0; i <= blockSize/2; ++i) {
Chris@556 603 buffers[ch][i*2] = reals[i];
Chris@556 604 buffers[ch][i*2+1] = imaginaries[i];
Chris@320 605 }
Chris@678 606 error = fftModels[ch]->getError();
Chris@678 607 if (error != "") {
Chris@843 608 cerr << "FeatureExtractionModelTransformer::run: Abandoning, error is " << error << endl;
Chris@678 609 m_abandoned = true;
Chris@678 610 m_message = error;
Chris@678 611 }
Chris@363 612 }
Chris@363 613 } else {
Chris@363 614 getFrames(channelCount, blockFrame, blockSize, buffers);
Chris@320 615 }
Chris@320 616
Chris@497 617 if (m_abandoned) break;
Chris@497 618
Chris@320 619 Vamp::Plugin::FeatureSet features = m_plugin->process
Chris@320 620 (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate));
Chris@320 621
Chris@497 622 if (m_abandoned) break;
Chris@497 623
Chris@850 624 for (int j = 0; j < (int)m_outputNos.size(); ++j) {
Chris@850 625 for (size_t fi = 0; fi < features[m_outputNos[j]].size(); ++fi) {
Chris@850 626 Vamp::Plugin::Feature feature = features[m_outputNos[j]][fi];
Chris@850 627 addFeature(j, blockFrame, feature);
Chris@850 628 }
Chris@850 629 }
Chris@320 630
Chris@320 631 if (blockFrame == contextStart || completion > prevCompletion) {
Chris@850 632 for (int j = 0; j < (int)m_outputNos.size(); ++j) {
Chris@850 633 setCompletion(j, completion);
Chris@850 634 }
Chris@320 635 prevCompletion = completion;
Chris@320 636 }
Chris@320 637
Chris@350 638 blockFrame += stepSize;
Chris@320 639 }
Chris@320 640
Chris@497 641 if (!m_abandoned) {
Chris@497 642 Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures();
Chris@320 643
Chris@850 644 for (int j = 0; j < (int)m_outputNos.size(); ++j) {
Chris@850 645 for (size_t fi = 0; fi < features[m_outputNos[j]].size(); ++fi) {
Chris@850 646 Vamp::Plugin::Feature feature = features[m_outputNos[j]][fi];
Chris@850 647 addFeature(j, blockFrame, feature);
Chris@850 648 }
Chris@497 649 }
Chris@497 650 }
Chris@320 651
Chris@850 652 for (int j = 0; j < (int)m_outputNos.size(); ++j) {
Chris@850 653 setCompletion(j, 100);
Chris@850 654 }
Chris@320 655
Chris@320 656 if (frequencyDomain) {
Chris@320 657 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@320 658 delete fftModels[ch];
Chris@320 659 }
Chris@556 660 delete[] reals;
Chris@556 661 delete[] imaginaries;
Chris@320 662 }
Chris@320 663 }
Chris@320 664
Chris@320 665 void
Chris@363 666 FeatureExtractionModelTransformer::getFrames(int channelCount,
Chris@363 667 long startFrame, long size,
Chris@363 668 float **buffers)
Chris@320 669 {
Chris@320 670 long offset = 0;
Chris@320 671
Chris@320 672 if (startFrame < 0) {
Chris@363 673 for (int c = 0; c < channelCount; ++c) {
Chris@363 674 for (int i = 0; i < size && startFrame + i < 0; ++i) {
Chris@363 675 buffers[c][i] = 0.0f;
Chris@363 676 }
Chris@320 677 }
Chris@320 678 offset = -startFrame;
Chris@320 679 size -= offset;
Chris@320 680 if (size <= 0) return;
Chris@320 681 startFrame = 0;
Chris@320 682 }
Chris@320 683
Chris@350 684 DenseTimeValueModel *input = getConformingInput();
Chris@350 685 if (!input) return;
Chris@363 686
Chris@363 687 long got = 0;
Chris@350 688
Chris@363 689 if (channelCount == 1) {
Chris@363 690
Chris@363 691 got = input->getData(m_input.getChannel(), startFrame, size,
Chris@363 692 buffers[0] + offset);
Chris@363 693
Chris@363 694 if (m_input.getChannel() == -1 && input->getChannelCount() > 1) {
Chris@363 695 // use mean instead of sum, as plugin input
Chris@363 696 float cc = float(input->getChannelCount());
Chris@363 697 for (long i = 0; i < size; ++i) {
Chris@363 698 buffers[0][i + offset] /= cc;
Chris@363 699 }
Chris@363 700 }
Chris@363 701
Chris@363 702 } else {
Chris@363 703
Chris@363 704 float **writebuf = buffers;
Chris@363 705 if (offset > 0) {
Chris@363 706 writebuf = new float *[channelCount];
Chris@363 707 for (int i = 0; i < channelCount; ++i) {
Chris@363 708 writebuf[i] = buffers[i] + offset;
Chris@363 709 }
Chris@363 710 }
Chris@363 711
Chris@363 712 got = input->getData(0, channelCount-1, startFrame, size, writebuf);
Chris@363 713
Chris@363 714 if (writebuf != buffers) delete[] writebuf;
Chris@363 715 }
Chris@320 716
Chris@320 717 while (got < size) {
Chris@363 718 for (int c = 0; c < channelCount; ++c) {
Chris@363 719 buffers[c][got + offset] = 0.0;
Chris@363 720 }
Chris@320 721 ++got;
Chris@320 722 }
Chris@320 723 }
Chris@320 724
Chris@320 725 void
Chris@850 726 FeatureExtractionModelTransformer::addFeature(int n,
Chris@850 727 size_t blockFrame,
Chris@850 728 const Vamp::Plugin::Feature &feature)
Chris@320 729 {
Chris@350 730 size_t inputRate = m_input.getModel()->getSampleRate();
Chris@320 731
Chris@843 732 // cerr << "FeatureExtractionModelTransformer::addFeature: blockFrame = "
Chris@712 733 // << blockFrame << ", hasTimestamp = " << feature.hasTimestamp
Chris@712 734 // << ", timestamp = " << feature.timestamp << ", hasDuration = "
Chris@712 735 // << feature.hasDuration << ", duration = " << feature.duration
Chris@843 736 // << endl;
Chris@320 737
Chris@320 738 int binCount = 1;
Chris@849 739 if (m_descriptors[n]->hasFixedBinCount) {
Chris@849 740 binCount = m_descriptors[n]->binCount;
Chris@320 741 }
Chris@320 742
Chris@320 743 size_t frame = blockFrame;
Chris@320 744
Chris@849 745 if (m_descriptors[n]->sampleType ==
Chris@320 746 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
Chris@320 747
Chris@320 748 if (!feature.hasTimestamp) {
Chris@843 749 cerr
Chris@331 750 << "WARNING: FeatureExtractionModelTransformer::addFeature: "
Chris@320 751 << "Feature has variable sample rate but no timestamp!"
Chris@843 752 << endl;
Chris@320 753 return;
Chris@320 754 } else {
Chris@320 755 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate);
Chris@320 756 }
Chris@320 757
Chris@849 758 } else if (m_descriptors[n]->sampleType ==
Chris@320 759 Vamp::Plugin::OutputDescriptor::FixedSampleRate) {
Chris@320 760
Chris@779 761 if (!feature.hasTimestamp) {
Chris@849 762 ++m_fixedRateFeatureNos[n];
Chris@779 763 } else {
Chris@779 764 RealTime ts(feature.timestamp.sec, feature.timestamp.nsec);
Chris@849 765 m_fixedRateFeatureNos[n] =
Chris@849 766 lrint(ts.toDouble() * m_descriptors[n]->sampleRate);
Chris@779 767 }
Chris@779 768
Chris@849 769 frame = lrintf((m_fixedRateFeatureNos[n] / m_descriptors[n]->sampleRate)
Chris@779 770 * inputRate);
Chris@320 771 }
Chris@320 772
Chris@441 773 // Rather than repeat the complicated tests from the constructor
Chris@441 774 // to determine what sort of model we must be adding the features
Chris@441 775 // to, we instead test what sort of model the constructor decided
Chris@441 776 // to create.
Chris@320 777
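// (isOutput<T>(n) and getConformingOutput<T>(n) are assumed here to be
//  small templated helpers declared in the class header, presumably
//  along the lines of
//      template <typename T> bool isOutput(int n) {
//          return dynamic_cast<T *>(m_outputs[n]) != 0;
//      }
//  -- a sketch of the mechanism, not the actual declarations.)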
Chris@849 778 if (isOutput<SparseOneDimensionalModel>(n)) {
Chris@441 779
Chris@441 780 SparseOneDimensionalModel *model =
Chris@849 781 getConformingOutput<SparseOneDimensionalModel>(n);
Chris@320 782 if (!model) return;
Chris@350 783
Chris@441 784 model->addPoint(SparseOneDimensionalModel::Point
Chris@441 785 (frame, feature.label.c_str()));
Chris@320 786
Chris@849 787 } else if (isOutput<SparseTimeValueModel>(n)) {
Chris@320 788
Chris@350 789 SparseTimeValueModel *model =
Chris@849 790 getConformingOutput<SparseTimeValueModel>(n);
Chris@320 791 if (!model) return;
Chris@350 792
Chris@454 793 for (int i = 0; i < feature.values.size(); ++i) {
Chris@454 794
Chris@454 795 float value = feature.values[i];
Chris@454 796
Chris@454 797 QString label = feature.label.c_str();
Chris@454 798 if (feature.values.size() > 1) {
Chris@454 799 label = QString("[%1] %2").arg(i+1).arg(label);
Chris@454 800 }
Chris@454 801
Chris@454 802 model->addPoint(SparseTimeValueModel::Point(frame, value, label));
Chris@454 803 }
Chris@320 804
Chris@849 805 } else if (isOutput<FlexiNoteModel>(n) || isOutput<NoteModel>(n) || isOutput<RegionModel>(n)) { //GF: Added Note Model
Chris@320 806
Chris@441 807 int index = 0;
Chris@441 808
Chris@441 809 float value = 0.0;
Chris@441 810 if (feature.values.size() > index) {
Chris@441 811 value = feature.values[index++];
Chris@441 812 }
Chris@320 813
Chris@320 814 float duration = 1;
Chris@441 815 if (feature.hasDuration) {
Chris@441 816 duration = Vamp::RealTime::realTime2Frame(feature.duration, inputRate);
Chris@441 817 } else {
Chris@441 818 if (feature.values.size() > index) {
Chris@441 819 duration = feature.values[index++];
Chris@441 820 }
Chris@441 821 }
gyorgyf@786 822
Chris@849 823 if (isOutput<FlexiNoteModel>(n)) { // GF: added for flexi note model
gyorgyf@786 824
gyorgyf@786 825 float velocity = 100;
gyorgyf@786 826 if (feature.values.size() > index) {
gyorgyf@786 827 velocity = feature.values[index++];
gyorgyf@786 828 }
gyorgyf@786 829 if (velocity < 0) velocity = 127;
gyorgyf@786 830 if (velocity > 127) velocity = 127;
gyorgyf@786 831
Chris@849 832 FlexiNoteModel *model = getConformingOutput<FlexiNoteModel>(n);
gyorgyf@786 833 if (!model) return;
gyorgyf@786 834 model->addPoint(FlexiNoteModel::Point(frame, value, // value is pitch
gyorgyf@786 835 lrintf(duration),
gyorgyf@786 836 velocity / 127.f,
gyorgyf@786 837 feature.label.c_str()));
gyorgyf@786 838 // GF: end -- added for flexi note model
Chris@849 839 } else if (isOutput<NoteModel>(n)) {
Chris@320 840
Chris@441 841 float velocity = 100;
Chris@441 842 if (feature.values.size() > index) {
Chris@441 843 velocity = feature.values[index++];
Chris@441 844 }
Chris@441 845 if (velocity < 0) velocity = 127;
Chris@441 846 if (velocity > 127) velocity = 127;
Chris@320 847
Chris@849 848 NoteModel *model = getConformingOutput<NoteModel>(n);
Chris@441 849 if (!model) return;
Chris@441 850 model->addPoint(NoteModel::Point(frame, value, // value is pitch
Chris@441 851 lrintf(duration),
Chris@441 852 velocity / 127.f,
Chris@441 853 feature.label.c_str()));
Chris@441 854 } else {
gyorgyf@786 855
Chris@849 856 RegionModel *model = getConformingOutput<RegionModel>(n);
Chris@454 857 if (!model) return;
Chris@454 858
Chris@474 859 if (feature.hasDuration && !feature.values.empty()) {
Chris@454 860
Chris@454 861 for (int i = 0; i < feature.values.size(); ++i) {
Chris@454 862
Chris@454 863 float value = feature.values[i];
Chris@454 864
Chris@454 865 QString label = feature.label.c_str();
Chris@454 866 if (feature.values.size() > 1) {
Chris@454 867 label = QString("[%1] %2").arg(i+1).arg(label);
Chris@454 868 }
Chris@454 869
Chris@454 870 model->addPoint(RegionModel::Point(frame, value,
Chris@454 871 lrintf(duration),
Chris@454 872 label));
Chris@454 873 }
Chris@454 874 } else {
Chris@454 875
Chris@441 876 model->addPoint(RegionModel::Point(frame, value,
Chris@441 877 lrintf(duration),
Chris@441 878 feature.label.c_str()));
Chris@454 879 }
Chris@441 880 }
Chris@320 881
Chris@849 882 } else if (isOutput<EditableDenseThreeDimensionalModel>(n)) {
Chris@320 883
Chris@533 884 DenseThreeDimensionalModel::Column values =
Chris@533 885 DenseThreeDimensionalModel::Column::fromStdVector(feature.values);
Chris@320 886
Chris@320 887 EditableDenseThreeDimensionalModel *model =
Chris@849 888 getConformingOutput<EditableDenseThreeDimensionalModel>(n);
Chris@320 889 if (!model) return;
Chris@320 890
Chris@320 891 model->setColumn(frame / model->getResolution(), values);
Chris@441 892
Chris@441 893 } else {
Chris@690 894 SVDEBUG << "FeatureExtractionModelTransformer::addFeature: Unknown output model type!" << endl;
Chris@320 895 }
Chris@320 896 }
Chris@320 897
Chris@320 898 void
Chris@850 899 FeatureExtractionModelTransformer::setCompletion(int n, int completion)
Chris@320 900 {
Chris@320 901 int binCount = 1;
Chris@849 902 if (m_descriptors[n]->hasFixedBinCount) {
Chris@849 903 binCount = m_descriptors[n]->binCount;
Chris@320 904 }
Chris@320 905
Chris@690 906 // SVDEBUG << "FeatureExtractionModelTransformer::setCompletion("
Chris@687 907 // << completion << ")" << endl;
Chris@320 908
Chris@849 909 if (isOutput<SparseOneDimensionalModel>(n)) {
Chris@320 910
Chris@350 911 SparseOneDimensionalModel *model =
Chris@849 912 getConformingOutput<SparseOneDimensionalModel>(n);
Chris@320 913 if (!model) return;
Chris@441 914 model->setCompletion(completion, true);
Chris@320 915
Chris@849 916 } else if (isOutput<SparseTimeValueModel>(n)) {
Chris@320 917
Chris@350 918 SparseTimeValueModel *model =
Chris@849 919 getConformingOutput<SparseTimeValueModel>(n);
Chris@320 920 if (!model) return;
Chris@441 921 model->setCompletion(completion, true);
Chris@320 922
Chris@849 923 } else if (isOutput<NoteModel>(n)) {
Chris@320 924
Chris@849 925 NoteModel *model = getConformingOutput<NoteModel>(n);
Chris@320 926 if (!model) return;
Chris@441 927 model->setCompletion(completion, true);
gyorgyf@786 928
Chris@849 929 } else if (isOutput<FlexiNoteModel>(n)) {
gyorgyf@786 930
Chris@849 931 FlexiNoteModel *model = getConformingOutput<FlexiNoteModel>(n);
gyorgyf@786 932 if (!model) return;
gyorgyf@786 933 model->setCompletion(completion, true);
Chris@320 934
Chris@849 935 } else if (isOutput<RegionModel>(n)) {
Chris@441 936
Chris@849 937 RegionModel *model = getConformingOutput<RegionModel>(n);
Chris@441 938 if (!model) return;
Chris@441 939 model->setCompletion(completion, true);
Chris@441 940
Chris@849 941 } else if (isOutput<EditableDenseThreeDimensionalModel>(n)) {
Chris@320 942
Chris@320 943 EditableDenseThreeDimensionalModel *model =
Chris@849 944 getConformingOutput<EditableDenseThreeDimensionalModel>(n);
Chris@320 945 if (!model) return;
Chris@350 946 model->setCompletion(completion, true); //!!!m_context.updates);
Chris@320 947 }
Chris@320 948 }
Chris@320 949