annotate transform/FeatureExtractionModelTransformer.cpp @ 823:f0558e69a074

Rename Resampling- to DecodingWavFileReader, and use it whenever we have an audio file that is not quickly seekable using libsndfile. Avoids very slow performance when analysing ogg files.
author Chris Cannam
date Wed, 17 Jul 2013 15:40:01 +0100
parents 98cf98a217d2
children 54829c1e155e e802e550a1f2
rev   line source
Chris@320 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@320 2
Chris@320 3 /*
Chris@320 4 Sonic Visualiser
Chris@320 5 An audio file viewer and annotation editor.
Chris@320 6 Centre for Digital Music, Queen Mary, University of London.
Chris@320 7 This file copyright 2006 Chris Cannam and QMUL.
Chris@320 8
Chris@320 9 This program is free software; you can redistribute it and/or
Chris@320 10 modify it under the terms of the GNU General Public License as
Chris@320 11 published by the Free Software Foundation; either version 2 of the
Chris@320 12 License, or (at your option) any later version. See the file
Chris@320 13 COPYING included with this distribution for more information.
Chris@320 14 */
Chris@320 15
Chris@331 16 #include "FeatureExtractionModelTransformer.h"
Chris@320 17
Chris@320 18 #include "plugin/FeatureExtractionPluginFactory.h"
Chris@320 19 #include "plugin/PluginXml.h"
Chris@475 20 #include <vamp-hostsdk/Plugin.h>
Chris@320 21
Chris@320 22 #include "data/model/Model.h"
Chris@320 23 #include "base/Window.h"
Chris@387 24 #include "base/Exceptions.h"
Chris@320 25 #include "data/model/SparseOneDimensionalModel.h"
Chris@320 26 #include "data/model/SparseTimeValueModel.h"
Chris@320 27 #include "data/model/EditableDenseThreeDimensionalModel.h"
Chris@320 28 #include "data/model/DenseTimeValueModel.h"
Chris@320 29 #include "data/model/NoteModel.h"
Chris@441 30 #include "data/model/RegionModel.h"
Chris@320 31 #include "data/model/FFTModel.h"
Chris@320 32 #include "data/model/WaveFileModel.h"
Chris@558 33 #include "rdf/PluginRDFDescription.h"
Chris@320 34
Chris@350 35 #include "TransformFactory.h"
Chris@350 36
Chris@320 37 #include <iostream>
Chris@320 38
Chris@350 39 FeatureExtractionModelTransformer::FeatureExtractionModelTransformer(Input in,
Chris@350 40 const Transform &transform) :
Chris@350 41 ModelTransformer(in, transform),
Chris@320 42 m_plugin(0),
Chris@320 43 m_descriptor(0),
Chris@779 44 m_outputNo(0),
Chris@779 45 m_fixedRateFeatureNo(-1) // we increment before use
Chris@320 46 {
Chris@690 47 // SVDEBUG << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: plugin " << pluginId << ", outputName " << m_transform.getOutput() << endl;
Chris@350 48
Chris@350 49 QString pluginId = transform.getPluginIdentifier();
Chris@320 50
Chris@320 51 FeatureExtractionPluginFactory *factory =
Chris@320 52 FeatureExtractionPluginFactory::instanceFor(pluginId);
Chris@320 53
Chris@320 54 if (!factory) {
Chris@361 55 m_message = tr("No factory available for feature extraction plugin id \"%1\" (unknown plugin type, or internal error?)").arg(pluginId);
Chris@320 56 return;
Chris@320 57 }
Chris@320 58
Chris@350 59 DenseTimeValueModel *input = getConformingInput();
Chris@350 60 if (!input) {
Chris@361 61 m_message = tr("Input model for feature extraction plugin \"%1\" is of wrong type (internal error?)").arg(pluginId);
Chris@350 62 return;
Chris@350 63 }
Chris@320 64
Chris@350 65 m_plugin = factory->instantiatePlugin(pluginId, input->getSampleRate());
Chris@320 66 if (!m_plugin) {
Chris@361 67 m_message = tr("Failed to instantiate plugin \"%1\"").arg(pluginId);
Chris@320 68 return;
Chris@320 69 }
Chris@320 70
Chris@350 71 TransformFactory::getInstance()->makeContextConsistentWithPlugin
Chris@350 72 (m_transform, m_plugin);
Chris@343 73
Chris@350 74 TransformFactory::getInstance()->setPluginParameters
Chris@350 75 (m_transform, m_plugin);
Chris@320 76
Chris@320 77 size_t channelCount = input->getChannelCount();
Chris@320 78 if (m_plugin->getMaxChannelCount() < channelCount) {
Chris@320 79 channelCount = 1;
Chris@320 80 }
Chris@320 81 if (m_plugin->getMinChannelCount() > channelCount) {
Chris@361 82 m_message = tr("Cannot provide enough channels to feature extraction plugin \"%1\" (plugin min is %2, max %3; input model has %4)")
Chris@361 83 .arg(pluginId)
Chris@361 84 .arg(m_plugin->getMinChannelCount())
Chris@361 85 .arg(m_plugin->getMaxChannelCount())
Chris@361 86 .arg(input->getChannelCount());
Chris@320 87 return;
Chris@320 88 }
Chris@320 89
Chris@690 90 SVDEBUG << "Initialising feature extraction plugin with channels = "
Chris@350 91 << channelCount << ", step = " << m_transform.getStepSize()
Chris@687 92 << ", block = " << m_transform.getBlockSize() << endl;
Chris@320 93
Chris@320 94 if (!m_plugin->initialise(channelCount,
Chris@350 95 m_transform.getStepSize(),
Chris@350 96 m_transform.getBlockSize())) {
Chris@361 97
Chris@361 98 size_t pstep = m_transform.getStepSize();
Chris@361 99 size_t pblock = m_transform.getBlockSize();
Chris@361 100
Chris@361 101 m_transform.setStepSize(0);
Chris@361 102 m_transform.setBlockSize(0);
Chris@361 103 TransformFactory::getInstance()->makeContextConsistentWithPlugin
Chris@361 104 (m_transform, m_plugin);
Chris@361 105
Chris@361 106 if (m_transform.getStepSize() != pstep ||
Chris@361 107 m_transform.getBlockSize() != pblock) {
Chris@361 108
Chris@361 109 if (!m_plugin->initialise(channelCount,
Chris@361 110 m_transform.getStepSize(),
Chris@361 111 m_transform.getBlockSize())) {
Chris@361 112
Chris@361 113 m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
Chris@361 114 return;
Chris@361 115
Chris@361 116 } else {
Chris@361 117
Chris@361 118 m_message = tr("Feature extraction plugin \"%1\" rejected the given step and block sizes (%2 and %3); using plugin defaults (%4 and %5) instead")
Chris@361 119 .arg(pluginId)
Chris@361 120 .arg(pstep)
Chris@361 121 .arg(pblock)
Chris@361 122 .arg(m_transform.getStepSize())
Chris@361 123 .arg(m_transform.getBlockSize());
Chris@361 124 }
Chris@361 125
Chris@361 126 } else {
Chris@361 127
Chris@361 128 m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
Chris@361 129 return;
Chris@361 130 }
Chris@320 131 }
Chris@320 132
Chris@366 133 if (m_transform.getPluginVersion() != "") {
Chris@366 134 QString pv = QString("%1").arg(m_plugin->getPluginVersion());
Chris@366 135 if (pv != m_transform.getPluginVersion()) {
Chris@366 136 QString vm = tr("Transform was configured for version %1 of plugin \"%2\", but the plugin being used is version %3")
Chris@366 137 .arg(m_transform.getPluginVersion())
Chris@366 138 .arg(pluginId)
Chris@366 139 .arg(pv);
Chris@366 140 if (m_message != "") {
Chris@366 141 m_message = QString("%1; %2").arg(vm).arg(m_message);
Chris@366 142 } else {
Chris@366 143 m_message = vm;
Chris@366 144 }
Chris@366 145 }
Chris@366 146 }
Chris@366 147
Chris@320 148 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
Chris@320 149
Chris@320 150 if (outputs.empty()) {
Chris@361 151 m_message = tr("Plugin \"%1\" has no outputs").arg(pluginId);
Chris@320 152 return;
Chris@320 153 }
Chris@320 154
Chris@320 155 for (size_t i = 0; i < outputs.size(); ++i) {
Chris@690 156 // SVDEBUG << "comparing output " << i << " name \"" << outputs[i].identifier << "\" with expected \"" << m_transform.getOutput() << "\"" << endl;
Chris@350 157 if (m_transform.getOutput() == "" ||
Chris@350 158 outputs[i].identifier == m_transform.getOutput().toStdString()) {
Chris@778 159 m_outputNo = i;
Chris@441 160 m_descriptor = new Vamp::Plugin::OutputDescriptor(outputs[i]);
Chris@320 161 break;
Chris@320 162 }
Chris@320 163 }
Chris@320 164
Chris@320 165 if (!m_descriptor) {
Chris@361 166 m_message = tr("Plugin \"%1\" has no output named \"%2\"")
Chris@361 167 .arg(pluginId)
Chris@361 168 .arg(m_transform.getOutput());
Chris@320 169 return;
Chris@320 170 }
Chris@320 171
Chris@558 172 createOutputModel();
Chris@558 173 }
Chris@558 174
Chris@558 175 void
Chris@558 176 FeatureExtractionModelTransformer::createOutputModel()
Chris@558 177 {
Chris@558 178 DenseTimeValueModel *input = getConformingInput();
Chris@558 179
Chris@712 180 // std::cerr << "FeatureExtractionModelTransformer::createOutputModel: sample type " << m_descriptor->sampleType << ", rate " << m_descriptor->sampleRate << std::endl;
Chris@712 181
Chris@558 182 PluginRDFDescription description(m_transform.getPluginIdentifier());
Chris@558 183 QString outputId = m_transform.getOutput();
Chris@558 184
Chris@320 185 int binCount = 1;
Chris@320 186 float minValue = 0.0, maxValue = 0.0;
Chris@320 187 bool haveExtents = false;
Chris@320 188
Chris@320 189 if (m_descriptor->hasFixedBinCount) {
Chris@320 190 binCount = m_descriptor->binCount;
Chris@320 191 }
Chris@320 192
Chris@331 193 // std::cerr << "FeatureExtractionModelTransformer: output bin count "
Chris@320 194 // << binCount << std::endl;
Chris@320 195
Chris@320 196 if (binCount > 0 && m_descriptor->hasKnownExtents) {
Chris@320 197 minValue = m_descriptor->minValue;
Chris@320 198 maxValue = m_descriptor->maxValue;
Chris@320 199 haveExtents = true;
Chris@320 200 }
Chris@320 201
Chris@350 202 size_t modelRate = input->getSampleRate();
Chris@320 203 size_t modelResolution = 1;
Chris@712 204
Chris@785 205 if (m_descriptor->sampleType !=
Chris@785 206 Vamp::Plugin::OutputDescriptor::OneSamplePerStep) {
Chris@785 207 if (m_descriptor->sampleRate > input->getSampleRate()) {
Chris@785 208 std::cerr << "WARNING: plugin reports output sample rate as "
Chris@785 209 << m_descriptor->sampleRate << " (can't display features with finer resolution than the input rate of " << input->getSampleRate() << ")" << std::endl;
Chris@785 210 }
Chris@785 211 }
Chris@785 212
Chris@320 213 switch (m_descriptor->sampleType) {
Chris@320 214
Chris@320 215 case Vamp::Plugin::OutputDescriptor::VariableSampleRate:
Chris@320 216 if (m_descriptor->sampleRate != 0.0) {
Chris@320 217 modelResolution = size_t(modelRate / m_descriptor->sampleRate + 0.001);
Chris@320 218 }
Chris@320 219 break;
Chris@320 220
Chris@320 221 case Vamp::Plugin::OutputDescriptor::OneSamplePerStep:
Chris@350 222 modelResolution = m_transform.getStepSize();
Chris@320 223 break;
Chris@320 224
Chris@320 225 case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
Chris@451 226 //!!! SV doesn't actually support display of models that have
Chris@451 227 //!!! different underlying rates together -- so we always set
Chris@451 228 //!!! the model rate to be the input model's rate, and adjust
Chris@451 229 //!!! the resolution appropriately. We can't properly display
Chris@451 230 //!!! data with a higher resolution than the base model at all
Chris@451 231 // modelRate = size_t(m_descriptor->sampleRate + 0.001);
Chris@451 232 if (m_descriptor->sampleRate > input->getSampleRate()) {
Chris@451 233 modelResolution = 1;
Chris@451 234 } else {
Chris@451 235 modelResolution = size_t(input->getSampleRate() /
Chris@451 236 m_descriptor->sampleRate);
Chris@451 237 }
Chris@320 238 break;
Chris@320 239 }
Chris@320 240
Chris@441 241 bool preDurationPlugin = (m_plugin->getVampApiVersion() < 2);
Chris@441 242
Chris@441 243 if (binCount == 0 &&
Chris@441 244 (preDurationPlugin || !m_descriptor->hasDuration)) {
Chris@320 245
Chris@445 246 // Anything with no value and no duration is an instant
Chris@445 247
Chris@320 248 m_output = new SparseOneDimensionalModel(modelRate, modelResolution,
Chris@320 249 false);
Chris@320 250
Chris@558 251 QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
Chris@558 252 m_output->setRDFTypeURI(outputEventTypeURI);
Chris@558 253
Chris@441 254 } else if ((preDurationPlugin && binCount > 1 &&
Chris@441 255 (m_descriptor->sampleType ==
Chris@441 256 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) ||
Chris@441 257 (!preDurationPlugin && m_descriptor->hasDuration)) {
Chris@441 258
Chris@441 259 // For plugins using the old v1 API without explicit duration,
Chris@441 260 // we treat anything that has multiple bins (i.e. that has the
Chris@441 261 // potential to have value and duration) and a variable sample
Chris@441 262 // rate as a note model, taking its values as pitch, duration
Chris@441 263 // and velocity (if present) respectively. This is the same
Chris@441 264 // behaviour as always applied by SV to these plugins in the
Chris@441 265 // past.
Chris@441 266
Chris@441 267 // For plugins with the newer API, we treat anything with
Chris@441 268 // duration as either a note model with pitch and velocity, or
Chris@441 269 // a region model.
Chris@441 270
Chris@441 271 // How do we know whether it's an interval or note model?
Chris@441 272 // What's the essential difference? Is a note model any
Chris@441 273 // interval model using a Hz or "MIDI pitch" scale? There
Chris@441 274 // isn't really a reliable test for "MIDI pitch"... Does a
Chris@441 275 // note model always have velocity? This is a good question
Chris@441 276 // to be addressed by accompanying RDF, but for the moment we
Chris@441 277 // will do the following...
Chris@441 278
Chris@441 279 bool isNoteModel = false;
Chris@441 280
Chris@441 281 // Regions have only value (and duration -- we can't extract a
Chris@441 282 // region model from an old-style plugin that doesn't support
Chris@441 283 // duration)
Chris@441 284 if (binCount > 1) isNoteModel = true;
Chris@441 285
Chris@595 286 // Regions do not have units of Hz or MIDI things (a sweeping
Chris@595 287 // assumption!)
Chris@595 288 if (m_descriptor->unit == "Hz" ||
Chris@595 289 m_descriptor->unit.find("MIDI") != std::string::npos ||
Chris@595 290 m_descriptor->unit.find("midi") != std::string::npos) {
Chris@595 291 isNoteModel = true;
Chris@595 292 }
Chris@441 293
Chris@441 294 // If we had a "sparse 3D model", we would have the additional
Chris@441 295 // problem of determining whether to use that here (if bin
Chris@441 296 // count > 1). But we don't.
Chris@441 297
Chris@441 298 if (isNoteModel) {
Chris@441 299
Chris@441 300 NoteModel *model;
Chris@441 301 if (haveExtents) {
Chris@441 302 model = new NoteModel
Chris@441 303 (modelRate, modelResolution, minValue, maxValue, false);
Chris@441 304 } else {
Chris@441 305 model = new NoteModel
Chris@441 306 (modelRate, modelResolution, false);
Chris@441 307 }
Chris@441 308 model->setScaleUnits(m_descriptor->unit.c_str());
Chris@441 309 m_output = model;
Chris@441 310
Chris@441 311 } else {
Chris@441 312
Chris@441 313 RegionModel *model;
Chris@441 314 if (haveExtents) {
Chris@441 315 model = new RegionModel
Chris@441 316 (modelRate, modelResolution, minValue, maxValue, false);
Chris@441 317 } else {
Chris@441 318 model = new RegionModel
Chris@441 319 (modelRate, modelResolution, false);
Chris@441 320 }
Chris@441 321 model->setScaleUnits(m_descriptor->unit.c_str());
Chris@441 322 m_output = model;
Chris@441 323 }
Chris@441 324
Chris@558 325 QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
Chris@558 326 m_output->setRDFTypeURI(outputEventTypeURI);
Chris@558 327
Chris@441 328 } else if (binCount == 1 ||
Chris@441 329 (m_descriptor->sampleType ==
Chris@441 330 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) {
Chris@441 331
Chris@441 332 // Anything that is not a 1D, note, or interval model and that
Chris@441 333 // has only one value per result must be a sparse time value
Chris@441 334 // model.
Chris@441 335
Chris@441 336 // Anything that is not a 1D, note, or interval model and that
Chris@441 337 // has a variable sample rate is also treated as a sparse time
Chris@441 338 // value model regardless of its bin count, because we lack a
Chris@441 339 // sparse 3D model.
Chris@320 340
Chris@320 341 SparseTimeValueModel *model;
Chris@320 342 if (haveExtents) {
Chris@320 343 model = new SparseTimeValueModel
Chris@320 344 (modelRate, modelResolution, minValue, maxValue, false);
Chris@320 345 } else {
Chris@320 346 model = new SparseTimeValueModel
Chris@320 347 (modelRate, modelResolution, false);
Chris@320 348 }
Chris@558 349
Chris@558 350 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
Chris@778 351 model->setScaleUnits(outputs[m_outputNo].unit.c_str());
Chris@320 352
Chris@320 353 m_output = model;
Chris@320 354
Chris@558 355 QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
Chris@558 356 m_output->setRDFTypeURI(outputEventTypeURI);
Chris@558 357
Chris@441 358 } else {
Chris@320 359
Chris@441 360 // Anything that is not a 1D, note, or interval model and that
Chris@441 361 // has a fixed sample rate and more than one value per result
Chris@441 362 // must be a dense 3D model.
Chris@320 363
Chris@320 364 EditableDenseThreeDimensionalModel *model =
Chris@320 365 new EditableDenseThreeDimensionalModel
Chris@535 366 (modelRate, modelResolution, binCount,
Chris@535 367 EditableDenseThreeDimensionalModel::BasicMultirateCompression,
Chris@535 368 false);
Chris@320 369
Chris@320 370 if (!m_descriptor->binNames.empty()) {
Chris@320 371 std::vector<QString> names;
Chris@320 372 for (size_t i = 0; i < m_descriptor->binNames.size(); ++i) {
Chris@320 373 names.push_back(m_descriptor->binNames[i].c_str());
Chris@320 374 }
Chris@320 375 model->setBinNames(names);
Chris@320 376 }
Chris@320 377
Chris@320 378 m_output = model;
Chris@558 379
Chris@558 380 QString outputSignalTypeURI = description.getOutputSignalTypeURI(outputId);
Chris@558 381 m_output->setRDFTypeURI(outputSignalTypeURI);
Chris@320 382 }
Chris@333 383
Chris@350 384 if (m_output) m_output->setSourceModel(input);
Chris@320 385 }
Chris@320 386
Chris@331 387 FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()
Chris@320 388 {
Chris@690 389 // SVDEBUG << "FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()" << endl;
Chris@320 390 delete m_plugin;
Chris@320 391 delete m_descriptor;
Chris@320 392 }
Chris@320 393
Chris@320 394 DenseTimeValueModel *
Chris@350 395 FeatureExtractionModelTransformer::getConformingInput()
Chris@320 396 {
Chris@690 397 // SVDEBUG << "FeatureExtractionModelTransformer::getConformingInput: input model is " << getInputModel() << endl;
Chris@408 398
Chris@320 399 DenseTimeValueModel *dtvm =
Chris@320 400 dynamic_cast<DenseTimeValueModel *>(getInputModel());
Chris@320 401 if (!dtvm) {
Chris@690 402 SVDEBUG << "FeatureExtractionModelTransformer::getConformingInput: WARNING: Input model is not conformable to DenseTimeValueModel" << endl;
Chris@320 403 }
Chris@320 404 return dtvm;
Chris@320 405 }
Chris@320 406
Chris@320 407 void
Chris@331 408 FeatureExtractionModelTransformer::run()
Chris@320 409 {
Chris@350 410 DenseTimeValueModel *input = getConformingInput();
Chris@320 411 if (!input) return;
Chris@320 412
Chris@320 413 if (!m_output) return;
Chris@320 414
Chris@497 415 while (!input->isReady() && !m_abandoned) {
Chris@690 416 SVDEBUG << "FeatureExtractionModelTransformer::run: Waiting for input model to be ready..." << endl;
Chris@497 417 usleep(500000);
Chris@320 418 }
Chris@497 419 if (m_abandoned) return;
Chris@320 420
Chris@350 421 size_t sampleRate = input->getSampleRate();
Chris@320 422
Chris@320 423 size_t channelCount = input->getChannelCount();
Chris@320 424 if (m_plugin->getMaxChannelCount() < channelCount) {
Chris@320 425 channelCount = 1;
Chris@320 426 }
Chris@320 427
Chris@320 428 float **buffers = new float*[channelCount];
Chris@320 429 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@350 430 buffers[ch] = new float[m_transform.getBlockSize() + 2];
Chris@320 431 }
Chris@320 432
Chris@350 433 size_t stepSize = m_transform.getStepSize();
Chris@350 434 size_t blockSize = m_transform.getBlockSize();
Chris@350 435
Chris@320 436 bool frequencyDomain = (m_plugin->getInputDomain() ==
Chris@320 437 Vamp::Plugin::FrequencyDomain);
Chris@320 438 std::vector<FFTModel *> fftModels;
Chris@320 439
Chris@320 440 if (frequencyDomain) {
Chris@320 441 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@320 442 FFTModel *model = new FFTModel
Chris@350 443 (getConformingInput(),
Chris@350 444 channelCount == 1 ? m_input.getChannel() : ch,
Chris@350 445 m_transform.getWindowType(),
Chris@350 446 blockSize,
Chris@350 447 stepSize,
Chris@350 448 blockSize,
Chris@334 449 false,
Chris@334 450 StorageAdviser::PrecisionCritical);
Chris@320 451 if (!model->isOK()) {
Chris@320 452 delete model;
Chris@320 453 setCompletion(100);
Chris@387 454 //!!! need a better way to handle this -- previously we were using a QMessageBox but that isn't an appropriate thing to do here either
Chris@387 455 throw AllocationFailed("Failed to create the FFT model for this feature extraction model transformer");
Chris@320 456 }
Chris@320 457 model->resume();
Chris@320 458 fftModels.push_back(model);
Chris@320 459 }
Chris@320 460 }
Chris@320 461
Chris@350 462 long startFrame = m_input.getModel()->getStartFrame();
Chris@350 463 long endFrame = m_input.getModel()->getEndFrame();
Chris@320 464
Chris@350 465 RealTime contextStartRT = m_transform.getStartTime();
Chris@350 466 RealTime contextDurationRT = m_transform.getDuration();
Chris@350 467
Chris@350 468 long contextStart =
Chris@350 469 RealTime::realTime2Frame(contextStartRT, sampleRate);
Chris@350 470
Chris@350 471 long contextDuration =
Chris@350 472 RealTime::realTime2Frame(contextDurationRT, sampleRate);
Chris@320 473
Chris@320 474 if (contextStart == 0 || contextStart < startFrame) {
Chris@320 475 contextStart = startFrame;
Chris@320 476 }
Chris@320 477
Chris@320 478 if (contextDuration == 0) {
Chris@320 479 contextDuration = endFrame - contextStart;
Chris@320 480 }
Chris@320 481 if (contextStart + contextDuration > endFrame) {
Chris@320 482 contextDuration = endFrame - contextStart;
Chris@320 483 }
Chris@320 484
Chris@320 485 long blockFrame = contextStart;
Chris@320 486
Chris@320 487 long prevCompletion = 0;
Chris@320 488
Chris@320 489 setCompletion(0);
Chris@320 490
Chris@556 491 float *reals = 0;
Chris@556 492 float *imaginaries = 0;
Chris@556 493 if (frequencyDomain) {
Chris@556 494 reals = new float[blockSize/2 + 1];
Chris@556 495 imaginaries = new float[blockSize/2 + 1];
Chris@556 496 }
Chris@556 497
Chris@678 498 QString error = "";
Chris@678 499
Chris@320 500 while (!m_abandoned) {
Chris@320 501
Chris@320 502 if (frequencyDomain) {
Chris@350 503 if (blockFrame - int(blockSize)/2 >
Chris@320 504 contextStart + contextDuration) break;
Chris@320 505 } else {
Chris@320 506 if (blockFrame >=
Chris@320 507 contextStart + contextDuration) break;
Chris@320 508 }
Chris@320 509
Chris@690 510 // SVDEBUG << "FeatureExtractionModelTransformer::run: blockFrame "
Chris@320 511 // << blockFrame << ", endFrame " << endFrame << ", blockSize "
Chris@687 512 // << blockSize << endl;
Chris@320 513
Chris@320 514 long completion =
Chris@350 515 (((blockFrame - contextStart) / stepSize) * 99) /
Chris@557 516 (contextDuration / stepSize + 1);
Chris@320 517
Chris@350 518 // channelCount is either m_input.getModel()->channelCount or 1
Chris@320 519
Chris@363 520 if (frequencyDomain) {
Chris@363 521 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@350 522 int column = (blockFrame - startFrame) / stepSize;
Chris@556 523 fftModels[ch]->getValuesAt(column, reals, imaginaries);
Chris@350 524 for (size_t i = 0; i <= blockSize/2; ++i) {
Chris@556 525 buffers[ch][i*2] = reals[i];
Chris@556 526 buffers[ch][i*2+1] = imaginaries[i];
Chris@320 527 }
Chris@678 528 error = fftModels[ch]->getError();
Chris@678 529 if (error != "") {
Chris@686 530 std::cerr << "FeatureExtractionModelTransformer::run: Abandoning, error is " << error << std::endl;
Chris@678 531 m_abandoned = true;
Chris@678 532 m_message = error;
Chris@678 533 }
Chris@363 534 }
Chris@363 535 } else {
Chris@363 536 getFrames(channelCount, blockFrame, blockSize, buffers);
Chris@320 537 }
Chris@320 538
Chris@497 539 if (m_abandoned) break;
Chris@497 540
Chris@320 541 Vamp::Plugin::FeatureSet features = m_plugin->process
Chris@320 542 (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate));
Chris@320 543
Chris@497 544 if (m_abandoned) break;
Chris@497 545
Chris@778 546 for (size_t fi = 0; fi < features[m_outputNo].size(); ++fi) {
Chris@778 547 Vamp::Plugin::Feature feature = features[m_outputNo][fi];
Chris@320 548 addFeature(blockFrame, feature);
Chris@320 549 }
Chris@320 550
Chris@320 551 if (blockFrame == contextStart || completion > prevCompletion) {
Chris@320 552 setCompletion(completion);
Chris@320 553 prevCompletion = completion;
Chris@320 554 }
Chris@320 555
Chris@350 556 blockFrame += stepSize;
Chris@320 557 }
Chris@320 558
Chris@497 559 if (!m_abandoned) {
Chris@497 560 Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures();
Chris@320 561
Chris@778 562 for (size_t fi = 0; fi < features[m_outputNo].size(); ++fi) {
Chris@778 563 Vamp::Plugin::Feature feature = features[m_outputNo][fi];
Chris@497 564 addFeature(blockFrame, feature);
Chris@497 565 }
Chris@497 566 }
Chris@320 567
Chris@497 568 setCompletion(100);
Chris@320 569
Chris@320 570 if (frequencyDomain) {
Chris@320 571 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@320 572 delete fftModels[ch];
Chris@320 573 }
Chris@556 574 delete[] reals;
Chris@556 575 delete[] imaginaries;
Chris@320 576 }
Chris@320 577 }
Chris@320 578
Chris@320 579 void
Chris@363 580 FeatureExtractionModelTransformer::getFrames(int channelCount,
Chris@363 581 long startFrame, long size,
Chris@363 582 float **buffers)
Chris@320 583 {
Chris@320 584 long offset = 0;
Chris@320 585
Chris@320 586 if (startFrame < 0) {
Chris@363 587 for (int c = 0; c < channelCount; ++c) {
Chris@363 588 for (int i = 0; i < size && startFrame + i < 0; ++i) {
Chris@363 589 buffers[c][i] = 0.0f;
Chris@363 590 }
Chris@320 591 }
Chris@320 592 offset = -startFrame;
Chris@320 593 size -= offset;
Chris@320 594 if (size <= 0) return;
Chris@320 595 startFrame = 0;
Chris@320 596 }
Chris@320 597
Chris@350 598 DenseTimeValueModel *input = getConformingInput();
Chris@350 599 if (!input) return;
Chris@363 600
Chris@363 601 long got = 0;
Chris@350 602
Chris@363 603 if (channelCount == 1) {
Chris@363 604
Chris@363 605 got = input->getData(m_input.getChannel(), startFrame, size,
Chris@363 606 buffers[0] + offset);
Chris@363 607
Chris@363 608 if (m_input.getChannel() == -1 && input->getChannelCount() > 1) {
Chris@363 609 // use mean instead of sum, as plugin input
Chris@363 610 float cc = float(input->getChannelCount());
Chris@363 611 for (long i = 0; i < size; ++i) {
Chris@363 612 buffers[0][i + offset] /= cc;
Chris@363 613 }
Chris@363 614 }
Chris@363 615
Chris@363 616 } else {
Chris@363 617
Chris@363 618 float **writebuf = buffers;
Chris@363 619 if (offset > 0) {
Chris@363 620 writebuf = new float *[channelCount];
Chris@363 621 for (int i = 0; i < channelCount; ++i) {
Chris@363 622 writebuf[i] = buffers[i] + offset;
Chris@363 623 }
Chris@363 624 }
Chris@363 625
Chris@363 626 got = input->getData(0, channelCount-1, startFrame, size, writebuf);
Chris@363 627
Chris@363 628 if (writebuf != buffers) delete[] writebuf;
Chris@363 629 }
Chris@320 630
Chris@320 631 while (got < size) {
Chris@363 632 for (int c = 0; c < channelCount; ++c) {
Chris@363 633 buffers[c][got + offset] = 0.0;
Chris@363 634 }
Chris@320 635 ++got;
Chris@320 636 }
Chris@320 637 }
Chris@320 638
Chris@320 639 void
Chris@331 640 FeatureExtractionModelTransformer::addFeature(size_t blockFrame,
Chris@320 641 const Vamp::Plugin::Feature &feature)
Chris@320 642 {
Chris@350 643 size_t inputRate = m_input.getModel()->getSampleRate();
Chris@320 644
Chris@712 645 // std::cerr << "FeatureExtractionModelTransformer::addFeature: blockFrame = "
Chris@712 646 // << blockFrame << ", hasTimestamp = " << feature.hasTimestamp
Chris@712 647 // << ", timestamp = " << feature.timestamp << ", hasDuration = "
Chris@712 648 // << feature.hasDuration << ", duration = " << feature.duration
Chris@712 649 // << std::endl;
Chris@320 650
Chris@320 651 int binCount = 1;
Chris@320 652 if (m_descriptor->hasFixedBinCount) {
Chris@320 653 binCount = m_descriptor->binCount;
Chris@320 654 }
Chris@320 655
Chris@320 656 size_t frame = blockFrame;
Chris@320 657
Chris@320 658 if (m_descriptor->sampleType ==
Chris@320 659 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
Chris@320 660
Chris@320 661 if (!feature.hasTimestamp) {
Chris@320 662 std::cerr
Chris@331 663 << "WARNING: FeatureExtractionModelTransformer::addFeature: "
Chris@320 664 << "Feature has variable sample rate but no timestamp!"
Chris@320 665 << std::endl;
Chris@320 666 return;
Chris@320 667 } else {
Chris@320 668 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate);
Chris@320 669 }
Chris@320 670
Chris@320 671 } else if (m_descriptor->sampleType ==
Chris@320 672 Vamp::Plugin::OutputDescriptor::FixedSampleRate) {
Chris@320 673
Chris@779 674 if (!feature.hasTimestamp) {
Chris@779 675 ++m_fixedRateFeatureNo;
Chris@779 676 } else {
Chris@779 677 RealTime ts(feature.timestamp.sec, feature.timestamp.nsec);
Chris@779 678 m_fixedRateFeatureNo =
Chris@779 679 lrint(ts.toDouble() * m_descriptor->sampleRate);
Chris@779 680 }
Chris@779 681
Chris@779 682 frame = lrintf((m_fixedRateFeatureNo / m_descriptor->sampleRate)
Chris@779 683 * inputRate);
Chris@320 684 }
Chris@320 685
Chris@441 686 // Rather than repeat the complicated tests from the constructor
Chris@441 687 // to determine what sort of model we must be adding the features
Chris@441 688 // to, we instead test what sort of model the constructor decided
Chris@441 689 // to create.
Chris@320 690
Chris@441 691 if (isOutput<SparseOneDimensionalModel>()) {
Chris@441 692
Chris@441 693 SparseOneDimensionalModel *model =
Chris@350 694 getConformingOutput<SparseOneDimensionalModel>();
Chris@320 695 if (!model) return;
Chris@350 696
Chris@441 697 model->addPoint(SparseOneDimensionalModel::Point
Chris@441 698 (frame, feature.label.c_str()));
Chris@320 699
Chris@441 700 } else if (isOutput<SparseTimeValueModel>()) {
Chris@320 701
Chris@350 702 SparseTimeValueModel *model =
Chris@350 703 getConformingOutput<SparseTimeValueModel>();
Chris@320 704 if (!model) return;
Chris@350 705
Chris@454 706 for (int i = 0; i < feature.values.size(); ++i) {
Chris@454 707
Chris@454 708 float value = feature.values[i];
Chris@454 709
Chris@454 710 QString label = feature.label.c_str();
Chris@454 711 if (feature.values.size() > 1) {
Chris@454 712 label = QString("[%1] %2").arg(i+1).arg(label);
Chris@454 713 }
Chris@454 714
Chris@454 715 model->addPoint(SparseTimeValueModel::Point(frame, value, label));
Chris@454 716 }
Chris@320 717
Chris@441 718 } else if (isOutput<NoteModel>() || isOutput<RegionModel>()) {
Chris@320 719
Chris@441 720 int index = 0;
Chris@441 721
Chris@441 722 float value = 0.0;
Chris@441 723 if (feature.values.size() > index) {
Chris@441 724 value = feature.values[index++];
Chris@441 725 }
Chris@320 726
Chris@320 727 float duration = 1;
Chris@441 728 if (feature.hasDuration) {
Chris@441 729 duration = Vamp::RealTime::realTime2Frame(feature.duration, inputRate);
Chris@441 730 } else {
Chris@441 731 if (feature.values.size() > index) {
Chris@441 732 duration = feature.values[index++];
Chris@441 733 }
Chris@441 734 }
Chris@320 735
Chris@441 736 if (isOutput<NoteModel>()) {
Chris@320 737
Chris@441 738 float velocity = 100;
Chris@441 739 if (feature.values.size() > index) {
Chris@441 740 velocity = feature.values[index++];
Chris@441 741 }
Chris@441 742 if (velocity < 0) velocity = 127;
Chris@441 743 if (velocity > 127) velocity = 127;
Chris@320 744
Chris@441 745 NoteModel *model = getConformingOutput<NoteModel>();
Chris@441 746 if (!model) return;
Chris@441 747 model->addPoint(NoteModel::Point(frame, value, // value is pitch
Chris@441 748 lrintf(duration),
Chris@441 749 velocity / 127.f,
Chris@441 750 feature.label.c_str()));
Chris@441 751 } else {
Chris@441 752 RegionModel *model = getConformingOutput<RegionModel>();
Chris@454 753 if (!model) return;
Chris@454 754
Chris@474 755 if (feature.hasDuration && !feature.values.empty()) {
Chris@454 756
Chris@454 757 for (int i = 0; i < feature.values.size(); ++i) {
Chris@454 758
Chris@454 759 float value = feature.values[i];
Chris@454 760
Chris@454 761 QString label = feature.label.c_str();
Chris@454 762 if (feature.values.size() > 1) {
Chris@454 763 label = QString("[%1] %2").arg(i+1).arg(label);
Chris@454 764 }
Chris@454 765
Chris@454 766 model->addPoint(RegionModel::Point(frame, value,
Chris@454 767 lrintf(duration),
Chris@454 768 label));
Chris@454 769 }
Chris@454 770 } else {
Chris@454 771
Chris@441 772 model->addPoint(RegionModel::Point(frame, value,
Chris@441 773 lrintf(duration),
Chris@441 774 feature.label.c_str()));
Chris@454 775 }
Chris@441 776 }
Chris@320 777
Chris@441 778 } else if (isOutput<EditableDenseThreeDimensionalModel>()) {
Chris@320 779
Chris@533 780 DenseThreeDimensionalModel::Column values =
Chris@533 781 DenseThreeDimensionalModel::Column::fromStdVector(feature.values);
Chris@320 782
Chris@320 783 EditableDenseThreeDimensionalModel *model =
Chris@350 784 getConformingOutput<EditableDenseThreeDimensionalModel>();
Chris@320 785 if (!model) return;
Chris@320 786
Chris@320 787 model->setColumn(frame / model->getResolution(), values);
Chris@441 788
Chris@441 789 } else {
Chris@690 790 SVDEBUG << "FeatureExtractionModelTransformer::addFeature: Unknown output model type!" << endl;
Chris@320 791 }
Chris@320 792 }
Chris@320 793
Chris@320 794 void
Chris@331 795 FeatureExtractionModelTransformer::setCompletion(int completion)
Chris@320 796 {
Chris@320 797 int binCount = 1;
Chris@320 798 if (m_descriptor->hasFixedBinCount) {
Chris@320 799 binCount = m_descriptor->binCount;
Chris@320 800 }
Chris@320 801
Chris@690 802 // SVDEBUG << "FeatureExtractionModelTransformer::setCompletion("
Chris@687 803 // << completion << ")" << endl;
Chris@320 804
Chris@441 805 if (isOutput<SparseOneDimensionalModel>()) {
Chris@320 806
Chris@350 807 SparseOneDimensionalModel *model =
Chris@350 808 getConformingOutput<SparseOneDimensionalModel>();
Chris@320 809 if (!model) return;
Chris@441 810 model->setCompletion(completion, true);
Chris@320 811
Chris@441 812 } else if (isOutput<SparseTimeValueModel>()) {
Chris@320 813
Chris@350 814 SparseTimeValueModel *model =
Chris@350 815 getConformingOutput<SparseTimeValueModel>();
Chris@320 816 if (!model) return;
Chris@441 817 model->setCompletion(completion, true);
Chris@320 818
Chris@441 819 } else if (isOutput<NoteModel>()) {
Chris@320 820
Chris@441 821 NoteModel *model = getConformingOutput<NoteModel>();
Chris@320 822 if (!model) return;
Chris@441 823 model->setCompletion(completion, true);
Chris@320 824
Chris@441 825 } else if (isOutput<RegionModel>()) {
Chris@441 826
Chris@441 827 RegionModel *model = getConformingOutput<RegionModel>();
Chris@441 828 if (!model) return;
Chris@441 829 model->setCompletion(completion, true);
Chris@441 830
Chris@441 831 } else if (isOutput<EditableDenseThreeDimensionalModel>()) {
Chris@320 832
Chris@320 833 EditableDenseThreeDimensionalModel *model =
Chris@350 834 getConformingOutput<EditableDenseThreeDimensionalModel>();
Chris@320 835 if (!model) return;
Chris@350 836 model->setCompletion(completion, true); //!!!m_context.updates);
Chris@320 837 }
Chris@320 838 }
Chris@320 839