annotate transform/FeatureExtractionModelTransformer.cpp @ 558:1d7ebc05157e

* Some fairly simplistic code to set up layer type properties based on RDF data about feature types (both when running transforms and when importing features from RDF files).
author Chris Cannam
date Thu, 12 Feb 2009 15:26:43 +0000
parents a40023bebd15
children 3bbac921b70a
rev   line source
Chris@320 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@320 2
Chris@320 3 /*
Chris@320 4 Sonic Visualiser
Chris@320 5 An audio file viewer and annotation editor.
Chris@320 6 Centre for Digital Music, Queen Mary, University of London.
Chris@320 7 This file copyright 2006 Chris Cannam and QMUL.
Chris@320 8
Chris@320 9 This program is free software; you can redistribute it and/or
Chris@320 10 modify it under the terms of the GNU General Public License as
Chris@320 11 published by the Free Software Foundation; either version 2 of the
Chris@320 12 License, or (at your option) any later version. See the file
Chris@320 13 COPYING included with this distribution for more information.
Chris@320 14 */
Chris@320 15
Chris@331 16 #include "FeatureExtractionModelTransformer.h"
Chris@320 17
Chris@320 18 #include "plugin/FeatureExtractionPluginFactory.h"
Chris@320 19 #include "plugin/PluginXml.h"
Chris@475 20 #include <vamp-hostsdk/Plugin.h>
Chris@320 21
Chris@320 22 #include "data/model/Model.h"
Chris@320 23 #include "base/Window.h"
Chris@387 24 #include "base/Exceptions.h"
Chris@320 25 #include "data/model/SparseOneDimensionalModel.h"
Chris@320 26 #include "data/model/SparseTimeValueModel.h"
Chris@320 27 #include "data/model/EditableDenseThreeDimensionalModel.h"
Chris@320 28 #include "data/model/DenseTimeValueModel.h"
Chris@320 29 #include "data/model/NoteModel.h"
Chris@441 30 #include "data/model/RegionModel.h"
Chris@320 31 #include "data/model/FFTModel.h"
Chris@320 32 #include "data/model/WaveFileModel.h"
Chris@558 33 #include "rdf/PluginRDFDescription.h"
Chris@320 34
Chris@350 35 #include "TransformFactory.h"
Chris@350 36
Chris@320 37 #include <iostream>
Chris@320 38
Chris@350 39 FeatureExtractionModelTransformer::FeatureExtractionModelTransformer(Input in,
Chris@350 40 const Transform &transform) :
Chris@350 41 ModelTransformer(in, transform),
Chris@320 42 m_plugin(0),
Chris@320 43 m_descriptor(0),
Chris@320 44 m_outputFeatureNo(0)
Chris@320 45 {
Chris@350 46 // std::cerr << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: plugin " << pluginId.toStdString() << ", outputName " << m_transform.getOutput().toStdString() << std::endl;
Chris@350 47
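// Editor's sketch of the flow below (descriptive only): the constructor does all
// the up-front work -- find the plugin factory, instantiate and configure the
// plugin, check that its channel requirements can be met, initialise it, and
// locate the requested output. Any failure records a user-visible explanation
// in m_message and returns without creating an output model.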
Chris@350 48 QString pluginId = transform.getPluginIdentifier();
Chris@320 49
Chris@320 50 FeatureExtractionPluginFactory *factory =
Chris@320 51 FeatureExtractionPluginFactory::instanceFor(pluginId);
Chris@320 52
Chris@320 53 if (!factory) {
Chris@361 54 m_message = tr("No factory available for feature extraction plugin id \"%1\" (unknown plugin type, or internal error?)").arg(pluginId);
Chris@320 55 return;
Chris@320 56 }
Chris@320 57
Chris@350 58 DenseTimeValueModel *input = getConformingInput();
Chris@350 59 if (!input) {
Chris@361 60 m_message = tr("Input model for feature extraction plugin \"%1\" is of wrong type (internal error?)").arg(pluginId);
Chris@350 61 return;
Chris@350 62 }
Chris@320 63
Chris@350 64 m_plugin = factory->instantiatePlugin(pluginId, input->getSampleRate());
Chris@320 65 if (!m_plugin) {
Chris@361 66 m_message = tr("Failed to instantiate plugin \"%1\"").arg(pluginId);
Chris@320 67 return;
Chris@320 68 }
Chris@320 69
Chris@350 70 TransformFactory::getInstance()->makeContextConsistentWithPlugin
Chris@350 71 (m_transform, m_plugin);
Chris@343 72
Chris@350 73 TransformFactory::getInstance()->setPluginParameters
Chris@350 74 (m_transform, m_plugin);
Chris@320 75
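// If the input model has more channels than the plugin can accept, fall back to
// processing a single channel; getFrames() below mixes down to mono when the
// selected input channel is -1.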
Chris@320 76 size_t channelCount = input->getChannelCount();
Chris@320 77 if (m_plugin->getMaxChannelCount() < channelCount) {
Chris@320 78 channelCount = 1;
Chris@320 79 }
Chris@320 80 if (m_plugin->getMinChannelCount() > channelCount) {
Chris@361 81 m_message = tr("Cannot provide enough channels to feature extraction plugin \"%1\" (plugin min is %2, max %3; input model has %4)")
Chris@361 82 .arg(pluginId)
Chris@361 83 .arg(m_plugin->getMinChannelCount())
Chris@361 84 .arg(m_plugin->getMaxChannelCount())
Chris@361 85 .arg(input->getChannelCount());
Chris@320 86 return;
Chris@320 87 }
Chris@320 88
Chris@320 89 std::cerr << "Initialising feature extraction plugin with channels = "
Chris@350 90 << channelCount << ", step = " << m_transform.getStepSize()
Chris@350 91 << ", block = " << m_transform.getBlockSize() << std::endl;
Chris@320 92
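// First try to initialise with the step and block sizes from the transform. If
// the plugin rejects them, reset them to zero so that
// makeContextConsistentWithPlugin() substitutes the plugin's own defaults, then
// retry once before giving up.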
Chris@320 93 if (!m_plugin->initialise(channelCount,
Chris@350 94 m_transform.getStepSize(),
Chris@350 95 m_transform.getBlockSize())) {
Chris@361 96
Chris@361 97 size_t pstep = m_transform.getStepSize();
Chris@361 98 size_t pblock = m_transform.getBlockSize();
Chris@361 99
Chris@361 100 m_transform.setStepSize(0);
Chris@361 101 m_transform.setBlockSize(0);
Chris@361 102 TransformFactory::getInstance()->makeContextConsistentWithPlugin
Chris@361 103 (m_transform, m_plugin);
Chris@361 104
Chris@361 105 if (m_transform.getStepSize() != pstep ||
Chris@361 106 m_transform.getBlockSize() != pblock) {
Chris@361 107
Chris@361 108 if (!m_plugin->initialise(channelCount,
Chris@361 109 m_transform.getStepSize(),
Chris@361 110 m_transform.getBlockSize())) {
Chris@361 111
Chris@361 112 m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
Chris@361 113 return;
Chris@361 114
Chris@361 115 } else {
Chris@361 116
Chris@361 117 m_message = tr("Feature extraction plugin \"%1\" rejected the given step and block sizes (%2 and %3); using plugin defaults (%4 and %5) instead")
Chris@361 118 .arg(pluginId)
Chris@361 119 .arg(pstep)
Chris@361 120 .arg(pblock)
Chris@361 121 .arg(m_transform.getStepSize())
Chris@361 122 .arg(m_transform.getBlockSize());
Chris@361 123 }
Chris@361 124
Chris@361 125 } else {
Chris@361 126
Chris@361 127 m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
Chris@361 128 return;
Chris@361 129 }
Chris@320 130 }
Chris@320 131
Chris@366 132 if (m_transform.getPluginVersion() != "") {
Chris@366 133 QString pv = QString("%1").arg(m_plugin->getPluginVersion());
Chris@366 134 if (pv != m_transform.getPluginVersion()) {
Chris@366 135 QString vm = tr("Transform was configured for version %1 of plugin \"%2\", but the plugin being used is version %3")
Chris@366 136 .arg(m_transform.getPluginVersion())
Chris@366 137 .arg(pluginId)
Chris@366 138 .arg(pv);
Chris@366 139 if (m_message != "") {
Chris@366 140 m_message = QString("%1; %2").arg(vm).arg(m_message);
Chris@366 141 } else {
Chris@366 142 m_message = vm;
Chris@366 143 }
Chris@366 144 }
Chris@366 145 }
Chris@366 146
Chris@320 147 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
Chris@320 148
Chris@320 149 if (outputs.empty()) {
Chris@361 150 m_message = tr("Plugin \"%1\" has no outputs").arg(pluginId);
Chris@320 151 return;
Chris@320 152 }
Chris@320 153
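// An empty output identifier in the transform means "use the plugin's first output".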
Chris@320 154 for (size_t i = 0; i < outputs.size(); ++i) {
Chris@429 155 // std::cerr << "comparing output " << i << " name \"" << outputs[i].identifier << "\" with expected \"" << m_transform.getOutput().toStdString() << "\"" << std::endl;
Chris@350 156 if (m_transform.getOutput() == "" ||
Chris@350 157 outputs[i].identifier == m_transform.getOutput().toStdString()) {
Chris@320 158 m_outputFeatureNo = i;
Chris@441 159 m_descriptor = new Vamp::Plugin::OutputDescriptor(outputs[i]);
Chris@320 160 break;
Chris@320 161 }
Chris@320 162 }
Chris@320 163
Chris@320 164 if (!m_descriptor) {
Chris@361 165 m_message = tr("Plugin \"%1\" has no output named \"%2\"")
Chris@361 166 .arg(pluginId)
Chris@361 167 .arg(m_transform.getOutput());
Chris@320 168 return;
Chris@320 169 }
Chris@320 170
Chris@558 171 createOutputModel();
Chris@558 172 }
Chris@558 173
Chris@558 174 void
Chris@558 175 FeatureExtractionModelTransformer::createOutputModel()
Chris@558 176 {
Chris@558 177 DenseTimeValueModel *input = getConformingInput();
Chris@558 178
Chris@331 179 // std::cerr << "FeatureExtractionModelTransformer: output sample type "
Chris@320 180 // << m_descriptor->sampleType << std::endl;
Chris@320 181
Chris@558 182 PluginRDFDescription description(m_transform.getPluginIdentifier());
Chris@558 183 QString outputId = m_transform.getOutput();
Chris@558 184
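// If an RDF description is installed for this plugin, it can supply the event or
// signal type URI of the chosen output; each model created below has that URI
// attached via setRDFTypeURI() so that layer type properties can be set up from
// it later.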
Chris@320 185 int binCount = 1;
Chris@320 186 float minValue = 0.0, maxValue = 0.0;
Chris@320 187 bool haveExtents = false;
Chris@320 188
Chris@320 189 if (m_descriptor->hasFixedBinCount) {
Chris@320 190 binCount = m_descriptor->binCount;
Chris@320 191 }
Chris@320 192
Chris@331 193 // std::cerr << "FeatureExtractionModelTransformer: output bin count "
Chris@320 194 // << binCount << std::endl;
Chris@320 195
Chris@320 196 if (binCount > 0 && m_descriptor->hasKnownExtents) {
Chris@320 197 minValue = m_descriptor->minValue;
Chris@320 198 maxValue = m_descriptor->maxValue;
Chris@320 199 haveExtents = true;
Chris@320 200 }
Chris@320 201
Chris@350 202 size_t modelRate = input->getSampleRate();
Chris@320 203 size_t modelResolution = 1;
Chris@320 204
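// Work out the model resolution (frames per value) from the output's sample
// type. Illustrative numbers only: with a 44100 Hz input, OneSamplePerStep with
// a 1024-sample step gives a resolution of 1024, while a FixedSampleRate output
// declaring 86.1328125 Hz gives 44100 / 86.1328125 = 512.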
Chris@320 205 switch (m_descriptor->sampleType) {
Chris@320 206
Chris@320 207 case Vamp::Plugin::OutputDescriptor::VariableSampleRate:
Chris@320 208 if (m_descriptor->sampleRate != 0.0) {
Chris@320 209 modelResolution = size_t(modelRate / m_descriptor->sampleRate + 0.001);
Chris@320 210 }
Chris@320 211 break;
Chris@320 212
Chris@320 213 case Vamp::Plugin::OutputDescriptor::OneSamplePerStep:
Chris@350 214 modelResolution = m_transform.getStepSize();
Chris@320 215 break;
Chris@320 216
Chris@320 217 case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
Chris@451 218 //!!! SV doesn't actually support display of models that have
Chris@451 219 //!!! different underlying rates together -- so we always set
Chris@451 220 //!!! the model rate to be the input model's rate, and adjust
Chris@451 221 //!!! the resolution appropriately. We can't properly display
Chris@451 222 //!!! data with a higher resolution than the base model at all
Chris@451 223 // modelRate = size_t(m_descriptor->sampleRate + 0.001);
Chris@451 224 if (m_descriptor->sampleRate > input->getSampleRate()) {
Chris@451 225 modelResolution = 1;
Chris@451 226 } else {
Chris@451 227 modelResolution = size_t(input->getSampleRate() /
Chris@451 228 m_descriptor->sampleRate);
Chris@451 229 }
Chris@320 230 break;
Chris@320 231 }
Chris@320 232
Chris@441 233 bool preDurationPlugin = (m_plugin->getVampApiVersion() < 2);
Chris@441 234
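// Summary of the model-type decision that follows:
//   no values, no duration                                  -> SparseOneDimensionalModel (instants)
//   duration (new API), or old-API multi-bin variable-rate  -> NoteModel or RegionModel
//   single value, or variable sample rate                   -> SparseTimeValueModel
//   otherwise (fixed rate, multiple bins)                   -> EditableDenseThreeDimensionalModel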
Chris@441 235 if (binCount == 0 &&
Chris@441 236 (preDurationPlugin || !m_descriptor->hasDuration)) {
Chris@320 237
Chris@445 238 // Anything with no value and no duration is an instant
Chris@445 239
Chris@320 240 m_output = new SparseOneDimensionalModel(modelRate, modelResolution,
Chris@320 241 false);
Chris@320 242
Chris@558 243 QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
Chris@558 244 m_output->setRDFTypeURI(outputEventTypeURI);
Chris@558 245
Chris@441 246 } else if ((preDurationPlugin && binCount > 1 &&
Chris@441 247 (m_descriptor->sampleType ==
Chris@441 248 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) ||
Chris@441 249 (!preDurationPlugin && m_descriptor->hasDuration)) {
Chris@441 250
Chris@441 251 // For plugins using the old v1 API without explicit duration,
Chris@441 252 // we treat anything that has multiple bins (i.e. that has the
Chris@441 253 // potential to have value and duration) and a variable sample
Chris@441 254 // rate as a note model, taking its values as pitch, duration
Chris@441 255 // and velocity (if present) respectively. This is the same
Chris@441 256 // behaviour as always applied by SV to these plugins in the
Chris@441 257 // past.
Chris@441 258
Chris@441 259 // For plugins with the newer API, we treat anything with
Chris@441 260 // duration as either a note model with pitch and velocity, or
Chris@441 261 // a region model.
Chris@441 262
Chris@441 263 // How do we know whether it's an interval or note model?
Chris@441 264 // What's the essential difference? Is a note model any
Chris@441 265 // interval model using a Hz or "MIDI pitch" scale? There
Chris@441 266 // isn't really a reliable test for "MIDI pitch"... Does a
Chris@441 267 // note model always have velocity? This is a good question
Chris@441 268 // to be addressed by accompanying RDF, but for the moment we
Chris@441 269 // will do the following...
Chris@441 270
Chris@441 271 bool isNoteModel = false;
Chris@441 272
Chris@441 273 // Regions have only value (and duration -- we can't extract a
Chris@441 274 // region model from an old-style plugin that doesn't support
Chris@441 275 // duration)
Chris@441 276 if (binCount > 1) isNoteModel = true;
Chris@441 277
Chris@441 278 // Regions do not have units of Hz (a sweeping assumption!)
Chris@441 279 if (m_descriptor->unit == "Hz") isNoteModel = true;
Chris@441 280
Chris@441 281 // If we had a "sparse 3D model", we would have the additional
Chris@441 282 // problem of determining whether to use that here (if bin
Chris@441 283 // count > 1). But we don't.
Chris@441 284
Chris@441 285 if (isNoteModel) {
Chris@441 286
Chris@441 287 NoteModel *model;
Chris@441 288 if (haveExtents) {
Chris@441 289 model = new NoteModel
Chris@441 290 (modelRate, modelResolution, minValue, maxValue, false);
Chris@441 291 } else {
Chris@441 292 model = new NoteModel
Chris@441 293 (modelRate, modelResolution, false);
Chris@441 294 }
Chris@441 295 model->setScaleUnits(m_descriptor->unit.c_str());
Chris@441 296 m_output = model;
Chris@441 297
Chris@441 298 } else {
Chris@441 299
Chris@441 300 RegionModel *model;
Chris@441 301 if (haveExtents) {
Chris@441 302 model = new RegionModel
Chris@441 303 (modelRate, modelResolution, minValue, maxValue, false);
Chris@441 304 } else {
Chris@441 305 model = new RegionModel
Chris@441 306 (modelRate, modelResolution, false);
Chris@441 307 }
Chris@441 308 model->setScaleUnits(m_descriptor->unit.c_str());
Chris@441 309 m_output = model;
Chris@441 310 }
Chris@441 311
Chris@558 312 QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
Chris@558 313 m_output->setRDFTypeURI(outputEventTypeURI);
Chris@558 314
Chris@441 315 } else if (binCount == 1 ||
Chris@441 316 (m_descriptor->sampleType ==
Chris@441 317 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) {
Chris@441 318
Chris@441 319 // Anything that is not a 1D, note, or interval model and that
Chris@441 320 // has only one value per result must be a sparse time value
Chris@441 321 // model.
Chris@441 322
Chris@441 323 // Anything that is not a 1D, note, or interval model and that
Chris@441 324 // has a variable sample rate is also treated as a sparse time
Chris@441 325 // value model regardless of its bin count, because we lack a
Chris@441 326 // sparse 3D model.
Chris@320 327
Chris@320 328 SparseTimeValueModel *model;
Chris@320 329 if (haveExtents) {
Chris@320 330 model = new SparseTimeValueModel
Chris@320 331 (modelRate, modelResolution, minValue, maxValue, false);
Chris@320 332 } else {
Chris@320 333 model = new SparseTimeValueModel
Chris@320 334 (modelRate, modelResolution, false);
Chris@320 335 }
Chris@558 336
Chris@558 337 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
Chris@320 338 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());
Chris@320 339
Chris@320 340 m_output = model;
Chris@320 341
Chris@558 342 QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
Chris@558 343 m_output->setRDFTypeURI(outputEventTypeURI);
Chris@558 344
Chris@441 345 } else {
Chris@320 346
Chris@441 347 // Anything that is not a 1D, note, or interval model and that
Chris@441 348 // has a fixed sample rate and more than one value per result
Chris@441 349 // must be a dense 3D model.
Chris@320 350
Chris@320 351 EditableDenseThreeDimensionalModel *model =
Chris@320 352 new EditableDenseThreeDimensionalModel
Chris@535 353 (modelRate, modelResolution, binCount,
Chris@535 354 EditableDenseThreeDimensionalModel::BasicMultirateCompression,
Chris@535 355 false);
Chris@320 356
Chris@320 357 if (!m_descriptor->binNames.empty()) {
Chris@320 358 std::vector<QString> names;
Chris@320 359 for (size_t i = 0; i < m_descriptor->binNames.size(); ++i) {
Chris@320 360 names.push_back(m_descriptor->binNames[i].c_str());
Chris@320 361 }
Chris@320 362 model->setBinNames(names);
Chris@320 363 }
Chris@320 364
Chris@320 365 m_output = model;
Chris@558 366
Chris@558 367 QString outputSignalTypeURI = description.getOutputSignalTypeURI(outputId);
Chris@558 368 m_output->setRDFTypeURI(outputSignalTypeURI);
Chris@320 369 }
Chris@333 370
Chris@350 371 if (m_output) m_output->setSourceModel(input);
Chris@320 372 }
Chris@320 373
Chris@331 374 FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()
Chris@320 375 {
Chris@436 376 // std::cerr << "FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()" << std::endl;
Chris@320 377 delete m_plugin;
Chris@320 378 delete m_descriptor;
Chris@320 379 }
Chris@320 380
Chris@320 381 DenseTimeValueModel *
Chris@350 382 FeatureExtractionModelTransformer::getConformingInput()
Chris@320 383 {
Chris@408 384 // std::cerr << "FeatureExtractionModelTransformer::getConformingInput: input model is " << getInputModel() << std::endl;
Chris@408 385
Chris@320 386 DenseTimeValueModel *dtvm =
Chris@320 387 dynamic_cast<DenseTimeValueModel *>(getInputModel());
Chris@320 388 if (!dtvm) {
Chris@350 389 std::cerr << "FeatureExtractionModelTransformer::getConformingInput: WARNING: Input model is not conformable to DenseTimeValueModel" << std::endl;
Chris@320 390 }
Chris@320 391 return dtvm;
Chris@320 392 }
Chris@320 393
Chris@320 394 void
Chris@331 395 FeatureExtractionModelTransformer::run()
Chris@320 396 {
Chris@350 397 DenseTimeValueModel *input = getConformingInput();
Chris@320 398 if (!input) return;
Chris@320 399
Chris@320 400 if (!m_output) return;
Chris@320 401
Chris@497 402 while (!input->isReady() && !m_abandoned) {
Chris@331 403 std::cerr << "FeatureExtractionModelTransformer::run: Waiting for input model to be ready..." << std::endl;
Chris@497 404 usleep(500000);
Chris@320 405 }
Chris@497 406 if (m_abandoned) return;
Chris@320 407
Chris@350 408 size_t sampleRate = input->getSampleRate();
Chris@320 409
Chris@320 410 size_t channelCount = input->getChannelCount();
Chris@320 411 if (m_plugin->getMaxChannelCount() < channelCount) {
Chris@320 412 channelCount = 1;
Chris@320 413 }
Chris@320 414
Chris@320 415 float **buffers = new float*[channelCount];
Chris@320 416 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@350 417 buffers[ch] = new float[m_transform.getBlockSize() + 2];
Chris@320 418 }
Chris@320 419
Chris@350 420 size_t stepSize = m_transform.getStepSize();
Chris@350 421 size_t blockSize = m_transform.getBlockSize();
Chris@350 422
Chris@320 423 bool frequencyDomain = (m_plugin->getInputDomain() ==
Chris@320 424 Vamp::Plugin::FrequencyDomain);
Chris@320 425 std::vector<FFTModel *> fftModels;
Chris@320 426
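// For frequency-domain plugins we build one FFTModel per processed channel and
// pass interleaved real/imaginary pairs to process(); this is why each channel
// buffer above was allocated blockSize + 2 floats:
//   buffers[ch] = [ re(0), im(0), re(1), im(1), ..., re(blockSize/2), im(blockSize/2) ]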
Chris@320 427 if (frequencyDomain) {
Chris@320 428 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@320 429 FFTModel *model = new FFTModel
Chris@350 430 (getConformingInput(),
Chris@350 431 channelCount == 1 ? m_input.getChannel() : ch,
Chris@350 432 m_transform.getWindowType(),
Chris@350 433 blockSize,
Chris@350 434 stepSize,
Chris@350 435 blockSize,
Chris@334 436 false,
Chris@334 437 StorageAdviser::PrecisionCritical);
Chris@320 438 if (!model->isOK()) {
Chris@320 439 delete model;
Chris@320 440 setCompletion(100);
Chris@387 441 //!!! need a better way to handle this -- previously we were using a QMessageBox but that isn't an appropriate thing to do here either
Chris@387 442 throw AllocationFailed("Failed to create the FFT model for this feature extraction model transformer");
Chris@320 443 }
Chris@320 444 model->resume();
Chris@320 445 fftModels.push_back(model);
Chris@320 446 }
Chris@320 447 }
Chris@320 448
Chris@350 449 long startFrame = m_input.getModel()->getStartFrame();
Chris@350 450 long endFrame = m_input.getModel()->getEndFrame();
Chris@320 451
Chris@350 452 RealTime contextStartRT = m_transform.getStartTime();
Chris@350 453 RealTime contextDurationRT = m_transform.getDuration();
Chris@350 454
Chris@350 455 long contextStart =
Chris@350 456 RealTime::realTime2Frame(contextStartRT, sampleRate);
Chris@350 457
Chris@350 458 long contextDuration =
Chris@350 459 RealTime::realTime2Frame(contextDurationRT, sampleRate);
Chris@320 460
Chris@320 461 if (contextStart == 0 || contextStart < startFrame) {
Chris@320 462 contextStart = startFrame;
Chris@320 463 }
Chris@320 464
Chris@320 465 if (contextDuration == 0) {
Chris@320 466 contextDuration = endFrame - contextStart;
Chris@320 467 }
Chris@320 468 if (contextStart + contextDuration > endFrame) {
Chris@320 469 contextDuration = endFrame - contextStart;
Chris@320 470 }
Chris@320 471
Chris@320 472 long blockFrame = contextStart;
Chris@320 473
Chris@320 474 long prevCompletion = 0;
Chris@320 475
Chris@320 476 setCompletion(0);
Chris@320 477
Chris@556 478 float *reals = 0;
Chris@556 479 float *imaginaries = 0;
Chris@556 480 if (frequencyDomain) {
Chris@556 481 reals = new float[blockSize/2 + 1];
Chris@556 482 imaginaries = new float[blockSize/2 + 1];
Chris@556 483 }
Chris@556 484
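// Main processing loop. In the frequency domain, processing continues until
// blockFrame is half a block past the end of the context; in the time domain it
// stops once blockFrame reaches the end of the context.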
Chris@320 485 while (!m_abandoned) {
Chris@320 486
Chris@320 487 if (frequencyDomain) {
Chris@350 488 if (blockFrame - int(blockSize)/2 >
Chris@320 489 contextStart + contextDuration) break;
Chris@320 490 } else {
Chris@320 491 if (blockFrame >=
Chris@320 492 contextStart + contextDuration) break;
Chris@320 493 }
Chris@320 494
Chris@331 495 // std::cerr << "FeatureExtractionModelTransformer::run: blockFrame "
Chris@320 496 // << blockFrame << ", endFrame " << endFrame << ", blockSize "
Chris@350 497 // << blockSize << std::endl;
Chris@320 498
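// Progress is scaled to at most 99 here; setCompletion(100) is reserved for
// after getRemainingFeatures() has been collected.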
Chris@320 499 long completion =
Chris@350 500 (((blockFrame - contextStart) / stepSize) * 99) /
Chris@557 501 (contextDuration / stepSize + 1);
Chris@320 502
Chris@350 503 // channelCount is either m_input.getModel()->getChannelCount() or 1
Chris@320 504
Chris@363 505 if (frequencyDomain) {
Chris@363 506 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@350 507 int column = (blockFrame - startFrame) / stepSize;
Chris@556 508 fftModels[ch]->getValuesAt(column, reals, imaginaries);
Chris@350 509 for (size_t i = 0; i <= blockSize/2; ++i) {
Chris@556 510 buffers[ch][i*2] = reals[i];
Chris@556 511 buffers[ch][i*2+1] = imaginaries[i];
Chris@320 512 }
Chris@363 513 }
Chris@363 514 } else {
Chris@363 515 getFrames(channelCount, blockFrame, blockSize, buffers);
Chris@320 516 }
Chris@320 517
Chris@497 518 if (m_abandoned) break;
Chris@497 519
Chris@320 520 Vamp::Plugin::FeatureSet features = m_plugin->process
Chris@320 521 (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate));
Chris@320 522
Chris@497 523 if (m_abandoned) break;
Chris@497 524
Chris@320 525 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
Chris@320 526 Vamp::Plugin::Feature feature =
Chris@320 527 features[m_outputFeatureNo][fi];
Chris@320 528 addFeature(blockFrame, feature);
Chris@320 529 }
Chris@320 530
Chris@320 531 if (blockFrame == contextStart || completion > prevCompletion) {
Chris@320 532 setCompletion(completion);
Chris@320 533 prevCompletion = completion;
Chris@320 534 }
Chris@320 535
Chris@350 536 blockFrame += stepSize;
Chris@320 537 }
Chris@320 538
Chris@497 539 if (!m_abandoned) {
Chris@497 540 Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures();
Chris@320 541
Chris@497 542 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
Chris@497 543 Vamp::Plugin::Feature feature =
Chris@497 544 features[m_outputFeatureNo][fi];
Chris@497 545 addFeature(blockFrame, feature);
Chris@497 546 }
Chris@497 547 }
Chris@320 548
Chris@497 549 setCompletion(100);
Chris@320 550
Chris@320 551 if (frequencyDomain) {
Chris@320 552 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@320 553 delete fftModels[ch];
Chris@320 554 }
Chris@556 555 delete[] reals;
Chris@556 556 delete[] imaginaries;
Chris@320 557 }
Chris@320 558 }
Chris@320 559
Chris@320 560 void
Chris@363 561 FeatureExtractionModelTransformer::getFrames(int channelCount,
Chris@363 562 long startFrame, long size,
Chris@363 563 float **buffers)
Chris@320 564 {
Chris@320 565 long offset = 0;
Chris@320 566
Chris@320 567 if (startFrame < 0) {
Chris@363 568 for (int c = 0; c < channelCount; ++c) {
Chris@363 569 for (int i = 0; i < size && startFrame + i < 0; ++i) {
Chris@363 570 buffers[c][i] = 0.0f;
Chris@363 571 }
Chris@320 572 }
Chris@320 573 offset = -startFrame;
Chris@320 574 size -= offset;
Chris@320 575 if (size <= 0) return;
Chris@320 576 startFrame = 0;
Chris@320 577 }
Chris@320 578
Chris@350 579 DenseTimeValueModel *input = getConformingInput();
Chris@350 580 if (!input) return;
Chris@363 581
Chris@363 582 long got = 0;
Chris@350 583
Chris@363 584 if (channelCount == 1) {
Chris@363 585
Chris@363 586 got = input->getData(m_input.getChannel(), startFrame, size,
Chris@363 587 buffers[0] + offset);
Chris@363 588
Chris@363 589 if (m_input.getChannel() == -1 && input->getChannelCount() > 1) {
Chris@363 590 // use mean instead of sum, as plugin input
Chris@363 591 float cc = float(input->getChannelCount());
Chris@363 592 for (long i = 0; i < size; ++i) {
Chris@363 593 buffers[0][i + offset] /= cc;
Chris@363 594 }
Chris@363 595 }
Chris@363 596
Chris@363 597 } else {
Chris@363 598
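// If the start of each buffer was zero-padded above (offset > 0), point a
// temporary set of write pointers past the padding so getData() fills the
// remainder of each buffer.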
Chris@363 599 float **writebuf = buffers;
Chris@363 600 if (offset > 0) {
Chris@363 601 writebuf = new float *[channelCount];
Chris@363 602 for (int i = 0; i < channelCount; ++i) {
Chris@363 603 writebuf[i] = buffers[i] + offset;
Chris@363 604 }
Chris@363 605 }
Chris@363 606
Chris@363 607 got = input->getData(0, channelCount-1, startFrame, size, writebuf);
Chris@363 608
Chris@363 609 if (writebuf != buffers) delete[] writebuf;
Chris@363 610 }
Chris@320 611
Chris@320 612 while (got < size) {
Chris@363 613 for (int c = 0; c < channelCount; ++c) {
Chris@363 614 buffers[c][got + offset] = 0.0;
Chris@363 615 }
Chris@320 616 ++got;
Chris@320 617 }
Chris@320 618 }
Chris@320 619
Chris@320 620 void
Chris@331 621 FeatureExtractionModelTransformer::addFeature(size_t blockFrame,
Chris@320 622 const Vamp::Plugin::Feature &feature)
Chris@320 623 {
Chris@350 624 size_t inputRate = m_input.getModel()->getSampleRate();
Chris@320 625
Chris@331 626 // std::cerr << "FeatureExtractionModelTransformer::addFeature("
Chris@320 627 // << blockFrame << ")" << std::endl;
Chris@320 628
Chris@320 629 int binCount = 1;
Chris@320 630 if (m_descriptor->hasFixedBinCount) {
Chris@320 631 binCount = m_descriptor->binCount;
Chris@320 632 }
Chris@320 633
Chris@320 634 size_t frame = blockFrame;
Chris@320 635
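// Work out which frame this feature belongs at. Variable-rate outputs must carry
// their own timestamp; fixed-rate outputs use their timestamp if present and are
// otherwise appended after the last feature in the model; one-sample-per-step
// outputs just use the current block frame.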
Chris@320 636 if (m_descriptor->sampleType ==
Chris@320 637 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
Chris@320 638
Chris@320 639 if (!feature.hasTimestamp) {
Chris@320 640 std::cerr
Chris@331 641 << "WARNING: FeatureExtractionModelTransformer::addFeature: "
Chris@320 642 << "Feature has variable sample rate but no timestamp!"
Chris@320 643 << std::endl;
Chris@320 644 return;
Chris@320 645 } else {
Chris@320 646 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate);
Chris@320 647 }
Chris@320 648
Chris@320 649 } else if (m_descriptor->sampleType ==
Chris@320 650 Vamp::Plugin::OutputDescriptor::FixedSampleRate) {
Chris@320 651
Chris@320 652 if (feature.hasTimestamp) {
Chris@320 653 //!!! warning: sampleRate may be non-integral
Chris@320 654 frame = Vamp::RealTime::realTime2Frame(feature.timestamp,
Chris@451 655 //!!! see comment above when setting up modelResolution and modelRate
Chris@451 656 // lrintf(m_descriptor->sampleRate));
Chris@451 657 inputRate);
Chris@320 658 } else {
Chris@320 659 frame = m_output->getEndFrame();
Chris@320 660 }
Chris@320 661 }
Chris@320 662
Chris@441 663 // Rather than repeat the complicated tests from the constructor
Chris@441 664 // to determine what sort of model we must be adding the features
Chris@441 665 // to, we instead test what sort of model the constructor decided
Chris@441 666 // to create.
Chris@320 667
Chris@441 668 if (isOutput<SparseOneDimensionalModel>()) {
Chris@441 669
Chris@441 670 SparseOneDimensionalModel *model =
Chris@350 671 getConformingOutput<SparseOneDimensionalModel>();
Chris@320 672 if (!model) return;
Chris@350 673
Chris@441 674 model->addPoint(SparseOneDimensionalModel::Point
Chris@441 675 (frame, feature.label.c_str()));
Chris@320 676
Chris@441 677 } else if (isOutput<SparseTimeValueModel>()) {
Chris@320 678
Chris@350 679 SparseTimeValueModel *model =
Chris@350 680 getConformingOutput<SparseTimeValueModel>();
Chris@320 681 if (!model) return;
Chris@350 682
Chris@454 683 for (int i = 0; i < feature.values.size(); ++i) {
Chris@454 684
Chris@454 685 float value = feature.values[i];
Chris@454 686
Chris@454 687 QString label = feature.label.c_str();
Chris@454 688 if (feature.values.size() > 1) {
Chris@454 689 label = QString("[%1] %2").arg(i+1).arg(label);
Chris@454 690 }
Chris@454 691
Chris@454 692 model->addPoint(SparseTimeValueModel::Point(frame, value, label));
Chris@454 693 }
Chris@320 694
Chris@441 695 } else if (isOutput<NoteModel>() || isOutput<RegionModel>()) {
Chris@320 696
Chris@441 697 int index = 0;
Chris@441 698
Chris@441 699 float value = 0.0;
Chris@441 700 if (feature.values.size() > index) {
Chris@441 701 value = feature.values[index++];
Chris@441 702 }
Chris@320 703
Chris@320 704 float duration = 1;
Chris@441 705 if (feature.hasDuration) {
Chris@441 706 duration = Vamp::RealTime::realTime2Frame(feature.duration, inputRate);
Chris@441 707 } else {
Chris@441 708 if (feature.values.size() > index) {
Chris@441 709 duration = feature.values[index++];
Chris@441 710 }
Chris@441 711 }
Chris@320 712
Chris@441 713 if (isOutput<NoteModel>()) {
Chris@320 714
Chris@441 715 float velocity = 100;
Chris@441 716 if (feature.values.size() > index) {
Chris@441 717 velocity = feature.values[index++];
Chris@441 718 }
Chris@441 719 if (velocity < 0) velocity = 127;
Chris@441 720 if (velocity > 127) velocity = 127;
Chris@320 721
Chris@441 722 NoteModel *model = getConformingOutput<NoteModel>();
Chris@441 723 if (!model) return;
Chris@441 724 model->addPoint(NoteModel::Point(frame, value, // value is pitch
Chris@441 725 lrintf(duration),
Chris@441 726 velocity / 127.f,
Chris@441 727 feature.label.c_str()));
Chris@441 728 } else {
Chris@441 729 RegionModel *model = getConformingOutput<RegionModel>();
Chris@454 730 if (!model) return;
Chris@454 731
Chris@474 732 if (feature.hasDuration && !feature.values.empty()) {
Chris@454 733
Chris@454 734 for (int i = 0; i < feature.values.size(); ++i) {
Chris@454 735
Chris@454 736 float value = feature.values[i];
Chris@454 737
Chris@454 738 QString label = feature.label.c_str();
Chris@454 739 if (feature.values.size() > 1) {
Chris@454 740 label = QString("[%1] %2").arg(i+1).arg(label);
Chris@454 741 }
Chris@454 742
Chris@454 743 model->addPoint(RegionModel::Point(frame, value,
Chris@454 744 lrintf(duration),
Chris@454 745 label));
Chris@454 746 }
Chris@454 747 } else {
Chris@454 748
Chris@441 749 model->addPoint(RegionModel::Point(frame, value,
Chris@441 750 lrintf(duration),
Chris@441 751 feature.label.c_str()));
Chris@454 752 }
Chris@441 753 }
Chris@320 754
Chris@441 755 } else if (isOutput<EditableDenseThreeDimensionalModel>()) {
Chris@320 756
Chris@533 757 DenseThreeDimensionalModel::Column values =
Chris@533 758 DenseThreeDimensionalModel::Column::fromStdVector(feature.values);
Chris@320 759
Chris@320 760 EditableDenseThreeDimensionalModel *model =
Chris@350 761 getConformingOutput<EditableDenseThreeDimensionalModel>();
Chris@320 762 if (!model) return;
Chris@320 763
Chris@320 764 model->setColumn(frame / model->getResolution(), values);
Chris@441 765
Chris@441 766 } else {
Chris@441 767 std::cerr << "FeatureExtractionModelTransformer::addFeature: Unknown output model type!" << std::endl;
Chris@320 768 }
Chris@320 769 }
Chris@320 770
Chris@320 771 void
Chris@331 772 FeatureExtractionModelTransformer::setCompletion(int completion)
Chris@320 773 {
Chris@320 774 int binCount = 1;
Chris@320 775 if (m_descriptor->hasFixedBinCount) {
Chris@320 776 binCount = m_descriptor->binCount;
Chris@320 777 }
Chris@320 778
Chris@331 779 // std::cerr << "FeatureExtractionModelTransformer::setCompletion("
Chris@320 780 // << completion << ")" << std::endl;
Chris@320 781
Chris@441 782 if (isOutput<SparseOneDimensionalModel>()) {
Chris@320 783
Chris@350 784 SparseOneDimensionalModel *model =
Chris@350 785 getConformingOutput<SparseOneDimensionalModel>();
Chris@320 786 if (!model) return;
Chris@441 787 model->setCompletion(completion, true);
Chris@320 788
Chris@441 789 } else if (isOutput<SparseTimeValueModel>()) {
Chris@320 790
Chris@350 791 SparseTimeValueModel *model =
Chris@350 792 getConformingOutput<SparseTimeValueModel>();
Chris@320 793 if (!model) return;
Chris@441 794 model->setCompletion(completion, true);
Chris@320 795
Chris@441 796 } else if (isOutput<NoteModel>()) {
Chris@320 797
Chris@441 798 NoteModel *model = getConformingOutput<NoteModel>();
Chris@320 799 if (!model) return;
Chris@441 800 model->setCompletion(completion, true);
Chris@320 801
Chris@441 802 } else if (isOutput<RegionModel>()) {
Chris@441 803
Chris@441 804 RegionModel *model = getConformingOutput<RegionModel>();
Chris@441 805 if (!model) return;
Chris@441 806 model->setCompletion(completion, true);
Chris@441 807
Chris@441 808 } else if (isOutput<EditableDenseThreeDimensionalModel>()) {
Chris@320 809
Chris@320 810 EditableDenseThreeDimensionalModel *model =
Chris@350 811 getConformingOutput<EditableDenseThreeDimensionalModel>();
Chris@320 812 if (!model) return;
Chris@350 813 model->setCompletion(completion, true); //!!!m_context.updates);
Chris@320 814 }
Chris@320 815 }
Chris@320 816