annotate transform/FeatureExtractionModelTransformer.cpp @ 631:3a5ee4b6c9ad

* Complete the overhaul of CSV file import; now you can pick the purpose for each column in the file, and SV should do the rest. The most significant practical improvement here is that we can now handle files in which time and duration do not necessarily appear in known columns.
author Chris Cannam
date Mon, 19 Jul 2010 17:08:56 +0000
parents 3bbac921b70a
children 948271d124ac
rev   line source
Chris@320 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@320 2
Chris@320 3 /*
Chris@320 4 Sonic Visualiser
Chris@320 5 An audio file viewer and annotation editor.
Chris@320 6 Centre for Digital Music, Queen Mary, University of London.
Chris@320 7 This file copyright 2006 Chris Cannam and QMUL.
Chris@320 8
Chris@320 9 This program is free software; you can redistribute it and/or
Chris@320 10 modify it under the terms of the GNU General Public License as
Chris@320 11 published by the Free Software Foundation; either version 2 of the
Chris@320 12 License, or (at your option) any later version. See the file
Chris@320 13 COPYING included with this distribution for more information.
Chris@320 14 */
Chris@320 15
Chris@331 16 #include "FeatureExtractionModelTransformer.h"
Chris@320 17
Chris@320 18 #include "plugin/FeatureExtractionPluginFactory.h"
Chris@320 19 #include "plugin/PluginXml.h"
Chris@475 20 #include <vamp-hostsdk/Plugin.h>
Chris@320 21
Chris@320 22 #include "data/model/Model.h"
Chris@320 23 #include "base/Window.h"
Chris@387 24 #include "base/Exceptions.h"
Chris@320 25 #include "data/model/SparseOneDimensionalModel.h"
Chris@320 26 #include "data/model/SparseTimeValueModel.h"
Chris@320 27 #include "data/model/EditableDenseThreeDimensionalModel.h"
Chris@320 28 #include "data/model/DenseTimeValueModel.h"
Chris@320 29 #include "data/model/NoteModel.h"
Chris@441 30 #include "data/model/RegionModel.h"
Chris@320 31 #include "data/model/FFTModel.h"
Chris@320 32 #include "data/model/WaveFileModel.h"
Chris@558 33 #include "rdf/PluginRDFDescription.h"
Chris@320 34
Chris@350 35 #include "TransformFactory.h"
Chris@350 36
Chris@320 37 #include <iostream>
Chris@320 38
Chris@350 39 FeatureExtractionModelTransformer::FeatureExtractionModelTransformer(Input in,
Chris@350 40 const Transform &transform) :
Chris@350 41 ModelTransformer(in, transform),
Chris@320 42 m_plugin(0),
Chris@320 43 m_descriptor(0),
Chris@320 44 m_outputFeatureNo(0)
Chris@320 45 {
Chris@350 46 // std::cerr << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: plugin " << pluginId.toStdString() << ", outputName " << m_transform.getOutput().toStdString() << std::endl;
Chris@350 47
Chris@350 48 QString pluginId = transform.getPluginIdentifier();
Chris@320 49
Chris@320 50 FeatureExtractionPluginFactory *factory =
Chris@320 51 FeatureExtractionPluginFactory::instanceFor(pluginId);
Chris@320 52
Chris@320 53 if (!factory) {
Chris@361 54 m_message = tr("No factory available for feature extraction plugin id \"%1\" (unknown plugin type, or internal error?)").arg(pluginId);
Chris@320 55 return;
Chris@320 56 }
Chris@320 57
Chris@350 58 DenseTimeValueModel *input = getConformingInput();
Chris@350 59 if (!input) {
Chris@361 60 m_message = tr("Input model for feature extraction plugin \"%1\" is of wrong type (internal error?)").arg(pluginId);
Chris@350 61 return;
Chris@350 62 }
Chris@320 63
Chris@350 64 m_plugin = factory->instantiatePlugin(pluginId, input->getSampleRate());
Chris@320 65 if (!m_plugin) {
Chris@361 66 m_message = tr("Failed to instantiate plugin \"%1\"").arg(pluginId);
Chris@320 67 return;
Chris@320 68 }
Chris@320 69
Chris@350 70 TransformFactory::getInstance()->makeContextConsistentWithPlugin
Chris@350 71 (m_transform, m_plugin);
Chris@343 72
Chris@350 73 TransformFactory::getInstance()->setPluginParameters
Chris@350 74 (m_transform, m_plugin);
Chris@320 75
Chris@320 76 size_t channelCount = input->getChannelCount();
Chris@320 77 if (m_plugin->getMaxChannelCount() < channelCount) {
Chris@320 78 channelCount = 1;
Chris@320 79 }
Chris@320 80 if (m_plugin->getMinChannelCount() > channelCount) {
Chris@361 81 m_message = tr("Cannot provide enough channels to feature extraction plugin \"%1\" (plugin min is %2, max %3; input model has %4)")
Chris@361 82 .arg(pluginId)
Chris@361 83 .arg(m_plugin->getMinChannelCount())
Chris@361 84 .arg(m_plugin->getMaxChannelCount())
Chris@361 85 .arg(input->getChannelCount());
Chris@320 86 return;
Chris@320 87 }
Chris@320 88
Chris@320 89 std::cerr << "Initialising feature extraction plugin with channels = "
Chris@350 90 << channelCount << ", step = " << m_transform.getStepSize()
Chris@350 91 << ", block = " << m_transform.getBlockSize() << std::endl;
Chris@320 92
Chris@320 93 if (!m_plugin->initialise(channelCount,
Chris@350 94 m_transform.getStepSize(),
Chris@350 95 m_transform.getBlockSize())) {
Chris@361 96
Chris@361 97 size_t pstep = m_transform.getStepSize();
Chris@361 98 size_t pblock = m_transform.getBlockSize();
Chris@361 99
Chris@361 100 m_transform.setStepSize(0);
Chris@361 101 m_transform.setBlockSize(0);
Chris@361 102 TransformFactory::getInstance()->makeContextConsistentWithPlugin
Chris@361 103 (m_transform, m_plugin);
Chris@361 104
Chris@361 105 if (m_transform.getStepSize() != pstep ||
Chris@361 106 m_transform.getBlockSize() != pblock) {
Chris@361 107
Chris@361 108 if (!m_plugin->initialise(channelCount,
Chris@361 109 m_transform.getStepSize(),
Chris@361 110 m_transform.getBlockSize())) {
Chris@361 111
Chris@361 112 m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
Chris@361 113 return;
Chris@361 114
Chris@361 115 } else {
Chris@361 116
Chris@361 117 m_message = tr("Feature extraction plugin \"%1\" rejected the given step and block sizes (%2 and %3); using plugin defaults (%4 and %5) instead")
Chris@361 118 .arg(pluginId)
Chris@361 119 .arg(pstep)
Chris@361 120 .arg(pblock)
Chris@361 121 .arg(m_transform.getStepSize())
Chris@361 122 .arg(m_transform.getBlockSize());
Chris@361 123 }
Chris@361 124
Chris@361 125 } else {
Chris@361 126
Chris@361 127 m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
Chris@361 128 return;
Chris@361 129 }
Chris@320 130 }
Chris@320 131
Chris@366 132 if (m_transform.getPluginVersion() != "") {
Chris@366 133 QString pv = QString("%1").arg(m_plugin->getPluginVersion());
Chris@366 134 if (pv != m_transform.getPluginVersion()) {
Chris@366 135 QString vm = tr("Transform was configured for version %1 of plugin \"%2\", but the plugin being used is version %3")
Chris@366 136 .arg(m_transform.getPluginVersion())
Chris@366 137 .arg(pluginId)
Chris@366 138 .arg(pv);
Chris@366 139 if (m_message != "") {
Chris@366 140 m_message = QString("%1; %2").arg(vm).arg(m_message);
Chris@366 141 } else {
Chris@366 142 m_message = vm;
Chris@366 143 }
Chris@366 144 }
Chris@366 145 }
Chris@366 146
Chris@320 147 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
Chris@320 148
Chris@320 149 if (outputs.empty()) {
Chris@361 150 m_message = tr("Plugin \"%1\" has no outputs").arg(pluginId);
Chris@320 151 return;
Chris@320 152 }
Chris@320 153
Chris@320 154 for (size_t i = 0; i < outputs.size(); ++i) {
Chris@429 155 // std::cerr << "comparing output " << i << " name \"" << outputs[i].identifier << "\" with expected \"" << m_transform.getOutput().toStdString() << "\"" << std::endl;
Chris@350 156 if (m_transform.getOutput() == "" ||
Chris@350 157 outputs[i].identifier == m_transform.getOutput().toStdString()) {
Chris@320 158 m_outputFeatureNo = i;
Chris@441 159 m_descriptor = new Vamp::Plugin::OutputDescriptor(outputs[i]);
Chris@320 160 break;
Chris@320 161 }
Chris@320 162 }
Chris@320 163
Chris@320 164 if (!m_descriptor) {
Chris@361 165 m_message = tr("Plugin \"%1\" has no output named \"%2\"")
Chris@361 166 .arg(pluginId)
Chris@361 167 .arg(m_transform.getOutput());
Chris@320 168 return;
Chris@320 169 }
Chris@320 170
Chris@558 171 createOutputModel();
Chris@558 172 }
Chris@558 173
Chris@558 174 void
Chris@558 175 FeatureExtractionModelTransformer::createOutputModel()
Chris@558 176 {
Chris@558 177 DenseTimeValueModel *input = getConformingInput();
Chris@558 178
Chris@331 179 // std::cerr << "FeatureExtractionModelTransformer: output sample type "
Chris@320 180 // << m_descriptor->sampleType << std::endl;
Chris@320 181
Chris@558 182 PluginRDFDescription description(m_transform.getPluginIdentifier());
Chris@558 183 QString outputId = m_transform.getOutput();
Chris@558 184
Chris@320 185 int binCount = 1;
Chris@320 186 float minValue = 0.0, maxValue = 0.0;
Chris@320 187 bool haveExtents = false;
Chris@320 188
Chris@320 189 if (m_descriptor->hasFixedBinCount) {
Chris@320 190 binCount = m_descriptor->binCount;
Chris@320 191 }
Chris@320 192
Chris@331 193 // std::cerr << "FeatureExtractionModelTransformer: output bin count "
Chris@320 194 // << binCount << std::endl;
Chris@320 195
Chris@320 196 if (binCount > 0 && m_descriptor->hasKnownExtents) {
Chris@320 197 minValue = m_descriptor->minValue;
Chris@320 198 maxValue = m_descriptor->maxValue;
Chris@320 199 haveExtents = true;
Chris@320 200 }
Chris@320 201
Chris@350 202 size_t modelRate = input->getSampleRate();
Chris@320 203 size_t modelResolution = 1;
Chris@320 204
Chris@320 205 switch (m_descriptor->sampleType) {
Chris@320 206
Chris@320 207 case Vamp::Plugin::OutputDescriptor::VariableSampleRate:
Chris@320 208 if (m_descriptor->sampleRate != 0.0) {
Chris@320 209 modelResolution = size_t(modelRate / m_descriptor->sampleRate + 0.001);
Chris@320 210 }
Chris@320 211 break;
Chris@320 212
Chris@320 213 case Vamp::Plugin::OutputDescriptor::OneSamplePerStep:
Chris@350 214 modelResolution = m_transform.getStepSize();
Chris@320 215 break;
Chris@320 216
Chris@320 217 case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
Chris@451 218 //!!! SV doesn't actually support display of models that have
Chris@451 219 //!!! different underlying rates together -- so we always set
Chris@451 220 //!!! the model rate to be the input model's rate, and adjust
Chris@451 221 //!!! the resolution appropriately. We can't properly display
Chris@451 222 //!!! data with a higher resolution than the base model at all
Chris@451 223 // modelRate = size_t(m_descriptor->sampleRate + 0.001);
Chris@451 224 if (m_descriptor->sampleRate > input->getSampleRate()) {
Chris@451 225 modelResolution = 1;
Chris@451 226 } else {
Chris@451 227 modelResolution = size_t(input->getSampleRate() /
Chris@451 228 m_descriptor->sampleRate);
Chris@451 229 }
Chris@320 230 break;
Chris@320 231 }
Chris@320 232
Chris@441 233 bool preDurationPlugin = (m_plugin->getVampApiVersion() < 2);
Chris@441 234
Chris@441 235 if (binCount == 0 &&
Chris@441 236 (preDurationPlugin || !m_descriptor->hasDuration)) {
Chris@320 237
Chris@445 238 // Anything with no value and no duration is an instant
Chris@445 239
Chris@320 240 m_output = new SparseOneDimensionalModel(modelRate, modelResolution,
Chris@320 241 false);
Chris@320 242
Chris@558 243 QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
Chris@558 244 m_output->setRDFTypeURI(outputEventTypeURI);
Chris@558 245
Chris@441 246 } else if ((preDurationPlugin && binCount > 1 &&
Chris@441 247 (m_descriptor->sampleType ==
Chris@441 248 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) ||
Chris@441 249 (!preDurationPlugin && m_descriptor->hasDuration)) {
Chris@441 250
Chris@441 251 // For plugins using the old v1 API without explicit duration,
Chris@441 252 // we treat anything that has multiple bins (i.e. that has the
Chris@441 253 // potential to have value and duration) and a variable sample
Chris@441 254 // rate as a note model, taking its values as pitch, duration
Chris@441 255 // and velocity (if present) respectively. This is the same
Chris@441 256 // behaviour as always applied by SV to these plugins in the
Chris@441 257 // past.
Chris@441 258
Chris@441 259 // For plugins with the newer API, we treat anything with
Chris@441 260 // duration as either a note model with pitch and velocity, or
Chris@441 261 // a region model.
Chris@441 262
Chris@441 263 // How do we know whether it's an interval or note model?
Chris@441 264 // What's the essential difference? Is a note model any
Chris@441 265 // interval model using a Hz or "MIDI pitch" scale? There
Chris@441 266 // isn't really a reliable test for "MIDI pitch"... Does a
Chris@441 267 // note model always have velocity? This is a good question
Chris@441 268 // to be addressed by accompanying RDF, but for the moment we
Chris@441 269 // will do the following...
Chris@441 270
Chris@441 271 bool isNoteModel = false;
Chris@441 272
Chris@441 273 // Regions have only value (and duration -- we can't extract a
Chris@441 274 // region model from an old-style plugin that doesn't support
Chris@441 275 // duration)
Chris@441 276 if (binCount > 1) isNoteModel = true;
Chris@441 277
Chris@595 278 // Regions do not have units of Hz or MIDI things (a sweeping
Chris@595 279 // assumption!)
Chris@595 280 if (m_descriptor->unit == "Hz" ||
Chris@595 281 m_descriptor->unit.find("MIDI") != std::string::npos ||
Chris@595 282 m_descriptor->unit.find("midi") != std::string::npos) {
Chris@595 283 isNoteModel = true;
Chris@595 284 }
Chris@441 285
Chris@441 286 // If we had a "sparse 3D model", we would have the additional
Chris@441 287 // problem of determining whether to use that here (if bin
Chris@441 288 // count > 1). But we don't.
Chris@441 289
Chris@441 290 if (isNoteModel) {
Chris@441 291
Chris@441 292 NoteModel *model;
Chris@441 293 if (haveExtents) {
Chris@441 294 model = new NoteModel
Chris@441 295 (modelRate, modelResolution, minValue, maxValue, false);
Chris@441 296 } else {
Chris@441 297 model = new NoteModel
Chris@441 298 (modelRate, modelResolution, false);
Chris@441 299 }
Chris@441 300 model->setScaleUnits(m_descriptor->unit.c_str());
Chris@441 301 m_output = model;
Chris@441 302
Chris@441 303 } else {
Chris@441 304
Chris@441 305 RegionModel *model;
Chris@441 306 if (haveExtents) {
Chris@441 307 model = new RegionModel
Chris@441 308 (modelRate, modelResolution, minValue, maxValue, false);
Chris@441 309 } else {
Chris@441 310 model = new RegionModel
Chris@441 311 (modelRate, modelResolution, false);
Chris@441 312 }
Chris@441 313 model->setScaleUnits(m_descriptor->unit.c_str());
Chris@441 314 m_output = model;
Chris@441 315 }
Chris@441 316
Chris@558 317 QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
Chris@558 318 m_output->setRDFTypeURI(outputEventTypeURI);
Chris@558 319
Chris@441 320 } else if (binCount == 1 ||
Chris@441 321 (m_descriptor->sampleType ==
Chris@441 322 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) {
Chris@441 323
Chris@441 324 // Anything that is not a 1D, note, or interval model and that
Chris@441 325 // has only one value per result must be a sparse time value
Chris@441 326 // model.
Chris@441 327
Chris@441 328 // Anything that is not a 1D, note, or interval model and that
Chris@441 329 // has a variable sample rate is also treated as a sparse time
Chris@441 330 // value model regardless of its bin count, because we lack a
Chris@441 331 // sparse 3D model.
Chris@320 332
Chris@320 333 SparseTimeValueModel *model;
Chris@320 334 if (haveExtents) {
Chris@320 335 model = new SparseTimeValueModel
Chris@320 336 (modelRate, modelResolution, minValue, maxValue, false);
Chris@320 337 } else {
Chris@320 338 model = new SparseTimeValueModel
Chris@320 339 (modelRate, modelResolution, false);
Chris@320 340 }
Chris@558 341
Chris@558 342 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
Chris@320 343 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());
Chris@320 344
Chris@320 345 m_output = model;
Chris@320 346
Chris@558 347 QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
Chris@558 348 m_output->setRDFTypeURI(outputEventTypeURI);
Chris@558 349
Chris@441 350 } else {
Chris@320 351
Chris@441 352 // Anything that is not a 1D, note, or interval model and that
Chris@441 353 // has a fixed sample rate and more than one value per result
Chris@441 354 // must be a dense 3D model.
Chris@320 355
Chris@320 356 EditableDenseThreeDimensionalModel *model =
Chris@320 357 new EditableDenseThreeDimensionalModel
Chris@535 358 (modelRate, modelResolution, binCount,
Chris@535 359 EditableDenseThreeDimensionalModel::BasicMultirateCompression,
Chris@535 360 false);
Chris@320 361
Chris@320 362 if (!m_descriptor->binNames.empty()) {
Chris@320 363 std::vector<QString> names;
Chris@320 364 for (size_t i = 0; i < m_descriptor->binNames.size(); ++i) {
Chris@320 365 names.push_back(m_descriptor->binNames[i].c_str());
Chris@320 366 }
Chris@320 367 model->setBinNames(names);
Chris@320 368 }
Chris@320 369
Chris@320 370 m_output = model;
Chris@558 371
Chris@558 372 QString outputSignalTypeURI = description.getOutputSignalTypeURI(outputId);
Chris@558 373 m_output->setRDFTypeURI(outputSignalTypeURI);
Chris@320 374 }
Chris@333 375
Chris@350 376 if (m_output) m_output->setSourceModel(input);
Chris@320 377 }
Chris@320 378
Chris@331 379 FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()
Chris@320 380 {
Chris@436 381 // std::cerr << "FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()" << std::endl;
Chris@320 382 delete m_plugin;
Chris@320 383 delete m_descriptor;
Chris@320 384 }
Chris@320 385
Chris@320 386 DenseTimeValueModel *
Chris@350 387 FeatureExtractionModelTransformer::getConformingInput()
Chris@320 388 {
Chris@408 389 // std::cerr << "FeatureExtractionModelTransformer::getConformingInput: input model is " << getInputModel() << std::endl;
Chris@408 390
Chris@320 391 DenseTimeValueModel *dtvm =
Chris@320 392 dynamic_cast<DenseTimeValueModel *>(getInputModel());
Chris@320 393 if (!dtvm) {
Chris@350 394 std::cerr << "FeatureExtractionModelTransformer::getConformingInput: WARNING: Input model is not conformable to DenseTimeValueModel" << std::endl;
Chris@320 395 }
Chris@320 396 return dtvm;
Chris@320 397 }
Chris@320 398
Chris@320 399 void
Chris@331 400 FeatureExtractionModelTransformer::run()
Chris@320 401 {
Chris@350 402 DenseTimeValueModel *input = getConformingInput();
Chris@320 403 if (!input) return;
Chris@320 404
Chris@320 405 if (!m_output) return;
Chris@320 406
Chris@497 407 while (!input->isReady() && !m_abandoned) {
Chris@331 408 std::cerr << "FeatureExtractionModelTransformer::run: Waiting for input model to be ready..." << std::endl;
Chris@497 409 usleep(500000);
Chris@320 410 }
Chris@497 411 if (m_abandoned) return;
Chris@320 412
Chris@350 413 size_t sampleRate = input->getSampleRate();
Chris@320 414
Chris@320 415 size_t channelCount = input->getChannelCount();
Chris@320 416 if (m_plugin->getMaxChannelCount() < channelCount) {
Chris@320 417 channelCount = 1;
Chris@320 418 }
Chris@320 419
Chris@320 420 float **buffers = new float*[channelCount];
Chris@320 421 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@350 422 buffers[ch] = new float[m_transform.getBlockSize() + 2];
Chris@320 423 }
Chris@320 424
Chris@350 425 size_t stepSize = m_transform.getStepSize();
Chris@350 426 size_t blockSize = m_transform.getBlockSize();
Chris@350 427
Chris@320 428 bool frequencyDomain = (m_plugin->getInputDomain() ==
Chris@320 429 Vamp::Plugin::FrequencyDomain);
Chris@320 430 std::vector<FFTModel *> fftModels;
Chris@320 431
Chris@320 432 if (frequencyDomain) {
Chris@320 433 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@320 434 FFTModel *model = new FFTModel
Chris@350 435 (getConformingInput(),
Chris@350 436 channelCount == 1 ? m_input.getChannel() : ch,
Chris@350 437 m_transform.getWindowType(),
Chris@350 438 blockSize,
Chris@350 439 stepSize,
Chris@350 440 blockSize,
Chris@334 441 false,
Chris@334 442 StorageAdviser::PrecisionCritical);
Chris@320 443 if (!model->isOK()) {
Chris@320 444 delete model;
Chris@320 445 setCompletion(100);
Chris@387 446 //!!! need a better way to handle this -- previously we were using a QMessageBox but that isn't an appropriate thing to do here either
Chris@387 447 throw AllocationFailed("Failed to create the FFT model for this feature extraction model transformer");
Chris@320 448 }
Chris@320 449 model->resume();
Chris@320 450 fftModels.push_back(model);
Chris@320 451 }
Chris@320 452 }
Chris@320 453
Chris@350 454 long startFrame = m_input.getModel()->getStartFrame();
Chris@350 455 long endFrame = m_input.getModel()->getEndFrame();
Chris@320 456
Chris@350 457 RealTime contextStartRT = m_transform.getStartTime();
Chris@350 458 RealTime contextDurationRT = m_transform.getDuration();
Chris@350 459
Chris@350 460 long contextStart =
Chris@350 461 RealTime::realTime2Frame(contextStartRT, sampleRate);
Chris@350 462
Chris@350 463 long contextDuration =
Chris@350 464 RealTime::realTime2Frame(contextDurationRT, sampleRate);
Chris@320 465
Chris@320 466 if (contextStart == 0 || contextStart < startFrame) {
Chris@320 467 contextStart = startFrame;
Chris@320 468 }
Chris@320 469
Chris@320 470 if (contextDuration == 0) {
Chris@320 471 contextDuration = endFrame - contextStart;
Chris@320 472 }
Chris@320 473 if (contextStart + contextDuration > endFrame) {
Chris@320 474 contextDuration = endFrame - contextStart;
Chris@320 475 }
Chris@320 476
Chris@320 477 long blockFrame = contextStart;
Chris@320 478
Chris@320 479 long prevCompletion = 0;
Chris@320 480
Chris@320 481 setCompletion(0);
Chris@320 482
Chris@556 483 float *reals = 0;
Chris@556 484 float *imaginaries = 0;
Chris@556 485 if (frequencyDomain) {
Chris@556 486 reals = new float[blockSize/2 + 1];
Chris@556 487 imaginaries = new float[blockSize/2 + 1];
Chris@556 488 }
Chris@556 489
Chris@320 490 while (!m_abandoned) {
Chris@320 491
Chris@320 492 if (frequencyDomain) {
Chris@350 493 if (blockFrame - int(blockSize)/2 >
Chris@320 494 contextStart + contextDuration) break;
Chris@320 495 } else {
Chris@320 496 if (blockFrame >=
Chris@320 497 contextStart + contextDuration) break;
Chris@320 498 }
Chris@320 499
Chris@331 500 // std::cerr << "FeatureExtractionModelTransformer::run: blockFrame "
Chris@320 501 // << blockFrame << ", endFrame " << endFrame << ", blockSize "
Chris@350 502 // << blockSize << std::endl;
Chris@320 503
Chris@320 504 long completion =
Chris@350 505 (((blockFrame - contextStart) / stepSize) * 99) /
Chris@557 506 (contextDuration / stepSize + 1);
Chris@320 507
Chris@350 508 // channelCount is either m_input.getModel()->channelCount or 1
Chris@320 509
Chris@363 510 if (frequencyDomain) {
Chris@363 511 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@350 512 int column = (blockFrame - startFrame) / stepSize;
Chris@556 513 fftModels[ch]->getValuesAt(column, reals, imaginaries);
Chris@350 514 for (size_t i = 0; i <= blockSize/2; ++i) {
Chris@556 515 buffers[ch][i*2] = reals[i];
Chris@556 516 buffers[ch][i*2+1] = imaginaries[i];
Chris@320 517 }
Chris@363 518 }
Chris@363 519 } else {
Chris@363 520 getFrames(channelCount, blockFrame, blockSize, buffers);
Chris@320 521 }
Chris@320 522
Chris@497 523 if (m_abandoned) break;
Chris@497 524
Chris@320 525 Vamp::Plugin::FeatureSet features = m_plugin->process
Chris@320 526 (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate));
Chris@320 527
Chris@497 528 if (m_abandoned) break;
Chris@497 529
Chris@320 530 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
Chris@320 531 Vamp::Plugin::Feature feature =
Chris@320 532 features[m_outputFeatureNo][fi];
Chris@320 533 addFeature(blockFrame, feature);
Chris@320 534 }
Chris@320 535
Chris@320 536 if (blockFrame == contextStart || completion > prevCompletion) {
Chris@320 537 setCompletion(completion);
Chris@320 538 prevCompletion = completion;
Chris@320 539 }
Chris@320 540
Chris@350 541 blockFrame += stepSize;
Chris@320 542 }
Chris@320 543
Chris@497 544 if (!m_abandoned) {
Chris@497 545 Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures();
Chris@320 546
Chris@497 547 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
Chris@497 548 Vamp::Plugin::Feature feature =
Chris@497 549 features[m_outputFeatureNo][fi];
Chris@497 550 addFeature(blockFrame, feature);
Chris@497 551 }
Chris@497 552 }
Chris@320 553
Chris@497 554 setCompletion(100);
Chris@320 555
Chris@320 556 if (frequencyDomain) {
Chris@320 557 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@320 558 delete fftModels[ch];
Chris@320 559 }
Chris@556 560 delete[] reals;
Chris@556 561 delete[] imaginaries;
Chris@320 562 }
Chris@320 563 }
Chris@320 564
Chris@320 565 void
Chris@363 566 FeatureExtractionModelTransformer::getFrames(int channelCount,
Chris@363 567 long startFrame, long size,
Chris@363 568 float **buffers)
Chris@320 569 {
Chris@320 570 long offset = 0;
Chris@320 571
Chris@320 572 if (startFrame < 0) {
Chris@363 573 for (int c = 0; c < channelCount; ++c) {
Chris@363 574 for (int i = 0; i < size && startFrame + i < 0; ++i) {
Chris@363 575 buffers[c][i] = 0.0f;
Chris@363 576 }
Chris@320 577 }
Chris@320 578 offset = -startFrame;
Chris@320 579 size -= offset;
Chris@320 580 if (size <= 0) return;
Chris@320 581 startFrame = 0;
Chris@320 582 }
Chris@320 583
Chris@350 584 DenseTimeValueModel *input = getConformingInput();
Chris@350 585 if (!input) return;
Chris@363 586
Chris@363 587 long got = 0;
Chris@350 588
Chris@363 589 if (channelCount == 1) {
Chris@363 590
Chris@363 591 got = input->getData(m_input.getChannel(), startFrame, size,
Chris@363 592 buffers[0] + offset);
Chris@363 593
Chris@363 594 if (m_input.getChannel() == -1 && input->getChannelCount() > 1) {
Chris@363 595 // use mean instead of sum, as plugin input
Chris@363 596 float cc = float(input->getChannelCount());
Chris@363 597 for (long i = 0; i < size; ++i) {
Chris@363 598 buffers[0][i + offset] /= cc;
Chris@363 599 }
Chris@363 600 }
Chris@363 601
Chris@363 602 } else {
Chris@363 603
Chris@363 604 float **writebuf = buffers;
Chris@363 605 if (offset > 0) {
Chris@363 606 writebuf = new float *[channelCount];
Chris@363 607 for (int i = 0; i < channelCount; ++i) {
Chris@363 608 writebuf[i] = buffers[i] + offset;
Chris@363 609 }
Chris@363 610 }
Chris@363 611
Chris@363 612 got = input->getData(0, channelCount-1, startFrame, size, writebuf);
Chris@363 613
Chris@363 614 if (writebuf != buffers) delete[] writebuf;
Chris@363 615 }
Chris@320 616
Chris@320 617 while (got < size) {
Chris@363 618 for (int c = 0; c < channelCount; ++c) {
Chris@363 619 buffers[c][got + offset] = 0.0;
Chris@363 620 }
Chris@320 621 ++got;
Chris@320 622 }
Chris@320 623 }
Chris@320 624
Chris@320 625 void
Chris@331 626 FeatureExtractionModelTransformer::addFeature(size_t blockFrame,
Chris@320 627 const Vamp::Plugin::Feature &feature)
Chris@320 628 {
Chris@350 629 size_t inputRate = m_input.getModel()->getSampleRate();
Chris@320 630
Chris@331 631 // std::cerr << "FeatureExtractionModelTransformer::addFeature("
Chris@320 632 // << blockFrame << ")" << std::endl;
Chris@320 633
Chris@320 634 int binCount = 1;
Chris@320 635 if (m_descriptor->hasFixedBinCount) {
Chris@320 636 binCount = m_descriptor->binCount;
Chris@320 637 }
Chris@320 638
Chris@320 639 size_t frame = blockFrame;
Chris@320 640
Chris@320 641 if (m_descriptor->sampleType ==
Chris@320 642 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
Chris@320 643
Chris@320 644 if (!feature.hasTimestamp) {
Chris@320 645 std::cerr
Chris@331 646 << "WARNING: FeatureExtractionModelTransformer::addFeature: "
Chris@320 647 << "Feature has variable sample rate but no timestamp!"
Chris@320 648 << std::endl;
Chris@320 649 return;
Chris@320 650 } else {
Chris@320 651 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate);
Chris@320 652 }
Chris@320 653
Chris@320 654 } else if (m_descriptor->sampleType ==
Chris@320 655 Vamp::Plugin::OutputDescriptor::FixedSampleRate) {
Chris@320 656
Chris@320 657 if (feature.hasTimestamp) {
Chris@320 658 //!!! warning: sampleRate may be non-integral
Chris@320 659 frame = Vamp::RealTime::realTime2Frame(feature.timestamp,
Chris@451 660 //!!! see comment above when setting up modelResolution and modelRate
Chris@451 661 // lrintf(m_descriptor->sampleRate));
Chris@451 662 inputRate);
Chris@320 663 } else {
Chris@320 664 frame = m_output->getEndFrame();
Chris@320 665 }
Chris@320 666 }
Chris@320 667
Chris@441 668 // Rather than repeat the complicated tests from the constructor
Chris@441 669 // to determine what sort of model we must be adding the features
Chris@441 670 // to, we instead test what sort of model the constructor decided
Chris@441 671 // to create.
Chris@320 672
Chris@441 673 if (isOutput<SparseOneDimensionalModel>()) {
Chris@441 674
Chris@441 675 SparseOneDimensionalModel *model =
Chris@350 676 getConformingOutput<SparseOneDimensionalModel>();
Chris@320 677 if (!model) return;
Chris@350 678
Chris@441 679 model->addPoint(SparseOneDimensionalModel::Point
Chris@441 680 (frame, feature.label.c_str()));
Chris@320 681
Chris@441 682 } else if (isOutput<SparseTimeValueModel>()) {
Chris@320 683
Chris@350 684 SparseTimeValueModel *model =
Chris@350 685 getConformingOutput<SparseTimeValueModel>();
Chris@320 686 if (!model) return;
Chris@350 687
Chris@454 688 for (int i = 0; i < feature.values.size(); ++i) {
Chris@454 689
Chris@454 690 float value = feature.values[i];
Chris@454 691
Chris@454 692 QString label = feature.label.c_str();
Chris@454 693 if (feature.values.size() > 1) {
Chris@454 694 label = QString("[%1] %2").arg(i+1).arg(label);
Chris@454 695 }
Chris@454 696
Chris@454 697 model->addPoint(SparseTimeValueModel::Point(frame, value, label));
Chris@454 698 }
Chris@320 699
Chris@441 700 } else if (isOutput<NoteModel>() || isOutput<RegionModel>()) {
Chris@320 701
Chris@441 702 int index = 0;
Chris@441 703
Chris@441 704 float value = 0.0;
Chris@441 705 if (feature.values.size() > index) {
Chris@441 706 value = feature.values[index++];
Chris@441 707 }
Chris@320 708
Chris@320 709 float duration = 1;
Chris@441 710 if (feature.hasDuration) {
Chris@441 711 duration = Vamp::RealTime::realTime2Frame(feature.duration, inputRate);
Chris@441 712 } else {
Chris@441 713 if (feature.values.size() > index) {
Chris@441 714 duration = feature.values[index++];
Chris@441 715 }
Chris@441 716 }
Chris@320 717
Chris@441 718 if (isOutput<NoteModel>()) {
Chris@320 719
Chris@441 720 float velocity = 100;
Chris@441 721 if (feature.values.size() > index) {
Chris@441 722 velocity = feature.values[index++];
Chris@441 723 }
Chris@441 724 if (velocity < 0) velocity = 127;
Chris@441 725 if (velocity > 127) velocity = 127;
Chris@320 726
Chris@441 727 NoteModel *model = getConformingOutput<NoteModel>();
Chris@441 728 if (!model) return;
Chris@441 729 model->addPoint(NoteModel::Point(frame, value, // value is pitch
Chris@441 730 lrintf(duration),
Chris@441 731 velocity / 127.f,
Chris@441 732 feature.label.c_str()));
Chris@441 733 } else {
Chris@441 734 RegionModel *model = getConformingOutput<RegionModel>();
Chris@454 735 if (!model) return;
Chris@454 736
Chris@474 737 if (feature.hasDuration && !feature.values.empty()) {
Chris@454 738
Chris@454 739 for (int i = 0; i < feature.values.size(); ++i) {
Chris@454 740
Chris@454 741 float value = feature.values[i];
Chris@454 742
Chris@454 743 QString label = feature.label.c_str();
Chris@454 744 if (feature.values.size() > 1) {
Chris@454 745 label = QString("[%1] %2").arg(i+1).arg(label);
Chris@454 746 }
Chris@454 747
Chris@454 748 model->addPoint(RegionModel::Point(frame, value,
Chris@454 749 lrintf(duration),
Chris@454 750 label));
Chris@454 751 }
Chris@454 752 } else {
Chris@454 753
Chris@441 754 model->addPoint(RegionModel::Point(frame, value,
Chris@441 755 lrintf(duration),
Chris@441 756 feature.label.c_str()));
Chris@454 757 }
Chris@441 758 }
Chris@320 759
Chris@441 760 } else if (isOutput<EditableDenseThreeDimensionalModel>()) {
Chris@320 761
Chris@533 762 DenseThreeDimensionalModel::Column values =
Chris@533 763 DenseThreeDimensionalModel::Column::fromStdVector(feature.values);
Chris@320 764
Chris@320 765 EditableDenseThreeDimensionalModel *model =
Chris@350 766 getConformingOutput<EditableDenseThreeDimensionalModel>();
Chris@320 767 if (!model) return;
Chris@320 768
Chris@320 769 model->setColumn(frame / model->getResolution(), values);
Chris@441 770
Chris@441 771 } else {
Chris@441 772 std::cerr << "FeatureExtractionModelTransformer::addFeature: Unknown output model type!" << std::endl;
Chris@320 773 }
Chris@320 774 }
Chris@320 775
Chris@320 776 void
Chris@331 777 FeatureExtractionModelTransformer::setCompletion(int completion)
Chris@320 778 {
Chris@320 779 int binCount = 1;
Chris@320 780 if (m_descriptor->hasFixedBinCount) {
Chris@320 781 binCount = m_descriptor->binCount;
Chris@320 782 }
Chris@320 783
Chris@331 784 // std::cerr << "FeatureExtractionModelTransformer::setCompletion("
Chris@320 785 // << completion << ")" << std::endl;
Chris@320 786
Chris@441 787 if (isOutput<SparseOneDimensionalModel>()) {
Chris@320 788
Chris@350 789 SparseOneDimensionalModel *model =
Chris@350 790 getConformingOutput<SparseOneDimensionalModel>();
Chris@320 791 if (!model) return;
Chris@441 792 model->setCompletion(completion, true);
Chris@320 793
Chris@441 794 } else if (isOutput<SparseTimeValueModel>()) {
Chris@320 795
Chris@350 796 SparseTimeValueModel *model =
Chris@350 797 getConformingOutput<SparseTimeValueModel>();
Chris@320 798 if (!model) return;
Chris@441 799 model->setCompletion(completion, true);
Chris@320 800
Chris@441 801 } else if (isOutput<NoteModel>()) {
Chris@320 802
Chris@441 803 NoteModel *model = getConformingOutput<NoteModel>();
Chris@320 804 if (!model) return;
Chris@441 805 model->setCompletion(completion, true);
Chris@320 806
Chris@441 807 } else if (isOutput<RegionModel>()) {
Chris@441 808
Chris@441 809 RegionModel *model = getConformingOutput<RegionModel>();
Chris@441 810 if (!model) return;
Chris@441 811 model->setCompletion(completion, true);
Chris@441 812
Chris@441 813 } else if (isOutput<EditableDenseThreeDimensionalModel>()) {
Chris@320 814
Chris@320 815 EditableDenseThreeDimensionalModel *model =
Chris@350 816 getConformingOutput<EditableDenseThreeDimensionalModel>();
Chris@320 817 if (!model) return;
Chris@350 818 model->setCompletion(completion, true); //!!!m_context.updates);
Chris@320 819 }
Chris@320 820 }
Chris@320 821