annotate runner/FeatureExtractionManager.cpp @ 97:54565c08c197 start-duration

Add multi-output start-duration test (with different start and duration per output!), and clear out data from output files for tests that are not yet implemented so they don't accidentally succeed
author Chris Cannam
date Wed, 01 Oct 2014 09:27:21 +0100
parents 03b1d83fca29
children 136d8496a4b8 526feaad5820
rev   line source
Chris@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@0 2
Chris@0 3 /*
Chris@0 4 Sonic Annotator
Chris@0 5 A utility for batch feature extraction from audio files.
Chris@0 6 Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London.
Chris@0 7 Copyright 2007-2008 QMUL.
Chris@0 8
Chris@0 9 This program is free software; you can redistribute it and/or
Chris@0 10 modify it under the terms of the GNU General Public License as
Chris@0 11 published by the Free Software Foundation; either version 2 of the
Chris@0 12 License, or (at your option) any later version. See the file
Chris@0 13 COPYING included with this distribution for more information.
Chris@0 14 */
Chris@0 15
Chris@0 16 #include "FeatureExtractionManager.h"
Chris@0 17
Chris@0 18 #include <vamp-hostsdk/PluginChannelAdapter.h>
Chris@0 19 #include <vamp-hostsdk/PluginBufferingAdapter.h>
Chris@0 20 #include <vamp-hostsdk/PluginInputDomainAdapter.h>
Chris@0 21 #include <vamp-hostsdk/PluginSummarisingAdapter.h>
Chris@8 22 #include <vamp-hostsdk/PluginWrapper.h>
Chris@0 23 #include <vamp-hostsdk/PluginLoader.h>
Chris@0 24
Chris@21 25 #include "base/Exceptions.h"
Chris@21 26
Chris@0 27 #include <iostream>
Chris@0 28
Chris@0 29 using namespace std;
Chris@0 30
Chris@0 31 using Vamp::Plugin;
Chris@0 32 using Vamp::PluginBase;
Chris@0 33 using Vamp::HostExt::PluginLoader;
Chris@0 34 using Vamp::HostExt::PluginChannelAdapter;
Chris@0 35 using Vamp::HostExt::PluginBufferingAdapter;
Chris@0 36 using Vamp::HostExt::PluginInputDomainAdapter;
Chris@0 37 using Vamp::HostExt::PluginSummarisingAdapter;
Chris@8 38 using Vamp::HostExt::PluginWrapper;
Chris@0 39
Chris@0 40 #include "data/fileio/FileSource.h"
Chris@0 41 #include "data/fileio/AudioFileReader.h"
Chris@0 42 #include "data/fileio/AudioFileReaderFactory.h"
Chris@0 43 #include "data/fileio/PlaylistFileReader.h"
Chris@0 44 #include "base/TempDirectory.h"
Chris@0 45 #include "base/ProgressPrinter.h"
Chris@0 46 #include "transform/TransformFactory.h"
Chris@0 47 #include "rdf/RDFTransformFactory.h"
Chris@0 48 #include "transform/FeatureWriter.h"
Chris@0 49
Chris@0 50 #include <QTextStream>
Chris@0 51 #include <QFile>
Chris@0 52 #include <QFileInfo>
Chris@0 53
Chris@0 54 FeatureExtractionManager::FeatureExtractionManager() :
Chris@0 55 m_summariesOnly(false),
Chris@0 56 // We can read using an arbitrary fixed block size --
Chris@0 57 // PluginBufferingAdapter handles this for us. It's likely to be
Chris@0 58 // quicker to use larger sizes than smallish ones like 1024
Chris@0 59 m_blockSize(16384),
Chris@0 60 m_defaultSampleRate(0),
Chris@0 61 m_sampleRate(0),
Chris@45 62 m_channels(0)
Chris@0 63 {
Chris@0 64 }
Chris@0 65
Chris@0 66 FeatureExtractionManager::~FeatureExtractionManager()
Chris@0 67 {
Chris@0 68 for (PluginMap::iterator pi = m_plugins.begin();
Chris@0 69 pi != m_plugins.end(); ++pi) {
Chris@0 70 delete pi->first;
Chris@0 71 }
Chris@45 72 foreach (AudioFileReader *r, m_readyReaders) {
Chris@45 73 delete r;
Chris@45 74 }
Chris@0 75 }
Chris@0 76
Chris@0 77 void FeatureExtractionManager::setChannels(int channels)
Chris@0 78 {
Chris@0 79 m_channels = channels;
Chris@0 80 }
Chris@0 81
Chris@0 82 void FeatureExtractionManager::setDefaultSampleRate(int sampleRate)
Chris@0 83 {
Chris@0 84 m_defaultSampleRate = sampleRate;
Chris@0 85 }
Chris@0 86
Chris@0 87 static PluginSummarisingAdapter::SummaryType
Chris@0 88 getSummaryType(string name)
Chris@0 89 {
Chris@0 90 if (name == "min") return PluginSummarisingAdapter::Minimum;
Chris@0 91 if (name == "max") return PluginSummarisingAdapter::Maximum;
Chris@0 92 if (name == "mean") return PluginSummarisingAdapter::Mean;
Chris@0 93 if (name == "median") return PluginSummarisingAdapter::Median;
Chris@0 94 if (name == "mode") return PluginSummarisingAdapter::Mode;
Chris@0 95 if (name == "sum") return PluginSummarisingAdapter::Sum;
Chris@0 96 if (name == "variance") return PluginSummarisingAdapter::Variance;
Chris@0 97 if (name == "sd") return PluginSummarisingAdapter::StandardDeviation;
Chris@0 98 if (name == "count") return PluginSummarisingAdapter::Count;
Chris@0 99 return PluginSummarisingAdapter::UnknownSummaryType;
Chris@0 100 }
Chris@0 101
Chris@0 102 bool FeatureExtractionManager::setSummaryTypes(const set<string> &names,
Chris@0 103 bool summariesOnly,
Chris@0 104 const PluginSummarisingAdapter::SegmentBoundaries &boundaries)
Chris@0 105 {
Chris@0 106 for (SummaryNameSet::const_iterator i = names.begin();
Chris@0 107 i != names.end(); ++i) {
Chris@0 108 if (getSummaryType(*i) == PluginSummarisingAdapter::UnknownSummaryType) {
Chris@0 109 cerr << "ERROR: Unknown summary type \"" << *i << "\"" << endl;
Chris@0 110 return false;
Chris@0 111 }
Chris@0 112 }
Chris@0 113 m_summaries = names;
Chris@0 114 m_summariesOnly = summariesOnly;
Chris@0 115 m_boundaries = boundaries;
Chris@0 116 return true;
Chris@0 117 }
Chris@0 118
Chris@51 119 static PluginInputDomainAdapter::WindowType
Chris@51 120 convertWindowType(WindowType t)
Chris@51 121 {
Chris@51 122 switch (t) {
Chris@51 123 case RectangularWindow:
Chris@51 124 return PluginInputDomainAdapter::RectangularWindow;
Chris@51 125 case BartlettWindow:
Chris@51 126 return PluginInputDomainAdapter::BartlettWindow;
Chris@51 127 case HammingWindow:
Chris@51 128 return PluginInputDomainAdapter::HammingWindow;
Chris@51 129 case HanningWindow:
Chris@51 130 return PluginInputDomainAdapter::HanningWindow;
Chris@51 131 case BlackmanWindow:
Chris@51 132 return PluginInputDomainAdapter::BlackmanWindow;
Chris@51 133 case NuttallWindow:
Chris@51 134 return PluginInputDomainAdapter::NuttallWindow;
Chris@51 135 case BlackmanHarrisWindow:
Chris@51 136 return PluginInputDomainAdapter::BlackmanHarrisWindow;
Chris@51 137 default:
Chris@51 138 cerr << "ERROR: Unknown or unsupported window type \"" << t << "\", using Hann (\"" << HanningWindow << "\")" << endl;
Chris@51 139 return PluginInputDomainAdapter::HanningWindow;
Chris@51 140 }
Chris@51 141 }
Chris@51 142
Chris@0 143 bool FeatureExtractionManager::addFeatureExtractor
Chris@0 144 (Transform transform, const vector<FeatureWriter*> &writers)
Chris@0 145 {
Chris@0 146 //!!! exceptions rather than return values?
Chris@0 147
Chris@0 148 if (transform.getSampleRate() == 0) {
Chris@0 149 if (m_sampleRate == 0) {
Chris@0 150 cerr << "NOTE: Transform does not specify a sample rate, using default rate of " << m_defaultSampleRate << endl;
Chris@0 151 transform.setSampleRate(m_defaultSampleRate);
Chris@0 152 m_sampleRate = m_defaultSampleRate;
Chris@0 153 } else {
Chris@0 154 cerr << "NOTE: Transform does not specify a sample rate, using previous transform's rate of " << m_sampleRate << endl;
Chris@0 155 transform.setSampleRate(m_sampleRate);
Chris@0 156 }
Chris@0 157 }
Chris@0 158
Chris@0 159 if (m_sampleRate == 0) {
Chris@0 160 m_sampleRate = transform.getSampleRate();
Chris@0 161 }
Chris@0 162
Chris@0 163 if (transform.getSampleRate() != m_sampleRate) {
Chris@0 164 cerr << "WARNING: Transform sample rate " << transform.getSampleRate() << " does not match previously specified transform rate of " << m_sampleRate << " -- only a single rate is supported for each run" << endl;
Chris@0 165 cerr << "WARNING: Using previous rate of " << m_sampleRate << " for this transform as well" << endl;
Chris@0 166 transform.setSampleRate(m_sampleRate);
Chris@0 167 }
Chris@0 168
Chris@0 169 Plugin *plugin = 0;
Chris@0 170
Chris@0 171 // Remember what the original transform looked like, and index
Chris@0 172 // based on this -- because we may be about to fill in the zeros
Chris@0 173 // for step and block size, but we want any further copies with
Chris@0 174 // the same zeros to match this one
Chris@0 175 Transform originalTransform = transform;
Chris@0 176
Chris@0 177 if (m_transformPluginMap.find(transform) == m_transformPluginMap.end()) {
Chris@0 178
Chris@0 179 // Test whether we already have a transform that is identical
Chris@0 180 // to this, except for the output requested and/or the summary
Chris@0 181 // type -- if so, they should share plugin instances (a vital
Chris@0 182 // optimisation)
Chris@0 183
Chris@0 184 for (TransformPluginMap::iterator i = m_transformPluginMap.begin();
Chris@0 185 i != m_transformPluginMap.end(); ++i) {
Chris@0 186 Transform test = i->first;
Chris@0 187 test.setOutput(transform.getOutput());
Chris@0 188 test.setSummaryType(transform.getSummaryType());
Chris@0 189 if (transform == test) {
Chris@0 190 cerr << "NOTE: Already have transform identical to this one (for \""
Chris@0 191 << transform.getIdentifier().toStdString()
Chris@0 192 << "\") in every detail except output identifier and/or "
Chris@0 193 << "summary type; sharing its plugin instance" << endl;
Chris@0 194 plugin = i->second;
Chris@0 195 if (transform.getSummaryType() != Transform::NoSummary &&
Chris@0 196 !dynamic_cast<PluginSummarisingAdapter *>(plugin)) {
Chris@0 197 plugin = new PluginSummarisingAdapter(plugin);
Chris@0 198 i->second = plugin;
Chris@0 199 }
Chris@0 200 break;
Chris@0 201 }
Chris@0 202 }
Chris@0 203
Chris@0 204 if (!plugin) {
Chris@0 205
Chris@0 206 TransformFactory *tf = TransformFactory::getInstance();
Chris@0 207
Chris@0 208 PluginBase *pb = tf->instantiatePluginFor(transform);
Chris@0 209 plugin = tf->downcastVampPlugin(pb);
Chris@0 210 if (!plugin) {
Chris@0 211 //!!! todo: handle non-Vamp plugins too, or make the main --list
Chris@0 212 // option print out only Vamp transforms
Chris@0 213 cerr << "ERROR: Failed to load plugin for transform \""
Chris@0 214 << transform.getIdentifier().toStdString() << "\"" << endl;
Chris@0 215 delete pb;
Chris@0 216 return false;
Chris@0 217 }
Chris@0 218
Chris@0 219 // We will provide the plugin with arbitrary step and
Chris@0 220 // block sizes (so that we can use the same read/write
Chris@0 221 // block size for all transforms), and to that end we use
Chris@0 222 // a PluginBufferingAdapter. However, we need to know the
Chris@0 223 // underlying step size so that we can provide the right
Chris@0 224 // context for dense outputs. (Although, don't forget
Chris@0 225 // that the PluginBufferingAdapter rewrites
Chris@0 226 // OneSamplePerStep outputs so as to use FixedSampleRate
Chris@0 227 // -- so it supplies the sample rate in the output
Chris@0 228 // feature. I'm not sure whether we can easily use that.)
Chris@0 229
Chris@0 230 size_t pluginStepSize = plugin->getPreferredStepSize();
Chris@0 231 size_t pluginBlockSize = plugin->getPreferredBlockSize();
Chris@0 232
Chris@25 233 PluginInputDomainAdapter *pida = 0;
Chris@25 234
Chris@0 235 // adapt the plugin for buffering, channels, etc.
Chris@0 236 if (plugin->getInputDomain() == Plugin::FrequencyDomain) {
Chris@51 237
Chris@25 238 pida = new PluginInputDomainAdapter(plugin);
Chris@26 239 pida->setProcessTimestampMethod(PluginInputDomainAdapter::ShiftData);
Chris@51 240
Chris@51 241 PluginInputDomainAdapter::WindowType wtype =
Chris@51 242 convertWindowType(transform.getWindowType());
Chris@51 243 pida->setWindowType(wtype);
Chris@25 244 plugin = pida;
Chris@0 245 }
Chris@0 246
Chris@0 247 PluginBufferingAdapter *pba = new PluginBufferingAdapter(plugin);
Chris@0 248 plugin = pba;
Chris@0 249
Chris@0 250 if (transform.getStepSize() != 0) {
Chris@0 251 pba->setPluginStepSize(transform.getStepSize());
Chris@0 252 } else {
Chris@0 253 transform.setStepSize(pluginStepSize);
Chris@0 254 }
Chris@0 255
Chris@0 256 if (transform.getBlockSize() != 0) {
Chris@0 257 pba->setPluginBlockSize(transform.getBlockSize());
Chris@0 258 } else {
Chris@0 259 transform.setBlockSize(pluginBlockSize);
Chris@0 260 }
Chris@0 261
Chris@0 262 plugin = new PluginChannelAdapter(plugin);
Chris@0 263
Chris@0 264 if (!m_summaries.empty() ||
Chris@0 265 transform.getSummaryType() != Transform::NoSummary) {
Chris@0 266 PluginSummarisingAdapter *adapter =
Chris@0 267 new PluginSummarisingAdapter(plugin);
Chris@0 268 adapter->setSummarySegmentBoundaries(m_boundaries);
Chris@0 269 plugin = adapter;
Chris@0 270 }
Chris@0 271
Chris@0 272 if (!plugin->initialise(m_channels, m_blockSize, m_blockSize)) {
Chris@0 273 cerr << "ERROR: Plugin initialise (channels = " << m_channels << ", stepSize = " << m_blockSize << ", blockSize = " << m_blockSize << ") failed." << endl;
Chris@0 274 delete plugin;
Chris@0 275 return false;
Chris@0 276 }
Chris@0 277
Chris@0 278 // cerr << "Initialised plugin" << endl;
Chris@0 279
Chris@0 280 size_t actualStepSize = 0;
Chris@0 281 size_t actualBlockSize = 0;
Chris@0 282 pba->getActualStepAndBlockSizes(actualStepSize, actualBlockSize);
Chris@0 283 transform.setStepSize(actualStepSize);
Chris@0 284 transform.setBlockSize(actualBlockSize);
Chris@0 285
Chris@0 286 Plugin::OutputList outputs = plugin->getOutputDescriptors();
Chris@0 287 for (int i = 0; i < (int)outputs.size(); ++i) {
Chris@0 288
Chris@0 289 // cerr << "Newly initialised plugin output " << i << " has bin count " << outputs[i].binCount << endl;
Chris@0 290
Chris@0 291 m_pluginOutputs[plugin][outputs[i].identifier] = outputs[i];
Chris@0 292 m_pluginOutputIndices[outputs[i].identifier] = i;
Chris@0 293 }
Chris@0 294
Chris@10 295 cerr << "NOTE: Loaded and initialised plugin for transform \""
Chris@25 296 << transform.getIdentifier().toStdString()
Chris@25 297 << "\" with plugin step size " << actualStepSize
Chris@25 298 << " and block size " << actualBlockSize
Chris@25 299 << " (adapter step and block size " << m_blockSize << ")"
Chris@25 300 << endl;
Chris@25 301
Chris@25 302 if (pida) {
Chris@25 303 cerr << "NOTE: PluginInputDomainAdapter timestamp adjustment is "
Chris@25 304
Chris@25 305 << pida->getTimestampAdjustment() << endl;
Chris@25 306 }
Chris@8 307
Chris@8 308 } else {
Chris@8 309
Chris@8 310 if (transform.getStepSize() == 0 || transform.getBlockSize() == 0) {
Chris@8 311
Chris@8 312 PluginWrapper *pw = dynamic_cast<PluginWrapper *>(plugin);
Chris@8 313 if (pw) {
Chris@8 314 PluginBufferingAdapter *pba =
Chris@8 315 pw->getWrapper<PluginBufferingAdapter>();
Chris@8 316 if (pba) {
Chris@8 317 size_t actualStepSize = 0;
Chris@8 318 size_t actualBlockSize = 0;
Chris@8 319 pba->getActualStepAndBlockSizes(actualStepSize,
Chris@8 320 actualBlockSize);
Chris@8 321 if (transform.getStepSize() == 0) {
Chris@8 322 transform.setStepSize(actualStepSize);
Chris@8 323 }
Chris@8 324 if (transform.getBlockSize() == 0) {
Chris@8 325 transform.setBlockSize(actualBlockSize);
Chris@8 326 }
Chris@8 327 }
Chris@8 328 }
Chris@8 329 }
Chris@0 330 }
Chris@0 331
Chris@0 332 if (transform.getOutput() == "") {
Chris@0 333 transform.setOutput
Chris@0 334 (plugin->getOutputDescriptors()[0].identifier.c_str());
Chris@0 335 }
Chris@0 336
Chris@0 337 m_transformPluginMap[transform] = plugin;
Chris@0 338
Chris@0 339 if (!(originalTransform == transform)) {
Chris@0 340 m_transformPluginMap[originalTransform] = plugin;
Chris@0 341 }
Chris@0 342
Chris@0 343 } else {
Chris@0 344
Chris@0 345 plugin = m_transformPluginMap[transform];
Chris@0 346 }
Chris@0 347
Chris@0 348 m_plugins[plugin][transform] = writers;
Chris@0 349
Chris@0 350 return true;
Chris@0 351 }
Chris@0 352
Chris@0 353 bool FeatureExtractionManager::addDefaultFeatureExtractor
Chris@0 354 (TransformId transformId, const vector<FeatureWriter*> &writers)
Chris@0 355 {
Chris@0 356 TransformFactory *tf = TransformFactory::getInstance();
Chris@0 357
Chris@0 358 if (m_sampleRate == 0) {
Chris@0 359 if (m_defaultSampleRate == 0) {
Chris@0 360 cerr << "ERROR: Default transform requested, but no default sample rate available" << endl;
Chris@0 361 return false;
Chris@0 362 } else {
Chris@0 363 cerr << "NOTE: Using default sample rate of " << m_defaultSampleRate << " for default transform" << endl;
Chris@0 364 m_sampleRate = m_defaultSampleRate;
Chris@0 365 }
Chris@0 366 }
Chris@0 367
Chris@0 368 Transform transform = tf->getDefaultTransformFor(transformId, m_sampleRate);
Chris@0 369
Chris@0 370 return addFeatureExtractor(transform, writers);
Chris@0 371 }
Chris@0 372
Chris@0 373 bool FeatureExtractionManager::addFeatureExtractorFromFile
Chris@0 374 (QString transformXmlFile, const vector<FeatureWriter*> &writers)
Chris@0 375 {
Chris@0 376 RDFTransformFactory factory
Chris@0 377 (QUrl::fromLocalFile(QFileInfo(transformXmlFile).absoluteFilePath())
Chris@0 378 .toString());
Chris@0 379 ProgressPrinter printer("Parsing transforms RDF file");
Chris@0 380 std::vector<Transform> transforms = factory.getTransforms(&printer);
Chris@0 381 if (!factory.isOK()) {
Chris@0 382 cerr << "WARNING: FeatureExtractionManager::addFeatureExtractorFromFile: Failed to parse transforms file: " << factory.getErrorString().toStdString() << endl;
Chris@0 383 if (factory.isRDF()) {
Chris@0 384 return false; // no point trying it as XML
Chris@0 385 }
Chris@0 386 }
Chris@0 387 if (!transforms.empty()) {
Chris@0 388 bool success = true;
Chris@0 389 for (int i = 0; i < (int)transforms.size(); ++i) {
Chris@0 390 if (!addFeatureExtractor(transforms[i], writers)) {
Chris@0 391 success = false;
Chris@0 392 }
Chris@0 393 }
Chris@0 394 return success;
Chris@0 395 }
Chris@0 396
Chris@0 397 QFile file(transformXmlFile);
Chris@0 398 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@0 399 cerr << "ERROR: Failed to open transform XML file \""
Chris@0 400 << transformXmlFile.toStdString() << "\" for reading" << endl;
Chris@0 401 return false;
Chris@0 402 }
Chris@0 403
Chris@0 404 QTextStream *qts = new QTextStream(&file);
Chris@0 405 QString qs = qts->readAll();
Chris@0 406 delete qts;
Chris@0 407 file.close();
Chris@0 408
Chris@0 409 Transform transform(qs);
Chris@0 410
Chris@0 411 return addFeatureExtractor(transform, writers);
Chris@0 412 }
Chris@0 413
Chris@45 414 void FeatureExtractionManager::addSource(QString audioSource)
Chris@0 415 {
Chris@0 416 if (QFileInfo(audioSource).suffix().toLower() == "m3u") {
Chris@45 417 ProgressPrinter retrievalProgress("Opening playlist file...");
Chris@45 418 FileSource source(audioSource, &retrievalProgress);
Chris@45 419 if (!source.isAvailable()) {
Chris@45 420 cerr << "ERROR: File or URL \"" << audioSource.toStdString()
Chris@45 421 << "\" could not be located" << endl;
Chris@45 422 throw FileNotFound(audioSource);
Chris@45 423 }
Chris@45 424 source.waitForData();
Chris@0 425 PlaylistFileReader reader(source);
Chris@0 426 if (reader.isOK()) {
Chris@0 427 vector<QString> files = reader.load();
Chris@0 428 for (int i = 0; i < (int)files.size(); ++i) {
Chris@45 429 addSource(files[i]);
Chris@0 430 }
Chris@0 431 return;
Chris@0 432 } else {
Chris@0 433 cerr << "ERROR: Playlist \"" << audioSource.toStdString()
Chris@0 434 << "\" could not be opened" << endl;
Chris@21 435 throw FileNotFound(audioSource);
Chris@0 436 }
Chris@0 437 }
Chris@0 438
Chris@45 439 std::cerr << "Have audio source: \"" << audioSource.toStdString() << "\"" << std::endl;
Chris@45 440
Chris@45 441 // We don't actually do anything with it here, unless it's the
Chris@45 442 // first audio source and we need it to establish default channel
Chris@45 443 // count and sample rate
Chris@45 444
Chris@45 445 if (m_channels == 0 || m_defaultSampleRate == 0) {
Chris@45 446
Chris@45 447 ProgressPrinter retrievalProgress("Determining default rate and channel count from first input file...");
Chris@45 448
Chris@45 449 FileSource source(audioSource, &retrievalProgress);
Chris@45 450 if (!source.isAvailable()) {
Chris@45 451 cerr << "ERROR: File or URL \"" << audioSource.toStdString()
Chris@45 452 << "\" could not be located" << endl;
Chris@45 453 throw FileNotFound(audioSource);
Chris@45 454 }
Chris@45 455
Chris@45 456 source.waitForData();
Chris@45 457
Chris@45 458 // Open to determine validity, channel count, sample rate only
Chris@45 459 // (then close, and open again later with actual desired rate &c)
Chris@45 460
Chris@45 461 AudioFileReader *reader =
Chris@95 462 AudioFileReaderFactory::createReader(source, 0, false,
Chris@95 463 &retrievalProgress);
Chris@45 464
Chris@45 465 if (!reader) {
Chris@45 466 throw FailedToOpenFile(audioSource);
Chris@45 467 }
Chris@45 468
Chris@45 469 retrievalProgress.done();
Chris@45 470
Chris@45 471 cerr << "File or URL \"" << audioSource.toStdString() << "\" opened successfully" << endl;
Chris@45 472
Chris@45 473 if (m_channels == 0) {
Chris@45 474 m_channels = reader->getChannelCount();
Chris@45 475 cerr << "Taking default channel count of "
Chris@45 476 << reader->getChannelCount() << " from file" << endl;
Chris@45 477 }
Chris@45 478
Chris@45 479 if (m_defaultSampleRate == 0) {
Chris@45 480 m_defaultSampleRate = reader->getNativeRate();
Chris@45 481 cerr << "Taking default sample rate of "
Chris@45 482 << reader->getNativeRate() << "Hz from file" << endl;
Chris@45 483 cerr << "(Note: Default may be overridden by transforms)" << endl;
Chris@45 484 }
Chris@45 485
Chris@45 486 m_readyReaders[audioSource] = reader;
Chris@45 487 }
Chris@45 488 }
Chris@45 489
Chris@47 490 void FeatureExtractionManager::extractFeatures(QString audioSource, bool force)
Chris@45 491 {
Chris@45 492 if (m_plugins.empty()) return;
Chris@45 493
Chris@47 494 if (QFileInfo(audioSource).suffix().toLower() == "m3u") {
Chris@47 495 FileSource source(audioSource);
Chris@47 496 PlaylistFileReader reader(source);
Chris@47 497 if (reader.isOK()) {
Chris@47 498 vector<QString> files = reader.load();
Chris@47 499 for (int i = 0; i < (int)files.size(); ++i) {
Chris@47 500 try {
Chris@47 501 extractFeatures(files[i], force);
Chris@47 502 } catch (const std::exception &e) {
Chris@47 503 if (!force) throw;
Chris@47 504 cerr << "ERROR: Feature extraction failed for playlist entry \""
Chris@47 505 << files[i].toStdString()
Chris@47 506 << "\": " << e.what() << endl;
Chris@47 507 // print a note only if we have more files to process
Chris@95 508 if (++i != (int)files.size()) {
Chris@47 509 cerr << "NOTE: \"--force\" option was provided, continuing (more errors may occur)" << endl;
Chris@47 510 }
Chris@47 511 }
Chris@47 512 }
Chris@47 513 return;
Chris@47 514 } else {
Chris@47 515 cerr << "ERROR: Playlist \"" << audioSource.toStdString()
Chris@47 516 << "\" could not be opened" << endl;
Chris@47 517 throw FileNotFound(audioSource);
Chris@47 518 }
Chris@47 519 }
Chris@47 520
Chris@45 521 testOutputFiles(audioSource);
Chris@45 522
Chris@0 523 if (m_sampleRate == 0) {
Chris@45 524 throw FileOperationFailed
Chris@45 525 (audioSource, "internal error: have sources and plugins, but no sample rate");
Chris@45 526 }
Chris@45 527 if (m_channels == 0) {
Chris@45 528 throw FileOperationFailed
Chris@45 529 (audioSource, "internal error: have sources and plugins, but no channel count");
Chris@0 530 }
Chris@0 531
Chris@45 532 AudioFileReader *reader = 0;
Chris@45 533
Chris@45 534 if (m_readyReaders.contains(audioSource)) {
Chris@45 535 reader = m_readyReaders[audioSource];
Chris@45 536 m_readyReaders.remove(audioSource);
Chris@45 537 if (reader->getChannelCount() != m_channels ||
Chris@45 538 reader->getSampleRate() != m_sampleRate) {
Chris@45 539 // can't use this; open it again
Chris@45 540 delete reader;
Chris@45 541 reader = 0;
Chris@45 542 }
Chris@45 543 }
Chris@45 544 if (!reader) {
Chris@45 545 ProgressPrinter retrievalProgress("Retrieving audio data...");
Chris@45 546 FileSource source(audioSource, &retrievalProgress);
Chris@45 547 source.waitForData();
Chris@45 548 reader = AudioFileReaderFactory::createReader
Chris@95 549 (source, m_sampleRate, false, &retrievalProgress);
Chris@45 550 retrievalProgress.done();
Chris@45 551 }
Chris@45 552
Chris@0 553 if (!reader) {
Chris@21 554 throw FailedToOpenFile(audioSource);
Chris@0 555 }
Chris@0 556
Chris@45 557 cerr << "Audio file \"" << audioSource.toStdString() << "\": "
Chris@45 558 << reader->getChannelCount() << "ch at "
Chris@45 559 << reader->getNativeRate() << "Hz" << endl;
Chris@45 560 if (reader->getChannelCount() != m_channels ||
Chris@45 561 reader->getNativeRate() != m_sampleRate) {
Chris@45 562 cerr << "NOTE: File will be mixed or resampled for processing: "
Chris@45 563 << m_channels << "ch at "
Chris@45 564 << m_sampleRate << "Hz" << endl;
Chris@45 565 }
Chris@11 566
Chris@0 567 // allocate audio buffers
Chris@0 568 float **data = new float *[m_channels];
Chris@0 569 for (int c = 0; c < m_channels; ++c) {
Chris@0 570 data[c] = new float[m_blockSize];
Chris@0 571 }
Chris@31 572
Chris@31 573 struct LifespanMgr { // unintrusive hack introduced to ensure
Chris@31 574 // destruction on exceptions
Chris@31 575 AudioFileReader *m_r;
Chris@31 576 int m_c;
Chris@31 577 float **m_d;
Chris@31 578 LifespanMgr(AudioFileReader *r, int c, float **d) :
Chris@31 579 m_r(r), m_c(c), m_d(d) { }
Chris@31 580 ~LifespanMgr() { destroy(); }
Chris@31 581 void destroy() {
Chris@31 582 if (!m_r) return;
Chris@31 583 delete m_r;
Chris@31 584 for (int i = 0; i < m_c; ++i) delete[] m_d[i];
Chris@31 585 delete[] m_d;
Chris@31 586 m_r = 0;
Chris@31 587 }
Chris@31 588 };
Chris@31 589 LifespanMgr lifemgr(reader, m_channels, data);
Chris@0 590
Chris@0 591 size_t frameCount = reader->getFrameCount();
Chris@0 592
Chris@0 593 // cerr << "file has " << frameCount << " frames" << endl;
Chris@0 594
Chris@0 595 for (PluginMap::iterator pi = m_plugins.begin();
Chris@0 596 pi != m_plugins.end(); ++pi) {
Chris@0 597
Chris@0 598 Plugin *plugin = pi->first;
Chris@0 599
Chris@0 600 // std::cerr << "Calling reset on " << plugin << std::endl;
Chris@0 601 plugin->reset();
Chris@0 602
Chris@0 603 for (TransformWriterMap::iterator ti = pi->second.begin();
Chris@0 604 ti != pi->second.end(); ++ti) {
Chris@0 605
Chris@0 606 const Transform &transform = ti->first;
Chris@0 607
Chris@0 608 //!!! we may want to set the start and duration times for extraction
Chris@0 609 // in the transform record (defaults of zero indicate extraction
Chris@0 610 // from the whole file)
Chris@0 611 // transform.setStartTime(RealTime::zeroTime);
Chris@0 612 // transform.setDuration
Chris@0 613 // (RealTime::frame2RealTime(reader->getFrameCount(), m_sampleRate));
Chris@0 614
Chris@0 615 string outputId = transform.getOutput().toStdString();
Chris@0 616 if (m_pluginOutputs[plugin].find(outputId) ==
Chris@0 617 m_pluginOutputs[plugin].end()) {
Chris@0 618 //!!! throw?
Chris@0 619 cerr << "WARNING: Nonexistent plugin output \"" << outputId << "\" requested for transform \""
Chris@0 620 << transform.getIdentifier().toStdString() << "\", ignoring this transform"
Chris@0 621 << endl;
Chris@0 622 /*
Chris@0 623 cerr << "Known outputs for all plugins are as follows:" << endl;
Chris@0 624 for (PluginOutputMap::const_iterator k = m_pluginOutputs.begin();
Chris@0 625 k != m_pluginOutputs.end(); ++k) {
Chris@0 626 cerr << "Plugin " << k->first << ": ";
Chris@0 627 if (k->second.empty()) {
Chris@0 628 cerr << "(none)";
Chris@0 629 }
Chris@0 630 for (OutputMap::const_iterator i = k->second.begin();
Chris@0 631 i != k->second.end(); ++i) {
Chris@0 632 cerr << "\"" << i->first << "\" ";
Chris@0 633 }
Chris@0 634 cerr << endl;
Chris@0 635 }
Chris@0 636 */
Chris@0 637 }
Chris@0 638 }
Chris@0 639 }
Chris@0 640
Chris@0 641 long startFrame = 0;
Chris@0 642 long endFrame = frameCount;
Chris@0 643
Chris@0 644 /*!!! No -- there is no single transform to pull this stuff from --
Chris@0 645 * the transforms may have various start and end times, need to be far
Chris@0 646 * cleverer about this if we're going to support them
Chris@0 647
Chris@0 648 RealTime trStartRT = transform.getStartTime();
Chris@0 649 RealTime trDurationRT = transform.getDuration();
Chris@0 650
Chris@0 651 long trStart = RealTime::realTime2Frame(trStartRT, m_sampleRate);
Chris@0 652 long trDuration = RealTime::realTime2Frame(trDurationRT, m_sampleRate);
Chris@0 653
Chris@0 654 if (trStart == 0 || trStart < startFrame) {
Chris@0 655 trStart = startFrame;
Chris@0 656 }
Chris@0 657
Chris@0 658 if (trDuration == 0) {
Chris@0 659 trDuration = endFrame - trStart;
Chris@0 660 }
Chris@0 661 if (trStart + trDuration > endFrame) {
Chris@0 662 trDuration = endFrame - trStart;
Chris@0 663 }
Chris@0 664
Chris@0 665 startFrame = trStart;
Chris@0 666 endFrame = trStart + trDuration;
Chris@0 667 */
Chris@0 668
Chris@0 669 for (PluginMap::iterator pi = m_plugins.begin();
Chris@0 670 pi != m_plugins.end(); ++pi) {
Chris@0 671
Chris@0 672 for (TransformWriterMap::const_iterator ti = pi->second.begin();
Chris@0 673 ti != pi->second.end(); ++ti) {
Chris@0 674
Chris@0 675 const vector<FeatureWriter *> &writers = ti->second;
Chris@0 676
Chris@0 677 for (int j = 0; j < (int)writers.size(); ++j) {
Chris@0 678 FeatureWriter::TrackMetadata m;
Chris@0 679 m.title = reader->getTitle();
Chris@0 680 m.maker = reader->getMaker();
Chris@19 681 if (m.title != "" && m.maker != "") {
Chris@19 682 writers[j]->setTrackMetadata(audioSource, m);
Chris@19 683 }
Chris@0 684 }
Chris@0 685 }
Chris@0 686 }
Chris@0 687
Chris@0 688 ProgressPrinter extractionProgress("Extracting and writing features...");
Chris@0 689 int progress = 0;
Chris@0 690
Chris@0 691 for (long i = startFrame; i < endFrame; i += m_blockSize) {
Chris@0 692
Chris@0 693 //!!! inefficient, although much of the inefficiency may be
Chris@0 694 // susceptible to optimisation
Chris@0 695
Chris@0 696 SampleBlock frames;
Chris@0 697 reader->getInterleavedFrames(i, m_blockSize, frames);
Chris@0 698
Chris@0 699 // We have to do our own channel handling here; we can't just
Chris@0 700 // leave it to the plugin adapter because the same plugin
Chris@0 701 // adapter may have to serve for input files with various
Chris@0 702 // numbers of channels (so the adapter is simply configured
Chris@34 703 // with a fixed channel count).
Chris@0 704
Chris@0 705 int rc = reader->getChannelCount();
Chris@0 706
Chris@34 707 // m_channels is the number of channels we need for the plugin
Chris@34 708
Chris@34 709 int index;
Chris@34 710 int fc = (int)frames.size();
Chris@46 711
Chris@34 712 if (m_channels == 1) { // only case in which we can sensibly mix down
Chris@34 713 for (int j = 0; j < m_blockSize; ++j) {
Chris@34 714 data[0][j] = 0.f;
Chris@34 715 }
Chris@34 716 for (int c = 0; c < rc; ++c) {
Chris@34 717 for (int j = 0; j < m_blockSize; ++j) {
Chris@0 718 index = j * rc + c;
Chris@34 719 if (index < fc) data[0][j] += frames[index];
Chris@0 720 }
Chris@0 721 }
Chris@34 722 for (int j = 0; j < m_blockSize; ++j) {
Chris@34 723 data[0][j] /= rc;
Chris@34 724 }
Chris@34 725 } else {
Chris@34 726 for (int c = 0; c < m_channels; ++c) {
Chris@34 727 for (int j = 0; j < m_blockSize; ++j) {
Chris@34 728 data[c][j] = 0.f;
Chris@34 729 }
Chris@34 730 if (c < rc) {
Chris@34 731 for (int j = 0; j < m_blockSize; ++j) {
Chris@34 732 index = j * rc + c;
Chris@34 733 if (index < fc) data[c][j] += frames[index];
Chris@34 734 }
Chris@34 735 }
Chris@34 736 }
Chris@34 737 }
Chris@0 738
Chris@0 739 Vamp::RealTime timestamp = Vamp::RealTime::frame2RealTime
Chris@0 740 (i, m_sampleRate);
Chris@0 741
Chris@0 742 for (PluginMap::iterator pi = m_plugins.begin();
Chris@0 743 pi != m_plugins.end(); ++pi) {
Chris@0 744
Chris@0 745 Plugin *plugin = pi->first;
Chris@0 746 Plugin::FeatureSet featureSet = plugin->process(data, timestamp);
Chris@0 747
Chris@0 748 if (!m_summariesOnly) {
Chris@0 749 writeFeatures(audioSource, plugin, featureSet);
Chris@0 750 }
Chris@0 751 }
Chris@0 752
Chris@0 753 int pp = progress;
Chris@6 754 progress = int(((i - startFrame) * 100.0) / (endFrame - startFrame) + 0.1);
Chris@0 755 if (progress > pp) extractionProgress.setProgress(progress);
Chris@0 756 }
Chris@10 757
Chris@22 758 // std::cerr << "FeatureExtractionManager: deleting audio file reader" << std::endl;
Chris@12 759
Chris@31 760 lifemgr.destroy(); // deletes reader, data
Chris@57 761
Chris@57 762 // In order to ensure our results are written to the output in a
Chris@57 763 // fixed order (and not one that depends on the pointer value of
Chris@57 764 // each plugin on the heap in any given run of the program) we
Chris@57 765 // take the plugins' entries from the plugin map and sort them
Chris@57 766 // into a new, temporary map that is indexed by the first
Chris@57 767 // transform for each plugin. We then iterate over that instead of
Chris@57 768 // over m_plugins in order to get the right ordering.
Chris@57 769
Chris@57 770 // This is not the most elegant way to do this -- it would be more
Chris@57 771 // elegant to impose an ordering directly on the plugins that are
Chris@57 772 // used as keys to m_plugins. But the plugin type comes from the
Chris@57 773 // Vamp SDK, so this change is more localised.
Chris@57 774
Chris@57 775 // Thanks to Matthias for this.
Chris@57 776
Chris@58 777 typedef map<Transform, PluginMap::value_type> OrderedPluginMap;
Chris@58 778 OrderedPluginMap orderedPlugins;
Chris@57 779
Chris@0 780 for (PluginMap::iterator pi = m_plugins.begin();
Chris@0 781 pi != m_plugins.end(); ++pi) {
Chris@57 782 Transform firstForPlugin = (pi->second).begin()->first;
Chris@58 783 orderedPlugins.insert(OrderedPluginMap::value_type(firstForPlugin, *pi));
Chris@57 784 }
Chris@0 785
Chris@58 786 for (OrderedPluginMap::iterator superPi = orderedPlugins.begin();
Chris@57 787 superPi != orderedPlugins.end(); ++superPi) {
Chris@57 788
Chris@57 789 // The value we extract from this map is just the same as the
Chris@57 790 // value_type we get from iterating over our PluginMap
Chris@57 791 // directly -- but we happen to get them in the right order
Chris@57 792 // now because the map iterator is ordered by the Transform
Chris@57 793 // key type ordering
Chris@58 794 PluginMap::value_type pi = superPi->second;
Chris@57 795
Chris@57 796 Plugin *plugin = pi.first;
Chris@0 797 Plugin::FeatureSet featureSet = plugin->getRemainingFeatures();
Chris@0 798
Chris@0 799 if (!m_summariesOnly) {
Chris@0 800 writeFeatures(audioSource, plugin, featureSet);
Chris@0 801 }
Chris@0 802
Chris@0 803 if (!m_summaries.empty()) {
Chris@0 804 PluginSummarisingAdapter *adapter =
Chris@0 805 dynamic_cast<PluginSummarisingAdapter *>(plugin);
Chris@0 806 if (!adapter) {
Chris@0 807 cerr << "WARNING: Summaries requested, but plugin is not a summarising adapter" << endl;
Chris@0 808 } else {
Chris@0 809 for (SummaryNameSet::const_iterator sni = m_summaries.begin();
Chris@0 810 sni != m_summaries.end(); ++sni) {
Chris@0 811 featureSet.clear();
Chris@0 812 //!!! problem here -- we are requesting summaries
Chris@0 813 //!!! for all outputs, but they in principle have
Chris@0 814 //!!! different averaging requirements depending
Chris@0 815 //!!! on whether their features have duration or
Chris@0 816 //!!! not
Chris@0 817 featureSet = adapter->getSummaryForAllOutputs
Chris@0 818 (getSummaryType(*sni),
Chris@0 819 PluginSummarisingAdapter::ContinuousTimeAverage);
Chris@0 820 writeFeatures(audioSource, plugin, featureSet,//!!! *sni);
Chris@0 821 Transform::stringToSummaryType(sni->c_str()));
Chris@0 822 }
Chris@0 823 }
Chris@0 824 }
Chris@0 825
Chris@0 826 writeSummaries(audioSource, plugin);
Chris@0 827 }
Chris@0 828
Chris@3 829 extractionProgress.done();
Chris@3 830
Chris@0 831 finish();
Chris@0 832
Chris@0 833 TempDirectory::getInstance()->cleanup();
Chris@0 834 }
Chris@0 835
Chris@0 836 void
Chris@0 837 FeatureExtractionManager::writeSummaries(QString audioSource, Plugin *plugin)
Chris@0 838 {
Chris@0 839 // caller should have ensured plugin is in m_plugins
Chris@0 840 PluginMap::iterator pi = m_plugins.find(plugin);
Chris@0 841
Chris@0 842 for (TransformWriterMap::const_iterator ti = pi->second.begin();
Chris@0 843 ti != pi->second.end(); ++ti) {
Chris@0 844
Chris@0 845 const Transform &transform = ti->first;
Chris@0 846 const vector<FeatureWriter *> &writers = ti->second;
Chris@0 847
Chris@0 848 Transform::SummaryType summaryType = transform.getSummaryType();
Chris@0 849 PluginSummarisingAdapter::SummaryType pType =
Chris@0 850 (PluginSummarisingAdapter::SummaryType)summaryType;
Chris@0 851
Chris@0 852 if (transform.getSummaryType() == Transform::NoSummary) {
Chris@0 853 continue;
Chris@0 854 }
Chris@0 855
Chris@0 856 PluginSummarisingAdapter *adapter =
Chris@0 857 dynamic_cast<PluginSummarisingAdapter *>(plugin);
Chris@0 858 if (!adapter) {
Chris@0 859 cerr << "FeatureExtractionManager::writeSummaries: INTERNAL ERROR: Summary requested for transform, but plugin is not a summarising adapter" << endl;
Chris@0 860 continue;
Chris@0 861 }
Chris@0 862
Chris@0 863 Plugin::FeatureSet featureSet = adapter->getSummaryForAllOutputs
Chris@0 864 (pType, PluginSummarisingAdapter::ContinuousTimeAverage);
Chris@0 865
Chris@0 866 // cout << "summary type " << int(pType) << " for transform:" << endl << transform.toXmlString().toStdString()<< endl << "... feature set with " << featureSet.size() << " elts" << endl;
Chris@0 867
Chris@0 868 writeFeatures(audioSource, plugin, featureSet, summaryType);
Chris@0 869 }
Chris@0 870 }
Chris@0 871
Chris@0 872 void FeatureExtractionManager::writeFeatures(QString audioSource,
Chris@0 873 Plugin *plugin,
Chris@0 874 const Plugin::FeatureSet &features,
Chris@0 875 Transform::SummaryType summaryType)
Chris@0 876 {
Chris@0 877 // caller should have ensured plugin is in m_plugins
Chris@0 878 PluginMap::iterator pi = m_plugins.find(plugin);
Chris@0 879
Chris@0 880 for (TransformWriterMap::const_iterator ti = pi->second.begin();
Chris@0 881 ti != pi->second.end(); ++ti) {
Chris@0 882
Chris@0 883 const Transform &transform = ti->first;
Chris@0 884 const vector<FeatureWriter *> &writers = ti->second;
Chris@0 885
Chris@0 886 if (transform.getSummaryType() != Transform::NoSummary &&
Chris@0 887 m_summaries.empty() &&
Chris@0 888 summaryType == Transform::NoSummary) {
Chris@0 889 continue;
Chris@0 890 }
Chris@0 891
Chris@0 892 if (transform.getSummaryType() != Transform::NoSummary &&
Chris@0 893 summaryType != Transform::NoSummary &&
Chris@0 894 transform.getSummaryType() != summaryType) {
Chris@0 895 continue;
Chris@0 896 }
Chris@0 897
Chris@0 898 string outputId = transform.getOutput().toStdString();
Chris@0 899
Chris@0 900 if (m_pluginOutputs[plugin].find(outputId) ==
Chris@0 901 m_pluginOutputs[plugin].end()) {
Chris@0 902 continue;
Chris@0 903 }
Chris@0 904
Chris@0 905 const Plugin::OutputDescriptor &desc =
Chris@0 906 m_pluginOutputs[plugin][outputId];
Chris@0 907
Chris@0 908 int outputIndex = m_pluginOutputIndices[outputId];
Chris@0 909 Plugin::FeatureSet::const_iterator fsi = features.find(outputIndex);
Chris@0 910 if (fsi == features.end()) continue;
Chris@0 911
Chris@0 912 for (int j = 0; j < (int)writers.size(); ++j) {
Chris@0 913 writers[j]->write
Chris@0 914 (audioSource, transform, desc, fsi->second,
Chris@0 915 Transform::summaryTypeToString(summaryType).toStdString());
Chris@0 916 }
Chris@0 917 }
Chris@0 918 }
Chris@0 919
Chris@31 920 void FeatureExtractionManager::testOutputFiles(QString audioSource)
Chris@31 921 {
Chris@31 922 for (PluginMap::iterator pi = m_plugins.begin();
Chris@31 923 pi != m_plugins.end(); ++pi) {
Chris@31 924
Chris@31 925 for (TransformWriterMap::iterator ti = pi->second.begin();
Chris@31 926 ti != pi->second.end(); ++ti) {
Chris@31 927
Chris@31 928 vector<FeatureWriter *> &writers = ti->second;
Chris@31 929
Chris@31 930 for (int i = 0; i < (int)writers.size(); ++i) {
Chris@31 931 writers[i]->testOutputFile(audioSource, ti->first.getIdentifier());
Chris@31 932 }
Chris@31 933 }
Chris@31 934 }
Chris@31 935 }
Chris@31 936
Chris@0 937 void FeatureExtractionManager::finish()
Chris@0 938 {
Chris@0 939 for (PluginMap::iterator pi = m_plugins.begin();
Chris@0 940 pi != m_plugins.end(); ++pi) {
Chris@0 941
Chris@0 942 for (TransformWriterMap::iterator ti = pi->second.begin();
Chris@0 943 ti != pi->second.end(); ++ti) {
Chris@0 944
Chris@0 945 vector<FeatureWriter *> &writers = ti->second;
Chris@0 946
Chris@0 947 for (int i = 0; i < (int)writers.size(); ++i) {
Chris@0 948 writers[i]->flush();
Chris@0 949 writers[i]->finish();
Chris@0 950 }
Chris@0 951 }
Chris@0 952 }
Chris@0 953 }
Chris@0 954
Chris@0 955 void FeatureExtractionManager::print(Transform transform) const
Chris@0 956 {
Chris@0 957 QString qs;
Chris@0 958 QTextStream qts(&qs);
Chris@0 959 transform.toXml(qts);
Chris@0 960 cerr << qs.toStdString() << endl;
Chris@0 961 }