annotate runner/FeatureExtractionManager.cpp @ 60:400e09d69b8f

Add file: URI for file -- this is stripped out by the test script afterwards, I'm not sure how it got stripped from the source as well but obviously it's not parseable without it
author Chris Cannam
date Thu, 24 May 2012 11:35:54 +0100
parents 350f61d5d9be
children 03b1d83fca29
rev   line source
Chris@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@0 2
Chris@0 3 /*
Chris@0 4 Sonic Annotator
Chris@0 5 A utility for batch feature extraction from audio files.
Chris@0 6 Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London.
Chris@0 7 Copyright 2007-2008 QMUL.
Chris@0 8
Chris@0 9 This program is free software; you can redistribute it and/or
Chris@0 10 modify it under the terms of the GNU General Public License as
Chris@0 11 published by the Free Software Foundation; either version 2 of the
Chris@0 12 License, or (at your option) any later version. See the file
Chris@0 13 COPYING included with this distribution for more information.
Chris@0 14 */
Chris@0 15
Chris@0 16 #include "FeatureExtractionManager.h"
Chris@0 17
Chris@0 18 #include <vamp-hostsdk/PluginChannelAdapter.h>
Chris@0 19 #include <vamp-hostsdk/PluginBufferingAdapter.h>
Chris@0 20 #include <vamp-hostsdk/PluginInputDomainAdapter.h>
Chris@0 21 #include <vamp-hostsdk/PluginSummarisingAdapter.h>
Chris@8 22 #include <vamp-hostsdk/PluginWrapper.h>
Chris@0 23 #include <vamp-hostsdk/PluginLoader.h>
Chris@0 24
Chris@21 25 #include "base/Exceptions.h"
Chris@21 26
Chris@0 27 #include <iostream>
Chris@0 28
Chris@0 29 using namespace std;
Chris@0 30
Chris@0 31 using Vamp::Plugin;
Chris@0 32 using Vamp::PluginBase;
Chris@0 33 using Vamp::HostExt::PluginLoader;
Chris@0 34 using Vamp::HostExt::PluginChannelAdapter;
Chris@0 35 using Vamp::HostExt::PluginBufferingAdapter;
Chris@0 36 using Vamp::HostExt::PluginInputDomainAdapter;
Chris@0 37 using Vamp::HostExt::PluginSummarisingAdapter;
Chris@8 38 using Vamp::HostExt::PluginWrapper;
Chris@0 39
Chris@0 40 #include "data/fileio/FileSource.h"
Chris@0 41 #include "data/fileio/AudioFileReader.h"
Chris@0 42 #include "data/fileio/AudioFileReaderFactory.h"
Chris@0 43 #include "data/fileio/PlaylistFileReader.h"
Chris@0 44 #include "base/TempDirectory.h"
Chris@0 45 #include "base/ProgressPrinter.h"
Chris@0 46 #include "transform/TransformFactory.h"
Chris@0 47 #include "rdf/RDFTransformFactory.h"
Chris@0 48 #include "transform/FeatureWriter.h"
Chris@0 49
Chris@0 50 #include <QTextStream>
Chris@0 51 #include <QFile>
Chris@0 52 #include <QFileInfo>
Chris@0 53
Chris@0 54 FeatureExtractionManager::FeatureExtractionManager() :
Chris@0 55 m_summariesOnly(false),
Chris@0 56 // We can read using an arbitrary fixed block size --
Chris@0 57 // PluginBufferingAdapter handles this for us. It's likely to be
Chris@0 58 // quicker to use larger sizes than smallish ones like 1024
Chris@0 59 m_blockSize(16384),
Chris@0 60 m_defaultSampleRate(0),
Chris@0 61 m_sampleRate(0),
Chris@45 62 m_channels(0)
Chris@0 63 {
Chris@0 64 }
Chris@0 65
Chris@0 66 FeatureExtractionManager::~FeatureExtractionManager()
Chris@0 67 {
Chris@0 68 for (PluginMap::iterator pi = m_plugins.begin();
Chris@0 69 pi != m_plugins.end(); ++pi) {
Chris@0 70 delete pi->first;
Chris@0 71 }
Chris@45 72 foreach (AudioFileReader *r, m_readyReaders) {
Chris@45 73 delete r;
Chris@45 74 }
Chris@0 75 }
Chris@0 76
Chris@0 77 void FeatureExtractionManager::setChannels(int channels)
Chris@0 78 {
Chris@0 79 m_channels = channels;
Chris@0 80 }
Chris@0 81
Chris@0 82 void FeatureExtractionManager::setDefaultSampleRate(int sampleRate)
Chris@0 83 {
Chris@0 84 m_defaultSampleRate = sampleRate;
Chris@0 85 }
Chris@0 86
Chris@0 87 static PluginSummarisingAdapter::SummaryType
Chris@0 88 getSummaryType(string name)
Chris@0 89 {
Chris@0 90 if (name == "min") return PluginSummarisingAdapter::Minimum;
Chris@0 91 if (name == "max") return PluginSummarisingAdapter::Maximum;
Chris@0 92 if (name == "mean") return PluginSummarisingAdapter::Mean;
Chris@0 93 if (name == "median") return PluginSummarisingAdapter::Median;
Chris@0 94 if (name == "mode") return PluginSummarisingAdapter::Mode;
Chris@0 95 if (name == "sum") return PluginSummarisingAdapter::Sum;
Chris@0 96 if (name == "variance") return PluginSummarisingAdapter::Variance;
Chris@0 97 if (name == "sd") return PluginSummarisingAdapter::StandardDeviation;
Chris@0 98 if (name == "count") return PluginSummarisingAdapter::Count;
Chris@0 99 return PluginSummarisingAdapter::UnknownSummaryType;
Chris@0 100 }
Chris@0 101
Chris@0 102 bool FeatureExtractionManager::setSummaryTypes(const set<string> &names,
Chris@0 103 bool summariesOnly,
Chris@0 104 const PluginSummarisingAdapter::SegmentBoundaries &boundaries)
Chris@0 105 {
Chris@0 106 for (SummaryNameSet::const_iterator i = names.begin();
Chris@0 107 i != names.end(); ++i) {
Chris@0 108 if (getSummaryType(*i) == PluginSummarisingAdapter::UnknownSummaryType) {
Chris@0 109 cerr << "ERROR: Unknown summary type \"" << *i << "\"" << endl;
Chris@0 110 return false;
Chris@0 111 }
Chris@0 112 }
Chris@0 113 m_summaries = names;
Chris@0 114 m_summariesOnly = summariesOnly;
Chris@0 115 m_boundaries = boundaries;
Chris@0 116 return true;
Chris@0 117 }
Chris@0 118
Chris@51 119 static PluginInputDomainAdapter::WindowType
Chris@51 120 convertWindowType(WindowType t)
Chris@51 121 {
Chris@51 122 switch (t) {
Chris@51 123 case RectangularWindow:
Chris@51 124 return PluginInputDomainAdapter::RectangularWindow;
Chris@51 125 case BartlettWindow:
Chris@51 126 return PluginInputDomainAdapter::BartlettWindow;
Chris@51 127 case HammingWindow:
Chris@51 128 return PluginInputDomainAdapter::HammingWindow;
Chris@51 129 case HanningWindow:
Chris@51 130 return PluginInputDomainAdapter::HanningWindow;
Chris@51 131 case BlackmanWindow:
Chris@51 132 return PluginInputDomainAdapter::BlackmanWindow;
Chris@51 133 case NuttallWindow:
Chris@51 134 return PluginInputDomainAdapter::NuttallWindow;
Chris@51 135 case BlackmanHarrisWindow:
Chris@51 136 return PluginInputDomainAdapter::BlackmanHarrisWindow;
Chris@51 137 default:
Chris@51 138 cerr << "ERROR: Unknown or unsupported window type \"" << t << "\", using Hann (\"" << HanningWindow << "\")" << endl;
Chris@51 139 return PluginInputDomainAdapter::HanningWindow;
Chris@51 140 }
Chris@51 141 }
Chris@51 142
Chris@0 143 bool FeatureExtractionManager::addFeatureExtractor
Chris@0 144 (Transform transform, const vector<FeatureWriter*> &writers)
Chris@0 145 {
Chris@0 146 //!!! exceptions rather than return values?
Chris@0 147
Chris@0 148 if (transform.getSampleRate() == 0) {
Chris@0 149 if (m_sampleRate == 0) {
Chris@0 150 cerr << "NOTE: Transform does not specify a sample rate, using default rate of " << m_defaultSampleRate << endl;
Chris@0 151 transform.setSampleRate(m_defaultSampleRate);
Chris@0 152 m_sampleRate = m_defaultSampleRate;
Chris@0 153 } else {
Chris@0 154 cerr << "NOTE: Transform does not specify a sample rate, using previous transform's rate of " << m_sampleRate << endl;
Chris@0 155 transform.setSampleRate(m_sampleRate);
Chris@0 156 }
Chris@0 157 }
Chris@0 158
Chris@0 159 if (m_sampleRate == 0) {
Chris@0 160 m_sampleRate = transform.getSampleRate();
Chris@0 161 }
Chris@0 162
Chris@0 163 if (transform.getSampleRate() != m_sampleRate) {
Chris@0 164 cerr << "WARNING: Transform sample rate " << transform.getSampleRate() << " does not match previously specified transform rate of " << m_sampleRate << " -- only a single rate is supported for each run" << endl;
Chris@0 165 cerr << "WARNING: Using previous rate of " << m_sampleRate << " for this transform as well" << endl;
Chris@0 166 transform.setSampleRate(m_sampleRate);
Chris@0 167 }
Chris@0 168
Chris@0 169 Plugin *plugin = 0;
Chris@0 170
Chris@0 171 // Remember what the original transform looked like, and index
Chris@0 172 // based on this -- because we may be about to fill in the zeros
Chris@0 173 // for step and block size, but we want any further copies with
Chris@0 174 // the same zeros to match this one
Chris@0 175 Transform originalTransform = transform;
Chris@0 176
Chris@0 177 if (m_transformPluginMap.find(transform) == m_transformPluginMap.end()) {
Chris@0 178
Chris@0 179 // Test whether we already have a transform that is identical
Chris@0 180 // to this, except for the output requested and/or the summary
Chris@0 181 // type -- if so, they should share plugin instances (a vital
Chris@0 182 // optimisation)
Chris@0 183
Chris@0 184 for (TransformPluginMap::iterator i = m_transformPluginMap.begin();
Chris@0 185 i != m_transformPluginMap.end(); ++i) {
Chris@0 186 Transform test = i->first;
Chris@0 187 test.setOutput(transform.getOutput());
Chris@0 188 test.setSummaryType(transform.getSummaryType());
Chris@0 189 if (transform == test) {
Chris@0 190 cerr << "NOTE: Already have transform identical to this one (for \""
Chris@0 191 << transform.getIdentifier().toStdString()
Chris@0 192 << "\") in every detail except output identifier and/or "
Chris@0 193 << "summary type; sharing its plugin instance" << endl;
Chris@0 194 plugin = i->second;
Chris@0 195 if (transform.getSummaryType() != Transform::NoSummary &&
Chris@0 196 !dynamic_cast<PluginSummarisingAdapter *>(plugin)) {
Chris@0 197 plugin = new PluginSummarisingAdapter(plugin);
Chris@0 198 i->second = plugin;
Chris@0 199 }
Chris@0 200 break;
Chris@0 201 }
Chris@0 202 }
Chris@0 203
Chris@0 204 if (!plugin) {
Chris@0 205
Chris@0 206 TransformFactory *tf = TransformFactory::getInstance();
Chris@0 207
Chris@0 208 PluginBase *pb = tf->instantiatePluginFor(transform);
Chris@0 209 plugin = tf->downcastVampPlugin(pb);
Chris@0 210 if (!plugin) {
Chris@0 211 //!!! todo: handle non-Vamp plugins too, or make the main --list
Chris@0 212 // option print out only Vamp transforms
Chris@0 213 cerr << "ERROR: Failed to load plugin for transform \""
Chris@0 214 << transform.getIdentifier().toStdString() << "\"" << endl;
Chris@0 215 delete pb;
Chris@0 216 return false;
Chris@0 217 }
Chris@0 218
Chris@0 219 // We will provide the plugin with arbitrary step and
Chris@0 220 // block sizes (so that we can use the same read/write
Chris@0 221 // block size for all transforms), and to that end we use
Chris@0 222 // a PluginBufferingAdapter. However, we need to know the
Chris@0 223 // underlying step size so that we can provide the right
Chris@0 224 // context for dense outputs. (Although, don't forget
Chris@0 225 // that the PluginBufferingAdapter rewrites
Chris@0 226 // OneSamplePerStep outputs so as to use FixedSampleRate
Chris@0 227 // -- so it supplies the sample rate in the output
Chris@0 228 // feature. I'm not sure whether we can easily use that.)
Chris@0 229
Chris@0 230 size_t pluginStepSize = plugin->getPreferredStepSize();
Chris@0 231 size_t pluginBlockSize = plugin->getPreferredBlockSize();
Chris@0 232
Chris@25 233 PluginInputDomainAdapter *pida = 0;
Chris@25 234
Chris@0 235 // adapt the plugin for buffering, channels, etc.
Chris@0 236 if (plugin->getInputDomain() == Plugin::FrequencyDomain) {
Chris@51 237
Chris@25 238 pida = new PluginInputDomainAdapter(plugin);
Chris@26 239 pida->setProcessTimestampMethod(PluginInputDomainAdapter::ShiftData);
Chris@51 240
Chris@51 241 PluginInputDomainAdapter::WindowType wtype =
Chris@51 242 convertWindowType(transform.getWindowType());
Chris@51 243 pida->setWindowType(wtype);
Chris@25 244 plugin = pida;
Chris@0 245 }
Chris@0 246
Chris@0 247 PluginBufferingAdapter *pba = new PluginBufferingAdapter(plugin);
Chris@0 248 plugin = pba;
Chris@0 249
Chris@0 250 if (transform.getStepSize() != 0) {
Chris@0 251 pba->setPluginStepSize(transform.getStepSize());
Chris@0 252 } else {
Chris@0 253 transform.setStepSize(pluginStepSize);
Chris@0 254 }
Chris@0 255
Chris@0 256 if (transform.getBlockSize() != 0) {
Chris@0 257 pba->setPluginBlockSize(transform.getBlockSize());
Chris@0 258 } else {
Chris@0 259 transform.setBlockSize(pluginBlockSize);
Chris@0 260 }
Chris@0 261
Chris@0 262 plugin = new PluginChannelAdapter(plugin);
Chris@0 263
Chris@0 264 if (!m_summaries.empty() ||
Chris@0 265 transform.getSummaryType() != Transform::NoSummary) {
Chris@0 266 PluginSummarisingAdapter *adapter =
Chris@0 267 new PluginSummarisingAdapter(plugin);
Chris@0 268 adapter->setSummarySegmentBoundaries(m_boundaries);
Chris@0 269 plugin = adapter;
Chris@0 270 }
Chris@0 271
Chris@0 272 if (!plugin->initialise(m_channels, m_blockSize, m_blockSize)) {
Chris@0 273 cerr << "ERROR: Plugin initialise (channels = " << m_channels << ", stepSize = " << m_blockSize << ", blockSize = " << m_blockSize << ") failed." << endl;
Chris@0 274 delete plugin;
Chris@0 275 return false;
Chris@0 276 }
Chris@0 277
Chris@0 278 // cerr << "Initialised plugin" << endl;
Chris@0 279
Chris@0 280 size_t actualStepSize = 0;
Chris@0 281 size_t actualBlockSize = 0;
Chris@0 282 pba->getActualStepAndBlockSizes(actualStepSize, actualBlockSize);
Chris@0 283 transform.setStepSize(actualStepSize);
Chris@0 284 transform.setBlockSize(actualBlockSize);
Chris@0 285
Chris@0 286 Plugin::OutputList outputs = plugin->getOutputDescriptors();
Chris@0 287 for (int i = 0; i < (int)outputs.size(); ++i) {
Chris@0 288
Chris@0 289 // cerr << "Newly initialised plugin output " << i << " has bin count " << outputs[i].binCount << endl;
Chris@0 290
Chris@0 291 m_pluginOutputs[plugin][outputs[i].identifier] = outputs[i];
Chris@0 292 m_pluginOutputIndices[outputs[i].identifier] = i;
Chris@0 293 }
Chris@0 294
Chris@10 295 cerr << "NOTE: Loaded and initialised plugin for transform \""
Chris@25 296 << transform.getIdentifier().toStdString()
Chris@25 297 << "\" with plugin step size " << actualStepSize
Chris@25 298 << " and block size " << actualBlockSize
Chris@25 299 << " (adapter step and block size " << m_blockSize << ")"
Chris@25 300 << endl;
Chris@25 301
Chris@25 302 if (pida) {
Chris@25 303 cerr << "NOTE: PluginInputDomainAdapter timestamp adjustment is "
Chris@25 304
Chris@25 305 << pida->getTimestampAdjustment() << endl;
Chris@25 306 }
Chris@8 307
Chris@8 308 } else {
Chris@8 309
Chris@8 310 if (transform.getStepSize() == 0 || transform.getBlockSize() == 0) {
Chris@8 311
Chris@8 312 PluginWrapper *pw = dynamic_cast<PluginWrapper *>(plugin);
Chris@8 313 if (pw) {
Chris@8 314 PluginBufferingAdapter *pba =
Chris@8 315 pw->getWrapper<PluginBufferingAdapter>();
Chris@8 316 if (pba) {
Chris@8 317 size_t actualStepSize = 0;
Chris@8 318 size_t actualBlockSize = 0;
Chris@8 319 pba->getActualStepAndBlockSizes(actualStepSize,
Chris@8 320 actualBlockSize);
Chris@8 321 if (transform.getStepSize() == 0) {
Chris@8 322 transform.setStepSize(actualStepSize);
Chris@8 323 }
Chris@8 324 if (transform.getBlockSize() == 0) {
Chris@8 325 transform.setBlockSize(actualBlockSize);
Chris@8 326 }
Chris@8 327 }
Chris@8 328 }
Chris@8 329 }
Chris@0 330 }
Chris@0 331
Chris@0 332 if (transform.getOutput() == "") {
Chris@0 333 transform.setOutput
Chris@0 334 (plugin->getOutputDescriptors()[0].identifier.c_str());
Chris@0 335 }
Chris@0 336
Chris@0 337 m_transformPluginMap[transform] = plugin;
Chris@0 338
Chris@0 339 if (!(originalTransform == transform)) {
Chris@0 340 m_transformPluginMap[originalTransform] = plugin;
Chris@0 341 }
Chris@0 342
Chris@0 343 } else {
Chris@0 344
Chris@0 345 plugin = m_transformPluginMap[transform];
Chris@0 346 }
Chris@0 347
Chris@0 348 m_plugins[plugin][transform] = writers;
Chris@0 349
Chris@0 350 return true;
Chris@0 351 }
Chris@0 352
Chris@0 353 bool FeatureExtractionManager::addDefaultFeatureExtractor
Chris@0 354 (TransformId transformId, const vector<FeatureWriter*> &writers)
Chris@0 355 {
Chris@0 356 TransformFactory *tf = TransformFactory::getInstance();
Chris@0 357
Chris@0 358 if (m_sampleRate == 0) {
Chris@0 359 if (m_defaultSampleRate == 0) {
Chris@0 360 cerr << "ERROR: Default transform requested, but no default sample rate available" << endl;
Chris@0 361 return false;
Chris@0 362 } else {
Chris@0 363 cerr << "NOTE: Using default sample rate of " << m_defaultSampleRate << " for default transform" << endl;
Chris@0 364 m_sampleRate = m_defaultSampleRate;
Chris@0 365 }
Chris@0 366 }
Chris@0 367
Chris@0 368 Transform transform = tf->getDefaultTransformFor(transformId, m_sampleRate);
Chris@0 369
Chris@0 370 return addFeatureExtractor(transform, writers);
Chris@0 371 }
Chris@0 372
Chris@0 373 bool FeatureExtractionManager::addFeatureExtractorFromFile
Chris@0 374 (QString transformXmlFile, const vector<FeatureWriter*> &writers)
Chris@0 375 {
Chris@0 376 RDFTransformFactory factory
Chris@0 377 (QUrl::fromLocalFile(QFileInfo(transformXmlFile).absoluteFilePath())
Chris@0 378 .toString());
Chris@0 379 ProgressPrinter printer("Parsing transforms RDF file");
Chris@0 380 std::vector<Transform> transforms = factory.getTransforms(&printer);
Chris@0 381 if (!factory.isOK()) {
Chris@0 382 cerr << "WARNING: FeatureExtractionManager::addFeatureExtractorFromFile: Failed to parse transforms file: " << factory.getErrorString().toStdString() << endl;
Chris@0 383 if (factory.isRDF()) {
Chris@0 384 return false; // no point trying it as XML
Chris@0 385 }
Chris@0 386 }
Chris@0 387 if (!transforms.empty()) {
Chris@0 388 bool success = true;
Chris@0 389 for (int i = 0; i < (int)transforms.size(); ++i) {
Chris@0 390 if (!addFeatureExtractor(transforms[i], writers)) {
Chris@0 391 success = false;
Chris@0 392 }
Chris@0 393 }
Chris@0 394 return success;
Chris@0 395 }
Chris@0 396
Chris@0 397 QFile file(transformXmlFile);
Chris@0 398 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@0 399 cerr << "ERROR: Failed to open transform XML file \""
Chris@0 400 << transformXmlFile.toStdString() << "\" for reading" << endl;
Chris@0 401 return false;
Chris@0 402 }
Chris@0 403
Chris@0 404 QTextStream *qts = new QTextStream(&file);
Chris@0 405 QString qs = qts->readAll();
Chris@0 406 delete qts;
Chris@0 407 file.close();
Chris@0 408
Chris@0 409 Transform transform(qs);
Chris@0 410
Chris@0 411 return addFeatureExtractor(transform, writers);
Chris@0 412 }
Chris@0 413
Chris@45 414 void FeatureExtractionManager::addSource(QString audioSource)
Chris@0 415 {
Chris@0 416 if (QFileInfo(audioSource).suffix().toLower() == "m3u") {
Chris@45 417 ProgressPrinter retrievalProgress("Opening playlist file...");
Chris@45 418 FileSource source(audioSource, &retrievalProgress);
Chris@45 419 if (!source.isAvailable()) {
Chris@45 420 cerr << "ERROR: File or URL \"" << audioSource.toStdString()
Chris@45 421 << "\" could not be located" << endl;
Chris@45 422 throw FileNotFound(audioSource);
Chris@45 423 }
Chris@45 424 source.waitForData();
Chris@0 425 PlaylistFileReader reader(source);
Chris@0 426 if (reader.isOK()) {
Chris@0 427 vector<QString> files = reader.load();
Chris@0 428 for (int i = 0; i < (int)files.size(); ++i) {
Chris@45 429 addSource(files[i]);
Chris@0 430 }
Chris@0 431 return;
Chris@0 432 } else {
Chris@0 433 cerr << "ERROR: Playlist \"" << audioSource.toStdString()
Chris@0 434 << "\" could not be opened" << endl;
Chris@21 435 throw FileNotFound(audioSource);
Chris@0 436 }
Chris@0 437 }
Chris@0 438
Chris@45 439 std::cerr << "Have audio source: \"" << audioSource.toStdString() << "\"" << std::endl;
Chris@45 440
Chris@45 441 // We don't actually do anything with it here, unless it's the
Chris@45 442 // first audio source and we need it to establish default channel
Chris@45 443 // count and sample rate
Chris@45 444
Chris@45 445 if (m_channels == 0 || m_defaultSampleRate == 0) {
Chris@45 446
Chris@45 447 ProgressPrinter retrievalProgress("Determining default rate and channel count from first input file...");
Chris@45 448
Chris@45 449 FileSource source(audioSource, &retrievalProgress);
Chris@45 450 if (!source.isAvailable()) {
Chris@45 451 cerr << "ERROR: File or URL \"" << audioSource.toStdString()
Chris@45 452 << "\" could not be located" << endl;
Chris@45 453 throw FileNotFound(audioSource);
Chris@45 454 }
Chris@45 455
Chris@45 456 source.waitForData();
Chris@45 457
Chris@45 458 // Open to determine validity, channel count, sample rate only
Chris@45 459 // (then close, and open again later with actual desired rate &c)
Chris@45 460
Chris@45 461 AudioFileReader *reader =
Chris@45 462 AudioFileReaderFactory::createReader(source, 0, &retrievalProgress);
Chris@45 463
Chris@45 464 if (!reader) {
Chris@45 465 throw FailedToOpenFile(audioSource);
Chris@45 466 }
Chris@45 467
Chris@45 468 retrievalProgress.done();
Chris@45 469
Chris@45 470 cerr << "File or URL \"" << audioSource.toStdString() << "\" opened successfully" << endl;
Chris@45 471
Chris@45 472 if (m_channels == 0) {
Chris@45 473 m_channels = reader->getChannelCount();
Chris@45 474 cerr << "Taking default channel count of "
Chris@45 475 << reader->getChannelCount() << " from file" << endl;
Chris@45 476 }
Chris@45 477
Chris@45 478 if (m_defaultSampleRate == 0) {
Chris@45 479 m_defaultSampleRate = reader->getNativeRate();
Chris@45 480 cerr << "Taking default sample rate of "
Chris@45 481 << reader->getNativeRate() << "Hz from file" << endl;
Chris@45 482 cerr << "(Note: Default may be overridden by transforms)" << endl;
Chris@45 483 }
Chris@45 484
Chris@45 485 m_readyReaders[audioSource] = reader;
Chris@45 486 }
Chris@45 487 }
Chris@45 488
Chris@47 489 void FeatureExtractionManager::extractFeatures(QString audioSource, bool force)
Chris@45 490 {
Chris@45 491 if (m_plugins.empty()) return;
Chris@45 492
Chris@47 493 if (QFileInfo(audioSource).suffix().toLower() == "m3u") {
Chris@47 494 FileSource source(audioSource);
Chris@47 495 PlaylistFileReader reader(source);
Chris@47 496 if (reader.isOK()) {
Chris@47 497 vector<QString> files = reader.load();
Chris@47 498 for (int i = 0; i < (int)files.size(); ++i) {
Chris@47 499 try {
Chris@47 500 extractFeatures(files[i], force);
Chris@47 501 } catch (const std::exception &e) {
Chris@47 502 if (!force) throw;
Chris@47 503 cerr << "ERROR: Feature extraction failed for playlist entry \""
Chris@47 504 << files[i].toStdString()
Chris@47 505 << "\": " << e.what() << endl;
Chris@47 506 // print a note only if we have more files to process
Chris@47 507 if (++i != files.size()) {
Chris@47 508 cerr << "NOTE: \"--force\" option was provided, continuing (more errors may occur)" << endl;
Chris@47 509 }
Chris@47 510 }
Chris@47 511 }
Chris@47 512 return;
Chris@47 513 } else {
Chris@47 514 cerr << "ERROR: Playlist \"" << audioSource.toStdString()
Chris@47 515 << "\" could not be opened" << endl;
Chris@47 516 throw FileNotFound(audioSource);
Chris@47 517 }
Chris@47 518 }
Chris@47 519
Chris@45 520 testOutputFiles(audioSource);
Chris@45 521
Chris@0 522 if (m_sampleRate == 0) {
Chris@45 523 throw FileOperationFailed
Chris@45 524 (audioSource, "internal error: have sources and plugins, but no sample rate");
Chris@45 525 }
Chris@45 526 if (m_channels == 0) {
Chris@45 527 throw FileOperationFailed
Chris@45 528 (audioSource, "internal error: have sources and plugins, but no channel count");
Chris@0 529 }
Chris@0 530
Chris@45 531 AudioFileReader *reader = 0;
Chris@45 532
Chris@45 533 if (m_readyReaders.contains(audioSource)) {
Chris@45 534 reader = m_readyReaders[audioSource];
Chris@45 535 m_readyReaders.remove(audioSource);
Chris@45 536 if (reader->getChannelCount() != m_channels ||
Chris@45 537 reader->getSampleRate() != m_sampleRate) {
Chris@45 538 // can't use this; open it again
Chris@45 539 delete reader;
Chris@45 540 reader = 0;
Chris@45 541 }
Chris@45 542 }
Chris@45 543 if (!reader) {
Chris@45 544 ProgressPrinter retrievalProgress("Retrieving audio data...");
Chris@45 545 FileSource source(audioSource, &retrievalProgress);
Chris@45 546 source.waitForData();
Chris@45 547 reader = AudioFileReaderFactory::createReader
Chris@45 548 (source, m_sampleRate, &retrievalProgress);
Chris@45 549 retrievalProgress.done();
Chris@45 550 }
Chris@45 551
Chris@0 552 if (!reader) {
Chris@21 553 throw FailedToOpenFile(audioSource);
Chris@0 554 }
Chris@0 555
Chris@45 556 cerr << "Audio file \"" << audioSource.toStdString() << "\": "
Chris@45 557 << reader->getChannelCount() << "ch at "
Chris@45 558 << reader->getNativeRate() << "Hz" << endl;
Chris@45 559 if (reader->getChannelCount() != m_channels ||
Chris@45 560 reader->getNativeRate() != m_sampleRate) {
Chris@45 561 cerr << "NOTE: File will be mixed or resampled for processing: "
Chris@45 562 << m_channels << "ch at "
Chris@45 563 << m_sampleRate << "Hz" << endl;
Chris@45 564 }
Chris@11 565
Chris@0 566 // allocate audio buffers
Chris@0 567 float **data = new float *[m_channels];
Chris@0 568 for (int c = 0; c < m_channels; ++c) {
Chris@0 569 data[c] = new float[m_blockSize];
Chris@0 570 }
Chris@31 571
Chris@31 572 struct LifespanMgr { // unintrusive hack introduced to ensure
Chris@31 573 // destruction on exceptions
Chris@31 574 AudioFileReader *m_r;
Chris@31 575 int m_c;
Chris@31 576 float **m_d;
Chris@31 577 LifespanMgr(AudioFileReader *r, int c, float **d) :
Chris@31 578 m_r(r), m_c(c), m_d(d) { }
Chris@31 579 ~LifespanMgr() { destroy(); }
Chris@31 580 void destroy() {
Chris@31 581 if (!m_r) return;
Chris@31 582 delete m_r;
Chris@31 583 for (int i = 0; i < m_c; ++i) delete[] m_d[i];
Chris@31 584 delete[] m_d;
Chris@31 585 m_r = 0;
Chris@31 586 }
Chris@31 587 };
Chris@31 588 LifespanMgr lifemgr(reader, m_channels, data);
Chris@0 589
Chris@0 590 size_t frameCount = reader->getFrameCount();
Chris@0 591
Chris@0 592 // cerr << "file has " << frameCount << " frames" << endl;
Chris@0 593
Chris@0 594 for (PluginMap::iterator pi = m_plugins.begin();
Chris@0 595 pi != m_plugins.end(); ++pi) {
Chris@0 596
Chris@0 597 Plugin *plugin = pi->first;
Chris@0 598
Chris@0 599 // std::cerr << "Calling reset on " << plugin << std::endl;
Chris@0 600 plugin->reset();
Chris@0 601
Chris@0 602 for (TransformWriterMap::iterator ti = pi->second.begin();
Chris@0 603 ti != pi->second.end(); ++ti) {
Chris@0 604
Chris@0 605 const Transform &transform = ti->first;
Chris@0 606
Chris@0 607 //!!! we may want to set the start and duration times for extraction
Chris@0 608 // in the transform record (defaults of zero indicate extraction
Chris@0 609 // from the whole file)
Chris@0 610 // transform.setStartTime(RealTime::zeroTime);
Chris@0 611 // transform.setDuration
Chris@0 612 // (RealTime::frame2RealTime(reader->getFrameCount(), m_sampleRate));
Chris@0 613
Chris@0 614 string outputId = transform.getOutput().toStdString();
Chris@0 615 if (m_pluginOutputs[plugin].find(outputId) ==
Chris@0 616 m_pluginOutputs[plugin].end()) {
Chris@0 617 //!!! throw?
Chris@0 618 cerr << "WARNING: Nonexistent plugin output \"" << outputId << "\" requested for transform \""
Chris@0 619 << transform.getIdentifier().toStdString() << "\", ignoring this transform"
Chris@0 620 << endl;
Chris@0 621 /*
Chris@0 622 cerr << "Known outputs for all plugins are as follows:" << endl;
Chris@0 623 for (PluginOutputMap::const_iterator k = m_pluginOutputs.begin();
Chris@0 624 k != m_pluginOutputs.end(); ++k) {
Chris@0 625 cerr << "Plugin " << k->first << ": ";
Chris@0 626 if (k->second.empty()) {
Chris@0 627 cerr << "(none)";
Chris@0 628 }
Chris@0 629 for (OutputMap::const_iterator i = k->second.begin();
Chris@0 630 i != k->second.end(); ++i) {
Chris@0 631 cerr << "\"" << i->first << "\" ";
Chris@0 632 }
Chris@0 633 cerr << endl;
Chris@0 634 }
Chris@0 635 */
Chris@0 636 }
Chris@0 637 }
Chris@0 638 }
Chris@0 639
Chris@0 640 long startFrame = 0;
Chris@0 641 long endFrame = frameCount;
Chris@0 642
Chris@0 643 /*!!! No -- there is no single transform to pull this stuff from --
Chris@0 644 * the transforms may have various start and end times, need to be far
Chris@0 645 * cleverer about this if we're going to support them
Chris@0 646
Chris@0 647 RealTime trStartRT = transform.getStartTime();
Chris@0 648 RealTime trDurationRT = transform.getDuration();
Chris@0 649
Chris@0 650 long trStart = RealTime::realTime2Frame(trStartRT, m_sampleRate);
Chris@0 651 long trDuration = RealTime::realTime2Frame(trDurationRT, m_sampleRate);
Chris@0 652
Chris@0 653 if (trStart == 0 || trStart < startFrame) {
Chris@0 654 trStart = startFrame;
Chris@0 655 }
Chris@0 656
Chris@0 657 if (trDuration == 0) {
Chris@0 658 trDuration = endFrame - trStart;
Chris@0 659 }
Chris@0 660 if (trStart + trDuration > endFrame) {
Chris@0 661 trDuration = endFrame - trStart;
Chris@0 662 }
Chris@0 663
Chris@0 664 startFrame = trStart;
Chris@0 665 endFrame = trStart + trDuration;
Chris@0 666 */
Chris@0 667
Chris@0 668 for (PluginMap::iterator pi = m_plugins.begin();
Chris@0 669 pi != m_plugins.end(); ++pi) {
Chris@0 670
Chris@0 671 for (TransformWriterMap::const_iterator ti = pi->second.begin();
Chris@0 672 ti != pi->second.end(); ++ti) {
Chris@0 673
Chris@0 674 const vector<FeatureWriter *> &writers = ti->second;
Chris@0 675
Chris@0 676 for (int j = 0; j < (int)writers.size(); ++j) {
Chris@0 677 FeatureWriter::TrackMetadata m;
Chris@0 678 m.title = reader->getTitle();
Chris@0 679 m.maker = reader->getMaker();
Chris@19 680 if (m.title != "" && m.maker != "") {
Chris@19 681 writers[j]->setTrackMetadata(audioSource, m);
Chris@19 682 }
Chris@0 683 }
Chris@0 684 }
Chris@0 685 }
Chris@0 686
Chris@0 687 ProgressPrinter extractionProgress("Extracting and writing features...");
Chris@0 688 int progress = 0;
Chris@0 689
Chris@0 690 for (long i = startFrame; i < endFrame; i += m_blockSize) {
Chris@0 691
Chris@0 692 //!!! inefficient, although much of the inefficiency may be
Chris@0 693 // susceptible to optimisation
Chris@0 694
Chris@0 695 SampleBlock frames;
Chris@0 696 reader->getInterleavedFrames(i, m_blockSize, frames);
Chris@0 697
Chris@0 698 // We have to do our own channel handling here; we can't just
Chris@0 699 // leave it to the plugin adapter because the same plugin
Chris@0 700 // adapter may have to serve for input files with various
Chris@0 701 // numbers of channels (so the adapter is simply configured
Chris@34 702 // with a fixed channel count).
Chris@0 703
Chris@0 704 int rc = reader->getChannelCount();
Chris@0 705
Chris@34 706 // m_channels is the number of channels we need for the plugin
Chris@34 707
Chris@34 708 int index;
Chris@34 709 int fc = (int)frames.size();
Chris@46 710
Chris@34 711 if (m_channels == 1) { // only case in which we can sensibly mix down
Chris@34 712 for (int j = 0; j < m_blockSize; ++j) {
Chris@34 713 data[0][j] = 0.f;
Chris@34 714 }
Chris@34 715 for (int c = 0; c < rc; ++c) {
Chris@34 716 for (int j = 0; j < m_blockSize; ++j) {
Chris@0 717 index = j * rc + c;
Chris@34 718 if (index < fc) data[0][j] += frames[index];
Chris@0 719 }
Chris@0 720 }
Chris@34 721 for (int j = 0; j < m_blockSize; ++j) {
Chris@34 722 data[0][j] /= rc;
Chris@34 723 }
Chris@34 724 } else {
Chris@34 725 for (int c = 0; c < m_channels; ++c) {
Chris@34 726 for (int j = 0; j < m_blockSize; ++j) {
Chris@34 727 data[c][j] = 0.f;
Chris@34 728 }
Chris@34 729 if (c < rc) {
Chris@34 730 for (int j = 0; j < m_blockSize; ++j) {
Chris@34 731 index = j * rc + c;
Chris@34 732 if (index < fc) data[c][j] += frames[index];
Chris@34 733 }
Chris@34 734 }
Chris@34 735 }
Chris@34 736 }
Chris@0 737
Chris@0 738 Vamp::RealTime timestamp = Vamp::RealTime::frame2RealTime
Chris@0 739 (i, m_sampleRate);
Chris@0 740
Chris@0 741 for (PluginMap::iterator pi = m_plugins.begin();
Chris@0 742 pi != m_plugins.end(); ++pi) {
Chris@0 743
Chris@0 744 Plugin *plugin = pi->first;
Chris@0 745 Plugin::FeatureSet featureSet = plugin->process(data, timestamp);
Chris@0 746
Chris@0 747 if (!m_summariesOnly) {
Chris@0 748 writeFeatures(audioSource, plugin, featureSet);
Chris@0 749 }
Chris@0 750 }
Chris@0 751
Chris@0 752 int pp = progress;
Chris@6 753 progress = int(((i - startFrame) * 100.0) / (endFrame - startFrame) + 0.1);
Chris@0 754 if (progress > pp) extractionProgress.setProgress(progress);
Chris@0 755 }
Chris@10 756
Chris@22 757 // std::cerr << "FeatureExtractionManager: deleting audio file reader" << std::endl;
Chris@12 758
Chris@31 759 lifemgr.destroy(); // deletes reader, data
Chris@57 760
Chris@57 761 // In order to ensure our results are written to the output in a
Chris@57 762 // fixed order (and not one that depends on the pointer value of
Chris@57 763 // each plugin on the heap in any given run of the program) we
Chris@57 764 // take the plugins' entries from the plugin map and sort them
Chris@57 765 // into a new, temporary map that is indexed by the first
Chris@57 766 // transform for each plugin. We then iterate over that instead of
Chris@57 767 // over m_plugins in order to get the right ordering.
Chris@57 768
Chris@57 769 // This is not the most elegant way to do this -- it would be more
Chris@57 770 // elegant to impose an ordering directly on the plugins that are
Chris@57 771 // used as keys to m_plugins. But the plugin type comes from the
Chris@57 772 // Vamp SDK, so this change is more localised.
Chris@57 773
Chris@57 774 // Thanks to Matthias for this.
Chris@57 775
Chris@58 776 typedef map<Transform, PluginMap::value_type> OrderedPluginMap;
Chris@58 777 OrderedPluginMap orderedPlugins;
Chris@57 778
Chris@0 779 for (PluginMap::iterator pi = m_plugins.begin();
Chris@0 780 pi != m_plugins.end(); ++pi) {
Chris@57 781 Transform firstForPlugin = (pi->second).begin()->first;
Chris@58 782 orderedPlugins.insert(OrderedPluginMap::value_type(firstForPlugin, *pi));
Chris@57 783 }
Chris@0 784
Chris@58 785 for (OrderedPluginMap::iterator superPi = orderedPlugins.begin();
Chris@57 786 superPi != orderedPlugins.end(); ++superPi) {
Chris@57 787
Chris@57 788 // The value we extract from this map is just the same as the
Chris@57 789 // value_type we get from iterating over our PluginMap
Chris@57 790 // directly -- but we happen to get them in the right order
Chris@57 791 // now because the map iterator is ordered by the Transform
Chris@57 792 // key type ordering
Chris@58 793 PluginMap::value_type pi = superPi->second;
Chris@57 794
Chris@57 795 Plugin *plugin = pi.first;
Chris@0 796 Plugin::FeatureSet featureSet = plugin->getRemainingFeatures();
Chris@0 797
Chris@0 798 if (!m_summariesOnly) {
Chris@0 799 writeFeatures(audioSource, plugin, featureSet);
Chris@0 800 }
Chris@0 801
Chris@0 802 if (!m_summaries.empty()) {
Chris@0 803 PluginSummarisingAdapter *adapter =
Chris@0 804 dynamic_cast<PluginSummarisingAdapter *>(plugin);
Chris@0 805 if (!adapter) {
Chris@0 806 cerr << "WARNING: Summaries requested, but plugin is not a summarising adapter" << endl;
Chris@0 807 } else {
Chris@0 808 for (SummaryNameSet::const_iterator sni = m_summaries.begin();
Chris@0 809 sni != m_summaries.end(); ++sni) {
Chris@0 810 featureSet.clear();
Chris@0 811 //!!! problem here -- we are requesting summaries
Chris@0 812 //!!! for all outputs, but they in principle have
Chris@0 813 //!!! different averaging requirements depending
Chris@0 814 //!!! on whether their features have duration or
Chris@0 815 //!!! not
Chris@0 816 featureSet = adapter->getSummaryForAllOutputs
Chris@0 817 (getSummaryType(*sni),
Chris@0 818 PluginSummarisingAdapter::ContinuousTimeAverage);
Chris@0 819 writeFeatures(audioSource, plugin, featureSet,//!!! *sni);
Chris@0 820 Transform::stringToSummaryType(sni->c_str()));
Chris@0 821 }
Chris@0 822 }
Chris@0 823 }
Chris@0 824
Chris@0 825 writeSummaries(audioSource, plugin);
Chris@0 826 }
Chris@0 827
Chris@3 828 extractionProgress.done();
Chris@3 829
Chris@0 830 finish();
Chris@0 831
Chris@0 832 TempDirectory::getInstance()->cleanup();
Chris@0 833 }
Chris@0 834
Chris@0 835 void
Chris@0 836 FeatureExtractionManager::writeSummaries(QString audioSource, Plugin *plugin)
Chris@0 837 {
Chris@0 838 // caller should have ensured plugin is in m_plugins
Chris@0 839 PluginMap::iterator pi = m_plugins.find(plugin);
Chris@0 840
Chris@0 841 for (TransformWriterMap::const_iterator ti = pi->second.begin();
Chris@0 842 ti != pi->second.end(); ++ti) {
Chris@0 843
Chris@0 844 const Transform &transform = ti->first;
Chris@0 845 const vector<FeatureWriter *> &writers = ti->second;
Chris@0 846
Chris@0 847 Transform::SummaryType summaryType = transform.getSummaryType();
Chris@0 848 PluginSummarisingAdapter::SummaryType pType =
Chris@0 849 (PluginSummarisingAdapter::SummaryType)summaryType;
Chris@0 850
Chris@0 851 if (transform.getSummaryType() == Transform::NoSummary) {
Chris@0 852 continue;
Chris@0 853 }
Chris@0 854
Chris@0 855 PluginSummarisingAdapter *adapter =
Chris@0 856 dynamic_cast<PluginSummarisingAdapter *>(plugin);
Chris@0 857 if (!adapter) {
Chris@0 858 cerr << "FeatureExtractionManager::writeSummaries: INTERNAL ERROR: Summary requested for transform, but plugin is not a summarising adapter" << endl;
Chris@0 859 continue;
Chris@0 860 }
Chris@0 861
Chris@0 862 Plugin::FeatureSet featureSet = adapter->getSummaryForAllOutputs
Chris@0 863 (pType, PluginSummarisingAdapter::ContinuousTimeAverage);
Chris@0 864
Chris@0 865 // cout << "summary type " << int(pType) << " for transform:" << endl << transform.toXmlString().toStdString()<< endl << "... feature set with " << featureSet.size() << " elts" << endl;
Chris@0 866
Chris@0 867 writeFeatures(audioSource, plugin, featureSet, summaryType);
Chris@0 868 }
Chris@0 869 }
Chris@0 870
Chris@0 871 void FeatureExtractionManager::writeFeatures(QString audioSource,
Chris@0 872 Plugin *plugin,
Chris@0 873 const Plugin::FeatureSet &features,
Chris@0 874 Transform::SummaryType summaryType)
Chris@0 875 {
Chris@0 876 // caller should have ensured plugin is in m_plugins
Chris@0 877 PluginMap::iterator pi = m_plugins.find(plugin);
Chris@0 878
Chris@0 879 for (TransformWriterMap::const_iterator ti = pi->second.begin();
Chris@0 880 ti != pi->second.end(); ++ti) {
Chris@0 881
Chris@0 882 const Transform &transform = ti->first;
Chris@0 883 const vector<FeatureWriter *> &writers = ti->second;
Chris@0 884
Chris@0 885 if (transform.getSummaryType() != Transform::NoSummary &&
Chris@0 886 m_summaries.empty() &&
Chris@0 887 summaryType == Transform::NoSummary) {
Chris@0 888 continue;
Chris@0 889 }
Chris@0 890
Chris@0 891 if (transform.getSummaryType() != Transform::NoSummary &&
Chris@0 892 summaryType != Transform::NoSummary &&
Chris@0 893 transform.getSummaryType() != summaryType) {
Chris@0 894 continue;
Chris@0 895 }
Chris@0 896
Chris@0 897 string outputId = transform.getOutput().toStdString();
Chris@0 898
Chris@0 899 if (m_pluginOutputs[plugin].find(outputId) ==
Chris@0 900 m_pluginOutputs[plugin].end()) {
Chris@0 901 continue;
Chris@0 902 }
Chris@0 903
Chris@0 904 const Plugin::OutputDescriptor &desc =
Chris@0 905 m_pluginOutputs[plugin][outputId];
Chris@0 906
Chris@0 907 int outputIndex = m_pluginOutputIndices[outputId];
Chris@0 908 Plugin::FeatureSet::const_iterator fsi = features.find(outputIndex);
Chris@0 909 if (fsi == features.end()) continue;
Chris@0 910
Chris@0 911 for (int j = 0; j < (int)writers.size(); ++j) {
Chris@0 912 writers[j]->write
Chris@0 913 (audioSource, transform, desc, fsi->second,
Chris@0 914 Transform::summaryTypeToString(summaryType).toStdString());
Chris@0 915 }
Chris@0 916 }
Chris@0 917 }
Chris@0 918
Chris@31 919 void FeatureExtractionManager::testOutputFiles(QString audioSource)
Chris@31 920 {
Chris@31 921 for (PluginMap::iterator pi = m_plugins.begin();
Chris@31 922 pi != m_plugins.end(); ++pi) {
Chris@31 923
Chris@31 924 for (TransformWriterMap::iterator ti = pi->second.begin();
Chris@31 925 ti != pi->second.end(); ++ti) {
Chris@31 926
Chris@31 927 vector<FeatureWriter *> &writers = ti->second;
Chris@31 928
Chris@31 929 for (int i = 0; i < (int)writers.size(); ++i) {
Chris@31 930 writers[i]->testOutputFile(audioSource, ti->first.getIdentifier());
Chris@31 931 }
Chris@31 932 }
Chris@31 933 }
Chris@31 934 }
Chris@31 935
Chris@0 936 void FeatureExtractionManager::finish()
Chris@0 937 {
Chris@0 938 for (PluginMap::iterator pi = m_plugins.begin();
Chris@0 939 pi != m_plugins.end(); ++pi) {
Chris@0 940
Chris@0 941 for (TransformWriterMap::iterator ti = pi->second.begin();
Chris@0 942 ti != pi->second.end(); ++ti) {
Chris@0 943
Chris@0 944 vector<FeatureWriter *> &writers = ti->second;
Chris@0 945
Chris@0 946 for (int i = 0; i < (int)writers.size(); ++i) {
Chris@0 947 writers[i]->flush();
Chris@0 948 writers[i]->finish();
Chris@0 949 }
Chris@0 950 }
Chris@0 951 }
Chris@0 952 }
Chris@0 953
Chris@0 954 void FeatureExtractionManager::print(Transform transform) const
Chris@0 955 {
Chris@0 956 QString qs;
Chris@0 957 QTextStream qts(&qs);
Chris@0 958 transform.toXml(qts);
Chris@0 959 cerr << qs.toStdString() << endl;
Chris@0 960 }