Chris@0: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@0: Chris@0: /* Chris@0: Sonic Annotator Chris@0: A utility for batch feature extraction from audio files. Chris@0: Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London. Chris@0: Copyright 2007-2008 QMUL. Chris@0: Chris@0: This program is free software; you can redistribute it and/or Chris@0: modify it under the terms of the GNU General Public License as Chris@0: published by the Free Software Foundation; either version 2 of the Chris@0: License, or (at your option) any later version. See the file Chris@0: COPYING included with this distribution for more information. Chris@0: */ Chris@0: Chris@0: #include "FeatureExtractionManager.h" Chris@106: #include "MultiplexedReader.h" Chris@0: Chris@0: #include Chris@0: #include Chris@0: #include Chris@0: #include Chris@8: #include Chris@0: #include Chris@0: Chris@21: #include "base/Exceptions.h" Chris@21: Chris@0: #include Chris@0: Chris@0: using namespace std; Chris@0: Chris@0: using Vamp::Plugin; Chris@0: using Vamp::PluginBase; Chris@0: using Vamp::HostExt::PluginLoader; Chris@0: using Vamp::HostExt::PluginChannelAdapter; Chris@0: using Vamp::HostExt::PluginBufferingAdapter; Chris@0: using Vamp::HostExt::PluginInputDomainAdapter; Chris@0: using Vamp::HostExt::PluginSummarisingAdapter; Chris@8: using Vamp::HostExt::PluginWrapper; Chris@0: Chris@0: #include "data/fileio/FileSource.h" Chris@0: #include "data/fileio/AudioFileReader.h" Chris@0: #include "data/fileio/AudioFileReaderFactory.h" Chris@0: #include "base/TempDirectory.h" Chris@0: #include "base/ProgressPrinter.h" Chris@0: #include "transform/TransformFactory.h" Chris@0: #include "rdf/RDFTransformFactory.h" Chris@0: #include "transform/FeatureWriter.h" Chris@0: Chris@0: #include Chris@0: #include Chris@0: #include Chris@0: Chris@0: FeatureExtractionManager::FeatureExtractionManager() : Chris@0: m_summariesOnly(false), Chris@0: // We can read using an arbitrary fixed block size -- Chris@177: // PluginBufferingAdapter handles this for us. But while this Chris@177: // doesn't affect the step and block size actually passed to the Chris@177: // plugin, it does affect the overall time range of the audio Chris@177: // input (which gets rounded to the nearest block boundary). So Chris@177: // although a larger blocksize will normally run faster, and we Chris@177: // used a blocksize of 16384 in earlier releases of Sonic Chris@177: // Annotator for that reason, a smaller blocksize produces Chris@177: // "better" results and this is particularly relevant now we Chris@177: // support the start and duration flags for a transform. Chris@177: m_blockSize(1024), Chris@0: m_defaultSampleRate(0), Chris@0: m_sampleRate(0), Chris@116: m_channels(0), Chris@116: m_normalise(false) Chris@0: { Chris@0: } Chris@0: Chris@0: FeatureExtractionManager::~FeatureExtractionManager() Chris@0: { Chris@0: for (PluginMap::iterator pi = m_plugins.begin(); Chris@0: pi != m_plugins.end(); ++pi) { Chris@0: delete pi->first; Chris@0: } Chris@45: foreach (AudioFileReader *r, m_readyReaders) { Chris@45: delete r; Chris@45: } Chris@0: } Chris@0: Chris@0: void FeatureExtractionManager::setChannels(int channels) Chris@0: { Chris@0: m_channels = channels; Chris@0: } Chris@0: Chris@0: void FeatureExtractionManager::setDefaultSampleRate(int sampleRate) Chris@0: { Chris@0: m_defaultSampleRate = sampleRate; Chris@0: } Chris@0: Chris@116: void FeatureExtractionManager::setNormalise(bool normalise) Chris@116: { Chris@116: m_normalise = normalise; Chris@116: } Chris@116: Chris@0: static PluginSummarisingAdapter::SummaryType Chris@0: getSummaryType(string name) Chris@0: { Chris@0: if (name == "min") return PluginSummarisingAdapter::Minimum; Chris@0: if (name == "max") return PluginSummarisingAdapter::Maximum; Chris@0: if (name == "mean") return PluginSummarisingAdapter::Mean; Chris@0: if (name == "median") return PluginSummarisingAdapter::Median; Chris@0: if (name == "mode") return PluginSummarisingAdapter::Mode; Chris@0: if (name == "sum") return PluginSummarisingAdapter::Sum; Chris@0: if (name == "variance") return PluginSummarisingAdapter::Variance; Chris@0: if (name == "sd") return PluginSummarisingAdapter::StandardDeviation; Chris@0: if (name == "count") return PluginSummarisingAdapter::Count; Chris@0: return PluginSummarisingAdapter::UnknownSummaryType; Chris@0: } Chris@0: Chris@102: bool Chris@102: FeatureExtractionManager::setSummaryTypes(const set &names, Chris@102: const PluginSummarisingAdapter::SegmentBoundaries &boundaries) Chris@0: { Chris@0: for (SummaryNameSet::const_iterator i = names.begin(); Chris@0: i != names.end(); ++i) { Chris@0: if (getSummaryType(*i) == PluginSummarisingAdapter::UnknownSummaryType) { Chris@0: cerr << "ERROR: Unknown summary type \"" << *i << "\"" << endl; Chris@0: return false; Chris@0: } Chris@0: } Chris@0: m_summaries = names; Chris@0: m_boundaries = boundaries; Chris@0: return true; Chris@0: } Chris@0: Chris@102: void Chris@102: FeatureExtractionManager::setSummariesOnly(bool summariesOnly) Chris@102: { Chris@102: m_summariesOnly = summariesOnly; Chris@102: } Chris@102: Chris@51: static PluginInputDomainAdapter::WindowType Chris@51: convertWindowType(WindowType t) Chris@51: { Chris@51: switch (t) { Chris@51: case RectangularWindow: Chris@51: return PluginInputDomainAdapter::RectangularWindow; Chris@51: case BartlettWindow: Chris@51: return PluginInputDomainAdapter::BartlettWindow; Chris@51: case HammingWindow: Chris@51: return PluginInputDomainAdapter::HammingWindow; Chris@51: case HanningWindow: Chris@51: return PluginInputDomainAdapter::HanningWindow; Chris@51: case BlackmanWindow: Chris@51: return PluginInputDomainAdapter::BlackmanWindow; Chris@51: case NuttallWindow: Chris@51: return PluginInputDomainAdapter::NuttallWindow; Chris@51: case BlackmanHarrisWindow: Chris@51: return PluginInputDomainAdapter::BlackmanHarrisWindow; Chris@138: case GaussianWindow: Chris@138: case ParzenWindow: Chris@138: // Not supported in Vamp SDK, fall through Chris@51: default: Chris@51: cerr << "ERROR: Unknown or unsupported window type \"" << t << "\", using Hann (\"" << HanningWindow << "\")" << endl; Chris@51: return PluginInputDomainAdapter::HanningWindow; Chris@51: } Chris@51: } Chris@51: Chris@0: bool FeatureExtractionManager::addFeatureExtractor Chris@0: (Transform transform, const vector &writers) Chris@0: { Chris@0: //!!! exceptions rather than return values? Chris@0: Chris@0: if (transform.getSampleRate() == 0) { Chris@0: if (m_sampleRate == 0) { Chris@0: cerr << "NOTE: Transform does not specify a sample rate, using default rate of " << m_defaultSampleRate << endl; Chris@0: transform.setSampleRate(m_defaultSampleRate); Chris@0: m_sampleRate = m_defaultSampleRate; Chris@0: } else { Chris@0: cerr << "NOTE: Transform does not specify a sample rate, using previous transform's rate of " << m_sampleRate << endl; Chris@0: transform.setSampleRate(m_sampleRate); Chris@0: } Chris@0: } Chris@0: Chris@0: if (m_sampleRate == 0) { Chris@0: m_sampleRate = transform.getSampleRate(); Chris@0: } Chris@0: Chris@0: if (transform.getSampleRate() != m_sampleRate) { Chris@0: cerr << "WARNING: Transform sample rate " << transform.getSampleRate() << " does not match previously specified transform rate of " << m_sampleRate << " -- only a single rate is supported for each run" << endl; Chris@0: cerr << "WARNING: Using previous rate of " << m_sampleRate << " for this transform as well" << endl; Chris@0: transform.setSampleRate(m_sampleRate); Chris@0: } Chris@0: Chris@0: Plugin *plugin = 0; Chris@0: Chris@0: // Remember what the original transform looked like, and index Chris@0: // based on this -- because we may be about to fill in the zeros Chris@0: // for step and block size, but we want any further copies with Chris@0: // the same zeros to match this one Chris@0: Transform originalTransform = transform; Chris@0: Chris@0: if (m_transformPluginMap.find(transform) == m_transformPluginMap.end()) { Chris@0: Chris@0: // Test whether we already have a transform that is identical Chris@0: // to this, except for the output requested and/or the summary Chris@0: // type -- if so, they should share plugin instances (a vital Chris@0: // optimisation) Chris@0: Chris@0: for (TransformPluginMap::iterator i = m_transformPluginMap.begin(); Chris@0: i != m_transformPluginMap.end(); ++i) { Chris@0: Transform test = i->first; Chris@0: test.setOutput(transform.getOutput()); Chris@0: test.setSummaryType(transform.getSummaryType()); Chris@0: if (transform == test) { Chris@0: cerr << "NOTE: Already have transform identical to this one (for \"" Chris@0: << transform.getIdentifier().toStdString() Chris@0: << "\") in every detail except output identifier and/or " Chris@0: << "summary type; sharing its plugin instance" << endl; Chris@0: plugin = i->second; Chris@0: if (transform.getSummaryType() != Transform::NoSummary && Chris@0: !dynamic_cast(plugin)) { Chris@0: plugin = new PluginSummarisingAdapter(plugin); Chris@0: i->second = plugin; Chris@0: } Chris@0: break; Chris@0: } Chris@0: } Chris@0: Chris@0: if (!plugin) { Chris@0: Chris@0: TransformFactory *tf = TransformFactory::getInstance(); Chris@0: Chris@0: PluginBase *pb = tf->instantiatePluginFor(transform); Chris@0: plugin = tf->downcastVampPlugin(pb); Chris@0: if (!plugin) { Chris@0: //!!! todo: handle non-Vamp plugins too, or make the main --list Chris@0: // option print out only Vamp transforms Chris@0: cerr << "ERROR: Failed to load plugin for transform \"" Chris@0: << transform.getIdentifier().toStdString() << "\"" << endl; Chris@203: if (pb) { Chris@203: cerr << "NOTE: (A plugin was loaded, but apparently not a Vamp plugin)" << endl; Chris@203: } Chris@0: delete pb; Chris@0: return false; Chris@0: } Chris@0: Chris@0: // We will provide the plugin with arbitrary step and Chris@0: // block sizes (so that we can use the same read/write Chris@0: // block size for all transforms), and to that end we use Chris@0: // a PluginBufferingAdapter. However, we need to know the Chris@0: // underlying step size so that we can provide the right Chris@0: // context for dense outputs. (Although, don't forget Chris@0: // that the PluginBufferingAdapter rewrites Chris@0: // OneSamplePerStep outputs so as to use FixedSampleRate Chris@0: // -- so it supplies the sample rate in the output Chris@0: // feature. I'm not sure whether we can easily use that.) Chris@0: Chris@0: size_t pluginStepSize = plugin->getPreferredStepSize(); Chris@0: size_t pluginBlockSize = plugin->getPreferredBlockSize(); Chris@0: Chris@25: PluginInputDomainAdapter *pida = 0; Chris@25: Chris@0: // adapt the plugin for buffering, channels, etc. Chris@0: if (plugin->getInputDomain() == Plugin::FrequencyDomain) { Chris@51: Chris@25: pida = new PluginInputDomainAdapter(plugin); Chris@26: pida->setProcessTimestampMethod(PluginInputDomainAdapter::ShiftData); Chris@51: Chris@51: PluginInputDomainAdapter::WindowType wtype = Chris@51: convertWindowType(transform.getWindowType()); Chris@51: pida->setWindowType(wtype); Chris@25: plugin = pida; Chris@0: } Chris@0: Chris@0: PluginBufferingAdapter *pba = new PluginBufferingAdapter(plugin); Chris@0: plugin = pba; Chris@0: Chris@0: if (transform.getStepSize() != 0) { Chris@0: pba->setPluginStepSize(transform.getStepSize()); Chris@0: } else { Chris@0: transform.setStepSize(pluginStepSize); Chris@0: } Chris@0: Chris@0: if (transform.getBlockSize() != 0) { Chris@0: pba->setPluginBlockSize(transform.getBlockSize()); Chris@0: } else { Chris@0: transform.setBlockSize(pluginBlockSize); Chris@0: } Chris@0: Chris@0: plugin = new PluginChannelAdapter(plugin); Chris@0: Chris@0: if (!m_summaries.empty() || Chris@0: transform.getSummaryType() != Transform::NoSummary) { Chris@0: PluginSummarisingAdapter *adapter = Chris@0: new PluginSummarisingAdapter(plugin); Chris@0: adapter->setSummarySegmentBoundaries(m_boundaries); Chris@0: plugin = adapter; Chris@0: } Chris@0: Chris@0: if (!plugin->initialise(m_channels, m_blockSize, m_blockSize)) { Chris@0: cerr << "ERROR: Plugin initialise (channels = " << m_channels << ", stepSize = " << m_blockSize << ", blockSize = " << m_blockSize << ") failed." << endl; Chris@0: delete plugin; Chris@0: return false; Chris@0: } Chris@0: Chris@0: // cerr << "Initialised plugin" << endl; Chris@0: Chris@0: size_t actualStepSize = 0; Chris@0: size_t actualBlockSize = 0; Chris@0: pba->getActualStepAndBlockSizes(actualStepSize, actualBlockSize); Chris@0: transform.setStepSize(actualStepSize); Chris@0: transform.setBlockSize(actualBlockSize); Chris@0: Chris@0: Plugin::OutputList outputs = plugin->getOutputDescriptors(); Chris@0: for (int i = 0; i < (int)outputs.size(); ++i) { Chris@0: Chris@0: // cerr << "Newly initialised plugin output " << i << " has bin count " << outputs[i].binCount << endl; Chris@0: Chris@0: m_pluginOutputs[plugin][outputs[i].identifier] = outputs[i]; Chris@0: m_pluginOutputIndices[outputs[i].identifier] = i; Chris@0: } Chris@0: Chris@10: cerr << "NOTE: Loaded and initialised plugin for transform \"" Chris@25: << transform.getIdentifier().toStdString() Chris@25: << "\" with plugin step size " << actualStepSize Chris@25: << " and block size " << actualBlockSize Chris@25: << " (adapter step and block size " << m_blockSize << ")" Chris@25: << endl; Chris@25: Chris@233: // cerr << "NOTE: That transform is: " << transform.toXmlString() << endl; Chris@233: Chris@25: if (pida) { Chris@25: cerr << "NOTE: PluginInputDomainAdapter timestamp adjustment is " Chris@25: Chris@25: << pida->getTimestampAdjustment() << endl; Chris@25: } Chris@8: Chris@8: } else { Chris@8: Chris@8: if (transform.getStepSize() == 0 || transform.getBlockSize() == 0) { Chris@8: Chris@8: PluginWrapper *pw = dynamic_cast(plugin); Chris@8: if (pw) { Chris@8: PluginBufferingAdapter *pba = Chris@8: pw->getWrapper(); Chris@8: if (pba) { Chris@8: size_t actualStepSize = 0; Chris@8: size_t actualBlockSize = 0; Chris@8: pba->getActualStepAndBlockSizes(actualStepSize, Chris@8: actualBlockSize); Chris@8: if (transform.getStepSize() == 0) { Chris@8: transform.setStepSize(actualStepSize); Chris@8: } Chris@8: if (transform.getBlockSize() == 0) { Chris@8: transform.setBlockSize(actualBlockSize); Chris@8: } Chris@8: } Chris@8: } Chris@8: } Chris@0: } Chris@0: Chris@130: if (transform.getPluginVersion() != "") { Chris@130: if (QString("%1").arg(plugin->getPluginVersion()) Chris@130: != transform.getPluginVersion()) { Chris@130: cerr << "ERROR: Transform specifies version " Chris@130: << transform.getPluginVersion() Chris@130: << " of plugin \"" << plugin->getIdentifier() Chris@130: << "\", but installed plugin is version " Chris@130: << plugin->getPluginVersion() Chris@130: << endl; Chris@130: return false; Chris@130: } Chris@130: } Chris@130: Chris@0: if (transform.getOutput() == "") { Chris@0: transform.setOutput Chris@0: (plugin->getOutputDescriptors()[0].identifier.c_str()); Chris@129: } else { Chris@129: if (m_pluginOutputs[plugin].find Chris@129: (transform.getOutput().toLocal8Bit().data()) == Chris@129: m_pluginOutputs[plugin].end()) { Chris@129: cerr << "ERROR: Transform requests nonexistent plugin output \"" Chris@129: << transform.getOutput() Chris@129: << "\"" << endl; Chris@129: return false; Chris@129: } Chris@0: } Chris@0: Chris@0: m_transformPluginMap[transform] = plugin; Chris@0: Chris@233: // cerr << "NOTE: Assigned plugin " << plugin << " for transform: " << transform.toXmlString() << endl; Chris@233: Chris@0: if (!(originalTransform == transform)) { Chris@0: m_transformPluginMap[originalTransform] = plugin; Chris@233: // cerr << "NOTE: Also assigned plugin " << plugin << " for original transform: " << originalTransform.toXmlString() << endl; Chris@0: } Chris@0: Chris@0: } else { Chris@0: Chris@0: plugin = m_transformPluginMap[transform]; Chris@0: } Chris@0: Chris@109: if (m_plugins.find(plugin) == m_plugins.end()) { Chris@109: m_orderedPlugins.push_back(plugin); Chris@109: } Chris@109: Chris@0: m_plugins[plugin][transform] = writers; Chris@0: Chris@0: return true; Chris@0: } Chris@0: Chris@0: bool FeatureExtractionManager::addDefaultFeatureExtractor Chris@0: (TransformId transformId, const vector &writers) Chris@0: { Chris@0: TransformFactory *tf = TransformFactory::getInstance(); Chris@0: Chris@0: if (m_sampleRate == 0) { Chris@0: if (m_defaultSampleRate == 0) { Chris@0: cerr << "ERROR: Default transform requested, but no default sample rate available" << endl; Chris@0: return false; Chris@0: } else { Chris@0: cerr << "NOTE: Using default sample rate of " << m_defaultSampleRate << " for default transform" << endl; Chris@0: m_sampleRate = m_defaultSampleRate; Chris@0: } Chris@0: } Chris@0: Chris@0: Transform transform = tf->getDefaultTransformFor(transformId, m_sampleRate); Chris@0: Chris@203: bool result = addFeatureExtractor(transform, writers); Chris@203: if (!result) { Chris@203: if (transform.getType() == Transform::UnknownType) { Chris@203: cerr << "(Maybe mixed up filename with transform, or --transform with --default?)" << endl; Chris@203: } Chris@203: } Chris@203: return result; Chris@0: } Chris@0: Chris@0: bool FeatureExtractionManager::addFeatureExtractorFromFile Chris@227: (QString transformFile, const vector &writers) Chris@0: { Chris@227: // We support two formats for transform description files, XML (in Chris@227: // a format specific to Sonic Annotator) and RDF/Turtle. The RDF Chris@227: // format can describe multiple transforms in a single file, the Chris@227: // XML only one. Chris@227: Chris@227: // Possible errors we should report: Chris@227: // Chris@227: // 1. File does not exist or cannot be opened Chris@227: // 2. File is ostensibly XML, but is not parseable Chris@227: // 3. File is ostensibly Turtle, but is not parseable Chris@227: // 4. File is XML, but contains no valid transform (e.g. is unrelated XML) Chris@227: // 5. File is Turtle, but contains no valid transform(s) Chris@227: // 6. File is Turtle and contains both valid and invalid transform(s) Chris@227: Chris@227: { Chris@227: // We don't actually need to open this here yet, we just hoist Chris@227: // it to the top for error reporting purposes Chris@227: QFile file(transformFile); Chris@227: if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) { Chris@227: // Error case 1. File does not exist or cannot be opened Chris@227: cerr << "ERROR: Failed to open transform file \"" << transformFile Chris@227: << "\" for reading" << endl; Chris@227: return false; Chris@227: } Chris@227: } Chris@227: Chris@99: bool tryRdf = true; Chris@227: if (transformFile.endsWith(".xml") || transformFile.endsWith(".XML")) { Chris@99: // We don't support RDF-XML (and nor does the underlying Chris@99: // parser library) so skip the RDF parse if the filename Chris@99: // suggests XML, to avoid puking out a load of errors from Chris@99: // feeding XML to a Turtle parser Chris@99: tryRdf = false; Chris@0: } Chris@99: Chris@227: bool tryXml = true; Chris@227: if (transformFile.endsWith(".ttl") || transformFile.endsWith(".TTL") || Chris@227: transformFile.endsWith(".ntriples") || transformFile.endsWith(".NTRIPLES") || Chris@227: transformFile.endsWith(".n3") || transformFile.endsWith(".N3")) { Chris@227: tryXml = false; Chris@227: } Chris@227: Chris@227: QString rdfError, xmlError; Chris@227: Chris@99: if (tryRdf) { Chris@227: Chris@99: RDFTransformFactory factory Chris@227: (QUrl::fromLocalFile(QFileInfo(transformFile).absoluteFilePath()) Chris@99: .toString()); Chris@99: ProgressPrinter printer("Parsing transforms RDF file"); Chris@99: std::vector transforms = factory.getTransforms(&printer); Chris@227: Chris@227: if (factory.isOK()) { Chris@227: if (transforms.empty()) { Chris@227: cerr << "ERROR: Transform file \"" << transformFile Chris@227: << "\" is valid RDF but defines no transforms" << endl; Chris@227: return false; Chris@227: } else { Chris@227: bool success = true; Chris@227: for (int i = 0; i < (int)transforms.size(); ++i) { Chris@227: if (!addFeatureExtractor(transforms[i], writers)) { Chris@227: success = false; Chris@227: } Chris@227: } Chris@227: return success; Chris@227: } Chris@227: } else { // !factory.isOK() Chris@99: if (factory.isRDF()) { Chris@227: cerr << "ERROR: Invalid transform RDF file \"" << transformFile Chris@227: << "\": " << factory.getErrorString() << endl; Chris@227: return false; Chris@0: } Chris@227: Chris@227: // the not-RDF case: fall through without reporting an Chris@227: // error, so we try the file as XML, and if that fails, we Chris@227: // print a general unparseable-file error Chris@227: rdfError = factory.getErrorString(); Chris@99: } Chris@0: } Chris@0: Chris@227: if (tryXml) { Chris@227: Chris@227: QFile file(transformFile); Chris@227: if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) { Chris@227: cerr << "ERROR: Failed to open transform file \"" Chris@227: << transformFile.toStdString() << "\" for reading" << endl; Chris@227: return false; Chris@227: } Chris@227: Chris@227: QTextStream *qts = new QTextStream(&file); Chris@227: QString qs = qts->readAll(); Chris@227: delete qts; Chris@227: file.close(); Chris@227: Chris@227: Transform transform(qs); Chris@227: xmlError = transform.getErrorString(); Chris@227: Chris@227: if (xmlError == "") { Chris@227: Chris@227: if (transform.getIdentifier() == "") { Chris@227: cerr << "ERROR: Transform file \"" << transformFile Chris@227: << "\" is valid XML but defines no transform" << endl; Chris@227: return false; Chris@227: } Chris@227: Chris@227: return addFeatureExtractor(transform, writers); Chris@227: } Chris@0: } Chris@0: Chris@227: cerr << "ERROR: Transform file \"" << transformFile Chris@227: << "\" could not be parsed" << endl; Chris@227: if (rdfError != "") { Chris@227: cerr << "ERROR: RDF parser reported: " << rdfError << endl; Chris@227: } Chris@227: if (xmlError != "") { Chris@227: cerr << "ERROR: XML parser reported: " << xmlError << endl; Chris@227: } Chris@0: Chris@227: return false; Chris@0: } Chris@0: Chris@106: void FeatureExtractionManager::addSource(QString audioSource, bool willMultiplex) Chris@0: { Chris@45: std::cerr << "Have audio source: \"" << audioSource.toStdString() << "\"" << std::endl; Chris@45: Chris@45: // We don't actually do anything with it here, unless it's the Chris@45: // first audio source and we need it to establish default channel Chris@45: // count and sample rate Chris@45: Chris@45: if (m_channels == 0 || m_defaultSampleRate == 0) { Chris@45: Chris@227: ProgressPrinter retrievalProgress("Retrieving first input file to determine default rate and channel count..."); Chris@45: Chris@45: FileSource source(audioSource, &retrievalProgress); Chris@45: if (!source.isAvailable()) { Chris@45: cerr << "ERROR: File or URL \"" << audioSource.toStdString() Chris@117: << "\" could not be located"; Chris@117: if (source.getErrorString() != "") { Chris@117: cerr << ": " << source.getErrorString(); Chris@117: } Chris@117: cerr << endl; Chris@45: throw FileNotFound(audioSource); Chris@45: } Chris@45: Chris@45: source.waitForData(); Chris@45: Chris@45: // Open to determine validity, channel count, sample rate only Chris@45: // (then close, and open again later with actual desired rate &c) Chris@45: Chris@45: AudioFileReader *reader = Chris@116: AudioFileReaderFactory::createReader(source, 0, Chris@116: m_normalise, Chris@95: &retrievalProgress); Chris@45: Chris@45: if (!reader) { Chris@45: throw FailedToOpenFile(audioSource); Chris@45: } Chris@45: Chris@45: retrievalProgress.done(); Chris@45: Chris@45: cerr << "File or URL \"" << audioSource.toStdString() << "\" opened successfully" << endl; Chris@45: Chris@113: if (!willMultiplex) { Chris@106: if (m_channels == 0) { Chris@106: m_channels = reader->getChannelCount(); Chris@106: cerr << "Taking default channel count of " Chris@227: << reader->getChannelCount() << " from audio file" << endl; Chris@106: } Chris@45: } Chris@45: Chris@45: if (m_defaultSampleRate == 0) { Chris@45: m_defaultSampleRate = reader->getNativeRate(); Chris@45: cerr << "Taking default sample rate of " Chris@227: << reader->getNativeRate() << "Hz from audio file" << endl; Chris@45: cerr << "(Note: Default may be overridden by transforms)" << endl; Chris@45: } Chris@45: Chris@45: m_readyReaders[audioSource] = reader; Chris@45: } Chris@113: Chris@113: if (willMultiplex) { Chris@113: ++m_channels; // channel count is simply number of sources Chris@113: cerr << "Multiplexing, incremented target channel count to " Chris@113: << m_channels << endl; Chris@113: } Chris@45: } Chris@45: Chris@112: void FeatureExtractionManager::extractFeatures(QString audioSource) Chris@45: { Chris@45: if (m_plugins.empty()) return; Chris@45: Chris@45: testOutputFiles(audioSource); Chris@45: Chris@0: if (m_sampleRate == 0) { Chris@45: throw FileOperationFailed Chris@45: (audioSource, "internal error: have sources and plugins, but no sample rate"); Chris@45: } Chris@45: if (m_channels == 0) { Chris@45: throw FileOperationFailed Chris@45: (audioSource, "internal error: have sources and plugins, but no channel count"); Chris@0: } Chris@0: Chris@106: AudioFileReader *reader = prepareReader(audioSource); Chris@106: extractFeaturesFor(reader, audioSource); // Note this also deletes reader Chris@106: } Chris@102: Chris@106: void FeatureExtractionManager::extractFeaturesMultiplexed(QStringList sources) Chris@106: { Chris@106: if (m_plugins.empty() || sources.empty()) return; Chris@106: Chris@106: QString nominalSource = sources[0]; Chris@106: Chris@106: testOutputFiles(nominalSource); Chris@106: Chris@106: if (m_sampleRate == 0) { Chris@106: throw FileOperationFailed Chris@106: (nominalSource, "internal error: have sources and plugins, but no sample rate"); Chris@106: } Chris@106: if (m_channels == 0) { Chris@106: throw FileOperationFailed Chris@106: (nominalSource, "internal error: have sources and plugins, but no channel count"); Chris@106: } Chris@106: Chris@106: QList readers; Chris@106: foreach (QString source, sources) { Chris@106: AudioFileReader *reader = prepareReader(source); Chris@106: readers.push_back(reader); Chris@106: } Chris@106: Chris@106: AudioFileReader *reader = new MultiplexedReader(readers); Chris@106: extractFeaturesFor(reader, nominalSource); // Note this also deletes reader Chris@106: } Chris@106: Chris@106: AudioFileReader * Chris@106: FeatureExtractionManager::prepareReader(QString source) Chris@106: { Chris@45: AudioFileReader *reader = 0; Chris@106: if (m_readyReaders.contains(source)) { Chris@106: reader = m_readyReaders[source]; Chris@106: m_readyReaders.remove(source); Chris@106: if (reader->getSampleRate() != m_sampleRate) { Chris@45: // can't use this; open it again Chris@45: delete reader; Chris@45: reader = 0; Chris@45: } Chris@45: } Chris@45: if (!reader) { Chris@45: ProgressPrinter retrievalProgress("Retrieving audio data..."); Chris@106: FileSource fs(source, &retrievalProgress); Chris@106: fs.waitForData(); Chris@116: reader = AudioFileReaderFactory::createReader(fs, m_sampleRate, Chris@116: m_normalise, Chris@116: &retrievalProgress); Chris@45: retrievalProgress.done(); Chris@45: } Chris@102: if (!reader) { Chris@107: throw FailedToOpenFile(source); Chris@102: } Chris@219: if (reader->getChannelCount() != m_channels || Chris@219: reader->getNativeRate() != m_sampleRate) { Chris@219: cerr << "NOTE: File will be mixed or resampled for processing, to: " Chris@219: << m_channels << "ch at " Chris@219: << m_sampleRate << "Hz" << endl; Chris@219: } Chris@106: return reader; Chris@106: } Chris@45: Chris@106: void Chris@106: FeatureExtractionManager::extractFeaturesFor(AudioFileReader *reader, Chris@106: QString audioSource) Chris@106: { Chris@106: // Note: This also deletes reader Chris@0: Chris@45: cerr << "Audio file \"" << audioSource.toStdString() << "\": " Chris@45: << reader->getChannelCount() << "ch at " Chris@45: << reader->getNativeRate() << "Hz" << endl; Chris@11: Chris@0: // allocate audio buffers Chris@0: float **data = new float *[m_channels]; Chris@0: for (int c = 0; c < m_channels; ++c) { Chris@0: data[c] = new float[m_blockSize]; Chris@0: } Chris@31: Chris@31: struct LifespanMgr { // unintrusive hack introduced to ensure Chris@31: // destruction on exceptions Chris@31: AudioFileReader *m_r; Chris@31: int m_c; Chris@31: float **m_d; Chris@31: LifespanMgr(AudioFileReader *r, int c, float **d) : Chris@31: m_r(r), m_c(c), m_d(d) { } Chris@31: ~LifespanMgr() { destroy(); } Chris@31: void destroy() { Chris@31: if (!m_r) return; Chris@31: delete m_r; Chris@31: for (int i = 0; i < m_c; ++i) delete[] m_d[i]; Chris@31: delete[] m_d; Chris@31: m_r = 0; Chris@31: } Chris@31: }; Chris@31: LifespanMgr lifemgr(reader, m_channels, data); Chris@0: Chris@0: size_t frameCount = reader->getFrameCount(); Chris@0: Chris@0: // cerr << "file has " << frameCount << " frames" << endl; Chris@0: Chris@99: int earliestStartFrame = 0; Chris@99: int latestEndFrame = frameCount; Chris@99: bool haveExtents = false; Chris@99: Chris@109: foreach (Plugin *plugin, m_orderedPlugins) { Chris@0: Chris@109: PluginMap::iterator pi = m_plugins.find(plugin); Chris@0: Chris@233: // std::cerr << "Calling reset on " << plugin << std::endl; Chris@0: plugin->reset(); Chris@0: Chris@0: for (TransformWriterMap::iterator ti = pi->second.begin(); Chris@0: ti != pi->second.end(); ++ti) { Chris@0: Chris@0: const Transform &transform = ti->first; Chris@0: Chris@99: int startFrame = RealTime::realTime2Frame Chris@99: (transform.getStartTime(), m_sampleRate); Chris@99: int duration = RealTime::realTime2Frame Chris@99: (transform.getDuration(), m_sampleRate); Chris@99: if (duration == 0) { Chris@99: duration = frameCount - startFrame; Chris@99: } Chris@99: Chris@99: if (!haveExtents || startFrame < earliestStartFrame) { Chris@99: earliestStartFrame = startFrame; Chris@99: } Chris@99: if (!haveExtents || startFrame + duration > latestEndFrame) { Chris@99: latestEndFrame = startFrame + duration; Chris@99: } Chris@131: /* Chris@109: cerr << "startFrame for transform " << startFrame << endl; Chris@109: cerr << "duration for transform " << duration << endl; Chris@109: cerr << "earliestStartFrame becomes " << earliestStartFrame << endl; Chris@109: cerr << "latestEndFrame becomes " << latestEndFrame << endl; Chris@131: */ Chris@99: haveExtents = true; Chris@0: Chris@0: string outputId = transform.getOutput().toStdString(); Chris@0: if (m_pluginOutputs[plugin].find(outputId) == Chris@0: m_pluginOutputs[plugin].end()) { Chris@129: // We shouldn't actually reach this point: Chris@129: // addFeatureExtractor tests whether the output exists Chris@129: cerr << "ERROR: Nonexistent plugin output \"" << outputId << "\" requested for transform \"" Chris@0: << transform.getIdentifier().toStdString() << "\", ignoring this transform" Chris@0: << endl; Chris@0: /* Chris@0: cerr << "Known outputs for all plugins are as follows:" << endl; Chris@0: for (PluginOutputMap::const_iterator k = m_pluginOutputs.begin(); Chris@0: k != m_pluginOutputs.end(); ++k) { Chris@0: cerr << "Plugin " << k->first << ": "; Chris@0: if (k->second.empty()) { Chris@0: cerr << "(none)"; Chris@0: } Chris@0: for (OutputMap::const_iterator i = k->second.begin(); Chris@0: i != k->second.end(); ++i) { Chris@0: cerr << "\"" << i->first << "\" "; Chris@0: } Chris@0: cerr << endl; Chris@0: } Chris@0: */ Chris@0: } Chris@0: } Chris@0: } Chris@0: Chris@99: int startFrame = earliestStartFrame; Chris@99: int endFrame = latestEndFrame; Chris@0: Chris@109: foreach (Plugin *plugin, m_orderedPlugins) { Chris@109: Chris@109: PluginMap::iterator pi = m_plugins.find(plugin); Chris@0: Chris@0: for (TransformWriterMap::const_iterator ti = pi->second.begin(); Chris@0: ti != pi->second.end(); ++ti) { Chris@0: Chris@0: const vector &writers = ti->second; Chris@0: Chris@0: for (int j = 0; j < (int)writers.size(); ++j) { Chris@0: FeatureWriter::TrackMetadata m; Chris@0: m.title = reader->getTitle(); Chris@0: m.maker = reader->getMaker(); Chris@208: m.duration = RealTime::frame2RealTime(reader->getFrameCount(), Chris@208: reader->getSampleRate()); Chris@208: writers[j]->setTrackMetadata(audioSource, m); Chris@0: } Chris@0: } Chris@0: } Chris@0: Chris@0: ProgressPrinter extractionProgress("Extracting and writing features..."); Chris@0: int progress = 0; Chris@0: Chris@99: for (int i = startFrame; i < endFrame; i += m_blockSize) { Chris@0: Chris@0: //!!! inefficient, although much of the inefficiency may be Chris@99: // susceptible to compiler optimisation Chris@0: Chris@195: SampleBlock frames = reader->getInterleavedFrames(i, m_blockSize); Chris@0: Chris@0: // We have to do our own channel handling here; we can't just Chris@0: // leave it to the plugin adapter because the same plugin Chris@0: // adapter may have to serve for input files with various Chris@0: // numbers of channels (so the adapter is simply configured Chris@34: // with a fixed channel count). Chris@0: Chris@0: int rc = reader->getChannelCount(); Chris@0: Chris@34: // m_channels is the number of channels we need for the plugin Chris@34: Chris@34: int index; Chris@34: int fc = (int)frames.size(); Chris@46: Chris@34: if (m_channels == 1) { // only case in which we can sensibly mix down Chris@34: for (int j = 0; j < m_blockSize; ++j) { Chris@34: data[0][j] = 0.f; Chris@34: } Chris@34: for (int c = 0; c < rc; ++c) { Chris@34: for (int j = 0; j < m_blockSize; ++j) { Chris@0: index = j * rc + c; Chris@34: if (index < fc) data[0][j] += frames[index]; Chris@0: } Chris@0: } Chris@34: for (int j = 0; j < m_blockSize; ++j) { Chris@34: data[0][j] /= rc; Chris@34: } Chris@34: } else { Chris@34: for (int c = 0; c < m_channels; ++c) { Chris@34: for (int j = 0; j < m_blockSize; ++j) { Chris@34: data[c][j] = 0.f; Chris@34: } Chris@34: if (c < rc) { Chris@34: for (int j = 0; j < m_blockSize; ++j) { Chris@34: index = j * rc + c; Chris@34: if (index < fc) data[c][j] += frames[index]; Chris@34: } Chris@34: } Chris@34: } Chris@34: } Chris@0: Chris@0: Vamp::RealTime timestamp = Vamp::RealTime::frame2RealTime Chris@0: (i, m_sampleRate); Chris@0: Chris@109: foreach (Plugin *plugin, m_orderedPlugins) { Chris@0: Chris@109: PluginMap::iterator pi = m_plugins.find(plugin); Chris@123: Chris@123: // Skip any plugin none of whose transforms have come Chris@123: // around yet. (Though actually, all transforms for a Chris@123: // given plugin must have the same start time -- they can Chris@123: // only differ in output and summary type.) Chris@123: bool inRange = false; Chris@123: for (TransformWriterMap::const_iterator ti = pi->second.begin(); Chris@123: ti != pi->second.end(); ++ti) { Chris@123: int startFrame = RealTime::realTime2Frame Chris@123: (ti->first.getStartTime(), m_sampleRate); Chris@123: if (i >= startFrame || i + m_blockSize > startFrame) { Chris@123: inRange = true; Chris@123: break; Chris@123: } Chris@123: } Chris@123: if (!inRange) { Chris@123: continue; Chris@123: } Chris@123: Chris@0: Plugin::FeatureSet featureSet = plugin->process(data, timestamp); Chris@0: Chris@0: if (!m_summariesOnly) { Chris@0: writeFeatures(audioSource, plugin, featureSet); Chris@0: } Chris@0: } Chris@0: Chris@0: int pp = progress; Chris@6: progress = int(((i - startFrame) * 100.0) / (endFrame - startFrame) + 0.1); Chris@0: if (progress > pp) extractionProgress.setProgress(progress); Chris@0: } Chris@10: Chris@22: // std::cerr << "FeatureExtractionManager: deleting audio file reader" << std::endl; Chris@12: Chris@31: lifemgr.destroy(); // deletes reader, data Chris@57: Chris@109: foreach (Plugin *plugin, m_orderedPlugins) { Chris@57: Chris@109: PluginMap::iterator pi = m_plugins.find(plugin); Chris@0: Plugin::FeatureSet featureSet = plugin->getRemainingFeatures(); Chris@0: Chris@0: if (!m_summariesOnly) { Chris@0: writeFeatures(audioSource, plugin, featureSet); Chris@0: } Chris@0: Chris@0: if (!m_summaries.empty()) { Chris@233: // Summaries requested on the command line, for all transforms Chris@0: PluginSummarisingAdapter *adapter = Chris@0: dynamic_cast(plugin); Chris@0: if (!adapter) { Chris@0: cerr << "WARNING: Summaries requested, but plugin is not a summarising adapter" << endl; Chris@0: } else { Chris@0: for (SummaryNameSet::const_iterator sni = m_summaries.begin(); Chris@0: sni != m_summaries.end(); ++sni) { Chris@0: featureSet.clear(); Chris@0: //!!! problem here -- we are requesting summaries Chris@0: //!!! for all outputs, but they in principle have Chris@0: //!!! different averaging requirements depending Chris@0: //!!! on whether their features have duration or Chris@0: //!!! not Chris@0: featureSet = adapter->getSummaryForAllOutputs Chris@0: (getSummaryType(*sni), Chris@0: PluginSummarisingAdapter::ContinuousTimeAverage); Chris@233: writeFeatures(audioSource, plugin, featureSet, Chris@0: Transform::stringToSummaryType(sni->c_str())); Chris@0: } Chris@0: } Chris@0: } Chris@0: Chris@233: // Summaries specified in transform definitions themselves Chris@0: writeSummaries(audioSource, plugin); Chris@0: } Chris@0: Chris@3: extractionProgress.done(); Chris@3: Chris@0: finish(); Chris@0: Chris@0: TempDirectory::getInstance()->cleanup(); Chris@0: } Chris@0: Chris@0: void Chris@0: FeatureExtractionManager::writeSummaries(QString audioSource, Plugin *plugin) Chris@0: { Chris@0: // caller should have ensured plugin is in m_plugins Chris@0: PluginMap::iterator pi = m_plugins.find(plugin); Chris@0: Chris@0: for (TransformWriterMap::const_iterator ti = pi->second.begin(); Chris@0: ti != pi->second.end(); ++ti) { Chris@0: Chris@0: const Transform &transform = ti->first; Chris@0: Chris@233: // cerr << "FeatureExtractionManager::writeSummaries: plugin is " << plugin Chris@233: // << ", found transform: " << transform.toXmlString() << endl; Chris@233: Chris@0: Transform::SummaryType summaryType = transform.getSummaryType(); Chris@0: PluginSummarisingAdapter::SummaryType pType = Chris@0: (PluginSummarisingAdapter::SummaryType)summaryType; Chris@0: Chris@0: if (transform.getSummaryType() == Transform::NoSummary) { Chris@233: // cerr << "(no summary, continuing)" << endl; Chris@0: continue; Chris@0: } Chris@0: Chris@0: PluginSummarisingAdapter *adapter = Chris@0: dynamic_cast(plugin); Chris@0: if (!adapter) { Chris@0: cerr << "FeatureExtractionManager::writeSummaries: INTERNAL ERROR: Summary requested for transform, but plugin is not a summarising adapter" << endl; Chris@0: continue; Chris@0: } Chris@0: Chris@0: Plugin::FeatureSet featureSet = adapter->getSummaryForAllOutputs Chris@0: (pType, PluginSummarisingAdapter::ContinuousTimeAverage); Chris@0: Chris@233: // cerr << "summary type " << int(pType) << " for transform:" << endl << transform.toXmlString().toStdString()<< endl << "... feature set with " << featureSet.size() << " elts" << endl; Chris@0: Chris@0: writeFeatures(audioSource, plugin, featureSet, summaryType); Chris@0: } Chris@0: } Chris@0: Chris@0: void FeatureExtractionManager::writeFeatures(QString audioSource, Chris@0: Plugin *plugin, Chris@0: const Plugin::FeatureSet &features, Chris@0: Transform::SummaryType summaryType) Chris@0: { Chris@0: // caller should have ensured plugin is in m_plugins Chris@0: PluginMap::iterator pi = m_plugins.find(plugin); Chris@0: Chris@233: // Write features from the feature set passed in, according to the Chris@233: // transforms listed for the given plugin with the given summary type Chris@233: Chris@0: for (TransformWriterMap::const_iterator ti = pi->second.begin(); Chris@0: ti != pi->second.end(); ++ti) { Chris@0: Chris@0: const Transform &transform = ti->first; Chris@0: const vector &writers = ti->second; Chris@0: Chris@233: // cerr << "writeFeatures: plugin " << plugin << " has transform: " << transform.toXmlString() << endl; Chris@233: Chris@233: if (transform.getSummaryType() == Transform::NoSummary && Chris@233: !m_summaries.empty()) { Chris@233: // cerr << "transform has no summary, but summaries requested on command line, so going for it anyway" << endl; Chris@233: } else if (transform.getSummaryType() != summaryType) { Chris@233: // Either we're not writing a summary and the transform Chris@233: // has one, or we're writing a summary but the transform Chris@233: // has none or a different one; either way, skip it Chris@233: // cerr << "summary type differs from passed-in one " << summaryType << endl; Chris@0: continue; Chris@0: } Chris@0: Chris@0: string outputId = transform.getOutput().toStdString(); Chris@0: Chris@0: if (m_pluginOutputs[plugin].find(outputId) == Chris@0: m_pluginOutputs[plugin].end()) { Chris@0: continue; Chris@0: } Chris@0: Chris@0: const Plugin::OutputDescriptor &desc = Chris@0: m_pluginOutputs[plugin][outputId]; Chris@0: Chris@0: int outputIndex = m_pluginOutputIndices[outputId]; Chris@0: Plugin::FeatureSet::const_iterator fsi = features.find(outputIndex); Chris@0: if (fsi == features.end()) continue; Chris@0: Chris@233: // cerr << "this transform has " << writers.size() << " writer(s)" << endl; Chris@233: Chris@0: for (int j = 0; j < (int)writers.size(); ++j) { Chris@0: writers[j]->write Chris@0: (audioSource, transform, desc, fsi->second, Chris@0: Transform::summaryTypeToString(summaryType).toStdString()); Chris@0: } Chris@0: } Chris@0: } Chris@0: Chris@31: void FeatureExtractionManager::testOutputFiles(QString audioSource) Chris@31: { Chris@31: for (PluginMap::iterator pi = m_plugins.begin(); Chris@31: pi != m_plugins.end(); ++pi) { Chris@31: Chris@31: for (TransformWriterMap::iterator ti = pi->second.begin(); Chris@31: ti != pi->second.end(); ++ti) { Chris@31: Chris@31: vector &writers = ti->second; Chris@31: Chris@31: for (int i = 0; i < (int)writers.size(); ++i) { Chris@31: writers[i]->testOutputFile(audioSource, ti->first.getIdentifier()); Chris@31: } Chris@31: } Chris@31: } Chris@31: } Chris@31: Chris@0: void FeatureExtractionManager::finish() Chris@0: { Chris@109: foreach (Plugin *plugin, m_orderedPlugins) { Chris@109: Chris@109: PluginMap::iterator pi = m_plugins.find(plugin); Chris@0: Chris@0: for (TransformWriterMap::iterator ti = pi->second.begin(); Chris@0: ti != pi->second.end(); ++ti) { Chris@0: Chris@0: vector &writers = ti->second; Chris@0: Chris@0: for (int i = 0; i < (int)writers.size(); ++i) { Chris@0: writers[i]->flush(); Chris@0: writers[i]->finish(); Chris@0: } Chris@0: } Chris@0: } Chris@0: } Chris@0: Chris@0: void FeatureExtractionManager::print(Transform transform) const Chris@0: { Chris@0: QString qs; Chris@0: QTextStream qts(&qs); Chris@0: transform.toXml(qts); Chris@0: cerr << qs.toStdString() << endl; Chris@0: }