# HG changeset patch
# User Chris Cannam
# Date 1228990953 0
# Node ID 581b1b150a4d7d953ff1bb4210d69160bb7d2c2c
* copy to sonic-annotator

diff -r 000000000000 -r 581b1b150a4d AudioDBFeatureWriter.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/AudioDBFeatureWriter.cpp Thu Dec 11 10:22:33 2008 +0000
@@ -0,0 +1,214 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Sonic Annotator
+    A utility for batch feature extraction from audio files.
+    Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London.
+    Copyright 2007-2008 QMUL.
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+*/
+// NOTE(review): the two system #include targets below are missing in this copy of the patch -- TODO restore.
+#include
+
+#include
+
+#include "AudioDBFeatureWriter.h"
+
+using namespace std;
+using namespace Vamp;
+// Names of the two options reported by getSupportedParameters().
+string
+AudioDBFeatureWriter::catalogueIdParam = "catid";
+
+string
+AudioDBFeatureWriter::baseDirParam = "basedir";
+// One output stream together with the id of the track it was opened for.
+struct AudioDBFeatureWriter::TrackStream
+{
+    QString trackid;
+    ofstream* ofs;
+};
+// Defaults: catalogue id "catalog", base directory "audiodb".
+AudioDBFeatureWriter::AudioDBFeatureWriter() :
+    catalogueId("catalog"), baseDir("audiodb")
+{
+
+}
+// Close and delete every stream still held in dbfiles.
+AudioDBFeatureWriter::~AudioDBFeatureWriter()
+{
+    // close all open files
+    for (map<string, TrackStream>::iterator iter = dbfiles.begin(); iter != dbfiles.end(); ++iter)
+    {
+        if (iter->second.ofs) {
+            iter->second.ofs->close();
+            delete iter->second.ofs;
+        }
+    }
+
+    // TODO: error handling on close
+}
+// Describe the options this writer accepts; both take an argument.
+AudioDBFeatureWriter::ParameterList
+AudioDBFeatureWriter::getSupportedParameters() const
+{
+    ParameterList pl;
+    Parameter p;
+
+    p.name = catalogueIdParam;
+    p.description = "Catalogue ID";
+    p.hasArg = true;
+    pl.push_back(p);
+
+    p.name = baseDirParam;
+    p.description = "Base output directory path";
+    p.hasArg = true;
+    pl.push_back(p);
+
+    return pl;
+}
+
+void
+AudioDBFeatureWriter::setParameters(map<string, string> &params)
+{   // Consume the options this writer recognises; anything else is left in params.
+    if (params.find(catalogueIdParam) != params.end()) {
+        setCatalogueId(params[catalogueIdParam]);
+        params.erase(catalogueIdParam);
+    }
+    if (params.find(baseDirParam) != params.end()) {
+        setBaseDirectory(params[baseDirParam]);
+        params.erase(baseDirParam);
+    }
+}
+// Set the catalogue directory name used when building output file paths.
+void
+AudioDBFeatureWriter::setCatalogueId(const string &catid)
+{
+    catalogueId = catid;
+}
+// Set the base directory under which the catalogue directory is expected.
+void
+AudioDBFeatureWriter::setBaseDirectory(const string &base)
+{
+    baseDir = base;
+}
+// Append featureList for one plugin output to that output's file for trackid.
+void AudioDBFeatureWriter::write(QString trackid,
+                                 const Transform &transform,
+                                 const Vamp::Plugin::OutputDescriptor& output,
+                                 const Vamp::Plugin::FeatureList& featureList,
+                                 std::string summaryType)
+{
+    //!!! use summaryType
+    if (summaryType != "") {
+        //!!! IMPLEMENT
+        cerr << "ERROR: AudioDBFeatureWriter::write: Writing summaries is not yet implemented!" << endl;
+        exit(1);
+    }
+
+
+    // binary output for FeatureSet
+
+    // feature-dimension feature-1 feature-2 ...
+    // timestamp-1 timestamp-2 ...
+
+    // audioDB has to write each feature to a different file
+    // assume a simple naming convention of
+    // <baseDir>/<catalogueId>/<track file name>.<output identifier>
+    // with timestamps in a corresponding ...<output identifier>.timestamp file
+    // (start and end times in seconds for each frame -- somewhat optional)
+
+    // the feature writer holds a map of open output streams (dbfiles)
+    // the catalogue id comes from the "catid" parameter (default "catalog")
+
+    // NB -- all "floats" in the file should in fact be doubles
+
+    // TODO:
+    // - write feature end rather than start times, once end time is available in vamp
+    // - write a power file, probably by wrapping plugin in a PluginPowerAdapter :)
+
+    if (output.binCount == 0) // this kind of feature just outputs timestamps and labels, assume of no interest to audioDB
+        return;
+
+    for (int i = 0; i < (int)featureList.size(); ++i)
+    {
+        // replace output files if necessary
+        if (replaceDBFile(trackid, output.identifier))
+        {
+            // write the feature length for the next track feature record
+            // binCount has to be set
+            // - it can be zero, i.e. if the output is really a set of labels + timestamps
+            *dbfiles[output.identifier].ofs /*<< ios::binary*/ << output.binCount;
+
+            cerr << "writing bin count " << output.binCount << " for " << output.identifier << endl;
+        }
+
+        if (replaceDBFile(trackid, output.identifier + ".timestamp"))
+        {
+            // write the start time to the timestamp file
+            // as we want it for the first feature in the file
+            *dbfiles[output.identifier + ".timestamp"].ofs << featureList[i].timestamp.toString() << endl;
+        }
+
+        if (dbfiles[output.identifier].ofs) {
+            for (int j = 0; j < (int)featureList[i].values.size(); ++j)
+                *dbfiles[output.identifier].ofs /*<< ios::binary*/ << featureList[i].values[j];
+
+            // write the *end* time of each feature to the timestamp file
+            // NOT IMPLEMENTED YET
+//            *dbfiles[output.identifier + ".timestamp"].ofs << featureList[i].timestamp.toString() << endl;
+        }
+    }
+}
+// Open the file for (trackid, identifier) and register it in dbfiles; false if the open fails.
+bool AudioDBFeatureWriter::openDBFile(QString trackid, const string& identifier)
+{
+    QString trackBase = QFileInfo(trackid).fileName();
+    string filepath = baseDir + "/" + catalogueId + "/" + trackBase.toStdString() + "." + identifier;
+    cerr << "AudioDBFeatureWriter::openDBFile: filepath is \"" << filepath << "\"" << endl;
+    ofstream* ofs = new ofstream(filepath.c_str());
+    if (!*ofs) {
+        cerr << "ERROR AudioDBFeatureWriter::openDBFile(): can't open file " << filepath << endl;
+        delete ofs; // nothing takes ownership on failure, so free it here instead of leaking it
+        return false;
+    }
+    TrackStream ts;
+    ts.trackid = trackid;
+    ts.ofs = ofs;
+    dbfiles[identifier] = ts;
+    return true;
+}
+// Make sure the stream registered under identifier belongs to trackid.
+// Returns true only when a new file was opened by this call (so the caller can write its header).
+bool AudioDBFeatureWriter::replaceDBFile(QString trackid,
+                                         const string& identifier)
+{
+    if (dbfiles.find(identifier) != dbfiles.end() && dbfiles[identifier].trackid == trackid)
+        return false; // have an open file for this track
+
+    if (dbfiles.find(identifier) != dbfiles.end() && dbfiles[identifier].trackid != trackid)
+    {
+        // close the current file
+        if (dbfiles[identifier].ofs) {
+            dbfiles[identifier].ofs->close();
+            delete dbfiles[identifier].ofs;
+            dbfiles[identifier].ofs = 0;
+        }
+    }
+
+    // open a new file
+    if (!openDBFile(trackid, identifier)) {
+        dbfiles[identifier].ofs = 0;
+        return false; //!!! should throw an exception, otherwise we'll try to open the file again and again every time we want to write to it
+    }
+
+    return true;
+}
+
+
diff -r 000000000000 -r 581b1b150a4d AudioDBFeatureWriter.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/AudioDBFeatureWriter.h Thu Dec 11 10:22:33 2008 +0000
@@ -0,0 +1,61 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Sonic Annotator
+    A utility for batch feature extraction from audio files.
+    Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London.
+    Copyright 2007-2008 QMUL.
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.
See the file
+    COPYING included with this distribution for more information.
+*/
+
+#ifndef _AUDIO_DB_FEATURE_WRITER_H_
+#define _AUDIO_DB_FEATURE_WRITER_H_
+// NOTE(review): the two #include targets below are missing in this copy of the patch -- TODO restore.
+#include
+#include
+
+using std::string;
+using std::map;
+
+#include "transform/FeatureWriter.h"
+// FeatureWriter that writes each plugin output to its own per-track file for audioDB.
+class AudioDBFeatureWriter : public FeatureWriter
+{
+public:
+    AudioDBFeatureWriter();
+    virtual ~AudioDBFeatureWriter();
+    // Options: setParameters() consumes the "catid" and "basedir" entries of params.
+    virtual ParameterList getSupportedParameters() const;
+    virtual void setParameters(map<string, string> &params);
+
+    virtual void setCatalogueId(const string &);
+    virtual void setBaseDirectory(const string &);
+    // Append the given features of one output to that output's file for trackid.
+    virtual void write(QString trackid,
+                       const Transform &transform,
+                       const Vamp::Plugin::OutputDescriptor &output,
+                       const Vamp::Plugin::FeatureList &features,
+                       std::string summaryType = "");
+
+    virtual void finish() { }
+
+private:
+    string catalogueId;
+    string baseDir;
+
+    static string catalogueIdParam;
+    static string baseDirParam;
+    // Open output streams, keyed by the identifier passed to openDBFile().
+    struct TrackStream;
+    map<string, TrackStream> dbfiles;
+
+    bool openDBFile(QString trackid, const string& identifier);
+    bool replaceDBFile(QString trackid, const string& identifier);
+};
+
+#endif
diff -r 000000000000 -r 581b1b150a4d DefaultFeatureWriter.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DefaultFeatureWriter.cpp Thu Dec 11 10:22:33 2008 +0000
@@ -0,0 +1,77 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Sonic Annotator
+    A utility for batch feature extraction from audio files.
+    Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London.
+    Copyright 2007-2008 QMUL.
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+*/
+// NOTE(review): the two #include targets below are missing in this copy of the patch -- TODO restore.
+#include
+#include
+
+using namespace std;
+
+#include "DefaultFeatureWriter.h"
+// Print every feature in featureList to standard output, one record per feature.
+void DefaultFeatureWriter::write(QString trackid,
+                                 const Transform &transform,
+                                 const Vamp::Plugin::OutputDescriptor& output,
+                                 const Vamp::Plugin::FeatureList& featureList,
+                                 std::string summaryType)
+{
+    // generic XML output
+    // NOTE(review): the tag text in the comment and string literals below is empty in this copy; it looks stripped in extraction -- TODO restore.
+    /*
+
+
+        output.name
+        feature.timestamp
+        output.binName[0]:feature.value[0]...
+
+
+
+    */
+
+    for (int i = 0; i < (int)featureList.size(); ++i)
+    {
+        if (summaryType == "") {
+            cout << "" << endl;
+        } else {
+            cout << "" << endl;
+        }
+        cout << "\t" << output.name << "" << endl;
+        if (featureList[i].hasTimestamp) {
+            cout << "\t" << featureList[i].timestamp << "" << endl;
+        }
+        if (featureList[i].hasDuration) {
+            cout << "\t" << featureList[i].duration << "" << endl;
+        }
+        if (featureList[i].values.size() > 0)
+        {
+            cout << "\t";
+            for (int j = 0; j < (int)featureList[i].values.size(); ++j)
+            {
+                if (j > 0)
+                    cout << " ";
+                if (j < (int)output.binNames.size()) // there may be fewer bin names than values
+                    cout << output.binNames[j] << ":";
+                cout << featureList[i].values[j];
+            }
+            cout << "" << endl;
+        }
+        if (featureList[i].label.length() > 0)
+            cout << "\t" << endl;
+        if (summaryType == "") {
+            cout << "" << endl;
+        } else {
+            cout << "" << endl;
+        }
+    }
+}
diff -r 000000000000 -r 581b1b150a4d DefaultFeatureWriter.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DefaultFeatureWriter.h Thu Dec 11 10:22:33 2008 +0000
@@ -0,0 +1,34 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Sonic Annotator
+    A utility for batch feature extraction from audio files.
+    Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London.
+    Copyright 2007-2008 QMUL.
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.
See the file
+    COPYING included with this distribution for more information.
+*/
+
+#ifndef _DEFAULT_FEATURE_WRITER_H_
+#define _DEFAULT_FEATURE_WRITER_H_
+
+
+#include "transform/FeatureWriter.h"
+// FeatureWriter whose write() prints each feature to standard output (see DefaultFeatureWriter.cpp).
+class DefaultFeatureWriter : public FeatureWriter
+{
+public:
+    virtual ~DefaultFeatureWriter() { }
+    virtual void write(QString trackid,
+                       const Transform &transform,
+                       const Vamp::Plugin::OutputDescriptor &output,
+                       const Vamp::Plugin::FeatureList &features,
+                       std::string summaryType = "");
+    virtual void finish() { } // nothing to finalise: this class holds no state of its own
+};
+
+#endif
diff -r 000000000000 -r 581b1b150a4d FeatureExtractionManager.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/FeatureExtractionManager.cpp Thu Dec 11 10:22:33 2008 +0000
@@ -0,0 +1,702 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Sonic Annotator
+    A utility for batch feature extraction from audio files.
+    Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London.
+    Copyright 2007-2008 QMUL.
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+*/
+
+#include "FeatureExtractionManager.h"
+// NOTE(review): the bare #include lines in this file have lost their targets in this copy of the patch -- TODO restore.
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+using namespace std;
+
+using Vamp::Plugin;
+using Vamp::PluginBase;
+using Vamp::HostExt::PluginLoader;
+using Vamp::HostExt::PluginChannelAdapter;
+using Vamp::HostExt::PluginBufferingAdapter;
+using Vamp::HostExt::PluginInputDomainAdapter;
+using Vamp::HostExt::PluginSummarisingAdapter;
+
+#include "data/fileio/FileSource.h"
+#include "data/fileio/AudioFileReader.h"
+#include "data/fileio/AudioFileReaderFactory.h"
+#include "data/fileio/PlaylistFileReader.h"
+#include "base/TempDirectory.h"
+#include "base/ProgressPrinter.h"
+#include "transform/TransformFactory.h"
+#include "rdf/RDFTransformFactory.h"
+#include "transform/FeatureWriter.h"
+
+#include
+#include
+#include
+
+FeatureExtractionManager::FeatureExtractionManager() :
+    m_summariesOnly(false),
+    // We can read using an arbitrary fixed block size --
+    // PluginBufferingAdapter handles this for us.  It's likely to be
+    // quicker to use larger sizes than smallish ones like 1024
+    m_blockSize(16384),
+    m_defaultSampleRate(0),
+    m_sampleRate(0),
+    m_channels(1)
+{
+}
+// Delete every plugin registered in m_plugins.
+FeatureExtractionManager::~FeatureExtractionManager()
+{
+    for (PluginMap::iterator pi = m_plugins.begin();
+         pi != m_plugins.end(); ++pi) {
+        delete pi->first;
+    }
+}
+// Number of channels plugins are initialised with and fed during extraction.
+void FeatureExtractionManager::setChannels(int channels)
+{
+    m_channels = channels;
+}
+// Rate used for transforms that specify none, until some transform fixes m_sampleRate.
+void FeatureExtractionManager::setDefaultSampleRate(int sampleRate)
+{
+    m_defaultSampleRate = sampleRate;
+}
+// Map a summary name such as "mean" to the adapter's enum; UnknownSummaryType if unrecognised.
+static PluginSummarisingAdapter::SummaryType
+getSummaryType(string name)
+{
+    if (name == "min") return PluginSummarisingAdapter::Minimum;
+    if (name == "max") return PluginSummarisingAdapter::Maximum;
+    if (name == "mean") return PluginSummarisingAdapter::Mean;
+    if (name == "median") return PluginSummarisingAdapter::Median;
+    if (name == "mode") return PluginSummarisingAdapter::Mode;
+    if (name == "sum") return PluginSummarisingAdapter::Sum;
+    if (name == "variance") return PluginSummarisingAdapter::Variance;
+    if (name == "sd") return PluginSummarisingAdapter::StandardDeviation;
+    if (name == "count") return PluginSummarisingAdapter::Count;
+    return PluginSummarisingAdapter::UnknownSummaryType;
+}
+// Validate and store the requested summary names; nothing is stored if any name is unknown.
+bool FeatureExtractionManager::setSummaryTypes(const set<string> &names,
+                                               bool summariesOnly,
+                                               const PluginSummarisingAdapter::SegmentBoundaries &boundaries)
+{
+    for (SummaryNameSet::const_iterator i = names.begin();
+         i != names.end(); ++i) {
+        if (getSummaryType(*i) == PluginSummarisingAdapter::UnknownSummaryType) {
+            cerr << "ERROR: Unknown summary type \"" << *i << "\"" << endl;
+            return false;
+        }
+    }
+    m_summaries = names;
+    m_summariesOnly = summariesOnly;
+    m_boundaries = boundaries;
+    return true;
+}
+// Register a transform and its writers, loading (or sharing) the plugin that will run it.
+bool FeatureExtractionManager::addFeatureExtractor
+(Transform transform, const vector<FeatureWriter *> &writers)
+{
+    //!!! exceptions rather than return values?
+
+    if (transform.getSampleRate() == 0) {
+        if (m_sampleRate == 0) {
+            cerr << "NOTE: Transform does not specify a sample rate, using default rate of " << m_defaultSampleRate << endl;
+            transform.setSampleRate(m_defaultSampleRate);
+            m_sampleRate = m_defaultSampleRate;
+        } else {
+            cerr << "NOTE: Transform does not specify a sample rate, using previous transform's rate of " << m_sampleRate << endl;
+            transform.setSampleRate(m_sampleRate);
+        }
+    }
+
+    if (m_sampleRate == 0) {
+        m_sampleRate = transform.getSampleRate();
+    }
+
+    if (transform.getSampleRate() != m_sampleRate) {
+        cerr << "WARNING: Transform sample rate " << transform.getSampleRate() << " does not match previously specified transform rate of " << m_sampleRate << " -- only a single rate is supported for each run" << endl;
+        cerr << "WARNING: Using previous rate of " << m_sampleRate << " for this transform as well" << endl;
+        transform.setSampleRate(m_sampleRate);
+    }
+
+    Plugin *plugin = 0;
+
+    // Remember what the original transform looked like, and index
+    // based on this -- because we may be about to fill in the zeros
+    // for step and block size, but we want any further copies with
+    // the same zeros to match this one
+    Transform originalTransform = transform;
+
+    if (m_transformPluginMap.find(transform) == m_transformPluginMap.end()) {
+
+        // Test whether we already have a transform that is identical
+        // to this, except for the output requested and/or the summary
+        // type -- if so, they should share plugin instances (a vital
+        // optimisation)
+
+        for (TransformPluginMap::iterator i = m_transformPluginMap.begin();
+             i != m_transformPluginMap.end(); ++i) {
+            Transform test = i->first;
+            test.setOutput(transform.getOutput());
+            test.setSummaryType(transform.getSummaryType());
+            if (transform == test) {
+                cerr << "NOTE: Already have transform identical to this one (for \""
+                     << transform.getIdentifier().toStdString()
+                     << "\") in every detail except output identifier and/or "
+                     << "summary type; sharing its plugin instance" << endl;
+                plugin = i->second;
+                if (transform.getSummaryType() != Transform::NoSummary &&
+                    !dynamic_cast<PluginSummarisingAdapter *>(plugin)) {
+                    plugin = new PluginSummarisingAdapter(plugin);
+                    i->second = plugin; // NOTE(review): m_plugins / m_pluginOutputs remain keyed by the unwrapped pointer -- confirm this path is safe
+                }
+                break;
+            }
+        }
+
+        if (!plugin) {
+
+            TransformFactory *tf = TransformFactory::getInstance();
+
+            PluginBase *pb = tf->instantiatePluginFor(transform);
+            plugin = tf->downcastVampPlugin(pb);
+            if (!plugin) {
+                //!!! todo: handle non-Vamp plugins too, or make the main --list
+                // option print out only Vamp transforms
+                cerr << "ERROR: Failed to load plugin for transform \""
+                     << transform.getIdentifier().toStdString() << "\"" << endl;
+                delete pb;
+                return false;
+            }
+
+            // We will provide the plugin with arbitrary step and
+            // block sizes (so that we can use the same read/write
+            // block size for all transforms), and to that end we use
+            // a PluginBufferingAdapter.  However, we need to know the
+            // underlying step size so that we can provide the right
+            // context for dense outputs.  (Although, don't forget
+            // that the PluginBufferingAdapter rewrites
+            // OneSamplePerStep outputs so as to use FixedSampleRate
+            // -- so it supplies the sample rate in the output
+            // feature.  I'm not sure whether we can easily use that.)
+
+            size_t pluginStepSize = plugin->getPreferredStepSize();
+            size_t pluginBlockSize = plugin->getPreferredBlockSize();
+
+            // adapt the plugin for buffering, channels, etc.
+            if (plugin->getInputDomain() == Plugin::FrequencyDomain) {
+                plugin = new PluginInputDomainAdapter(plugin);
+            }
+
+            PluginBufferingAdapter *pba = new PluginBufferingAdapter(plugin);
+            plugin = pba;
+
+            if (transform.getStepSize() != 0) {
+                pba->setPluginStepSize(transform.getStepSize());
+            } else {
+                transform.setStepSize(pluginStepSize);
+            }
+
+            if (transform.getBlockSize() != 0) {
+                pba->setPluginBlockSize(transform.getBlockSize());
+            } else {
+                transform.setBlockSize(pluginBlockSize);
+            }
+
+            plugin = new PluginChannelAdapter(plugin);
+
+            if (!m_summaries.empty() ||
+                transform.getSummaryType() != Transform::NoSummary) {
+                PluginSummarisingAdapter *adapter =
+                    new PluginSummarisingAdapter(plugin);
+                adapter->setSummarySegmentBoundaries(m_boundaries);
+                plugin = adapter;
+            }
+
+            if (!plugin->initialise(m_channels, m_blockSize, m_blockSize)) {
+                cerr << "ERROR: Plugin initialise (channels = " << m_channels << ", stepSize = " << m_blockSize << ", blockSize = " << m_blockSize << ") failed." << endl;
+                delete plugin;
+                return false;
+            }
+
+//            cerr << "Initialised plugin" << endl;
+
+            size_t actualStepSize = 0;
+            size_t actualBlockSize = 0;
+            pba->getActualStepAndBlockSizes(actualStepSize, actualBlockSize);
+            transform.setStepSize(actualStepSize);
+            transform.setBlockSize(actualBlockSize);
+
+            Plugin::OutputList outputs = plugin->getOutputDescriptors();
+            for (int i = 0; i < (int)outputs.size(); ++i) {
+
+//                cerr << "Newly initialised plugin output " << i << " has bin count " << outputs[i].binCount << endl;
+
+                m_pluginOutputs[plugin][outputs[i].identifier] = outputs[i];
+                m_pluginOutputIndices[outputs[i].identifier] = i;
+            }
+
+            cerr << "NOTE: Loaded and initialised plugin " << plugin
+                 << " for transform \""
+                 << transform.getIdentifier().toStdString() << "\"" << endl;
+        }
+
+        if (transform.getOutput() == "") {
+            transform.setOutput
+                (plugin->getOutputDescriptors()[0].identifier.c_str());
+        }
+
+        m_transformPluginMap[transform] = plugin;
+
+        if (!(originalTransform == transform)) {
+            m_transformPluginMap[originalTransform] = plugin;
+        }
+
+    } else {
+
+        plugin = m_transformPluginMap[transform];
+    }
+
+    m_plugins[plugin][transform] = writers;
+
+    return true;
+}
+// Register the factory's default transform for transformId at the current sample rate.
+bool FeatureExtractionManager::addDefaultFeatureExtractor
+(TransformId transformId, const vector<FeatureWriter *> &writers)
+{
+    TransformFactory *tf = TransformFactory::getInstance();
+
+    if (m_sampleRate == 0) {
+        if (m_defaultSampleRate == 0) {
+            cerr << "ERROR: Default transform requested, but no default sample rate available" << endl;
+            return false;
+        } else {
+            cerr << "NOTE: Using default sample rate of " << m_defaultSampleRate << " for default transform" << endl;
+            m_sampleRate = m_defaultSampleRate;
+        }
+    }
+
+    Transform transform = tf->getDefaultTransformFor(transformId, m_sampleRate);
+
+    return addFeatureExtractor(transform, writers);
+}
+// Register the transform(s) described by a file: tried as RDF first, then as transform XML.
+bool FeatureExtractionManager::addFeatureExtractorFromFile
+(QString transformXmlFile, const vector<FeatureWriter *> &writers)
+{
+    RDFTransformFactory factory
+        (QUrl::fromLocalFile(QFileInfo(transformXmlFile).absoluteFilePath())
+         .toString());
+    ProgressPrinter printer("Parsing transforms RDF file");
+    std::vector<Transform> transforms = factory.getTransforms(&printer);
+    if (!factory.isOK()) {
+        cerr << "WARNING: FeatureExtractionManager::addFeatureExtractorFromFile: Failed to parse transforms file: " << factory.getErrorString().toStdString() << endl;
+        if (factory.isRDF()) {
+            return false; // no point trying it as XML
+        }
+    }
+    if (!transforms.empty()) {
+        bool success = true;
+        for (int i = 0; i < (int)transforms.size(); ++i) {
+            if (!addFeatureExtractor(transforms[i], writers)) {
+                success = false;
+            }
+        }
+        return success;
+    }
+
+    QFile file(transformXmlFile);
+    if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
+        cerr << "ERROR: Failed to open transform XML file \""
+             << transformXmlFile.toStdString() << "\" for reading" << endl;
+        return false;
+    }
+
+    QTextStream *qts = new QTextStream(&file);
+    QString qs = qts->readAll();
+    delete qts;
+    file.close();
+
+    Transform transform(qs);
+
+    return addFeatureExtractor(transform, writers);
+}
+// Run every registered plugin over one audio source (or each entry of an .m3u playlist) and write the results.
+void FeatureExtractionManager::extractFeatures(QString audioSource)
+{
+    if (m_plugins.empty()) return;
+
+    ProgressPrinter printer("Retrieving audio data...");
+
+    FileSource source(audioSource, &printer);
+    if (!source.isAvailable()) {
+        cerr << "ERROR: File or URL \"" << audioSource.toStdString()
+             << "\" could not be located" << endl;
+        exit(1);
+    }
+
+    source.waitForData();
+
+    if (QFileInfo(audioSource).suffix().toLower() == "m3u") {
+        PlaylistFileReader reader(source);
+        if (reader.isOK()) {
+            vector<QString> files = reader.load();
+            for (int i = 0; i < (int)files.size(); ++i) {
+                extractFeatures(files[i]);
+            }
+            return;
+        } else {
+            cerr << "ERROR: Playlist \"" << audioSource.toStdString()
+                 << "\" could not be opened" << endl;
+            exit(1);
+        }
+    }
+
+    if (m_sampleRate == 0) {
+        cerr << "ERROR: Internal error in FeatureExtractionManager::extractFeatures: Plugin list is non-empty, but no sample rate set" << endl;
+        exit(1);
+    }
+
+    AudioFileReader *reader =
+        AudioFileReaderFactory::createReader(source, m_sampleRate, &printer);
+
+    if (!reader) {
+        cerr << "ERROR: File or URL \"" << audioSource.toStdString()
+             << "\" could not be opened" << endl;
+        exit(1);
+    }
+
+    size_t channels = reader->getChannelCount();
+
+    cerr << "Opened " << channels << "-channel file or URL \"" << audioSource.toStdString() << "\"" << endl;
+
+    // reject file if it has too few channels, plugin will handle if it has too many
+    if ((int)channels < m_channels) {
+        //!!! should not be terminating here!
+        cerr << "ERROR: File or URL \"" << audioSource.toStdString() << "\" has less than " << m_channels << " channels" << endl;
+        exit(1);
+    }
+
+    // allocate audio buffers
+    float **data = new float *[m_channels];
+    for (int c = 0; c < m_channels; ++c) {
+        data[c] = new float[m_blockSize];
+    }
+
+    size_t frameCount = reader->getFrameCount();
+
+//    cerr << "file has " << frameCount << " frames" << endl;
+
+    for (PluginMap::iterator pi = m_plugins.begin();
+         pi != m_plugins.end(); ++pi) {
+
+        Plugin *plugin = pi->first;
+
+//        std::cerr << "Calling reset on " << plugin << std::endl;
+        plugin->reset();
+
+        for (TransformWriterMap::iterator ti = pi->second.begin();
+             ti != pi->second.end(); ++ti) {
+
+            const Transform &transform = ti->first;
+
+            //!!! we may want to set the start and duration times for extraction
+            // in the transform record (defaults of zero indicate extraction
+            // from the whole file)
+//            transform.setStartTime(RealTime::zeroTime);
+//            transform.setDuration
+//                (RealTime::frame2RealTime(reader->getFrameCount(), m_sampleRate));
+
+            string outputId = transform.getOutput().toStdString();
+            if (m_pluginOutputs[plugin].find(outputId) ==
+                m_pluginOutputs[plugin].end()) {
+                //!!! throw?
+                cerr << "WARNING: Nonexistent plugin output \"" << outputId << "\" requested for transform \""
+                     << transform.getIdentifier().toStdString() << "\", ignoring this transform"
+                     << endl;
+/*
+                cerr << "Known outputs for all plugins are as follows:" << endl;
+                for (PluginOutputMap::const_iterator k = m_pluginOutputs.begin();
+                     k != m_pluginOutputs.end(); ++k) {
+                    cerr << "Plugin " << k->first << ": ";
+                    if (k->second.empty()) {
+                        cerr << "(none)";
+                    }
+                    for (OutputMap::const_iterator i = k->second.begin();
+                         i != k->second.end(); ++i) {
+                        cerr << "\"" << i->first << "\" ";
+                    }
+                    cerr << endl;
+                }
+*/
+            }
+        }
+    }
+
+    long startFrame = 0;
+    long endFrame = frameCount;
+
+/*!!! No -- there is no single transform to pull this stuff from --
+ * the transforms may have various start and end times, need to be far
+ * cleverer about this if we're going to support them
+
+    RealTime trStartRT = transform.getStartTime();
+    RealTime trDurationRT = transform.getDuration();
+
+    long trStart = RealTime::realTime2Frame(trStartRT, m_sampleRate);
+    long trDuration = RealTime::realTime2Frame(trDurationRT, m_sampleRate);
+
+    if (trStart == 0 || trStart < startFrame) {
+        trStart = startFrame;
+    }
+
+    if (trDuration == 0) {
+        trDuration = endFrame - trStart;
+    }
+    if (trStart + trDuration > endFrame) {
+        trDuration = endFrame - trStart;
+    }
+
+    startFrame = trStart;
+    endFrame = trStart + trDuration;
+*/
+
+    for (PluginMap::iterator pi = m_plugins.begin();
+         pi != m_plugins.end(); ++pi) {
+
+        for (TransformWriterMap::const_iterator ti = pi->second.begin();
+             ti != pi->second.end(); ++ti) {
+
+            const vector<FeatureWriter *> &writers = ti->second;
+
+            for (int j = 0; j < (int)writers.size(); ++j) {
+                FeatureWriter::TrackMetadata m;
+                m.title = reader->getTitle();
+                m.maker = reader->getMaker();
+                writers[j]->setTrackMetadata(audioSource, m);
+            }
+        }
+    }
+
+    ProgressPrinter extractionProgress("Extracting and writing features...");
+    int progress = 0;
+
+    for (long i = startFrame; i < endFrame; i += m_blockSize) {
+
+        //!!! inefficient, although much of the inefficiency may be
+        // susceptible to optimisation
+
+        SampleBlock frames;
+        reader->getInterleavedFrames(i, m_blockSize, frames);
+
+        // We have to do our own channel handling here; we can't just
+        // leave it to the plugin adapter because the same plugin
+        // adapter may have to serve for input files with various
+        // numbers of channels (so the adapter is simply configured
+        // with a fixed channel count, generally 1).
+
+        int rc = reader->getChannelCount();
+
+        for (int j = 0; j < m_blockSize; ++j) {
+            for (int c = 0; c < m_channels; ++c) {
+                int index;
+                if (c < rc) {
+                    index = j * rc + c;
+                    data[c][j] = 0.f;
+                } else {
+                    index = j * rc + (c % rc);
+                }
+                if (index < (int)frames.size()) {
+                    data[c][j] += frames[index];
+                }
+            }
+        }
+
+        Vamp::RealTime timestamp = Vamp::RealTime::frame2RealTime
+            (i, m_sampleRate);
+
+        for (PluginMap::iterator pi = m_plugins.begin();
+             pi != m_plugins.end(); ++pi) {
+
+            Plugin *plugin = pi->first;
+            Plugin::FeatureSet featureSet = plugin->process(data, timestamp);
+
+            if (!m_summariesOnly) {
+                writeFeatures(audioSource, plugin, featureSet);
+            }
+        }
+
+        int pp = progress;
+        progress = ((i - startFrame) * 100) / (endFrame - startFrame);
+        if (progress > pp) extractionProgress.setProgress(progress);
+    }
+
+    for (PluginMap::iterator pi = m_plugins.begin();
+         pi != m_plugins.end(); ++pi) {
+
+        Plugin *plugin = pi->first;
+        Plugin::FeatureSet featureSet = plugin->getRemainingFeatures();
+
+        if (!m_summariesOnly) {
+            writeFeatures(audioSource, plugin, featureSet);
+        }
+
+        if (!m_summaries.empty()) {
+            PluginSummarisingAdapter *adapter =
+                dynamic_cast<PluginSummarisingAdapter *>(plugin);
+            if (!adapter) {
+                cerr << "WARNING: Summaries requested, but plugin is not a summarising adapter" << endl;
+            } else {
+                for (SummaryNameSet::const_iterator sni = m_summaries.begin();
+                     sni != m_summaries.end(); ++sni) {
+                    featureSet.clear();
+                    //!!! problem here -- we are requesting summaries
+                    //!!! for all outputs, but they in principle have
+                    //!!! different averaging requirements depending
+                    //!!! on whether their features have duration or
+                    //!!! not
+                    featureSet = adapter->getSummaryForAllOutputs
+                        (getSummaryType(*sni),
+                         PluginSummarisingAdapter::ContinuousTimeAverage);
+                    writeFeatures(audioSource, plugin, featureSet,//!!! *sni);
+                                  Transform::stringToSummaryType(sni->c_str()));
+                }
+            }
+        }
+
+        writeSummaries(audioSource, plugin);
+    }
+    for (int c = 0; c < m_channels; ++c) delete[] data[c]; // free the per-file sample buffers
+    delete[] data;
+    delete reader; // nothing else here releases it; presumably the caller of createReader owns it -- TODO confirm
+    finish();
+    extractionProgress.setProgress(100);
+    TempDirectory::getInstance()->cleanup();
+}
+// Write the summary requested by each transform of plugin (transforms with NoSummary are skipped).
+void
+FeatureExtractionManager::writeSummaries(QString audioSource, Plugin *plugin)
+{
+    // caller should have ensured plugin is in m_plugins
+    PluginMap::iterator pi = m_plugins.find(plugin);
+    if (pi == m_plugins.end()) return; // unknown plugin: nothing to write
+    for (TransformWriterMap::const_iterator ti = pi->second.begin();
+         ti != pi->second.end(); ++ti) {
+
+        const Transform &transform = ti->first;
+        // (the writers are looked up again by writeFeatures, so they are not needed here)
+
+        Transform::SummaryType summaryType = transform.getSummaryType();
+        PluginSummarisingAdapter::SummaryType pType =
+            (PluginSummarisingAdapter::SummaryType)summaryType;
+
+        if (transform.getSummaryType() == Transform::NoSummary) {
+            continue;
+        }
+
+        PluginSummarisingAdapter *adapter =
+            dynamic_cast<PluginSummarisingAdapter *>(plugin);
+        if (!adapter) {
+            cerr << "FeatureExtractionManager::writeSummaries: INTERNAL ERROR: Summary requested for transform, but plugin is not a summarising adapter" << endl;
+            continue;
+        }
+
+        Plugin::FeatureSet featureSet = adapter->getSummaryForAllOutputs
+            (pType, PluginSummarisingAdapter::ContinuousTimeAverage);
+
+//        cout << "summary type " << int(pType) << " for transform:" << endl << transform.toXmlString().toStdString()<< endl << "... feature set with " << featureSet.size() << " elts" << endl;
+
+        writeFeatures(audioSource, plugin, featureSet, summaryType);
+    }
+}
+// Hand the features of each matching transform's output to that transform's writers.
+void FeatureExtractionManager::writeFeatures(QString audioSource,
+                                             Plugin *plugin,
+                                             const Plugin::FeatureSet &features,
+                                             Transform::SummaryType summaryType)
+{
+    // caller should have ensured plugin is in m_plugins
+    PluginMap::iterator pi = m_plugins.find(plugin);
+    if (pi == m_plugins.end()) return; // unknown plugin: nothing to write
+    for (TransformWriterMap::const_iterator ti = pi->second.begin();
+         ti != pi->second.end(); ++ti) {
+
+        const Transform &transform = ti->first;
+        const vector<FeatureWriter *> &writers = ti->second;
+
+        if (transform.getSummaryType() != Transform::NoSummary &&
+            m_summaries.empty() &&
+            summaryType == Transform::NoSummary) {
+            continue;
+        }
+
+        if (transform.getSummaryType() != Transform::NoSummary &&
+            summaryType != Transform::NoSummary &&
+            transform.getSummaryType() != summaryType) {
+            continue;
+        }
+
+        string outputId = transform.getOutput().toStdString();
+
+        if (m_pluginOutputs[plugin].find(outputId) ==
+            m_pluginOutputs[plugin].end()) {
+            continue;
+        }
+
+        const Plugin::OutputDescriptor &desc =
+            m_pluginOutputs[plugin][outputId];
+
+        int outputIndex = m_pluginOutputIndices[outputId];
+        Plugin::FeatureSet::const_iterator fsi = features.find(outputIndex);
+        if (fsi == features.end()) continue;
+
+        for (int j = 0; j < (int)writers.size(); ++j) {
+            writers[j]->write
+                (audioSource, transform, desc, fsi->second,
+                 Transform::summaryTypeToString(summaryType).toStdString());
+        }
+    }
+}
+// Flush and finish every writer attached to every registered transform.
+void FeatureExtractionManager::finish()
+{
+    for (PluginMap::iterator pi = m_plugins.begin();
+         pi != m_plugins.end(); ++pi) {
+
+        for (TransformWriterMap::iterator ti = pi->second.begin();
+             ti != pi->second.end(); ++ti) {
+
+            vector<FeatureWriter *> &writers = ti->second;
+
+            for (int i = 0; i < (int)writers.size(); ++i) {
+                writers[i]->flush();
+                writers[i]->finish();
+            }
+        }
+    }
+}
+// Debug aid: dump the transform's XML form to stderr.
+void FeatureExtractionManager::print(Transform transform) const
+{
+    QString qs;
+    QTextStream qts(&qs);
+    transform.toXml(qts);
+    cerr << qs.toStdString() << endl;
+}
diff -r 000000000000 -r 581b1b150a4d FeatureExtractionManager.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/FeatureExtractionManager.h Thu Dec 11 10:22:33 2008 +0000
@@ -0,0 +1,110 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Sonic Annotator
+    A utility for batch feature extraction from audio files.
+    Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London.
+    Copyright 2007-2008 QMUL.
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+*/
+
+#ifndef _FEATURE_EXTRACTION_MANAGER_H_
+#define _FEATURE_EXTRACTION_MANAGER_H_
+// NOTE(review): the six #include targets below are missing in this copy of the patch -- TODO restore.
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+using std::vector;
+using std::set;
+using std::string;
+using std::pair;
+using std::map;
+
+class FeatureWriter;
+// Holds the plugins for a set of transforms and runs them over audio files, passing results to FeatureWriters.
+class FeatureExtractionManager
+{
+public:
+    FeatureExtractionManager();
+    virtual ~FeatureExtractionManager();
+
+    void setChannels(int channels);
+    void setDefaultSampleRate(int sampleRate);
+
+    bool setSummaryTypes(const set<string> &summaryTypes,
+                         bool summariesOnly,
+                         const Vamp::HostExt::PluginSummarisingAdapter::SegmentBoundaries &boundaries);
+
+    bool addFeatureExtractor(Transform transform,
+                             const vector<FeatureWriter *> &writers);
+
+    bool addFeatureExtractorFromFile(QString transformXmlFile,
+                                     const vector<FeatureWriter *> &writers);
+
+    bool addDefaultFeatureExtractor(TransformId transformId,
+                                    const vector<FeatureWriter *> &writers);
+
+    void extractFeatures(QString audioSource);
+
+private:
+    // A plugin may have many outputs, so we can have more than one
+    // transform requested for a single plugin.  The things we want to
+    // run in our process loop are plugins rather than their outputs,
+    // so we maintain a map from the plugins to the transforms desired
+    // of them and then iterate through this map
+
+    typedef map<Transform, vector<FeatureWriter *> > TransformWriterMap;
+    typedef map<Vamp::Plugin *, TransformWriterMap> PluginMap;
+    PluginMap m_plugins;
+
+    // And a map back from transforms to their plugins.  Note that
+    // this is keyed by transform, not transform ID -- two differently
+    // configured transforms with the same ID must use different
+    // plugin instances.
+
+    typedef map<Transform, Vamp::Plugin *> TransformPluginMap;
+    TransformPluginMap m_transformPluginMap;
+
+    // Cache the plugin output descriptors, mapping from plugin to a
+    // map from output ID to output descriptor.
+    typedef map<string, Vamp::Plugin::OutputDescriptor> OutputMap;
+    typedef map<Vamp::Plugin *, OutputMap> PluginOutputMap;
+    PluginOutputMap m_pluginOutputs;
+
+    // Map from plugin output identifier to plugin output index
+    typedef map<string, int> OutputIndexMap;
+    OutputIndexMap m_pluginOutputIndices;
+
+    typedef set<string> SummaryNameSet;
+    SummaryNameSet m_summaries;
+    bool m_summariesOnly;
+    Vamp::HostExt::PluginSummarisingAdapter::SegmentBoundaries m_boundaries;
+
+    void writeSummaries(QString audioSource, Vamp::Plugin *);
+
+    void writeFeatures(QString audioSource,
+                       Vamp::Plugin *,
+                       const Vamp::Plugin::FeatureSet &,
+                       Transform::SummaryType summaryType =
+                       Transform::NoSummary);
+    void finish();
+
+    int m_blockSize;
+    int m_defaultSampleRate;
+    int m_sampleRate;
+    int m_channels;
+
+    void print(Transform transform) const;
+};
+
+#endif
diff -r 000000000000 -r 581b1b150a4d FeatureWriterFactory.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/FeatureWriterFactory.cpp Thu Dec 11 10:22:33 2008 +0000
@@ -0,0 +1,49 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Sonic Annotator
+    A utility for batch feature extraction from audio files.
+    Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London.
+    Copyright 2007-2008 QMUL.
+ + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + + +#include "FeatureWriterFactory.h" + +#include "DefaultFeatureWriter.h" +#include "rdf/RDFFeatureWriter.h" +#include "AudioDBFeatureWriter.h" +#include "transform/CSVFeatureWriter.h" + +set +FeatureWriterFactory::getWriterTags() +{ + set tags; + tags.insert("default"); + tags.insert("rdf"); + tags.insert("audiodb"); + tags.insert("csv"); + return tags; +} + +FeatureWriter * +FeatureWriterFactory::createWriter(string tag) +{ + if (tag == "default") { + return new DefaultFeatureWriter(); + } else if (tag == "rdf") { + return new RDFFeatureWriter(); + } else if (tag == "audiodb") { + return new AudioDBFeatureWriter(); + } else if (tag == "csv") { + return new CSVFeatureWriter(); + } + + return 0; +} diff -r 000000000000 -r 581b1b150a4d FeatureWriterFactory.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/FeatureWriterFactory.h Thu Dec 11 10:22:33 2008 +0000 @@ -0,0 +1,36 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Sonic Annotator + A utility for batch feature extraction from audio files. + Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London. + Copyright 2007-2008 QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. 
+*/ + + +#ifndef _FEATURE_WRITER_FACTORY_H_ +#define _FEATURE_WRITER_FACTORY_H_ + +#include +#include + +using std::set; +using std::string; + +class FeatureWriter; + +class FeatureWriterFactory +{ +public: + static set getWriterTags(); + static FeatureWriter *createWriter(string tag); +}; + + +#endif diff -r 000000000000 -r 581b1b150a4d README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Thu Dec 11 10:22:33 2008 +0000 @@ -0,0 +1,287 @@ + +Sonic Annotator +=============== + +Sonic Annotator is a utility program for batch feature extraction from +audio files. It runs Vamp audio analysis plugins on audio files, and +can write the result features in a selection of formats. + + +A Quick Tutorial +---------------- + +To use Sonic Annotator, you need to tell it three things: what audio +files to extract features from; what features to extract; and how and +where to write the results. You can also optionally tell it to +summarise the features. + + +1. What audio files to extract features from + +Sonic Annotator accepts a list of audio files on the command line. +Any argument that is not understood as a supported command-line option +will be taken to be the name of an audio file. Any number of files +may be listed. + +Several common audio file formats are supported, including MP3, Ogg, +and a number of PCM formats such as WAV and AIFF. AAC is supported on +OS/X only, and only if not DRM protected. WMA is not supported. + +File paths do not have to be local; you can also provide remote HTTP +or FTP URLs for Sonic Annotator to retrieve. + +Sonic Annotator also accepts the names of playlist files (.m3u +extension) and will process every file found in the playlist. + +Finally, you can provide a local directory path instead of a file, +together with the -r (recursive) option, for Sonic Annotator to +process every audio file found in that directory or any of its +subdirectories. + + +2. 
What features to extract + +Sonic Annotator applies "transforms" to its input audio files, where a +transform (in this terminology) consists of a Vamp plugin together +with a certain set of parameters and a specified execution context: +step and block size, sample rate, etc. + +(See http://www.vamp-plugins.org/ for more information about Vamp +plugins.) + +To use a particular transform, specify its filename on the command +line with the -t option. + +Transforms are usually described in RDF, following the transform part +of the Vamp plugin ontology (http://purl.org/ontology/vamp/). A +Transform may use any Vamp plugin that is currently installed and +available on the system. You can obtain a list of available plugin +outputs by running Sonic Annotator with the -l option, and you can +obtain a skeleton transform description for one of these plugins with +the -s option. + +For example, if the example plugins from the Vamp plugin SDK are +available and no other plugins are installed, you might have an +exchange like this: + + $ sonic-annotator -l + vamp:vamp-example-plugins:amplitudefollower:amplitude + vamp:vamp-example-plugins:fixedtempo:acf + vamp:vamp-example-plugins:fixedtempo:detectionfunction + vamp:vamp-example-plugins:fixedtempo:filtered_acf + vamp:vamp-example-plugins:fixedtempo:tempo + vamp:vamp-example-plugins:fixedtempo:candidates + vamp:vamp-example-plugins:percussiononsets:detectionfunction + vamp:vamp-example-plugins:percussiononsets:onsets + vamp:vamp-example-plugins:powerspectrum:powerspectrum + vamp:vamp-example-plugins:spectralcentroid:linearcentroid + vamp:vamp-example-plugins:spectralcentroid:logcentroid + vamp:vamp-example-plugins:zerocrossing:counts + vamp:vamp-example-plugins:zerocrossing:zerocrossings + $ sonic-annotator -s vamp:vamp-example-plugins:fixedtempo:tempo + @prefix xsd: . + @prefix vamp: . + @prefix : <#> . 
+ + :transform a vamp:Transform ; + vamp:plugin ; + vamp:step_size "64"^^xsd:int ; + vamp:block_size "256"^^xsd:int ; + vamp:parameter_binding [ + vamp:parameter [ vamp:identifier "maxbpm" ] ; + vamp:value "190"^^xsd:float ; + ] ; + vamp:parameter_binding [ + vamp:parameter [ vamp:identifier "maxdflen" ] ; + vamp:value "10"^^xsd:float ; + ] ; + vamp:parameter_binding [ + vamp:parameter [ vamp:identifier "minbpm" ] ; + vamp:value "50"^^xsd:float ; + ] ; + vamp:output . + $ + +The output of -s is an RDF/Turtle document describing the default +settings for the Tempo output of the Fixed Tempo Estimator plugin in +the Vamp plugin SDK. + +(The exact format of the RDF printed may differ -- e.g. if the +plugin's RDF description is not installed and so its "home" URI is not +known -- but the result should be functionally equivalent to this.) + +You could run this transform by saving the RDF to a file and +specifying that file with -t: + + $ sonic-annotator -s vamp:vamp-example-plugins:fixedtempo:tempo > test.n3 + $ sonic-annotator -t test.n3 audio.wav -w csv --csv-stdout + (... logging output on stderr, then ...) + "audio.wav",0.002902494,5.196916099,68.7916,"68.8 bpm" + $ + +The single line of output above consists of the audio file name, the +timestamp and duration for a single feature, the value of that feature +(the estimated tempo of the given region of time from that file, in +bpm -- the plugin in question performs a single tempo estimation and +nothing else) and the feature's label. + +A quicker way to achieve the above is to use the -d (default) option +to tell Sonic Annotator to use directly the default configuration for +a named transform: + + $ sonic-annotator -d vamp:vamp-example-plugins:fixedtempo:tempo audio.wav -w csv --csv-stdout + (... some log output on stderr, then ...) 
+ "audio.wav",0.002902494,5.196916099,68.7916,"68.8 bpm" + $ + +Although handy for experimentation, the -d option is inadvisable in +any "production" situation because the plugin configuration is not +guaranteed to be the same each time (for example if an updated version +of a plugin changes some of its defaults). It's better to save a +well-defined transform to file and refer to that, even if it is simply +the transform created by the skeleton option. + +To run more than one transform on the same audio files, just put more +than one set of transform RDF descriptions in the same file, or give +the -t option more than once with separate transform description +files. Remember that if you want to specify more than one transform +in the same file, they will need to have distinct URIs (that is, the +":transform" part of the example above, which may be any arbitrary +name, must be distinct for each described transform). + + +3. How and where to write the results + +Sonic Annotator supports various different output modules (and it is +fairly easy for the developer to add new ones). You have to choose at +least one output module; use the -w (writer) option to do so. Each +module has its own set of parameters which can be adjusted on the +command line, as well as its own default rules about where to write +the results. + +The following writers are currently supported. (Others exist, but are +not properly implemented or not supported.) + + * csv + + Writes the results into comma-separated data files. + + One file is created for each transform applied to each input audio + file, named after the input audio file and transform name with .csv + suffix and ":" replaced by "_" throughout, placed in the same + directory as the audio file. + + To instruct Sonic Annotator to place the output files in another + location, use --csv-basedir with a directory name. + + To write a single file with all data in it, use --csv-one-file. 
+ + To write all data to stdout instead of to a file, use --csv-stdout. + + Sonic Annotator will not write to an output file that already + exists. If you want to make it do this, use --csv-force to + overwrite or --csv-append to append to it. + + The data generated consists of one line for each result feature, + containing the feature timestamp, feature duration if present, all + of the feature's bin values in order, followed by the feature's + label if present. If the --csv-one-file or --csv-stdout option is + specified, then an additional column will appear before any of the + above, containing the audio file name from which the feature was + extracted, if it differs from that of the previous row. + + The default column separator is a comma; you can specify a + different one with the --csv-separator option. + + * rdf + + Writes the results into RDF/Turtle documents following the Audio + Features ontology (http://purl.org/ontology/af/). + + One file is created for each input audio file containing the + features extracted by all transforms applied to that file, named + after the input audio file with .n3 extension, placed in the same + directory as the audio file. + + To instruct Sonic Annotator to place the output files in another + location, use --rdf-basedir with a directory name. + + To write a single file with all data (from all input audio files) + in it, use --rdf-one-file. + + To write one file for each transform applied to each input audio + file, named after the input audio file and transform name with .n3 + suffix and ":" replaced by "_" throughout, use --rdf-many-files. + + To write all data to stdout instead of to a file, use --rdf-stdout. + + Sonic Annotator will not write to an output file that already + exists. If you want to make it do this, use --rdf-force to + overwrite or --rdf-append to append to it. 
+ + Sonic Annotator will use plugin description RDF if available to + enhance its output (for example identifying note onset times as + note onset times, if the plugin's RDF says that is what it + produces, rather than writing them as plain events). Best results + will be obtained if an RDF document is provided with your plugins + (for example, vamp-example-plugins.n3) and you have this installed + in the same location as the plugins. To override this enhanced + output and write plain events for all features, use --rdf-plain. + + The output RDF will include an available_as property linking the + results to the original audio signal URI. By default, this will + point to the URI of the file or resource containing the audio that + Sonic Annotator processed, such as the file:/// location on disk. + To override this, for example to process a local copy of a file + while generating RDF that describes a copy of it available on a + network, you can use the --rdf-signal-uri option to specify an + alternative signal URI. + + +4. Optionally, how to summarise the features + +Sonic Annotator can also calculate and write summaries of features, +such as mean and median values. + +To obtain a summary as well as the feature results, just use the -S +option, naming the type of summary you want (min, max, mean, median, +mode, sum, variance, sd or count). You can also tell it to produce +only the summary, not the individual features, with --summary-only. + +Alternatively, you can specify a summary in a transform description. +The following example tells Sonic Annotator to write both the times of +note onsets estimated by the simple percussion onset detector example +plugin, and the variance of the plugin's onset detection function. +(It will only process the audio file and run the plugin once.) + + @prefix rdf: . + @prefix vamp: . + @prefix examples: . + @prefix : <#>. 
+ + :transform1 a vamp:Transform; + vamp:plugin examples:percussiononsets ; + vamp:output examples:percussiononsets_output_onsets . + + :transform0 a vamp:Transform; + vamp:plugin examples:percussiononsets ; + vamp:output examples:percussiononsets_output_detectionfunction ; + vamp:summary_type "variance" . + +Sonic Annotator can also summarise in segments -- if you provide a +comma-separated list of times as an argument to the --segments option, +it will calculate one summary for each segment bounded by the times +you provided. For example, + + $ sonic-annotator -d vamp:vamp-example-plugins:percussiononsets:detectionfunction -S variance --summary-only --segments 1,2,3 -w csv --csv-stdout audio.wav + (... some log output on stderr, then ...) + ,0.000000000,1.000000000,variance,1723.99,"(variance, continuous-time average)" + ,1.000000000,1.000000000,variance,1981.75,"(variance, continuous-time average)" + ,2.000000000,1.000000000,variance,1248.79,"(variance, continuous-time average)" + ,3.000000000,7.031020407,variance,1030.06,"(variance, continuous-time average)" + +Here the first row contains a summary covering the time period from 0 +to 1 second, the second from 1 to 2 seconds, the third from 2 to 3 +seconds and the fourth from 3 seconds to the end of the (short) audio +file. 
+ diff -r 000000000000 -r 581b1b150a4d deploy_mac.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deploy_mac.sh Thu Dec 11 10:22:33 2008 +0000 @@ -0,0 +1,34 @@ +#!/bin/bash + +# this script should be executed from the directory that contains the app directory (application bundle) +# it copies the required 3rd party libraries into the application bundle and corrects the library install names and references + +TARGETPATH="sonic-annotator.app/Contents/Frameworks/" + +mkdir "$TARGETPATH" + +QTPREFIX=/Library/Frameworks/ +QTFWKS="QtXml QtCore QtNetwork" + +# copy the dynamic libraries into the app bundle + +for FWK in $QTFWKS; do + cp ${QTPREFIX}${FWK}.framework/Versions/4/${FWK} "${TARGETPATH}" +done + +# change the id's of the dylibs +for FWK in $QTFWKS; do + install_name_tool -id @executable_path/../Frameworks/${FWK} "$TARGETPATH/$FWK" +done + +# tell the linker to look for dylibs in the app bundle +for FWK in $QTFWKS; do + install_name_tool -change ${FWK}.framework/Versions/4/${FWK} @executable_path/../Frameworks/${FWK} "sonic-annotator.app/Contents/MacOS/sonic-annotator" +done + +# correct dependencies between QT dylibs +for FWK in $QTFWKS; do + case $FWK in QtCore) continue;; esac + install_name_tool -change QtCore.framework/Versions/4/QtCore @executable_path/../Frameworks/QtCore "$TARGETPATH/${FWK}" +done + diff -r 000000000000 -r 581b1b150a4d feature-description-example.n3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/feature-description-example.n3 Thu Dec 11 10:22:33 2008 +0000 @@ -0,0 +1,82 @@ + +@prefix rdf: . +@prefix rdfs: . +@prefix dc: . +@prefix mo: . +@prefix af: . +@prefix event: . +@prefix tl: . +@prefix xsd: . +@prefix : <#> . + + +# Describe the signal we're annotating, and associate it with a +# timeline (the timeline is named as :base_timeline but never given +# any attributes in this document). Any feature that is associated +# with the same timeline will be taken as derived from this signal. 
+ +:audio_signal a mo:Signal ; + mo:available_as ; + mo:time :signal_interval . + +:signal_interval a tl:Interval ; + tl:onTimeLine :base_timeline ; + tl:beginsAt "PT0"^^xsd:duration . + + +# A sparse feature event. + +:event0 a af:Onset ; + event:time :time0 . + +:time0 a tl:Instant ; + tl:onTimeLine :base_timeline ; + tl:at "PT0.185759637S"^^xsd:duration . + +# Alternatively we could write that with a blank node. + +:event1 a af:Onset ; + event:time [ + a tl:Instant ; + tl:onTimeLine :base_timeline ; + tl:at "PT0.510839002S"^^xsd:duration ; + ] . + + +# For a dense feature, we need an appropriately sampled, windowed timeline. + +:feature_timeline a tl:DiscreteTimeLine . + +:feature_timeline_map a tl:UniformSamplingWindowingMap ; + tl:rangeTimeLine :feature_timeline ; + tl:domainTimeLine :base_timeline ; + tl:sampleRate "44100"^^xsd:int ; + tl:windowLength "1024"^^xsd:int ; + tl:hopSize "512"^^xsd:int . + + +# ... and an interval, defined in terms of hops (the discrete steps of +# the windowed timeline). + +:feature_interval a tl:Interval ; + tl:onTimeLine :feature_timeline ; + tl:beginsAt "0"^^xsd:int ; + tl:duration "5634"^^xsd:int . + + +# Then our feature is a signal that is on the interval we just +# described. We associate it explicitly with the original audio +# signal, although presumably we could leave the association implicit, +# to be derived from the relationships between timelines, just as it +# is for the sparse features above. + +:audio_signal af:signal_feature :feature1 . + +:feature_signal_type rdfs:subClassOf af:DetectionFunction ; + dc:title "Detection Function from Simple Percussion Onset Detector" . + +:feature1 a :feature_signal_type ; + mo:time :feature_interval ; + af:dimensions "12 12345" ; + af:value "0 0 0 0 0 0 0 1 2 3 1 24 236 123213 (etc)" . 
+ diff -r 000000000000 -r 581b1b150a4d main.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/main.cpp Thu Dec 11 10:22:33 2008 +0000 @@ -0,0 +1,735 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Sonic Annotator + A utility for batch feature extraction from audio files. + Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London. + Copyright 2007-2008 QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +using std::cout; +using std::cerr; +using std::endl; +using std::vector; +using std::string; + +#include "base/Exceptions.h" +#include "base/TempDirectory.h" + +#include "data/fileio/AudioFileReaderFactory.h" +#include "data/fileio/PlaylistFileReader.h" + +#include "transform/Transform.h" +#include "transform/TransformFactory.h" + +#include "FeatureExtractionManager.h" +#include "transform/FeatureWriter.h" +#include "FeatureWriterFactory.h" + +#include "rdf/RDFTransformFactory.h" + +#include + +#ifdef HAVE_FFTW3 +#include +#endif + +// Desired options: +// +// * output preference: +// - all data in one file +// - one file per input file +// - one file per input file per transform +// - (any use for: one file per transform?) 
+// +// * output location: +// - same directory as input file +// - current directory +// +// * output filename: +// - based on input (obvious choice for one file per input file modes) +// - specified on command line (obvious choice for all in one file mode) +// +// * output format: one or more of +// - RDF +// - AudioDB +// - Vamp Simple Host format +// - CSV +// +// * input handling: +// - run each transform on each input file separately +// - provide all input files to the same transform, one per channel +// +// * format-specific options: +// - RDF format: fancy/plain RDF +// - CSV format: separator, timestamp type +// note: do the output file/location also count as format-specific options? +// an output writer that wrote to a database would have different options... +// +// * debug level and progress output +// +// * other potential options: +// - ignore version mismatches in Transform specifications +// - sample rate: force a given rate; use file rate instead of rate in +// Transform spec +// +// * other potential instructions: +// - write out a skeleton Transform file for a specified plugin +// - write out skeleton RDF for a plugin library (i.e. do the job of +// RDF template_generator) +// - verify that RDF for a plugin library matches the plugin +// +// MAYBE: +// * transform(s) to run: +// - supply transform file names on command line +// - use all transforms found in a given directory? 
+// +// MAYBE: +// * input files to transform: +// - supply file names or URIs on command line +// - use all files in a given directory or tree + +static QString +wrap(QString s, int len, int pfx = 0) +{ + QString ws; + QStringList sl(s.split(' ')); + int i = 0, c = 0; + while (i < sl.size()) { + int wl = sl[i].length(); + if (c + wl < len) { + if (c > 0) { + ws += ' '; + ++c; + } + } else { + if (c > 0) { + ws += '\n'; + for (int j = 0; j < pfx; ++j) ws += ' '; + c = 0; + } + } + ws += sl[i]; + c += wl; + ++i; + } + return ws; +} + +void usage(QString myname) +{ + set writers = FeatureWriterFactory::getWriterTags(); + + cerr << endl; + cerr << "Sonic Annotator" << endl; + cerr << "A utility for batch feature extraction from audio files." << endl; + cerr << "Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London." << endl; + cerr << "Copyright 2007-2008 Queen Mary, University of London." << endl; + cerr << endl; + cerr << "This program is free software. You may redistribute copies of it under the" << endl; + cerr << "terms of the GNU General Public License ." << endl; + cerr << "This program is supplied with NO WARRANTY, to the extent permitted by law." << endl; + cerr << endl; + cerr << " Usage: " << myname.toStdString() + << " [-mr] -t trans.xml [...] -w [...]