Chris@0: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@0: Chris@0: /* Chris@0: Sonic Visualiser Chris@0: An audio file viewer and annotation editor. Chris@0: Centre for Digital Music, Queen Mary, University of London. Chris@0: This file copyright 2006 Chris Cannam. Chris@0: Chris@0: This program is free software; you can redistribute it and/or Chris@0: modify it under the terms of the GNU General Public License as Chris@0: published by the Free Software Foundation; either version 2 of the Chris@0: License, or (at your option) any later version. See the file Chris@0: COPYING included with this distribution for more information. Chris@0: */ Chris@0: Chris@0: #include "FeatureExtractionPluginTransform.h" Chris@0: Chris@0: #include "plugin/FeatureExtractionPluginFactory.h" Chris@0: #include "plugin/PluginXml.h" Chris@0: #include "vamp-sdk/Plugin.h" Chris@0: Chris@1: #include "data/model/Model.h" Chris@0: #include "base/Window.h" Chris@1: #include "data/model/SparseOneDimensionalModel.h" Chris@1: #include "data/model/SparseTimeValueModel.h" Chris@3: #include "data/model/EditableDenseThreeDimensionalModel.h" Chris@1: #include "data/model/DenseTimeValueModel.h" Chris@1: #include "data/model/NoteModel.h" Chris@3: #include "data/model/FFTModel.h" Chris@55: #include "data/model/WaveFileModel.h" Chris@0: Chris@0: #include Chris@0: Chris@0: #include Chris@0: Chris@0: FeatureExtractionPluginTransform::FeatureExtractionPluginTransform(Model *inputModel, Chris@0: QString pluginId, Chris@27: const ExecutionContext &context, Chris@0: QString configurationXml, Chris@27: QString outputName) : Chris@27: PluginTransform(inputModel, context), Chris@0: m_plugin(0), Chris@0: m_descriptor(0), Chris@0: m_outputFeatureNo(0) Chris@0: { Chris@0: // std::cerr << "FeatureExtractionPluginTransform::FeatureExtractionPluginTransform: plugin " << pluginId.toStdString() << ", outputName " << outputName.toStdString() << std::endl; Chris@0: Chris@0: FeatureExtractionPluginFactory *factory = Chris@0: FeatureExtractionPluginFactory::instanceFor(pluginId); Chris@0: Chris@0: if (!factory) { Chris@0: std::cerr << "FeatureExtractionPluginTransform: No factory available for plugin id \"" Chris@0: << pluginId.toStdString() << "\"" << std::endl; Chris@0: return; Chris@0: } Chris@0: Chris@0: m_plugin = factory->instantiatePlugin(pluginId, m_input->getSampleRate()); Chris@0: Chris@0: if (!m_plugin) { Chris@0: std::cerr << "FeatureExtractionPluginTransform: Failed to instantiate plugin \"" Chris@0: << pluginId.toStdString() << "\"" << std::endl; Chris@0: return; Chris@0: } Chris@0: Chris@0: if (configurationXml != "") { Chris@0: PluginXml(m_plugin).setParametersFromXml(configurationXml); Chris@0: } Chris@0: Chris@0: DenseTimeValueModel *input = getInput(); Chris@0: if (!input) return; Chris@0: Chris@0: size_t channelCount = input->getChannelCount(); Chris@0: if (m_plugin->getMaxChannelCount() < channelCount) { Chris@0: channelCount = 1; Chris@0: } Chris@0: if (m_plugin->getMinChannelCount() > channelCount) { Chris@0: std::cerr << "FeatureExtractionPluginTransform:: " Chris@0: << "Can't provide enough channels to plugin (plugin min " Chris@0: << m_plugin->getMinChannelCount() << ", max " Chris@0: << m_plugin->getMaxChannelCount() << ", input model has " Chris@0: << input->getChannelCount() << ")" << std::endl; Chris@0: return; Chris@0: } Chris@0: Chris@27: std::cerr << "Initialising feature extraction plugin with channels = " Chris@27: << channelCount << ", step = " << m_context.stepSize Chris@27: << ", block = " << m_context.blockSize << std::endl; Chris@27: Chris@27: if (!m_plugin->initialise(channelCount, Chris@27: m_context.stepSize, Chris@27: m_context.blockSize)) { Chris@0: std::cerr << "FeatureExtractionPluginTransform: Plugin " Chris@0: << m_plugin->getName() << " failed to initialise!" << std::endl; Chris@0: return; Chris@0: } Chris@0: Chris@0: Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors(); Chris@0: Chris@0: if (outputs.empty()) { Chris@0: std::cerr << "FeatureExtractionPluginTransform: Plugin \"" Chris@0: << pluginId.toStdString() << "\" has no outputs" << std::endl; Chris@0: return; Chris@0: } Chris@0: Chris@0: for (size_t i = 0; i < outputs.size(); ++i) { Chris@0: if (outputName == "" || outputs[i].name == outputName.toStdString()) { Chris@0: m_outputFeatureNo = i; Chris@0: m_descriptor = new Vamp::Plugin::OutputDescriptor Chris@0: (outputs[i]); Chris@0: break; Chris@0: } Chris@0: } Chris@0: Chris@0: if (!m_descriptor) { Chris@0: std::cerr << "FeatureExtractionPluginTransform: Plugin \"" Chris@0: << pluginId.toStdString() << "\" has no output named \"" Chris@0: << outputName.toStdString() << "\"" << std::endl; Chris@0: return; Chris@0: } Chris@0: Chris@0: // std::cerr << "FeatureExtractionPluginTransform: output sample type " Chris@0: // << m_descriptor->sampleType << std::endl; Chris@0: Chris@0: int binCount = 1; Chris@0: float minValue = 0.0, maxValue = 0.0; Chris@0: Chris@0: if (m_descriptor->hasFixedBinCount) { Chris@0: binCount = m_descriptor->binCount; Chris@0: } Chris@0: Chris@0: // std::cerr << "FeatureExtractionPluginTransform: output bin count " Chris@0: // << binCount << std::endl; Chris@0: Chris@0: if (binCount > 0 && m_descriptor->hasKnownExtents) { Chris@0: minValue = m_descriptor->minValue; Chris@0: maxValue = m_descriptor->maxValue; Chris@0: } Chris@0: Chris@0: size_t modelRate = m_input->getSampleRate(); Chris@0: size_t modelResolution = 1; Chris@0: Chris@0: switch (m_descriptor->sampleType) { Chris@0: Chris@0: case Vamp::Plugin::OutputDescriptor::VariableSampleRate: Chris@0: if (m_descriptor->sampleRate != 0.0) { Chris@0: modelResolution = size_t(modelRate / m_descriptor->sampleRate + 0.001); Chris@0: } Chris@0: break; Chris@0: Chris@0: case Vamp::Plugin::OutputDescriptor::OneSamplePerStep: Chris@27: modelResolution = m_context.stepSize; Chris@0: break; Chris@0: Chris@0: case Vamp::Plugin::OutputDescriptor::FixedSampleRate: Chris@0: modelRate = size_t(m_descriptor->sampleRate + 0.001); Chris@0: break; Chris@0: } Chris@0: Chris@0: if (binCount == 0) { Chris@0: Chris@0: m_output = new SparseOneDimensionalModel(modelRate, modelResolution, Chris@0: false); Chris@0: Chris@0: } else if (binCount == 1) { Chris@0: Chris@0: SparseTimeValueModel *model = new SparseTimeValueModel Chris@0: (modelRate, modelResolution, minValue, maxValue, false); Chris@0: model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str()); Chris@0: Chris@0: m_output = model; Chris@0: Chris@0: } else if (m_descriptor->sampleType == Chris@0: Vamp::Plugin::OutputDescriptor::VariableSampleRate) { Chris@0: Chris@0: // We don't have a sparse 3D model, so interpret this as a Chris@0: // note model. There's nothing to define which values to use Chris@0: // as which parameters of the note -- for the moment let's Chris@0: // treat the first as pitch, second as duration in frames, Chris@0: // third (if present) as velocity. (Our note model doesn't Chris@0: // yet store velocity.) Chris@0: //!!! todo: ask the user! Chris@0: Chris@0: NoteModel *model = new NoteModel Chris@0: (modelRate, modelResolution, minValue, maxValue, false); Chris@0: model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str()); Chris@0: Chris@0: m_output = model; Chris@0: Chris@0: } else { Chris@0: Chris@3: m_output = new EditableDenseThreeDimensionalModel Chris@3: (modelRate, modelResolution, binCount, false); Chris@0: Chris@0: if (!m_descriptor->binNames.empty()) { Chris@0: std::vector names; Chris@0: for (size_t i = 0; i < m_descriptor->binNames.size(); ++i) { Chris@0: names.push_back(m_descriptor->binNames[i].c_str()); Chris@0: } Chris@3: (dynamic_cast(m_output)) Chris@0: ->setBinNames(names); Chris@0: } Chris@0: } Chris@0: } Chris@0: Chris@0: FeatureExtractionPluginTransform::~FeatureExtractionPluginTransform() Chris@0: { Chris@0: delete m_plugin; Chris@0: delete m_descriptor; Chris@0: } Chris@0: Chris@0: DenseTimeValueModel * Chris@0: FeatureExtractionPluginTransform::getInput() Chris@0: { Chris@0: DenseTimeValueModel *dtvm = Chris@0: dynamic_cast(getInputModel()); Chris@0: if (!dtvm) { Chris@0: std::cerr << "FeatureExtractionPluginTransform::getInput: WARNING: Input model is not conformable to DenseTimeValueModel" << std::endl; Chris@0: } Chris@0: return dtvm; Chris@0: } Chris@0: Chris@0: void Chris@0: FeatureExtractionPluginTransform::run() Chris@0: { Chris@0: DenseTimeValueModel *input = getInput(); Chris@0: if (!input) return; Chris@0: Chris@55: while (!input->isReady()) { Chris@55: if (dynamic_cast(input)) break; // no need to wait Chris@55: std::cerr << "FeatureExtractionPluginTransform::run: Waiting for input model to be ready..." << std::endl; Chris@55: sleep(1); Chris@55: } Chris@55: Chris@0: if (!m_output) return; Chris@0: Chris@0: size_t sampleRate = m_input->getSampleRate(); Chris@0: Chris@0: size_t channelCount = input->getChannelCount(); Chris@0: if (m_plugin->getMaxChannelCount() < channelCount) { Chris@0: channelCount = 1; Chris@0: } Chris@0: Chris@0: float **buffers = new float*[channelCount]; Chris@0: for (size_t ch = 0; ch < channelCount; ++ch) { Chris@27: buffers[ch] = new float[m_context.blockSize]; Chris@0: } Chris@0: Chris@0: bool frequencyDomain = (m_plugin->getInputDomain() == Chris@0: Vamp::Plugin::FrequencyDomain); Chris@3: std::vector fftModels; Chris@0: Chris@0: if (frequencyDomain) { Chris@0: for (size_t ch = 0; ch < channelCount; ++ch) { Chris@3: fftModels.push_back(new FFTModel Chris@0: (getInput(), Chris@27: channelCount == 1 ? m_context.channel : ch, Chris@27: m_context.windowType, Chris@27: m_context.blockSize, Chris@27: m_context.stepSize, Chris@27: m_context.blockSize, Chris@0: false)); Chris@0: } Chris@0: } Chris@0: Chris@0: long startFrame = m_input->getStartFrame(); Chris@0: long endFrame = m_input->getEndFrame(); Chris@0: long blockFrame = startFrame; Chris@0: Chris@0: long prevCompletion = 0; Chris@0: Chris@0: while (1) { Chris@0: Chris@0: if (frequencyDomain) { Chris@27: if (blockFrame - int(m_context.blockSize)/2 > endFrame) break; Chris@0: } else { Chris@0: if (blockFrame >= endFrame) break; Chris@0: } Chris@0: Chris@33: // std::cerr << "FeatureExtractionPluginTransform::run: blockFrame " Chris@33: // << blockFrame << std::endl; Chris@0: Chris@0: long completion = Chris@27: (((blockFrame - startFrame) / m_context.stepSize) * 99) / Chris@27: ( (endFrame - startFrame) / m_context.stepSize); Chris@0: Chris@0: // channelCount is either m_input->channelCount or 1 Chris@0: Chris@0: for (size_t ch = 0; ch < channelCount; ++ch) { Chris@0: if (frequencyDomain) { Chris@27: int column = (blockFrame - startFrame) / m_context.stepSize; Chris@27: for (size_t i = 0; i < m_context.blockSize/2; ++i) { Chris@3: fftModels[ch]->getValuesAt Chris@0: (column, i, buffers[ch][i*2], buffers[ch][i*2+1]); Chris@0: } Chris@0: /*!!! Chris@0: float sum = 0.0; Chris@27: for (size_t i = 0; i < m_context.blockSize/2; ++i) { Chris@0: sum += buffers[ch][i*2]; Chris@0: } Chris@0: if (fabs(sum) < 0.0001) { Chris@0: std::cerr << "WARNING: small sum for column " << column << " (sum is " << sum << ")" << std::endl; Chris@0: } Chris@0: */ Chris@0: } else { Chris@0: getFrames(ch, channelCount, Chris@27: blockFrame, m_context.blockSize, buffers[ch]); Chris@0: } Chris@0: } Chris@0: Chris@0: Vamp::Plugin::FeatureSet features = m_plugin->process Chris@0: (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate)); Chris@0: Chris@0: for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) { Chris@0: Vamp::Plugin::Feature feature = Chris@0: features[m_outputFeatureNo][fi]; Chris@0: addFeature(blockFrame, feature); Chris@0: } Chris@0: Chris@0: if (blockFrame == startFrame || completion > prevCompletion) { Chris@0: setCompletion(completion); Chris@0: prevCompletion = completion; Chris@0: } Chris@0: Chris@27: blockFrame += m_context.stepSize; Chris@0: } Chris@0: Chris@0: Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures(); Chris@0: Chris@0: for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) { Chris@0: Vamp::Plugin::Feature feature = Chris@0: features[m_outputFeatureNo][fi]; Chris@0: addFeature(blockFrame, feature); Chris@0: } Chris@0: Chris@0: if (frequencyDomain) { Chris@0: for (size_t ch = 0; ch < channelCount; ++ch) { Chris@3: delete fftModels[ch]; Chris@0: } Chris@0: } Chris@0: Chris@0: setCompletion(100); Chris@0: } Chris@0: Chris@0: void Chris@0: FeatureExtractionPluginTransform::getFrames(int channel, int channelCount, Chris@0: long startFrame, long size, Chris@0: float *buffer) Chris@0: { Chris@0: long offset = 0; Chris@0: Chris@0: if (startFrame < 0) { Chris@0: for (int i = 0; i < size && startFrame + i < 0; ++i) { Chris@0: buffer[i] = 0.0f; Chris@0: } Chris@0: offset = -startFrame; Chris@0: size -= offset; Chris@0: if (size <= 0) return; Chris@0: startFrame = 0; Chris@0: } Chris@0: Chris@0: long got = getInput()->getValues Chris@27: ((channelCount == 1 ? m_context.channel : channel), Chris@0: startFrame, startFrame + size, buffer + offset); Chris@0: Chris@0: while (got < size) { Chris@0: buffer[offset + got] = 0.0; Chris@0: ++got; Chris@0: } Chris@0: Chris@27: if (m_context.channel == -1 && channelCount == 1 && Chris@0: getInput()->getChannelCount() > 1) { Chris@0: // use mean instead of sum, as plugin input Chris@0: int cc = getInput()->getChannelCount(); Chris@0: for (long i = 0; i < size; ++i) { Chris@0: buffer[i] /= cc; Chris@0: } Chris@0: } Chris@0: } Chris@0: Chris@0: void Chris@0: FeatureExtractionPluginTransform::addFeature(size_t blockFrame, Chris@0: const Vamp::Plugin::Feature &feature) Chris@0: { Chris@0: size_t inputRate = m_input->getSampleRate(); Chris@0: Chris@0: // std::cerr << "FeatureExtractionPluginTransform::addFeature(" Chris@0: // << blockFrame << ")" << std::endl; Chris@0: Chris@0: int binCount = 1; Chris@0: if (m_descriptor->hasFixedBinCount) { Chris@0: binCount = m_descriptor->binCount; Chris@0: } Chris@0: Chris@0: size_t frame = blockFrame; Chris@0: Chris@0: if (m_descriptor->sampleType == Chris@0: Vamp::Plugin::OutputDescriptor::VariableSampleRate) { Chris@0: Chris@0: if (!feature.hasTimestamp) { Chris@0: std::cerr Chris@0: << "WARNING: FeatureExtractionPluginTransform::addFeature: " Chris@0: << "Feature has variable sample rate but no timestamp!" Chris@0: << std::endl; Chris@0: return; Chris@0: } else { Chris@0: frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate); Chris@0: } Chris@0: Chris@0: } else if (m_descriptor->sampleType == Chris@0: Vamp::Plugin::OutputDescriptor::FixedSampleRate) { Chris@0: Chris@0: if (feature.hasTimestamp) { Chris@0: //!!! warning: sampleRate may be non-integral Chris@0: frame = Vamp::RealTime::realTime2Frame(feature.timestamp, Chris@0: m_descriptor->sampleRate); Chris@0: } else { Chris@50: frame = m_output->getEndFrame(); Chris@0: } Chris@0: } Chris@0: Chris@0: if (binCount == 0) { Chris@0: Chris@0: SparseOneDimensionalModel *model = getOutput(); Chris@0: if (!model) return; Chris@0: model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str())); Chris@0: Chris@0: } else if (binCount == 1) { Chris@0: Chris@0: float value = 0.0; Chris@0: if (feature.values.size() > 0) value = feature.values[0]; Chris@0: Chris@0: SparseTimeValueModel *model = getOutput(); Chris@0: if (!model) return; Chris@0: model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str())); Chris@0: Chris@0: } else if (m_descriptor->sampleType == Chris@0: Vamp::Plugin::OutputDescriptor::VariableSampleRate) { Chris@0: Chris@0: float pitch = 0.0; Chris@0: if (feature.values.size() > 0) pitch = feature.values[0]; Chris@0: Chris@0: float duration = 1; Chris@0: if (feature.values.size() > 1) duration = feature.values[1]; Chris@0: Chris@0: float velocity = 100; Chris@0: if (feature.values.size() > 2) velocity = feature.values[2]; Chris@0: Chris@0: NoteModel *model = getOutput(); Chris@0: if (!model) return; Chris@0: Chris@0: model->addPoint(NoteModel::Point(frame, pitch, duration, feature.label.c_str())); Chris@0: Chris@0: } else { Chris@0: Chris@51: DenseThreeDimensionalModel::Column values = feature.values; Chris@0: Chris@3: EditableDenseThreeDimensionalModel *model = Chris@3: getOutput(); Chris@0: if (!model) return; Chris@0: Chris@51: model->setColumn(frame / model->getResolution(), values); Chris@0: } Chris@0: } Chris@0: Chris@0: void Chris@0: FeatureExtractionPluginTransform::setCompletion(int completion) Chris@0: { Chris@0: int binCount = 1; Chris@0: if (m_descriptor->hasFixedBinCount) { Chris@0: binCount = m_descriptor->binCount; Chris@0: } Chris@0: Chris@33: std::cerr << "FeatureExtractionPluginTransform::setCompletion(" Chris@33: << completion << ")" << std::endl; Chris@33: Chris@0: if (binCount == 0) { Chris@0: Chris@0: SparseOneDimensionalModel *model = getOutput(); Chris@0: if (!model) return; Chris@33: std::cerr << "setting on SparseOneDimensionalModel" << std::endl; Chris@0: model->setCompletion(completion); Chris@0: Chris@0: } else if (binCount == 1) { Chris@0: Chris@0: SparseTimeValueModel *model = getOutput(); Chris@0: if (!model) return; Chris@33: std::cerr << "setting on SparseTimeValueModel" << std::endl; Chris@0: model->setCompletion(completion); Chris@0: Chris@0: } else if (m_descriptor->sampleType == Chris@0: Vamp::Plugin::OutputDescriptor::VariableSampleRate) { Chris@0: Chris@0: NoteModel *model = getOutput(); Chris@0: if (!model) return; Chris@33: std::cerr << "setting on NoteModel" << std::endl; Chris@0: model->setCompletion(completion); Chris@0: Chris@0: } else { Chris@0: Chris@3: EditableDenseThreeDimensionalModel *model = Chris@3: getOutput(); Chris@0: if (!model) return; Chris@33: std::cerr << "setting on EditableDenseThreeDimensionalModel" << std::endl; Chris@0: model->setCompletion(completion); Chris@0: } Chris@0: } Chris@0: