Chris@320: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@320: Chris@320: /* Chris@320: Sonic Visualiser Chris@320: An audio file viewer and annotation editor. Chris@320: Centre for Digital Music, Queen Mary, University of London. Chris@320: This file copyright 2006 Chris Cannam and QMUL. Chris@320: Chris@320: This program is free software; you can redistribute it and/or Chris@320: modify it under the terms of the GNU General Public License as Chris@320: published by the Free Software Foundation; either version 2 of the Chris@320: License, or (at your option) any later version. See the file Chris@320: COPYING included with this distribution for more information. Chris@320: */ Chris@320: Chris@331: #include "FeatureExtractionModelTransformer.h" Chris@320: Chris@320: #include "plugin/FeatureExtractionPluginFactory.h" Chris@320: #include "plugin/PluginXml.h" Chris@475: #include Chris@320: Chris@320: #include "data/model/Model.h" Chris@320: #include "base/Window.h" Chris@387: #include "base/Exceptions.h" Chris@320: #include "data/model/SparseOneDimensionalModel.h" Chris@320: #include "data/model/SparseTimeValueModel.h" Chris@320: #include "data/model/EditableDenseThreeDimensionalModel.h" Chris@320: #include "data/model/DenseTimeValueModel.h" Chris@320: #include "data/model/NoteModel.h" gyorgyf@786: #include "data/model/FlexiNoteModel.h" Chris@441: #include "data/model/RegionModel.h" Chris@320: #include "data/model/FFTModel.h" Chris@320: #include "data/model/WaveFileModel.h" Chris@558: #include "rdf/PluginRDFDescription.h" Chris@320: Chris@350: #include "TransformFactory.h" Chris@350: Chris@320: #include Chris@320: Chris@859: #include Chris@859: Chris@350: FeatureExtractionModelTransformer::FeatureExtractionModelTransformer(Input in, Chris@859: const Transform &transform) : Chris@350: ModelTransformer(in, transform), Chris@859: m_plugin(0) Chris@320: { Chris@690: // SVDEBUG << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: plugin " << pluginId << ", outputName " << m_transform.getOutput() << endl; Chris@350: Chris@849: initialise(); Chris@849: } Chris@849: Chris@849: FeatureExtractionModelTransformer::FeatureExtractionModelTransformer(Input in, Chris@859: const Transforms &transforms) : Chris@849: ModelTransformer(in, transforms), Chris@859: m_plugin(0) Chris@849: { Chris@849: // SVDEBUG << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: plugin " << pluginId << ", outputName " << m_transform.getOutput() << endl; Chris@849: Chris@849: initialise(); Chris@849: } Chris@849: Chris@849: static bool Chris@849: areTransformsSimilar(const Transform &t1, const Transform &t2) Chris@849: { Chris@849: Transform t2o(t2); Chris@849: t2o.setOutput(t1.getOutput()); Chris@849: return t1 == t2o; Chris@849: } Chris@849: Chris@849: bool Chris@849: FeatureExtractionModelTransformer::initialise() Chris@849: { Chris@849: // All transforms must use the same plugin, parameters, and Chris@849: // inputs: they can differ only in choice of plugin output. So we Chris@849: // initialise based purely on the first transform in the list (but Chris@849: // first check that they are actually similar as promised) Chris@849: Chris@849: for (int j = 1; j < (int)m_transforms.size(); ++j) { Chris@849: if (!areTransformsSimilar(m_transforms[0], m_transforms[j])) { Chris@849: m_message = tr("Transforms supplied to a single FeatureExtractionModelTransformer instance must be similar in every respect except plugin output"); Chris@849: return false; Chris@849: } Chris@849: } Chris@849: Chris@849: Transform primaryTransform = m_transforms[0]; Chris@849: Chris@849: QString pluginId = primaryTransform.getPluginIdentifier(); Chris@320: Chris@320: FeatureExtractionPluginFactory *factory = Chris@320: FeatureExtractionPluginFactory::instanceFor(pluginId); Chris@320: Chris@320: if (!factory) { Chris@361: m_message = tr("No factory available for feature extraction plugin id \"%1\" (unknown plugin type, or internal error?)").arg(pluginId); Chris@849: return false; Chris@320: } Chris@320: Chris@350: DenseTimeValueModel *input = getConformingInput(); Chris@350: if (!input) { Chris@361: m_message = tr("Input model for feature extraction plugin \"%1\" is of wrong type (internal error?)").arg(pluginId); Chris@849: return false; Chris@350: } Chris@320: Chris@350: m_plugin = factory->instantiatePlugin(pluginId, input->getSampleRate()); Chris@320: if (!m_plugin) { Chris@361: m_message = tr("Failed to instantiate plugin \"%1\"").arg(pluginId); Chris@849: return false; Chris@320: } Chris@320: Chris@350: TransformFactory::getInstance()->makeContextConsistentWithPlugin Chris@849: (primaryTransform, m_plugin); Chris@343: Chris@350: TransformFactory::getInstance()->setPluginParameters Chris@849: (primaryTransform, m_plugin); Chris@320: Chris@320: size_t channelCount = input->getChannelCount(); Chris@320: if (m_plugin->getMaxChannelCount() < channelCount) { Chris@320: channelCount = 1; Chris@320: } Chris@320: if (m_plugin->getMinChannelCount() > channelCount) { Chris@361: m_message = tr("Cannot provide enough channels to feature extraction plugin \"%1\" (plugin min is %2, max %3; input model has %4)") Chris@361: .arg(pluginId) Chris@361: .arg(m_plugin->getMinChannelCount()) Chris@361: .arg(m_plugin->getMaxChannelCount()) Chris@361: .arg(input->getChannelCount()); Chris@849: return false; Chris@320: } Chris@320: Chris@690: SVDEBUG << "Initialising feature extraction plugin with channels = " Chris@849: << channelCount << ", step = " << primaryTransform.getStepSize() Chris@849: << ", block = " << primaryTransform.getBlockSize() << endl; Chris@320: Chris@320: if (!m_plugin->initialise(channelCount, Chris@849: primaryTransform.getStepSize(), Chris@849: primaryTransform.getBlockSize())) { Chris@361: Chris@849: size_t pstep = primaryTransform.getStepSize(); Chris@849: size_t pblock = primaryTransform.getBlockSize(); Chris@361: Chris@850: ///!!! hang on, this isn't right -- we're modifying a copy Chris@849: primaryTransform.setStepSize(0); Chris@849: primaryTransform.setBlockSize(0); Chris@361: TransformFactory::getInstance()->makeContextConsistentWithPlugin Chris@849: (primaryTransform, m_plugin); Chris@361: Chris@849: if (primaryTransform.getStepSize() != pstep || Chris@849: primaryTransform.getBlockSize() != pblock) { Chris@361: Chris@361: if (!m_plugin->initialise(channelCount, Chris@849: primaryTransform.getStepSize(), Chris@849: primaryTransform.getBlockSize())) { Chris@361: Chris@361: m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId); Chris@849: return false; Chris@361: Chris@361: } else { Chris@361: Chris@361: m_message = tr("Feature extraction plugin \"%1\" rejected the given step and block sizes (%2 and %3); using plugin defaults (%4 and %5) instead") Chris@361: .arg(pluginId) Chris@361: .arg(pstep) Chris@361: .arg(pblock) Chris@849: .arg(primaryTransform.getStepSize()) Chris@849: .arg(primaryTransform.getBlockSize()); Chris@361: } Chris@361: Chris@361: } else { Chris@361: Chris@361: m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId); Chris@849: return false; Chris@361: } Chris@320: } Chris@320: Chris@849: if (primaryTransform.getPluginVersion() != "") { Chris@366: QString pv = QString("%1").arg(m_plugin->getPluginVersion()); Chris@849: if (pv != primaryTransform.getPluginVersion()) { Chris@366: QString vm = tr("Transform was configured for version %1 of plugin \"%2\", but the plugin being used is version %3") Chris@849: .arg(primaryTransform.getPluginVersion()) Chris@366: .arg(pluginId) Chris@366: .arg(pv); Chris@366: if (m_message != "") { Chris@366: m_message = QString("%1; %2").arg(vm).arg(m_message); Chris@366: } else { Chris@366: m_message = vm; Chris@366: } Chris@366: } Chris@366: } Chris@366: Chris@320: Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors(); Chris@320: Chris@320: if (outputs.empty()) { Chris@361: m_message = tr("Plugin \"%1\" has no outputs").arg(pluginId); Chris@849: return false; Chris@320: } Chris@320: Chris@849: for (int j = 0; j < (int)m_transforms.size(); ++j) { Chris@849: Chris@849: for (int i = 0; i < (int)outputs.size(); ++i) { Chris@849: // SVDEBUG << "comparing output " << i << " name \"" << outputs[i].identifier << "\" with expected \"" << m_transform.getOutput() << "\"" << endl; Chris@849: if (m_transforms[j].getOutput() == "" || Chris@849: outputs[i].identifier == m_transforms[j].getOutput().toStdString()) { Chris@849: m_outputNos.push_back(i); Chris@849: m_descriptors.push_back(new Vamp::Plugin::OutputDescriptor(outputs[i])); Chris@849: m_fixedRateFeatureNos.push_back(-1); // we increment before use Chris@849: break; Chris@849: } Chris@849: } Chris@849: Chris@849: if (m_descriptors.size() <= j) { Chris@849: m_message = tr("Plugin \"%1\" has no output named \"%2\"") Chris@849: .arg(pluginId) Chris@849: .arg(m_transforms[j].getOutput()); Chris@849: return false; Chris@849: } Chris@320: } Chris@320: Chris@849: for (int j = 0; j < (int)m_transforms.size(); ++j) { Chris@876: createOutputModels(j); Chris@849: } Chris@849: Chris@849: return true; Chris@558: } Chris@558: Chris@558: void Chris@876: FeatureExtractionModelTransformer::createOutputModels(int n) Chris@558: { Chris@558: DenseTimeValueModel *input = getConformingInput(); Chris@558: Chris@843: // cerr << "FeatureExtractionModelTransformer::createOutputModel: sample type " << m_descriptor->sampleType << ", rate " << m_descriptor->sampleRate << endl; Chris@712: Chris@849: PluginRDFDescription description(m_transforms[n].getPluginIdentifier()); Chris@849: QString outputId = m_transforms[n].getOutput(); Chris@558: Chris@320: int binCount = 1; Chris@320: float minValue = 0.0, maxValue = 0.0; Chris@320: bool haveExtents = false; Chris@876: bool haveBinCount = m_descriptors[n]->hasFixedBinCount; Chris@876: Chris@876: if (haveBinCount) { Chris@849: binCount = m_descriptors[n]->binCount; Chris@320: } Chris@320: Chris@876: m_needAdditionalModels[n] = false; Chris@876: Chris@843: // cerr << "FeatureExtractionModelTransformer: output bin count " Chris@843: // << binCount << endl; Chris@320: Chris@849: if (binCount > 0 && m_descriptors[n]->hasKnownExtents) { Chris@849: minValue = m_descriptors[n]->minValue; Chris@849: maxValue = m_descriptors[n]->maxValue; Chris@320: haveExtents = true; Chris@320: } Chris@320: Chris@350: size_t modelRate = input->getSampleRate(); Chris@320: size_t modelResolution = 1; Chris@712: Chris@849: if (m_descriptors[n]->sampleType != Chris@785: Vamp::Plugin::OutputDescriptor::OneSamplePerStep) { Chris@849: if (m_descriptors[n]->sampleRate > input->getSampleRate()) { Chris@843: cerr << "WARNING: plugin reports output sample rate as " Chris@849: << m_descriptors[n]->sampleRate << " (can't display features with finer resolution than the input rate of " << input->getSampleRate() << ")" << endl; Chris@785: } Chris@785: } Chris@785: Chris@849: switch (m_descriptors[n]->sampleType) { Chris@320: Chris@320: case Vamp::Plugin::OutputDescriptor::VariableSampleRate: Chris@849: if (m_descriptors[n]->sampleRate != 0.0) { Chris@849: modelResolution = size_t(modelRate / m_descriptors[n]->sampleRate + 0.001); Chris@320: } Chris@320: break; Chris@320: Chris@320: case Vamp::Plugin::OutputDescriptor::OneSamplePerStep: Chris@849: modelResolution = m_transforms[n].getStepSize(); Chris@320: break; Chris@320: Chris@320: case Vamp::Plugin::OutputDescriptor::FixedSampleRate: Chris@451: //!!! SV doesn't actually support display of models that have Chris@451: //!!! different underlying rates together -- so we always set Chris@451: //!!! the model rate to be the input model's rate, and adjust Chris@451: //!!! the resolution appropriately. We can't properly display Chris@451: //!!! data with a higher resolution than the base model at all Chris@849: // modelRate = size_t(m_descriptors[n]->sampleRate + 0.001); Chris@849: if (m_descriptors[n]->sampleRate > input->getSampleRate()) { Chris@451: modelResolution = 1; Chris@451: } else { Chris@451: modelResolution = size_t(input->getSampleRate() / Chris@849: m_descriptors[n]->sampleRate); Chris@451: } Chris@320: break; Chris@320: } Chris@320: Chris@441: bool preDurationPlugin = (m_plugin->getVampApiVersion() < 2); Chris@441: Chris@849: Model *out = 0; Chris@849: Chris@441: if (binCount == 0 && Chris@849: (preDurationPlugin || !m_descriptors[n]->hasDuration)) { Chris@320: Chris@445: // Anything with no value and no duration is an instant Chris@445: Chris@849: out = new SparseOneDimensionalModel(modelRate, modelResolution, false); Chris@558: QString outputEventTypeURI = description.getOutputEventTypeURI(outputId); Chris@849: out->setRDFTypeURI(outputEventTypeURI); Chris@558: Chris@441: } else if ((preDurationPlugin && binCount > 1 && Chris@849: (m_descriptors[n]->sampleType == Chris@441: Vamp::Plugin::OutputDescriptor::VariableSampleRate)) || Chris@849: (!preDurationPlugin && m_descriptors[n]->hasDuration)) { Chris@441: Chris@441: // For plugins using the old v1 API without explicit duration, Chris@441: // we treat anything that has multiple bins (i.e. that has the Chris@441: // potential to have value and duration) and a variable sample Chris@441: // rate as a note model, taking its values as pitch, duration Chris@441: // and velocity (if present) respectively. This is the same Chris@441: // behaviour as always applied by SV to these plugins in the Chris@441: // past. Chris@441: Chris@441: // For plugins with the newer API, we treat anything with Chris@441: // duration as either a note model with pitch and velocity, or Chris@441: // a region model. Chris@441: Chris@441: // How do we know whether it's an interval or note model? Chris@441: // What's the essential difference? Is a note model any Chris@441: // interval model using a Hz or "MIDI pitch" scale? There Chris@441: // isn't really a reliable test for "MIDI pitch"... Does a Chris@441: // note model always have velocity? This is a good question Chris@441: // to be addressed by accompanying RDF, but for the moment we Chris@441: // will do the following... Chris@441: Chris@441: bool isNoteModel = false; Chris@441: Chris@441: // Regions have only value (and duration -- we can't extract a Chris@441: // region model from an old-style plugin that doesn't support Chris@441: // duration) Chris@441: if (binCount > 1) isNoteModel = true; Chris@441: Chris@595: // Regions do not have units of Hz or MIDI things (a sweeping Chris@595: // assumption!) Chris@849: if (m_descriptors[n]->unit == "Hz" || Chris@849: m_descriptors[n]->unit.find("MIDI") != std::string::npos || Chris@849: m_descriptors[n]->unit.find("midi") != std::string::npos) { Chris@595: isNoteModel = true; Chris@595: } Chris@441: Chris@441: // If we had a "sparse 3D model", we would have the additional Chris@441: // problem of determining whether to use that here (if bin Chris@441: // count > 1). But we don't. Chris@441: Chris@859: QSettings settings; Chris@859: settings.beginGroup("Transformer"); Chris@859: bool flexi = settings.value("use-flexi-note-model", false).toBool(); Chris@859: settings.endGroup(); Chris@859: Chris@859: cerr << "flexi = " << flexi << endl; Chris@859: Chris@859: if (isNoteModel && !flexi) { Chris@441: Chris@441: NoteModel *model; Chris@441: if (haveExtents) { Chris@859: model = new NoteModel Chris@859: (modelRate, modelResolution, minValue, maxValue, false); Chris@441: } else { Chris@859: model = new NoteModel Chris@859: (modelRate, modelResolution, false); gyorgyf@786: } Chris@849: model->setScaleUnits(m_descriptors[n]->unit.c_str()); Chris@849: out = model; gyorgyf@786: Chris@859: } else if (isNoteModel && flexi) { gyorgyf@786: gyorgyf@786: FlexiNoteModel *model; gyorgyf@786: if (haveExtents) { Chris@859: model = new FlexiNoteModel Chris@859: (modelRate, modelResolution, minValue, maxValue, false); gyorgyf@786: } else { Chris@859: model = new FlexiNoteModel Chris@859: (modelRate, modelResolution, false); Chris@441: } Chris@849: model->setScaleUnits(m_descriptors[n]->unit.c_str()); Chris@849: out = model; Chris@441: Chris@441: } else { Chris@441: Chris@441: RegionModel *model; Chris@441: if (haveExtents) { Chris@441: model = new RegionModel Chris@441: (modelRate, modelResolution, minValue, maxValue, false); Chris@441: } else { Chris@441: model = new RegionModel Chris@441: (modelRate, modelResolution, false); Chris@441: } Chris@849: model->setScaleUnits(m_descriptors[n]->unit.c_str()); Chris@849: out = model; Chris@441: } Chris@441: Chris@558: QString outputEventTypeURI = description.getOutputEventTypeURI(outputId); Chris@849: out->setRDFTypeURI(outputEventTypeURI); Chris@558: Chris@876: } else if (binCount == 1 || Chris@849: (m_descriptors[n]->sampleType == Chris@441: Vamp::Plugin::OutputDescriptor::VariableSampleRate)) { Chris@441: Chris@441: // Anything that is not a 1D, note, or interval model and that Chris@441: // has only one value per result must be a sparse time value Chris@441: // model. Chris@441: Chris@441: // Anything that is not a 1D, note, or interval model and that Chris@876: // has a variable sample rate is treated as a set of sparse Chris@876: // time value models, one per output bin, because we lack a Chris@441: // sparse 3D model. Chris@320: Chris@876: // Anything that is not a 1D, note, or interval model and that Chris@876: // has a fixed sample rate but an unknown number of values per Chris@876: // result is also treated as a set of sparse time value models. Chris@876: Chris@876: // For sets of sparse time value models, we create a single Chris@876: // model first as the "standard" output and then create models Chris@876: // for bins 1+ in the additional model map (mapping the output Chris@876: // descriptor to a list of models indexed by bin-1). But we Chris@876: // don't create the additional models yet, as this case has to Chris@876: // work even if the number of bins is unknown at this point -- Chris@877: // we create an additional model (copying its parameters from Chris@877: // the default one) each time a new bin is encountered. Chris@876: Chris@876: if (!haveBinCount || binCount > 1) { Chris@876: m_needAdditionalModels[n] = true; Chris@876: } Chris@876: Chris@320: SparseTimeValueModel *model; Chris@320: if (haveExtents) { Chris@320: model = new SparseTimeValueModel Chris@320: (modelRate, modelResolution, minValue, maxValue, false); Chris@320: } else { Chris@320: model = new SparseTimeValueModel Chris@320: (modelRate, modelResolution, false); Chris@320: } Chris@558: Chris@558: Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors(); Chris@849: model->setScaleUnits(outputs[m_outputNos[n]].unit.c_str()); Chris@320: Chris@849: out = model; Chris@320: Chris@558: QString outputEventTypeURI = description.getOutputEventTypeURI(outputId); Chris@849: out->setRDFTypeURI(outputEventTypeURI); Chris@558: Chris@441: } else { Chris@320: Chris@441: // Anything that is not a 1D, note, or interval model and that Chris@441: // has a fixed sample rate and more than one value per result Chris@441: // must be a dense 3D model. Chris@320: Chris@320: EditableDenseThreeDimensionalModel *model = Chris@320: new EditableDenseThreeDimensionalModel Chris@535: (modelRate, modelResolution, binCount, Chris@535: EditableDenseThreeDimensionalModel::BasicMultirateCompression, Chris@535: false); Chris@320: Chris@849: if (!m_descriptors[n]->binNames.empty()) { Chris@320: std::vector names; Chris@849: for (size_t i = 0; i < m_descriptors[n]->binNames.size(); ++i) { Chris@849: names.push_back(m_descriptors[n]->binNames[i].c_str()); Chris@320: } Chris@320: model->setBinNames(names); Chris@320: } Chris@320: Chris@849: out = model; Chris@558: Chris@558: QString outputSignalTypeURI = description.getOutputSignalTypeURI(outputId); Chris@849: out->setRDFTypeURI(outputSignalTypeURI); Chris@320: } Chris@333: Chris@849: if (out) { Chris@849: out->setSourceModel(input); Chris@849: m_outputs.push_back(out); Chris@849: } Chris@320: } Chris@320: Chris@331: FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer() Chris@320: { Chris@690: // SVDEBUG << "FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()" << endl; Chris@320: delete m_plugin; Chris@850: for (int j = 0; j < m_descriptors.size(); ++j) { Chris@850: delete m_descriptors[j]; Chris@850: } Chris@320: } Chris@320: Chris@876: FeatureExtractionModelTransformer::Models Chris@876: FeatureExtractionModelTransformer::getAdditionalOutputModels() Chris@876: { Chris@876: Models mm; Chris@876: for (AdditionalModelMap::iterator i = m_additionalModels.begin(); Chris@876: i != m_additionalModels.end(); ++i) { Chris@876: for (std::map::iterator j = Chris@876: i->second.begin(); Chris@876: j != i->second.end(); ++j) { Chris@876: SparseTimeValueModel *m = j->second; Chris@876: if (m) mm.push_back(m); Chris@876: } Chris@876: } Chris@876: return mm; Chris@876: } Chris@876: Chris@877: bool Chris@877: FeatureExtractionModelTransformer::willHaveAdditionalOutputModels() Chris@877: { Chris@877: for (std::map::const_iterator i = Chris@877: m_needAdditionalModels.begin(); Chris@877: i != m_needAdditionalModels.end(); ++i) { Chris@877: if (i->second) return true; Chris@877: } Chris@877: return false; Chris@877: } Chris@877: Chris@876: SparseTimeValueModel * Chris@876: FeatureExtractionModelTransformer::getAdditionalModel(int n, int binNo) Chris@876: { Chris@876: std::cerr << "getAdditionalModel(" << n << ", " << binNo << ")" << std::endl; Chris@876: Chris@876: if (binNo == 0) { Chris@876: std::cerr << "Internal error: binNo == 0 in getAdditionalModel (should be using primary model)" << std::endl; Chris@876: return 0; Chris@876: } Chris@876: Chris@876: if (!m_needAdditionalModels[n]) return 0; Chris@876: if (!isOutput(n)) return 0; Chris@876: if (m_additionalModels[n][binNo]) return m_additionalModels[n][binNo]; Chris@876: Chris@876: std::cerr << "getAdditionalModel(" << n << ", " << binNo << "): creating" << std::endl; Chris@876: Chris@876: SparseTimeValueModel *baseModel = getConformingOutput(n); Chris@876: if (!baseModel) return 0; Chris@876: Chris@876: std::cerr << "getAdditionalModel(" << n << ", " << binNo << "): (from " << baseModel << ")" << std::endl; Chris@876: Chris@876: SparseTimeValueModel *additional = Chris@876: new SparseTimeValueModel(baseModel->getSampleRate(), Chris@876: baseModel->getResolution(), Chris@876: baseModel->getValueMinimum(), Chris@876: baseModel->getValueMaximum(), Chris@876: false); Chris@876: Chris@876: additional->setScaleUnits(baseModel->getScaleUnits()); Chris@876: additional->setRDFTypeURI(baseModel->getRDFTypeURI()); Chris@876: Chris@876: m_additionalModels[n][binNo] = additional; Chris@876: return additional; Chris@876: } Chris@876: Chris@320: DenseTimeValueModel * Chris@350: FeatureExtractionModelTransformer::getConformingInput() Chris@320: { Chris@690: // SVDEBUG << "FeatureExtractionModelTransformer::getConformingInput: input model is " << getInputModel() << endl; Chris@408: Chris@320: DenseTimeValueModel *dtvm = Chris@320: dynamic_cast(getInputModel()); Chris@320: if (!dtvm) { Chris@690: SVDEBUG << "FeatureExtractionModelTransformer::getConformingInput: WARNING: Input model is not conformable to DenseTimeValueModel" << endl; Chris@320: } Chris@320: return dtvm; Chris@320: } Chris@320: Chris@320: void Chris@331: FeatureExtractionModelTransformer::run() Chris@320: { Chris@350: DenseTimeValueModel *input = getConformingInput(); Chris@320: if (!input) return; Chris@320: Chris@849: if (m_outputs.empty()) return; Chris@320: Chris@850: Transform primaryTransform = m_transforms[0]; Chris@850: Chris@497: while (!input->isReady() && !m_abandoned) { Chris@877: cerr << "FeatureExtractionModelTransformer::run: Waiting for input model to be ready..." << endl; Chris@497: usleep(500000); Chris@320: } Chris@497: if (m_abandoned) return; Chris@320: Chris@350: size_t sampleRate = input->getSampleRate(); Chris@320: Chris@320: size_t channelCount = input->getChannelCount(); Chris@320: if (m_plugin->getMaxChannelCount() < channelCount) { Chris@320: channelCount = 1; Chris@320: } Chris@320: Chris@320: float **buffers = new float*[channelCount]; Chris@320: for (size_t ch = 0; ch < channelCount; ++ch) { Chris@850: buffers[ch] = new float[primaryTransform.getBlockSize() + 2]; Chris@320: } Chris@320: Chris@850: size_t stepSize = primaryTransform.getStepSize(); Chris@850: size_t blockSize = primaryTransform.getBlockSize(); Chris@350: Chris@320: bool frequencyDomain = (m_plugin->getInputDomain() == Chris@320: Vamp::Plugin::FrequencyDomain); Chris@320: std::vector fftModels; Chris@320: Chris@320: if (frequencyDomain) { Chris@320: for (size_t ch = 0; ch < channelCount; ++ch) { Chris@320: FFTModel *model = new FFTModel Chris@350: (getConformingInput(), Chris@350: channelCount == 1 ? m_input.getChannel() : ch, Chris@850: primaryTransform.getWindowType(), Chris@350: blockSize, Chris@350: stepSize, Chris@350: blockSize, Chris@334: false, Chris@334: StorageAdviser::PrecisionCritical); Chris@320: if (!model->isOK()) { Chris@320: delete model; Chris@850: for (int j = 0; j < (int)m_outputNos.size(); ++j) { Chris@850: setCompletion(j, 100); Chris@850: } Chris@387: //!!! need a better way to handle this -- previously we were using a QMessageBox but that isn't an appropriate thing to do here either Chris@387: throw AllocationFailed("Failed to create the FFT model for this feature extraction model transformer"); Chris@320: } Chris@320: model->resume(); Chris@320: fftModels.push_back(model); Chris@320: } Chris@320: } Chris@320: Chris@350: long startFrame = m_input.getModel()->getStartFrame(); Chris@350: long endFrame = m_input.getModel()->getEndFrame(); Chris@320: Chris@850: RealTime contextStartRT = primaryTransform.getStartTime(); Chris@850: RealTime contextDurationRT = primaryTransform.getDuration(); Chris@350: Chris@350: long contextStart = Chris@350: RealTime::realTime2Frame(contextStartRT, sampleRate); Chris@350: Chris@350: long contextDuration = Chris@350: RealTime::realTime2Frame(contextDurationRT, sampleRate); Chris@320: Chris@320: if (contextStart == 0 || contextStart < startFrame) { Chris@320: contextStart = startFrame; Chris@320: } Chris@320: Chris@320: if (contextDuration == 0) { Chris@320: contextDuration = endFrame - contextStart; Chris@320: } Chris@320: if (contextStart + contextDuration > endFrame) { Chris@320: contextDuration = endFrame - contextStart; Chris@320: } Chris@320: Chris@320: long blockFrame = contextStart; Chris@320: Chris@320: long prevCompletion = 0; Chris@320: Chris@850: for (int j = 0; j < (int)m_outputNos.size(); ++j) { Chris@850: setCompletion(j, 0); Chris@850: } Chris@320: Chris@556: float *reals = 0; Chris@556: float *imaginaries = 0; Chris@556: if (frequencyDomain) { Chris@556: reals = new float[blockSize/2 + 1]; Chris@556: imaginaries = new float[blockSize/2 + 1]; Chris@556: } Chris@556: Chris@678: QString error = ""; Chris@678: Chris@320: while (!m_abandoned) { Chris@320: Chris@320: if (frequencyDomain) { Chris@350: if (blockFrame - int(blockSize)/2 > Chris@320: contextStart + contextDuration) break; Chris@320: } else { Chris@320: if (blockFrame >= Chris@320: contextStart + contextDuration) break; Chris@320: } Chris@320: Chris@690: // SVDEBUG << "FeatureExtractionModelTransformer::run: blockFrame " Chris@320: // << blockFrame << ", endFrame " << endFrame << ", blockSize " Chris@687: // << blockSize << endl; Chris@320: Chris@320: long completion = Chris@350: (((blockFrame - contextStart) / stepSize) * 99) / Chris@557: (contextDuration / stepSize + 1); Chris@320: Chris@350: // channelCount is either m_input.getModel()->channelCount or 1 Chris@320: Chris@363: if (frequencyDomain) { Chris@363: for (size_t ch = 0; ch < channelCount; ++ch) { Chris@350: int column = (blockFrame - startFrame) / stepSize; Chris@556: fftModels[ch]->getValuesAt(column, reals, imaginaries); Chris@350: for (size_t i = 0; i <= blockSize/2; ++i) { Chris@556: buffers[ch][i*2] = reals[i]; Chris@556: buffers[ch][i*2+1] = imaginaries[i]; Chris@320: } Chris@678: error = fftModels[ch]->getError(); Chris@678: if (error != "") { Chris@843: cerr << "FeatureExtractionModelTransformer::run: Abandoning, error is " << error << endl; Chris@678: m_abandoned = true; Chris@678: m_message = error; Chris@678: } Chris@363: } Chris@363: } else { Chris@363: getFrames(channelCount, blockFrame, blockSize, buffers); Chris@320: } Chris@320: Chris@497: if (m_abandoned) break; Chris@497: Chris@320: Vamp::Plugin::FeatureSet features = m_plugin->process Chris@320: (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate)); Chris@320: Chris@497: if (m_abandoned) break; Chris@497: Chris@850: for (int j = 0; j < (int)m_outputNos.size(); ++j) { Chris@850: for (size_t fi = 0; fi < features[m_outputNos[j]].size(); ++fi) { Chris@850: Vamp::Plugin::Feature feature = features[m_outputNos[j]][fi]; Chris@850: addFeature(j, blockFrame, feature); Chris@850: } Chris@850: } Chris@320: Chris@320: if (blockFrame == contextStart || completion > prevCompletion) { Chris@850: for (int j = 0; j < (int)m_outputNos.size(); ++j) { Chris@850: setCompletion(j, completion); Chris@850: } Chris@320: prevCompletion = completion; Chris@320: } Chris@320: Chris@350: blockFrame += stepSize; Chris@320: } Chris@320: Chris@497: if (!m_abandoned) { Chris@497: Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures(); Chris@320: Chris@850: for (int j = 0; j < (int)m_outputNos.size(); ++j) { Chris@850: for (size_t fi = 0; fi < features[m_outputNos[j]].size(); ++fi) { Chris@850: Vamp::Plugin::Feature feature = features[m_outputNos[j]][fi]; Chris@850: addFeature(j, blockFrame, feature); Chris@850: } Chris@497: } Chris@497: } Chris@320: Chris@850: for (int j = 0; j < (int)m_outputNos.size(); ++j) { Chris@850: setCompletion(j, 100); Chris@850: } Chris@320: Chris@320: if (frequencyDomain) { Chris@320: for (size_t ch = 0; ch < channelCount; ++ch) { Chris@320: delete fftModels[ch]; Chris@320: } Chris@556: delete[] reals; Chris@556: delete[] imaginaries; Chris@320: } Chris@320: } Chris@320: Chris@320: void Chris@363: FeatureExtractionModelTransformer::getFrames(int channelCount, Chris@363: long startFrame, long size, Chris@363: float **buffers) Chris@320: { Chris@320: long offset = 0; Chris@320: Chris@320: if (startFrame < 0) { Chris@363: for (int c = 0; c < channelCount; ++c) { Chris@363: for (int i = 0; i < size && startFrame + i < 0; ++i) { Chris@363: buffers[c][i] = 0.0f; Chris@363: } Chris@320: } Chris@320: offset = -startFrame; Chris@320: size -= offset; Chris@320: if (size <= 0) return; Chris@320: startFrame = 0; Chris@320: } Chris@320: Chris@350: DenseTimeValueModel *input = getConformingInput(); Chris@350: if (!input) return; Chris@363: Chris@363: long got = 0; Chris@350: Chris@363: if (channelCount == 1) { Chris@363: Chris@363: got = input->getData(m_input.getChannel(), startFrame, size, Chris@363: buffers[0] + offset); Chris@363: Chris@363: if (m_input.getChannel() == -1 && input->getChannelCount() > 1) { Chris@363: // use mean instead of sum, as plugin input Chris@363: float cc = float(input->getChannelCount()); Chris@363: for (long i = 0; i < size; ++i) { Chris@363: buffers[0][i + offset] /= cc; Chris@363: } Chris@363: } Chris@363: Chris@363: } else { Chris@363: Chris@363: float **writebuf = buffers; Chris@363: if (offset > 0) { Chris@363: writebuf = new float *[channelCount]; Chris@363: for (int i = 0; i < channelCount; ++i) { Chris@363: writebuf[i] = buffers[i] + offset; Chris@363: } Chris@363: } Chris@363: Chris@363: got = input->getData(0, channelCount-1, startFrame, size, writebuf); Chris@363: Chris@363: if (writebuf != buffers) delete[] writebuf; Chris@363: } Chris@320: Chris@320: while (got < size) { Chris@363: for (int c = 0; c < channelCount; ++c) { Chris@363: buffers[c][got + offset] = 0.0; Chris@363: } Chris@320: ++got; Chris@320: } Chris@320: } Chris@320: Chris@320: void Chris@850: FeatureExtractionModelTransformer::addFeature(int n, Chris@850: size_t blockFrame, Chris@850: const Vamp::Plugin::Feature &feature) Chris@320: { Chris@350: size_t inputRate = m_input.getModel()->getSampleRate(); Chris@320: Chris@843: // cerr << "FeatureExtractionModelTransformer::addFeature: blockFrame = " Chris@712: // << blockFrame << ", hasTimestamp = " << feature.hasTimestamp Chris@712: // << ", timestamp = " << feature.timestamp << ", hasDuration = " Chris@712: // << feature.hasDuration << ", duration = " << feature.duration Chris@843: // << endl; Chris@320: Chris@320: int binCount = 1; Chris@849: if (m_descriptors[n]->hasFixedBinCount) { Chris@849: binCount = m_descriptors[n]->binCount; Chris@320: } Chris@320: Chris@320: size_t frame = blockFrame; Chris@320: Chris@849: if (m_descriptors[n]->sampleType == Chris@320: Vamp::Plugin::OutputDescriptor::VariableSampleRate) { Chris@320: Chris@320: if (!feature.hasTimestamp) { Chris@843: cerr Chris@331: << "WARNING: FeatureExtractionModelTransformer::addFeature: " Chris@320: << "Feature has variable sample rate but no timestamp!" Chris@843: << endl; Chris@320: return; Chris@320: } else { Chris@320: frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate); Chris@320: } Chris@320: Chris@849: } else if (m_descriptors[n]->sampleType == Chris@320: Vamp::Plugin::OutputDescriptor::FixedSampleRate) { Chris@320: Chris@779: if (!feature.hasTimestamp) { Chris@849: ++m_fixedRateFeatureNos[n]; Chris@779: } else { Chris@779: RealTime ts(feature.timestamp.sec, feature.timestamp.nsec); Chris@849: m_fixedRateFeatureNos[n] = Chris@849: lrint(ts.toDouble() * m_descriptors[n]->sampleRate); Chris@779: } Chris@779: Chris@849: frame = lrintf((m_fixedRateFeatureNos[n] / m_descriptors[n]->sampleRate) Chris@779: * inputRate); Chris@320: } Chris@320: Chris@441: // Rather than repeat the complicated tests from the constructor Chris@441: // to determine what sort of model we must be adding the features Chris@441: // to, we instead test what sort of model the constructor decided Chris@441: // to create. Chris@320: Chris@849: if (isOutput(n)) { Chris@441: Chris@441: SparseOneDimensionalModel *model = Chris@849: getConformingOutput(n); Chris@320: if (!model) return; Chris@350: Chris@441: model->addPoint(SparseOneDimensionalModel::Point Chris@441: (frame, feature.label.c_str())); Chris@320: Chris@849: } else if (isOutput(n)) { Chris@320: Chris@350: SparseTimeValueModel *model = Chris@849: getConformingOutput(n); Chris@320: if (!model) return; Chris@350: Chris@454: for (int i = 0; i < feature.values.size(); ++i) { Chris@454: Chris@454: float value = feature.values[i]; Chris@454: Chris@454: QString label = feature.label.c_str(); Chris@454: if (feature.values.size() > 1) { Chris@454: label = QString("[%1] %2").arg(i+1).arg(label); Chris@454: } Chris@454: Chris@876: SparseTimeValueModel *targetModel = model; Chris@876: Chris@876: if (m_needAdditionalModels[n] && i > 0) { Chris@876: targetModel = getAdditionalModel(n, i); Chris@876: if (!targetModel) targetModel = model; Chris@876: std::cerr << "adding point to model " << targetModel Chris@876: << " for output " << n << " bin " << i << std::endl; Chris@876: } Chris@876: Chris@876: targetModel->addPoint Chris@876: (SparseTimeValueModel::Point(frame, value, label)); Chris@454: } Chris@320: Chris@849: } else if (isOutput(n) || isOutput(n) || isOutput(n)) { //GF: Added Note Model Chris@320: Chris@441: int index = 0; Chris@441: Chris@441: float value = 0.0; Chris@441: if (feature.values.size() > index) { Chris@441: value = feature.values[index++]; Chris@441: } Chris@320: Chris@320: float duration = 1; Chris@441: if (feature.hasDuration) { Chris@441: duration = Vamp::RealTime::realTime2Frame(feature.duration, inputRate); Chris@441: } else { Chris@441: if (feature.values.size() > index) { Chris@441: duration = feature.values[index++]; Chris@441: } Chris@441: } gyorgyf@786: Chris@849: if (isOutput(n)) { // GF: added for flexi note model gyorgyf@786: gyorgyf@786: float velocity = 100; gyorgyf@786: if (feature.values.size() > index) { gyorgyf@786: velocity = feature.values[index++]; gyorgyf@786: } gyorgyf@786: if (velocity < 0) velocity = 127; gyorgyf@786: if (velocity > 127) velocity = 127; gyorgyf@786: Chris@849: FlexiNoteModel *model = getConformingOutput(n); gyorgyf@786: if (!model) return; gyorgyf@786: model->addPoint(FlexiNoteModel::Point(frame, value, // value is pitch gyorgyf@786: lrintf(duration), gyorgyf@786: velocity / 127.f, gyorgyf@786: feature.label.c_str())); gyorgyf@786: // GF: end -- added for flexi note model Chris@849: } else if (isOutput(n)) { Chris@320: Chris@441: float velocity = 100; Chris@441: if (feature.values.size() > index) { Chris@441: velocity = feature.values[index++]; Chris@441: } Chris@441: if (velocity < 0) velocity = 127; Chris@441: if (velocity > 127) velocity = 127; Chris@320: Chris@849: NoteModel *model = getConformingOutput(n); Chris@441: if (!model) return; Chris@441: model->addPoint(NoteModel::Point(frame, value, // value is pitch Chris@441: lrintf(duration), Chris@441: velocity / 127.f, Chris@441: feature.label.c_str())); Chris@441: } else { gyorgyf@786: Chris@849: RegionModel *model = getConformingOutput(n); Chris@454: if (!model) return; Chris@454: Chris@474: if (feature.hasDuration && !feature.values.empty()) { Chris@454: Chris@454: for (int i = 0; i < feature.values.size(); ++i) { Chris@454: Chris@454: float value = feature.values[i]; Chris@454: Chris@454: QString label = feature.label.c_str(); Chris@454: if (feature.values.size() > 1) { Chris@454: label = QString("[%1] %2").arg(i+1).arg(label); Chris@454: } Chris@454: Chris@454: model->addPoint(RegionModel::Point(frame, value, Chris@454: lrintf(duration), Chris@454: label)); Chris@454: } Chris@454: } else { Chris@454: Chris@441: model->addPoint(RegionModel::Point(frame, value, Chris@441: lrintf(duration), Chris@441: feature.label.c_str())); Chris@454: } Chris@441: } Chris@320: Chris@849: } else if (isOutput(n)) { Chris@320: Chris@533: DenseThreeDimensionalModel::Column values = Chris@533: DenseThreeDimensionalModel::Column::fromStdVector(feature.values); Chris@320: Chris@320: EditableDenseThreeDimensionalModel *model = Chris@849: getConformingOutput(n); Chris@320: if (!model) return; Chris@320: Chris@320: model->setColumn(frame / model->getResolution(), values); Chris@441: Chris@441: } else { Chris@690: SVDEBUG << "FeatureExtractionModelTransformer::addFeature: Unknown output model type!" << endl; Chris@320: } Chris@320: } Chris@320: Chris@320: void Chris@850: FeatureExtractionModelTransformer::setCompletion(int n, int completion) Chris@320: { Chris@690: // SVDEBUG << "FeatureExtractionModelTransformer::setCompletion(" Chris@687: // << completion << ")" << endl; Chris@320: Chris@849: if (isOutput(n)) { Chris@320: Chris@350: SparseOneDimensionalModel *model = Chris@849: getConformingOutput(n); Chris@320: if (!model) return; Chris@441: model->setCompletion(completion, true); Chris@320: Chris@849: } else if (isOutput(n)) { Chris@320: Chris@350: SparseTimeValueModel *model = Chris@849: getConformingOutput(n); Chris@320: if (!model) return; Chris@441: model->setCompletion(completion, true); Chris@320: Chris@849: } else if (isOutput(n)) { Chris@320: Chris@849: NoteModel *model = getConformingOutput(n); Chris@320: if (!model) return; Chris@441: model->setCompletion(completion, true); gyorgyf@786: Chris@849: } else if (isOutput(n)) { gyorgyf@786: Chris@849: FlexiNoteModel *model = getConformingOutput(n); gyorgyf@786: if (!model) return; gyorgyf@786: model->setCompletion(completion, true); Chris@320: Chris@849: } else if (isOutput(n)) { Chris@441: Chris@849: RegionModel *model = getConformingOutput(n); Chris@441: if (!model) return; Chris@441: model->setCompletion(completion, true); Chris@441: Chris@849: } else if (isOutput(n)) { Chris@320: Chris@320: EditableDenseThreeDimensionalModel *model = Chris@849: getConformingOutput(n); Chris@320: if (!model) return; Chris@350: model->setCompletion(completion, true); //!!!m_context.updates); Chris@320: } Chris@320: } Chris@320: