annotate transform/FeatureExtractionModelTransformer.cpp @ 875:3e6ed8a8577b tonioni

Use a sparse time-value model only for outputs with fixed bin count of 1, not for those with unknown bin count. (Precursor to using more than one model for outputs with unknown bin count)
author Chris Cannam
date Tue, 28 Jan 2014 18:52:22 +0000
parents 13803edd513d
children 47aa3aeb687b
rev   line source
Chris@320 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@320 2
Chris@320 3 /*
Chris@320 4 Sonic Visualiser
Chris@320 5 An audio file viewer and annotation editor.
Chris@320 6 Centre for Digital Music, Queen Mary, University of London.
Chris@320 7 This file copyright 2006 Chris Cannam and QMUL.
Chris@320 8
Chris@320 9 This program is free software; you can redistribute it and/or
Chris@320 10 modify it under the terms of the GNU General Public License as
Chris@320 11 published by the Free Software Foundation; either version 2 of the
Chris@320 12 License, or (at your option) any later version. See the file
Chris@320 13 COPYING included with this distribution for more information.
Chris@320 14 */
Chris@320 15
Chris@331 16 #include "FeatureExtractionModelTransformer.h"
Chris@320 17
Chris@320 18 #include "plugin/FeatureExtractionPluginFactory.h"
Chris@320 19 #include "plugin/PluginXml.h"
Chris@475 20 #include <vamp-hostsdk/Plugin.h>
Chris@320 21
Chris@320 22 #include "data/model/Model.h"
Chris@320 23 #include "base/Window.h"
Chris@387 24 #include "base/Exceptions.h"
Chris@320 25 #include "data/model/SparseOneDimensionalModel.h"
Chris@320 26 #include "data/model/SparseTimeValueModel.h"
Chris@320 27 #include "data/model/EditableDenseThreeDimensionalModel.h"
Chris@320 28 #include "data/model/DenseTimeValueModel.h"
Chris@320 29 #include "data/model/NoteModel.h"
gyorgyf@786 30 #include "data/model/FlexiNoteModel.h"
Chris@441 31 #include "data/model/RegionModel.h"
Chris@320 32 #include "data/model/FFTModel.h"
Chris@320 33 #include "data/model/WaveFileModel.h"
Chris@558 34 #include "rdf/PluginRDFDescription.h"
Chris@320 35
Chris@350 36 #include "TransformFactory.h"
Chris@350 37
Chris@320 38 #include <iostream>
Chris@320 39
Chris@859 40 #include <QSettings>
Chris@859 41
Chris@350 42 FeatureExtractionModelTransformer::FeatureExtractionModelTransformer(Input in,
Chris@859 43 const Transform &transform) :
Chris@350 44 ModelTransformer(in, transform),
Chris@859 45 m_plugin(0)
Chris@320 46 {
Chris@690 47 // SVDEBUG << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: plugin " << pluginId << ", outputName " << m_transform.getOutput() << endl;
Chris@350 48
Chris@849 49 initialise();
Chris@849 50 }
Chris@849 51
Chris@849 52 FeatureExtractionModelTransformer::FeatureExtractionModelTransformer(Input in,
Chris@859 53 const Transforms &transforms) :
Chris@849 54 ModelTransformer(in, transforms),
Chris@859 55 m_plugin(0)
Chris@849 56 {
Chris@849 57 // SVDEBUG << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: plugin " << pluginId << ", outputName " << m_transform.getOutput() << endl;
Chris@849 58
Chris@849 59 initialise();
Chris@849 60 }
Chris@849 61
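// "Similar" here means identical in every respect except the choice of
// plugin output: for example (output identifiers purely illustrative),
// two transforms for the same plugin and parameters whose outputs are
// "notes" and "regions" respectively compare as similar under this test.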
Chris@849 62 static bool
Chris@849 63 areTransformsSimilar(const Transform &t1, const Transform &t2)
Chris@849 64 {
Chris@849 65 Transform t2o(t2);
Chris@849 66 t2o.setOutput(t1.getOutput());
Chris@849 67 return t1 == t2o;
Chris@849 68 }
Chris@849 69
Chris@849 70 bool
Chris@849 71 FeatureExtractionModelTransformer::initialise()
Chris@849 72 {
Chris@849 73 // All transforms must use the same plugin, parameters, and
Chris@849 74 // inputs: they can differ only in choice of plugin output. So we
Chris@849 75 // initialise based purely on the first transform in the list (but
Chris@849 76 // first check that they are actually similar as promised)
Chris@849 77
Chris@849 78 for (int j = 1; j < (int)m_transforms.size(); ++j) {
Chris@849 79 if (!areTransformsSimilar(m_transforms[0], m_transforms[j])) {
Chris@849 80 m_message = tr("Transforms supplied to a single FeatureExtractionModelTransformer instance must be similar in every respect except plugin output");
Chris@849 81 return false;
Chris@849 82 }
Chris@849 83 }
Chris@849 84
Chris@849 85 Transform primaryTransform = m_transforms[0];
Chris@849 86
Chris@849 87 QString pluginId = primaryTransform.getPluginIdentifier();
Chris@320 88
Chris@320 89 FeatureExtractionPluginFactory *factory =
Chris@320 90 FeatureExtractionPluginFactory::instanceFor(pluginId);
Chris@320 91
Chris@320 92 if (!factory) {
Chris@361 93 m_message = tr("No factory available for feature extraction plugin id \"%1\" (unknown plugin type, or internal error?)").arg(pluginId);
Chris@849 94 return false;
Chris@320 95 }
Chris@320 96
Chris@350 97 DenseTimeValueModel *input = getConformingInput();
Chris@350 98 if (!input) {
Chris@361 99 m_message = tr("Input model for feature extraction plugin \"%1\" is of wrong type (internal error?)").arg(pluginId);
Chris@849 100 return false;
Chris@350 101 }
Chris@320 102
Chris@350 103 m_plugin = factory->instantiatePlugin(pluginId, input->getSampleRate());
Chris@320 104 if (!m_plugin) {
Chris@361 105 m_message = tr("Failed to instantiate plugin \"%1\"").arg(pluginId);
Chris@849 106 return false;
Chris@320 107 }
Chris@320 108
Chris@350 109 TransformFactory::getInstance()->makeContextConsistentWithPlugin
Chris@849 110 (primaryTransform, m_plugin);
Chris@343 111
Chris@350 112 TransformFactory::getInstance()->setPluginParameters
Chris@849 113 (primaryTransform, m_plugin);
Chris@320 114
Chris@320 115 size_t channelCount = input->getChannelCount();
Chris@320 116 if (m_plugin->getMaxChannelCount() < channelCount) {
Chris@320 117 channelCount = 1;
Chris@320 118 }
Chris@320 119 if (m_plugin->getMinChannelCount() > channelCount) {
Chris@361 120 m_message = tr("Cannot provide enough channels to feature extraction plugin \"%1\" (plugin min is %2, max %3; input model has %4)")
Chris@361 121 .arg(pluginId)
Chris@361 122 .arg(m_plugin->getMinChannelCount())
Chris@361 123 .arg(m_plugin->getMaxChannelCount())
Chris@361 124 .arg(input->getChannelCount());
Chris@849 125 return false;
Chris@320 126 }
Chris@320 127
Chris@690 128 SVDEBUG << "Initialising feature extraction plugin with channels = "
Chris@849 129 << channelCount << ", step = " << primaryTransform.getStepSize()
Chris@849 130 << ", block = " << primaryTransform.getBlockSize() << endl;
Chris@320 131
Chris@320 132 if (!m_plugin->initialise(channelCount,
Chris@849 133 primaryTransform.getStepSize(),
Chris@849 134 primaryTransform.getBlockSize())) {
Chris@361 135
Chris@849 136 size_t pstep = primaryTransform.getStepSize();
Chris@849 137 size_t pblock = primaryTransform.getBlockSize();
Chris@361 138
Chris@850 139 ///!!! hang on, this isn't right -- we're modifying a copy
Chris@849 140 primaryTransform.setStepSize(0);
Chris@849 141 primaryTransform.setBlockSize(0);
Chris@361 142 TransformFactory::getInstance()->makeContextConsistentWithPlugin
Chris@849 143 (primaryTransform, m_plugin);
Chris@361 144
Chris@849 145 if (primaryTransform.getStepSize() != pstep ||
Chris@849 146 primaryTransform.getBlockSize() != pblock) {
Chris@361 147
Chris@361 148 if (!m_plugin->initialise(channelCount,
Chris@849 149 primaryTransform.getStepSize(),
Chris@849 150 primaryTransform.getBlockSize())) {
Chris@361 151
Chris@361 152 m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
Chris@849 153 return false;
Chris@361 154
Chris@361 155 } else {
Chris@361 156
Chris@361 157 m_message = tr("Feature extraction plugin \"%1\" rejected the given step and block sizes (%2 and %3); using plugin defaults (%4 and %5) instead")
Chris@361 158 .arg(pluginId)
Chris@361 159 .arg(pstep)
Chris@361 160 .arg(pblock)
Chris@849 161 .arg(primaryTransform.getStepSize())
Chris@849 162 .arg(primaryTransform.getBlockSize());
Chris@361 163 }
Chris@361 164
Chris@361 165 } else {
Chris@361 166
Chris@361 167 m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
Chris@849 168 return false;
Chris@361 169 }
Chris@320 170 }
Chris@320 171
Chris@849 172 if (primaryTransform.getPluginVersion() != "") {
Chris@366 173 QString pv = QString("%1").arg(m_plugin->getPluginVersion());
Chris@849 174 if (pv != primaryTransform.getPluginVersion()) {
Chris@366 175 QString vm = tr("Transform was configured for version %1 of plugin \"%2\", but the plugin being used is version %3")
Chris@849 176 .arg(primaryTransform.getPluginVersion())
Chris@366 177 .arg(pluginId)
Chris@366 178 .arg(pv);
Chris@366 179 if (m_message != "") {
Chris@366 180 m_message = QString("%1; %2").arg(vm).arg(m_message);
Chris@366 181 } else {
Chris@366 182 m_message = vm;
Chris@366 183 }
Chris@366 184 }
Chris@366 185 }
Chris@366 186
Chris@320 187 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
Chris@320 188
Chris@320 189 if (outputs.empty()) {
Chris@361 190 m_message = tr("Plugin \"%1\" has no outputs").arg(pluginId);
Chris@849 191 return false;
Chris@320 192 }
Chris@320 193
Chris@849 194 for (int j = 0; j < (int)m_transforms.size(); ++j) {
Chris@849 195
Chris@849 196 for (int i = 0; i < (int)outputs.size(); ++i) {
Chris@849 197 // SVDEBUG << "comparing output " << i << " name \"" << outputs[i].identifier << "\" with expected \"" << m_transform.getOutput() << "\"" << endl;
Chris@849 198 if (m_transforms[j].getOutput() == "" ||
Chris@849 199 outputs[i].identifier == m_transforms[j].getOutput().toStdString()) {
Chris@849 200 m_outputNos.push_back(i);
Chris@849 201 m_descriptors.push_back(new Vamp::Plugin::OutputDescriptor(outputs[i]));
Chris@849 202 m_fixedRateFeatureNos.push_back(-1); // we increment before use
Chris@849 203 break;
Chris@849 204 }
Chris@849 205 }
Chris@849 206
Chris@849 207 if ((int)m_descriptors.size() <= j) {
Chris@849 208 m_message = tr("Plugin \"%1\" has no output named \"%2\"")
Chris@849 209 .arg(pluginId)
Chris@849 210 .arg(m_transforms[j].getOutput());
Chris@849 211 return false;
Chris@849 212 }
Chris@320 213 }
Chris@320 214
Chris@849 215 for (int j = 0; j < (int)m_transforms.size(); ++j) {
Chris@849 216 createOutputModel(j);
Chris@849 217 }
Chris@849 218
Chris@849 219 return true;
Chris@558 220 }
Chris@558 221
Chris@558 222 void
Chris@849 223 FeatureExtractionModelTransformer::createOutputModel(int n)
Chris@558 224 {
Chris@558 225 DenseTimeValueModel *input = getConformingInput();
Chris@558 226
Chris@843 227 // cerr << "FeatureExtractionModelTransformer::createOutputModel: sample type " << m_descriptor->sampleType << ", rate " << m_descriptor->sampleRate << endl;
Chris@712 228
Chris@849 229 PluginRDFDescription description(m_transforms[n].getPluginIdentifier());
Chris@849 230 QString outputId = m_transforms[n].getOutput();
Chris@558 231
Chris@320 232 int binCount = 1;
Chris@320 233 float minValue = 0.0, maxValue = 0.0;
Chris@320 234 bool haveExtents = false;
Chris@320 235
Chris@849 236 if (m_descriptors[n]->hasFixedBinCount) {
Chris@849 237 binCount = m_descriptors[n]->binCount;
Chris@320 238 }
Chris@320 239
Chris@843 240 // cerr << "FeatureExtractionModelTransformer: output bin count "
Chris@843 241 // << binCount << endl;
Chris@320 242
Chris@849 243 if (binCount > 0 && m_descriptors[n]->hasKnownExtents) {
Chris@849 244 minValue = m_descriptors[n]->minValue;
Chris@849 245 maxValue = m_descriptors[n]->maxValue;
Chris@320 246 haveExtents = true;
Chris@320 247 }
Chris@320 248
Chris@350 249 size_t modelRate = input->getSampleRate();
Chris@320 250 size_t modelResolution = 1;
Chris@712 251
Chris@849 252 if (m_descriptors[n]->sampleType !=
Chris@785 253 Vamp::Plugin::OutputDescriptor::OneSamplePerStep) {
Chris@849 254 if (m_descriptors[n]->sampleRate > input->getSampleRate()) {
Chris@843 255 cerr << "WARNING: plugin reports output sample rate as "
Chris@849 256 << m_descriptors[n]->sampleRate << " (can't display features with finer resolution than the input rate of " << input->getSampleRate() << ")" << endl;
Chris@785 257 }
Chris@785 258 }
Chris@785 259
Chris@849 260 switch (m_descriptors[n]->sampleType) {
Chris@320 261
Chris@320 262 case Vamp::Plugin::OutputDescriptor::VariableSampleRate:
Chris@849 263 if (m_descriptors[n]->sampleRate != 0.0) {
Chris@849 264 modelResolution = size_t(modelRate / m_descriptors[n]->sampleRate + 0.001);
Chris@320 265 }
Chris@320 266 break;
Chris@320 267
Chris@320 268 case Vamp::Plugin::OutputDescriptor::OneSamplePerStep:
Chris@849 269 modelResolution = m_transforms[n].getStepSize();
Chris@320 270 break;
Chris@320 271
Chris@320 272 case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
Chris@451 273 //!!! SV doesn't actually support display of models that have
Chris@451 274 //!!! different underlying rates together -- so we always set
Chris@451 275 //!!! the model rate to be the input model's rate, and adjust
Chris@451 276 //!!! the resolution appropriately. We can't properly display
Chris@451 277 //!!! data with a higher resolution than the base model at all
Chris@849 278 // modelRate = size_t(m_descriptors[n]->sampleRate + 0.001);
Chris@849 279 if (m_descriptors[n]->sampleRate > input->getSampleRate()) {
Chris@451 280 modelResolution = 1;
Chris@451 281 } else {
Chris@451 282 modelResolution = size_t(input->getSampleRate() /
Chris@849 283 m_descriptors[n]->sampleRate);
Chris@451 284 }
Chris@320 285 break;
Chris@320 286 }
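// Worked example (figures purely illustrative): with a 44100 Hz input
// model, a VariableSampleRate output reporting sampleRate 2.0 gives
// modelResolution = size_t(44100 / 2.0 + 0.001) = 22050; a
// FixedSampleRate output reporting 100 keeps modelRate at the input's
// 44100 and gets modelResolution = 44100 / 100 = 441; OneSamplePerStep
// simply uses the transform's step size.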
Chris@320 287
Chris@441 288 bool preDurationPlugin = (m_plugin->getVampApiVersion() < 2);
Chris@441 289
Chris@849 290 Model *out = 0;
Chris@849 291
Chris@441 292 if (binCount == 0 &&
Chris@849 293 (preDurationPlugin || !m_descriptors[n]->hasDuration)) {
Chris@320 294
Chris@445 295 // Anything with no value and no duration is an instant
Chris@445 296
Chris@849 297 out = new SparseOneDimensionalModel(modelRate, modelResolution, false);
Chris@558 298 QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
Chris@849 299 out->setRDFTypeURI(outputEventTypeURI);
Chris@558 300
Chris@441 301 } else if ((preDurationPlugin && binCount > 1 &&
Chris@849 302 (m_descriptors[n]->sampleType ==
Chris@441 303 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) ||
Chris@849 304 (!preDurationPlugin && m_descriptors[n]->hasDuration)) {
Chris@441 305
Chris@441 306 // For plugins using the old v1 API without explicit duration,
Chris@441 307 // we treat anything that has multiple bins (i.e. that has the
Chris@441 308 // potential to have value and duration) and a variable sample
Chris@441 309 // rate as a note model, taking its values as pitch, duration
Chris@441 310 // and velocity (if present) respectively. This is the same
Chris@441 311 // behaviour as always applied by SV to these plugins in the
Chris@441 312 // past.
Chris@441 313
Chris@441 314 // For plugins with the newer API, we treat anything with
Chris@441 315 // duration as either a note model with pitch and velocity, or
Chris@441 316 // a region model.
Chris@441 317
Chris@441 318 // How do we know whether it's an interval or note model?
Chris@441 319 // What's the essential difference? Is a note model any
Chris@441 320 // interval model using a Hz or "MIDI pitch" scale? There
Chris@441 321 // isn't really a reliable test for "MIDI pitch"... Does a
Chris@441 322 // note model always have velocity? This is a good question
Chris@441 323 // to be addressed by accompanying RDF, but for the moment we
Chris@441 324 // will do the following...
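// In outline, the rules applied below are:
//   binCount > 1                              -> note model
//   unit "Hz", or containing "MIDI" / "midi"  -> note model
//   anything else reaching this branch        -> region model
// and a note model is realised as a FlexiNoteModel rather than a
// NoteModel when the "use-flexi-note-model" setting is enabled.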
Chris@441 325
Chris@441 326 bool isNoteModel = false;
Chris@441 327
Chris@441 328 // Regions have only value (and duration -- we can't extract a
Chris@441 329 // region model from an old-style plugin that doesn't support
Chris@441 330 // duration)
Chris@441 331 if (binCount > 1) isNoteModel = true;
Chris@441 332
Chris@595 333 // Regions do not have units of Hz or MIDI things (a sweeping
Chris@595 334 // assumption!)
Chris@849 335 if (m_descriptors[n]->unit == "Hz" ||
Chris@849 336 m_descriptors[n]->unit.find("MIDI") != std::string::npos ||
Chris@849 337 m_descriptors[n]->unit.find("midi") != std::string::npos) {
Chris@595 338 isNoteModel = true;
Chris@595 339 }
Chris@441 340
Chris@441 341 // If we had a "sparse 3D model", we would have the additional
Chris@441 342 // problem of determining whether to use that here (if bin
Chris@441 343 // count > 1). But we don't.
Chris@441 344
Chris@859 345 QSettings settings;
Chris@859 346 settings.beginGroup("Transformer");
Chris@859 347 bool flexi = settings.value("use-flexi-note-model", false).toBool();
Chris@859 348 settings.endGroup();
Chris@859 349
Chris@859 350 cerr << "flexi = " << flexi << endl;
Chris@859 351
Chris@859 352 if (isNoteModel && !flexi) {
Chris@441 353
Chris@441 354 NoteModel *model;
Chris@441 355 if (haveExtents) {
Chris@859 356 model = new NoteModel
Chris@859 357 (modelRate, modelResolution, minValue, maxValue, false);
Chris@441 358 } else {
Chris@859 359 model = new NoteModel
Chris@859 360 (modelRate, modelResolution, false);
gyorgyf@786 361 }
Chris@849 362 model->setScaleUnits(m_descriptors[n]->unit.c_str());
Chris@849 363 out = model;
gyorgyf@786 364
Chris@859 365 } else if (isNoteModel && flexi) {
gyorgyf@786 366
gyorgyf@786 367 FlexiNoteModel *model;
gyorgyf@786 368 if (haveExtents) {
Chris@859 369 model = new FlexiNoteModel
Chris@859 370 (modelRate, modelResolution, minValue, maxValue, false);
gyorgyf@786 371 } else {
Chris@859 372 model = new FlexiNoteModel
Chris@859 373 (modelRate, modelResolution, false);
Chris@441 374 }
Chris@849 375 model->setScaleUnits(m_descriptors[n]->unit.c_str());
Chris@849 376 out = model;
Chris@441 377
Chris@441 378 } else {
Chris@441 379
Chris@441 380 RegionModel *model;
Chris@441 381 if (haveExtents) {
Chris@441 382 model = new RegionModel
Chris@441 383 (modelRate, modelResolution, minValue, maxValue, false);
Chris@441 384 } else {
Chris@441 385 model = new RegionModel
Chris@441 386 (modelRate, modelResolution, false);
Chris@441 387 }
Chris@849 388 model->setScaleUnits(m_descriptors[n]->unit.c_str());
Chris@849 389 out = model;
Chris@441 390 }
Chris@441 391
Chris@558 392 QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
Chris@849 393 out->setRDFTypeURI(outputEventTypeURI);
Chris@558 394
Chris@875 395 } else if ((binCount == 1 && m_descriptors[n]->hasFixedBinCount) ||
Chris@849 396 (m_descriptors[n]->sampleType ==
Chris@441 397 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) {
Chris@441 398
Chris@441 399 // Anything that is not a 1D, note, or interval model and that
Chris@441 400 // has only one value per result must be a sparse time value
Chris@441 401 // model.
Chris@441 402
Chris@441 403 // Anything that is not a 1D, note, or interval model and that
Chris@441 404 // has a variable sample rate is also treated as a sparse time
Chris@441 405 // value model regardless of its bin count, because we lack a
Chris@441 406 // sparse 3D model.
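// With the test above (the change made in this revision), an output
// declaring a fixed bin count of exactly 1 comes here, as does any
// variable-sample-rate output regardless of bin count; an output with
// an unknown bin count and a fixed or one-sample-per-step rate now
// falls through to the dense three-dimensional branch below instead.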
Chris@320 407
Chris@320 408 SparseTimeValueModel *model;
Chris@320 409 if (haveExtents) {
Chris@320 410 model = new SparseTimeValueModel
Chris@320 411 (modelRate, modelResolution, minValue, maxValue, false);
Chris@320 412 } else {
Chris@320 413 model = new SparseTimeValueModel
Chris@320 414 (modelRate, modelResolution, false);
Chris@320 415 }
Chris@558 416
Chris@558 417 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
Chris@849 418 model->setScaleUnits(outputs[m_outputNos[n]].unit.c_str());
Chris@320 419
Chris@849 420 out = model;
Chris@320 421
Chris@558 422 QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
Chris@849 423 out->setRDFTypeURI(outputEventTypeURI);
Chris@558 424
Chris@441 425 } else {
Chris@320 426
Chris@441 427 // Anything that is not a 1D, note, or interval model and that
Chris@441 428 // has a fixed sample rate and more than one value per result
Chris@441 429 // must be a dense 3D model.
Chris@320 430
Chris@320 431 EditableDenseThreeDimensionalModel *model =
Chris@320 432 new EditableDenseThreeDimensionalModel
Chris@535 433 (modelRate, modelResolution, binCount,
Chris@535 434 EditableDenseThreeDimensionalModel::BasicMultirateCompression,
Chris@535 435 false);
Chris@320 436
Chris@849 437 if (!m_descriptors[n]->binNames.empty()) {
Chris@320 438 std::vector<QString> names;
Chris@849 439 for (size_t i = 0; i < m_descriptors[n]->binNames.size(); ++i) {
Chris@849 440 names.push_back(m_descriptors[n]->binNames[i].c_str());
Chris@320 441 }
Chris@320 442 model->setBinNames(names);
Chris@320 443 }
Chris@320 444
Chris@849 445 out = model;
Chris@558 446
Chris@558 447 QString outputSignalTypeURI = description.getOutputSignalTypeURI(outputId);
Chris@849 448 out->setRDFTypeURI(outputSignalTypeURI);
Chris@320 449 }
Chris@333 450
Chris@849 451 if (out) {
Chris@849 452 out->setSourceModel(input);
Chris@849 453 m_outputs.push_back(out);
Chris@849 454 }
Chris@320 455 }
Chris@320 456
Chris@331 457 FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()
Chris@320 458 {
Chris@690 459 // SVDEBUG << "FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()" << endl;
Chris@320 460 delete m_plugin;
Chris@850 461 for (int j = 0; j < (int)m_descriptors.size(); ++j) {
Chris@850 462 delete m_descriptors[j];
Chris@850 463 }
Chris@320 464 }
Chris@320 465
Chris@320 466 DenseTimeValueModel *
Chris@350 467 FeatureExtractionModelTransformer::getConformingInput()
Chris@320 468 {
Chris@690 469 // SVDEBUG << "FeatureExtractionModelTransformer::getConformingInput: input model is " << getInputModel() << endl;
Chris@408 470
Chris@320 471 DenseTimeValueModel *dtvm =
Chris@320 472 dynamic_cast<DenseTimeValueModel *>(getInputModel());
Chris@320 473 if (!dtvm) {
Chris@690 474 SVDEBUG << "FeatureExtractionModelTransformer::getConformingInput: WARNING: Input model is not conformable to DenseTimeValueModel" << endl;
Chris@320 475 }
Chris@320 476 return dtvm;
Chris@320 477 }
Chris@320 478
Chris@320 479 void
Chris@331 480 FeatureExtractionModelTransformer::run()
Chris@320 481 {
Chris@350 482 DenseTimeValueModel *input = getConformingInput();
Chris@320 483 if (!input) return;
Chris@320 484
Chris@849 485 if (m_outputs.empty()) return;
Chris@320 486
Chris@850 487 Transform primaryTransform = m_transforms[0];
Chris@850 488
Chris@497 489 while (!input->isReady() && !m_abandoned) {
Chris@690 490 SVDEBUG << "FeatureExtractionModelTransformer::run: Waiting for input model to be ready..." << endl;
Chris@497 491 usleep(500000);
Chris@320 492 }
Chris@497 493 if (m_abandoned) return;
Chris@320 494
Chris@350 495 size_t sampleRate = input->getSampleRate();
Chris@320 496
Chris@320 497 size_t channelCount = input->getChannelCount();
Chris@320 498 if (m_plugin->getMaxChannelCount() < channelCount) {
Chris@320 499 channelCount = 1;
Chris@320 500 }
Chris@320 501
Chris@320 502 float **buffers = new float*[channelCount];
Chris@320 503 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@850 504 buffers[ch] = new float[primaryTransform.getBlockSize() + 2];
Chris@320 505 }
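// (The extra two floats per channel let the same buffers carry
// blockSize/2 + 1 interleaved real/imaginary pairs when the plugin
// wants frequency-domain input -- see the FFT readout loop below --
// while time-domain plugins use only the first blockSize samples.)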
Chris@320 506
Chris@850 507 size_t stepSize = primaryTransform.getStepSize();
Chris@850 508 size_t blockSize = primaryTransform.getBlockSize();
Chris@350 509
Chris@320 510 bool frequencyDomain = (m_plugin->getInputDomain() ==
Chris@320 511 Vamp::Plugin::FrequencyDomain);
Chris@320 512 std::vector<FFTModel *> fftModels;
Chris@320 513
Chris@320 514 if (frequencyDomain) {
Chris@320 515 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@320 516 FFTModel *model = new FFTModel
Chris@350 517 (getConformingInput(),
Chris@350 518 channelCount == 1 ? m_input.getChannel() : ch,
Chris@850 519 primaryTransform.getWindowType(),
Chris@350 520 blockSize,
Chris@350 521 stepSize,
Chris@350 522 blockSize,
Chris@334 523 false,
Chris@334 524 StorageAdviser::PrecisionCritical);
Chris@320 525 if (!model->isOK()) {
Chris@320 526 delete model;
Chris@850 527 for (int j = 0; j < (int)m_outputNos.size(); ++j) {
Chris@850 528 setCompletion(j, 100);
Chris@850 529 }
Chris@387 530 //!!! need a better way to handle this -- previously we were using a QMessageBox but that isn't an appropriate thing to do here either
Chris@387 531 throw AllocationFailed("Failed to create the FFT model for this feature extraction model transformer");
Chris@320 532 }
Chris@320 533 model->resume();
Chris@320 534 fftModels.push_back(model);
Chris@320 535 }
Chris@320 536 }
Chris@320 537
Chris@350 538 long startFrame = m_input.getModel()->getStartFrame();
Chris@350 539 long endFrame = m_input.getModel()->getEndFrame();
Chris@320 540
Chris@850 541 RealTime contextStartRT = primaryTransform.getStartTime();
Chris@850 542 RealTime contextDurationRT = primaryTransform.getDuration();
Chris@350 543
Chris@350 544 long contextStart =
Chris@350 545 RealTime::realTime2Frame(contextStartRT, sampleRate);
Chris@350 546
Chris@350 547 long contextDuration =
Chris@350 548 RealTime::realTime2Frame(contextDurationRT, sampleRate);
Chris@320 549
Chris@320 550 if (contextStart == 0 || contextStart < startFrame) {
Chris@320 551 contextStart = startFrame;
Chris@320 552 }
Chris@320 553
Chris@320 554 if (contextDuration == 0) {
Chris@320 555 contextDuration = endFrame - contextStart;
Chris@320 556 }
Chris@320 557 if (contextStart + contextDuration > endFrame) {
Chris@320 558 contextDuration = endFrame - contextStart;
Chris@320 559 }
Chris@320 560
Chris@320 561 long blockFrame = contextStart;
Chris@320 562
Chris@320 563 long prevCompletion = 0;
Chris@320 564
Chris@850 565 for (int j = 0; j < (int)m_outputNos.size(); ++j) {
Chris@850 566 setCompletion(j, 0);
Chris@850 567 }
Chris@320 568
Chris@556 569 float *reals = 0;
Chris@556 570 float *imaginaries = 0;
Chris@556 571 if (frequencyDomain) {
Chris@556 572 reals = new float[blockSize/2 + 1];
Chris@556 573 imaginaries = new float[blockSize/2 + 1];
Chris@556 574 }
Chris@556 575
Chris@678 576 QString error = "";
Chris@678 577
Chris@320 578 while (!m_abandoned) {
Chris@320 579
Chris@320 580 if (frequencyDomain) {
Chris@350 581 if (blockFrame - int(blockSize)/2 >
Chris@320 582 contextStart + contextDuration) break;
Chris@320 583 } else {
Chris@320 584 if (blockFrame >=
Chris@320 585 contextStart + contextDuration) break;
Chris@320 586 }
Chris@320 587
Chris@690 588 // SVDEBUG << "FeatureExtractionModelTransformer::run: blockFrame "
Chris@320 589 // << blockFrame << ", endFrame " << endFrame << ", blockSize "
Chris@687 590 // << blockSize << endl;
Chris@320 591
Chris@320 592 long completion =
Chris@350 593 (((blockFrame - contextStart) / stepSize) * 99) /
Chris@557 594 (contextDuration / stepSize + 1);
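// e.g. (figures purely illustrative) with stepSize 1024, contextStart 0,
// a contextDuration of 441000 frames and blockFrame 220500, this gives
// ((220500/1024) * 99) / (441000/1024 + 1) = (215 * 99) / 431 = 49,
// all in integer arithmetic.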
Chris@320 595
Chris@350 596 // channelCount is either m_input.getModel()->channelCount or 1
Chris@320 597
Chris@363 598 if (frequencyDomain) {
Chris@363 599 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@350 600 int column = (blockFrame - startFrame) / stepSize;
Chris@556 601 fftModels[ch]->getValuesAt(column, reals, imaginaries);
Chris@350 602 for (size_t i = 0; i <= blockSize/2; ++i) {
Chris@556 603 buffers[ch][i*2] = reals[i];
Chris@556 604 buffers[ch][i*2+1] = imaginaries[i];
Chris@320 605 }
Chris@678 606 error = fftModels[ch]->getError();
Chris@678 607 if (error != "") {
Chris@843 608 cerr << "FeatureExtractionModelTransformer::run: Abandoning, error is " << error << endl;
Chris@678 609 m_abandoned = true;
Chris@678 610 m_message = error;
Chris@678 611 }
Chris@363 612 }
Chris@363 613 } else {
Chris@363 614 getFrames(channelCount, blockFrame, blockSize, buffers);
Chris@320 615 }
Chris@320 616
Chris@497 617 if (m_abandoned) break;
Chris@497 618
Chris@320 619 Vamp::Plugin::FeatureSet features = m_plugin->process
Chris@320 620 (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate));
Chris@320 621
Chris@497 622 if (m_abandoned) break;
Chris@497 623
Chris@850 624 for (int j = 0; j < (int)m_outputNos.size(); ++j) {
Chris@850 625 for (size_t fi = 0; fi < features[m_outputNos[j]].size(); ++fi) {
Chris@850 626 Vamp::Plugin::Feature feature = features[m_outputNos[j]][fi];
Chris@850 627 addFeature(j, blockFrame, feature);
Chris@850 628 }
Chris@850 629 }
Chris@320 630
Chris@320 631 if (blockFrame == contextStart || completion > prevCompletion) {
Chris@850 632 for (int j = 0; j < (int)m_outputNos.size(); ++j) {
Chris@850 633 setCompletion(j, completion);
Chris@850 634 }
Chris@320 635 prevCompletion = completion;
Chris@320 636 }
Chris@320 637
Chris@350 638 blockFrame += stepSize;
Chris@320 639 }
Chris@320 640
Chris@497 641 if (!m_abandoned) {
Chris@497 642 Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures();
Chris@320 643
Chris@850 644 for (int j = 0; j < (int)m_outputNos.size(); ++j) {
Chris@850 645 for (size_t fi = 0; fi < features[m_outputNos[j]].size(); ++fi) {
Chris@850 646 Vamp::Plugin::Feature feature = features[m_outputNos[j]][fi];
Chris@850 647 addFeature(j, blockFrame, feature);
Chris@850 648 }
Chris@497 649 }
Chris@497 650 }
Chris@320 651
Chris@850 652 for (int j = 0; j < (int)m_outputNos.size(); ++j) {
Chris@850 653 setCompletion(j, 100);
Chris@850 654 }
Chris@320 655
Chris@320 656 if (frequencyDomain) {
Chris@320 657 for (size_t ch = 0; ch < channelCount; ++ch) {
Chris@320 658 delete fftModels[ch];
Chris@320 659 }
Chris@556 660 delete[] reals;
Chris@556 661 delete[] imaginaries;
Chris@320 662 }
Chris@320 663 }
Chris@320 664
Chris@320 665 void
Chris@363 666 FeatureExtractionModelTransformer::getFrames(int channelCount,
Chris@363 667 long startFrame, long size,
Chris@363 668 float **buffers)
Chris@320 669 {
Chris@320 670 long offset = 0;
Chris@320 671
Chris@320 672 if (startFrame < 0) {
Chris@363 673 for (int c = 0; c < channelCount; ++c) {
Chris@363 674 for (int i = 0; i < size && startFrame + i < 0; ++i) {
Chris@363 675 buffers[c][i] = 0.0f;
Chris@363 676 }
Chris@320 677 }
Chris@320 678 offset = -startFrame;
Chris@320 679 size -= offset;
Chris@320 680 if (size <= 0) return;
Chris@320 681 startFrame = 0;
Chris@320 682 }
Chris@320 683
Chris@350 684 DenseTimeValueModel *input = getConformingInput();
Chris@350 685 if (!input) return;
Chris@363 686
Chris@363 687 long got = 0;
Chris@350 688
Chris@363 689 if (channelCount == 1) {
Chris@363 690
Chris@363 691 got = input->getData(m_input.getChannel(), startFrame, size,
Chris@363 692 buffers[0] + offset);
Chris@363 693
Chris@363 694 if (m_input.getChannel() == -1 && input->getChannelCount() > 1) {
Chris@363 695 // use mean instead of sum, as plugin input
Chris@363 696 float cc = float(input->getChannelCount());
Chris@363 697 for (long i = 0; i < size; ++i) {
Chris@363 698 buffers[0][i + offset] /= cc;
Chris@363 699 }
Chris@363 700 }
Chris@363 701
Chris@363 702 } else {
Chris@363 703
Chris@363 704 float **writebuf = buffers;
Chris@363 705 if (offset > 0) {
Chris@363 706 writebuf = new float *[channelCount];
Chris@363 707 for (int i = 0; i < channelCount; ++i) {
Chris@363 708 writebuf[i] = buffers[i] + offset;
Chris@363 709 }
Chris@363 710 }
Chris@363 711
Chris@363 712 got = input->getData(0, channelCount-1, startFrame, size, writebuf);
Chris@363 713
Chris@363 714 if (writebuf != buffers) delete[] writebuf;
Chris@363 715 }
Chris@320 716
Chris@320 717 while (got < size) {
Chris@363 718 for (int c = 0; c < channelCount; ++c) {
Chris@363 719 buffers[c][got + offset] = 0.0;
Chris@363 720 }
Chris@320 721 ++got;
Chris@320 722 }
Chris@320 723 }
Chris@320 724
Chris@320 725 void
Chris@850 726 FeatureExtractionModelTransformer::addFeature(int n,
Chris@850 727 size_t blockFrame,
Chris@850 728 const Vamp::Plugin::Feature &feature)
Chris@320 729 {
Chris@350 730 size_t inputRate = m_input.getModel()->getSampleRate();
Chris@320 731
Chris@843 732 // cerr << "FeatureExtractionModelTransformer::addFeature: blockFrame = "
Chris@712 733 // << blockFrame << ", hasTimestamp = " << feature.hasTimestamp
Chris@712 734 // << ", timestamp = " << feature.timestamp << ", hasDuration = "
Chris@712 735 // << feature.hasDuration << ", duration = " << feature.duration
Chris@843 736 // << endl;
Chris@320 737
Chris@320 738 int binCount = 1;
Chris@849 739 if (m_descriptors[n]->hasFixedBinCount) {
Chris@849 740 binCount = m_descriptors[n]->binCount;
Chris@320 741 }
Chris@320 742
Chris@320 743 size_t frame = blockFrame;
Chris@320 744
Chris@849 745 if (m_descriptors[n]->sampleType ==
Chris@320 746 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
Chris@320 747
Chris@320 748 if (!feature.hasTimestamp) {
Chris@843 749 cerr
Chris@331 750 << "WARNING: FeatureExtractionModelTransformer::addFeature: "
Chris@320 751 << "Feature has variable sample rate but no timestamp!"
Chris@843 752 << endl;
Chris@320 753 return;
Chris@320 754 } else {
Chris@320 755 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate);
Chris@320 756 }
Chris@320 757
Chris@849 758 } else if (m_descriptors[n]->sampleType ==
Chris@320 759 Vamp::Plugin::OutputDescriptor::FixedSampleRate) {
Chris@320 760
Chris@779 761 if (!feature.hasTimestamp) {
Chris@849 762 ++m_fixedRateFeatureNos[n];
Chris@779 763 } else {
Chris@779 764 RealTime ts(feature.timestamp.sec, feature.timestamp.nsec);
Chris@849 765 m_fixedRateFeatureNos[n] =
Chris@849 766 lrint(ts.toDouble() * m_descriptors[n]->sampleRate);
Chris@779 767 }
Chris@779 768
Chris@849 769 frame = lrintf((m_fixedRateFeatureNos[n] / m_descriptors[n]->sampleRate)
Chris@779 770 * inputRate);
Chris@320 771 }
Chris@320 772
Chris@441 773 // Rather than repeat the complicated tests from the constructor
Chris@441 774 // to determine what sort of model we must be adding the features
Chris@441 775 // to, we instead test what sort of model the constructor decided
Chris@441 776 // to create.
Chris@320 777
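// (isOutput<T>(n) and getConformingOutput<T>(n) are assumed here to be
//  small templated helpers declared in the class header, presumably
//  along the lines of
//      template <typename T> bool isOutput(int n) {
//          return dynamic_cast<T *>(m_outputs[n]) != 0;
//      }
//  -- a sketch of the mechanism, not the actual declarations.)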
Chris@849 778 if (isOutput<SparseOneDimensionalModel>(n)) {
Chris@441 779
Chris@441 780 SparseOneDimensionalModel *model =
Chris@849 781 getConformingOutput<SparseOneDimensionalModel>(n);
Chris@320 782 if (!model) return;
Chris@350 783
Chris@441 784 model->addPoint(SparseOneDimensionalModel::Point
Chris@441 785 (frame, feature.label.c_str()));
Chris@320 786
Chris@849 787 } else if (isOutput<SparseTimeValueModel>(n)) {
Chris@320 788
Chris@350 789 SparseTimeValueModel *model =
Chris@849 790 getConformingOutput<SparseTimeValueModel>(n);
Chris@320 791 if (!model) return;
Chris@350 792
Chris@454 793 for (int i = 0; i < feature.values.size(); ++i) {
Chris@454 794
Chris@454 795 float value = feature.values[i];
Chris@454 796
Chris@454 797 QString label = feature.label.c_str();
Chris@454 798 if (feature.values.size() > 1) {
Chris@454 799 label = QString("[%1] %2").arg(i+1).arg(label);
Chris@454 800 }
Chris@454 801
Chris@454 802 model->addPoint(SparseTimeValueModel::Point(frame, value, label));
Chris@454 803 }
Chris@320 804
Chris@849 805 } else if (isOutput<FlexiNoteModel>(n) || isOutput<NoteModel>(n) || isOutput<RegionModel>(n)) { //GF: Added Note Model
Chris@320 806
Chris@441 807 int index = 0;
Chris@441 808
Chris@441 809 float value = 0.0;
Chris@441 810 if (feature.values.size() > index) {
Chris@441 811 value = feature.values[index++];
Chris@441 812 }
Chris@320 813
Chris@320 814 float duration = 1;
Chris@441 815 if (feature.hasDuration) {
Chris@441 816 duration = Vamp::RealTime::realTime2Frame(feature.duration, inputRate);
Chris@441 817 } else {
Chris@441 818 if (feature.values.size() > index) {
Chris@441 819 duration = feature.values[index++];
Chris@441 820 }
Chris@441 821 }
gyorgyf@786 822
Chris@849 823 if (isOutput<FlexiNoteModel>(n)) { // GF: added for flexi note model
gyorgyf@786 824
gyorgyf@786 825 float velocity = 100;
gyorgyf@786 826 if (feature.values.size() > index) {
gyorgyf@786 827 velocity = feature.values[index++];
gyorgyf@786 828 }
gyorgyf@786 829 if (velocity < 0) velocity = 127;
gyorgyf@786 830 if (velocity > 127) velocity = 127;
gyorgyf@786 831
Chris@849 832 FlexiNoteModel *model = getConformingOutput<FlexiNoteModel>(n);
gyorgyf@786 833 if (!model) return;
gyorgyf@786 834 model->addPoint(FlexiNoteModel::Point(frame, value, // value is pitch
gyorgyf@786 835 lrintf(duration),
gyorgyf@786 836 velocity / 127.f,
gyorgyf@786 837 feature.label.c_str()));
gyorgyf@786 838 // GF: end -- added for flexi note model
Chris@849 839 } else if (isOutput<NoteModel>(n)) {
Chris@320 840
Chris@441 841 float velocity = 100;
Chris@441 842 if (feature.values.size() > index) {
Chris@441 843 velocity = feature.values[index++];
Chris@441 844 }
Chris@441 845 if (velocity < 0) velocity = 127;
Chris@441 846 if (velocity > 127) velocity = 127;
Chris@320 847
Chris@849 848 NoteModel *model = getConformingOutput<NoteModel>(n);
Chris@441 849 if (!model) return;
Chris@441 850 model->addPoint(NoteModel::Point(frame, value, // value is pitch
Chris@441 851 lrintf(duration),
Chris@441 852 velocity / 127.f,
Chris@441 853 feature.label.c_str()));
Chris@441 854 } else {
gyorgyf@786 855
Chris@849 856 RegionModel *model = getConformingOutput<RegionModel>(n);
Chris@454 857 if (!model) return;
Chris@454 858
Chris@474 859 if (feature.hasDuration && !feature.values.empty()) {
Chris@454 860
Chris@454 861 for (int i = 0; i < feature.values.size(); ++i) {
Chris@454 862
Chris@454 863 float value = feature.values[i];
Chris@454 864
Chris@454 865 QString label = feature.label.c_str();
Chris@454 866 if (feature.values.size() > 1) {
Chris@454 867 label = QString("[%1] %2").arg(i+1).arg(label);
Chris@454 868 }
Chris@454 869
Chris@454 870 model->addPoint(RegionModel::Point(frame, value,
Chris@454 871 lrintf(duration),
Chris@454 872 label));
Chris@454 873 }
Chris@454 874 } else {
Chris@454 875
Chris@441 876 model->addPoint(RegionModel::Point(frame, value,
Chris@441 877 lrintf(duration),
Chris@441 878 feature.label.c_str()));
Chris@454 879 }
Chris@441 880 }
Chris@320 881
Chris@849 882 } else if (isOutput<EditableDenseThreeDimensionalModel>(n)) {
Chris@320 883
Chris@533 884 DenseThreeDimensionalModel::Column values =
Chris@533 885 DenseThreeDimensionalModel::Column::fromStdVector(feature.values);
Chris@320 886
Chris@320 887 EditableDenseThreeDimensionalModel *model =
Chris@849 888 getConformingOutput<EditableDenseThreeDimensionalModel>(n);
Chris@320 889 if (!model) return;
Chris@320 890
Chris@320 891 model->setColumn(frame / model->getResolution(), values);
Chris@441 892
Chris@441 893 } else {
Chris@690 894 SVDEBUG << "FeatureExtractionModelTransformer::addFeature: Unknown output model type!" << endl;
Chris@320 895 }
Chris@320 896 }
Chris@320 897
Chris@320 898 void
Chris@850 899 FeatureExtractionModelTransformer::setCompletion(int n, int completion)
Chris@320 900 {
Chris@320 901 int binCount = 1;
Chris@849 902 if (m_descriptors[n]->hasFixedBinCount) {
Chris@849 903 binCount = m_descriptors[n]->binCount;
Chris@320 904 }
Chris@320 905
Chris@690 906 // SVDEBUG << "FeatureExtractionModelTransformer::setCompletion("
Chris@687 907 // << completion << ")" << endl;
Chris@320 908
Chris@849 909 if (isOutput<SparseOneDimensionalModel>(n)) {
Chris@320 910
Chris@350 911 SparseOneDimensionalModel *model =
Chris@849 912 getConformingOutput<SparseOneDimensionalModel>(n);
Chris@320 913 if (!model) return;
Chris@441 914 model->setCompletion(completion, true);
Chris@320 915
Chris@849 916 } else if (isOutput<SparseTimeValueModel>(n)) {
Chris@320 917
Chris@350 918 SparseTimeValueModel *model =
Chris@849 919 getConformingOutput<SparseTimeValueModel>(n);
Chris@320 920 if (!model) return;
Chris@441 921 model->setCompletion(completion, true);
Chris@320 922
Chris@849 923 } else if (isOutput<NoteModel>(n)) {
Chris@320 924
Chris@849 925 NoteModel *model = getConformingOutput<NoteModel>(n);
Chris@320 926 if (!model) return;
Chris@441 927 model->setCompletion(completion, true);
gyorgyf@786 928
Chris@849 929 } else if (isOutput<FlexiNoteModel>(n)) {
gyorgyf@786 930
Chris@849 931 FlexiNoteModel *model = getConformingOutput<FlexiNoteModel>(n);
gyorgyf@786 932 if (!model) return;
gyorgyf@786 933 model->setCompletion(completion, true);
Chris@320 934
Chris@849 935 } else if (isOutput<RegionModel>(n)) {
Chris@441 936
Chris@849 937 RegionModel *model = getConformingOutput<RegionModel>(n);
Chris@441 938 if (!model) return;
Chris@441 939 model->setCompletion(completion, true);
Chris@441 940
Chris@849 941 } else if (isOutput<EditableDenseThreeDimensionalModel>(n)) {
Chris@320 942
Chris@320 943 EditableDenseThreeDimensionalModel *model =
Chris@849 944 getConformingOutput<EditableDenseThreeDimensionalModel>(n);
Chris@320 945 if (!model) return;
Chris@350 946 model->setCompletion(completion, true); //!!!m_context.updates);
Chris@320 947 }
Chris@320 948 }
Chris@320 949