FeatureExtractionModelTransformer.cpp
(Doxygen-generated source listing of this file. "Go to the documentation of this file." is leftover navigation text; lines that were hyperlinks in the original HTML were dropped during extraction, which is why some of the embedded line numbers below are missing.)
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2 
3 /*
4  Sonic Visualiser
5  An audio file viewer and annotation editor.
6  Centre for Digital Music, Queen Mary, University of London.
7  This file copyright 2006 Chris Cannam and QMUL.
8 
9  This program is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 2 of the
12  License, or (at your option) any later version. See the file
13  COPYING included with this distribution for more information.
14 */
15 
17 
19 
20 #include "plugin/PluginXml.h"
21 #include <vamp-hostsdk/Plugin.h>
22 
23 #include "data/model/Model.h"
24 #include "base/Window.h"
25 #include "base/Exceptions.h"
30 #include "data/model/NoteModel.h"
31 #include "data/model/RegionModel.h"
32 #include "data/model/FFTModel.h"
35 
36 #include "TransformFactory.h"
37 
38 #include <iostream>
39 
40 #include <QSettings>
41 
42 //#define DEBUG_FEATURE_EXTRACTION_TRANSFORMER_RUN 1
43 
// Single-transform constructor. Delegates to the ModelTransformer base with
// the given input and transform. No plugin is created here (m_plugin starts
// null, m_haveOutputs false); that happens later in initialise(). Logs the
// one transform's plugin id and output name.
// NOTE(review): the opening signature line (original line 44) is missing from
// this extraction; the surviving parameter line shows it ends with
// "const Transform &transform".
45  const Transform &transform) :
46  ModelTransformer(in, transform),
47  m_plugin(nullptr),
48  m_haveOutputs(false)
49 {
50  SVDEBUG << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: plugin " << m_transforms.begin()->getPluginIdentifier() << ", outputName " << m_transforms.begin()->getOutput() << endl;
51 }
52 
// Multi-transform constructor: accepts a list of transforms that are expected
// to differ only in choice of plugin output (enforced later, in initialise()).
// Logs the transform count, plus the first transform's plugin id and output
// name when the list is non-empty.
// NOTE(review): the opening signature line (original line 53) is missing from
// this extraction; the parameter line shows it ends with
// "const Transforms &transforms".
54  const Transforms &transforms) :
55  ModelTransformer(in, transforms),
56  m_plugin(nullptr),
57  m_haveOutputs(false)
58 {
59  if (m_transforms.empty()) {
60  SVDEBUG << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: " << transforms.size() << " transform(s)" << endl;
61  } else {
62  SVDEBUG << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: " << transforms.size() << " transform(s), first has plugin " << m_transforms.begin()->getPluginIdentifier() << ", outputName " << m_transforms.begin()->getOutput() << endl;
63  }
64 }
65 
// File-local helper: returns true if two transforms are identical in every
// respect except (possibly) their chosen plugin output — it copies the second
// transform, overwrites its output with the first's, then compares for full
// equality.
// NOTE(review): the name/parameter line (original line 67) is missing from
// this extraction; the body implies two Transform parameters t1 and t2.
66 static bool
68 {
69  Transform t2o(t2);
70  t2o.setOutput(t1.getOutput());
71  return t1 == t2o;
72 }
73 
// Prepare this transformer for processing, on the run thread: check the
// transforms are mutually compatible, obtain and initialise the plugin,
// resolve each transform's requested output to a descriptor, create the
// per-output bookkeeping, and finally publish m_haveOutputs under the mutex.
// Returns false (with m_message set and logged) on any failure.
74 bool
76 {
77  // This is (now) called from the run thread. The plugin is
78  // constructed, initialised, used, and destroyed all from a single
79  // thread.
80 
81  // All transforms must use the same plugin, parameters, and
82  // inputs: they can differ only in choice of plugin output. So we
83  // initialise based purely on the first transform in the list (but
84  // first check that they are actually similar as promised)
85 
86  for (int j = 1; in_range_for(m_transforms, j); ++j) {
// NOTE(review): original line 87 — the per-transform condition inside this
// loop, presumably a similarity test against m_transforms[0] — is missing
// from this extraction.
88  m_message = tr("Transforms supplied to a single FeatureExtractionModelTransformer instance must be similar in every respect except plugin output");
89  SVCERR << m_message << endl;
90  return false;
91  }
92  }
93 
94  Transform primaryTransform = m_transforms[0];
95 
96  QString pluginId = primaryTransform.getPluginIdentifier();
97 
// NOTE(review): original lines 98-99 are missing from this extraction; from
// the "if (!factory)" test below they evidently obtained a plugin factory for
// pluginId.
100 
101  if (!factory) {
102  m_message = tr("No factory available for feature extraction plugin id \"%1\" (unknown plugin type, or internal error?)").arg(pluginId);
103  SVCERR << m_message << endl;
104  return false;
105  }
106 
107  auto input = ModelById::getAs<DenseTimeValueModel>(getInputModel());
108  if (!input) {
109  m_message = tr("Input model for feature extraction plugin \"%1\" is of wrong type (internal error?)").arg(pluginId);
110  SVCERR << m_message << endl;
111  return false;
112  }
113 
114  SVDEBUG << "FeatureExtractionModelTransformer: Instantiating plugin for transform in thread "
115  << QThread::currentThreadId() << endl;
116 
117  m_plugin = factory->instantiatePlugin(pluginId, input->getSampleRate());
118  if (!m_plugin) {
119  m_message = tr("Failed to instantiate plugin \"%1\"").arg(pluginId);
120  SVCERR << m_message << endl;
121  return false;
122  }
123 
// NOTE(review): original lines 124 and 127 are missing from this extraction;
// each surviving line below is the argument list "(primaryTransform,
// m_plugin)" of a call whose name was on the lost line — presumably transform
// configuration/parameter application helpers. Confirm against upstream.
125  (primaryTransform, m_plugin);
126 
128  (primaryTransform, m_plugin);
129 
// Channel adaptation: if the plugin cannot take as many channels as the input
// has, fall back to a single (mixed-down) channel; if even that is below the
// plugin's minimum, fail.
130  int channelCount = input->getChannelCount();
131  if ((int)m_plugin->getMaxChannelCount() < channelCount) {
132  channelCount = 1;
133  }
134  if ((int)m_plugin->getMinChannelCount() > channelCount) {
135  m_message = tr("Cannot provide enough channels to feature extraction plugin \"%1\" (plugin min is %2, max %3; input model has %4)")
136  .arg(pluginId)
137  .arg(m_plugin->getMinChannelCount())
138  .arg(m_plugin->getMaxChannelCount())
139  .arg(input->getChannelCount());
140  SVCERR << m_message << endl;
141  return false;
142  }
143 
144  int step = primaryTransform.getStepSize();
145  int block = primaryTransform.getBlockSize();
146 
147  SVDEBUG << "Initialising feature extraction plugin with channels = "
148  << channelCount << ", step = " << step
149  << ", block = " << block << endl;
150 
// If the requested step/block are rejected, retry once with the plugin's own
// preferred sizes, recording the substitution in m_transforms[0] and in
// m_message so the user can see what happened.
151  if (!m_plugin->initialise(channelCount, step, block)) {
152 
153  int preferredStep = int(m_plugin->getPreferredStepSize());
154  int preferredBlock = int(m_plugin->getPreferredBlockSize());
155 
156  if (step != preferredStep || block != preferredBlock) {
157 
158  SVDEBUG << "Initialisation failed, trying again with preferred step = "
159  << preferredStep << ", block = " << preferredBlock << endl;
160 
161  if (!m_plugin->initialise(channelCount,
162  preferredStep,
163  preferredBlock)) {
164 
165  SVDEBUG << "Initialisation failed again" << endl;
166 
167  m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
168  SVCERR << m_message << endl;
169  return false;
170 
171  } else {
172 
173  SVDEBUG << "Initialisation succeeded this time" << endl;
174 
175  // Set these values into the primary transform in the list
176  m_transforms[0].setStepSize(preferredStep);
177  m_transforms[0].setBlockSize(preferredBlock);
178 
179  m_message = tr("Feature extraction plugin \"%1\" rejected the given step and block sizes (%2 and %3); using plugin defaults (%4 and %5) instead")
180  .arg(pluginId)
181  .arg(step)
182  .arg(block)
183  .arg(preferredStep)
184  .arg(preferredBlock);
185  SVCERR << m_message << endl;
186  }
187 
188  } else {
189 
190  SVDEBUG << "Initialisation failed (with step = " << step
191  << " and block = " << block
192  << ", both matching the plugin's preference)" << endl;
193 
194  m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
195  SVCERR << m_message << endl;
196  return false;
197  }
198  } else {
199  SVDEBUG << "Initialisation succeeded" << endl;
200  }
201 
// Version check is advisory only: a mismatch is reported via m_message but
// does not abort initialisation.
202  if (primaryTransform.getPluginVersion() != "") {
203  QString pv = QString("%1").arg(m_plugin->getPluginVersion());
204  if (pv != primaryTransform.getPluginVersion()) {
205  QString vm = tr("Transform was configured for version %1 of plugin \"%2\", but the plugin being used is version %3")
206  .arg(primaryTransform.getPluginVersion())
207  .arg(pluginId)
208  .arg(pv);
209  if (m_message != "") {
210  m_message = QString("%1; %2").arg(vm).arg(m_message);
211  } else {
212  m_message = vm;
213  }
214  SVCERR << m_message << endl;
215  }
216  }
217 
218  Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
219 
220  if (outputs.empty()) {
221  m_message = tr("Plugin \"%1\" has no outputs").arg(pluginId);
222  SVCERR << m_message << endl;
223  return false;
224  }
225 
// Resolve each transform's requested output name to an output index and
// descriptor. An empty requested name matches the first output. Parallel
// vectors m_outputNos / m_descriptors / m_fixedRateFeatureNos grow together,
// which is why the "no output named" check below tests m_descriptors' size.
226  for (int j = 0; in_range_for(m_transforms, j); ++j) {
227 
228  for (int i = 0; in_range_for(outputs, i); ++i) {
229 
230  if (m_transforms[j].getOutput() == "" ||
231  outputs[i].identifier ==
232  m_transforms[j].getOutput().toStdString()) {
233 
234  m_outputNos.push_back(i);
235  m_descriptors.push_back(outputs[i]);
236  m_fixedRateFeatureNos.push_back(-1); // we increment before use
237  break;
238  }
239  }
240 
241  if (!in_range_for(m_descriptors, j)) {
242  m_message = tr("Plugin \"%1\" has no output named \"%2\"")
243  .arg(pluginId)
244  .arg(m_transforms[j].getOutput());
245  SVCERR << m_message << endl;
246  return false;
247  }
248  }
249 
250  for (int j = 0; in_range_for(m_transforms, j); ++j) {
// NOTE(review): original line 251 is missing from this extraction; given the
// surrounding code it presumably created the output model(s) for transform j
// (cf. createOutputModels) before resetting its completion. Confirm upstream.
252  setCompletion(j, 0);
253  }
254 
// Publish the outputs: waiters blocked on m_outputsCondition (see the
// await-outputs method below) are woken once m_haveOutputs is set.
255  m_outputMutex.lock();
256  m_haveOutputs = true;
257  m_outputsCondition.wakeAll();
258  m_outputMutex.unlock();
259 
260  return true;
261 }
262 
// Release the plugin (by resetting the shared_ptr) and clear the cached
// output descriptors. Runs on the run thread, matching initialise(). Any
// exception thrown while the plugin is destroyed is caught, logged, and
// recorded in m_message rather than propagated.
// NOTE(review): the signature line (original line 264) is missing from this
// extraction.
263 void
265 {
266  SVDEBUG << "FeatureExtractionModelTransformer: deleting plugin for transform in thread "
267  << QThread::currentThreadId() << endl;
268 
269  try {
270  m_plugin = {}; // does not necessarily delete, as it's a
271  // shared_ptr, but in the design case it will
272  } catch (const std::exception &e) {
273  // A destructor shouldn't throw an exception. But at one point
274  // (now fixed) our plugin stub destructor could have
275  // accidentally done so, so just in case:
276  SVCERR << "FeatureExtractionModelTransformer: caught exception while deleting plugin: " << e.what() << endl;
277  m_message = e.what();
278  }
279 
280  m_descriptors.clear();
281 }
282 
// Create the output model for transform/output index n, choosing the model
// class from the output descriptor's bin count, sample type, extents and
// (for Vamp API v2+) duration flag:
//   - no bins, no duration            -> SparseOneDimensionalModel (instants)
//   - has duration (or old-API multi-bin variable-rate) -> NoteModel or
//     RegionModel (NoteModel if multi-bin or the unit looks like Hz/MIDI)
//   - one bin, or variable sample rate -> SparseTimeValueModel (possibly with
//     additional per-bin models created lazily later)
//   - otherwise (fixed rate, many bins) -> dense 3D model
// The created model is registered with ModelById and appended to m_outputs.
// NOTE(review): the signature line (original line 284) is missing from this
// extraction; the body uses a single output index parameter n.
283 void
285 {
286  auto input = ModelById::getAs<DenseTimeValueModel>(getInputModel());
287  if (!input) return;
288 
289  PluginRDFDescription description(m_transforms[n].getPluginIdentifier());
290  QString outputId = m_transforms[n].getOutput();
291 
292  int binCount = 1;
293  float minValue = 0.0, maxValue = 0.0;
294  bool haveExtents = false;
295  bool haveBinCount = m_descriptors[n].hasFixedBinCount;
296 
297  if (haveBinCount) {
298  binCount = (int)m_descriptors[n].binCount;
299  }
300 
301  m_needAdditionalModels[n] = false;
302 
303  if (binCount > 0 && m_descriptors[n].hasKnownExtents) {
304  minValue = m_descriptors[n].minValue;
305  maxValue = m_descriptors[n].maxValue;
306  haveExtents = true;
307  }
308 
309  sv_samplerate_t modelRate = input->getSampleRate();
310  sv_samplerate_t outputRate = modelRate;
311  int modelResolution = 1;
312 
313  if (m_descriptors[n].sampleType !=
314  Vamp::Plugin::OutputDescriptor::OneSamplePerStep) {
315 
316  outputRate = m_descriptors[n].sampleRate;
317 
// NOTE(review): original lines 318-322 (a comment block, to judge from the
// surviving code either side) are missing from this extraction.
323  if (outputRate > input->getSampleRate()) {
324  SVDEBUG << "WARNING: plugin reports output sample rate as "
325  << outputRate
326  << " (can't display features with finer resolution than the input rate of "
327  << modelRate << ")" << endl;
328  outputRate = modelRate;
329  }
330  }
331 
// Derive the model's frame resolution from the output's sample type.
332  switch (m_descriptors[n].sampleType) {
333 
334  case Vamp::Plugin::OutputDescriptor::VariableSampleRate:
335  if (outputRate != 0.0) {
336  modelResolution = int(round(modelRate / outputRate));
337  }
338  break;
339 
340  case Vamp::Plugin::OutputDescriptor::OneSamplePerStep:
341  modelResolution = m_transforms[n].getStepSize();
342  break;
343 
344  case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
345  if (outputRate <= 0.0) {
346  SVDEBUG << "WARNING: Fixed sample-rate plugin reports invalid sample rate " << m_descriptors[n].sampleRate << "; defaulting to input rate of " << input->getSampleRate() << endl;
347  modelResolution = 1;
348  } else {
349  modelResolution = int(round(modelRate / outputRate));
350 // cerr << "modelRate = " << modelRate << ", descriptor rate = " << outputRate << ", modelResolution = " << modelResolution << endl;
351  }
352  break;
353  }
354 
355  bool preDurationPlugin = (m_plugin->getVampApiVersion() < 2);
356 
357  std::shared_ptr<Model> out;
358 
359  if (binCount == 0 &&
360  (preDurationPlugin || !m_descriptors[n].hasDuration)) {
361 
362  // Anything with no value and no duration is an instant
363 
364  SVDEBUG << "FeatureExtractionModelTransformer::createOutputModels: "
365  << "creating a SparseOneDimensionalModel" << endl;
366 
367  out = std::make_shared<SparseOneDimensionalModel>
368  (modelRate, modelResolution, false);
369 
370  QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
371  out->setRDFTypeURI(outputEventTypeURI);
372 
373  } else if ((preDurationPlugin && binCount > 1 &&
374  (m_descriptors[n].sampleType ==
375  Vamp::Plugin::OutputDescriptor::VariableSampleRate)) ||
376  (!preDurationPlugin && m_descriptors[n].hasDuration)) {
377 
378  // For plugins using the old v1 API without explicit duration,
379  // we treat anything that has multiple bins (i.e. that has the
380  // potential to have value and duration) and a variable sample
381  // rate as a note model, taking its values as pitch, duration
382  // and velocity (if present) respectively. This is the same
383  // behaviour as always applied by SV to these plugins in the
384  // past.
385 
386  // For plugins with the newer API, we treat anything with
387  // duration as either a note model with pitch and velocity, or
388  // a region model.
389 
390  // How do we know whether it's an interval or note model?
391  // What's the essential difference? Is a note model any
392  // interval model using a Hz or "MIDI pitch" scale? There
393  // isn't really a reliable test for "MIDI pitch"... Does a
394  // note model always have velocity? This is a good question
395  // to be addressed by accompanying RDF, but for the moment we
396  // will do the following...
397 
398  bool isNoteModel = false;
399 
400  // Regions have only value (and duration -- we can't extract a
401  // region model from an old-style plugin that doesn't support
402  // duration)
403  if (binCount > 1) isNoteModel = true;
404 
405  // Regions do not have units of Hz or MIDI things (a sweeping
406  // assumption!)
407  if (m_descriptors[n].unit == "Hz" ||
408  m_descriptors[n].unit.find("MIDI") != std::string::npos ||
409  m_descriptors[n].unit.find("midi") != std::string::npos) {
410  isNoteModel = true;
411  }
412 
413  // If we had a "sparse 3D model", we would have the additional
414  // problem of determining whether to use that here (if bin
415  // count > 1). But we don't.
416 
// NOTE(review): this outer `settings` object is unused — it is immediately
// shadowed by the identical declaration inside the isNoteModel branch below.
// Harmless, but worth removing upstream.
417  QSettings settings;
418 
419  if (isNoteModel) {
420 
421  QSettings settings;
422  settings.beginGroup("Transformer");
423  bool flexi = settings.value("use-flexi-note-model", false).toBool();
424  settings.endGroup();
425 
426  SVDEBUG << "FeatureExtractionModelTransformer::createOutputModels: "
427  << "creating a NoteModel (flexi = " << flexi << ")" << endl;
428 
429  NoteModel *model;
430  if (haveExtents) {
431  model = new NoteModel
432  (modelRate, modelResolution, minValue, maxValue, false,
// NOTE(review): original line 433 — the trailing constructor argument(s) of
// this NoteModel call — is missing from this extraction (presumably a
// subtype/flexi flag given the `flexi` setting read above; confirm upstream).
434  } else {
435  model = new NoteModel
436  (modelRate, modelResolution, false,
// NOTE(review): original line 437, the matching trailing argument(s) for this
// overload, is likewise missing from this extraction.
438  }
439  model->setScaleUnits(m_descriptors[n].unit.c_str());
440  out.reset(model);
441 
442  } else {
443 
444  SVDEBUG << "FeatureExtractionModelTransformer::createOutputModels: "
445  << "creating a RegionModel" << endl;
446 
447  RegionModel *model;
448  if (haveExtents) {
449  model = new RegionModel
450  (modelRate, modelResolution, minValue, maxValue, false);
451  } else {
452  model = new RegionModel
453  (modelRate, modelResolution, false);
454  }
455  model->setScaleUnits(m_descriptors[n].unit.c_str());
456  out.reset(model);
457  }
458 
459  QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
460  out->setRDFTypeURI(outputEventTypeURI);
461 
462  } else if (binCount == 1 ||
463  (m_descriptors[n].sampleType ==
464  Vamp::Plugin::OutputDescriptor::VariableSampleRate)) {
465 
466  // Anything that is not a 1D, note, or interval model and that
467  // has only one value per result must be a sparse time value
468  // model.
469 
470  // Anything that is not a 1D, note, or interval model and that
471  // has a variable sample rate is treated as a set of sparse
472  // time value models, one per output bin, because we lack a
473  // sparse 3D model.
474 
475  // Anything that is not a 1D, note, or interval model and that
476  // has a fixed sample rate but an unknown number of values per
477  // result is also treated as a set of sparse time value models.
478 
479  // For sets of sparse time value models, we create a single
480  // model first as the "standard" output and then create models
481  // for bins 1+ in the additional model map (mapping the output
482  // descriptor to a list of models indexed by bin-1). But we
483  // don't create the additional models yet, as this case has to
484  // work even if the number of bins is unknown at this point --
485  // we create an additional model (copying its parameters from
486  // the default one) each time a new bin is encountered.
487 
488  if (!haveBinCount || binCount > 1) {
489  m_needAdditionalModels[n] = true;
490  }
491 
492  SVDEBUG << "FeatureExtractionModelTransformer::createOutputModels: "
493  << "creating a SparseTimeValueModel "
494  << "(additional models to come? -> "
495  << m_needAdditionalModels[n] << ")" << endl;
496 
497  SparseTimeValueModel *model;
498  if (haveExtents) {
499  model = new SparseTimeValueModel
500  (modelRate, modelResolution, minValue, maxValue, false);
501  } else {
502  model = new SparseTimeValueModel
503  (modelRate, modelResolution, false);
504  }
505 
506  Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
507  model->setScaleUnits(outputs[m_outputNos[n]].unit.c_str());
508 
509  out.reset(model);
510 
511  QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
512  out->setRDFTypeURI(outputEventTypeURI);
513 
514  } else {
515 
516  // Anything that is not a 1D, note, or interval model and that
517  // has a fixed sample rate and more than one value per result
518  // must be a dense 3D model.
519 
520  SVDEBUG << "FeatureExtractionModelTransformer::createOutputModels: "
521  << "creating a BasicCompressedDenseThreeDimensionalModel"
522  << endl;
523 
524  auto model =
// NOTE(review): original line 525 — the allocation line, by the SVDEBUG
// message above presumably "new BasicCompressedDenseThreeDimensionalModel" —
// is missing from this extraction.
526  (modelRate, modelResolution, binCount, false);
527 
528  if (!m_descriptors[n].binNames.empty()) {
529  std::vector<QString> names;
530  for (int i = 0; i < (int)m_descriptors[n].binNames.size(); ++i) {
531  names.push_back(m_descriptors[n].binNames[i].c_str());
532  }
533  model->setBinNames(names);
534  }
535 
536  out.reset(model);
537 
538  QString outputSignalTypeURI = description.getOutputSignalTypeURI(outputId);
539  out->setRDFTypeURI(outputSignalTypeURI);
540  }
541 
542  if (out) {
543  out->setSourceModel(getInputModel());
544  m_outputs.push_back(ModelById::add(out));
545  }
546 }
547 
// Block the calling thread until initialise() has published the output models
// (m_haveOutputs set under m_outputMutex) or the transformer is abandoned.
// The 500 ms wait timeout makes the loop re-check m_abandoned periodically
// even if no wakeAll() arrives.
// NOTE(review): the signature line (original line 549) is missing from this
// extraction.
548 void
550 {
551  m_outputMutex.lock();
552  while (!m_haveOutputs && !m_abandoned) {
553  m_outputsCondition.wait(&m_outputMutex, 500);
554  }
555  m_outputMutex.unlock();
556 }
557 
// Destructor: intentionally empty — cleanup responsibilities are documented
// in the body comment below.
// NOTE(review): the signature line (original line 558) is missing from this
// extraction.
559 {
560  // Parent class dtor set the abandoned flag and waited for the run
561  // thread to exit; the run thread owns the plugin, and should have
562  // destroyed it before exiting (via a call to deinitialise)
563 }
564 
// Flatten the per-output, per-bin map of additional models
// (m_additionalModels: output index -> (bin -> ModelId)) into a single
// Models list and return it.
// NOTE(review): the return-type/signature lines (original lines 565-566) are
// missing from this extraction.
567 {
568  Models mm;
569  for (auto mp : m_additionalModels) {
570  for (auto m: mp.second) {
571  mm.push_back(m.second);
572  }
573  }
574  return mm;
575 }
576 
// Return true if any output has been flagged (in m_needAdditionalModels,
// set by createOutputModels) as requiring additional per-bin models.
// NOTE(review): the signature line (original line 578) is missing from this
// extraction.
577 bool
579 {
580  for (auto p : m_needAdditionalModels) {
581  if (p.second) return true;
582  }
583  return false;
584 }
585 
// Return (creating lazily on first use) the additional SparseTimeValueModel
// for bin binNo (> 0) of output n. Bin 0 is always the primary model in
// m_outputs, so asking for it here is an internal error. The new model copies
// rate, resolution, extents, units and RDF type from the primary model, is
// registered with ModelById, and is cached in m_additionalModels.
// NOTE(review): the signature line (original line 587) is missing from this
// extraction; the body implies parameters (n, binNo).
586 ModelId
588 {
589  if (binNo == 0) {
590  SVCERR << "Internal error: binNo == 0 in getAdditionalModel (should be using primary model, not calling getAdditionalModel)" << endl;
591  return {};
592  }
593 
594  if (!in_range_for(m_outputs, n)) {
595  SVCERR << "getAdditionalModel: Output " << n << " out of range" << endl;
596  return {};
597  }
598 
// NOTE(review): original lines 599-600 — the condition guarding this early
// return, presumably a check that output n actually wants additional models
// (m_needAdditionalModels) — are missing from this extraction.
601  return {};
602  }
603 
604  if (!m_additionalModels[n][binNo].isNone()) {
605  return m_additionalModels[n][binNo];
606  }
607 
608  SVDEBUG << "getAdditionalModel(" << n << ", " << binNo
609  << "): creating" << endl;
610 
611  auto baseModel = ModelById::getAs<SparseTimeValueModel>(m_outputs[n]);
612  if (!baseModel) {
613  SVCERR << "getAdditionalModel: Output model not conformable, or has vanished" << endl;
614  return {};
615  }
616 
617  SVDEBUG << "getAdditionalModel(" << n << ", " << binNo
618  << "): (from " << baseModel << ")" << endl;
619 
620  SparseTimeValueModel *additional =
621  new SparseTimeValueModel(baseModel->getSampleRate(),
622  baseModel->getResolution(),
623  baseModel->getValueMinimum(),
624  baseModel->getValueMaximum(),
625  false);
626 
627  additional->setScaleUnits(baseModel->getScaleUnits());
628  additional->setRDFTypeURI(baseModel->getRDFTypeURI());
629 
630  ModelId additionalId = ModelById::add
631  (std::shared_ptr<SparseTimeValueModel>(additional));
632  m_additionalModels[n][binNo] = additionalId;
633  return additionalId;
634 }
635 
// Worker thread body: initialise the plugin, wait for the input model to
// become ready, then iterate over the requested time context block by block —
// feeding either time-domain frames (getFrames) or FFT columns (per-channel
// FFTModel) to Vamp::Plugin::process() — routing every returned feature to
// addFeature() and updating per-output completion. Finishes by collecting
// getRemainingFeatures(), forcing completion to 100, freeing all buffers and
// FFT models, and calling deinitialise().
// NOTE(review): the signature line (original line 637) is missing from this
// extraction.
636 void
638 {
639  try {
640  if (!initialise()) {
641  abandon();
642  return;
643  }
644  } catch (const std::exception &e) {
645  abandon();
646  m_message = e.what();
647  return;
648  }
649 
650  if (m_outputs.empty()) {
651  abandon();
652  return;
653  }
654 
655  Transform primaryTransform = m_transforms[0];
656 
657  ModelId inputId = getInputModel();
658 
// Poll until the input model reports ready, re-looking it up each pass so a
// deleted/failed model aborts the run.
659  bool ready = false;
660  while (!ready && !m_abandoned) {
661  { // scope so as to release input shared_ptr before sleeping
662  auto input = ModelById::getAs<DenseTimeValueModel>(inputId);
663  if (!input || !input->isOK()) {
664  abandon();
665  return;
666  }
667  ready = input->isReady();
668  }
669  if (!ready) {
670  SVDEBUG << "FeatureExtractionModelTransformer::run: Waiting for input model "
671  << inputId << " to be ready..." << endl;
672  usleep(500000);
673  }
674  }
675  if (m_abandoned) return;
676 
677 #ifdef DEBUG_FEATURE_EXTRACTION_TRANSFORMER_RUN
678  SVDEBUG << "FeatureExtractionModelTransformer::run: Input model "
679  << inputId << " is ready, going ahead" << endl;
680 #endif
681 
682  sv_samplerate_t sampleRate;
683  int channelCount;
684  sv_frame_t startFrame;
685  sv_frame_t endFrame;
686 
687  { // scope so as not to have this borrowed pointer retained around
688  // the edges of the process loop
689  auto input = ModelById::getAs<DenseTimeValueModel>(inputId);
690  if (!input) {
691  abandon();
692  return;
693  }
694 
695  sampleRate = input->getSampleRate();
696 
697  channelCount = input->getChannelCount();
698  if ((int)m_plugin->getMaxChannelCount() < channelCount) {
699  channelCount = 1;
700  }
701 
702  startFrame = input->getStartFrame();
703  endFrame = input->getEndFrame();
704  }
705 
// Per-channel process buffers; +2 floats leaves room for the interleaved
// real/imaginary pair at the Nyquist bin in the frequency-domain case.
706  float **buffers = new float*[channelCount];
707  for (int ch = 0; ch < channelCount; ++ch) {
708  buffers[ch] = new float[primaryTransform.getBlockSize() + 2];
709  }
710 
711  int stepSize = primaryTransform.getStepSize();
712  int blockSize = primaryTransform.getBlockSize();
713 
714  bool frequencyDomain = (m_plugin->getInputDomain() ==
715  Vamp::Plugin::FrequencyDomain);
716 
717  std::vector<FFTModel *> fftModels;
718 
719  if (frequencyDomain) {
720 #ifdef DEBUG_FEATURE_EXTRACTION_TRANSFORMER_RUN
721  SVDEBUG << "FeatureExtractionModelTransformer::run: Input is frequency-domain" << endl;
722 #endif
723  for (int ch = 0; ch < channelCount; ++ch) {
724  FFTModel *model = new FFTModel
725  (inputId,
726  channelCount == 1 ? m_input.getChannel() : ch,
727  primaryTransform.getWindowType(),
728  blockSize,
729  stepSize,
730  blockSize);
731  if (!model->isOK() || model->getError() != "") {
732  QString err = model->getError();
733  delete model;
734  for (int j = 0; in_range_for(m_outputNos, j); ++j) {
735  setCompletion(j, 100);
736  }
737  SVDEBUG << "FeatureExtractionModelTransformer::run: Failed to create FFT model for input model " << inputId << ": " << err << endl;
738  m_message = "Failed to create the FFT model for this feature extraction model transformer: error is: " + err;
739  for (int cch = 0; cch < ch; ++cch) {
740  delete fftModels[cch];
741  }
// NOTE(review): this early return frees the FFT models but not the `buffers`
// arrays allocated above — they leak on this path. Worth fixing upstream.
742  abandon();
743  return;
744  }
745  fftModels.push_back(model);
746  }
747 #ifdef DEBUG_FEATURE_EXTRACTION_TRANSFORMER_RUN
748  SVDEBUG << "FeatureExtractionModelTransformer::run: Created FFT model(s) for frequency-domain input" << endl;
749 #endif
750  }
751 
// Clamp the transform's requested start/duration to the input model's extent.
752  RealTime contextStartRT = primaryTransform.getStartTime();
753  RealTime contextDurationRT = primaryTransform.getDuration();
754 
755  sv_frame_t contextStart =
756  RealTime::realTime2Frame(contextStartRT, sampleRate);
757 
758  sv_frame_t contextDuration =
759  RealTime::realTime2Frame(contextDurationRT, sampleRate);
760 
761  if (contextStart == 0 || contextStart < startFrame) {
762  contextStart = startFrame;
763  }
764 
765  if (contextDuration == 0) {
766  contextDuration = endFrame - contextStart;
767  }
768  if (contextStart + contextDuration > endFrame) {
769  contextDuration = endFrame - contextStart;
770  }
771 
772  sv_frame_t blockFrame = contextStart;
773 
774  long prevCompletion = 0;
775 
776  for (int j = 0; in_range_for(m_outputNos, j); ++j) {
777  setCompletion(j, 0);
778  }
779 
780  float *reals = nullptr;
781  float *imaginaries = nullptr;
782  if (frequencyDomain) {
783  reals = new float[blockSize/2 + 1];
784  imaginaries = new float[blockSize/2 + 1];
785  }
786 
787  QString error = "";
788 
789  try {
790  while (!m_abandoned) {
791 
// Loop exit test: frequency-domain blocks are centred, so the loop runs half
// a block further than the time-domain case.
792  if (frequencyDomain) {
793  if (blockFrame - int(blockSize)/2 >
794  contextStart + contextDuration) {
795  break;
796  }
797  } else {
798  if (blockFrame >=
799  contextStart + contextDuration) {
800  break;
801  }
802  }
803 
804 #ifdef DEBUG_FEATURE_EXTRACTION_TRANSFORMER_RUN
805  SVDEBUG << "FeatureExtractionModelTransformer::run: blockFrame "
806  << blockFrame << ", endFrame " << endFrame << ", blockSize "
807  << blockSize << endl;
808 #endif
809 
810  int completion = int
811  ((((blockFrame - contextStart) / stepSize) * 99) /
812  (contextDuration / stepSize + 1));
813 
// Abandon the run if the input or any output model has been deleted from
// under us since the last block.
814  bool haveAllModels = true;
815  if (!ModelById::get(inputId)) {
816 #ifdef DEBUG_FEATURE_EXTRACTION_TRANSFORMER_RUN
817  SVDEBUG << "FeatureExtractionModelTransformer::run: Input model " << inputId << " no longer exists" << endl;
818 #endif
819  haveAllModels = false;
820  } else {
821 #ifdef DEBUG_FEATURE_EXTRACTION_TRANSFORMER_RUN
822  SVDEBUG << "Input model " << inputId << " still exists" << endl;
823 #endif
824  }
825  for (auto mid: m_outputs) {
826  if (!ModelById::get(mid)) {
827 #ifdef DEBUG_FEATURE_EXTRACTION_TRANSFORMER_RUN
828  SVDEBUG << "FeatureExtractionModelTransformer::run: Output model " << mid << " no longer exists" << endl;
829 #endif
830  haveAllModels = false;
831  } else {
832 #ifdef DEBUG_FEATURE_EXTRACTION_TRANSFORMER_RUN
833  SVDEBUG << "Output model " << mid << " still exists" << endl;
834 #endif
835  }
836  }
837  if (!haveAllModels) {
838  abandon();
839  break;
840  }
841 
842 #ifdef DEBUG_FEATURE_EXTRACTION_TRANSFORMER_RUN
843  SVDEBUG << "FeatureExtractionModelTransformer::run: All models still exist" << endl;
844 #endif
845 
846  // channelCount is either input->channelCount or 1
847 
// Fill the process buffers: interleaved (re, im) pairs from the FFT models in
// the frequency-domain case, raw samples via getFrames() otherwise. A failed
// FFT read yields a zeroed column rather than an abort; an FFT error string
// aborts the whole run.
848  if (frequencyDomain) {
849  for (int ch = 0; ch < channelCount; ++ch) {
850  int column = int((blockFrame - startFrame) / stepSize);
851  if (fftModels[ch]->getValuesAt(column, reals, imaginaries)) {
852  for (int i = 0; i <= blockSize/2; ++i) {
853  buffers[ch][i*2] = reals[i];
854  buffers[ch][i*2+1] = imaginaries[i];
855  }
856  } else {
857  for (int i = 0; i <= blockSize/2; ++i) {
858  buffers[ch][i*2] = 0.f;
859  buffers[ch][i*2+1] = 0.f;
860  }
861  }
862 
863  error = fftModels[ch]->getError();
864  if (error != "") {
865  SVCERR << "FeatureExtractionModelTransformer::run: Abandoning, error is " << error << endl;
866  m_abandoned = true;
867  m_message = error;
868  break;
869  }
870  }
871  } else {
872  getFrames(channelCount, blockFrame, blockSize, buffers);
873  }
874 
875  if (m_abandoned) break;
876 
877  auto features = m_plugin->process
878  (buffers,
879  RealTime::frame2RealTime(blockFrame, sampleRate)
880  .toVampRealTime());
881 
882  if (m_abandoned) break;
883 
// Dispatch each returned feature to the model for its transform index j
// (m_outputNos[j] maps j to the plugin's output number).
884  for (int j = 0; in_range_for(m_outputNos, j); ++j) {
885  for (int fi = 0; in_range_for(features[m_outputNos[j]], fi); ++fi) {
886  auto feature = features[m_outputNos[j]][fi];
887  addFeature(j, blockFrame, feature);
888  }
889  }
890 
891  if (blockFrame == contextStart || completion > prevCompletion) {
892  for (int j = 0; in_range_for(m_outputNos, j); ++j) {
893  setCompletion(j, completion);
894  }
895  prevCompletion = completion;
896  }
897 
898  blockFrame += stepSize;
899 
900  }
901 
902  if (!m_abandoned) {
903  auto features = m_plugin->getRemainingFeatures();
904 
905  for (int j = 0; in_range_for(m_outputNos, j); ++j) {
906  for (int fi = 0; in_range_for(features[m_outputNos[j]], fi); ++fi) {
907  auto feature = features[m_outputNos[j]][fi];
908  addFeature(j, blockFrame, feature);
909  if (m_abandoned) {
910  break;
911  }
912  }
913  }
914  }
915  } catch (const std::exception &e) {
916  SVCERR << "FeatureExtractionModelTransformer::run: Exception caught: "
917  << e.what() << endl;
918  m_abandoned = true;
919  m_message = e.what();
920  }
921 
922  for (int j = 0; j < (int)m_outputNos.size(); ++j) {
923  setCompletion(j, 100);
924  }
925 
926  if (frequencyDomain) {
927  for (int ch = 0; ch < channelCount; ++ch) {
928  delete fftModels[ch];
929  }
930  delete[] reals;
931  delete[] imaginaries;
932  }
933 
934  for (int ch = 0; ch < channelCount; ++ch) {
935  delete[] buffers[ch];
936  }
937  delete[] buffers;
938 
939  deinitialise();
940 }
941 
// Fill `buffers` with `size` time-domain samples per channel starting at
// `startFrame` of the input model: zero-pads any portion before frame 0 and
// any shortfall past the end of the available data, and (single-channel case
// with channel -1) converts the model's summed mix-down to a mean so the
// plugin receives averaged input.
// NOTE(review): the signature line (original line 943, with the function name
// and first parameter) is missing from this extraction; body usage implies
// the first parameter is `int channelCount`.
942 void
944  sv_frame_t startFrame,
945  sv_frame_t size,
946  float **buffers)
947 {
948  sv_frame_t offset = 0;
949 
// Negative start: zero the leading region and shift the request to frame 0.
950  if (startFrame < 0) {
951  for (int c = 0; c < channelCount; ++c) {
952  for (sv_frame_t i = 0; i < size && startFrame + i < 0; ++i) {
953  buffers[c][i] = 0.0f;
954  }
955  }
956  offset = -startFrame;
957  size -= offset;
958  if (size <= 0) return;
959  startFrame = 0;
960  }
961 
962  auto input = ModelById::getAs<DenseTimeValueModel>(getInputModel());
963  if (!input) {
964  return;
965  }
966 
967  sv_frame_t got = 0;
968 
969  if (channelCount == 1) {
970 
971  auto data = input->getData(m_input.getChannel(), startFrame, size);
972  got = data.size();
973 
974  copy(data.begin(), data.end(), buffers[0] + offset);
975 
976  if (m_input.getChannel() == -1 && input->getChannelCount() > 1) {
977  // use mean instead of sum, as plugin input
978  float cc = float(input->getChannelCount());
979  for (sv_frame_t i = 0; i < got; ++i) {
980  buffers[0][i + offset] /= cc;
981  }
982  }
983 
984  } else {
985 
986  auto data = input->getMultiChannelData(0, channelCount-1, startFrame, size);
987  if (!data.empty()) {
988  got = data[0].size();
989  for (int c = 0; in_range_for(data, c); ++c) {
990  copy(data[c].begin(), data[c].end(), buffers[c] + offset);
991  }
992  }
993  }
994 
// Zero-fill whatever the model could not supply (e.g. past end of input).
995  while (got < size) {
996  for (int c = 0; c < channelCount; ++c) {
997  buffers[c][got + offset] = 0.0;
998  }
999  ++got;
1000  }
1001 }
1002 
1003 void
1005  sv_frame_t blockFrame,
1006  const Vamp::Plugin::Feature &feature)
1007 {
1008  auto input = ModelById::get(getInputModel());
1009  if (!input) return;
1010 
1011  sv_samplerate_t inputRate = input->getSampleRate();
1012 
1013 // cerr << "FeatureExtractionModelTransformer::addFeature: blockFrame = "
1014 // << blockFrame << ", hasTimestamp = " << feature.hasTimestamp
1015 // << ", timestamp = " << feature.timestamp << ", hasDuration = "
1016 // << feature.hasDuration << ", duration = " << feature.duration
1017 // << endl;
1018 
1019  sv_frame_t frame = blockFrame;
1020 
1021  if (m_descriptors[n].sampleType ==
1022  Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
1023 
1024  if (!feature.hasTimestamp) {
1025  SVDEBUG
1026  << "WARNING: FeatureExtractionModelTransformer::addFeature: "
1027  << "Feature has variable sample rate but no timestamp!"
1028  << endl;
1029  return;
1030  } else {
1031  frame = RealTime::realTime2Frame(feature.timestamp, inputRate);
1032  }
1033 
1034 // cerr << "variable sample rate: timestamp = " << feature.timestamp
1035 // << " at input rate " << inputRate << " -> " << frame << endl;
1036 
1037  } else if (m_descriptors[n].sampleType ==
1038  Vamp::Plugin::OutputDescriptor::FixedSampleRate) {
1039 
1040  sv_samplerate_t rate = m_descriptors[n].sampleRate;
1041  if (rate <= 0.0) {
1042  rate = inputRate;
1043  }
1044 
1045  if (!feature.hasTimestamp) {
1046  ++m_fixedRateFeatureNos[n];
1047  } else {
1048  RealTime ts(feature.timestamp.sec, feature.timestamp.nsec);
1049  m_fixedRateFeatureNos[n] = (int)lrint(ts.toDouble() * rate);
1050  }
1051 
1052 // cerr << "m_fixedRateFeatureNo = " << m_fixedRateFeatureNos[n]
1053 // << ", m_descriptor->sampleRate = " << m_descriptors[n].sampleRate
1054 // << ", inputRate = " << inputRate
1055 // << " giving frame = ";
1056  frame = lrint((double(m_fixedRateFeatureNos[n]) / rate) * inputRate);
1057 // cerr << frame << endl;
1058  }
1059 
1060  if (frame < 0) {
1061  SVDEBUG
1062  << "WARNING: FeatureExtractionModelTransformer::addFeature: "
1063  << "Negative frame counts are not supported (frame = " << frame
1064  << " from timestamp " << feature.timestamp
1065  << "), dropping feature"
1066  << endl;
1067  return;
1068  }
1069 
1070  // Rather than repeat the complicated tests from the constructor
1071  // to determine what sort of model we must be adding the features
1072  // to, we instead test what sort of model the constructor decided
1073  // to create.
1074 
1075  ModelId outputId = m_outputs[n];
1076 
1077  if (isOutputType<SparseOneDimensionalModel>(n)) {
1078 
1079  auto model = ModelById::getAs<SparseOneDimensionalModel>(outputId);
1080  if (!model) return;
1081  model->add(Event(frame, feature.label.c_str()));
1082 
1083  } else if (isOutputType<SparseTimeValueModel>(n)) {
1084 
1085  auto model = ModelById::getAs<SparseTimeValueModel>(outputId);
1086  if (!model) return;
1087 
1088  for (int i = 0; in_range_for(feature.values, i); ++i) {
1089 
1090  float value = feature.values[i];
1091 
1092  QString label = feature.label.c_str();
1093  if (feature.values.size() > 1) {
1094  label = QString("[%1] %2").arg(i+1).arg(label);
1095  }
1096 
1097  auto targetModel = model;
1098 
1099  if (m_needAdditionalModels[n] && i > 0) {
1100  targetModel = ModelById::getAs<SparseTimeValueModel>
1101  (getAdditionalModel(n, i));
1102  if (!targetModel) targetModel = model;
1103  }
1104 
1105  targetModel->add(Event(frame, value, label));
1106  }
1107 
1108  } else if (isOutputType<NoteModel>(n) || isOutputType<RegionModel>(n)) {
1109 
1110  int index = 0;
1111 
1112  float value = 0.0;
1113  if ((int)feature.values.size() > index) {
1114  value = feature.values[index++];
1115  }
1116 
1117  sv_frame_t duration = 1;
1118  if (feature.hasDuration) {
1119  duration = RealTime::realTime2Frame(feature.duration, inputRate);
1120  } else {
1121  if (in_range_for(feature.values, index)) {
1122  duration = lrintf(feature.values[index++]);
1123  }
1124  }
1125 
1126  auto noteModel = ModelById::getAs<NoteModel>(outputId);
1127  if (noteModel) {
1128 
1129  float velocity = 100;
1130  if ((int)feature.values.size() > index) {
1131  velocity = feature.values[index++];
1132  }
1133  if (velocity < 0) velocity = 127;
1134  if (velocity > 127) velocity = 127;
1135 
1136  noteModel->add(Event(frame, value, // value is pitch
1137  duration,
1138  velocity / 127.f,
1139  feature.label.c_str()));
1140  }
1141 
1142  auto regionModel = ModelById::getAs<RegionModel>(outputId);
1143  if (regionModel) {
1144 
1145  if (feature.hasDuration && !feature.values.empty()) {
1146 
1147  for (int i = 0; in_range_for(feature.values, i); ++i) {
1148 
1149  float value = feature.values[i];
1150 
1151  QString label = feature.label.c_str();
1152  if (feature.values.size() > 1) {
1153  label = QString("[%1] %2").arg(i+1).arg(label);
1154  }
1155 
1156  regionModel->add(Event(frame,
1157  value,
1158  duration,
1159  label));
1160  }
1161  } else {
1162 
1163  regionModel->add(Event(frame,
1164  value,
1165  duration,
1166  feature.label.c_str()));
1167  }
1168  }
1169 
1170  } else if (isOutputType<BasicCompressedDenseThreeDimensionalModel>(n)) {
1171 
1172  auto model = ModelById::getAs
1174  if (!model) return;
1175 
1177  values.insert(values.begin(),
1178  feature.values.begin(), feature.values.end());
1179 
1180  if (!feature.hasTimestamp && m_fixedRateFeatureNos[n] >= 0) {
1181  model->setColumn(m_fixedRateFeatureNos[n], values);
1182  } else {
1183  model->setColumn(int(frame / model->getResolution()), values);
1184  }
1185  } else {
1186 
1187  SVDEBUG << "FeatureExtractionModelTransformer::addFeature: Unknown output model type - possibly a deleted model" << endl;
1188  abandon();
1189  }
1190 }
1191 
1192 void
1194 {
1195 #ifdef DEBUG_FEATURE_EXTRACTION_TRANSFORMER_RUN
1196  SVDEBUG << "FeatureExtractionModelTransformer::setCompletion("
1197  << completion << ")" << endl;
1198 #endif
1199 
1200  (void)
1201  (setOutputCompletion<SparseOneDimensionalModel>(n, completion) ||
1202  setOutputCompletion<SparseTimeValueModel>(n, completion) ||
1203  setOutputCompletion<NoteModel>(n, completion) ||
1204  setOutputCompletion<RegionModel>(n, completion) ||
1205  setOutputCompletion<BasicCompressedDenseThreeDimensionalModel>(n, completion));
1206 }
1207 
double sv_samplerate_t
Sample rate.
Definition: BaseTypes.h:51
void abandon()
Hint to the processing thread that it should give up, for example because the process is going to exit.
bool isOK() const override
Return true if the model was constructed successfully.
Definition: FFTModel.cpp:93
QString getOutput() const
Definition: Transform.cpp:219
void addFeature(int n, sv_frame_t blockFrame, const Vamp::Plugin::Feature &feature)
ModelId getAdditionalModel(int transformNo, int binNo)
int getStepSize() const
Definition: Transform.cpp:318
void setPluginParameters(const Transform &transform, std::shared_ptr< Vamp::PluginBase > plugin)
Set the parameters, program and configuration strings on the given plugin from the given Transform object.
RealTime getDuration() const
Definition: Transform.cpp:366
int64_t sv_frame_t
Frame index, the unit of our time axis.
Definition: BaseTypes.h:31
static RealTime frame2RealTime(sv_frame_t frame, sv_samplerate_t sampleRate)
Convert a sample frame at the given sample rate into a RealTime.
Definition: RealTimeSV.cpp:498
static std::shared_ptr< Derived > getAs(Id id)
Definition: ById.h:247
std::vector< Transform > Transforms
Definition: Transform.h:204
void setRDFTypeURI(QString uri)
Set the event, feature, or signal type URI for the features contained in this model, according to the Audio Features RDF ontology.
Definition: Model.h:264
std::vector< ModelId > Models
An implementation of DenseThreeDimensionalModel that makes FFT data derived from a DenseTimeValueMode...
Definition: FFTModel.h:36
QString getOutputSignalTypeURI(QString outputId) const
WindowType getWindowType() const
Definition: Transform.cpp:342
static TransformFactory * getInstance()
Transforms m_transforms
static bool areTransformsSimilar(const Transform &t1, const Transform &t2)
static Id add(std::shared_ptr< Item > item)
Definition: ById.h:228
std::vector< Vamp::Plugin::OutputDescriptor > m_descriptors
void setOutput(QString output)
Definition: Transform.cpp:231
double toDouble() const
Definition: RealTimeSV.cpp:181
QString getError() const
Definition: FFTModel.h:114
FeatureExtractionModelTransformer(Input input, const Transform &transform)
ModelId getInputModel()
Return the input model for the transform.
bool willHaveAdditionalOutputModels() override
Return true if the current transform is one that may produce additional models (to be retrieved through getAdditionalOutputModels).
QString getOutputEventTypeURI(QString outputId) const
void getFrames(int channelCount, sv_frame_t startFrame, sv_frame_t size, float **buffer)
Models getAdditionalOutputModels() override
Return any additional models that were created during processing.
virtual std::shared_ptr< Vamp::Plugin > instantiatePlugin(QString identifier, sv_samplerate_t inputSampleRate)=0
Instantiate (load) and return pointer to the plugin with the given identifier, at the given sample rate.
static sv_frame_t realTime2Frame(const RealTime &r, sv_samplerate_t sampleRate)
Convert a RealTime into a sample frame at the given sample rate.
Definition: RealTimeSV.cpp:490
static FeatureExtractionPluginFactory * instance()
A model representing a wiggly-line plot with points at arbitrary intervals of the model resolution...
RegionModel – a model for intervals associated with a value, which we call regions for no very compelling reason.
Definition: RegionModel.h:33
bool in_range_for(const C &container, T i)
Check whether an integer index is in range for a container, avoiding overflows and signed/unsigned comparison warnings.
Definition: BaseTypes.h:37
#define SVDEBUG
Definition: Debug.h:106
void setScaleUnits(QString units)
Definition: NoteModel.h:128
void setScaleUnits(QString units)
Definition: RegionModel.h:97
QString getPluginVersion() const
Definition: Transform.cpp:282
An immutable(-ish) type used for point and event representation in sparse models, as well as for inte...
Definition: Event.h:55
A ModelTransformer turns one data model into another.
void setScaleUnits(QString units)
#define SVCERR
Definition: Debug.h:109
virtual void setColumn(int x, const Column &values)
Set the entire set of bin values at the given column.
QString getPluginIdentifier() const
Definition: Transform.cpp:213
void makeContextConsistentWithPlugin(Transform &transform, std::shared_ptr< Vamp::PluginBase > plugin)
If the given Transform object has no processing step and block sizes set, set them to appropriate defaults.
RealTime getStartTime() const
Definition: Transform.cpp:354
Definition: ById.h:115
static std::shared_ptr< Item > get(Id id)
Definition: ById.h:251
int getBlockSize() const
Definition: Transform.cpp:330
RealTime represents time values to nanosecond precision with accurate arithmetic and frame-rate conversion.
Definition: RealTime.h:42