Mercurial > hg > svcore
diff transform/FeatureExtractionModelTransformer.cpp @ 441:288f45533041
* Add region model and layer; improve assignment of model types to
feature extraction transforms with duration
author | Chris Cannam |
---|---|
date | Thu, 18 Sep 2008 16:08:14 +0000 |
parents | cff476cfce77 |
children | 55cff2c6e4a0 |
line wrap: on
line diff
--- a/transform/FeatureExtractionModelTransformer.cpp Thu Sep 18 12:33:30 2008 +0000 +++ b/transform/FeatureExtractionModelTransformer.cpp Thu Sep 18 16:08:14 2008 +0000 @@ -27,6 +27,7 @@ #include "data/model/EditableDenseThreeDimensionalModel.h" #include "data/model/DenseTimeValueModel.h" #include "data/model/NoteModel.h" +#include "data/model/RegionModel.h" #include "data/model/FFTModel.h" #include "data/model/WaveFileModel.h" @@ -154,8 +155,7 @@ if (m_transform.getOutput() == "" || outputs[i].identifier == m_transform.getOutput().toStdString()) { m_outputFeatureNo = i; - m_descriptor = new Vamp::Plugin::OutputDescriptor - (outputs[i]); + m_descriptor = new Vamp::Plugin::OutputDescriptor(outputs[i]); break; } } @@ -207,12 +207,92 @@ break; } - if (binCount == 0) { + bool preDurationPlugin = (m_plugin->getVampApiVersion() < 2); + + if (binCount == 0 && + (preDurationPlugin || !m_descriptor->hasDuration)) { m_output = new SparseOneDimensionalModel(modelRate, modelResolution, false); - } else if (binCount == 1) { + } else if ((preDurationPlugin && binCount > 1 && + (m_descriptor->sampleType == + Vamp::Plugin::OutputDescriptor::VariableSampleRate)) || + (!preDurationPlugin && m_descriptor->hasDuration)) { + + // For plugins using the old v1 API without explicit duration, + // we treat anything that has multiple bins (i.e. that has the + // potential to have value and duration) and a variable sample + // rate as a note model, taking its values as pitch, duration + // and velocity (if present) respectively. This is the same + // behaviour as always applied by SV to these plugins in the + // past. + + // For plugins with the newer API, we treat anything with + // duration as either a note model with pitch and velocity, or + // a region model. + + // How do we know whether it's an interval or note model? + // What's the essential difference? Is a note model any + // interval model using a Hz or "MIDI pitch" scale? There + // isn't really a reliable test for "MIDI pitch"... Does a + // note model always have velocity? This is a good question + // to be addressed by accompanying RDF, but for the moment we + // will do the following... + + bool isNoteModel = false; + + // Regions have only value (and duration -- we can't extract a + // region model from an old-style plugin that doesn't support + // duration) + if (binCount > 1) isNoteModel = true; + + // Regions do not have units of Hz (a sweeping assumption!) + if (m_descriptor->unit == "Hz") isNoteModel = true; + + // If we had a "sparse 3D model", we would have the additional + // problem of determining whether to use that here (if bin + // count > 1). But we don't. + + if (isNoteModel) { + + NoteModel *model; + if (haveExtents) { + model = new NoteModel + (modelRate, modelResolution, minValue, maxValue, false); + } else { + model = new NoteModel + (modelRate, modelResolution, false); + } + model->setScaleUnits(m_descriptor->unit.c_str()); + m_output = model; + + } else { + + RegionModel *model; + if (haveExtents) { + model = new RegionModel + (modelRate, modelResolution, minValue, maxValue, false); + } else { + model = new RegionModel + (modelRate, modelResolution, false); + } + model->setScaleUnits(m_descriptor->unit.c_str()); + m_output = model; + } + + } else if (binCount == 1 || + (m_descriptor->sampleType == + Vamp::Plugin::OutputDescriptor::VariableSampleRate)) { + + // Anything that is not a 1D, note, or interval model and that + // has only one value per result must be a sparse time value + // model. + + // Anything that is not a 1D, note, or interval model and that + // has a variable sample rate is also treated as a sparse time + // value model regardless of its bin count, because we lack a + // sparse 3D model. SparseTimeValueModel *model; if (haveExtents) { @@ -226,30 +306,11 @@ m_output = model; - } else if (m_descriptor->sampleType == - Vamp::Plugin::OutputDescriptor::VariableSampleRate) { + } else { - // We don't have a sparse 3D model, so interpret this as a - // note model. There's nothing to define which values to use - // as which parameters of the note -- for the moment let's - // treat the first as pitch, second as duration in frames, - // third (if present) as velocity. (Our note model doesn't - // yet store velocity.) - //!!! todo: ask the user! - - NoteModel *model; - if (haveExtents) { - model = new NoteModel - (modelRate, modelResolution, minValue, maxValue, false); - } else { - model = new NoteModel - (modelRate, modelResolution, false); - } - model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str()); - - m_output = model; - - } else { + // Anything that is not a 1D, note, or interval model and that + // has a fixed sample rate and more than one value per result + // must be a dense 3D model. EditableDenseThreeDimensionalModel *model = new EditableDenseThreeDimensionalModel @@ -541,15 +602,21 @@ } } - if (binCount == 0) { + // Rather than repeat the complicated tests from the constructor + // to determine what sort of model we must be adding the features + // to, we instead test what sort of model the constructor decided + // to create. - SparseOneDimensionalModel *model = + if (isOutput<SparseOneDimensionalModel>()) { + + SparseOneDimensionalModel *model = getConformingOutput<SparseOneDimensionalModel>(); if (!model) return; - model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str())); + model->addPoint(SparseOneDimensionalModel::Point + (frame, feature.label.c_str())); - } else if (binCount == 1) { + } else if (isOutput<SparseTimeValueModel>()) { float value = 0.0; if (feature.values.size() > 0) value = feature.values[0]; @@ -558,32 +625,52 @@ getConformingOutput<SparseTimeValueModel>(); if (!model) return; - model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str())); -// std::cerr << "SparseTimeValueModel::addPoint(" << frame << ", " << value << "), " << feature.label.c_str() << std::endl; + model->addPoint(SparseTimeValueModel::Point + (frame, value, feature.label.c_str())); - } else if (m_descriptor->sampleType == - Vamp::Plugin::OutputDescriptor::VariableSampleRate) { + } else if (isOutput<NoteModel>() || isOutput<RegionModel>()) { - float pitch = 0.0; - if (feature.values.size() > 0) pitch = feature.values[0]; + int index = 0; + + float value = 0.0; + if (feature.values.size() > index) { + value = feature.values[index++]; + } float duration = 1; - if (feature.values.size() > 1) duration = feature.values[1]; + if (feature.hasDuration) { + duration = Vamp::RealTime::realTime2Frame(feature.duration, inputRate); + } else { + if (feature.values.size() > index) { + duration = feature.values[index++]; + } + } - float velocity = 100; - if (feature.values.size() > 2) velocity = feature.values[2]; - if (velocity < 0) velocity = 127; - if (velocity > 127) velocity = 127; + if (isOutput<NoteModel>()) { - NoteModel *model = getConformingOutput<NoteModel>(); - if (!model) return; + float velocity = 100; + if (feature.values.size() > index) { + velocity = feature.values[index++]; + } + if (velocity < 0) velocity = 127; + if (velocity > 127) velocity = 127; - model->addPoint(NoteModel::Point(frame, pitch, - lrintf(duration), - velocity / 127.f, - feature.label.c_str())); + NoteModel *model = getConformingOutput<NoteModel>(); + if (!model) return; + model->addPoint(NoteModel::Point(frame, value, // value is pitch + lrintf(duration), + velocity / 127.f, + feature.label.c_str())); + } else { + RegionModel *model = getConformingOutput<RegionModel>(); + if (model) { + model->addPoint(RegionModel::Point(frame, value, + lrintf(duration), + feature.label.c_str())); + } else return; + } - } else { + } else if (isOutput<EditableDenseThreeDimensionalModel>()) { DenseThreeDimensionalModel::Column values = feature.values; @@ -592,6 +679,9 @@ if (!model) return; model->setColumn(frame / model->getResolution(), values); + + } else { + std::cerr << "FeatureExtractionModelTransformer::addFeature: Unknown output model type!" << std::endl; } } @@ -606,29 +696,33 @@ // std::cerr << "FeatureExtractionModelTransformer::setCompletion(" // << completion << ")" << std::endl; - if (binCount == 0) { + if (isOutput<SparseOneDimensionalModel>()) { SparseOneDimensionalModel *model = getConformingOutput<SparseOneDimensionalModel>(); if (!model) return; - model->setCompletion(completion, true); //!!!m_context.updates); + model->setCompletion(completion, true); - } else if (binCount == 1) { + } else if (isOutput<SparseTimeValueModel>()) { SparseTimeValueModel *model = getConformingOutput<SparseTimeValueModel>(); if (!model) return; - model->setCompletion(completion, true); //!!!m_context.updates); + model->setCompletion(completion, true); - } else if (m_descriptor->sampleType == - Vamp::Plugin::OutputDescriptor::VariableSampleRate) { + } else if (isOutput<NoteModel>()) { - NoteModel *model = - getConformingOutput<NoteModel>(); + NoteModel *model = getConformingOutput<NoteModel>(); if (!model) return; - model->setCompletion(completion, true); //!!!m_context.updates); + model->setCompletion(completion, true); - } else { + } else if (isOutput<RegionModel>()) { + + RegionModel *model = getConformingOutput<RegionModel>(); + if (!model) return; + model->setCompletion(completion, true); + + } else if (isOutput<EditableDenseThreeDimensionalModel>()) { EditableDenseThreeDimensionalModel *model = getConformingOutput<EditableDenseThreeDimensionalModel>();