Mercurial > hg > svcore
changeset 441:288f45533041
* Add region model and layer; improve assignment of model types to
feature extraction transforms with duration
author | Chris Cannam |
---|---|
date | Thu, 18 Sep 2008 16:08:14 +0000 (2008-09-18) |
parents | 5746c559af15 |
children | 04b7fd31e1c6 |
files | data/data.pro data/fileio/QuickTimeFileReader.cpp data/model/NoteModel.h data/model/RegionModel.h transform/FeatureExtractionModelTransformer.cpp transform/FeatureExtractionModelTransformer.h |
diffstat | 6 files changed, 375 insertions(+), 65 deletions(-) [+] |
line wrap: on
line diff
--- a/data/data.pro Thu Sep 18 12:33:30 2008 +0000 +++ b/data/data.pro Thu Sep 18 16:08:14 2008 +0000 @@ -59,6 +59,7 @@ model/PowerOfSqrtTwoZoomConstraint.h \ model/PowerOfTwoZoomConstraint.h \ model/RangeSummarisableTimeValueModel.h \ + model/RegionModel.h \ model/SparseModel.h \ model/SparseOneDimensionalModel.h \ model/SparseTimeValueModel.h \
--- a/data/fileio/QuickTimeFileReader.cpp Thu Sep 18 12:33:30 2008 +0000 +++ b/data/fileio/QuickTimeFileReader.cpp Thu Sep 18 16:08:14 2008 +0000 @@ -203,6 +203,7 @@ if (m_d->err) { m_error = QString("Error in QuickTime decoder property set: code %1").arg(m_d->err); + m_channelCount = 0; return; } m_d->buffer.mNumberBuffers = 1;
--- a/data/model/NoteModel.h Thu Sep 18 12:33:30 2008 +0000 +++ b/data/model/NoteModel.h Thu Sep 18 16:08:14 2008 +0000 @@ -21,12 +21,16 @@ #include "base/PlayParameterRepository.h" /** - * Note type for use in a SparseModel or SparseValueModel. All we - * mean by a "note" is something that has an onset time, a single - * value, and a duration. Like other points, it can also have a - * label. With this point type, the model can be thought of as - * representing a simple MIDI-type piano roll, except that the y - * coordinates (values) do not have to be discrete integers. + * NoteModel -- a concrete IntervalModel for notes. + */ + +/** + * Note type for use in a sparse model. All we mean by a "note" is + * something that has an onset time, a single value, a duration, and a + * level. Like other points, it can also have a label. With this + * point type, the model can be thought of as representing a simple + * MIDI-type piano roll, except that the y coordinates (values) do not + * have to be discrete integers. */ struct Note
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data/model/RegionModel.h Thu Sep 18 16:08:14 2008 +0000 @@ -0,0 +1,204 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Sonic Visualiser + An audio file viewer and annotation editor. + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2006 Chris Cannam. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _REGION_MODEL_H_ +#define _REGION_MODEL_H_ + +#include "IntervalModel.h" +#include "base/RealTime.h" + +/** + * RegionModel -- a concrete IntervalModel for intervals associated + * with a value, which we call regions for no very compelling reason. + */ + +/** + * Region "point" type. A region is something that has an onset time, + * a single value, and a duration. Like other points, it can also + * have a label. + * + * This is called RegionRec instead of Region to avoid name collisions + * with the X11 Region struct. Bah. + */ + +struct RegionRec +{ +public: + RegionRec(long _frame) : frame(_frame), value(0.0f), duration(0) { } + RegionRec(long _frame, float _value, size_t _duration, QString _label) : + frame(_frame), value(_value), duration(_duration), label(_label) { } + + int getDimensions() const { return 3; } + + long frame; + float value; + size_t duration; + QString label; + + QString getLabel() const { return label; } + + void toXml(QTextStream &stream, + QString indent = "", + QString extraAttributes = "") const + { + stream << + QString("%1<point frame=\"%2\" value=\"%3\" duration=\"%4\" label=\"%5\" %6/>\n") + .arg(indent).arg(frame).arg(value).arg(duration).arg(label).arg(extraAttributes); + } + + QString toDelimitedDataString(QString delimiter, size_t sampleRate) const + { + QStringList list; + list << RealTime::frame2RealTime(frame, sampleRate).toString().c_str(); + list << QString("%1").arg(value); + list << RealTime::frame2RealTime(duration, sampleRate).toString().c_str(); + if (label != "") list << label; + return list.join(delimiter); + } + + struct Comparator { + bool operator()(const RegionRec &p1, + const RegionRec &p2) const { + if (p1.frame != p2.frame) return p1.frame < p2.frame; + if (p1.value != p2.value) return p1.value < p2.value; + if (p1.duration != p2.duration) return p1.duration < p2.duration; + return p1.label < p2.label; + } + }; + + struct OrderComparator { + bool operator()(const RegionRec &p1, + const RegionRec &p2) const { + return p1.frame < p2.frame; + } + }; +}; + + +class RegionModel : public IntervalModel<RegionRec> +{ + Q_OBJECT + +public: + RegionModel(size_t sampleRate, size_t resolution, + bool notifyOnAdd = true) : + IntervalModel<RegionRec>(sampleRate, resolution, notifyOnAdd), + m_valueQuantization(0) + { + } + + RegionModel(size_t sampleRate, size_t resolution, + float valueMinimum, float valueMaximum, + bool notifyOnAdd = true) : + IntervalModel<RegionRec>(sampleRate, resolution, + valueMinimum, valueMaximum, + notifyOnAdd), + m_valueQuantization(0) + { + } + + virtual ~RegionModel() + { + } + + float getValueQuantization() const { return m_valueQuantization; } + void setValueQuantization(float q) { m_valueQuantization = q; } + + QString getTypeName() const { return tr("Region"); } + + virtual void toXml(QTextStream &out, + QString indent = "", + QString extraAttributes = "") const + { + std::cerr << "RegionModel::toXml: extraAttributes = \"" + << extraAttributes.toStdString() << std::endl; + + IntervalModel<RegionRec>::toXml + (out, + indent, + QString("%1 valueQuantization=\"%2\"") + .arg(extraAttributes).arg(m_valueQuantization)); + } + + /** + * TabularModel methods. + */ + + virtual int getColumnCount() const + { + return 6; + } + + virtual QString getHeading(int column) const + { + switch (column) { + case 0: return tr("Time"); + case 1: return tr("Frame"); + case 2: return tr("Value"); + case 3: return tr("Duration"); + case 4: return tr("Label"); + default: return tr("Unknown"); + } + } + + virtual QVariant getData(int row, int column, int role) const + { + if (column < 4) { + return IntervalModel<RegionRec>::getData(row, column, role); + } + + PointListIterator i = getPointListIteratorForRow(row); + if (i == m_points.end()) return QVariant(); + + switch (column) { + case 4: return i->label; + default: return QVariant(); + } + } + + virtual Command *getSetDataCommand(int row, int column, const QVariant &value, int role) + { + if (column < 4) { + return IntervalModel<RegionRec>::getSetDataCommand + (row, column, value, role); + } + + if (role != Qt::EditRole) return false; + PointListIterator i = getPointListIteratorForRow(row); + if (i == m_points.end()) return false; + EditCommand *command = new EditCommand(this, tr("Edit Data")); + + Point point(*i); + command->deletePoint(point); + + switch (column) { + case 4: point.label = value.toString(); break; + } + + command->addPoint(point); + return command->finish(); + } + + virtual SortType getSortType(int column) const + { + if (column == 5) return SortAlphabetical; + return SortNumeric; + } + +protected: + float m_valueQuantization; +}; + +#endif
--- a/transform/FeatureExtractionModelTransformer.cpp Thu Sep 18 12:33:30 2008 +0000 +++ b/transform/FeatureExtractionModelTransformer.cpp Thu Sep 18 16:08:14 2008 +0000 @@ -27,6 +27,7 @@ #include "data/model/EditableDenseThreeDimensionalModel.h" #include "data/model/DenseTimeValueModel.h" #include "data/model/NoteModel.h" +#include "data/model/RegionModel.h" #include "data/model/FFTModel.h" #include "data/model/WaveFileModel.h" @@ -154,8 +155,7 @@ if (m_transform.getOutput() == "" || outputs[i].identifier == m_transform.getOutput().toStdString()) { m_outputFeatureNo = i; - m_descriptor = new Vamp::Plugin::OutputDescriptor - (outputs[i]); + m_descriptor = new Vamp::Plugin::OutputDescriptor(outputs[i]); break; } } @@ -207,12 +207,92 @@ break; } - if (binCount == 0) { + bool preDurationPlugin = (m_plugin->getVampApiVersion() < 2); + + if (binCount == 0 && + (preDurationPlugin || !m_descriptor->hasDuration)) { m_output = new SparseOneDimensionalModel(modelRate, modelResolution, false); - } else if (binCount == 1) { + } else if ((preDurationPlugin && binCount > 1 && + (m_descriptor->sampleType == + Vamp::Plugin::OutputDescriptor::VariableSampleRate)) || + (!preDurationPlugin && m_descriptor->hasDuration)) { + + // For plugins using the old v1 API without explicit duration, + // we treat anything that has multiple bins (i.e. that has the + // potential to have value and duration) and a variable sample + // rate as a note model, taking its values as pitch, duration + // and velocity (if present) respectively. This is the same + // behaviour as always applied by SV to these plugins in the + // past. + + // For plugins with the newer API, we treat anything with + // duration as either a note model with pitch and velocity, or + // a region model. + + // How do we know whether it's an interval or note model? + // What's the essential difference? Is a note model any + // interval model using a Hz or "MIDI pitch" scale? There + // isn't really a reliable test for "MIDI pitch"... Does a + // note model always have velocity? This is a good question + // to be addressed by accompanying RDF, but for the moment we + // will do the following... + + bool isNoteModel = false; + + // Regions have only value (and duration -- we can't extract a + // region model from an old-style plugin that doesn't support + // duration) + if (binCount > 1) isNoteModel = true; + + // Regions do not have units of Hz (a sweeping assumption!) + if (m_descriptor->unit == "Hz") isNoteModel = true; + + // If we had a "sparse 3D model", we would have the additional + // problem of determining whether to use that here (if bin + // count > 1). But we don't. + + if (isNoteModel) { + + NoteModel *model; + if (haveExtents) { + model = new NoteModel + (modelRate, modelResolution, minValue, maxValue, false); + } else { + model = new NoteModel + (modelRate, modelResolution, false); + } + model->setScaleUnits(m_descriptor->unit.c_str()); + m_output = model; + + } else { + + RegionModel *model; + if (haveExtents) { + model = new RegionModel + (modelRate, modelResolution, minValue, maxValue, false); + } else { + model = new RegionModel + (modelRate, modelResolution, false); + } + model->setScaleUnits(m_descriptor->unit.c_str()); + m_output = model; + } + + } else if (binCount == 1 || + (m_descriptor->sampleType == + Vamp::Plugin::OutputDescriptor::VariableSampleRate)) { + + // Anything that is not a 1D, note, or interval model and that + // has only one value per result must be a sparse time value + // model. + + // Anything that is not a 1D, note, or interval model and that + // has a variable sample rate is also treated as a sparse time + // value model regardless of its bin count, because we lack a + // sparse 3D model. SparseTimeValueModel *model; if (haveExtents) { @@ -226,30 +306,11 @@ m_output = model; - } else if (m_descriptor->sampleType == - Vamp::Plugin::OutputDescriptor::VariableSampleRate) { + } else { - // We don't have a sparse 3D model, so interpret this as a - // note model. There's nothing to define which values to use - // as which parameters of the note -- for the moment let's - // treat the first as pitch, second as duration in frames, - // third (if present) as velocity. (Our note model doesn't - // yet store velocity.) - //!!! todo: ask the user! - - NoteModel *model; - if (haveExtents) { - model = new NoteModel - (modelRate, modelResolution, minValue, maxValue, false); - } else { - model = new NoteModel - (modelRate, modelResolution, false); - } - model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str()); - - m_output = model; - - } else { + // Anything that is not a 1D, note, or interval model and that + // has a fixed sample rate and more than one value per result + // must be a dense 3D model. EditableDenseThreeDimensionalModel *model = new EditableDenseThreeDimensionalModel @@ -541,15 +602,21 @@ } } - if (binCount == 0) { + // Rather than repeat the complicated tests from the constructor + // to determine what sort of model we must be adding the features + // to, we instead test what sort of model the constructor decided + // to create. - SparseOneDimensionalModel *model = + if (isOutput<SparseOneDimensionalModel>()) { + + SparseOneDimensionalModel *model = getConformingOutput<SparseOneDimensionalModel>(); if (!model) return; - model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str())); + model->addPoint(SparseOneDimensionalModel::Point + (frame, feature.label.c_str())); - } else if (binCount == 1) { + } else if (isOutput<SparseTimeValueModel>()) { float value = 0.0; if (feature.values.size() > 0) value = feature.values[0]; @@ -558,32 +625,52 @@ getConformingOutput<SparseTimeValueModel>(); if (!model) return; - model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str())); -// std::cerr << "SparseTimeValueModel::addPoint(" << frame << ", " << value << "), " << feature.label.c_str() << std::endl; + model->addPoint(SparseTimeValueModel::Point + (frame, value, feature.label.c_str())); - } else if (m_descriptor->sampleType == - Vamp::Plugin::OutputDescriptor::VariableSampleRate) { + } else if (isOutput<NoteModel>() || isOutput<RegionModel>()) { - float pitch = 0.0; - if (feature.values.size() > 0) pitch = feature.values[0]; + int index = 0; + + float value = 0.0; + if (feature.values.size() > index) { + value = feature.values[index++]; + } float duration = 1; - if (feature.values.size() > 1) duration = feature.values[1]; + if (feature.hasDuration) { + duration = Vamp::RealTime::realTime2Frame(feature.duration, inputRate); + } else { + if (feature.values.size() > index) { + duration = feature.values[index++]; + } + } - float velocity = 100; - if (feature.values.size() > 2) velocity = feature.values[2]; - if (velocity < 0) velocity = 127; - if (velocity > 127) velocity = 127; + if (isOutput<NoteModel>()) { - NoteModel *model = getConformingOutput<NoteModel>(); - if (!model) return; + float velocity = 100; + if (feature.values.size() > index) { + velocity = feature.values[index++]; + } + if (velocity < 0) velocity = 127; + if (velocity > 127) velocity = 127; - model->addPoint(NoteModel::Point(frame, pitch, - lrintf(duration), - velocity / 127.f, - feature.label.c_str())); + NoteModel *model = getConformingOutput<NoteModel>(); + if (!model) return; + model->addPoint(NoteModel::Point(frame, value, // value is pitch + lrintf(duration), + velocity / 127.f, + feature.label.c_str())); + } else { + RegionModel *model = getConformingOutput<RegionModel>(); + if (model) { + model->addPoint(RegionModel::Point(frame, value, + lrintf(duration), + feature.label.c_str())); + } else return; + } - } else { + } else if (isOutput<EditableDenseThreeDimensionalModel>()) { DenseThreeDimensionalModel::Column values = feature.values; @@ -592,6 +679,9 @@ if (!model) return; model->setColumn(frame / model->getResolution(), values); + + } else { + std::cerr << "FeatureExtractionModelTransformer::addFeature: Unknown output model type!" << std::endl; } } @@ -606,29 +696,33 @@ // std::cerr << "FeatureExtractionModelTransformer::setCompletion(" // << completion << ")" << std::endl; - if (binCount == 0) { + if (isOutput<SparseOneDimensionalModel>()) { SparseOneDimensionalModel *model = getConformingOutput<SparseOneDimensionalModel>(); if (!model) return; - model->setCompletion(completion, true); //!!!m_context.updates); + model->setCompletion(completion, true); - } else if (binCount == 1) { + } else if (isOutput<SparseTimeValueModel>()) { SparseTimeValueModel *model = getConformingOutput<SparseTimeValueModel>(); if (!model) return; - model->setCompletion(completion, true); //!!!m_context.updates); + model->setCompletion(completion, true); - } else if (m_descriptor->sampleType == - Vamp::Plugin::OutputDescriptor::VariableSampleRate) { + } else if (isOutput<NoteModel>()) { - NoteModel *model = - getConformingOutput<NoteModel>(); + NoteModel *model = getConformingOutput<NoteModel>(); if (!model) return; - model->setCompletion(completion, true); //!!!m_context.updates); + model->setCompletion(completion, true); - } else { + } else if (isOutput<RegionModel>()) { + + RegionModel *model = getConformingOutput<RegionModel>(); + if (!model) return; + model->setCompletion(completion, true); + + } else if (isOutput<EditableDenseThreeDimensionalModel>()) { EditableDenseThreeDimensionalModel *model = getConformingOutput<EditableDenseThreeDimensionalModel>();
--- a/transform/FeatureExtractionModelTransformer.h Thu Sep 18 12:33:30 2008 +0000 +++ b/transform/FeatureExtractionModelTransformer.h Thu Sep 18 16:08:14 2008 +0000 @@ -51,7 +51,13 @@ float **buffer); // just casts + DenseTimeValueModel *getConformingInput(); + + template <typename ModelClass> bool isOutput() { + return dynamic_cast<ModelClass *>(m_output) != 0; + } + template <typename ModelClass> ModelClass *getConformingOutput() { ModelClass *mc = dynamic_cast<ModelClass *>(m_output); if (!mc) {