changeset 441:288f45533041

* Add region model and layer; improve assignment of model types to feature extraction transforms with duration
author Chris Cannam
date Thu, 18 Sep 2008 16:08:14 +0000 (2008-09-18)
parents 5746c559af15
children 04b7fd31e1c6
files data/data.pro data/fileio/QuickTimeFileReader.cpp data/model/NoteModel.h data/model/RegionModel.h transform/FeatureExtractionModelTransformer.cpp transform/FeatureExtractionModelTransformer.h
diffstat 6 files changed, 375 insertions(+), 65 deletions(-) [+]
line wrap: on
line diff
--- a/data/data.pro	Thu Sep 18 12:33:30 2008 +0000
+++ b/data/data.pro	Thu Sep 18 16:08:14 2008 +0000
@@ -59,6 +59,7 @@
            model/PowerOfSqrtTwoZoomConstraint.h \
            model/PowerOfTwoZoomConstraint.h \
            model/RangeSummarisableTimeValueModel.h \
+           model/RegionModel.h \
            model/SparseModel.h \
            model/SparseOneDimensionalModel.h \
            model/SparseTimeValueModel.h \
--- a/data/fileio/QuickTimeFileReader.cpp	Thu Sep 18 12:33:30 2008 +0000
+++ b/data/fileio/QuickTimeFileReader.cpp	Thu Sep 18 16:08:14 2008 +0000
@@ -203,6 +203,7 @@
 
     if (m_d->err) {
         m_error = QString("Error in QuickTime decoder property set: code %1").arg(m_d->err);
+        m_channelCount = 0;
         return;
     }
     m_d->buffer.mNumberBuffers = 1;
--- a/data/model/NoteModel.h	Thu Sep 18 12:33:30 2008 +0000
+++ b/data/model/NoteModel.h	Thu Sep 18 16:08:14 2008 +0000
@@ -21,12 +21,16 @@
 #include "base/PlayParameterRepository.h"
 
 /**
- * Note type for use in a SparseModel or SparseValueModel.  All we
- * mean by a "note" is something that has an onset time, a single
- * value, and a duration.  Like other points, it can also have a
- * label.  With this point type, the model can be thought of as
- * representing a simple MIDI-type piano roll, except that the y
- * coordinates (values) do not have to be discrete integers.
+ * NoteModel -- a concrete IntervalModel for notes.
+ */
+
+/**
+ * Note type for use in a sparse model.  All we mean by a "note" is
+ * something that has an onset time, a single value, a duration, and a
+ * level.  Like other points, it can also have a label.  With this
+ * point type, the model can be thought of as representing a simple
+ * MIDI-type piano roll, except that the y coordinates (values) do not
+ * have to be discrete integers.
  */
 
 struct Note
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/model/RegionModel.h	Thu Sep 18 16:08:14 2008 +0000
@@ -0,0 +1,204 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Sonic Visualiser
+    An audio file viewer and annotation editor.
+    Centre for Digital Music, Queen Mary, University of London.
+    This file copyright 2006 Chris Cannam.
+    
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+*/
+
+#ifndef _REGION_MODEL_H_
+#define _REGION_MODEL_H_
+
+#include "IntervalModel.h"
+#include "base/RealTime.h"
+
+/**
+ * RegionModel -- a concrete IntervalModel for intervals associated
+ * with a value, which we call regions for no very compelling reason.
+ */
+
+/**
+ * Region "point" type.  A region is something that has an onset time,
+ * a single value, and a duration.  Like other points, it can also
+ * have a label.
+ *
+ * This is called RegionRec instead of Region to avoid name collisions
+ * with the X11 Region struct.  Bah.
+ */
+
+struct RegionRec
+{
+public:
+    RegionRec(long _frame) : frame(_frame), value(0.0f), duration(0) { } // label stays default-constructed
+    RegionRec(long _frame, float _value, size_t _duration, QString _label) :
+	frame(_frame), value(_value), duration(_duration), label(_label) { }
+    // Always reports 3 (presumably frame, value and duration; the label is not counted).
+    int getDimensions() const { return 3; }
+    // NOTE(review): units below are inferred from the frame2RealTime() calls -- confirm.
+    long frame;      // onset; presumably a sample frame index
+    float value;
+    size_t duration; // presumably a length in sample frames
+    QString label;
+
+    QString getLabel() const { return label; }
+    // Writes this region to the stream as a single self-closing <point .../> element.
+    void toXml(QTextStream &stream,
+               QString indent = "",
+               QString extraAttributes = "") const
+    {
+	stream <<
+            QString("%1<point frame=\"%2\" value=\"%3\" duration=\"%4\" label=\"%5\" %6/>\n") // NOTE(review): chained arg() may re-substitute a "%N" found inside label -- verify
+	    .arg(indent).arg(frame).arg(value).arg(duration).arg(label).arg(extraAttributes); // label and extraAttributes are inserted as given, with no XML escaping here
+    }
+    // Joins onset time, value, duration and (only if non-empty) label using the delimiter.
+    QString toDelimitedDataString(QString delimiter, size_t sampleRate) const
+    {
+        QStringList list;
+        list << RealTime::frame2RealTime(frame, sampleRate).toString().c_str();
+        list << QString("%1").arg(value);
+        list << RealTime::frame2RealTime(duration, sampleRate).toString().c_str();
+        if (label != "") list << label;
+        return list.join(delimiter);
+    }
+    // Orders by frame, then value, then duration, then label.
+    struct Comparator {
+	bool operator()(const RegionRec &p1,
+			const RegionRec &p2) const {
+	    if (p1.frame != p2.frame) return p1.frame < p2.frame;
+	    if (p1.value != p2.value) return p1.value < p2.value;
+	    if (p1.duration != p2.duration) return p1.duration < p2.duration;
+	    return p1.label < p2.label;
+	}
+    };
+    // Orders by frame only; regions sharing a frame compare as equivalent.
+    struct OrderComparator {
+	bool operator()(const RegionRec &p1,
+			const RegionRec &p2) const {
+	    return p1.frame < p2.frame;
+	}
+    };
+};
+
+
+class RegionModel : public IntervalModel<RegionRec>
+{
+    Q_OBJECT
+
+public:
+    RegionModel(size_t sampleRate, size_t resolution,
+                bool notifyOnAdd = true) :
+        IntervalModel<RegionRec>(sampleRate, resolution, notifyOnAdd),
+        m_valueQuantization(0)
+    {
+    }
+    // As above, additionally forwarding valueMinimum/valueMaximum to the base.
+    RegionModel(size_t sampleRate, size_t resolution,
+                float valueMinimum, float valueMaximum,
+                bool notifyOnAdd = true) :
+        IntervalModel<RegionRec>(sampleRate, resolution,
+                                 valueMinimum, valueMaximum,
+                                 notifyOnAdd),
+        m_valueQuantization(0)
+    {
+    }
+
+    virtual ~RegionModel()
+    {
+    }
+    // Quantization step for values; this class only stores and exports it.
+    float getValueQuantization() const { return m_valueQuantization; }
+    void setValueQuantization(float q) { m_valueQuantization = q; }
+
+    QString getTypeName() const { return tr("Region"); }
+    // Serialise through the base class, adding a valueQuantization attribute.
+    virtual void toXml(QTextStream &out,
+                       QString indent = "",
+                       QString extraAttributes = "") const
+    {
+        std::cerr << "RegionModel::toXml: extraAttributes = \""
+                  << extraAttributes.toStdString() << "\"" << std::endl;
+
+        IntervalModel<RegionRec>::toXml
+            (out,
+             indent,
+             QString("%1 valueQuantization=\"%2\"")
+             .arg(extraAttributes).arg(m_valueQuantization));
+    }
+
+    /**
+     * TabularModel methods.  Column indices are those used by getHeading().
+     */
+    // One column per getHeading() entry: Time, Frame, Value, Duration, Label.
+    virtual int getColumnCount() const
+    {
+        return 5;
+    }
+
+    virtual QString getHeading(int column) const
+    {
+        switch (column) {
+        case 0: return tr("Time");
+        case 1: return tr("Frame");
+        case 2: return tr("Value");
+        case 3: return tr("Duration");
+        case 4: return tr("Label");
+        default: return tr("Unknown");
+        }
+    }
+    // Columns 0-3 are delegated to IntervalModel; column 4 is the label.
+    virtual QVariant getData(int row, int column, int role) const
+    {
+        if (column < 4) {
+            return IntervalModel<RegionRec>::getData(row, column, role);
+        }
+
+        PointListIterator i = getPointListIteratorForRow(row);
+        if (i == m_points.end()) return QVariant();
+
+        switch (column) {
+        case 4: return i->label;
+        default: return QVariant();
+        }
+    }
+    // Label edits: yields 0 for a non-edit role or unknown row, else command->finish().
+    virtual Command *getSetDataCommand(int row, int column, const QVariant &value, int role)
+    {
+        if (column < 4) {
+            return IntervalModel<RegionRec>::getSetDataCommand
+                (row, column, value, role);
+        }
+
+        if (role != Qt::EditRole) return 0; // pointer result: null, not the bool 'false'
+        PointListIterator i = getPointListIteratorForRow(row);
+        if (i == m_points.end()) return 0;
+        EditCommand *command = new EditCommand(this, tr("Edit Data"));
+
+        Point point(*i);
+        command->deletePoint(point); // the edit is recorded as delete-old + add-modified
+
+        switch (column) {
+        case 4: point.label = value.toString(); break;
+        }
+
+        command->addPoint(point);
+        return command->finish();
+    }
+
+    virtual SortType getSortType(int column) const
+    {
+        if (column == 4) return SortAlphabetical; // column 4 is the label (see getHeading)
+        return SortNumeric;
+    }
+
+protected:
+    float m_valueQuantization;
+};
+
+#endif
--- a/transform/FeatureExtractionModelTransformer.cpp	Thu Sep 18 12:33:30 2008 +0000
+++ b/transform/FeatureExtractionModelTransformer.cpp	Thu Sep 18 16:08:14 2008 +0000
@@ -27,6 +27,7 @@
 #include "data/model/EditableDenseThreeDimensionalModel.h"
 #include "data/model/DenseTimeValueModel.h"
 #include "data/model/NoteModel.h"
+#include "data/model/RegionModel.h"
 #include "data/model/FFTModel.h"
 #include "data/model/WaveFileModel.h"
 
@@ -154,8 +155,7 @@
 	if (m_transform.getOutput() == "" ||
             outputs[i].identifier == m_transform.getOutput().toStdString()) {
 	    m_outputFeatureNo = i;
-	    m_descriptor = new Vamp::Plugin::OutputDescriptor
-		(outputs[i]);
+	    m_descriptor = new Vamp::Plugin::OutputDescriptor(outputs[i]);
 	    break;
 	}
     }
@@ -207,12 +207,92 @@
 	break;
     }
 
-    if (binCount == 0) {
+    bool preDurationPlugin = (m_plugin->getVampApiVersion() < 2);
+
+    if (binCount == 0 &&
+        (preDurationPlugin || !m_descriptor->hasDuration)) {
 
 	m_output = new SparseOneDimensionalModel(modelRate, modelResolution,
 						 false);
 
-    } else if (binCount == 1) {
+    } else if ((preDurationPlugin && binCount > 1 &&
+                (m_descriptor->sampleType ==
+                 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) ||
+               (!preDurationPlugin && m_descriptor->hasDuration)) {
+
+        // For plugins using the old v1 API without explicit duration,
+        // we treat anything that has multiple bins (i.e. that has the
+        // potential to have value and duration) and a variable sample
+        // rate as a note model, taking its values as pitch, duration
+        // and velocity (if present) respectively.  This is the same
+        // behaviour as always applied by SV to these plugins in the
+        // past.
+
+        // For plugins with the newer API, we treat anything with
+        // duration as either a note model with pitch and velocity, or
+        // a region model.
+
+        // How do we know whether it's an interval or note model?
+        // What's the essential difference?  Is a note model any
+        // interval model using a Hz or "MIDI pitch" scale?  There
+        // isn't really a reliable test for "MIDI pitch"...  Does a
+        // note model always have velocity?  This is a good question
+        // to be addressed by accompanying RDF, but for the moment we
+        // will do the following...
+
+        bool isNoteModel = false;
+        
+        // Regions have only value (and duration -- we can't extract a
+        // region model from an old-style plugin that doesn't support
+        // duration)
+        if (binCount > 1) isNoteModel = true;
+
+        // Regions do not have units of Hz (a sweeping assumption!)
+        if (m_descriptor->unit == "Hz") isNoteModel = true;
+
+        // If we had a "sparse 3D model", we would have the additional
+        // problem of determining whether to use that here (if bin
+        // count > 1).  But we don't.
+
+        if (isNoteModel) {
+
+            NoteModel *model;
+            if (haveExtents) {
+                model = new NoteModel
+                    (modelRate, modelResolution, minValue, maxValue, false);
+            } else {
+                model = new NoteModel
+                    (modelRate, modelResolution, false);
+            }
+            model->setScaleUnits(m_descriptor->unit.c_str());
+            m_output = model;
+
+        } else {
+
+            RegionModel *model;
+            if (haveExtents) {
+                model = new RegionModel
+                    (modelRate, modelResolution, minValue, maxValue, false);
+            } else {
+                model = new RegionModel
+                    (modelRate, modelResolution, false);
+            }
+            model->setScaleUnits(m_descriptor->unit.c_str());
+            m_output = model;
+        }
+
+    } else if (binCount == 1 ||
+               (m_descriptor->sampleType == 
+                Vamp::Plugin::OutputDescriptor::VariableSampleRate)) {
+
+        // Anything that is not a 1D, note, or interval model and that
+        // has only one value per result must be a sparse time value
+        // model.
+
+        // Anything that is not a 1D, note, or interval model and that
+        // has a variable sample rate is also treated as a sparse time
+        // value model regardless of its bin count, because we lack a
+        // sparse 3D model.
 
         SparseTimeValueModel *model;
         if (haveExtents) {
@@ -226,30 +306,11 @@
 
         m_output = model;
 
-    } else if (m_descriptor->sampleType ==
-	       Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
+    } else {
 
-        // We don't have a sparse 3D model, so interpret this as a
-        // note model.  There's nothing to define which values to use
-        // as which parameters of the note -- for the moment let's
-        // treat the first as pitch, second as duration in frames,
-        // third (if present) as velocity. (Our note model doesn't
-        // yet store velocity.)
-        //!!! todo: ask the user!
-	
-        NoteModel *model;
-        if (haveExtents) {
-            model = new NoteModel
-                (modelRate, modelResolution, minValue, maxValue, false);
-        } else {
-            model = new NoteModel
-                (modelRate, modelResolution, false);
-        }            
-        model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());
-
-        m_output = model;
-
-    } else {
+        // Anything that is not a 1D, note, or interval model and that
+        // has a fixed sample rate and more than one value per result
+        // must be a dense 3D model.
 
         EditableDenseThreeDimensionalModel *model =
             new EditableDenseThreeDimensionalModel
@@ -541,15 +602,21 @@
 	}
     }
 	
-    if (binCount == 0) {
+    // Rather than repeat the complicated tests from the constructor
+    // to determine what sort of model we must be adding the features
+    // to, we instead test what sort of model the constructor decided
+    // to create.
 
-	SparseOneDimensionalModel *model =
+    if (isOutput<SparseOneDimensionalModel>()) {
+
+        SparseOneDimensionalModel *model =
             getConformingOutput<SparseOneDimensionalModel>();
 	if (!model) return;
 
-	model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str()));
+        model->addPoint(SparseOneDimensionalModel::Point
+                       (frame, feature.label.c_str()));
 	
-    } else if (binCount == 1) {
+    } else if (isOutput<SparseTimeValueModel>()) {
 
 	float value = 0.0;
 	if (feature.values.size() > 0) value = feature.values[0];
@@ -558,32 +625,52 @@
             getConformingOutput<SparseTimeValueModel>();
 	if (!model) return;
 
-	model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str()));
-//        std::cerr << "SparseTimeValueModel::addPoint(" << frame << ", " << value << "), " << feature.label.c_str() << std::endl;
+	model->addPoint(SparseTimeValueModel::Point
+                        (frame, value, feature.label.c_str()));
 
-    } else if (m_descriptor->sampleType == 
-	       Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
+    } else if (isOutput<NoteModel>() || isOutput<RegionModel>()) {
 
-        float pitch = 0.0;
-        if (feature.values.size() > 0) pitch = feature.values[0];
+        int index = 0;
+
+        float value = 0.0;
+        if (feature.values.size() > index) {
+            value = feature.values[index++];
+        }
 
         float duration = 1;
-        if (feature.values.size() > 1) duration = feature.values[1];
+        if (feature.hasDuration) {
+            duration = Vamp::RealTime::realTime2Frame(feature.duration, inputRate);
+        } else {
+            if (feature.values.size() > index) {
+                duration = feature.values[index++];
+            }
+        }
         
-        float velocity = 100;
-        if (feature.values.size() > 2) velocity = feature.values[2];
-        if (velocity < 0) velocity = 127;
-        if (velocity > 127) velocity = 127;
+        if (isOutput<NoteModel>()) {
 
-        NoteModel *model = getConformingOutput<NoteModel>();
-        if (!model) return;
+            float velocity = 100;
+            if (feature.values.size() > index) {
+                velocity = feature.values[index++];
+            }
+            if (velocity < 0) velocity = 127;
+            if (velocity > 127) velocity = 127;
 
-        model->addPoint(NoteModel::Point(frame, pitch,
-                                         lrintf(duration),
-                                         velocity / 127.f,
-                                         feature.label.c_str()));
+            NoteModel *model = getConformingOutput<NoteModel>();
+            if (!model) return;
+            model->addPoint(NoteModel::Point(frame, value, // value is pitch
+                                             lrintf(duration),
+                                             velocity / 127.f,
+                                             feature.label.c_str()));
+        } else {
+            RegionModel *model = getConformingOutput<RegionModel>();
+            if (model) {
+                model->addPoint(RegionModel::Point(frame, value,
+                                                   lrintf(duration),
+                                                   feature.label.c_str()));
+            } else return;
+        }
 	
-    } else {
+    } else if (isOutput<EditableDenseThreeDimensionalModel>()) {
 	
 	DenseThreeDimensionalModel::Column values = feature.values;
 	
@@ -592,6 +679,9 @@
 	if (!model) return;
 
 	model->setColumn(frame / model->getResolution(), values);
+
+    } else {
+        std::cerr << "FeatureExtractionModelTransformer::addFeature: Unknown output model type!" << std::endl;
     }
 }
 
@@ -606,29 +696,33 @@
 //    std::cerr << "FeatureExtractionModelTransformer::setCompletion("
 //              << completion << ")" << std::endl;
 
-    if (binCount == 0) {
+    if (isOutput<SparseOneDimensionalModel>()) {
 
 	SparseOneDimensionalModel *model =
             getConformingOutput<SparseOneDimensionalModel>();
 	if (!model) return;
-	model->setCompletion(completion, true); //!!!m_context.updates);
+	model->setCompletion(completion, true);
 
-    } else if (binCount == 1) {
+    } else if (isOutput<SparseTimeValueModel>()) {
 
 	SparseTimeValueModel *model =
             getConformingOutput<SparseTimeValueModel>();
 	if (!model) return;
-	model->setCompletion(completion, true); //!!!m_context.updates);
+	model->setCompletion(completion, true);
 
-    } else if (m_descriptor->sampleType ==
-	       Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
+    } else if (isOutput<NoteModel>()) {
 
-	NoteModel *model =
-            getConformingOutput<NoteModel>();
+	NoteModel *model = getConformingOutput<NoteModel>();
 	if (!model) return;
-	model->setCompletion(completion, true); //!!!m_context.updates);
+	model->setCompletion(completion, true);
 
-    } else {
+    } else if (isOutput<RegionModel>()) {
+
+	RegionModel *model = getConformingOutput<RegionModel>();
+	if (!model) return;
+	model->setCompletion(completion, true);
+
+    } else if (isOutput<EditableDenseThreeDimensionalModel>()) {
 
 	EditableDenseThreeDimensionalModel *model =
             getConformingOutput<EditableDenseThreeDimensionalModel>();
--- a/transform/FeatureExtractionModelTransformer.h	Thu Sep 18 12:33:30 2008 +0000
+++ b/transform/FeatureExtractionModelTransformer.h	Thu Sep 18 16:08:14 2008 +0000
@@ -51,7 +51,13 @@
                    float **buffer);
 
     // just casts
+
     DenseTimeValueModel *getConformingInput();
+
+    template <typename ModelClass> bool isOutput() { // does m_output cast to ModelClass?
+        return (0 != dynamic_cast<ModelClass *>(m_output));
+    }
+
     template <typename ModelClass> ModelClass *getConformingOutput() {
 	ModelClass *mc = dynamic_cast<ModelClass *>(m_output);
 	if (!mc) {