diff rdf/RDFImporter.cpp @ 439:beb2948baa77

* Merge revisions 1041 to 1130 from sv-rdf-import branch
author Chris Cannam
date Thu, 18 Sep 2008 12:09:32 +0000
parents
children 5746c559af15
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rdf/RDFImporter.cpp	Thu Sep 18 12:09:32 2008 +0000
@@ -0,0 +1,435 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Sonic Visualiser
+    An audio file viewer and annotation editor.
+    Centre for Digital Music, Queen Mary, University of London.
+    This file copyright 2008 QMUL.
+   
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+*/
+
+#include "RDFImporter.h"
+
+#include <map>
+#include <vector>
+
+#include <iostream>
+#include <cmath>
+
+#include "SimpleSPARQLQuery.h"
+
+#include "base/ProgressReporter.h"
+#include "base/RealTime.h"
+
+#include "data/model/SparseOneDimensionalModel.h"
+#include "data/model/SparseTimeValueModel.h"
+#include "data/model/EditableDenseThreeDimensionalModel.h"
+
+using std::cerr;
+using std::endl;
+
+class RDFImporterImpl
+{
+public:
+    RDFImporterImpl(QString url, int sampleRate);
+    virtual ~RDFImporterImpl();
+    
+    bool isOK();
+    QString getErrorString() const;
+
+    std::vector<Model *> getDataModels(ProgressReporter *);
+
+protected:
+    QString m_uristring;
+    QString m_errorString;
+    int m_sampleRate;
+
+    typedef std::vector<float> ValueList;
+    typedef std::map<RealTime, ValueList> TimeValueMap;
+    typedef std::map<QString, TimeValueMap> TypeTimeValueMap;
+    typedef std::map<QString, TypeTimeValueMap> SourceTypeTimeValueMap;
+
+    void extractStructure(const TimeValueMap &map, bool &sparse,
+                          int &minValueCount, int &maxValueCount);
+
+    void fillModel(SparseOneDimensionalModel *, const TimeValueMap &);
+    void fillModel(SparseTimeValueModel *, const TimeValueMap &);
+    void fillModel(EditableDenseThreeDimensionalModel *, const TimeValueMap &);
+};
+
+
+QString
+RDFImporter::getKnownExtensions()
+{
+    return "*.rdf *.n3 *.ttl";
+}
+
+RDFImporter::RDFImporter(QString url, int sampleRate) :
+    m_d(new RDFImporterImpl(url, sampleRate)) 
+{
+}
+
+RDFImporter::~RDFImporter()
+{
+    delete m_d;
+}
+
+bool
+RDFImporter::isOK()
+{
+    return m_d->isOK();
+}
+
+QString
+RDFImporter::getErrorString() const
+{
+    return m_d->getErrorString();
+}
+
+std::vector<Model *>
+RDFImporter::getDataModels(ProgressReporter *r)
+{
+    return m_d->getDataModels(r);
+}
+
+RDFImporterImpl::RDFImporterImpl(QString uri, int sampleRate) :
+    m_uristring(uri),
+    m_sampleRate(sampleRate)
+{
+}
+
+RDFImporterImpl::~RDFImporterImpl()
+{
+}
+
+bool
+RDFImporterImpl::isOK()
+{
+    return (m_errorString == "");
+}
+
+QString
+RDFImporterImpl::getErrorString() const
+{
+    return m_errorString;
+}
+
+std::vector<Model *>
+RDFImporterImpl::getDataModels(ProgressReporter *reporter)
+{
+    std::vector<Model *> models;
+
+    // Our query is intended to retrieve every thing that has a time,
+    // and every feature type and value associated with a thing that
+    // has a time.
+
+    // We will then need to refine this big bag of results into a set
+    // of data models.
+
+    // Results that have different source signals should go into
+    // different models.
+
+    // Results that have different feature types should go into
+    // different models.
+
+    // Results that are sparse should go into different models from
+    // those that are dense (we need to examine the timestamps to
+    // establish this -- if the timestamps are regular, the results
+    // are dense -- so we can't do it as we go along, only after
+    // collecting all results).
+
+    // Timed things that have features associated with them should not
+    // appear directly in any model -- their features should appear
+    // instead -- and these should be different models from those used
+    // for timed things that do not have features.
+
+    // As we load the results, we'll push them into a partially
+    // structured container that maps from source signal (URI as
+    // string) -> feature type (likewise) -> time -> list of values.
+    // If the source signal or feature type is unavailable, the empty
+    // string will do.
+
+    SourceTypeTimeValueMap m;
+
+    QString queryString = QString(
+
+        " PREFIX event: <http://purl.org/NET/c4dm/event.owl#>"
+        " PREFIX time: <http://purl.org/NET/c4dm/timeline.owl#>"
+        " PREFIX mo: <http://purl.org/ontology/mo/>"
+        " PREFIX af: <http://purl.org/ontology/af/>"
+
+        " SELECT ?signalSource ?time ?eventType ?value"
+        " FROM <%1>"
+
+        " WHERE {"
+        "   ?signal mo:available_as ?signalSource ."
+        "   ?signal mo:time ?interval ."
+        "   ?interval time:onTimeLine ?tl ."
+        "   ?t time:onTimeLine ?tl ."
+        "   ?t time:at ?time ."
+        "   ?timedThing event:time ?t ."
+        "   ?timedThing a ?eventType ."
+        "   OPTIONAL {"
+        "     ?timedThing af:hasFeature ?feature ."
+        "     ?feature af:value ?value"
+        "   }"
+        " }"
+
+        ).arg(m_uristring);
+
+    SimpleSPARQLQuery query(queryString);
+    query.setProgressReporter(reporter);
+
+    cerr << "Query will be: " << queryString.toStdString() << endl;
+
+    SimpleSPARQLQuery::ResultList results = query.execute();
+
+    if (!query.isOK()) {
+        m_errorString = query.getErrorString();
+        return models;
+    }
+
+    if (query.wasCancelled()) {
+        m_errorString = "Query cancelled";
+        return models;
+    }        
+
+    for (int i = 0; i < results.size(); ++i) {
+
+        QString source = results[i]["signalSource"].value;
+
+        QString timestring = results[i]["time"].value;
+        RealTime time;
+        time = RealTime::fromXsdDuration(timestring.toStdString());
+        cerr << "time = " << time.toString() << " (from xsd:duration \""
+             << timestring.toStdString() << "\")" << endl;
+
+        QString type = results[i]["eventType"].value;
+
+        QString valuestring = results[i]["value"].value;
+        float value = 0.f;
+        bool haveValue = false;
+        if (valuestring != "") {
+            value = valuestring.toFloat(&haveValue);
+            cerr << "value = " << value << endl;
+        }
+
+        if (haveValue) {
+            m[source][type][time].push_back(value);
+        } else if (m[source][type].find(time) == m[source][type].end()) {
+            m[source][type][time] = ValueList();
+        }
+    }
+
+    for (SourceTypeTimeValueMap::const_iterator mi = m.begin();
+         mi != m.end(); ++mi) {
+        
+        QString source = mi->first;
+
+        for (TypeTimeValueMap::const_iterator ttvi = mi->second.begin();
+             ttvi != mi->second.end(); ++ttvi) {
+            
+            QString type = ttvi->first;
+
+            // Now we need to work out what sort of model to use for
+            // this source/type combination.  Ultimately we'll
+            // hopefully be able to map directly from the type to the
+            // model on the basis of known structures for the types,
+            // but we also want to be able to handle untyped data
+            // according to its apparent structure so let's do that
+            // first.
+
+            bool sparse = false;
+            int minValueCount = 0, maxValueCount = 0;
+
+            extractStructure(ttvi->second, sparse, minValueCount, maxValueCount);
+    
+            cerr << "For source \"" << source.toStdString() << "\", type \""
+                 << type.toStdString() << "\" we have sparse = " << sparse
+                 << ", min value count = " << minValueCount << ", max = "
+                 << maxValueCount << endl;
+
+            // Model allocations:
+            //
+            // Sparse, no values: SparseOneDimensionalModel
+            //
+            // Sparse, always 1 value: SparseTimeValueModel
+            //
+            // Sparse, > 1 value: No standard model for this.  If
+            // there are always 2 values, perhaps hack it into
+            // NoteModel for now?  Or always use SparseTimeValueModel
+            // and discard all but the first value.
+            //
+            // Dense, no values: Meaningless; no suitable model
+            //
+            // Dense, > 0 values: EditableDenseThreeDimensionalModel
+            //
+            // These should just be our fallback positions; we want to
+            // be reading semantic data from the RDF in order to pick
+            // the right model directly
+
+            enum { SODM, STVM, EDTDM } modelType = SODM;
+
+            if (sparse) {
+                if (maxValueCount == 0) {
+                    modelType = SODM;
+                } else if (minValueCount == 1 && maxValueCount == 1) {
+                    modelType = STVM;
+                } else {
+                    cerr << "WARNING: No suitable model available for sparse data with between " << minValueCount << " and " << maxValueCount << " values" << endl;
+                    modelType = STVM;
+                }
+            } else {
+                if (maxValueCount == 0) {
+                    cerr << "WARNING: Dense data set with no values is not meaningful, skipping" << endl;
+                    continue;
+                } else {
+                    modelType = EDTDM;
+                }
+            }
+
+            //!!! set model name &c
+
+            if (modelType == SODM) {
+
+                SparseOneDimensionalModel *model = 
+                    new SparseOneDimensionalModel(m_sampleRate, 1, false);
+                
+                fillModel(model, ttvi->second);
+                models.push_back(model);
+
+            } else if (modelType == STVM) {
+
+                SparseTimeValueModel *model = 
+                    new SparseTimeValueModel(m_sampleRate, 1, false);
+                
+                fillModel(model, ttvi->second);
+                models.push_back(model);
+
+            } else {
+                
+                EditableDenseThreeDimensionalModel *model =
+                    new EditableDenseThreeDimensionalModel(m_sampleRate, 1, 0,
+                                                           false);
+
+                fillModel(model, ttvi->second);
+                models.push_back(model);
+            }
+        }
+    }
+
+
+    return models;
+}
+
+void
+RDFImporterImpl::extractStructure(const TimeValueMap &tvm,
+                                  bool &sparse,
+                                  int &minValueCount,
+                                  int &maxValueCount)
+{
+    // These are floats intentionally rather than RealTime --
+    // see logic for handling rounding error below
+    float firstTime = 0.f;
+    float timeStep = 0.f;
+    bool haveTimeStep = false;
+    
+    for (TimeValueMap::const_iterator tvi = tvm.begin(); tvi != tvm.end(); ++tvi) {
+        
+        RealTime time = tvi->first;
+        int valueCount = tvi->second.size();
+        
+        if (tvi == tvm.begin()) {
+            
+            minValueCount = valueCount;
+            maxValueCount = valueCount;
+            
+            firstTime = time.toDouble();
+            
+        } else {
+            
+            if (valueCount < minValueCount) minValueCount = valueCount;
+            if (valueCount > maxValueCount) maxValueCount = valueCount;
+            
+            if (!haveTimeStep) {
+                timeStep = time.toDouble() - firstTime;
+                if (timeStep == 0.f) sparse = true;
+                haveTimeStep = true;
+            } else if (!sparse) {
+                // test whether this time is within
+                // rounding-error range of being an integer
+                // multiple of some constant away from the
+                // first time
+                float timeAsFloat = time.toDouble();
+                int count = int((timeAsFloat - firstTime) / timeStep + 0.5);
+                float expected = firstTime + (timeStep * count);
+                if (fabsf(expected - timeAsFloat) > 1e-6) {
+                    cerr << "Event at " << timeAsFloat << " is not evenly spaced -- would expect it to be " << expected << " for a spacing of " << count << " * " << timeStep << endl;
+                    sparse = true;
+                }
+            }
+        }
+    }
+}
+
+void
+RDFImporterImpl::fillModel(SparseOneDimensionalModel *model,
+                           const TimeValueMap &tvm)
+{
+    //!!! labels &c not yet handled
+
+    for (TimeValueMap::const_iterator tvi = tvm.begin();
+         tvi != tvm.end(); ++tvi) {
+        
+        RealTime time = tvi->first;
+        long frame = RealTime::realTime2Frame(time, m_sampleRate);
+
+        SparseOneDimensionalModel::Point point(frame);
+
+        model->addPoint(point);
+    }
+}
+
+void
+RDFImporterImpl::fillModel(SparseTimeValueModel *model,
+                           const TimeValueMap &tvm)
+{
+    //!!! labels &c not yet handled
+
+    for (TimeValueMap::const_iterator tvi = tvm.begin();
+         tvi != tvm.end(); ++tvi) {
+        
+        RealTime time = tvi->first;
+        long frame = RealTime::realTime2Frame(time, m_sampleRate);
+
+        float value = 0.f;
+        if (!tvi->second.empty()) value = *tvi->second.begin();
+        
+        SparseTimeValueModel::Point point(frame, value, "");
+
+        model->addPoint(point);
+    }
+}
+
+void
+RDFImporterImpl::fillModel(EditableDenseThreeDimensionalModel *model,
+                           const TimeValueMap &tvm)
+{
+    //!!! labels &c not yet handled
+
+    //!!! start time offset not yet handled
+
+    size_t col = 0;
+
+    for (TimeValueMap::const_iterator tvi = tvm.begin();
+         tvi != tvm.end(); ++tvi) {
+        
+        model->setColumn(col++, tvi->second);
+    }
+}
+