view rdf/RDFImporter.cpp @ 440:5746c559af15

* Merge revisions 1131 to 1201 from sv-rdf-import branch
author Chris Cannam
date Thu, 18 Sep 2008 12:33:30 +0000
parents beb2948baa77
children 2fb0061c5d23
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

/*
    Sonic Visualiser
    An audio file viewer and annotation editor.
    Centre for Digital Music, Queen Mary, University of London.
    This file copyright 2008 QMUL.
   
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
*/

#include "RDFImporter.h"

#include <map>
#include <vector>

#include <iostream>
#include <cmath>

#include "SimpleSPARQLQuery.h"

#include "base/ProgressReporter.h"
#include "base/RealTime.h"

#include "data/model/SparseOneDimensionalModel.h"
#include "data/model/SparseTimeValueModel.h"
#include "data/model/EditableDenseThreeDimensionalModel.h"

using std::cerr;
using std::endl;

// Private implementation class behind RDFImporter.  RDFImporter
// constructs one of these and forwards isOK(), getErrorString() and
// getDataModels() to it.
class RDFImporterImpl
{
public:
    // url is substituted into the FROM <...> clause of every SPARQL
    // query this class issues; sampleRate is the rate used for models
    // and for converting times to frames when the RDF gives no rate.
    RDFImporterImpl(QString url, int sampleRate);
    virtual ~RDFImporterImpl();
    
    // True if no error string is currently recorded.
    bool isOK();
    QString getErrorString() const;

    // Run the dense import followed by the sparse import and return
    // every model that either of them created.
    std::vector<Model *> getDataModels(ProgressReporter *);

protected:
    QString m_uristring;   // URI of the RDF document being queried
    QString m_errorString; // empty when the last operation succeeded
    int m_sampleRate;      // fallback / target sample rate

    // Intermediate containers used by the sparse import:
    // time -> list of values found at that time
    typedef std::vector<float> ValueList;
    typedef std::map<RealTime, ValueList> TimeValueMap;
    // feature/event type (URI as string) -> TimeValueMap
    typedef std::map<QString, TimeValueMap> TypeTimeValueMap;
    // source signal (URI as string) -> TypeTimeValueMap
    typedef std::map<QString, TypeTimeValueMap> SourceTypeTimeValueMap;

    // Each of these appends any models it creates to the vector and
    // records failures in m_errorString.
    void getDataModelsSparse(std::vector<Model *> &, ProgressReporter *);
    void getDataModelsDense(std::vector<Model *> &, ProgressReporter *);

    // Query the properties of one dense feature.  Output arguments are
    // only written for properties actually found in the document.
    void getDenseFeatureProperties(QString featureUri,
                                   int &sampleRate, int &windowLength,
                                   int &hopSize, int &width, int &height);

    // Examine a time->values map: report whether its timestamps look
    // irregular (sparse) and the smallest and largest number of values
    // attached to any one timestamp.
    void extractStructure(const TimeValueMap &map, bool &sparse,
                          int &minValueCount, int &maxValueCount);

    // Copy the contents of a time->values map into a model of the
    // given kind.
    void fillModel(SparseOneDimensionalModel *, const TimeValueMap &);
    void fillModel(SparseTimeValueModel *, const TimeValueMap &);
    void fillModel(EditableDenseThreeDimensionalModel *, const TimeValueMap &);
};


// Return a space-separated list of filename glob patterns for the
// RDF serialisations this importer claims to handle.
QString
RDFImporter::getKnownExtensions()
{
    return "*.rdf *.n3 *.ttl";
}

// Construct an importer for the given URL.  All work is delegated to
// a heap-allocated RDFImporterImpl, which is released in the destructor.
RDFImporter::RDFImporter(QString url, int sampleRate) :
    m_d(new RDFImporterImpl(url, sampleRate)) 
{
}

// Destroy the importer, releasing the implementation object created
// in the constructor.
RDFImporter::~RDFImporter()
{
    delete m_d;
}

// Return true if the implementation object has no error recorded.
bool
RDFImporter::isOK()
{
    return m_d->isOK();
}

// Return the error message held by the implementation object (empty
// if there is none).
QString
RDFImporter::getErrorString() const
{
    return m_d->getErrorString();
}

// Run the import and return the models it produced.  The reporter
// pointer is passed through unchanged to the implementation.
std::vector<Model *>
RDFImporter::getDataModels(ProgressReporter *r)
{
    return m_d->getDataModels(r);
}

// Store the document URI and sample rate for later queries.  No
// query is run here; m_errorString starts out default-constructed,
// so isOK() is true until an import fails.
RDFImporterImpl::RDFImporterImpl(QString uri, int sampleRate) :
    m_uristring(uri),
    m_sampleRate(sampleRate)
{
}

// Nothing to release: models returned from getDataModels() are not
// tracked by this object.
RDFImporterImpl::~RDFImporterImpl()
{
}

// The importer is considered healthy as long as no error message has
// been recorded.
bool
RDFImporterImpl::isOK()
{
    return m_errorString.isEmpty();
}

// Return the most recently recorded error message, or an empty
// string if there is none.
QString
RDFImporterImpl::getErrorString() const
{
    return m_errorString;
}

// Import everything we can find in the document: dense feature
// signals first, then sparse timed events.  Models from both passes
// are returned together.
//
// Error reporting: if the sparse pass fails, its message is the one
// left in m_errorString; otherwise any message from the dense pass is
// restored so that a dense failure is not silently lost.
std::vector<Model *>
RDFImporterImpl::getDataModels(ProgressReporter *reporter)
{
    std::vector<Model *> result;

    getDataModelsDense(result, reporter);

    // Set the dense-pass error (if any) aside and clear the error
    // state so that the sparse pass starts from a clean slate
    QString denseError;
    if (!isOK()) {
        denseError = m_errorString;
    }
    m_errorString = "";

    getDataModelsSparse(result, reporter);

    // Sparse pass succeeded: fall back to whatever the dense pass said
    if (isOK()) {
        m_errorString = denseError;
    }

    return result;
}

// Import dense feature signals: features attached to a mo:Signal via
// af:signal_feature whose af:value is a single whitespace-separated
// list of numbers.
//
// For each query result one model is appended to `models`:
//  - a SparseTimeValueModel if the feature is one-dimensional
//    (height 1), with one point every hopSize frames;
//  - otherwise an EditableDenseThreeDimensionalModel whose columns
//    are consecutive runs of `height` values.
//
// On query failure or cancellation, m_errorString is set and nothing
// is appended.  `reporter` is attached to the query so that progress
// can be reported while it runs, as in getDataModelsSparse().
void
RDFImporterImpl::getDataModelsDense(std::vector<Model *> &models,
                                    ProgressReporter *reporter)
{
    SimpleSPARQLQuery query = SimpleSPARQLQuery
        (QString
         (
             " PREFIX mo: <http://purl.org/ontology/mo/>"
             " PREFIX af: <http://purl.org/ontology/af/>"
             
             " SELECT ?feature ?signal_source ?feature_signal_type ?value "
             " FROM <%1> "
             
             " WHERE { "
             
             "   ?signal a mo:Signal ; "
             "           mo:available_as ?signal_source ; "
             "           af:signal_feature ?feature . "
             
             "   ?feature a ?feature_signal_type ; "
             "            af:value ?value . "
    
             " } "
             )
         .arg(m_uristring));

    // Previously the reporter was accepted but never used here, unlike
    // in the sparse import
    query.setProgressReporter(reporter);

    SimpleSPARQLQuery::ResultList results = query.execute();

    if (!query.isOK()) {
        m_errorString = query.getErrorString();
        return;
    }

    if (query.wasCancelled()) {
        m_errorString = "Query cancelled";
        return;
    }        

    for (int i = 0; i < results.size(); ++i) {

        //!!! signal_source and feature_signal_type are retrieved by
        //!!! the query but not yet used for anything

        QString feature = results[i]["feature"].value;
        QString value = results[i]["value"].value;

        // Zero means "not specified in the document" for all of these
        int sampleRate = 0;
        int windowLength = 0;
        int hopSize = 0;
        int width = 0;
        int height = 0;
        getDenseFeatureProperties
            (feature, sampleRate, windowLength, hopSize, width, height);

        if (sampleRate != 0 && sampleRate != m_sampleRate) {
            cerr << "WARNING: Sample rate in dense feature description does not match our underlying rate -- using rate from feature description" << endl;
        }
        if (sampleRate == 0) sampleRate = m_sampleRate;

        if (hopSize == 0) {
            cerr << "WARNING: Dense feature description does not specify a hop size -- assuming 1" << endl;
            hopSize = 1;
        }

        if (height == 0) {
            cerr << "WARNING: Dense feature description does not specify feature signal dimensions -- assuming one-dimensional (height = 1)" << endl;
            height = 1;
        }

        QStringList values = value.split(' ', QString::SkipEmptyParts);

        if (values.empty()) {
            cerr << "WARNING: Dense feature description does not specify any values!" << endl;
            continue;
        }

        if (height == 1) {

            // One value per hop: a time/value curve

            SparseTimeValueModel *m = new SparseTimeValueModel
                (sampleRate, hopSize, false);

            for (int j = 0; j < values.size(); ++j) {
                float f = values[j].toFloat();
                SparseTimeValueModel::Point point(j * hopSize, f, "");
                m->addPoint(point);
            }
        
            models.push_back(m);

        } else {

            // Several values per hop: a grid, filled column by column

            EditableDenseThreeDimensionalModel *m =
                new EditableDenseThreeDimensionalModel(sampleRate, hopSize,
                                                       height, false);
            
            EditableDenseThreeDimensionalModel::Column column;

            int x = 0;

            for (int j = 0; j < values.size(); ++j) {
                // Every `height` values, flush the completed column
                if (j % height == 0 && !column.empty()) {
                    m->setColumn(x++, column);
                    column.clear();
                }
                column.push_back(values[j].toFloat());
            }

            // Flush the final (possibly short) column
            if (!column.empty()) {
                m->setColumn(x++, column);
            }

            models.push_back(m);
        }
    }
}

// Look up the structural properties of the dense feature identified
// by featureUri in the document at m_uristring.
//
// Each output argument is written only if the corresponding property
// is found; otherwise it keeps the value the caller passed in.
//
//  - height, width: first and second space-separated fields of the
//    feature's af:dimensions literal
//  - sampleRate, hopSize, windowLength: the like-named tl: properties
//    of a map whose tl:rangeTimeLine is the timeline of the feature's
//    mo:time interval
void
RDFImporterImpl::getDenseFeatureProperties(QString featureUri,
                                           int &sampleRate, int &windowLength,
                                           int &hopSize, int &width, int &height)
{
    QString dimensionsQuery 
        (
            " PREFIX mo: <http://purl.org/ontology/mo/>"
            " PREFIX af: <http://purl.org/ontology/af/>"
            
            " SELECT ?dimensions "
            " FROM <%1> "

            " WHERE { "

            "   <%2> af:dimensions ?dimensions . "
            
            " } "
            );

    SimpleSPARQLQuery::Value dimensionsValue =
        SimpleSPARQLQuery::singleResultQuery(dimensionsQuery
                                             .arg(m_uristring).arg(featureUri),
                                             "dimensions");

    cerr << "Dimensions = \"" << dimensionsValue.value.toStdString() << "\""
         << endl;

    if (dimensionsValue.value != "") {
        QStringList dl = dimensionsValue.value.split(" ");
        if (dl.empty()) dl.push_back(dimensionsValue.value);
        if (dl.size() > 0) height = dl[0].toInt();
        if (dl.size() > 1) width = dl[1].toInt();
    }

    // %3 is replaced by the property name, which serves both as the
    // tl: predicate and as the name of the selected variable
    QString queryTemplate
        (
            " PREFIX mo: <http://purl.org/ontology/mo/>"
            " PREFIX af: <http://purl.org/ontology/af/>"
            " PREFIX tl: <http://purl.org/NET/c4dm/timeline.owl#>"

            " SELECT ?%3 "
            " FROM <%1> "
            
            " WHERE { "
            
            "   <%2> mo:time ?time . "
            
            "   ?time a tl:Interval ; "
            "         tl:onTimeLine ?timeline . "

            "   ?map tl:rangeTimeLine ?timeline . "

            "   ?map tl:%3 ?%3 . "
            
            " } "
            );

    // Another laborious workaround for rasqal's failure to handle
    // multiple optionals properly: one query per property

    SimpleSPARQLQuery::Value srValue = 
        SimpleSPARQLQuery::singleResultQuery(queryTemplate
                                             .arg(m_uristring).arg(featureUri)
                                             .arg("sampleRate"),
                                             "sampleRate");
    if (srValue.value != "") {
        sampleRate = srValue.value.toInt();
    }

    SimpleSPARQLQuery::Value hopValue = 
        SimpleSPARQLQuery::singleResultQuery(queryTemplate
                                             .arg(m_uristring).arg(featureUri)
                                             .arg("hopSize"),
                                             "hopSize");
    // Test the hop size result itself (this used to test srValue, so
    // the hop size depended on whether a sample rate had been found)
    if (hopValue.value != "") {
        hopSize = hopValue.value.toInt();
    }

    SimpleSPARQLQuery::Value winValue = 
        SimpleSPARQLQuery::singleResultQuery(queryTemplate
                                             .arg(m_uristring).arg(featureUri)
                                             .arg("windowLength"),
                                             "windowLength");
    if (winValue.value != "") {
        windowLength = winValue.value.toInt();
    }

    cerr << "sr = " << sampleRate << ", hop = " << hopSize << ", win = " << windowLength << endl;
}

// Import timed events (and any single feature value attached to
// them) and turn them into models, appended to `models`.  On query
// failure or cancellation m_errorString is set and nothing is
// appended.
void
RDFImporterImpl::getDataModelsSparse(std::vector<Model *> &models,
                                     ProgressReporter *reporter)
{
    // The query below fetches every thing that has a time, together
    // with the type of that thing and (optionally) a feature value
    // associated with it.  That flat bag of results then has to be
    // partitioned into models, according to these rules:
    //
    //  * different source signals -> different models
    //
    //  * different feature types -> different models
    //
    //  * sparse results go into different models from dense ones.
    //    Whether a set is dense depends on whether its timestamps
    //    are regular, which can only be decided once all of the
    //    results have been collected
    //
    //  * timed things that have features should be represented by
    //    those features rather than appearing directly, and should
    //    be kept apart from timed things without features
    //
    // To support this, results are first gathered into a nested map:
    // source signal (URI as string) -> feature type (likewise) ->
    // time -> list of values.  A missing source or type is simply
    // represented by the empty string.

    SourceTypeTimeValueMap collected;

    QString queryString = QString(

        " PREFIX event: <http://purl.org/NET/c4dm/event.owl#>"
        " PREFIX time: <http://purl.org/NET/c4dm/timeline.owl#>"
        " PREFIX mo: <http://purl.org/ontology/mo/>"
        " PREFIX af: <http://purl.org/ontology/af/>"

        " SELECT ?signal_source ?time ?event_type ?value"
        " FROM <%1>"

        " WHERE {"

        "   ?signal mo:available_as ?signal_source ."
        "   ?signal a mo:Signal ."

        "   ?signal mo:time ?interval ."
        "   ?interval time:onTimeLine ?tl ."
        "   ?t time:onTimeLine ?tl ."
        "   ?t time:at ?time ."
        "   ?timed_thing event:time ?t ."
        "   ?timed_thing a ?event_type ."

        "   OPTIONAL {"
        "     ?timed_thing af:feature ?value"
        "   }"
        " }"

        ).arg(m_uristring);

    SimpleSPARQLQuery query(queryString);
    query.setProgressReporter(reporter);

    cerr << "Query will be: " << queryString.toStdString() << endl;

    SimpleSPARQLQuery::ResultList results = query.execute();

    if (!query.isOK()) {
        m_errorString = query.getErrorString();
        return;
    }

    if (query.wasCancelled()) {
        m_errorString = "Query cancelled";
        return;
    }

    // Pass 1: file every result row under its source, type and time

    for (int row = 0; row < results.size(); ++row) {

        QString source = results[row]["signal_source"].value;

        QString timestring = results[row]["time"].value;
        RealTime time = RealTime::fromXsdDuration(timestring.toStdString());
        cerr << "time = " << time.toString() << " (from xsd:duration \""
             << timestring.toStdString() << "\")" << endl;

        QString type = results[row]["event_type"].value;

        QString valuestring = results[row]["value"].value;
        float value = 0.f;
        bool haveValue = false;
        if (valuestring != "") {
            //!!! no -- runner actually writes a "CSV literal"
            value = valuestring.toFloat(&haveValue);
            cerr << "value = " << value << endl;
        }

        // Looking the time up creates an empty value list for it if
        // we have not seen it before, so that valueless events are
        // still recorded
        ValueList &valuesHere = collected[source][type][time];
        if (haveValue) {
            valuesHere.push_back(value);
        }
    }

    // Pass 2: one model per source/type combination

    for (SourceTypeTimeValueMap::const_iterator si = collected.begin();
         si != collected.end(); ++si) {

        QString source = si->first;
        const TypeTimeValueMap &byType = si->second;

        for (TypeTimeValueMap::const_iterator ti = byType.begin();
             ti != byType.end(); ++ti) {

            QString type = ti->first;
            const TimeValueMap &timeValues = ti->second;

            // Decide what sort of model suits this source/type
            // combination.  Eventually we would like to go straight
            // from the type to a model using known structures for
            // known types, but untyped data has to be handled by its
            // apparent structure anyway, so that is what we do first.

            bool sparse = false;
            int minValueCount = 0, maxValueCount = 0;

            extractStructure(timeValues, sparse, minValueCount, maxValueCount);

            cerr << "For source \"" << source.toStdString() << "\", type \""
                 << type.toStdString() << "\" we have sparse = " << sparse
                 << ", min value count = " << minValueCount << ", max = "
                 << maxValueCount << endl;

            // Fallback model allocations (ideally semantic data in
            // the RDF would let us choose the right model directly):
            //
            //   Dense, no values    -> meaningless, nothing suitable
            //   Dense, > 0 values   -> EditableDenseThreeDimensionalModel
            //   Sparse, no values   -> SparseOneDimensionalModel
            //   Sparse, 1 value     -> SparseTimeValueModel
            //   Sparse, > 1 value   -> no standard model.  Could hack
            //                          exactly-2 into NoteModel; for
            //                          now use SparseTimeValueModel
            //                          and keep only the first value

            //!!! set model name &c

            if (!sparse) {

                if (maxValueCount == 0) {
                    cerr << "WARNING: Dense data set with no values is not meaningful, skipping" << endl;
                    continue;
                }

                EditableDenseThreeDimensionalModel *model =
                    new EditableDenseThreeDimensionalModel(m_sampleRate, 1, 0,
                                                           false);

                fillModel(model, timeValues);
                models.push_back(model);

            } else if (maxValueCount == 0) {

                SparseOneDimensionalModel *model =
                    new SparseOneDimensionalModel(m_sampleRate, 1, false);

                fillModel(model, timeValues);
                models.push_back(model);

            } else {

                if (minValueCount != 1 || maxValueCount != 1) {
                    cerr << "WARNING: No suitable model available for sparse data with between " << minValueCount << " and " << maxValueCount << " values" << endl;
                }

                SparseTimeValueModel *model =
                    new SparseTimeValueModel(m_sampleRate, 1, false);

                fillModel(model, timeValues);
                models.push_back(model);
            }
        }
    }
}

// Examine the shape of a time -> values map.
//
// minValueCount / maxValueCount are set to the smallest and largest
// number of values attached to any single timestamp (they are left
// untouched if the map is empty).
//
// sparse is set to true if the timestamps are found not to be evenly
// spaced; it is never set to false here, so the caller must
// initialise it.  Note that with fewer than three timestamps there is
// nothing to compare the first spacing against, so sparse is left as
// the caller passed it.
void
RDFImporterImpl::extractStructure(const TimeValueMap &tvm,
                                  bool &sparse,
                                  int &minValueCount,
                                  int &maxValueCount)
{
    // These are floating-point intentionally rather than RealTime --
    // see logic for handling rounding error below.  They are double
    // rather than float because single precision cannot resolve the
    // 1e-6 tolerance used below once times exceed a few seconds,
    // which made regular data look irregular
    double firstTime = 0.0;
    double timeStep = 0.0;
    bool haveTimeStep = false;
    
    for (TimeValueMap::const_iterator tvi = tvm.begin(); tvi != tvm.end(); ++tvi) {
        
        RealTime time = tvi->first;
        int valueCount = tvi->second.size();
        
        if (tvi == tvm.begin()) {
            
            minValueCount = valueCount;
            maxValueCount = valueCount;
            
            firstTime = time.toDouble();
            
        } else {
            
            if (valueCount < minValueCount) minValueCount = valueCount;
            if (valueCount > maxValueCount) maxValueCount = valueCount;
            
            if (!haveTimeStep) {
                // Second timestamp: its distance from the first
                // defines the candidate regular spacing
                timeStep = time.toDouble() - firstTime;
                if (timeStep == 0.0) sparse = true;
                haveTimeStep = true;
            } else if (!sparse) {
                // test whether this time is within
                // rounding-error range of being an integer
                // multiple of some constant away from the
                // first time
                double timeAsDouble = time.toDouble();
                int count = int((timeAsDouble - firstTime) / timeStep + 0.5);
                double expected = firstTime + (timeStep * count);
                if (fabs(expected - timeAsDouble) > 1e-6) {
                    cerr << "Event at " << timeAsDouble << " is not evenly spaced -- would expect it to be " << expected << " for a spacing of " << count << " * " << timeStep << endl;
                    sparse = true;
                }
            }
        }
    }
}

// Add one point to the model for every timestamp in the map, at the
// frame corresponding to that time at m_sampleRate.  Any values
// attached to the timestamps are ignored.
void
RDFImporterImpl::fillModel(SparseOneDimensionalModel *model,
                           const TimeValueMap &tvm)
{
    //!!! labels &c not yet handled

    TimeValueMap::const_iterator entry = tvm.begin();

    while (entry != tvm.end()) {

        RealTime when = entry->first;
        long atFrame = RealTime::realTime2Frame(when, m_sampleRate);

        SparseOneDimensionalModel::Point p(atFrame);
        model->addPoint(p);

        ++entry;
    }
}

// Add one time/value point to the model for every timestamp in the
// map, at the frame corresponding to that time at m_sampleRate.  The
// point takes the first value recorded for the timestamp, or zero if
// there is none; any further values are discarded.
void
RDFImporterImpl::fillModel(SparseTimeValueModel *model,
                           const TimeValueMap &tvm)
{
    //!!! labels &c not yet handled

    TimeValueMap::const_iterator entry = tvm.begin();

    while (entry != tvm.end()) {

        RealTime when = entry->first;
        long atFrame = RealTime::realTime2Frame(when, m_sampleRate);

        const ValueList &vals = entry->second;
        float first = vals.empty() ? 0.f : vals.front();

        SparseTimeValueModel::Point p(atFrame, first, "");
        model->addPoint(p);

        ++entry;
    }
}

// Store the value list of each successive timestamp as the next
// column of the model, starting at column zero.  The timestamps
// themselves only determine the order of the columns.
void
RDFImporterImpl::fillModel(EditableDenseThreeDimensionalModel *model,
                           const TimeValueMap &tvm)
{
    //!!! labels &c not yet handled

    //!!! start time offset not yet handled

    size_t column = 0;
    TimeValueMap::const_iterator entry = tvm.begin();

    while (entry != tvm.end()) {
        model->setColumn(column, entry->second);
        ++column;
        ++entry;
    }
}