view rdf/RDFImporter.cpp @ 489:82ab61fa9223

* Reorganise our SPARQL queries on the basis that Redland is now required rather than optional. So for anything querying the pool of data about plugins, we use a single datastore and model which is initialised at the outset by PluginRDFIndexer and then queried directly; for anything that "reads from a file" (e.g. loading annotations) we query directly using Rasqal, going to the datastore when we need additional plugin-related information. This may improve performance, but mostly it simplifies the code and fixes a serious issue with RDF import in the previous versions (namely that multiple sequential RDF imports would end up sharing the same RDF data pool!)
author Chris Cannam
date Fri, 21 Nov 2008 16:12:29 +0000
parents 1c66e199e7d9
children c3fb8258e34d
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

/*
    Sonic Visualiser
    An audio file viewer and annotation editor.
    Centre for Digital Music, Queen Mary, University of London.
    This file copyright 2008 QMUL.
   
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
*/

#include "RDFImporter.h"

#include <map>
#include <vector>

#include <iostream>
#include <cmath>

#include "SimpleSPARQLQuery.h"

#include "base/ProgressReporter.h"
#include "base/RealTime.h"

#include "data/model/SparseOneDimensionalModel.h"
#include "data/model/SparseTimeValueModel.h"
#include "data/model/EditableDenseThreeDimensionalModel.h"
#include "data/model/NoteModel.h"
#include "data/model/RegionModel.h"

using std::cerr;
using std::endl;

class RDFImporterImpl
{
public:
    RDFImporterImpl(QString url, int sampleRate);
    virtual ~RDFImporterImpl();
    
    bool isOK();
    QString getErrorString() const;

    std::vector<Model *> getDataModels(ProgressReporter *);

protected:
    QString m_uristring;
    QString m_errorString;
    int m_sampleRate;

    void getDataModelsSparse(std::vector<Model *> &, ProgressReporter *);
    void getDataModelsDense(std::vector<Model *> &, ProgressReporter *);

    void getDenseFeatureProperties(QString featureUri,
                                   int &sampleRate, int &windowLength,
                                   int &hopSize, int &width, int &height);


    void fillModel(Model *, long, long, bool, std::vector<float> &, QString);
};


// Return the file-name patterns this importer handles, as a single
// space-separated string of glob patterns.
QString
RDFImporter::getKnownExtensions()
{
    return QString("*.rdf *.n3 *.ttl");
}

// Create an importer for the RDF document at the given URL.  All of
// the work is delegated to a heap-allocated RDFImporterImpl, which
// receives the URL and sample rate unchanged; nothing is queried
// until getDataModels is called.
RDFImporter::RDFImporter(QString url, int sampleRate) :
    m_d(new RDFImporterImpl(url, sampleRate)) 
{
}

// Destroy the importer together with its private implementation
// object.
RDFImporter::~RDFImporter()
{
    delete m_d;
}

// Report whether the importer is free of errors; forwards to the
// implementation object.
bool
RDFImporter::isOK()
{
    return m_d->isOK();
}

// Return the implementation object's current error text.
QString
RDFImporter::getErrorString() const
{
    return m_d->getErrorString();
}

// Run the import and return the resulting models; forwards to the
// implementation object, passing the progress reporter through
// unchanged.
std::vector<Model *>
RDFImporter::getDataModels(ProgressReporter *r)
{
    return m_d->getDataModels(r);
}

// Record the document URI and the sample rate.  No query is run
// here, and the error string starts out default-constructed.
RDFImporterImpl::RDFImporterImpl(QString uri, int sampleRate) :
    m_uristring(uri),
    m_sampleRate(sampleRate)
{
}

// Nothing to release here.  In particular the models handed out by
// getDataModels are not deleted by this class -- presumably the
// caller takes ownership of them (TODO confirm against callers).
RDFImporterImpl::~RDFImporterImpl()
{
}

// The importer is considered OK for as long as no error text has
// been recorded.
bool
RDFImporterImpl::isOK()
{
    return m_errorString.isEmpty();
}

// Return the most recently recorded error text; this is empty when
// isOK() is true.
QString
RDFImporterImpl::getErrorString() const
{
    return m_errorString;
}

// Run both import passes (dense, then sparse) over the document and
// return every model they produced.
//
// An error in the dense pass does not stop the sparse pass from
// running.  Afterwards the error string holds the sparse pass's
// error if it had one, and otherwise whatever the dense pass
// reported (possibly nothing).
std::vector<Model *>
RDFImporterImpl::getDataModels(ProgressReporter *reporter)
{
    std::vector<Model *> collected;

    getDataModelsDense(collected, reporter);

    // Set the dense pass's error aside and reset the error state, so
    // that the sparse pass starts clean and its own failure can be
    // told apart from an earlier one.
    const QString denseError = isOK() ? QString() : m_errorString;
    m_errorString = "";

    getDataModelsSparse(collected, reporter);

    if (isOK()) {
        // Sparse pass succeeded: reinstate the dense result (which
        // may itself be "no error").
        m_errorString = denseError;
    }

    return collected;
}

// Import dense (regularly sampled) feature signals from the RDF
// document.
//
// Queries the document for every feature attached to an mo:Signal
// through af:signal_feature, and appends one new model to "models"
// for each result row that carries at least one value:
//
//  - a SparseTimeValueModel when the feature height is 1, with one
//    point per value placed at frame (index * hopSize);
//
//  - otherwise an EditableDenseThreeDimensionalModel whose columns
//    are consecutive runs of "height" values (a trailing partial
//    column is kept as it is).
//
// Missing properties fall back, with a warning on stderr, to the
// importer's own sample rate, a hop size of 1 and a height of 1.
//
// On query failure or cancellation, m_errorString is set and no
// models are added.
void
RDFImporterImpl::getDataModelsDense(std::vector<Model *> &models,
                                    ProgressReporter *reporter)
{
    SimpleSPARQLQuery query = SimpleSPARQLQuery
        (SimpleSPARQLQuery::QueryFromSingleSource,
         QString
         (
             " PREFIX mo: <http://purl.org/ontology/mo/>"
             " PREFIX af: <http://purl.org/ontology/af/>"

             " SELECT ?feature ?signal_source ?feature_signal_type ?value "
             " FROM <%1> "

             " WHERE { "

             "   ?signal a mo:Signal ; "
             "           mo:available_as ?signal_source ; "
             "           af:signal_feature ?feature . "

             "   ?feature a ?feature_signal_type ; "
             "            af:value ?value . "

             " } "
             )
         .arg(m_uristring));

    // Hand the reporter to the query, exactly as getDataModelsSparse
    // does.  Previously the reporter argument was ignored here, so
    // this pass gave the caller no progress feedback (and,
    // presumably, no way for the cancellation check below to fire).
    query.setProgressReporter(reporter);

    SimpleSPARQLQuery::ResultList results = query.execute();

    if (!query.isOK()) {
        m_errorString = query.getErrorString();
        return;
    }

    if (query.wasCancelled()) {
        m_errorString = "Query cancelled";
        return;
    }

    for (int i = 0; i < results.size(); ++i) {

        QString feature = results[i]["feature"].value;
        QString value = results[i]["value"].value;

        // Zero means "not specified" for each of these; the lookup
        // only overwrites the ones it finds in the document.
        int sampleRate = 0;
        int windowLength = 0;
        int hopSize = 0;
        int width = 0;
        int height = 0;
        getDenseFeatureProperties
            (feature, sampleRate, windowLength, hopSize, width, height);

        if (sampleRate != 0 && sampleRate != m_sampleRate) {
            cerr << "WARNING: Sample rate in dense feature description does not match our underlying rate -- using rate from feature description" << endl;
        }
        if (sampleRate == 0) sampleRate = m_sampleRate;

        if (hopSize == 0) {
            cerr << "WARNING: Dense feature description does not specify a hop size -- assuming 1" << endl;
            hopSize = 1;
        }

        if (height == 0) {
            cerr << "WARNING: Dense feature description does not specify feature signal dimensions -- assuming one-dimensional (height = 1)" << endl;
            height = 1;
        }

        // The values arrive as a single space-separated string.
        QStringList values = value.split(' ', QString::SkipEmptyParts);

        if (values.empty()) {
            cerr << "WARNING: Dense feature description does not specify any values!" << endl;
            continue;
        }

        if (height == 1) {

            // One value per hop: a simple time/value curve.
            SparseTimeValueModel *m = new SparseTimeValueModel
                (sampleRate, hopSize, false);

            for (int j = 0; j < values.size(); ++j) {
                float f = values[j].toFloat();
                SparseTimeValueModel::Point point(j * hopSize, f, "");
                m->addPoint(point);
            }

            models.push_back(m);

        } else {

            // Several values per hop: cut the flat value list into
            // columns of "height" bins each.
            EditableDenseThreeDimensionalModel *m =
                new EditableDenseThreeDimensionalModel(sampleRate, hopSize,
                                                       height, false);

            EditableDenseThreeDimensionalModel::Column column;

            int x = 0;

            for (int j = 0; j < values.size(); ++j) {
                // A column boundary: flush what we have so far.
                if (j % height == 0 && !column.empty()) {
                    m->setColumn(x++, column);
                    column.clear();
                }
                column.push_back(values[j].toFloat());
            }

            // Flush the final (possibly short) column.
            if (!column.empty()) {
                m->setColumn(x++, column);
            }

            models.push_back(m);
        }
    }
}

// Look up the layout and timing properties of one dense feature.
//
// featureUri is the URI of the feature within the document at
// m_uristring.  Each output argument is written only if the
// corresponding property is found in the document; otherwise it
// keeps the value the caller passed in:
//
//  - height, width: first and second entries of the space-separated
//    af:dimensions string;
//
//  - sampleRate, hopSize, windowLength: the tl:sampleRate,
//    tl:hopSize and tl:windowLength properties of the map whose
//    range timeline is the timeline of the feature's mo:time
//    interval.
//
// The values found are also echoed to stderr.
void
RDFImporterImpl::getDenseFeatureProperties(QString featureUri,
                                           int &sampleRate, int &windowLength,
                                           int &hopSize, int &width, int &height)
{
    SimpleSPARQLQuery::QueryType s = SimpleSPARQLQuery::QueryFromSingleSource;

    QString dimensionsQuery 
        (
            " PREFIX mo: <http://purl.org/ontology/mo/>"
            " PREFIX af: <http://purl.org/ontology/af/>"
            
            " SELECT ?dimensions "
            " FROM <%1> "

            " WHERE { "

            "   <%2> af:dimensions ?dimensions . "
            
            " } "
            );

    SimpleSPARQLQuery::Value dimensionsValue =
        SimpleSPARQLQuery::singleResultQuery
        (s, dimensionsQuery.arg(m_uristring).arg(featureUri), "dimensions");

    cerr << "Dimensions = \"" << dimensionsValue.value.toStdString() << "\""
         << endl;

    if (dimensionsValue.value != "") {
        QStringList dl = dimensionsValue.value.split(" ");
        if (dl.empty()) dl.push_back(dimensionsValue.value);
        if (dl.size() > 0) height = dl[0].toInt();
        if (dl.size() > 1) width = dl[1].toInt();
    }

    // %3 is replaced by the name of the one map property being
    // asked for, both as the predicate and as the result variable.
    QString queryTemplate
        (
            " PREFIX mo: <http://purl.org/ontology/mo/>"
            " PREFIX af: <http://purl.org/ontology/af/>"
            " PREFIX tl: <http://purl.org/NET/c4dm/timeline.owl#>"

            " SELECT ?%3 "
            " FROM <%1> "
            
            " WHERE { "
            
            "   <%2> mo:time ?time . "
            
            "   ?time a tl:Interval ; "
            "         tl:onTimeLine ?timeline . "

            "   ?map tl:rangeTimeLine ?timeline . "

            "   ?map tl:%3 ?%3 . "
            
            " } "
            );

    // Another laborious workaround for rasqal's failure to handle
    // multiple optionals properly: run one query per property rather
    // than a single query with three OPTIONAL clauses.

    SimpleSPARQLQuery::Value srValue = 
        SimpleSPARQLQuery::singleResultQuery(s,
                                             queryTemplate
                                             .arg(m_uristring).arg(featureUri)
                                             .arg("sampleRate"),
                                             "sampleRate");
    if (srValue.value != "") {
        sampleRate = srValue.value.toInt();
    }

    SimpleSPARQLQuery::Value hopValue = 
        SimpleSPARQLQuery::singleResultQuery(s,
                                             queryTemplate
                                             .arg(m_uristring).arg(featureUri)
                                             .arg("hopSize"),
                                             "hopSize");
    // Test the hop-size result itself.  This used to test srValue,
    // which discarded a valid hop size whenever the document gave
    // no sample rate.
    if (hopValue.value != "") {
        hopSize = hopValue.value.toInt();
    }

    SimpleSPARQLQuery::Value winValue = 
        SimpleSPARQLQuery::singleResultQuery(s,
                                             queryTemplate
                                             .arg(m_uristring).arg(featureUri)
                                             .arg("windowLength"),
                                             "windowLength");
    if (winValue.value != "") {
        windowLength = winValue.value.toInt();
    }

    cerr << "sr = " << sampleRate << ", hop = " << hopSize << ", win = " << windowLength << endl;
}

void
RDFImporterImpl::getDataModelsSparse(std::vector<Model *> &models,
                                     ProgressReporter *reporter)
{
    SimpleSPARQLQuery::QueryType s = SimpleSPARQLQuery::QueryFromSingleSource;

    // Our query is intended to retrieve every thing that has a time,
    // and every feature type and value associated with a thing that
    // has a time.

    // We will then need to refine this big bag of results into a set
    // of data models.

    // Results that have different source signals should go into
    // different models.

    // Results that have different feature types should go into
    // different models.

    // Results that are sparse should go into different models from
    // those that are dense (we need to examine the timestamps to
    // establish this -- if the timestamps are regular, the results
    // are dense -- so we can't do it as we go along, only after
    // collecting all results).

    // Timed things that have features associated with them should not
    // appear directly in any model -- their features should appear
    // instead -- and these should be different models from those used
    // for timed things that do not have features.

    // As we load the results, we'll push them into a partially
    // structured container that maps from source signal (URI as
    // string) -> feature type (likewise) -> time -> list of values.
    // If the source signal or feature type is unavailable, the empty
    // string will do.

    QString prefixes = QString(
        " PREFIX event: <http://purl.org/NET/c4dm/event.owl#>"
        " PREFIX tl: <http://purl.org/NET/c4dm/timeline.owl#>"
        " PREFIX mo: <http://purl.org/ontology/mo/>"
        " PREFIX af: <http://purl.org/ontology/af/>"
        " PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>"
        );

    QString queryString = prefixes + QString(

        " SELECT ?signal_source ?timed_thing ?event_type ?value"
        " FROM <%1>"

        " WHERE {"

        "   ?signal mo:available_as ?signal_source ."
        "   ?signal a mo:Signal ."

        "   ?signal mo:time ?interval ."
        "   ?interval tl:onTimeLine ?tl ."
        "   ?time tl:onTimeLine ?tl ."
        "   ?timed_thing event:time ?time ."
        "   ?timed_thing a ?event_type ."

        "   OPTIONAL {"
        "     ?timed_thing af:feature ?value"
        "   }"
        " }"

        ).arg(m_uristring);

    QString timeQueryString = prefixes + QString(
        
        " SELECT ?time FROM <%1> "
        " WHERE { "        
        "   <%2> event:time ?t . "
        "   ?t tl:at ?time . "
        " } "

        ).arg(m_uristring);

    QString rangeQueryString = prefixes + QString(
        
        " SELECT ?time ?duration FROM <%1> "
        " WHERE { "
        "   <%2> event:time ?t . "
        "   ?t tl:beginsAt ?time . "
        "   ?t tl:duration ?duration . "
        " } "

        ).arg(m_uristring);

    QString labelQueryString = prefixes + QString(
        
        " SELECT ?label FROM <%1> "
        " WHERE { "
        "   <%2> rdfs:label ?label . "
        " } "

        ).arg(m_uristring);

    SimpleSPARQLQuery query(s, queryString);
    query.setProgressReporter(reporter);

    cerr << "Query will be: " << queryString.toStdString() << endl;

    SimpleSPARQLQuery::ResultList results = query.execute();

    if (!query.isOK()) {
        m_errorString = query.getErrorString();
        return;
    }

    if (query.wasCancelled()) {
        m_errorString = "Query cancelled";
        return;
    }        



    /*

      This function is now only used for sparse data (for dense data
      we would be in getDataModelsDense instead).

      For sparse data, the determining factors in deciding what model
      to use are: Do the features have values? and Do the features
      have duration?

      We can run through the results and check off whether we find
      values and duration for each of the source+type keys, and then
      run through the source+type keys pushing each of the results
      into a suitable model.

      Unfortunately, at this point we do not yet have any actual
      timing data (time/duration) -- just the time URI.

      What we _could_ do is to create one of each type of model at the
      start, for each of the source+type keys, and then push each
      feature into the relevant model depending on what we find out
      about it.  Then return only non-empty models.

      
    */

    // Map from signal source to event type to dimensionality to
    // presence of duration to model ptr.  Whee!
    std::map<QString, std::map<QString, std::map<int, std::map<bool, Model *> > > >
        modelMap;

    for (int i = 0; i < results.size(); ++i) {

        QString source = results[i]["signal_source"].value;
        QString type = results[i]["event_type"].value;
        QString thinguri = results[i]["timed_thing"].value;
        
        RealTime time;
        RealTime duration;

        bool haveTime = false;
        bool haveDuration = false;

        QString label = SimpleSPARQLQuery::singleResultQuery
            (s, labelQueryString.arg(thinguri), "label").value;

        SimpleSPARQLQuery rangeQuery(s, rangeQueryString.arg(thinguri));
        SimpleSPARQLQuery::ResultList rangeResults = rangeQuery.execute();
        if (!rangeResults.empty()) {
//                std::cerr << rangeResults.size() << " range results" << std::endl;
            time = RealTime::fromXsdDuration
                (rangeResults[0]["time"].value.toStdString());
            duration = RealTime::fromXsdDuration
                (rangeResults[0]["duration"].value.toStdString());
//                std::cerr << "duration string " << rangeResults[0]["duration"].value.toStdString() << std::endl;
            haveTime = true;
            haveDuration = true;
        } else {
            QString timestring = SimpleSPARQLQuery::singleResultQuery
                (s, timeQueryString.arg(thinguri), "time").value;
            if (timestring != "") {
                time = RealTime::fromXsdDuration(timestring.toStdString());
                haveTime = true;
            }
        }

        QString valuestring = results[i]["value"].value;
        std::vector<float> values;

        if (valuestring != "") {
            QStringList vsl = valuestring.split(" ", QString::SkipEmptyParts);
            for (int j = 0; j < vsl.size(); ++j) {
                bool success = false;
                float v = vsl[j].toFloat(&success);
                if (success) values.push_back(v);
            }
        }

        int dimensions = 1;
        if (values.size() == 1) dimensions = 2;
        else if (values.size() > 1) dimensions = 3;

        Model *model = 0;

        if (modelMap[source][type][dimensions].find(haveDuration) ==
            modelMap[source][type][dimensions].end()) {

/*
            std::cerr << "Creating new model: source = " << source.toStdString()
                      << ", type = " << type.toStdString() << ", dimensions = "
                      << dimensions << ", haveDuration = " << haveDuration
                      << ", time = " << time << ", duration = " << duration
                      << std::endl;
*/
            
            if (!haveDuration) {

                if (dimensions == 1) {

//                    std::cerr << "SparseOneDimensionalModel" << std::endl;
                    model = new SparseOneDimensionalModel(m_sampleRate, 1, false);

                } else if (dimensions == 2) {

//                    std::cerr << "SparseTimeValueModel" << std::endl;
                    model = new SparseTimeValueModel(m_sampleRate, 1, false);

                } else {

                    // We don't have a three-dimensional sparse model,
                    // so use a note model.  We do have some logic (in
                    // extractStructure below) for guessing whether
                    // this should after all have been a dense model,
                    // but it's hard to apply it because we don't have
                    // all the necessary timing data yet... hmm

//                    std::cerr << "NoteModel" << std::endl;
                    model = new NoteModel(m_sampleRate, 1, false);
                }

            } else { // haveDuration

                if (dimensions == 1 || dimensions == 2) {

                    // If our units are frequency or midi pitch, we
                    // should be using a note model... hm
                    
//                    std::cerr << "RegionModel" << std::endl;
                    model = new RegionModel(m_sampleRate, 1, false);

                } else {

                    // We don't have a three-dimensional sparse model,
                    // so use a note model.  We do have some logic (in
                    // extractStructure below) for guessing whether
                    // this should after all have been a dense model,
                    // but it's hard to apply it because we don't have
                    // all the necessary timing data yet... hmm

//                    std::cerr << "NoteModel" << std::endl;
                    model = new NoteModel(m_sampleRate, 1, false);
                }
            }

            modelMap[source][type][dimensions][haveDuration] = model;
            models.push_back(model);
        }

        model = modelMap[source][type][dimensions][haveDuration];

        if (model) {
            long ftime = RealTime::realTime2Frame(time, m_sampleRate);
            long fduration = RealTime::realTime2Frame(duration, m_sampleRate);
            fillModel(model, ftime, fduration, haveDuration, values, label);
        }
    }
}

// Add one event to "model", in the form its concrete type expects.
//
// ftime and fduration are in frames; fduration is only meaningful
// when haveDuration is true.  "values" holds the event's numeric
// feature values (possibly none) and "label" its text label.
//
// Values missing from "values" default to 0 for the main value and
// to 1 for level and for a duration taken from the value list.
// A warning is printed to stderr if the model is of none of the
// four supported types.
void
RDFImporterImpl::fillModel(Model *model,
                           long ftime,
                           long fduration,
                           bool haveDuration,
                           std::vector<float> &values,
                           QString label)
{
    // The first value, where there is one, is the main value for
    // every model type that has one.
    const float firstValue = values.empty() ? 0.f : values[0];

    if (SparseOneDimensionalModel *instants =
        dynamic_cast<SparseOneDimensionalModel *>(model)) {

        SparseOneDimensionalModel::Point point(ftime, label);
        instants->addPoint(point);
        return;
    }

    if (SparseTimeValueModel *curve =
        dynamic_cast<SparseTimeValueModel *>(model)) {

        SparseTimeValueModel::Point point(ftime, firstValue, label);
        curve->addPoint(point);
        return;
    }

    if (NoteModel *notes = dynamic_cast<NoteModel *>(model)) {

        if (haveDuration) {
            // Duration comes from the timing data, so the second
            // value is free to serve as the level.
            const float level = values.size() > 1 ? values[1] : 1.f;
            NoteModel::Point point
                (ftime, firstValue, fduration, level, label);
            notes->addPoint(point);
        } else {
            // No timing duration: take it from the second value,
            // and the level from the third.
            const float duration = values.size() > 1 ? values[1] : 1.f;
            const float level = values.size() > 2 ? values[2] : 1.f;
            NoteModel::Point point
                (ftime, firstValue, duration, level, label);
            notes->addPoint(point);
        }
        return;
    }

    if (RegionModel *regions = dynamic_cast<RegionModel *>(model)) {

        if (haveDuration) {
            RegionModel::Point point(ftime, firstValue, fduration, label);
            regions->addPoint(point);
        } else {
            // Not expected in practice, since region models are only
            // created for events that have a duration; handled for
            // completeness.
            const float duration = values.size() > 1 ? values[1] : 1.f;
            RegionModel::Point point(ftime, firstValue, duration, label);
            regions->addPoint(point);
        }
        return;
    }

    std::cerr << "WARNING: RDFImporterImpl::fillModel: Unknown or unexpected model type" << std::endl;
}