view rdf/SimpleSPARQLQuery.cpp @ 489:82ab61fa9223

* Reorganise our sparql queries on the basis that Redland must be available, not only optional. So for anything querying the pool of data about plugins, we use a single datastore and model which is initialised at the outset by PluginRDFIndexer and then queried directly; for anything that "reads from a file" (e.g. loading annotations) we query directly using Rasqal, going to the datastore when we need additional plugin-related information. This may improve performance, but mostly it simplifies the code and fixes a serious issue with RDF import in the previous versions (namely that multiple sequential RDF imports would end up sharing the same RDF data pool!)
author Chris Cannam
date Fri, 21 Nov 2008 16:12:29 +0000
parents a82645e788fc
children c3fb8258e34d
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

/*
    Sonic Visualiser
    An audio file viewer and annotation editor.
    Centre for Digital Music, Queen Mary, University of London.
    This file copyright 2008 QMUL.
   
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
*/

#include "SimpleSPARQLQuery.h"
#include "base/ProgressReporter.h"
#include "base/Profiler.h"

#include <QMutex>
#include <QMutexLocker>
#include <QRegExp>

#include <set>

#ifdef USE_NEW_RASQAL_API
#include <rasqal/rasqal.h>
#else
#include <rasqal.h>
#endif

#include <redland.h>

//#define DEBUG_SIMPLE_SPARQL_QUERY 1

#include <iostream>

using std::cerr;
using std::endl;

#ifdef USE_NEW_RASQAL_API
/**
 * Owner of a single rasqal_world, used when building against the
 * world-based Rasqal API (USE_NEW_RASQAL_API).
 *
 * The world is created in the constructor and released in the
 * destructor.  Creation can fail, in which case isOK() returns false
 * and getWorld() returns null.
 */
class WrasqalWorldWrapper // wrong but wromantic, etc
{
public:
    WrasqalWorldWrapper() :
        m_world(0)
    {
        m_world = rasqal_new_world();
        if (!m_world) {
            cerr << "SimpleSPARQLQuery: ERROR: Failed to create RASQAL world!" << endl;
            return;
        }
/*!!! This appears to be new for 0.9.17?
        if (rasqal_world_open(m_world)) {
            cerr << "SimpleSPARQLQuery: ERROR: Failed to open RASQAL world!" << endl;
            return;
        }
*/
    }
    ~WrasqalWorldWrapper()
    {
        // A wrapper whose construction failed is still destroyed by its
        // owner, so m_world may legitimately be null here.
        if (m_world) rasqal_free_world(m_world);
    }

    /** True if the Rasqal world was created successfully. */
    bool isOK() const { return (m_world != 0); }

    rasqal_world *getWorld() { return m_world; }
    const rasqal_world *getWorld() const { return m_world; }

private:
    // Not copyable: a copy would free the same world a second time.
    WrasqalWorldWrapper(const WrasqalWorldWrapper &);
    WrasqalWorldWrapper &operator=(const WrasqalWorldWrapper &);

    rasqal_world *m_world;
};
#endif

class WredlandWorldWrapper
{
public:
    WredlandWorldWrapper() :
        m_world(0), m_storage(0), m_model(0)
    {
        m_world = librdf_new_world();
        if (!m_world) {
            cerr << "SimpleSPARQLQuery: ERROR: Failed to create LIBRDF world!" << endl;
            return;
        }
        librdf_world_open(m_world);
        m_storage = librdf_new_storage(m_world, "trees", NULL, NULL);
        if (!m_storage) {
            std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland trees datastore, falling back to memory store" << std::endl;
            m_storage = librdf_new_storage(m_world, NULL, NULL, NULL);
            if (!m_storage) {
                std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland memory datastore" << std::endl;
                return;
            }                
        }
        m_model = librdf_new_model(m_world, m_storage, NULL);
        if (!m_model) {
            std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland data model" << std::endl;
            return;
        }
    }

    ~WredlandWorldWrapper()
    {
        while (!m_parsedUris.empty()) {
            librdf_free_uri(m_parsedUris.begin()->second);
            m_parsedUris.erase(m_parsedUris.begin());
        }
        if (m_model) librdf_free_model(m_model);
        if (m_storage) librdf_free_storage(m_storage);
        if (m_world) librdf_free_world(m_world);
    }

    bool isOK() const { return (m_model != 0); }

    librdf_uri *getUri(QString uriString, QString &errorString)
    {
        QMutexLocker locker(&m_mutex);

        if (m_parsedUris.find(uriString) != m_parsedUris.end()) {
            return m_parsedUris[uriString];
        }

        librdf_uri *uri = librdf_new_uri
            (m_world, (const unsigned char *)uriString.toUtf8().data());
        if (!uri) {
            errorString = "Failed to construct librdf_uri!";
            return 0;
        }

        librdf_parser *parser = librdf_new_parser(m_world, "guess", NULL, NULL);
        if (!parser) {
            errorString = "Failed to initialise Redland parser";
            return 0;
        }
        
        std::cerr << "About to parse \"" << uriString.toStdString() << "\"" << std::endl;

        Profiler p("SimpleSPARQLQuery: Parse URI into LIBRDF model");

        if (librdf_parser_parse_into_model(parser, uri, NULL, m_model)) {

            errorString = QString("Failed to parse RDF from URI \"%1\"")
                .arg(uriString);
            librdf_free_parser(parser);
            librdf_free_uri(uri);
            return 0;

        } else {

            librdf_free_parser(parser);
            m_parsedUris[uriString] = uri;
            return uri;
        }
    }
        
    librdf_world *getWorld() { return m_world; }
    const librdf_world *getWorld() const { return m_world; }
        
    librdf_model *getModel() { return m_model; }
    const librdf_model *getModel() const { return m_model; }

private:
    librdf_world *m_world;
    librdf_storage *m_storage;
    librdf_model *m_model;

    QMutex m_mutex;
    std::map<QString, librdf_uri *> m_parsedUris;
};

/**
 * Private implementation of SimpleSPARQLQuery.
 *
 * An Impl holds one query string and its type.  execute() runs it
 * either directly through Rasqal (executeDirectParser) or against the
 * shared Redland model (executeDatastore), depending on the type.
 * The Rasqal and Redland state is static, i.e. shared by all Impl
 * instances, and created lazily under m_mutex.
 */
class SimpleSPARQLQuery::Impl
{
public:
    Impl(SimpleSPARQLQuery::QueryType, QString query);
    ~Impl();

    // Parse the RDF at sourceUri into the shared Redland model,
    // creating that model on first use. Returns false on failure.
    static bool addSourceToModel(QString sourceUri);

    // The reporter is not owned; it is consulted for progress updates
    // and cancellation while results are being collected.
    void setProgressReporter(ProgressReporter *reporter) { m_reporter = reporter; }
    bool wasCancelled() const { return m_cancelled; }

    // Run the query and return all result rows collected.
    ResultList execute();

    // isOK() is true while no error string has been recorded.
    bool isOK() const;
    QString getErrorString() const;

protected:
    // Callback registered with Rasqal; the void * argument is the Impl
    // that issued the query.
    static void errorHandler(void *, raptor_locator *, const char *);

    // Guards lazy creation of the static Rasqal/Redland state below.
    static QMutex m_mutex;

#ifdef USE_NEW_RASQAL_API
    static WrasqalWorldWrapper *m_rasqal;
#else
    static bool m_rasqalInitialised;
#endif

    // Null until addSourceToModel() has been called successfully.
    static WredlandWorldWrapper *m_redland;

    ResultList executeDirectParser();
    ResultList executeDatastore();

    QueryType m_type;
    QString m_query;
    QString m_errorString;      // empty means "no error"
    ProgressReporter *m_reporter; // may be null
    bool m_cancelled;
};

// Definitions of the static state shared by every SimpleSPARQLQuery::Impl.
// The Rasqal and Redland objects start out null/false and are created
// lazily by execute() and addSourceToModel() respectively.
#ifdef USE_NEW_RASQAL_API
WrasqalWorldWrapper *SimpleSPARQLQuery::Impl::m_rasqal = 0;
#else
bool SimpleSPARQLQuery::Impl::m_rasqalInitialised = false;
#endif

WredlandWorldWrapper *SimpleSPARQLQuery::Impl::m_redland = 0;

// Guards creation of the objects above.
QMutex SimpleSPARQLQuery::Impl::m_mutex;

/**
 * Construct a query of the given type from the given SPARQL text.
 * All work is delegated to a heap-allocated Impl, which is released in
 * the destructor.  Nothing is executed until execute() is called.
 */
SimpleSPARQLQuery::SimpleSPARQLQuery(QueryType type, QString query) :
    m_impl(new Impl(type, query))
{
}

/**
 * Destroy the query, releasing the Impl allocated by the constructor.
 */
SimpleSPARQLQuery::~SimpleSPARQLQuery() 
{
    delete m_impl;
}

/**
 * Forward the progress reporter to the implementation object.  The
 * reporter is stored by pointer and not owned.
 */
void
SimpleSPARQLQuery::setProgressReporter(ProgressReporter *reporter)
{
    Impl &impl = *m_impl;
    impl.setProgressReporter(reporter);
}

/**
 * Report whether the implementation recorded a cancellation.
 */
bool
SimpleSPARQLQuery::wasCancelled() const
{
    const bool cancelled = m_impl->wasCancelled();
    return cancelled;
}

/**
 * Run the query via the implementation object and return the result
 * rows it produced.
 */
SimpleSPARQLQuery::ResultList
SimpleSPARQLQuery::execute()
{
    ResultList rows = m_impl->execute();
    return rows;
}

/**
 * Report whether the implementation is free of recorded errors.
 */
bool
SimpleSPARQLQuery::isOK() const
{
    const bool ok = m_impl->isOK();
    return ok;
}

/**
 * Return the error text recorded by the implementation (empty if
 * there has been no error).
 */
QString
SimpleSPARQLQuery::getErrorString() const
{
    QString message = m_impl->getErrorString();
    return message;
}

/**
 * Add the RDF found at sourceUri to the shared data model.  Returns
 * true on success.  Delegates to the static Impl function of the same
 * name.
 */
bool
SimpleSPARQLQuery::addSourceToModel(QString sourceUri)
{
    const bool added = Impl::addSourceToModel(sourceUri);
    return added;
}

/**
 * Record the query type and text.  No reporter is attached and the
 * cancelled flag is clear; the error string starts out empty, so
 * isOK() is initially true.
 */
SimpleSPARQLQuery::Impl::Impl(QueryType type, QString query) :
    m_type(type),
    m_query(query),
    m_reporter(0),
    m_cancelled(false)
{
#ifdef DEBUG_SIMPLE_SPARQL_QUERY
    cerr << "SimpleSPARQLQuery::Impl: Query is: \""
         << query.toStdString()
         << "\"" << endl;
#endif
}

/**
 * Nothing to release here: the static Rasqal/Redland state is shared
 * between instances and is not torn down per query, and the reporter
 * pointer is not deleted.
 */
SimpleSPARQLQuery::Impl::~Impl()
{
}

/**
 * True if no error message has been recorded for this query.
 */
bool
SimpleSPARQLQuery::Impl::isOK() const
{
    return m_errorString.isEmpty();
}

/**
 * Return the most recently recorded error message, or an empty string
 * if none has been recorded.
 */
QString
SimpleSPARQLQuery::Impl::getErrorString() const
{
    return m_errorString;
}

/**
 * Error callback registered with Rasqal by executeDirectParser().
 *
 * data is the Impl that issued the query; its error string is set to
 * the message, prefixed with the formatted source location when one is
 * available, and the result is also written to stderr.
 */
void
SimpleSPARQLQuery::Impl::errorHandler(void *data, 
                                      raptor_locator *locator,
                                      const char *message) 
{
    SimpleSPARQLQuery::Impl *impl = (SimpleSPARQLQuery::Impl *)data;
    
    // Zero-initialise the buffer and only trust it when formatting
    // reports success: raptor_format_locator returns non-zero when the
    // buffer is too small or on failure, and may leave the buffer
    // unwritten when the locator carries no location.
    char buffer[256] = { 0 };
    QString loc;
    if (locator &&
        raptor_format_locator(buffer, sizeof(buffer) - 1, locator) == 0) {
        loc = QString(buffer);
    }

    if (loc != "") {
        impl->m_errorString = QString("%1 - %2").arg(loc).arg(message);
    } else {
        impl->m_errorString = message;
    }

    cerr << "SimpleSPARQLQuery: ERROR: " << impl->m_errorString.toStdString() << endl;
}

/**
 * Run the query and return its result rows.
 *
 * For QueryFromModel, the shared Redland model must already exist
 * (i.e. addSourceToModel must have been called); otherwise an empty
 * list is returned.  For QueryFromSingleSource, Rasqal is initialised
 * on first use.  The shared-state checks are made under m_mutex; the
 * query itself runs after the mutex has been released.
 */
SimpleSPARQLQuery::ResultList
SimpleSPARQLQuery::Impl::execute()
{
    {
        // Scoped lock: released on every exit from this block,
        // including the early returns, and before the query is run.
        QMutexLocker locker(&m_mutex);

        if (m_type == QueryFromModel) {
            if (!m_redland) {
                // There can be no results, because no sources have been
                // added to the model yet (m_redland is only created when
                // addSourceToModel is called)
                cerr << "SimpleSPARQLQuery::execute: NOTE: No sources have been added to data model yet, so no results are possible" << endl;
                return ResultList();
            }
        }

        if (m_type == QueryFromSingleSource) {
#ifdef USE_NEW_RASQAL_API
            if (!m_rasqal) {
                m_rasqal = new WrasqalWorldWrapper();
                if (!m_rasqal->isOK()) {
                    cerr << "ERROR: SimpleSPARQLQuery::execute: Failed to initialise Rasqal query engine" << endl;
                    delete m_rasqal;
                    m_rasqal = 0;
                    return ResultList();
                }
            }
#else
            if (!m_rasqalInitialised) {
                rasqal_init();
                m_rasqalInitialised = true;
            }
#endif
        }
    }

    if (m_type == QueryFromSingleSource) {
        return executeDirectParser();
    } else {
        return executeDatastore();
    }
}

/**
 * Run the query directly through Rasqal, without involving the shared
 * Redland model.  No data source is passed in here, so whatever data
 * is read is determined by Rasqal from the query text itself.
 *
 * Returns one KeyValueMap per result row, mapping binding name to
 * value; unbound variables map to a default-constructed Value.  On
 * failure an empty (or partial, if cancelled) list is returned and an
 * error string is recorded so that isOK() reports the failure.
 */
SimpleSPARQLQuery::ResultList
SimpleSPARQLQuery::Impl::executeDirectParser()
{
    ResultList list;

    Profiler profiler("SimpleSPARQLQuery::executeDirectParser");

#ifdef USE_NEW_RASQAL_API
    rasqal_query *query = rasqal_new_query(m_rasqal->getWorld(), "sparql", NULL);
#else
    rasqal_query *query = rasqal_new_query("sparql", NULL);
#endif
    if (!query) {
        m_errorString = "Failed to construct query";
        cerr << "SimpleSPARQLQuery: ERROR: " << m_errorString.toStdString() << endl;
        return list;
    }

    // Route Rasqal's own diagnostics into m_errorString
    rasqal_query_set_error_handler(query, this, errorHandler);
    rasqal_query_set_fatal_error_handler(query, this, errorHandler);

    {
        Profiler p("SimpleSPARQLQuery: Prepare RASQAL query");

        if (rasqal_query_prepare
            (query, (const unsigned char *)m_query.toUtf8().data(), NULL)) {
            cerr << "SimpleSPARQLQuery: Failed to prepare query" << endl;
            // Keep any more specific message the error handler stored,
            // but never leave a failed query looking successful
            if (m_errorString == "") {
                m_errorString = "Failed to prepare query";
            }
            rasqal_free_query(query);
            return list;
        }
    }

    rasqal_query_results *results;
    
    {
        Profiler p("SimpleSPARQLQuery: Execute RASQAL query");
        results = rasqal_query_execute(query);
    }
    
//    cerr << "Query executed" << endl;

    if (!results) {
        cerr << "SimpleSPARQLQuery: RASQAL query failed" << endl;
        if (m_errorString == "") {
            m_errorString = "RASQAL query failed";
        }
        rasqal_free_query(query);
        return list;
    }

    if (!rasqal_query_results_is_bindings(results)) {
        cerr << "SimpleSPARQLQuery: RASQAL query has wrong result type (not bindings)" << endl;
        if (m_errorString == "") {
            m_errorString = "RASQAL query has wrong result type (not bindings)";
        }
        rasqal_free_query_results(results);
        rasqal_free_query(query);
        return list;
    }
    
    int resultCount = 0;
    int resultTotal = rasqal_query_results_get_count(results); // probably wrong
    m_cancelled = false;

    while (!rasqal_query_results_finished(results)) {

        int count = rasqal_query_results_get_bindings_count(results);

        KeyValueMap resultmap;

        for (int i = 0; i < count; ++i) {

            const unsigned char *name =
                rasqal_query_results_get_binding_name(results, i);

            rasqal_literal *literal =
                rasqal_query_results_get_binding_value(results, i);

            // The query was submitted as UTF-8, so decode what comes
            // back as UTF-8 too rather than relying on the implicit
            // const char * conversion
            QString key = QString::fromUtf8((const char *)name);

            if (!literal) {
                resultmap[key] = Value();
                continue;
            }

            ValueType type = LiteralValue;
            if (literal->type == RASQAL_LITERAL_URI) type = URIValue;
            else if (literal->type == RASQAL_LITERAL_BLANK) type = BlankValue;

            QString text = QString::fromUtf8
                ((const char *)rasqal_literal_as_string(literal));

#ifdef DEBUG_SIMPLE_SPARQL_QUERY
            std::cerr << i << ". " << key.toStdString() << " -> " << text.toStdString() << " (type " << type << ")" << std::endl;
#endif

            resultmap[key] = Value(type, text);
        }

        list.push_back(resultmap);

        rasqal_query_results_next(results);

        resultCount++;

        if (m_reporter) {
            // resultTotal is only an estimate: once we have reached or
            // passed it, switch the reporter to indefinite mode and
            // report the raw count instead of a percentage
            if (resultCount >= resultTotal) {
                if (m_reporter->isDefinite()) m_reporter->setDefinite(false);
                m_reporter->setProgress(resultCount);
            } else {
                m_reporter->setProgress((resultCount * 100) / resultTotal);
            }

            if (m_reporter->wasCancelled()) {
                m_cancelled = true;
                break;
            }
        }
    }

    rasqal_free_query_results(results);
    rasqal_free_query(query);

    return list;
}

/**
 * Run the query against the shared Redland model held by m_redland.
 * The caller (execute) has already checked that m_redland exists.
 *
 * Returns one KeyValueMap per result row, mapping binding name to
 * value; unbound variables map to a default-constructed Value.
 * Resource nodes yield URIValue with the URI text, literal nodes yield
 * LiteralValue with the literal text, and blank nodes yield BlankValue
 * with empty text.  On failure an empty (or partial, if cancelled)
 * list is returned and an error string is recorded so that isOK()
 * reports the failure.
 */
SimpleSPARQLQuery::ResultList
SimpleSPARQLQuery::Impl::executeDatastore()
{
    ResultList list;

    Profiler profiler("SimpleSPARQLQuery::executeDatastore");

    librdf_query *query;

    {
        Profiler p("SimpleSPARQLQuery: Prepare LIBRDF query");
        query = librdf_new_query
            (m_redland->getWorld(), "sparql", NULL,
             (const unsigned char *)m_query.toUtf8().data(), NULL);
    }
    
    if (!query) {
        m_errorString = "Failed to construct query";
        return list;
    }

    librdf_query_results *results;
    {
        Profiler p("SimpleSPARQLQuery: Execute LIBRDF query");
        results = librdf_query_execute(query, m_redland->getModel());
    }

    if (!results) {
        cerr << "SimpleSPARQLQuery: LIBRDF query failed" << endl;
        // Never leave a failed query looking successful
        if (m_errorString == "") {
            m_errorString = "LIBRDF query failed";
        }
        librdf_free_query(query);
        return list;
    }

    if (!librdf_query_results_is_bindings(results)) {
        cerr << "SimpleSPARQLQuery: LIBRDF query has wrong result type (not bindings)" << endl;
        if (m_errorString == "") {
            m_errorString = "LIBRDF query has wrong result type (not bindings)";
        }
        librdf_free_query_results(results);
        librdf_free_query(query);
        return list;
    }
    
    int resultCount = 0;
    int resultTotal = librdf_query_results_get_count(results); // probably wrong
    m_cancelled = false;

    while (!librdf_query_results_finished(results)) {

        int count = librdf_query_results_get_bindings_count(results);

        KeyValueMap resultmap;

        for (int i = 0; i < count; ++i) {

            const char *name =
                librdf_query_results_get_binding_name(results, i);

            librdf_node *node =
                librdf_query_results_get_binding_value(results, i);

            // The query was submitted as UTF-8, so decode what comes
            // back as UTF-8 too rather than relying on the implicit
            // const char * conversion
            QString key = QString::fromUtf8(name);

            if (!node) {
                resultmap[key] = Value();
                continue;
            }

            ValueType type = LiteralValue;
            QString text;

            if (librdf_node_is_resource(node)) {

                type = URIValue;
                librdf_uri *uri = librdf_node_get_uri(node);
                text = QString::fromUtf8
                    ((const char *)librdf_uri_as_string(uri));

            } else if (librdf_node_is_literal(node)) {

                type = LiteralValue;
                text = QString::fromUtf8
                    ((const char *)librdf_node_get_literal_value(node));

            } else if (librdf_node_is_blank(node)) {

                // NOTE(review): the blank node identifier is not
                // captured, so text stays empty -- confirm that no
                // caller needs it
                type = BlankValue;

            } else {

                cerr << "SimpleSPARQLQuery: LIBRDF query returned unknown node type (not resource, literal, or blank)" << endl;
            }

#ifdef DEBUG_SIMPLE_SPARQL_QUERY
            cerr << i << ". " << key.toStdString() << " -> " << text.toStdString() << " (type " << type << ")" << endl;
#endif

            resultmap[key] = Value(type, text);

            // The binding value is handed to us to release
            librdf_free_node(node);
        }

        list.push_back(resultmap);

        librdf_query_results_next(results);

        resultCount++;

        if (m_reporter) {
            // resultTotal is only an estimate: once we have reached or
            // passed it, switch the reporter to indefinite mode and
            // report the raw count instead of a percentage
            if (resultCount >= resultTotal) {
                if (m_reporter->isDefinite()) m_reporter->setDefinite(false);
                m_reporter->setProgress(resultCount);
            } else {
                m_reporter->setProgress((resultCount * 100) / resultTotal);
            }

            if (m_reporter->wasCancelled()) {
                m_cancelled = true;
                break;
            }
        }
    }

    librdf_free_query_results(results);
    librdf_free_query(query);

#ifdef DEBUG_SIMPLE_SPARQL_QUERY
    cerr << "All results retrieved (" << resultCount << " of them)" << endl;
#endif

    return list;
}

/**
 * Parse the RDF at sourceUri into the shared Redland model, creating
 * the Redland world/model on first use.
 *
 * Returns true if the source is now in the model (including when it
 * had already been added), false if Redland could not be initialised
 * or the source could not be parsed.
 */
bool
SimpleSPARQLQuery::Impl::addSourceToModel(QString sourceUri)
{
    QString err;

    {
        // Scoped lock around lazy creation only; parsing below is
        // serialised separately inside WredlandWorldWrapper::getUri
        QMutexLocker locker(&m_mutex);

        if (!m_redland) {
            m_redland = new WredlandWorldWrapper();
            if (!m_redland->isOK()) {
                cerr << "ERROR: SimpleSPARQLQuery::addSourceToModel: Failed to initialise Redland datastore" << endl;
                delete m_redland;
                m_redland = 0;
                return false;
            }
        }
    }

    librdf_uri *uri = m_redland->getUri(sourceUri, err);

    if (!uri) {
        std::cerr << "SimpleSPARQLQuery::addSourceToModel: Failed to add source URI \"" << sourceUri.toStdString() << "\": " << err.toStdString() << std::endl;
        return false;
    }
    return true;
}

/**
 * Convenience helper: run a query of the given type and return the
 * value of the named binding from the first result row in which that
 * binding is present and is not NoValue.
 *
 * Returns a default-constructed Value if the query reports an error
 * (which is also logged to stderr), if there are no results, or if no
 * row carries a usable value for the binding.
 */
SimpleSPARQLQuery::Value
SimpleSPARQLQuery::singleResultQuery(QueryType type,
                                     QString query, QString binding)
{
    SimpleSPARQLQuery sparql(type, query);
    ResultList rows = sparql.execute();

    if (!sparql.isOK()) {
        cerr << "SimpleSPARQLQuery::singleResultQuery: ERROR: "
             << sparql.getErrorString().toStdString() << endl;
        return Value();
    }

    // An empty result list simply falls through the loop
    for (ResultList::iterator row = rows.begin(); row != rows.end(); ++row) {
        if (row->find(binding) == row->end()) continue;
        if ((*row)[binding].type == NoValue) continue;
        return (*row)[binding];
    }

    return Value();
}