view rdf/SimpleSPARQLQuery.cpp @ 481:a82645e788fc

* Auto-select RDF datastore/parsing backend; use trees datastore if available * Make CachedFile remember whether a file has already been successfully located locally (avoiding system call out to look at filesystem)
author Chris Cannam
date Fri, 14 Nov 2008 10:10:05 +0000
parents 3ffce691c9bf
children 82ab61fa9223
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

/*
    Sonic Visualiser
    An audio file viewer and annotation editor.
    Centre for Digital Music, Queen Mary, University of London.
    This file copyright 2008 QMUL.
   
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
*/

#include "SimpleSPARQLQuery.h"
#include "base/ProgressReporter.h"
#include "base/Profiler.h"

#include <QMutex>
#include <QMutexLocker>
#include <QRegExp>

#include <set>

#ifdef USE_NEW_RASQAL_API
#include <rasqal/rasqal.h>
#else
#include <rasqal.h>
#endif

#ifdef HAVE_REDLAND
#include <redland.h>
#endif

//#define DEBUG_SIMPLE_SPARQL_QUERY 1

#include <iostream>

using std::cerr;
using std::endl;

#ifdef USE_NEW_RASQAL_API
class WrasqalWorldWrapper // wrong but wromantic, etc
{
public:
    WrasqalWorldWrapper() :
        m_world(0)
    {
        m_world = rasqal_new_world();
        if (!m_world) {
            cerr << "SimpleSPARQLQuery: ERROR: Failed to create RASQAL world!" << endl;
            return;
        }
/*!!! This appears to be new for 0.9.17?
        if (rasqal_world_open(m_world)) {
            cerr << "SimpleSPARQLQuery: ERROR: Failed to open RASQAL world!" << endl;
            return;
        }
*/
    }
    ~WrasqalWorldWrapper()
    {
        rasqal_free_world(m_world);
    }

    rasqal_world *getWorld() { return m_world; }
    const rasqal_world *getWorld() const { return m_world; }

private:
    rasqal_world *m_world;
};
#endif

#ifdef HAVE_REDLAND
class WredlandWorldWrapper
{
public:
    WredlandWorldWrapper() :
        m_world(0), m_storage(0), m_model(0)
    {
        m_world = librdf_new_world();
        if (!m_world) {
            cerr << "SimpleSPARQLQuery: ERROR: Failed to create LIBRDF world!" << endl;
            return;
        }
        librdf_world_open(m_world);
        m_storage = librdf_new_storage(m_world, "trees", NULL, NULL);
        if (!m_storage) {
            std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland trees datastore, falling back to memory store" << std::endl;
            m_storage = librdf_new_storage(m_world, NULL, NULL, NULL);
            if (!m_storage) {
                std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland memory datastore" << std::endl;
                return;
            }                
        }
        m_model = librdf_new_model(m_world, m_storage, NULL);
        if (!m_model) {
            std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland data model" << std::endl;
            return;
        }
    }

    ~WredlandWorldWrapper()
    {
        while (!m_parsedUris.empty()) {
            librdf_free_uri(m_parsedUris.begin()->second);
            m_parsedUris.erase(m_parsedUris.begin());
        }
        if (m_model) librdf_free_model(m_model);
        if (m_storage) librdf_free_storage(m_storage);
        if (m_world) librdf_free_world(m_world);
    }

    bool isOK() const { return (m_model != 0); }

    librdf_uri *getUri(QString uriString, QString &errorString)
    {
        QMutexLocker locker(&m_mutex);

        if (m_parsedUris.find(uriString) != m_parsedUris.end()) {
            return m_parsedUris[uriString];
        }

        librdf_uri *uri = librdf_new_uri
            (m_world, (const unsigned char *)uriString.toUtf8().data());
        if (!uri) {
            errorString = "Failed to construct librdf_uri!";
            return 0;
        }

        librdf_parser *parser = librdf_new_parser(m_world, "guess", NULL, NULL);
        if (!parser) {
            errorString = "Failed to initialise Redland parser";
            return 0;
        }
        
        std::cerr << "About to parse \"" << uriString.toStdString() << "\"" << std::endl;

        Profiler p("SimpleSPARQLQuery: Parse URI into LIBRDF model");

        if (librdf_parser_parse_into_model(parser, uri, NULL, m_model)) {

            errorString = QString("Failed to parse RDF from URI \"%1\"")
                .arg(uriString);
            librdf_free_parser(parser);
            librdf_free_uri(uri);
            return 0;

        } else {

            librdf_free_parser(parser);
            m_parsedUris[uriString] = uri;
            return uri;
        }
    }
        
    librdf_world *getWorld() { return m_world; }
    const librdf_world *getWorld() const { return m_world; }
        
    librdf_model *getModel() { return m_model; }
    const librdf_model *getModel() const { return m_model; }

private:
    librdf_world *m_world;
    librdf_storage *m_storage;
    librdf_model *m_model;

    QMutex m_mutex;
    std::map<QString, librdf_uri *> m_parsedUris;
};
#endif

class SimpleSPARQLQuery::Impl
{
public:
    Impl(QString fromUri, QString query);
    ~Impl();

    void setProgressReporter(ProgressReporter *reporter) { m_reporter = reporter; }
    bool wasCancelled() const { return m_cancelled; }

    ResultList execute();

    bool isOK() const;
    QString getErrorString() const;

    static void setBackEnd(SimpleSPARQLQuery::BackEndPreference p) {
        m_preference = p;
    }

protected:
    static void errorHandler(void *, raptor_locator *, const char *);

    static QMutex m_mutex;

#ifdef USE_NEW_RASQAL_API
    static WrasqalWorldWrapper *m_rasqal;
#else
    static bool m_rasqalInitialised;
#endif

#ifdef HAVE_REDLAND
    static WredlandWorldWrapper *m_redland;
#endif

    static SimpleSPARQLQuery::BackEndPreference m_preference;

    ResultList executeDirectParser();
    ResultList executeDatastore();

    QString m_fromUri;
    QString m_query;
    QString m_errorString;
    ProgressReporter *m_reporter;
    bool m_cancelled;
};

#ifdef USE_NEW_RASQAL_API
WrasqalWorldWrapper *SimpleSPARQLQuery::Impl::m_rasqal = 0;
#else
bool SimpleSPARQLQuery::Impl::m_rasqalInitialised = false;
#endif

#ifdef HAVE_REDLAND
WredlandWorldWrapper *SimpleSPARQLQuery::Impl::m_redland = 0;
#endif

QMutex SimpleSPARQLQuery::Impl::m_mutex;

SimpleSPARQLQuery::BackEndPreference
SimpleSPARQLQuery::Impl::m_preference = SimpleSPARQLQuery::AutoSelectBackEnd;

SimpleSPARQLQuery::SimpleSPARQLQuery(QString fromUri, QString query) :
    m_impl(new Impl(fromUri, query))
{
}

SimpleSPARQLQuery::~SimpleSPARQLQuery() 
{
    delete m_impl;
}

void
SimpleSPARQLQuery::setProgressReporter(ProgressReporter *reporter)
{
    m_impl->setProgressReporter(reporter);
}

bool
SimpleSPARQLQuery::wasCancelled() const
{
    return m_impl->wasCancelled();
}

SimpleSPARQLQuery::ResultList
SimpleSPARQLQuery::execute()
{
    return m_impl->execute();
}

bool
SimpleSPARQLQuery::isOK() const
{
    return m_impl->isOK();
}

QString
SimpleSPARQLQuery::getErrorString() const
{
    return m_impl->getErrorString();
}

void
SimpleSPARQLQuery::setBackEnd(BackEndPreference p)
{
    SimpleSPARQLQuery::Impl::setBackEnd(p);
}

SimpleSPARQLQuery::Impl::Impl(QString fromUri, QString query) :
    m_fromUri(fromUri),
    m_query(query),
    m_reporter(0),
    m_cancelled(false)
{
#ifdef DEBUG_SIMPLE_SPARQL_QUERY
    std::cerr << "SimpleSPARQLQuery::Impl: Query is: \"" << query.toStdString() << "\"" << std::endl;
#endif
}

SimpleSPARQLQuery::Impl::~Impl()
{
}

bool
SimpleSPARQLQuery::Impl::isOK() const
{
    return (m_errorString == "");
}

QString
SimpleSPARQLQuery::Impl::getErrorString() const
{
    return m_errorString;
}

void
SimpleSPARQLQuery::Impl::errorHandler(void *data, 
                                      raptor_locator *locator,
                                      const char *message) 
{
    SimpleSPARQLQuery::Impl *impl = (SimpleSPARQLQuery::Impl *)data;
    
    char buffer[256];
    raptor_format_locator(buffer, 255, locator);
    QString loc(buffer);
    if (loc != "") {
        impl->m_errorString = QString("%1 - %2").arg(loc).arg(message);
    } else {
        impl->m_errorString = message;
    }

    cerr << "SimpleSPARQLQuery: ERROR: " << impl->m_errorString.toStdString() << endl;
}

SimpleSPARQLQuery::ResultList
SimpleSPARQLQuery::Impl::execute()
{
    ResultList list;

    BackEndPreference preference;

    m_mutex.lock();

    if (m_preference == AutoSelectBackEnd) {
#ifdef HAVE_REDLAND
//        cerr << "librdf version: " << librdf_version_major << "." << librdf_version_minor << "." << librdf_version_release << endl;
        if (librdf_version_major > 1 ||
            (librdf_version_major == 1 &&
             (librdf_version_minor > 0 ||
              (librdf_version_minor == 0 &&
               librdf_version_release > 7)))) {
            cerr << "SimpleSPARQLQuery: Auto-selecting LIBRDF back-end for tree-based storage" << endl;
            m_preference = DatastoreBackEnd;
        }
#endif
        if (m_preference == AutoSelectBackEnd) {
            cerr << "SimpleSPARQLQuery: Auto-selecting RASQAL back-end" << endl;
            m_preference = DirectParserBackEnd;
        }
    }

    if (m_preference == DatastoreBackEnd) {
#ifdef HAVE_REDLAND
        if (!m_redland) {
            m_redland = new WredlandWorldWrapper();
            if (!m_redland->isOK()) {
                cerr << "WARNING: SimpleSPARQLQuery::execute: Failed to initialise Redland datastore, falling back to direct parser implementation" << endl;
                delete m_redland;
                m_preference = DirectParserBackEnd;
            }
        }
#else
        cerr << "WARNING: SimpleSPARQLQuery::execute: Datastore implementation preference indicated, but no datastore compiled in; using direct parser" << endl;
        m_preference = DirectParserBackEnd;
#endif
    }

    if (m_preference == DirectParserBackEnd) {
#ifdef USE_NEW_RASQAL_API
        if (!m_rasqal) m_rasqal = new WrasqalWorldWrapper();
#else
        if (!m_rasqalInitialised) {
            rasqal_init();
            m_rasqalInitialised = true;
        }
#endif
    }

    preference = m_preference;
    m_mutex.unlock();

    if (preference == SimpleSPARQLQuery::DirectParserBackEnd) {
        return executeDirectParser();
    } else {
        return executeDatastore();
    }
}

SimpleSPARQLQuery::ResultList
SimpleSPARQLQuery::Impl::executeDirectParser()
{
    ResultList list;

    Profiler profiler("SimpleSPARQLQuery::executeDirectParser");

#ifdef USE_NEW_RASQAL_API
    rasqal_query *query = rasqal_new_query(m_rasqal->getWorld(), "sparql", NULL);
#else
    rasqal_query *query = rasqal_new_query("sparql", NULL);
#endif
    if (!query) {
        m_errorString = "Failed to construct query";
        cerr << "SimpleSPARQLQuery: ERROR: " << m_errorString.toStdString() << endl;
        return list;
    }

    rasqal_query_set_error_handler(query, this, errorHandler);
    rasqal_query_set_fatal_error_handler(query, this, errorHandler);

    {
        Profiler p("SimpleSPARQLQuery: Prepare RASQAL query");

        if (rasqal_query_prepare
            (query, (const unsigned char *)m_query.toUtf8().data(), NULL)) {
            cerr << "SimpleSPARQLQuery: Failed to prepare query" << endl;
            rasqal_free_query(query);
            return list;
        }
    }

    rasqal_query_results *results;
    
    {
        Profiler p("SimpleSPARQLQuery: Execute RASQAL query");
        results = rasqal_query_execute(query);
    }
    
//    cerr << "Query executed" << endl;

    if (!results) {
        cerr << "SimpleSPARQLQuery: RASQAL query failed" << endl;
        rasqal_free_query(query);
        return list;
    }

    if (!rasqal_query_results_is_bindings(results)) {
        cerr << "SimpleSPARQLQuery: RASQAL query has wrong result type (not bindings)" << endl;
        rasqal_free_query_results(results);
        rasqal_free_query(query);
        return list;
    }
    
    int resultCount = 0;
    int resultTotal = rasqal_query_results_get_count(results); // probably wrong
    m_cancelled = false;

    while (!rasqal_query_results_finished(results)) {

        int count = rasqal_query_results_get_bindings_count(results);

        KeyValueMap resultmap;

        for (int i = 0; i < count; ++i) {

            const unsigned char *name =
                rasqal_query_results_get_binding_name(results, i);

            rasqal_literal *literal =
                rasqal_query_results_get_binding_value(results, i);

            QString key = (const char *)name;

            if (!literal) {
                resultmap[key] = Value();
                continue;
            }

            ValueType type = LiteralValue;
            if (literal->type == RASQAL_LITERAL_URI) type = URIValue;
            else if (literal->type == RASQAL_LITERAL_BLANK) type = BlankValue;

            QString text = (const char *)rasqal_literal_as_string(literal);

#ifdef DEBUG_SIMPLE_SPARQL_QUERY
            std::cerr << i << ". " << key.toStdString() << " -> " << text.toStdString() << " (type " << type << ")" << std::endl;
#endif

            resultmap[key] = Value(type, text);
        }

        list.push_back(resultmap);

        rasqal_query_results_next(results);

        resultCount++;

        if (m_reporter) {
            if (resultCount >= resultTotal) {
                if (m_reporter->isDefinite()) m_reporter->setDefinite(false);
                m_reporter->setProgress(resultCount);
            } else {
                m_reporter->setProgress((resultCount * 100) / resultTotal);
            }

            if (m_reporter->wasCancelled()) {
                m_cancelled = true;
                break;
            }
        }
    }

    rasqal_free_query_results(results);
    rasqal_free_query(query);

    return list;
}

SimpleSPARQLQuery::ResultList
SimpleSPARQLQuery::Impl::executeDatastore()
{
    ResultList list;
#ifndef HAVE_REDLAND
    // This should have been caught by execute()
    cerr << "SimpleSPARQLQuery: INTERNAL ERROR: Datastore not compiled in" << endl;
    return list;
#else
    Profiler profiler("SimpleSPARQLQuery::executeDatastore");

    librdf_uri *uri = m_redland->getUri(m_fromUri, m_errorString);
    if (!uri) return list;

#ifdef DEBUG_SIMPLE_SPARQL_QUERY
    std::cerr << "SimpleSPARQLQuery: Query is: \"" << m_query.toStdString() << "\"" << std::endl;
#endif
/*!!!
    static std::map<QString, int> counter;
    if (counter.find(m_query) == counter.end()) counter[m_query] = 1;
    else ++counter[m_query];
    std::cerr << "Counter for this query: " << counter[m_query] << std::endl;
    std::cerr << "Base URI is: \"" << m_fromUri.toStdString() << "\"" << std::endl;
*/

    librdf_query *query;

    {
        Profiler p("SimpleSPARQLQuery: Prepare LIBRDF query");
        query = librdf_new_query
            (m_redland->getWorld(), "sparql", NULL,
             (const unsigned char *)m_query.toUtf8().data(), uri);
    }
    
    if (!query) {
        m_errorString = "Failed to construct query";
        return list;
    }

    librdf_query_results *results;
    {
        Profiler p("SimpleSPARQLQuery: Execute LIBRDF query");
        results = librdf_query_execute(query, m_redland->getModel());
    }

    if (!results) {
        cerr << "SimpleSPARQLQuery: LIBRDF query failed" << endl;
        librdf_free_query(query);
        return list;
    }

    if (!librdf_query_results_is_bindings(results)) {
        cerr << "SimpleSPARQLQuery: LIBRDF query has wrong result type (not bindings)" << endl;
        librdf_free_query_results(results);
        librdf_free_query(query);
        return list;
    }
    
    int resultCount = 0;
    int resultTotal = librdf_query_results_get_count(results); // probably wrong
    m_cancelled = false;

    while (!librdf_query_results_finished(results)) {

        int count = librdf_query_results_get_bindings_count(results);

        KeyValueMap resultmap;

        for (int i = 0; i < count; ++i) {

            const char *name =
                librdf_query_results_get_binding_name(results, i);

            librdf_node *node =
                librdf_query_results_get_binding_value(results, i);

            QString key = (const char *)name;

            if (!node) {
                resultmap[key] = Value();
                continue;
            }

            ValueType type = LiteralValue;
            QString text;

            if (librdf_node_is_resource(node)) {

                type = URIValue;
                librdf_uri *uri = librdf_node_get_uri(node);
                text = (const char *)librdf_uri_as_string(uri);

            } else if (librdf_node_is_literal(node)) {

                type = LiteralValue;
                text = (const char *)librdf_node_get_literal_value(node);

            } else if (librdf_node_is_blank(node)) {

                type = BlankValue;

            } else {

                cerr << "SimpleSPARQLQuery: LIBRDF query returned unknown node type (not resource, literal, or blank)" << endl;
            }

#ifdef DEBUG_SIMPLE_SPARQL_QUERY
            cerr << i << ". " << key.toStdString() << " -> " << text.toStdString() << " (type " << type << ")" << endl;
#endif

            resultmap[key] = Value(type, text);

            librdf_free_node(node);
        }

        list.push_back(resultmap);

        librdf_query_results_next(results);

        resultCount++;

        if (m_reporter) {
            if (resultCount >= resultTotal) {
                if (m_reporter->isDefinite()) m_reporter->setDefinite(false);
                m_reporter->setProgress(resultCount);
            } else {
                m_reporter->setProgress((resultCount * 100) / resultTotal);
            }

            if (m_reporter->wasCancelled()) {
                m_cancelled = true;
                break;
            }
        }
    }

    librdf_free_query_results(results);
    librdf_free_query(query);

#ifdef DEBUG_SIMPLE_SPARQL_QUERY
    cerr << "All results retrieved (" << resultCount << " of them)" << endl;
#endif

    return list;
#endif
}

SimpleSPARQLQuery::Value
SimpleSPARQLQuery::singleResultQuery(QString fromUri,
                                     QString query, QString binding)
{
    SimpleSPARQLQuery q(fromUri, query);
    ResultList results = q.execute();
    if (!q.isOK()) {
        cerr << "SimpleSPARQLQuery::singleResultQuery: ERROR: "
             << q.getErrorString().toStdString() << endl;
        return Value();
    }
    if (results.empty()) {
        return Value();
    }
    for (int i = 0; i < results.size(); ++i) {
        if (results[i].find(binding) != results[i].end() &&
            results[i][binding].type != NoValue) {
            return results[i][binding];
        }
    }
    return Value();
}