diff rdf/SimpleSPARQLQuery.cpp @ 480:3ffce691c9bf

* Add Redland datastore support to SimpleSPARQLQuery
author Chris Cannam
date Thu, 13 Nov 2008 14:23:23 +0000
parents 2019d89ebcf9
children a82645e788fc
line wrap: on
line diff
--- a/rdf/SimpleSPARQLQuery.cpp	Wed Nov 12 16:39:29 2008 +0000
+++ b/rdf/SimpleSPARQLQuery.cpp	Thu Nov 13 14:23:23 2008 +0000
@@ -15,6 +15,12 @@
 
 #include "SimpleSPARQLQuery.h"
 #include "base/ProgressReporter.h"
+#include "base/Profiler.h"
+
+#include <QMutex>
+#include <QMutexLocker>
+
+#include <set>
 
 #ifdef USE_NEW_RASQAL_API
 #include <rasqal/rasqal.h>
@@ -22,6 +28,10 @@
 #include <rasqal.h>
 #endif
 
+#ifdef HAVE_REDLAND
+#include <redland.h>
+#endif
+
 //#define DEBUG_SIMPLE_SPARQL_QUERY 1
 
 #include <iostream>
@@ -36,17 +46,115 @@
     WrasqalWorldWrapper() : m_world(rasqal_new_world()) { }
     ~WrasqalWorldWrapper() { rasqal_free_world(m_world); }
 
-    rasqal_world *getWorld() const { return m_world; }
+    rasqal_world *getWorld() { return m_world; }
+    const rasqal_world *getWorld() const { return m_world; }
 
 private:
     rasqal_world *m_world;
 };
 #endif
 
+#ifdef HAVE_REDLAND
+class WredlandWorldWrapper
+{
+public:
+    WredlandWorldWrapper() :
+        m_world(0), m_storage(0), m_model(0)
+    {
+        m_world = librdf_new_world();
+        librdf_world_open(m_world);
+            m_storage = librdf_new_storage(m_world, NULL, NULL, NULL);
+//        m_storage = librdf_new_storage(m_world, "hashes", NULL,
+//.                                       "hash-type='memory',indexes=1");
+        if (!m_storage) {
+            std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland hashes datastore, falling back to memory store" << std::endl;
+            m_storage = librdf_new_storage(m_world, NULL, NULL, NULL);
+            if (!m_storage) {
+                std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland memory datastore" << std::endl;
+                return;
+            }                
+        }
+        m_model = librdf_new_model(m_world, m_storage, NULL);
+        if (!m_model) {
+            std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland data model" << std::endl;
+            return;
+        }
+    }
+
+    ~WredlandWorldWrapper()
+    {
+        while (!m_parsedUris.empty()) {
+            librdf_free_uri(m_parsedUris.begin()->second);
+            m_parsedUris.erase(m_parsedUris.begin());
+        }
+        if (m_model) librdf_free_model(m_model);
+        if (m_storage) librdf_free_storage(m_storage);
+        if (m_world) librdf_free_world(m_world);
+    }
+
+    bool isOK() const { return (m_model != 0); }
+
+    librdf_uri *getUri(QString uriString, QString &errorString)
+    {
+        QMutexLocker locker(&m_mutex);
+
+        if (m_parsedUris.find(uriString) != m_parsedUris.end()) {
+            return m_parsedUris[uriString];
+        }
+
+        librdf_uri *uri = librdf_new_uri
+            (m_world, (const unsigned char *)uriString.toUtf8().data());
+        if (!uri) {
+            errorString = "Failed to construct librdf_uri!";
+            return 0;
+        }
+
+        librdf_parser *parser = librdf_new_parser(m_world, "guess", NULL, NULL);
+        if (!parser) {
+            errorString = "Failed to initialise Redland parser";
+            return 0;
+        }
+        
+        std::cerr << "About to parse \"" << uriString.toStdString() << "\"" << std::endl;
+
+        Profiler p("SimpleSPARQLQuery: Parse URI into LIBRDF model");
+
+        if (librdf_parser_parse_into_model(parser, uri, NULL, m_model)) {
+
+            errorString = QString("Failed to parse RDF from URI \"%1\"")
+                .arg(uriString);
+            librdf_free_parser(parser);
+            librdf_free_uri(uri);
+            return 0;
+
+        } else {
+
+            librdf_free_parser(parser);
+            m_parsedUris[uriString] = uri;
+            return uri;
+        }
+    }
+        
+    librdf_world *getWorld() { return m_world; }
+    const librdf_world *getWorld() const { return m_world; }
+        
+    librdf_model *getModel() { return m_model; }
+    const librdf_model *getModel() const { return m_model; }
+
+private:
+    librdf_world *m_world;
+    librdf_storage *m_storage;
+    librdf_model *m_model;
+
+    QMutex m_mutex;
+    std::map<QString, librdf_uri *> m_parsedUris;
+};
+#endif
+
 class SimpleSPARQLQuery::Impl
 {
 public:
-    Impl(QString query);
+    Impl(QString fromUri, QString query);
     ~Impl();
 
     void setProgressReporter(ProgressReporter *reporter) { m_reporter = reporter; }
@@ -57,15 +165,32 @@
     bool isOK() const;
     QString getErrorString() const;
 
+    static void setImplementationPreference
+    (SimpleSPARQLQuery::ImplementationPreference p) {
+        m_preference = p;
+    }
+
 protected:
     static void errorHandler(void *, raptor_locator *, const char *);
 
+    static QMutex m_mutex;
+
 #ifdef USE_NEW_RASQAL_API
-    static WrasqalWorldWrapper m_www;
+    static WrasqalWorldWrapper *m_rasqal;
 #else
-    static bool m_initialised;
+    static bool m_rasqalInitialised;
 #endif
-    
+
+#ifdef HAVE_REDLAND
+    static WredlandWorldWrapper *m_redland;
+#endif
+
+    static SimpleSPARQLQuery::ImplementationPreference m_preference;
+
+    ResultList executeDirectParser();
+    ResultList executeDatastore();
+
+    QString m_fromUri;
     QString m_query;
     QString m_errorString;
     ProgressReporter *m_reporter;
@@ -73,15 +198,24 @@
 };
 
 #ifdef USE_NEW_RASQAL_API
-WrasqalWorldWrapper
-SimpleSPARQLQuery::Impl::m_www;
+WrasqalWorldWrapper *SimpleSPARQLQuery::Impl::m_rasqal = 0;
 #else
-bool
-SimpleSPARQLQuery::Impl::m_initialised = false;
+bool SimpleSPARQLQuery::Impl::m_rasqalInitialised = false;
 #endif
 
-SimpleSPARQLQuery::SimpleSPARQLQuery(QString query) :
-    m_impl(new Impl(query)) { }
+#ifdef HAVE_REDLAND
+WredlandWorldWrapper *SimpleSPARQLQuery::Impl::m_redland = 0;
+#endif
+
+QMutex SimpleSPARQLQuery::Impl::m_mutex;
+
+SimpleSPARQLQuery::ImplementationPreference
+SimpleSPARQLQuery::Impl::m_preference = SimpleSPARQLQuery::UseDirectParser;
+
+SimpleSPARQLQuery::SimpleSPARQLQuery(QString fromUri, QString query) :
+    m_impl(new Impl(fromUri, query))
+{
+}
 
 SimpleSPARQLQuery::~SimpleSPARQLQuery() 
 {
@@ -118,7 +252,14 @@
     return m_impl->getErrorString();
 }
 
-SimpleSPARQLQuery::Impl::Impl(QString query) :
+void
+SimpleSPARQLQuery::setImplementationPreference(ImplementationPreference p)
+{
+    SimpleSPARQLQuery::Impl::setImplementationPreference(p);
+}
+
+SimpleSPARQLQuery::Impl::Impl(QString fromUri, QString query) :
+    m_fromUri(fromUri),
     m_query(query),
     m_reporter(0),
     m_cancelled(false)
@@ -165,13 +306,57 @@
 {
     ResultList list;
 
+    ImplementationPreference preference;
+
+    m_mutex.lock();
+
+    if (m_preference == UseDatastore) {
+#ifdef HAVE_REDLAND
+        if (!m_redland) {
+            m_redland = new WredlandWorldWrapper();
+            if (!m_redland->isOK()) {
+                cerr << "WARNING: SimpleSPARQLQuery::execute: Failed to initialise Redland datastore, falling back to direct parser implementation" << endl;
+                delete m_redland;
+                m_preference = UseDirectParser;
+            }
+        }
+#else
+        cerr << "WARNING: SimpleSPARQLQuery::execute: Datastore implementation preference indicated, but no datastore compiled in; using direct parser" << endl;
+        m_preference = UseDirectParser;
+#endif
+    }
+
+    if (m_preference == UseDirectParser) {
 #ifdef USE_NEW_RASQAL_API
-    rasqal_query *query = rasqal_new_query(m_www.getWorld(), "sparql", NULL);
+        if (!m_rasqal) m_rasqal = new WrasqalWorldWrapper();
 #else
-    if (!m_initialised) {
-        m_initialised = true;
-        rasqal_init();
+        if (!m_rasqalInitialised) {
+            rasqal_init();
+            m_rasqalInitialised = true;
+        }
+#endif
     }
+
+    preference = m_preference;
+    m_mutex.unlock();
+
+    if (preference == SimpleSPARQLQuery::UseDirectParser) {
+        return executeDirectParser();
+    } else {
+        return executeDatastore();
+    }
+}
+
+SimpleSPARQLQuery::ResultList
+SimpleSPARQLQuery::Impl::executeDirectParser()
+{
+    ResultList list;
+
+    Profiler profiler("SimpleSPARQLQuery::executeDirectParser");
+
+#ifdef USE_NEW_RASQAL_API
+    rasqal_query *query = rasqal_new_query(m_rasqal->getWorld(), "sparql", NULL);
+#else
     rasqal_query *query = rasqal_new_query("sparql", NULL);
 #endif
     if (!query) {
@@ -183,14 +368,23 @@
     rasqal_query_set_error_handler(query, this, errorHandler);
     rasqal_query_set_fatal_error_handler(query, this, errorHandler);
 
-    if (rasqal_query_prepare
-        (query, (const unsigned char *)m_query.toUtf8().data(), NULL)) {
-        cerr << "SimpleSPARQLQuery: Failed to prepare query" << endl;
-        rasqal_free_query(query);
-        return list;
+    {
+        Profiler p("SimpleSPARQLQuery: Prepare RASQAL query");
+
+        if (rasqal_query_prepare
+            (query, (const unsigned char *)m_query.toUtf8().data(), NULL)) {
+            cerr << "SimpleSPARQLQuery: Failed to prepare query" << endl;
+            rasqal_free_query(query);
+            return list;
+        }
     }
 
-    rasqal_query_results *results = rasqal_query_execute(query);
+    rasqal_query_results *results;
+    
+    {
+        Profiler p("SimpleSPARQLQuery: Execute RASQAL query");
+        results = rasqal_query_execute(query);
+    }
     
 //    cerr << "Query executed" << endl;
 
@@ -272,10 +466,143 @@
     return list;
 }
 
+SimpleSPARQLQuery::ResultList
+SimpleSPARQLQuery::Impl::executeDatastore()
+{
+    ResultList list;
+#ifndef HAVE_REDLAND
+    // This should have been caught by execute()
+    cerr << "SimpleSPARQLQuery: INTERNAL ERROR: Datastore not compiled in" << endl;
+    return list;
+#else
+    Profiler profiler("SimpleSPARQLQuery::executeDatastore");
+
+    librdf_uri *uri = m_redland->getUri(m_fromUri, m_errorString);
+    if (!uri) return list;
+
+    std::cerr << "SimpleSPARQLQuery: Query is: \"" << m_query.toStdString() << "\"" << std::endl;
+    static std::map<QString, int> counter;
+    if (counter.find(m_query) == counter.end()) counter[m_query] = 1;
+    else ++counter[m_query];
+    std::cerr << "Counter for this query: " << counter[m_query] << std::endl;
+
+    librdf_query *query;
+
+    {
+        Profiler p("SimpleSPARQLQuery: Prepare LIBRDF query");
+        query = librdf_new_query
+            (m_redland->getWorld(), "sparql", NULL,
+             (const unsigned char *)m_query.toUtf8().data(), uri);
+    }
+    std::cerr << "Prepared" << std::endl;
+    
+    if (!query) {
+        m_errorString = "Failed to construct query";
+        return list;
+    }
+
+    librdf_query_results *results;
+    {
+        Profiler p("SimpleSPARQLQuery: Execute LIBRDF query");
+        results = librdf_query_execute(query, m_redland->getModel());
+    }
+    std::cerr << "Executed" << std::endl;
+
+    if (!results) {
+        cerr << "SimpleSPARQLQuery: LIBRDF query failed" << endl;
+        librdf_free_query(query);
+        return list;
+    }
+
+    if (!librdf_query_results_is_bindings(results)) {
+        cerr << "SimpleSPARQLQuery: LIBRDF query has wrong result type (not bindings)" << endl;
+        librdf_free_query_results(results);
+        librdf_free_query(query);
+        return list;
+    }
+    
+    int resultCount = 0;
+    int resultTotal = librdf_query_results_get_count(results); // probably wrong
+    m_cancelled = false;
+
+    while (!librdf_query_results_finished(results)) {
+
+        int count = librdf_query_results_get_bindings_count(results);
+
+        KeyValueMap resultmap;
+
+        for (int i = 0; i < count; ++i) {
+
+            const char *name =
+                librdf_query_results_get_binding_name(results, i);
+
+            librdf_node *node =
+                librdf_query_results_get_binding_value(results, i);
+
+            QString key = (const char *)name;
+
+            if (!node) {
+                resultmap[key] = Value();
+                continue;
+            }
+
+            ValueType type = LiteralValue;
+            if (librdf_node_is_resource(node)) type = URIValue;
+            else if (librdf_node_is_literal(node)) type = LiteralValue;
+            else if (librdf_node_is_blank(node)) type = BlankValue;
+            else {
+                cerr << "SimpleSPARQLQuery: LIBRDF query returned unknown node type (not resource, literal, or blank)" << endl;
+                resultmap[key] = Value();
+                librdf_free_node(node);
+                continue;
+            }
+
+            QString text = (const char *)librdf_node_get_literal_value(node);
+
+#ifdef DEBUG_SIMPLE_SPARQL_QUERY
+            std::cerr << i << ". " << key.toStdString() << " -> " << text.toStdString() << " (type " << type << ")" << std::endl;
+#endif
+
+            resultmap[key] = Value(type, text);
+
+            librdf_free_node(node);
+        }
+
+        list.push_back(resultmap);
+
+        librdf_query_results_next(results);
+
+        resultCount++;
+
+        if (m_reporter) {
+            if (resultCount >= resultTotal) {
+                if (m_reporter->isDefinite()) m_reporter->setDefinite(false);
+                m_reporter->setProgress(resultCount);
+            } else {
+                m_reporter->setProgress((resultCount * 100) / resultTotal);
+            }
+
+            if (m_reporter->wasCancelled()) {
+                m_cancelled = true;
+                break;
+            }
+        }
+    }
+
+    librdf_free_query_results(results);
+    librdf_free_query(query);
+
+    std::cerr << "All results retrieved" << std::endl;
+
+    return list;
+#endif
+}
+
 SimpleSPARQLQuery::Value
-SimpleSPARQLQuery::singleResultQuery(QString query, QString binding)
+SimpleSPARQLQuery::singleResultQuery(QString fromUri,
+                                     QString query, QString binding)
 {
-    SimpleSPARQLQuery q(query);
+    SimpleSPARQLQuery q(fromUri, query);
     ResultList results = q.execute();
     if (!q.isOK()) {
         cerr << "SimpleSPARQLQuery::singleResultQuery: ERROR: "