Mercurial > hg > svcore
changeset 480:3ffce691c9bf
* Add Redland datastore support to SimpleSPARQLQuery
author | Chris Cannam |
---|---|
date | Thu, 13 Nov 2008 14:23:23 +0000 |
parents | f933062a7f80 |
children | a82645e788fc |
files | rdf/PluginRDFDescription.cpp rdf/PluginRDFIndexer.cpp rdf/RDFImporter.cpp rdf/RDFTransformFactory.cpp rdf/SimpleSPARQLQuery.cpp rdf/SimpleSPARQLQuery.h rdf/rdf.pro |
diffstat | 7 files changed, 401 insertions(+), 52 deletions(-) [+] |
line wrap: on
line diff
--- a/rdf/PluginRDFDescription.cpp Wed Nov 12 16:39:29 2008 +0000 +++ b/rdf/PluginRDFDescription.cpp Thu Nov 13 14:23:23 2008 +0000 @@ -201,21 +201,22 @@ SimpleSPARQLQuery::Value v; v = SimpleSPARQLQuery::singleResultQuery - (queryTemplate.arg("vamp:name").arg("name"), "name"); + (url, queryTemplate.arg("vamp:name").arg("name"), "name"); if (v.type == SimpleSPARQLQuery::LiteralValue && v.value != "") { m_pluginName = v.value; } v = SimpleSPARQLQuery::singleResultQuery - (queryTemplate.arg("dc:description").arg("description"), "description"); + (url, queryTemplate.arg("dc:description").arg("description"), "description"); if (v.type == SimpleSPARQLQuery::LiteralValue && v.value != "") { m_pluginDescription = v.value; } v = SimpleSPARQLQuery::singleResultQuery - (QString( + (url, + QString( " PREFIX vamp: <http://purl.org/ontology/vamp/> " " PREFIX foaf: <http://xmlns.com/foaf/0.1/> " " SELECT ?name FROM <%1> " @@ -239,7 +240,8 @@ // perhaps that would be unwise v = SimpleSPARQLQuery::singleResultQuery - (QString( + (url, + QString( " PREFIX vamp: <http://purl.org/ontology/vamp/> " " PREFIX foaf: <http://xmlns.com/foaf/0.1/> " " SELECT ?page from <%1> " @@ -258,7 +260,8 @@ } else { v = SimpleSPARQLQuery::singleResultQuery - (QString( + (url, + QString( " PREFIX vamp: <http://purl.org/ontology/vamp/> " " PREFIX foaf: <http://xmlns.com/foaf/0.1/> " " SELECT ?page from <%1> " @@ -287,7 +290,8 @@ Profiler profiler("PluginRDFDescription::indexOutputs"); SimpleSPARQLQuery query - (QString + (url, + QString ( " PREFIX vamp: <http://purl.org/ontology/vamp/> " @@ -361,7 +365,8 @@ SimpleSPARQLQuery::Value v; v = SimpleSPARQLQuery::singleResultQuery - (QString(" PREFIX vamp: <http://purl.org/ontology/vamp/> " + (url, + QString(" PREFIX vamp: <http://purl.org/ontology/vamp/> " " PREFIX dc: <http://purl.org/dc/elements/1.1/> " " SELECT ?title FROM <%1> " " WHERE { <%2> dc:title ?title } ") @@ -378,21 +383,21 @@ .arg(url).arg(outputUri); v = SimpleSPARQLQuery::singleResultQuery - (queryTemplate.arg("event_type"), "event_type"); + (url, queryTemplate.arg("event_type"), "event_type"); if (v.type == SimpleSPARQLQuery::URIValue && v.value != "") { m_outputEventTypeURIMap[outputId] = v.value; } v = SimpleSPARQLQuery::singleResultQuery - (queryTemplate.arg("feature_attribute"), "feature_attribute"); + (url, queryTemplate.arg("feature_attribute"), "feature_attribute"); if (v.type == SimpleSPARQLQuery::URIValue && v.value != "") { m_outputFeatureAttributeURIMap[outputId] = v.value; } v = SimpleSPARQLQuery::singleResultQuery - (queryTemplate.arg("signal_type"), "signal_type"); + (url, queryTemplate.arg("signal_type"), "signal_type"); if (v.type == SimpleSPARQLQuery::URIValue && v.value != "") { m_outputSignalTypeURIMap[outputId] = v.value;
--- a/rdf/PluginRDFIndexer.cpp Wed Nov 12 16:39:29 2008 +0000 +++ b/rdf/PluginRDFIndexer.cpp Thu Nov 13 14:23:23 2008 +0000 @@ -257,13 +257,14 @@ return false; } - localString = cf.getLocalFilename(); + localString = "file://" + cf.getLocalFilename(); //!!! crud - fix! } // cerr << "PluginRDFIndexer::indexURL: url = <" << urlString.toStdString() << ">" << endl; SimpleSPARQLQuery query - (QString + (localString, + QString ( " PREFIX vamp: <http://purl.org/ontology/vamp/> "
--- a/rdf/RDFImporter.cpp Wed Nov 12 16:39:29 2008 +0000 +++ b/rdf/RDFImporter.cpp Thu Nov 13 14:23:23 2008 +0000 @@ -157,7 +157,8 @@ ProgressReporter *reporter) { SimpleSPARQLQuery query = SimpleSPARQLQuery - (QString + (m_uristring, + QString ( " PREFIX mo: <http://purl.org/ontology/mo/>" " PREFIX af: <http://purl.org/ontology/af/>" @@ -288,7 +289,8 @@ ); SimpleSPARQLQuery::Value dimensionsValue = - SimpleSPARQLQuery::singleResultQuery(dimensionsQuery + SimpleSPARQLQuery::singleResultQuery(m_uristring, + dimensionsQuery .arg(m_uristring).arg(featureUri), "dimensions"); @@ -329,7 +331,8 @@ // multiple optionals properly SimpleSPARQLQuery::Value srValue = - SimpleSPARQLQuery::singleResultQuery(queryTemplate + SimpleSPARQLQuery::singleResultQuery(m_uristring, + queryTemplate .arg(m_uristring).arg(featureUri) .arg("sampleRate"), "sampleRate"); @@ -338,7 +341,8 @@ } SimpleSPARQLQuery::Value hopValue = - SimpleSPARQLQuery::singleResultQuery(queryTemplate + SimpleSPARQLQuery::singleResultQuery(m_uristring, + queryTemplate .arg(m_uristring).arg(featureUri) .arg("hopSize"), "hopSize"); @@ -347,7 +351,8 @@ } SimpleSPARQLQuery::Value winValue = - SimpleSPARQLQuery::singleResultQuery(queryTemplate + SimpleSPARQLQuery::singleResultQuery(m_uristring, + queryTemplate .arg(m_uristring).arg(featureUri) .arg("windowLength"), "windowLength"); @@ -455,7 +460,7 @@ ).arg(m_uristring); - SimpleSPARQLQuery query(queryString); + SimpleSPARQLQuery query(m_uristring, queryString); query.setProgressReporter(reporter); cerr << "Query will be: " << queryString.toStdString() << endl; @@ -517,9 +522,9 @@ bool haveDuration = false; QString label = SimpleSPARQLQuery::singleResultQuery - (labelQueryString.arg(thinguri), "label").value; + (m_uristring, labelQueryString.arg(thinguri), "label").value; - SimpleSPARQLQuery rangeQuery(rangeQueryString.arg(thinguri)); + SimpleSPARQLQuery rangeQuery(m_uristring, rangeQueryString.arg(thinguri)); SimpleSPARQLQuery::ResultList rangeResults = rangeQuery.execute(); if (!rangeResults.empty()) { // std::cerr << rangeResults.size() << " range results" << std::endl; @@ -532,7 +537,7 @@ haveDuration = true; } else { QString timestring = SimpleSPARQLQuery::singleResultQuery - (timeQueryString.arg(thinguri), "time").value; + (m_uristring, timeQueryString.arg(thinguri), "time").value; if (timestring != "") { time = RealTime::fromXsdDuration(timestring.toStdString()); haveTime = true;
--- a/rdf/RDFTransformFactory.cpp Wed Nov 12 16:39:29 2008 +0000 +++ b/rdf/RDFTransformFactory.cpp Thu Nov 13 14:23:23 2008 +0000 @@ -143,7 +143,7 @@ " } "; SimpleSPARQLQuery transformsQuery - (queryTemplate.arg("").arg(m_urlString).arg("")); + (m_urlString, queryTemplate.arg("").arg(m_urlString).arg("")); SimpleSPARQLQuery::ResultList transformResults = transformsQuery.execute(); @@ -203,7 +203,8 @@ QString optional = optionals[i]; SimpleSPARQLQuery query - (queryTemplate + (m_urlString, + queryTemplate .arg(QString("?%1").arg(optional)) .arg(m_urlString) .arg(QString("?transform vamp:%1 ?%2") @@ -277,7 +278,8 @@ QString pluginDescriptionURL) { SimpleSPARQLQuery outputQuery - (QString + (m_urlString, + QString ( " PREFIX vamp: <http://purl.org/ontology/vamp/> " @@ -322,7 +324,8 @@ QString pluginDescriptionURL) { SimpleSPARQLQuery paramQuery - (QString + (m_urlString, + QString ( " PREFIX vamp: <http://purl.org/ontology/vamp/> "
--- a/rdf/SimpleSPARQLQuery.cpp Wed Nov 12 16:39:29 2008 +0000 +++ b/rdf/SimpleSPARQLQuery.cpp Thu Nov 13 14:23:23 2008 +0000 @@ -15,6 +15,12 @@ #include "SimpleSPARQLQuery.h" #include "base/ProgressReporter.h" +#include "base/Profiler.h" + +#include <QMutex> +#include <QMutexLocker> + +#include <set> #ifdef USE_NEW_RASQAL_API #include <rasqal/rasqal.h> @@ -22,6 +28,10 @@ #include <rasqal.h> #endif +#ifdef HAVE_REDLAND +#include <redland.h> +#endif + //#define DEBUG_SIMPLE_SPARQL_QUERY 1 #include <iostream> @@ -36,17 +46,115 @@ WrasqalWorldWrapper() : m_world(rasqal_new_world()) { } ~WrasqalWorldWrapper() { rasqal_free_world(m_world); } - rasqal_world *getWorld() const { return m_world; } + rasqal_world *getWorld() { return m_world; } + const rasqal_world *getWorld() const { return m_world; } private: rasqal_world *m_world; }; #endif +#ifdef HAVE_REDLAND +class WredlandWorldWrapper +{ +public: + WredlandWorldWrapper() : + m_world(0), m_storage(0), m_model(0) + { + m_world = librdf_new_world(); + librdf_world_open(m_world); + m_storage = librdf_new_storage(m_world, NULL, NULL, NULL); +// m_storage = librdf_new_storage(m_world, "hashes", NULL, +//. "hash-type='memory',indexes=1"); + if (!m_storage) { + std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland hashes datastore, falling back to memory store" << std::endl; + m_storage = librdf_new_storage(m_world, NULL, NULL, NULL); + if (!m_storage) { + std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland memory datastore" << std::endl; + return; + } + } + m_model = librdf_new_model(m_world, m_storage, NULL); + if (!m_model) { + std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland data model" << std::endl; + return; + } + } + + ~WredlandWorldWrapper() + { + while (!m_parsedUris.empty()) { + librdf_free_uri(m_parsedUris.begin()->second); + m_parsedUris.erase(m_parsedUris.begin()); + } + if (m_model) librdf_free_model(m_model); + if (m_storage) librdf_free_storage(m_storage); + if (m_world) librdf_free_world(m_world); + } + + bool isOK() const { return (m_model != 0); } + + librdf_uri *getUri(QString uriString, QString &errorString) + { + QMutexLocker locker(&m_mutex); + + if (m_parsedUris.find(uriString) != m_parsedUris.end()) { + return m_parsedUris[uriString]; + } + + librdf_uri *uri = librdf_new_uri + (m_world, (const unsigned char *)uriString.toUtf8().data()); + if (!uri) { + errorString = "Failed to construct librdf_uri!"; + return 0; + } + + librdf_parser *parser = librdf_new_parser(m_world, "guess", NULL, NULL); + if (!parser) { + errorString = "Failed to initialise Redland parser"; + return 0; + } + + std::cerr << "About to parse \"" << uriString.toStdString() << "\"" << std::endl; + + Profiler p("SimpleSPARQLQuery: Parse URI into LIBRDF model"); + + if (librdf_parser_parse_into_model(parser, uri, NULL, m_model)) { + + errorString = QString("Failed to parse RDF from URI \"%1\"") + .arg(uriString); + librdf_free_parser(parser); + librdf_free_uri(uri); + return 0; + + } else { + + librdf_free_parser(parser); + m_parsedUris[uriString] = uri; + return uri; + } + } + + librdf_world *getWorld() { return m_world; } + const librdf_world *getWorld() const { return m_world; } + + librdf_model *getModel() { return m_model; } + const librdf_model *getModel() const { return m_model; } + +private: + librdf_world *m_world; + librdf_storage *m_storage; + librdf_model *m_model; + + QMutex m_mutex; + std::map<QString, librdf_uri *> m_parsedUris; +}; +#endif + class SimpleSPARQLQuery::Impl { public: - Impl(QString query); + Impl(QString fromUri, QString query); ~Impl(); void setProgressReporter(ProgressReporter *reporter) { m_reporter = reporter; } @@ -57,15 +165,32 @@ bool isOK() const; QString getErrorString() const; + static void setImplementationPreference + (SimpleSPARQLQuery::ImplementationPreference p) { + m_preference = p; + } + protected: static void errorHandler(void *, raptor_locator *, const char *); + static QMutex m_mutex; + #ifdef USE_NEW_RASQAL_API - static WrasqalWorldWrapper m_www; + static WrasqalWorldWrapper *m_rasqal; #else - static bool m_initialised; + static bool m_rasqalInitialised; #endif - + +#ifdef HAVE_REDLAND + static WredlandWorldWrapper *m_redland; +#endif + + static SimpleSPARQLQuery::ImplementationPreference m_preference; + + ResultList executeDirectParser(); + ResultList executeDatastore(); + + QString m_fromUri; QString m_query; QString m_errorString; ProgressReporter *m_reporter; @@ -73,15 +198,24 @@ }; #ifdef USE_NEW_RASQAL_API -WrasqalWorldWrapper -SimpleSPARQLQuery::Impl::m_www; +WrasqalWorldWrapper *SimpleSPARQLQuery::Impl::m_rasqal = 0; #else -bool -SimpleSPARQLQuery::Impl::m_initialised = false; +bool SimpleSPARQLQuery::Impl::m_rasqalInitialised = false; #endif -SimpleSPARQLQuery::SimpleSPARQLQuery(QString query) : - m_impl(new Impl(query)) { } +#ifdef HAVE_REDLAND +WredlandWorldWrapper *SimpleSPARQLQuery::Impl::m_redland = 0; +#endif + +QMutex SimpleSPARQLQuery::Impl::m_mutex; + +SimpleSPARQLQuery::ImplementationPreference +SimpleSPARQLQuery::Impl::m_preference = SimpleSPARQLQuery::UseDirectParser; + +SimpleSPARQLQuery::SimpleSPARQLQuery(QString fromUri, QString query) : + m_impl(new Impl(fromUri, query)) +{ +} SimpleSPARQLQuery::~SimpleSPARQLQuery() { @@ -118,7 +252,14 @@ return m_impl->getErrorString(); } -SimpleSPARQLQuery::Impl::Impl(QString query) : +void +SimpleSPARQLQuery::setImplementationPreference(ImplementationPreference p) +{ + SimpleSPARQLQuery::Impl::setImplementationPreference(p); +} + +SimpleSPARQLQuery::Impl::Impl(QString fromUri, QString query) : + m_fromUri(fromUri), m_query(query), m_reporter(0), m_cancelled(false) @@ -165,13 +306,57 @@ { ResultList list; + ImplementationPreference preference; + + m_mutex.lock(); + + if (m_preference == UseDatastore) { +#ifdef HAVE_REDLAND + if (!m_redland) { + m_redland = new WredlandWorldWrapper(); + if (!m_redland->isOK()) { + cerr << "WARNING: SimpleSPARQLQuery::execute: Failed to initialise Redland datastore, falling back to direct parser implementation" << endl; + delete m_redland; + m_preference = UseDirectParser; + } + } +#else + cerr << "WARNING: SimpleSPARQLQuery::execute: Datastore implementation preference indicated, but no datastore compiled in; using direct parser" << endl; + m_preference = UseDirectParser; +#endif + } + + if (m_preference == UseDirectParser) { #ifdef USE_NEW_RASQAL_API - rasqal_query *query = rasqal_new_query(m_www.getWorld(), "sparql", NULL); + if (!m_rasqal) m_rasqal = new WrasqalWorldWrapper(); #else - if (!m_initialised) { - m_initialised = true; - rasqal_init(); + if (!m_rasqalInitialised) { + rasqal_init(); + m_rasqalInitialised = true; + } +#endif } + + preference = m_preference; + m_mutex.unlock(); + + if (preference == SimpleSPARQLQuery::UseDirectParser) { + return executeDirectParser(); + } else { + return executeDatastore(); + } +} + +SimpleSPARQLQuery::ResultList +SimpleSPARQLQuery::Impl::executeDirectParser() +{ + ResultList list; + + Profiler profiler("SimpleSPARQLQuery::executeDirectParser"); + +#ifdef USE_NEW_RASQAL_API + rasqal_query *query = rasqal_new_query(m_rasqal->getWorld(), "sparql", NULL); +#else rasqal_query *query = rasqal_new_query("sparql", NULL); #endif if (!query) { @@ -183,14 +368,23 @@ rasqal_query_set_error_handler(query, this, errorHandler); rasqal_query_set_fatal_error_handler(query, this, errorHandler); - if (rasqal_query_prepare - (query, (const unsigned char *)m_query.toUtf8().data(), NULL)) { - cerr << "SimpleSPARQLQuery: Failed to prepare query" << endl; - rasqal_free_query(query); - return list; + { + Profiler p("SimpleSPARQLQuery: Prepare RASQAL query"); + + if (rasqal_query_prepare + (query, (const unsigned char *)m_query.toUtf8().data(), NULL)) { + cerr << "SimpleSPARQLQuery: Failed to prepare query" << endl; + rasqal_free_query(query); + return list; + } } - rasqal_query_results *results = rasqal_query_execute(query); + rasqal_query_results *results; + + { + Profiler p("SimpleSPARQLQuery: Execute RASQAL query"); + results = rasqal_query_execute(query); + } // cerr << "Query executed" << endl; @@ -272,10 +466,143 @@ return list; } +SimpleSPARQLQuery::ResultList +SimpleSPARQLQuery::Impl::executeDatastore() +{ + ResultList list; +#ifndef HAVE_REDLAND + // This should have been caught by execute() + cerr << "SimpleSPARQLQuery: INTERNAL ERROR: Datastore not compiled in" << endl; + return list; +#else + Profiler profiler("SimpleSPARQLQuery::executeDatastore"); + + librdf_uri *uri = m_redland->getUri(m_fromUri, m_errorString); + if (!uri) return list; + + std::cerr << "SimpleSPARQLQuery: Query is: \"" << m_query.toStdString() << "\"" << std::endl; + static std::map<QString, int> counter; + if (counter.find(m_query) == counter.end()) counter[m_query] = 1; + else ++counter[m_query]; + std::cerr << "Counter for this query: " << counter[m_query] << std::endl; + + librdf_query *query; + + { + Profiler p("SimpleSPARQLQuery: Prepare LIBRDF query"); + query = librdf_new_query + (m_redland->getWorld(), "sparql", NULL, + (const unsigned char *)m_query.toUtf8().data(), uri); + } + std::cerr << "Prepared" << std::endl; + + if (!query) { + m_errorString = "Failed to construct query"; + return list; + } + + librdf_query_results *results; + { + Profiler p("SimpleSPARQLQuery: Execute LIBRDF query"); + results = librdf_query_execute(query, m_redland->getModel()); + } + std::cerr << "Executed" << std::endl; + + if (!results) { + cerr << "SimpleSPARQLQuery: LIBRDF query failed" << endl; + librdf_free_query(query); + return list; + } + + if (!librdf_query_results_is_bindings(results)) { + cerr << "SimpleSPARQLQuery: LIBRDF query has wrong result type (not bindings)" << endl; + librdf_free_query_results(results); + librdf_free_query(query); + return list; + } + + int resultCount = 0; + int resultTotal = librdf_query_results_get_count(results); // probably wrong + m_cancelled = false; + + while (!librdf_query_results_finished(results)) { + + int count = librdf_query_results_get_bindings_count(results); + + KeyValueMap resultmap; + + for (int i = 0; i < count; ++i) { + + const char *name = + librdf_query_results_get_binding_name(results, i); + + librdf_node *node = + librdf_query_results_get_binding_value(results, i); + + QString key = (const char *)name; + + if (!node) { + resultmap[key] = Value(); + continue; + } + + ValueType type = LiteralValue; + if (librdf_node_is_resource(node)) type = URIValue; + else if (librdf_node_is_literal(node)) type = LiteralValue; + else if (librdf_node_is_blank(node)) type = BlankValue; + else { + cerr << "SimpleSPARQLQuery: LIBRDF query returned unknown node type (not resource, literal, or blank)" << endl; + resultmap[key] = Value(); + librdf_free_node(node); + continue; + } + + QString text = (const char *)librdf_node_get_literal_value(node); + +#ifdef DEBUG_SIMPLE_SPARQL_QUERY + std::cerr << i << ". " << key.toStdString() << " -> " << text.toStdString() << " (type " << type << ")" << std::endl; +#endif + + resultmap[key] = Value(type, text); + + librdf_free_node(node); + } + + list.push_back(resultmap); + + librdf_query_results_next(results); + + resultCount++; + + if (m_reporter) { + if (resultCount >= resultTotal) { + if (m_reporter->isDefinite()) m_reporter->setDefinite(false); + m_reporter->setProgress(resultCount); + } else { + m_reporter->setProgress((resultCount * 100) / resultTotal); + } + + if (m_reporter->wasCancelled()) { + m_cancelled = true; + break; + } + } + } + + librdf_free_query_results(results); + librdf_free_query(query); + + std::cerr << "All results retrieved" << std::endl; + + return list; +#endif +} + SimpleSPARQLQuery::Value -SimpleSPARQLQuery::singleResultQuery(QString query, QString binding) +SimpleSPARQLQuery::singleResultQuery(QString fromUri, + QString query, QString binding) { - SimpleSPARQLQuery q(query); + SimpleSPARQLQuery q(fromUri, query); ResultList results = q.execute(); if (!q.isOK()) { cerr << "SimpleSPARQLQuery::singleResultQuery: ERROR: "
--- a/rdf/SimpleSPARQLQuery.h Wed Nov 12 16:39:29 2008 +0000 +++ b/rdf/SimpleSPARQLQuery.h Thu Nov 13 14:23:23 2008 +0000 @@ -37,7 +37,7 @@ typedef std::map<QString, Value> KeyValueMap; typedef std::vector<KeyValueMap> ResultList; - SimpleSPARQLQuery(QString query); + SimpleSPARQLQuery(QString fromUri, QString query); ~SimpleSPARQLQuery(); void setProgressReporter(ProgressReporter *reporter); @@ -50,7 +50,15 @@ // Do a query and return the value for the given binding, from the // first result that has a value for it - static Value singleResultQuery(QString query, QString binding); + static Value singleResultQuery(QString fromUri, + QString query, + QString binding); + + enum ImplementationPreference { + UseDirectParser, // rasqal (default because it's simpler if seldom used) + UseDatastore // redland + }; + static void setImplementationPreference(ImplementationPreference); protected: class Impl;