Mercurial > hg > svcore
view rdf/SimpleSPARQLQuery.cpp @ 492:23945cdd7161
* Update RDF query stuff again so as to set up a temporary datastore
each time we want to query over an rdf file, instead of using rasqal
against the file. Seems the only way to avoid threading and storage
management issues when trying to load from a single-source file and
perform queries against our main datastore at the same time. Maybe.
author | Chris Cannam |
---|---|
date | Mon, 24 Nov 2008 16:26:11 +0000 |
parents | c3fb8258e34d |
children | 3931711b5671 |
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

/*
    Sonic Visualiser
    An audio file viewer and annotation editor.
    Centre for Digital Music, Queen Mary, University of London.
    This file copyright 2008 QMUL.

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
*/

#include "SimpleSPARQLQuery.h"
#include "base/ProgressReporter.h"
#include "base/Profiler.h"

#include <QMutex>
#include <QMutexLocker>
#include <QRegExp>

#include <map>
#include <set>

#include <redland.h>

//#define DEBUG_SIMPLE_SPARQL_QUERY 1

#include <iostream>

using std::cerr;
using std::endl;

/**
 * Owner of the Redland (librdf) world used by SimpleSPARQLQuery.
 *
 * Holds one "default" model into which any number of source URIs may
 * be loaded, plus a set of independent models, each with its own
 * storage, keyed by the single source URI they were parsed from.
 *
 * loadUriIntoDefaultModel(), getModel() and freeModel() each take the
 * wrapper's own mutex for their whole duration.
 */
class WredlandWorldWrapper
{
public:
    WredlandWorldWrapper();
    ~WredlandWorldWrapper();

    /// True if the default model was created successfully.
    bool isOK() const;

    /**
     * Parse the given URI into the default model, unless that URI has
     * already been loaded successfully.  Returns true on success (or
     * if already loaded); on failure returns false and sets
     * errorString.
     */
    bool loadUriIntoDefaultModel(QString uriString, QString &errorString);

    librdf_world *getWorld() { return m_world; }
    const librdf_world *getWorld() const { return m_world; }

    librdf_model *getDefaultModel() { return m_defaultModel; }
    const librdf_model *getDefaultModel() const { return m_defaultModel; }

    /**
     * Return the model for the given source URI, creating it and
     * parsing the URI into it on first request.  An empty URI selects
     * the default model.  Returns 0 if the model could not be created
     * or the source could not be parsed; a parse failure is remembered,
     * so later requests for the same URI return 0 without reparsing
     * until freeModel() is called for it.
     */
    librdf_model *getModel(QString fromUri);

    /**
     * Release the model (and storage) previously obtained through
     * getModel() for the given URI.  The default model cannot be freed.
     */
    void freeModel(QString forUri);

private:
    QMutex m_mutex;

    librdf_world *m_world;
    librdf_storage *m_defaultStorage;
    librdf_model *m_defaultModel;

    // URIs that have been parsed into the default model
    std::set<QString> m_defaultModelUris;

    // Per-source storages and models.  A model entry may be 0, which
    // records that parsing that source failed.
    std::map<QString, librdf_storage *> m_ownStorageUris;
    std::map<QString, librdf_model *> m_ownModelUris;

    bool loadUri(librdf_model *model, QString uri, QString &errorString);
};

WredlandWorldWrapper::WredlandWorldWrapper() :
    m_world(0), m_defaultStorage(0), m_defaultModel(0)
{
    m_world = librdf_new_world();
    if (!m_world) {
        cerr << "SimpleSPARQLQuery: ERROR: Failed to create LIBRDF world!" << endl;
        return;
    }
    librdf_world_open(m_world);

    // Prefer the "trees" storage; if it is unavailable, ask for the
    // library's default storage instead.
    m_defaultStorage = librdf_new_storage(m_world, "trees", NULL, NULL);
    if (!m_defaultStorage) {
        std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland trees datastore, falling back to memory store" << std::endl;
        m_defaultStorage = librdf_new_storage(m_world, NULL, NULL, NULL);
        if (!m_defaultStorage) {
            std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland memory datastore" << std::endl;
            return;
        }
    }

    m_defaultModel = librdf_new_model(m_world, m_defaultStorage, NULL);
    if (!m_defaultModel) {
        std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland data model" << std::endl;
        return;
    }
}

WredlandWorldWrapper::~WredlandWorldWrapper()
{
    // Models are released before the storages they were built on, and
    // the world goes last.  Model entries may be 0 (failed parses).
    for (std::map<QString, librdf_model *>::iterator mi = m_ownModelUris.begin();
         mi != m_ownModelUris.end(); ++mi) {
        if (mi->second) librdf_free_model(mi->second);
    }
    m_ownModelUris.clear();

    for (std::map<QString, librdf_storage *>::iterator si = m_ownStorageUris.begin();
         si != m_ownStorageUris.end(); ++si) {
        if (si->second) librdf_free_storage(si->second);
    }
    m_ownStorageUris.clear();

    if (m_defaultModel) librdf_free_model(m_defaultModel);
    if (m_defaultStorage) librdf_free_storage(m_defaultStorage);
    if (m_world) librdf_free_world(m_world);
}

bool
WredlandWorldWrapper::isOK() const
{
    return (m_defaultModel != 0);
}

bool
WredlandWorldWrapper::loadUriIntoDefaultModel(QString uriString, QString &errorString)
{
    QMutexLocker locker(&m_mutex);

    if (m_defaultModelUris.find(uriString) != m_defaultModelUris.end()) {
        // already loaded
        return true;
    }

    if (!loadUri(m_defaultModel, uriString, errorString)) {
        return false;
    }

    m_defaultModelUris.insert(uriString);
    return true;
}

librdf_model *
WredlandWorldWrapper::getModel(QString fromUri)
{
    QMutexLocker locker(&m_mutex);

    if (fromUri == "") {
        return getDefaultModel();
    }

    std::map<QString, librdf_model *>::iterator known =
        m_ownModelUris.find(fromUri);
    if (known != m_ownModelUris.end()) {
        // may be 0, if an earlier attempt to parse this source failed
        return known->second;
    }

    librdf_storage *storage = librdf_new_storage(m_world, "trees", NULL, NULL);
    if (!storage) { // don't warn here, we probably already did it in main ctor
        storage = librdf_new_storage(m_world, NULL, NULL, NULL);
    }
    if (!storage) {
        // Neither storage type is available: there is nothing to
        // build a model on, and nothing to free.
        std::cerr << "SimpleSPARQLQuery: ERROR: Failed to create storage for new model" << std::endl;
        return 0;
    }

    librdf_model *model = librdf_new_model(m_world, storage, NULL);
    if (!model) {
        std::cerr << "SimpleSPARQLQuery: ERROR: Failed to create new model" << std::endl;
        librdf_free_storage(storage);
        return 0;
    }

    QString err;
    if (!loadUri(model, fromUri, err)) {
        std::cerr << "SimpleSPARQLQuery: ERROR: Failed to parse into new model: "
                  << err.toStdString() << std::endl;
        librdf_free_model(model);
        librdf_free_storage(storage);
        // Remember the failure so that we don't try to parse the same
        // source again on every query
        m_ownModelUris[fromUri] = 0;
        return 0;
    }

    m_ownModelUris[fromUri] = model;
    m_ownStorageUris[fromUri] = storage;

    return model;
}

void
WredlandWorldWrapper::freeModel(QString forUri)
{
    QMutexLocker locker(&m_mutex);

    if (forUri == "") {
        std::cerr << "SimpleSPARQLQuery::freeModel: ERROR: Can't free default model" << std::endl;
        return;
    }

    std::map<QString, librdf_model *>::iterator mi = m_ownModelUris.find(forUri);
    if (mi == m_ownModelUris.end()) {
        std::cerr << "SimpleSPARQLQuery::freeModel: ERROR: Never heard of this model (uri = \""
                  << forUri.toStdString() << "\")" << std::endl;
        return;
    }

    if (mi->second) librdf_free_model(mi->second);
    m_ownModelUris.erase(mi);

    // There is no storage entry if the parse for this source failed
    std::map<QString, librdf_storage *>::iterator si = m_ownStorageUris.find(forUri);
    if (si != m_ownStorageUris.end()) {
        if (si->second) librdf_free_storage(si->second);
        m_ownStorageUris.erase(si);
    }
}

/**
 * Parse the RDF document at the given URI into the given model, using
 * the "guess" parser.  Returns true on success; on failure returns
 * false and sets errorString.  Callers in this file hold m_mutex.
 */
bool
WredlandWorldWrapper::loadUri(librdf_model *model, QString uri, QString &errorString)
{
    librdf_uri *luri = librdf_new_uri
        (m_world, (const unsigned char *)uri.toUtf8().data());
    if (!luri) {
        errorString = "Failed to construct librdf_uri!";
        return false;
    }

    librdf_parser *parser = librdf_new_parser(m_world, "guess", NULL, NULL);
    if (!parser) {
        errorString = "Failed to initialise Redland parser";
        librdf_free_uri(luri);
        return false;
    }

    std::cerr << "About to parse \"" << uri.toStdString() << "\"" << std::endl;

    Profiler p("SimpleSPARQLQuery: Parse URI into LIBRDF model");

    bool parsed = true;
    if (librdf_parser_parse_into_model(parser, luri, NULL, model)) {
        errorString = QString("Failed to parse RDF from URI \"%1\"")
            .arg(uri);
        parsed = false;
    }

    // Both the parser and the URI object belong to us and must be
    // released on every path out of here.
    librdf_free_parser(parser);
    librdf_free_uri(luri);

    return parsed;
}

/**
 * Private implementation of SimpleSPARQLQuery.  All instances share a
 * single static Redland wrapper; execute(), addSourceToModel() and
 * closeSingleSource() each hold the static mutex while they use it.
 */
class SimpleSPARQLQuery::Impl
{
public:
    Impl(SimpleSPARQLQuery::QueryType, QString query);
    ~Impl();

    static bool addSourceToModel(QString sourceUri);
    static void closeSingleSource(QString sourceUri);

    void setProgressReporter(ProgressReporter *reporter) { m_reporter = reporter; }
    bool wasCancelled() const { return m_cancelled; }

    ResultList execute();

    bool isOK() const;
    QString getErrorString() const;

protected:
    static QMutex m_mutex;

    static WredlandWorldWrapper m_redland;

    ResultList executeDirectParser();
    ResultList executeDatastore();
    ResultList executeFor(QString modelUri);

    QueryType m_type;
    QString m_query;
    QString m_errorString;
    ProgressReporter *m_reporter;
    bool m_cancelled;
};

WredlandWorldWrapper SimpleSPARQLQuery::Impl::m_redland;

QMutex SimpleSPARQLQuery::Impl::m_mutex;

SimpleSPARQLQuery::SimpleSPARQLQuery(QueryType type, QString query) :
    m_impl(new Impl(type, query))
{
}

SimpleSPARQLQuery::~SimpleSPARQLQuery()
{
    delete m_impl;
}

void
SimpleSPARQLQuery::setProgressReporter(ProgressReporter *reporter)
{
    m_impl->setProgressReporter(reporter);
}

bool
SimpleSPARQLQuery::wasCancelled() const
{
    return m_impl->wasCancelled();
}

SimpleSPARQLQuery::ResultList
SimpleSPARQLQuery::execute()
{
    return m_impl->execute();
}

bool
SimpleSPARQLQuery::isOK() const
{
    return m_impl->isOK();
}

QString
SimpleSPARQLQuery::getErrorString() const
{
    return m_impl->getErrorString();
}

bool
SimpleSPARQLQuery::addSourceToModel(QString sourceUri)
{
    return SimpleSPARQLQuery::Impl::addSourceToModel(sourceUri);
}

void
SimpleSPARQLQuery::closeSingleSource(QString sourceUri)
{
    SimpleSPARQLQuery::Impl::closeSingleSource(sourceUri);
}

SimpleSPARQLQuery::Impl::Impl(QueryType type, QString query) :
    m_type(type),
    m_query(query),
    m_reporter(0),
    m_cancelled(false)
{
}

SimpleSPARQLQuery::Impl::~Impl()
{
}

bool
SimpleSPARQLQuery::Impl::isOK() const
{
    return (m_errorString == "");
}

QString
SimpleSPARQLQuery::Impl::getErrorString() const
{
    return m_errorString;
}

/**
 * Run the query, against either the single source named in its FROM
 * clause or the default model, depending on the query type.  Returns
 * an empty list if the Redland datastore is not available.
 */
SimpleSPARQLQuery::ResultList
SimpleSPARQLQuery::Impl::execute()
{
    ResultList list;

    QMutexLocker locker(&m_mutex);

    if (!m_redland.isOK()) {
        cerr << "ERROR: SimpleSPARQLQuery::execute: Failed to initialise Redland datastore" << endl;
        return list;
    }

    if (m_type == QueryFromSingleSource) {
        list = executeDirectParser();
    } else {
        list = executeDatastore();
    }

#ifdef DEBUG_SIMPLE_SPARQL_QUERY
    if (m_errorString != "") {
        std::cerr << "SimpleSPARQLQuery::execute: error returned: \""
                  << m_errorString.toStdString() << "\"" << std::endl;
    }
#endif

    return list;
}

/**
 * Pull the source URI out of the query's FROM clause and run the
 * query against the model for that source.  Returns an empty list if
 * the query has no FROM clause.
 */
SimpleSPARQLQuery::ResultList
SimpleSPARQLQuery::Impl::executeDirectParser()
{
#ifdef DEBUG_SIMPLE_SPARQL_QUERY
    std::cerr << "SimpleSPARQLQuery::executeDirectParser: Query is: \"" << m_query.toStdString() << "\"" << std::endl;
#endif

    Profiler profiler("SimpleSPARQLQuery::executeDirectParser");

    static QRegExp fromRE("from\\s+<([^>]+)>", Qt::CaseInsensitive);

    if (fromRE.indexIn(m_query) < 0) {
        std::cerr << "SimpleSPARQLQuery::executeDirectParser: Query contains no FROM clause, nothing to parse from" << std::endl;
        return ResultList();
    }

    QString fromUri = fromRE.cap(1);

#ifdef DEBUG_SIMPLE_SPARQL_QUERY
    std::cerr << "SimpleSPARQLQuery::executeDirectParser: FROM URI is <"
              << fromUri.toStdString() << ">" << std::endl;
#endif

    return executeFor(fromUri);
}

/**
 * Run the query against the default model.
 */
SimpleSPARQLQuery::ResultList
SimpleSPARQLQuery::Impl::executeDatastore()
{
#ifdef DEBUG_SIMPLE_SPARQL_QUERY
    std::cerr << "SimpleSPARQLQuery::executeDatastore: Query is: \"" << m_query.toStdString() << "\"" << std::endl;
#endif

    Profiler profiler("SimpleSPARQLQuery::executeDatastore");

    return executeFor("");
}

/**
 * Run the query against the model identified by modelUri (empty for
 * the default model) and collect every result row as a map from
 * binding name to Value.  Sets the error string only if the query
 * object itself cannot be constructed; other failures are logged and
 * yield an empty list.  Called from execute() with m_mutex held.
 */
SimpleSPARQLQuery::ResultList
SimpleSPARQLQuery::Impl::executeFor(QString modelUri)
{
    ResultList list;
    librdf_query *query;

    // Diagnostic count of how many times each distinct query text has
    // been run.  operator[] value-initialises a new entry to zero.
    static std::map<QString, int> counter;
    int &timesRun = counter[m_query];
    ++timesRun;

    std::cerr << "Counter for this query: " << timesRun << std::endl;
    std::cerr << "Base URI is: \"" << modelUri.toStdString() << "\"" << std::endl;

    {
        Profiler p("SimpleSPARQLQuery: Prepare LIBRDF query");
        query = librdf_new_query
            (m_redland.getWorld(), "sparql", NULL,
             (const unsigned char *)m_query.toUtf8().data(), NULL);
    }

    if (!query) {
        m_errorString = "Failed to construct query";
        return list;
    }

    librdf_query_results *results = 0;
    {
        Profiler p("SimpleSPARQLQuery: Execute LIBRDF query");
        // getModel returns 0 if the source could not be loaded; don't
        // hand a null model to the library in that case.
        librdf_model *model = m_redland.getModel(modelUri);
        if (model) {
            results = librdf_query_execute(query, model);
        } else {
            cerr << "SimpleSPARQLQuery: ERROR: No model available for URI \""
                 << modelUri.toStdString() << "\"" << endl;
        }
    }

    if (!results) {
        cerr << "SimpleSPARQLQuery: LIBRDF query failed" << endl;
        librdf_free_query(query);
        return list;
    }

    if (!librdf_query_results_is_bindings(results)) {
        cerr << "SimpleSPARQLQuery: LIBRDF query has wrong result type (not bindings)" << endl;
        librdf_free_query_results(results);
        librdf_free_query(query);
        return list;
    }

    int resultCount = 0;
    int resultTotal = librdf_query_results_get_count(results); // probably wrong
    m_cancelled = false;

    while (!librdf_query_results_finished(results)) {

        int count = librdf_query_results_get_bindings_count(results);

        KeyValueMap resultmap;

        for (int i = 0; i < count; ++i) {

            const char *name =
                librdf_query_results_get_binding_name(results, i);

            if (!name) {
                std::cerr << "WARNING: Result " << i << " of query has no name" << std::endl;
                continue;
            }

            librdf_node *node =
                librdf_query_results_get_binding_value(results, i);

            QString key = (const char *)name;

            if (!node) {
#ifdef DEBUG_SIMPLE_SPARQL_QUERY
                std::cerr << i << ". " << key.toStdString() << " -> (nil)" << std::endl;
#endif
                resultmap[key] = Value();
                continue;
            }

            ValueType type = LiteralValue;
            QString text;

            if (librdf_node_is_resource(node)) {

                type = URIValue;
                librdf_uri *uri = librdf_node_get_uri(node);
                const char *us = (const char *)librdf_uri_as_string(uri);

                if (!us) {
                    std::cerr << "WARNING: Result " << i << " of query claims URI type, but has null URI" << std::endl;
                } else {
                    text = us;
                }

            } else if (librdf_node_is_literal(node)) {

                type = LiteralValue;

                const char *lit =
                    (const char *)librdf_node_get_literal_value(node);

                if (!lit) {
                    std::cerr << "WARNING: Result " << i << " of query claims literal type, but has no literal" << std::endl;
                } else {
                    text = lit;
                }

            } else if (librdf_node_is_blank(node)) {

                type = BlankValue;

            } else {

                cerr << "SimpleSPARQLQuery: LIBRDF query returned unknown node type (not resource, literal, or blank)" << endl;
            }

#ifdef DEBUG_SIMPLE_SPARQL_QUERY
            cerr << i << ". " << key.toStdString() << " -> " << text.toStdString() << " (type " << type << ")" << endl;
#endif

            resultmap[key] = Value(type, text);

            // NOTE(review): the node is deliberately not freed here.
            // Whether the caller owns the node returned by
            // librdf_query_results_get_binding_value depends on the
            // Redland documentation for the version in use -- confirm
            // before enabling the following line.
//            librdf_free_node(node);
        }

        list.push_back(resultmap);

        librdf_query_results_next(results);

        resultCount++;

        if (m_reporter) {
            if (resultCount >= resultTotal) {
                // We have passed the reported total, so it is no use
                // as a denominator: report the bare count instead
                if (m_reporter->isDefinite()) m_reporter->setDefinite(false);
                m_reporter->setProgress(resultCount);
            } else {
                m_reporter->setProgress((resultCount * 100) / resultTotal);
            }

            if (m_reporter->wasCancelled()) {
                m_cancelled = true;
                break;
            }
        }
    }

    librdf_free_query_results(results);
    librdf_free_query(query);

#ifdef DEBUG_SIMPLE_SPARQL_QUERY
    cerr << "SimpleSPARQLQuery::executeDatastore: All results retrieved (" << resultCount << " of them)" << endl;
#endif

    return list;
}

/**
 * Load the given source URI into the shared default model.  Returns
 * false if the datastore is unavailable or the source fails to load.
 */
bool
SimpleSPARQLQuery::Impl::addSourceToModel(QString sourceUri)
{
    QString err;

    QMutexLocker locker(&m_mutex);

    if (!m_redland.isOK()) {
        std::cerr << "SimpleSPARQLQuery::addSourceToModel: Failed to initialise Redland datastore" << std::endl;
        return false;
    }

    if (!m_redland.loadUriIntoDefaultModel(sourceUri, err)) {
        std::cerr << "SimpleSPARQLQuery::addSourceToModel: Failed to add source URI \"" << sourceUri.toStdString() << "\": " << err.toStdString() << std::endl;
        return false;
    }

    return true;
}

/**
 * Discard the per-source model that was created for queries of type
 * QueryFromSingleSource against the given URI.
 */
void
SimpleSPARQLQuery::Impl::closeSingleSource(QString sourceUri)
{
    QMutexLocker locker(&m_mutex);

    m_redland.freeModel(sourceUri);
}

/**
 * Convenience: run the query and return the value bound to the given
 * binding name in the first result row that has a value for it.
 * Returns a default-constructed Value if the query fails or no row
 * has such a value.
 */
SimpleSPARQLQuery::Value
SimpleSPARQLQuery::singleResultQuery(QueryType type,
                                     QString query, QString binding)
{
    SimpleSPARQLQuery q(type, query);
    ResultList results = q.execute();
    if (!q.isOK()) {
        cerr << "SimpleSPARQLQuery::singleResultQuery: ERROR: "
             << q.getErrorString().toStdString() << endl;
        return Value();
    }

    const int rowCount = (int)results.size();
    for (int i = 0; i < rowCount; ++i) {
        if (results[i].find(binding) != results[i].end() &&
            results[i][binding].type != NoValue) {
            return results[i][binding];
        }
    }

    return Value();
}