Mercurial > hg > svcore
changeset 481:a82645e788fc
* Auto-select the RDF datastore/parsing backend; use the trees datastore if
available
* Make CachedFile remember whether a file has already been successfully
located locally (avoiding a system call to check the filesystem)
author | Chris Cannam |
---|---|
date | Fri, 14 Nov 2008 10:10:05 +0000 |
parents | 3ffce691c9bf |
children | f54381e01141 |
files | data/fileio/CachedFile.cpp data/fileio/CachedFile.h rdf/PluginRDFDescription.cpp rdf/PluginRDFIndexer.cpp rdf/SimpleSPARQLQuery.cpp rdf/SimpleSPARQLQuery.h transform/TransformFactory.cpp transform/TransformFactory.h |
diffstat | 8 files changed, 196 insertions(+), 60 deletions(-) [+] |
line wrap: on
line diff
--- a/data/fileio/CachedFile.cpp Thu Nov 13 14:23:23 2008 +0000 +++ b/data/fileio/CachedFile.cpp Fri Nov 14 10:10:05 2008 +0000 @@ -28,11 +28,18 @@ #include <QDir> #include <QCryptographicHash> +#include "base/Profiler.h" + #include <iostream> +CachedFile::OriginLocalFilenameMap +CachedFile::m_knownGoodCaches; + QString CachedFile::getLocalFilenameFor(QUrl url) { + Profiler p("CachedFile::getLocalFilenameFor"); + QDir dir(getCacheDirectory()); QString filename = @@ -63,10 +70,11 @@ CachedFile::CachedFile(QString origin, ProgressReporter *reporter) : m_origin(origin), - m_localFilename(getLocalFilenameFor(m_origin)), m_reporter(reporter), m_ok(false) { + Profiler p("CachedFile::CachedFile[1]"); + std::cerr << "CachedFile::CachedFile: origin is \"" << origin.toStdString() << "\"" << std::endl; check(); @@ -74,10 +82,11 @@ CachedFile::CachedFile(QUrl url, ProgressReporter *reporter) : m_origin(url.toString()), - m_localFilename(getLocalFilenameFor(m_origin)), m_reporter(reporter), m_ok(false) { + Profiler p("CachedFile::CachedFile[2]"); + std::cerr << "CachedFile::CachedFile: url is \"" << url.toString().toStdString() << "\"" << std::endl; check(); @@ -105,7 +114,16 @@ //!!! n.b. obvious race condition here if different CachedFile // objects for same url used in more than one thread -- need to // lock appropriately. also consider race condition between - // separate instances of the program + // separate instances of the program! + + OriginLocalFilenameMap::const_iterator i = m_knownGoodCaches.find(m_origin); + if (i != m_knownGoodCaches.end()) { + m_ok = true; + m_localFilename = i->second; + return; + } + + m_localFilename = getLocalFilenameFor(m_origin); if (!QFileInfo(m_localFilename).exists()) { std::cerr << "CachedFile::check: Local file does not exist, making a note that it hasn't been retrieved" << std::endl; @@ -149,6 +167,10 @@ // retrieval timestamp is already invalid } } + + if (m_ok) { + m_knownGoodCaches[m_origin] = m_localFilename; + } } bool
--- a/data/fileio/CachedFile.h Thu Nov 13 14:23:23 2008 +0000 +++ b/data/fileio/CachedFile.h Fri Nov 14 10:10:05 2008 +0000 @@ -19,6 +19,7 @@ #include <QString> #include <QUrl> #include <QDateTime> +#include <map> class ProgressReporter; @@ -48,6 +49,9 @@ static QString getCacheDirectory(); static QString getLocalFilenameFor(QUrl url); + + typedef std::map<QString, QString> OriginLocalFilenameMap; + static OriginLocalFilenameMap m_knownGoodCaches; }; #endif
--- a/rdf/PluginRDFDescription.cpp Thu Nov 13 14:23:23 2008 +0000 +++ b/rdf/PluginRDFDescription.cpp Fri Nov 14 10:10:05 2008 +0000 @@ -191,8 +191,8 @@ " PREFIX dc: <http://purl.org/dc/elements/1.1/> " " SELECT ?%4 FROM <%1> " " WHERE { " - " ?plugin a vamp:Plugin ; " - " vamp:identifier \"%2\" ; " + " ?plugin vamp:identifier \"%2\" ; " + " a vamp:Plugin ; " " %3 ?%4 . " " }") .arg(url) @@ -221,8 +221,8 @@ " PREFIX foaf: <http://xmlns.com/foaf/0.1/> " " SELECT ?name FROM <%1> " " WHERE { " - " ?plugin a vamp:Plugin ; " - " vamp:identifier \"%2\" ; " + " ?plugin vamp:identifier \"%2\" ; " + " a vamp:Plugin ; " " foaf:maker ?maker . " " ?maker foaf:name ?name . " " }") @@ -246,8 +246,8 @@ " PREFIX foaf: <http://xmlns.com/foaf/0.1/> " " SELECT ?page from <%1> " " WHERE { " - " ?plugin a vamp:Plugin ; " - " vamp:identifier \"%2\" ; " + " ?plugin vamp:identifier \"%2\" ; " + " a vamp:Plugin ; " " foaf:page ?page . " " }") .arg(url) @@ -300,8 +300,8 @@ " WHERE { " - " ?plugin a vamp:Plugin ; " - " vamp:identifier \"%2\" ; " + " ?plugin vamp:identifier \"%2\" ; " + " a vamp:Plugin ; " " vamp:output ?output . " " ?output vamp:identifier ?output_id ; "
--- a/rdf/PluginRDFIndexer.cpp Thu Nov 13 14:23:23 2008 +0000 +++ b/rdf/PluginRDFIndexer.cpp Fri Nov 14 10:10:05 2008 +0000 @@ -261,7 +261,7 @@ } // cerr << "PluginRDFIndexer::indexURL: url = <" << urlString.toStdString() << ">" << endl; - +/*!!! SimpleSPARQLQuery query (localString, QString @@ -293,6 +293,26 @@ " } " ) .arg(localString)); +*/ + SimpleSPARQLQuery query + (localString, + QString + ( + " PREFIX vamp: <http://purl.org/ontology/vamp/> " + + " SELECT ?plugin ?library ?plugin_id " + " FROM <%1> " + + " WHERE { " + " ?plugin a vamp:Plugin . " + " ?plugin vamp:identifier ?plugin_id . " + + " OPTIONAL { " + " ?library vamp:available_plugin ?plugin " + " } " + " } " + ) + .arg(localString)); SimpleSPARQLQuery::ResultList results = query.execute(); @@ -317,7 +337,8 @@ i != results.end(); ++i) { QString pluginUri = (*i)["plugin"].value; - QString soname = (*i)["library_id"].value; +//!!! QString soname = (*i)["library_id"].value; + QString soUri = (*i)["library"].value; QString identifier = (*i)["plugin_id"].value; if (identifier == "") { @@ -328,13 +349,38 @@ << endl; continue; } - if (soname == "") { + if (soUri == "") { cerr << "PluginRDFIndexer::indexURL: NOTE: Document at <" << urlString.toStdString() << "> does not associate plugin <" << pluginUri.toStdString() << "> with any implementation library" << endl; continue; } + + QString sonameQuery = + QString( + " PREFIX vamp: <http://purl.org/ontology/vamp/> " + " SELECT ?library_id " + " FROM <%1> " + " WHERE { " + " <%2> vamp:identifier ?library_id " + " } " + ) + .arg(localString) + .arg(soUri); + + SimpleSPARQLQuery::Value sonameValue = + SimpleSPARQLQuery::singleResultQuery(localString, sonameQuery, "library_id"); + QString soname = sonameValue.value; + if (soname == "") { + cerr << "PluginRDFIndexer::indexURL: NOTE: Document at <" + << urlString.toStdString() << "> omits identifier for library <" + << soUri.toStdString() << ">" + << endl; + continue; + } + + /* cerr << "PluginRDFIndexer::indexURL: 
Document for plugin \"" << soname.toStdString() << ":" << identifier.toStdString()
--- a/rdf/SimpleSPARQLQuery.cpp Thu Nov 13 14:23:23 2008 +0000 +++ b/rdf/SimpleSPARQLQuery.cpp Fri Nov 14 10:10:05 2008 +0000 @@ -19,6 +19,7 @@ #include <QMutex> #include <QMutexLocker> +#include <QRegExp> #include <set> @@ -43,8 +44,25 @@ class WrasqalWorldWrapper // wrong but wromantic, etc { public: - WrasqalWorldWrapper() : m_world(rasqal_new_world()) { } - ~WrasqalWorldWrapper() { rasqal_free_world(m_world); } + WrasqalWorldWrapper() : + m_world(0) + { + m_world = rasqal_new_world(); + if (!m_world) { + cerr << "SimpleSPARQLQuery: ERROR: Failed to create RASQAL world!" << endl; + return; + } +/*!!! This appears to be new for 0.9.17? + if (rasqal_world_open(m_world)) { + cerr << "SimpleSPARQLQuery: ERROR: Failed to open RASQAL world!" << endl; + return; + } +*/ + } + ~WrasqalWorldWrapper() + { + rasqal_free_world(m_world); + } rasqal_world *getWorld() { return m_world; } const rasqal_world *getWorld() const { return m_world; } @@ -62,12 +80,14 @@ m_world(0), m_storage(0), m_model(0) { m_world = librdf_new_world(); + if (!m_world) { + cerr << "SimpleSPARQLQuery: ERROR: Failed to create LIBRDF world!" << endl; + return; + } librdf_world_open(m_world); - m_storage = librdf_new_storage(m_world, NULL, NULL, NULL); -// m_storage = librdf_new_storage(m_world, "hashes", NULL, -//. 
"hash-type='memory',indexes=1"); + m_storage = librdf_new_storage(m_world, "trees", NULL, NULL); if (!m_storage) { - std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland hashes datastore, falling back to memory store" << std::endl; + std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland trees datastore, falling back to memory store" << std::endl; m_storage = librdf_new_storage(m_world, NULL, NULL, NULL); if (!m_storage) { std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland memory datastore" << std::endl; @@ -165,8 +185,7 @@ bool isOK() const; QString getErrorString() const; - static void setImplementationPreference - (SimpleSPARQLQuery::ImplementationPreference p) { + static void setBackEnd(SimpleSPARQLQuery::BackEndPreference p) { m_preference = p; } @@ -185,7 +204,7 @@ static WredlandWorldWrapper *m_redland; #endif - static SimpleSPARQLQuery::ImplementationPreference m_preference; + static SimpleSPARQLQuery::BackEndPreference m_preference; ResultList executeDirectParser(); ResultList executeDatastore(); @@ -209,8 +228,8 @@ QMutex SimpleSPARQLQuery::Impl::m_mutex; -SimpleSPARQLQuery::ImplementationPreference -SimpleSPARQLQuery::Impl::m_preference = SimpleSPARQLQuery::UseDirectParser; +SimpleSPARQLQuery::BackEndPreference +SimpleSPARQLQuery::Impl::m_preference = SimpleSPARQLQuery::AutoSelectBackEnd; SimpleSPARQLQuery::SimpleSPARQLQuery(QString fromUri, QString query) : m_impl(new Impl(fromUri, query)) @@ -253,9 +272,9 @@ } void -SimpleSPARQLQuery::setImplementationPreference(ImplementationPreference p) +SimpleSPARQLQuery::setBackEnd(BackEndPreference p) { - SimpleSPARQLQuery::Impl::setImplementationPreference(p); + SimpleSPARQLQuery::Impl::setBackEnd(p); } SimpleSPARQLQuery::Impl::Impl(QString fromUri, QString query) : @@ -292,11 +311,14 @@ { SimpleSPARQLQuery::Impl *impl = (SimpleSPARQLQuery::Impl *)data; -// char buffer[256]; -// raptor_format_locator(buffer, 255, locator); -// impl->m_errorString = QString("%1 - 
%2").arg(buffer).arg(message); - - impl->m_errorString = message; + char buffer[256]; + raptor_format_locator(buffer, 255, locator); + QString loc(buffer); + if (loc != "") { + impl->m_errorString = QString("%1 - %2").arg(loc).arg(message); + } else { + impl->m_errorString = message; + } cerr << "SimpleSPARQLQuery: ERROR: " << impl->m_errorString.toStdString() << endl; } @@ -306,27 +328,45 @@ { ResultList list; - ImplementationPreference preference; + BackEndPreference preference; m_mutex.lock(); - if (m_preference == UseDatastore) { + if (m_preference == AutoSelectBackEnd) { +#ifdef HAVE_REDLAND +// cerr << "librdf version: " << librdf_version_major << "." << librdf_version_minor << "." << librdf_version_release << endl; + if (librdf_version_major > 1 || + (librdf_version_major == 1 && + (librdf_version_minor > 0 || + (librdf_version_minor == 0 && + librdf_version_release > 7)))) { + cerr << "SimpleSPARQLQuery: Auto-selecting LIBRDF back-end for tree-based storage" << endl; + m_preference = DatastoreBackEnd; + } +#endif + if (m_preference == AutoSelectBackEnd) { + cerr << "SimpleSPARQLQuery: Auto-selecting RASQAL back-end" << endl; + m_preference = DirectParserBackEnd; + } + } + + if (m_preference == DatastoreBackEnd) { #ifdef HAVE_REDLAND if (!m_redland) { m_redland = new WredlandWorldWrapper(); if (!m_redland->isOK()) { cerr << "WARNING: SimpleSPARQLQuery::execute: Failed to initialise Redland datastore, falling back to direct parser implementation" << endl; delete m_redland; - m_preference = UseDirectParser; + m_preference = DirectParserBackEnd; } } #else cerr << "WARNING: SimpleSPARQLQuery::execute: Datastore implementation preference indicated, but no datastore compiled in; using direct parser" << endl; - m_preference = UseDirectParser; + m_preference = DirectParserBackEnd; #endif } - if (m_preference == UseDirectParser) { + if (m_preference == DirectParserBackEnd) { #ifdef USE_NEW_RASQAL_API if (!m_rasqal) m_rasqal = new WrasqalWorldWrapper(); #else @@ 
-340,7 +380,7 @@ preference = m_preference; m_mutex.unlock(); - if (preference == SimpleSPARQLQuery::UseDirectParser) { + if (preference == SimpleSPARQLQuery::DirectParserBackEnd) { return executeDirectParser(); } else { return executeDatastore(); @@ -480,11 +520,16 @@ librdf_uri *uri = m_redland->getUri(m_fromUri, m_errorString); if (!uri) return list; +#ifdef DEBUG_SIMPLE_SPARQL_QUERY std::cerr << "SimpleSPARQLQuery: Query is: \"" << m_query.toStdString() << "\"" << std::endl; +#endif +/*!!! static std::map<QString, int> counter; if (counter.find(m_query) == counter.end()) counter[m_query] = 1; else ++counter[m_query]; std::cerr << "Counter for this query: " << counter[m_query] << std::endl; + std::cerr << "Base URI is: \"" << m_fromUri.toStdString() << "\"" << std::endl; +*/ librdf_query *query; @@ -494,7 +539,6 @@ (m_redland->getWorld(), "sparql", NULL, (const unsigned char *)m_query.toUtf8().data(), uri); } - std::cerr << "Prepared" << std::endl; if (!query) { m_errorString = "Failed to construct query"; @@ -506,7 +550,6 @@ Profiler p("SimpleSPARQLQuery: Execute LIBRDF query"); results = librdf_query_execute(query, m_redland->getModel()); } - std::cerr << "Executed" << std::endl; if (!results) { cerr << "SimpleSPARQLQuery: LIBRDF query failed" << endl; @@ -547,20 +590,30 @@ } ValueType type = LiteralValue; - if (librdf_node_is_resource(node)) type = URIValue; - else if (librdf_node_is_literal(node)) type = LiteralValue; - else if (librdf_node_is_blank(node)) type = BlankValue; - else { + QString text; + + if (librdf_node_is_resource(node)) { + + type = URIValue; + librdf_uri *uri = librdf_node_get_uri(node); + text = (const char *)librdf_uri_as_string(uri); + + } else if (librdf_node_is_literal(node)) { + + type = LiteralValue; + text = (const char *)librdf_node_get_literal_value(node); + + } else if (librdf_node_is_blank(node)) { + + type = BlankValue; + + } else { + cerr << "SimpleSPARQLQuery: LIBRDF query returned unknown node type (not resource, literal, 
or blank)" << endl; - resultmap[key] = Value(); - librdf_free_node(node); - continue; } - QString text = (const char *)librdf_node_get_literal_value(node); - #ifdef DEBUG_SIMPLE_SPARQL_QUERY - std::cerr << i << ". " << key.toStdString() << " -> " << text.toStdString() << " (type " << type << ")" << std::endl; + cerr << i << ". " << key.toStdString() << " -> " << text.toStdString() << " (type " << type << ")" << endl; #endif resultmap[key] = Value(type, text); @@ -592,7 +645,9 @@ librdf_free_query_results(results); librdf_free_query(query); - std::cerr << "All results retrieved" << std::endl; +#ifdef DEBUG_SIMPLE_SPARQL_QUERY + cerr << "All results retrieved (" << resultCount << " of them)" << endl; +#endif return list; #endif
--- a/rdf/SimpleSPARQLQuery.h Thu Nov 13 14:23:23 2008 +0000 +++ b/rdf/SimpleSPARQLQuery.h Fri Nov 14 10:10:05 2008 +0000 @@ -54,11 +54,16 @@ QString query, QString binding); - enum ImplementationPreference { - UseDirectParser, // rasqal (default because it's simpler if seldom used) - UseDatastore // redland + enum BackEndPreference { + AutoSelectBackEnd, // pick based on likely speed of available storage + DirectParserBackEnd, // use rasqal (simpler if seldom used) + DatastoreBackEnd, // use redland (faster if version not too old) }; - static void setImplementationPreference(ImplementationPreference); + /** + * Select the preferred query back end. This should be called + * before any queries are made. The default is AutoSelectBackEnd. + */ + static void setBackEnd(BackEndPreference); protected: class Impl;
--- a/transform/TransformFactory.cpp Thu Nov 13 14:23:23 2008 +0000 +++ b/transform/TransformFactory.cpp Fri Nov 14 10:10:05 2008 +0000 @@ -72,6 +72,14 @@ m_thread->start(); } +void +TransformFactory::UninstalledTransformsPopulateThread::run() +{ + m_factory->m_populatingSlowly = true; + sleep(1); + m_factory->populateUninstalledTransforms(); +} + TransformList TransformFactory::getAllTransformDescriptions() {
--- a/transform/TransformFactory.h Thu Nov 13 14:23:23 2008 +0000 +++ b/transform/TransformFactory.h Fri Nov 14 10:10:05 2008 +0000 @@ -217,11 +217,7 @@ UninstalledTransformsPopulateThread(TransformFactory *factory) : m_factory(factory) { } - virtual void run() { - m_factory->m_populatingSlowly = true; - sleep(2); - m_factory->populateUninstalledTransforms(); - } + virtual void run(); TransformFactory *m_factory; };