changeset 481:a82645e788fc

* Auto-select RDF datastore/parsing backend; use trees datastore if available
* Make CachedFile remember whether a file has already been successfully located locally (avoiding a system call out to look at the filesystem)
author Chris Cannam
date Fri, 14 Nov 2008 10:10:05 +0000
parents 3ffce691c9bf
children f54381e01141
files data/fileio/CachedFile.cpp data/fileio/CachedFile.h rdf/PluginRDFDescription.cpp rdf/PluginRDFIndexer.cpp rdf/SimpleSPARQLQuery.cpp rdf/SimpleSPARQLQuery.h transform/TransformFactory.cpp transform/TransformFactory.h
diffstat 8 files changed, 196 insertions(+), 60 deletions(-) [+]
line wrap: on
line diff
--- a/data/fileio/CachedFile.cpp	Thu Nov 13 14:23:23 2008 +0000
+++ b/data/fileio/CachedFile.cpp	Fri Nov 14 10:10:05 2008 +0000
@@ -28,11 +28,18 @@
 #include <QDir>
 #include <QCryptographicHash>
 
+#include "base/Profiler.h"
+
 #include <iostream>
 
+CachedFile::OriginLocalFilenameMap
+CachedFile::m_knownGoodCaches;
+
 QString
 CachedFile::getLocalFilenameFor(QUrl url)
 {
+    Profiler p("CachedFile::getLocalFilenameFor");
+
     QDir dir(getCacheDirectory());
 
     QString filename =
@@ -63,10 +70,11 @@
 
 CachedFile::CachedFile(QString origin, ProgressReporter *reporter) :
     m_origin(origin),
-    m_localFilename(getLocalFilenameFor(m_origin)),
     m_reporter(reporter),
     m_ok(false)
 {
+    Profiler p("CachedFile::CachedFile[1]");
+
     std::cerr << "CachedFile::CachedFile: origin is \""
               << origin.toStdString() << "\"" << std::endl;
     check();
@@ -74,10 +82,11 @@
 
 CachedFile::CachedFile(QUrl url, ProgressReporter *reporter) :
     m_origin(url.toString()),
-    m_localFilename(getLocalFilenameFor(m_origin)),
     m_reporter(reporter),
     m_ok(false)
 {
+    Profiler p("CachedFile::CachedFile[2]");
+
     std::cerr << "CachedFile::CachedFile: url is \""
               << url.toString().toStdString() << "\"" << std::endl;
     check();
@@ -105,7 +114,16 @@
     //!!! n.b. obvious race condition here if different CachedFile
     // objects for same url used in more than one thread -- need to
     // lock appropriately.  also consider race condition between
-    // separate instances of the program
+    // separate instances of the program!
+
+    OriginLocalFilenameMap::const_iterator i = m_knownGoodCaches.find(m_origin);
+    if (i != m_knownGoodCaches.end()) {
+        m_ok = true;
+        m_localFilename = i->second;
+        return;
+    }
+
+    m_localFilename = getLocalFilenameFor(m_origin);
 
     if (!QFileInfo(m_localFilename).exists()) {
         std::cerr << "CachedFile::check: Local file does not exist, making a note that it hasn't been retrieved" << std::endl;
@@ -149,6 +167,10 @@
             // retrieval timestamp is already invalid
         }
     }
+
+    if (m_ok) {
+        m_knownGoodCaches[m_origin] = m_localFilename;
+    }
 }
 
 bool
--- a/data/fileio/CachedFile.h	Thu Nov 13 14:23:23 2008 +0000
+++ b/data/fileio/CachedFile.h	Fri Nov 14 10:10:05 2008 +0000
@@ -19,6 +19,7 @@
 #include <QString>
 #include <QUrl>
 #include <QDateTime>
+#include <map>
 
 class ProgressReporter;
 
@@ -48,6 +49,9 @@
 
     static QString getCacheDirectory();
     static QString getLocalFilenameFor(QUrl url);
+
+    typedef std::map<QString, QString> OriginLocalFilenameMap;
+    static OriginLocalFilenameMap m_knownGoodCaches;
 };
 
 #endif
--- a/rdf/PluginRDFDescription.cpp	Thu Nov 13 14:23:23 2008 +0000
+++ b/rdf/PluginRDFDescription.cpp	Fri Nov 14 10:10:05 2008 +0000
@@ -191,8 +191,8 @@
             " PREFIX dc: <http://purl.org/dc/elements/1.1/> "
             " SELECT ?%4 FROM <%1> "
             " WHERE { "
-            "   ?plugin a vamp:Plugin ; "
-            "           vamp:identifier \"%2\" ; "
+            "   ?plugin vamp:identifier \"%2\" ; "
+            "           a vamp:Plugin ; "
             "           %3 ?%4 . "
             " }")
         .arg(url)
@@ -221,8 +221,8 @@
             " PREFIX foaf: <http://xmlns.com/foaf/0.1/> "
             " SELECT ?name FROM <%1> "
             " WHERE { "
-            "   ?plugin a vamp:Plugin ; "
-            "           vamp:identifier \"%2\" ; "
+            "   ?plugin vamp:identifier \"%2\" ; "
+            "           a vamp:Plugin ; "
             "           foaf:maker ?maker . "
             "   ?maker foaf:name ?name . "
             " }")
@@ -246,8 +246,8 @@
             " PREFIX foaf: <http://xmlns.com/foaf/0.1/> "
             " SELECT ?page from <%1> "
             " WHERE { "
-            "   ?plugin a vamp:Plugin ; "
-            "           vamp:identifier \"%2\" ; "
+            "   ?plugin vamp:identifier \"%2\" ; "
+            "           a vamp:Plugin ; "
             "           foaf:page ?page . "
             " }")
          .arg(url)
@@ -300,8 +300,8 @@
 
              " WHERE { "
 
-             "   ?plugin a vamp:Plugin ; "
-             "           vamp:identifier \"%2\" ; "
+             "   ?plugin vamp:identifier \"%2\" ; "
+             "           a vamp:Plugin ; "
              "           vamp:output ?output . "
 
              "   ?output vamp:identifier ?output_id ; "
--- a/rdf/PluginRDFIndexer.cpp	Thu Nov 13 14:23:23 2008 +0000
+++ b/rdf/PluginRDFIndexer.cpp	Fri Nov 14 10:10:05 2008 +0000
@@ -261,7 +261,7 @@
     }
 
 //    cerr << "PluginRDFIndexer::indexURL: url = <" << urlString.toStdString() << ">" << endl;
-
+/*!!!
     SimpleSPARQLQuery query
         (localString,
          QString
@@ -293,6 +293,26 @@
              " } "
              )
          .arg(localString));
+*/
+    SimpleSPARQLQuery query
+        (localString,
+         QString
+         (
+             " PREFIX vamp: <http://purl.org/ontology/vamp/> "
+
+             " SELECT ?plugin ?library ?plugin_id "
+             " FROM <%1> "
+
+             " WHERE { "
+             "   ?plugin a vamp:Plugin . "
+             "   ?plugin vamp:identifier ?plugin_id . "
+
+             "   OPTIONAL { "
+             "     ?library vamp:available_plugin ?plugin "
+             "   } "
+             " } "
+             )
+         .arg(localString));
 
     SimpleSPARQLQuery::ResultList results = query.execute();
 
@@ -317,7 +337,8 @@
          i != results.end(); ++i) {
 
         QString pluginUri = (*i)["plugin"].value;
-        QString soname = (*i)["library_id"].value;
+//!!!        QString soname = (*i)["library_id"].value;
+        QString soUri = (*i)["library"].value;
         QString identifier = (*i)["plugin_id"].value;
 
         if (identifier == "") {
@@ -328,13 +349,38 @@
                  << endl;
             continue;
         }
-        if (soname == "") {
+        if (soUri == "") {
             cerr << "PluginRDFIndexer::indexURL: NOTE: Document at <"
                  << urlString.toStdString() << "> does not associate plugin <"
                  << pluginUri.toStdString() << "> with any implementation library"
                  << endl;
             continue;
         }
+
+        QString sonameQuery =
+            QString(
+                " PREFIX vamp: <http://purl.org/ontology/vamp/> "
+                " SELECT ?library_id "
+                " FROM <%1> "
+                " WHERE { "
+                "   <%2> vamp:identifier ?library_id "
+                " } "
+                )
+            .arg(localString)
+            .arg(soUri);
+
+        SimpleSPARQLQuery::Value sonameValue = 
+            SimpleSPARQLQuery::singleResultQuery(localString, sonameQuery, "library_id");
+        QString soname = sonameValue.value;
+        if (soname == "") {
+            cerr << "PluginRDFIndexer::indexURL: NOTE: Document at <"
+                 << urlString.toStdString() << "> omits identifier for library <"
+                 << soUri.toStdString() << ">"
+                 << endl;
+            continue;
+        }
+
+
 /*
         cerr << "PluginRDFIndexer::indexURL: Document for plugin \""
              << soname.toStdString() << ":" << identifier.toStdString()
--- a/rdf/SimpleSPARQLQuery.cpp	Thu Nov 13 14:23:23 2008 +0000
+++ b/rdf/SimpleSPARQLQuery.cpp	Fri Nov 14 10:10:05 2008 +0000
@@ -19,6 +19,7 @@
 
 #include <QMutex>
 #include <QMutexLocker>
+#include <QRegExp>
 
 #include <set>
 
@@ -43,8 +44,25 @@
 class WrasqalWorldWrapper // wrong but wromantic, etc
 {
 public:
-    WrasqalWorldWrapper() : m_world(rasqal_new_world()) { }
-    ~WrasqalWorldWrapper() { rasqal_free_world(m_world); }
+    WrasqalWorldWrapper() :
+        m_world(0)
+    {
+        m_world = rasqal_new_world();
+        if (!m_world) {
+            cerr << "SimpleSPARQLQuery: ERROR: Failed to create RASQAL world!" << endl;
+            return;
+        }
+/*!!! This appears to be new for 0.9.17?
+        if (rasqal_world_open(m_world)) {
+            cerr << "SimpleSPARQLQuery: ERROR: Failed to open RASQAL world!" << endl;
+            return;
+        }
+*/
+    }
+    ~WrasqalWorldWrapper()
+    {
+        rasqal_free_world(m_world);
+    }
 
     rasqal_world *getWorld() { return m_world; }
     const rasqal_world *getWorld() const { return m_world; }
@@ -62,12 +80,14 @@
         m_world(0), m_storage(0), m_model(0)
     {
         m_world = librdf_new_world();
+        if (!m_world) {
+            cerr << "SimpleSPARQLQuery: ERROR: Failed to create LIBRDF world!" << endl;
+            return;
+        }
         librdf_world_open(m_world);
-            m_storage = librdf_new_storage(m_world, NULL, NULL, NULL);
-//        m_storage = librdf_new_storage(m_world, "hashes", NULL,
-//.                                       "hash-type='memory',indexes=1");
+        m_storage = librdf_new_storage(m_world, "trees", NULL, NULL);
         if (!m_storage) {
-            std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland hashes datastore, falling back to memory store" << std::endl;
+            std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland trees datastore, falling back to memory store" << std::endl;
             m_storage = librdf_new_storage(m_world, NULL, NULL, NULL);
             if (!m_storage) {
                 std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland memory datastore" << std::endl;
@@ -165,8 +185,7 @@
     bool isOK() const;
     QString getErrorString() const;
 
-    static void setImplementationPreference
-    (SimpleSPARQLQuery::ImplementationPreference p) {
+    static void setBackEnd(SimpleSPARQLQuery::BackEndPreference p) {
         m_preference = p;
     }
 
@@ -185,7 +204,7 @@
     static WredlandWorldWrapper *m_redland;
 #endif
 
-    static SimpleSPARQLQuery::ImplementationPreference m_preference;
+    static SimpleSPARQLQuery::BackEndPreference m_preference;
 
     ResultList executeDirectParser();
     ResultList executeDatastore();
@@ -209,8 +228,8 @@
 
 QMutex SimpleSPARQLQuery::Impl::m_mutex;
 
-SimpleSPARQLQuery::ImplementationPreference
-SimpleSPARQLQuery::Impl::m_preference = SimpleSPARQLQuery::UseDirectParser;
+SimpleSPARQLQuery::BackEndPreference
+SimpleSPARQLQuery::Impl::m_preference = SimpleSPARQLQuery::AutoSelectBackEnd;
 
 SimpleSPARQLQuery::SimpleSPARQLQuery(QString fromUri, QString query) :
     m_impl(new Impl(fromUri, query))
@@ -253,9 +272,9 @@
 }
 
 void
-SimpleSPARQLQuery::setImplementationPreference(ImplementationPreference p)
+SimpleSPARQLQuery::setBackEnd(BackEndPreference p)
 {
-    SimpleSPARQLQuery::Impl::setImplementationPreference(p);
+    SimpleSPARQLQuery::Impl::setBackEnd(p);
 }
 
 SimpleSPARQLQuery::Impl::Impl(QString fromUri, QString query) :
@@ -292,11 +311,14 @@
 {
     SimpleSPARQLQuery::Impl *impl = (SimpleSPARQLQuery::Impl *)data;
     
-//    char buffer[256];
-//    raptor_format_locator(buffer, 255, locator);
-//    impl->m_errorString = QString("%1 - %2").arg(buffer).arg(message);
-
-    impl->m_errorString = message;
+    char buffer[256];
+    raptor_format_locator(buffer, 255, locator);
+    QString loc(buffer);
+    if (loc != "") {
+        impl->m_errorString = QString("%1 - %2").arg(loc).arg(message);
+    } else {
+        impl->m_errorString = message;
+    }
 
     cerr << "SimpleSPARQLQuery: ERROR: " << impl->m_errorString.toStdString() << endl;
 }
@@ -306,27 +328,45 @@
 {
     ResultList list;
 
-    ImplementationPreference preference;
+    BackEndPreference preference;
 
     m_mutex.lock();
 
-    if (m_preference == UseDatastore) {
+    if (m_preference == AutoSelectBackEnd) {
+#ifdef HAVE_REDLAND
+//        cerr << "librdf version: " << librdf_version_major << "." << librdf_version_minor << "." << librdf_version_release << endl;
+        if (librdf_version_major > 1 ||
+            (librdf_version_major == 1 &&
+             (librdf_version_minor > 0 ||
+              (librdf_version_minor == 0 &&
+               librdf_version_release > 7)))) {
+            cerr << "SimpleSPARQLQuery: Auto-selecting LIBRDF back-end for tree-based storage" << endl;
+            m_preference = DatastoreBackEnd;
+        }
+#endif
+        if (m_preference == AutoSelectBackEnd) {
+            cerr << "SimpleSPARQLQuery: Auto-selecting RASQAL back-end" << endl;
+            m_preference = DirectParserBackEnd;
+        }
+    }
+
+    if (m_preference == DatastoreBackEnd) {
 #ifdef HAVE_REDLAND
         if (!m_redland) {
             m_redland = new WredlandWorldWrapper();
             if (!m_redland->isOK()) {
                 cerr << "WARNING: SimpleSPARQLQuery::execute: Failed to initialise Redland datastore, falling back to direct parser implementation" << endl;
                 delete m_redland;
-                m_preference = UseDirectParser;
+                m_preference = DirectParserBackEnd;
             }
         }
 #else
         cerr << "WARNING: SimpleSPARQLQuery::execute: Datastore implementation preference indicated, but no datastore compiled in; using direct parser" << endl;
-        m_preference = UseDirectParser;
+        m_preference = DirectParserBackEnd;
 #endif
     }
 
-    if (m_preference == UseDirectParser) {
+    if (m_preference == DirectParserBackEnd) {
 #ifdef USE_NEW_RASQAL_API
         if (!m_rasqal) m_rasqal = new WrasqalWorldWrapper();
 #else
@@ -340,7 +380,7 @@
     preference = m_preference;
     m_mutex.unlock();
 
-    if (preference == SimpleSPARQLQuery::UseDirectParser) {
+    if (preference == SimpleSPARQLQuery::DirectParserBackEnd) {
         return executeDirectParser();
     } else {
         return executeDatastore();
@@ -480,11 +520,16 @@
     librdf_uri *uri = m_redland->getUri(m_fromUri, m_errorString);
     if (!uri) return list;
 
+#ifdef DEBUG_SIMPLE_SPARQL_QUERY
     std::cerr << "SimpleSPARQLQuery: Query is: \"" << m_query.toStdString() << "\"" << std::endl;
+#endif
+/*!!!
     static std::map<QString, int> counter;
     if (counter.find(m_query) == counter.end()) counter[m_query] = 1;
     else ++counter[m_query];
     std::cerr << "Counter for this query: " << counter[m_query] << std::endl;
+    std::cerr << "Base URI is: \"" << m_fromUri.toStdString() << "\"" << std::endl;
+*/
 
     librdf_query *query;
 
@@ -494,7 +539,6 @@
             (m_redland->getWorld(), "sparql", NULL,
              (const unsigned char *)m_query.toUtf8().data(), uri);
     }
-    std::cerr << "Prepared" << std::endl;
     
     if (!query) {
         m_errorString = "Failed to construct query";
@@ -506,7 +550,6 @@
         Profiler p("SimpleSPARQLQuery: Execute LIBRDF query");
         results = librdf_query_execute(query, m_redland->getModel());
     }
-    std::cerr << "Executed" << std::endl;
 
     if (!results) {
         cerr << "SimpleSPARQLQuery: LIBRDF query failed" << endl;
@@ -547,20 +590,30 @@
             }
 
             ValueType type = LiteralValue;
-            if (librdf_node_is_resource(node)) type = URIValue;
-            else if (librdf_node_is_literal(node)) type = LiteralValue;
-            else if (librdf_node_is_blank(node)) type = BlankValue;
-            else {
+            QString text;
+
+            if (librdf_node_is_resource(node)) {
+
+                type = URIValue;
+                librdf_uri *uri = librdf_node_get_uri(node);
+                text = (const char *)librdf_uri_as_string(uri);
+
+            } else if (librdf_node_is_literal(node)) {
+
+                type = LiteralValue;
+                text = (const char *)librdf_node_get_literal_value(node);
+
+            } else if (librdf_node_is_blank(node)) {
+
+                type = BlankValue;
+
+            } else {
+
                 cerr << "SimpleSPARQLQuery: LIBRDF query returned unknown node type (not resource, literal, or blank)" << endl;
-                resultmap[key] = Value();
-                librdf_free_node(node);
-                continue;
             }
 
-            QString text = (const char *)librdf_node_get_literal_value(node);
-
 #ifdef DEBUG_SIMPLE_SPARQL_QUERY
-            std::cerr << i << ". " << key.toStdString() << " -> " << text.toStdString() << " (type " << type << ")" << std::endl;
+            cerr << i << ". " << key.toStdString() << " -> " << text.toStdString() << " (type " << type << ")" << endl;
 #endif
 
             resultmap[key] = Value(type, text);
@@ -592,7 +645,9 @@
     librdf_free_query_results(results);
     librdf_free_query(query);
 
-    std::cerr << "All results retrieved" << std::endl;
+#ifdef DEBUG_SIMPLE_SPARQL_QUERY
+    cerr << "All results retrieved (" << resultCount << " of them)" << endl;
+#endif
 
     return list;
 #endif
--- a/rdf/SimpleSPARQLQuery.h	Thu Nov 13 14:23:23 2008 +0000
+++ b/rdf/SimpleSPARQLQuery.h	Fri Nov 14 10:10:05 2008 +0000
@@ -54,11 +54,16 @@
                                    QString query,
                                    QString binding);
 
-    enum ImplementationPreference {
-        UseDirectParser, // rasqal (default because it's simpler if seldom used)
-        UseDatastore     // redland
+    enum BackEndPreference {
+        AutoSelectBackEnd,   // pick based on likely speed of available storage
+        DirectParserBackEnd, // use rasqal (simpler if seldom used)
+        DatastoreBackEnd,    // use redland (faster if version not too old)
     };
-    static void setImplementationPreference(ImplementationPreference);
+    /**
+     * Select the preferred query back end.  This should be called
+     * before any queries are made.  The default is AutoSelectBackEnd.
+     */
+    static void setBackEnd(BackEndPreference);
 
 protected:
     class Impl;
--- a/transform/TransformFactory.cpp	Thu Nov 13 14:23:23 2008 +0000
+++ b/transform/TransformFactory.cpp	Fri Nov 14 10:10:05 2008 +0000
@@ -72,6 +72,14 @@
     m_thread->start();
 }
 
+void
+TransformFactory::UninstalledTransformsPopulateThread::run()
+{
+    m_factory->m_populatingSlowly = true;
+    sleep(1);
+    m_factory->populateUninstalledTransforms();
+}
+
 TransformList
 TransformFactory::getAllTransformDescriptions()
 {
--- a/transform/TransformFactory.h	Thu Nov 13 14:23:23 2008 +0000
+++ b/transform/TransformFactory.h	Fri Nov 14 10:10:05 2008 +0000
@@ -217,11 +217,7 @@
         UninstalledTransformsPopulateThread(TransformFactory *factory) :
             m_factory(factory) {
         }
-        virtual void run() {
-            m_factory->m_populatingSlowly = true;
-            sleep(2);
-            m_factory->populateUninstalledTransforms();
-        }
+        virtual void run();
         TransformFactory *m_factory;
     };