diff rdf/PluginRDFIndexer.cpp @ 489:82ab61fa9223

* Reorganise our SPARQL queries on the basis that Redland is now required rather than merely optional. So for anything querying the pool of data about plugins, we use a single datastore and model which is initialised at the outset by PluginRDFIndexer and then queried directly; for anything that "reads from a file" (e.g. loading annotations) we query directly using Rasqal, going to the datastore when we need additional plugin-related information. This may improve performance, but mostly it simplifies the code and fixes a serious issue with RDF import in previous versions (namely that multiple sequential RDF imports would end up sharing the same RDF data pool!)
author Chris Cannam
date Fri, 21 Nov 2008 16:12:29 +0000
parents b13213785a6f
children 1b8c748fd7ea
line wrap: on
line diff
--- a/rdf/PluginRDFIndexer.cpp	Fri Nov 21 14:25:33 2008 +0000
+++ b/rdf/PluginRDFIndexer.cpp	Fri Nov 21 16:12:29 2008 +0000
@@ -87,7 +87,7 @@
         for (QStringList::const_iterator j = entries.begin();
              j != entries.end(); ++j) {
             QFileInfo fi(dir.filePath(*j));
-            indexFile(fi.absoluteFilePath());
+            pullFile(fi.absoluteFilePath());
         }
 
         QStringList subdirs = dir.entryList
@@ -102,11 +102,13 @@
                 for (QStringList::const_iterator k = entries.begin();
                      k != entries.end(); ++k) {
                     QFileInfo fi(subdir.filePath(*k));
-                    indexFile(fi.absoluteFilePath());
+                    pullFile(fi.absoluteFilePath());
                 }
             }
         }
     }
+
+    reindex();
 }
 
 bool
@@ -140,7 +142,7 @@
              j != list.end(); ++j) {
             std::cerr << "PluginRDFIndexer::indexConfiguredURLs: url is "
                       << j->toStdString() << std::endl;
-            indexURL(*j);
+            pullURL(*j);
         }
     }
 
@@ -148,10 +150,11 @@
     QStringList urls = settings.value(urlListKey).toStringList();
 
     for (int i = 0; i < urls.size(); ++i) {
-        indexURL(urls[i]);
+        pullURL(urls[i]);
     }
     
     settings.endGroup();
+    reindex();
     return true;
 }
 
@@ -198,49 +201,39 @@
     return id;
 }
 
-QString
-PluginRDFIndexer::getDescriptionURLForPluginId(QString pluginId)
-{
-    QMutexLocker locker(&m_mutex);
-
-    if (m_idToDescriptionMap.find(pluginId) == m_idToDescriptionMap.end()) return "";
-    return m_idToDescriptionMap[pluginId];
-}
-
-QString
-PluginRDFIndexer::getDescriptionURLForPluginURI(QString uri)
-{
-    QMutexLocker locker(&m_mutex);
-
-    QString id = getIdForPluginURI(uri);
-    if (id == "") return "";
-    return getDescriptionURLForPluginId(id);
-}
-
 QStringList
 PluginRDFIndexer::getIndexedPluginIds() 
 {
     QMutexLocker locker(&m_mutex);
 
     QStringList ids;
-    for (StringMap::const_iterator i = m_idToDescriptionMap.begin();
-         i != m_idToDescriptionMap.end(); ++i) {
+    for (StringMap::const_iterator i = m_idToUriMap.begin();
+         i != m_idToUriMap.end(); ++i) {
         ids.push_back(i->first);
     }
     return ids;
 }
 
 bool
-PluginRDFIndexer::indexFile(QString filepath)
+PluginRDFIndexer::pullFile(QString filepath)
 {
     QUrl url = QUrl::fromLocalFile(filepath);
     QString urlString = url.toString();
-    return indexURL(urlString);
+    return pullURL(urlString);
 }
 
 bool
 PluginRDFIndexer::indexURL(QString urlString)
 {
+    bool pulled = pullURL(urlString);
+    if (!pulled) return false;
+    reindex();
+    return true;
+}
+
+bool
+PluginRDFIndexer::pullURL(QString urlString)
+{
     Profiler profiler("PluginRDFIndexer::indexURL");
 
     std::cerr << "PluginRDFIndexer::indexURL(" << urlString.toStdString() << ")" << std::endl;
@@ -258,51 +251,23 @@
         }
 
         localString = QUrl::fromLocalFile(cf.getLocalFilename()).toString();
-//        localString = "file://" + cf.getLocalFilename(); //!!! crud - fix!
     }
 
-//    cerr << "PluginRDFIndexer::indexURL: url = <" << urlString.toStdString() << ">" << endl;
-/*!!!
+    return SimpleSPARQLQuery::addSourceToModel(localString);
+}
+
+bool
+PluginRDFIndexer::reindex()
+{
+    SimpleSPARQLQuery::QueryType m = SimpleSPARQLQuery::QueryFromModel;
+
     SimpleSPARQLQuery query
-        (localString,
-         QString
-         (
-             " PREFIX vamp: <http://purl.org/ontology/vamp/> "
-
-             " SELECT ?plugin ?library_id ?plugin_id "
-             " FROM <%1> "
-
-             " WHERE { "
-             "   ?plugin a vamp:Plugin . "
-
-             // Make the identifier and library parts optional, so
-             // that we can check and report helpfully if one or both
-             // is absent instead of just getting no results
-
-             //!!! No -- because of rasqal's inability to correctly
-             // handle more than one OPTIONAL graph in a query, let's
-             // make identifier compulsory after all
-             //"   OPTIONAL { ?plugin vamp:identifier ?plugin_id } . "
-
-             "   ?plugin vamp:identifier ?plugin_id . "
-
-             "   OPTIONAL { "
-             "     ?library a vamp:PluginLibrary ; "
-             "              vamp:available_plugin ?plugin ; "
-             "              vamp:identifier ?library_id "
-             "   } "
-             " } "
-             )
-         .arg(localString));
-*/
-    SimpleSPARQLQuery query
-        (localString,
+        (m,
          QString
          (
              " PREFIX vamp: <http://purl.org/ontology/vamp/> "
 
              " SELECT ?plugin ?library ?plugin_id "
-             " FROM <%1> "
 
              " WHERE { "
              "   ?plugin a vamp:Plugin . "
@@ -312,22 +277,18 @@
              "     ?library vamp:available_plugin ?plugin "
              "   } "
              " } "
-             )
-         .arg(localString));
+             ));
 
     SimpleSPARQLQuery::ResultList results = query.execute();
 
     if (!query.isOK()) {
-        cerr << "ERROR: PluginRDFIndexer::indexURL: ERROR: Failed to index document at <"
-             << urlString.toStdString() << ">: "
+        cerr << "ERROR: PluginRDFIndexer::reindex: ERROR: Failed to query plugins from model: "
              << query.getErrorString().toStdString() << endl;
         return false;
     }
 
     if (results.empty()) {
-        cerr << "PluginRDFIndexer::indexURL: NOTE: Document at <"
-             << urlString.toStdString()
-             << "> does not describe any vamp:Plugin resources" << endl;
+        cerr << "PluginRDFIndexer::reindex: NOTE: no vamp:Plugin resources found in indexed documents" << endl;
         return false;
     }
 
@@ -338,18 +299,17 @@
          i != results.end(); ++i) {
 
         QString pluginUri = (*i)["plugin"].value;
-//!!!        QString soname = (*i)["library_id"].value;
         QString soUri = (*i)["library"].value;
         QString identifier = (*i)["plugin_id"].value;
 
         if (identifier == "") {
-            cerr << "PluginRDFIndexer::indexURL: NOTE: No vamp:identifier for plugin <"
+            cerr << "PluginRDFIndexer::reindex: NOTE: No vamp:identifier for plugin <"
                  << pluginUri.toStdString() << ">"
                  << endl;
             continue;
         }
         if (soUri == "") {
-            cerr << "PluginRDFIndexer::indexURL: NOTE: No implementation library for plugin <"
+            cerr << "PluginRDFIndexer::reindex: NOTE: No implementation library for plugin <"
                  << pluginUri.toStdString() << ">"
                  << endl;
             continue;
@@ -359,69 +319,40 @@
             QString(
                 " PREFIX vamp: <http://purl.org/ontology/vamp/> "
                 " SELECT ?library_id "
-                " FROM <%1> "
                 " WHERE { "
-                "   <%2> vamp:identifier ?library_id "
+                "   <%1> vamp:identifier ?library_id "
                 " } "
                 )
-            .arg(localString)
             .arg(soUri);
 
         SimpleSPARQLQuery::Value sonameValue = 
-            SimpleSPARQLQuery::singleResultQuery(localString, sonameQuery, "library_id");
+            SimpleSPARQLQuery::singleResultQuery(m, sonameQuery, "library_id");
         QString soname = sonameValue.value;
         if (soname == "") {
-            cerr << "PluginRDFIndexer::indexURL: NOTE: No identifier for library <"
+            cerr << "PluginRDFIndexer::reindex: NOTE: No identifier for library <"
                  << soUri.toStdString() << ">"
                  << endl;
             continue;
         }
 
-
-/*
-        cerr << "PluginRDFIndexer::indexURL: Document for plugin \""
-             << soname.toStdString() << ":" << identifier.toStdString()
-             << "\" (uri <" << pluginUri.toStdString() << ">) is at url <"
-             << urlString.toStdString() << ">" << endl;
-*/
         QString pluginId = PluginIdentifier::createIdentifier
             ("vamp", soname, identifier);
 
         foundSomething = true;
 
-        if (m_idToDescriptionMap.find(pluginId) != m_idToDescriptionMap.end()) {
-/*!!!
-
-  This can happen quite legitimately when using an RDF datastore rather
-  than querying individual files, as of course the datastore contains
-  all plugin data found so far, and each time a file is added to it,
-  subsequent queries will return all older plugins as well.
-
-  It would be more efficient to add everything at once and then do all
-  queries, of course.
-
-            cerr << "PluginRDFIndexer::indexURL: NOTE: Plugin id \""
-                 << pluginId.toStdString() << "\", described in document at <"
-                 << urlString.toStdString()
-                 << ">, has already been described in document <"
-                 << m_idToDescriptionMap[pluginId].toStdString()
-                 << ">: ignoring this new description" << endl;
-*/
+        if (m_idToUriMap.find(pluginId) != m_idToUriMap.end()) {
             continue;
         }
 
-        m_idToDescriptionMap[pluginId] = urlString;
         m_idToUriMap[pluginId] = pluginUri;
 
         addedSomething = true;
 
         if (pluginUri != "") {
             if (m_uriToIdMap.find(pluginUri) != m_uriToIdMap.end()) {
-                cerr << "PluginRDFIndexer::indexURL: WARNING: Found multiple plugins with the same URI:" << endl;
+                cerr << "PluginRDFIndexer::reindex: WARNING: Found multiple plugins with the same URI:" << endl;
                 cerr << "  1. Plugin id \"" << m_uriToIdMap[pluginUri].toStdString() << "\"" << endl;
-                cerr << "     described in <" << m_idToDescriptionMap[m_uriToIdMap[pluginUri]].toStdString() << ">" << endl;
                 cerr << "  2. Plugin id \"" << pluginId.toStdString() << "\"" << endl;
-                cerr << "     described in <" << urlString.toStdString() << ">" << endl;
                 cerr << "both claim URI <" << pluginUri.toStdString() << ">" << endl;
             } else {
                 m_uriToIdMap[pluginUri] = pluginId;
@@ -430,9 +361,7 @@
     }
 
     if (!foundSomething) {
-        cerr << "PluginRDFIndexer::indexURL: NOTE: Document at <"
-             << urlString.toStdString()
-             << "> does not sufficiently describe any plugins" << endl;
+        cerr << "PluginRDFIndexer::reindex: NOTE: Plugins found, but none sufficiently described" << endl;
     }
     
     return addedSomething;