changeset 461:2019d89ebcf9

* Some work on querying and caching plugin RDF from a central index
author Chris Cannam
date Fri, 17 Oct 2008 15:26:29 +0000
parents 93fb1ebff76b
children 957e6a5c8495
files data/fileio/FileSource.cpp rdf/PluginRDFDescription.cpp rdf/PluginRDFIndexer.cpp rdf/PluginRDFIndexer.h rdf/SimpleSPARQLQuery.cpp transform/TransformFactory.cpp
diffstat 6 files changed, 148 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/data/fileio/FileSource.cpp	Fri Oct 17 13:32:55 2008 +0000
+++ b/data/fileio/FileSource.cpp	Fri Oct 17 15:26:29 2008 +0000
@@ -421,6 +421,10 @@
 void
 FileSource::cleanup()
 {
+    if (m_done) {
+        delete m_localFile; // does not actually delete the file
+        m_localFile = 0;
+    }
     m_done = true;
     if (m_http) {
         QHttp *h = m_http;
@@ -434,8 +438,10 @@
         f->abort();
         f->deleteLater();
     }
-    delete m_localFile; // does not actually delete the file
-    m_localFile = 0;
+    if (m_localFile) {
+        delete m_localFile; // does not actually delete the file
+        m_localFile = 0;
+    }
 }
 
 bool
@@ -660,7 +666,8 @@
 
     if (error) {
 #ifdef DEBUG_FILE_SOURCE
-        std::cerr << "FileSource::done: error is " << error << ", deleting cache file" << std::endl;
+        std::cerr << "FileSource::done: error is " << error << " (\"" 
+                  << m_errorString.toStdString() << "\"), deleting cache file" << std::endl;
 #endif
         deleteCacheFile();
     }
@@ -756,7 +763,7 @@
     QString filename =
         QString::fromLocal8Bit
         (QCryptographicHash::hash(url.toString().toLocal8Bit(),
-                                  QCryptographicHash::Sha1).toBase64());
+                                  QCryptographicHash::Sha1).toHex());
 
     return dir.filePath(filename);
 }
--- a/rdf/PluginRDFDescription.cpp	Fri Oct 17 13:32:55 2008 +0000
+++ b/rdf/PluginRDFDescription.cpp	Fri Oct 17 15:26:29 2008 +0000
@@ -159,8 +159,11 @@
 
     if (FileSource::isRemote(url) &&
         FileSource::canHandleScheme(url)) {
+        
+        //!!! persistent with expiry
 
-        m_source = new FileSource(url);
+        m_source = new FileSource(url, 0, FileSource::PersistentCache);
+
         if (!m_source->isAvailable()) {
             delete m_source;
             m_source = 0;
--- a/rdf/PluginRDFIndexer.cpp	Fri Oct 17 13:32:55 2008 +0000
+++ b/rdf/PluginRDFIndexer.cpp	Fri Oct 17 15:26:29 2008 +0000
@@ -18,6 +18,7 @@
 #include "SimpleSPARQLQuery.h"
 
 #include "data/fileio/FileSource.h"
+#include "data/fileio/PlaylistFileReader.h"
 #include "plugin/PluginIdentifier.h"
 
 #include "base/Profiler.h"
@@ -27,6 +28,9 @@
 #include <QFileInfo>
 #include <QDir>
 #include <QUrl>
+#include <QDateTime>
+#include <QSettings>
+#include <QFile>
 
 #include <iostream>
 using std::cerr;
@@ -95,15 +99,66 @@
 
 PluginRDFIndexer::~PluginRDFIndexer()
 {
+    QMutexLocker locker(&m_mutex);
+
     while (!m_sources.empty()) {
         delete *m_sources.begin();
         m_sources.erase(m_sources.begin());
     }
 }
 
+bool
+PluginRDFIndexer::indexConfiguredURLs()
+{
+    std::cerr << "PluginRDFIndexer::indexConfiguredURLs" << std::endl;
+
+    QSettings settings;
+    settings.beginGroup("RDF");
+    
+    QString indexKey("rdf-indices");
+    QStringList indices = settings.value(indexKey).toStringList();
+    
+    for (int i = 0; i < indices.size(); ++i) {
+
+        QString index = indices[i];
+
+        std::cerr << "PluginRDFIndexer::indexConfiguredURLs: index url is "
+                  << index.toStdString() << std::endl;
+
+        expireCacheMaybe(index);
+
+        FileSource indexSource(index, 0, FileSource::PersistentCache);
+        if (!indexSource.isAvailable()) continue;
+        indexSource.waitForData();
+
+        PlaylistFileReader reader(indexSource);
+        if (!reader.isOK()) continue;
+
+        PlaylistFileReader::Playlist list = reader.load();
+        for (PlaylistFileReader::Playlist::const_iterator j = list.begin();
+             j != list.end(); ++j) {
+            std::cerr << "PluginRDFIndexer::indexConfiguredURLs: url is "
+                      << j->toStdString() << std::endl;
+            indexURL(*j);
+        }
+    }
+
+    QString urlListKey("rdf-urls");
+    QStringList urls = settings.value(urlListKey).toStringList();
+
+    for (int i = 0; i < urls.size(); ++i) {
+        indexURL(urls[i]);
+    }
+    
+    settings.endGroup();
+    return true;
+}
+
 QString
 PluginRDFIndexer::getURIForPluginId(QString pluginId)
 {
+    QMutexLocker locker(&m_mutex);
+
     if (m_idToUriMap.find(pluginId) == m_idToUriMap.end()) return "";
     return m_idToUriMap[pluginId];
 }
@@ -111,6 +166,8 @@
 QString
 PluginRDFIndexer::getIdForPluginURI(QString uri)
 {
+    QMutexLocker locker(&m_mutex);
+
     if (m_uriToIdMap.find(uri) == m_uriToIdMap.end()) {
 
         // Haven't found this uri referenced in any document on the
@@ -137,6 +194,8 @@
 QString
 PluginRDFIndexer::getDescriptionURLForPluginId(QString pluginId)
 {
+    QMutexLocker locker(&m_mutex);
+
     if (m_idToDescriptionMap.find(pluginId) == m_idToDescriptionMap.end()) return "";
     return m_idToDescriptionMap[pluginId];
 }
@@ -144,6 +203,8 @@
 QString
 PluginRDFIndexer::getDescriptionURLForPluginURI(QString uri)
 {
+    QMutexLocker locker(&m_mutex);
+
     QString id = getIdForPluginURI(uri);
     if (id == "") return "";
     return getDescriptionURLForPluginId(id);
@@ -152,6 +213,8 @@
 QStringList
 PluginRDFIndexer::getIndexedPluginIds() 
 {
+    QMutexLocker locker(&m_mutex);
+
     QStringList ids;
     for (StringMap::const_iterator i = m_idToDescriptionMap.begin();
          i != m_idToDescriptionMap.end(); ++i) {
@@ -168,16 +231,61 @@
     return indexURL(urlString);
 }
 
+void
+PluginRDFIndexer::expireCacheMaybe(QString urlString)
+{
+    QString cacheFile = FileSource::getPersistentCacheFilePath(urlString);
+
+    QSettings settings;
+    settings.beginGroup("RDF");
+
+    QString key("rdf-expiry-times");
+
+    QMap<QString, QVariant> expiryMap = settings.value(key).toMap();
+    QDateTime lastExpiry = expiryMap[urlString].toDateTime();
+
+    if (!QFileInfo(cacheFile).exists()) {
+        expiryMap[urlString] = QDateTime::currentDateTime();
+        settings.setValue(key, expiryMap);
+        settings.endGroup();
+        return;
+    }
+
+    if (!lastExpiry.isValid() ||
+        (lastExpiry.addDays(2) < QDateTime::currentDateTime())) {
+
+        std::cerr << "Expiring old cache file " << cacheFile.toStdString()
+                  << std::endl;
+
+        if (QFile(cacheFile).remove()) {
+
+            expiryMap[urlString] = QDateTime::currentDateTime();
+            settings.setValue(key, expiryMap);
+        }
+    }
+
+    settings.endGroup();
+}
+
 bool
 PluginRDFIndexer::indexURL(QString urlString)
 {
     Profiler profiler("PluginRDFIndexer::indexURL");
 
+    std::cerr << "PluginRDFIndexer::indexURL(" << urlString.toStdString() << ")" << std::endl;
+
+    QMutexLocker locker(&m_mutex);
+
     QString localString = urlString;
 
     if (FileSource::isRemote(urlString) &&
         FileSource::canHandleScheme(urlString)) {
 
+        //!!! how do we avoid hammering the server if it doesn't have
+        //!!! the file, and/or the network if it can't get through?
+
+        expireCacheMaybe(urlString);
+
         FileSource *source = new FileSource
             (urlString, 0, FileSource::PersistentCache);
         if (!source->isAvailable()) {
--- a/rdf/PluginRDFIndexer.h	Fri Oct 17 13:32:55 2008 +0000
+++ b/rdf/PluginRDFIndexer.h	Fri Oct 17 15:26:29 2008 +0000
@@ -18,6 +18,7 @@
 
 #include <QString>
 #include <QStringList>
+#include <QMutex>
 #include <map>
 #include <set>
 
@@ -28,6 +29,18 @@
 public:
     static PluginRDFIndexer *getInstance();
 
+    /**
+     * Index all URLs obtained from index files defined in the current
+     * settings.  This is not done automatically because it may incur
+     * significant processing and networking effort.  It could be
+     * called from a background thread at startup, for example.
+     *
+     * Note that this class has a single mutex, so other functions
+     * will block if called from a different thread while this one is
+     * running.
+     */
+    bool indexConfiguredURLs();
+
     bool indexURL(QString url); // in addition to "installed" URLs
 
     QString getURIForPluginId(QString pluginId);
@@ -41,6 +54,7 @@
 
 protected:
     PluginRDFIndexer();
+    QMutex m_mutex;
     std::set<FileSource *> m_sources;
     typedef std::map<QString, QString> StringMap;
     StringMap m_uriToIdMap;
@@ -48,6 +62,7 @@
     StringMap m_idToDescriptionMap;
     bool indexFile(QString path);
     static PluginRDFIndexer *m_instance;
+    void expireCacheMaybe(QString);
 };
 
 #endif
--- a/rdf/SimpleSPARQLQuery.cpp	Fri Oct 17 13:32:55 2008 +0000
+++ b/rdf/SimpleSPARQLQuery.cpp	Fri Oct 17 15:26:29 2008 +0000
@@ -22,6 +22,8 @@
 #include <rasqal.h>
 #endif
 
+//#define DEBUG_SIMPLE_SPARQL_QUERY 1
+
 #include <iostream>
 
 using std::cerr;
@@ -121,7 +123,9 @@
     m_reporter(0),
     m_cancelled(false)
 {
-//    std::cerr << "SimpleSPARQLQuery::Impl: Query is: \"" << query.toStdString() << "\"" << std::endl;
+#ifdef DEBUG_SIMPLE_SPARQL_QUERY
+    std::cerr << "SimpleSPARQLQuery::Impl: Query is: \"" << query.toStdString() << "\"" << std::endl;
+#endif
 }
 
 SimpleSPARQLQuery::Impl::~Impl()
@@ -234,7 +238,9 @@
 
             QString text = (const char *)rasqal_literal_as_string(literal);
 
+#ifdef DEBUG_SIMPLE_SPARQL_QUERY
             std::cerr << i << ". " << key.toStdString() << " -> " << text.toStdString() << " (type " << type << ")" << std::endl;
+#endif
 
             resultmap[key] = Value(type, text);
         }
--- a/transform/TransformFactory.cpp	Fri Oct 17 13:32:55 2008 +0000
+++ b/transform/TransformFactory.cpp	Fri Oct 17 15:26:29 2008 +0000
@@ -529,8 +529,9 @@
                        "TransformFactory::populateUninstalledTransforms");
     if (m_uninstalledTransformsPopulated) return;
 
-    PluginRDFIndexer::getInstance()->indexURL
-        ("http://www.vamp-plugins.org/rdf/plugins/vamp-example-plugins");
+//        ("http://www.vamp-plugins.org/rdf/plugins/vamp-example-plugins");
+
+    PluginRDFIndexer::getInstance()->indexConfiguredURLs();
 
     //!!! This will be amazingly slow