Mercurial > hg > svcore
changeset 461:2019d89ebcf9
* Some work on querying and caching plugin RDF from a central index
author | Chris Cannam |
---|---|
date | Fri, 17 Oct 2008 15:26:29 +0000 |
parents | 93fb1ebff76b |
children | 957e6a5c8495 |
files | data/fileio/FileSource.cpp rdf/PluginRDFDescription.cpp rdf/PluginRDFIndexer.cpp rdf/PluginRDFIndexer.h rdf/SimpleSPARQLQuery.cpp transform/TransformFactory.cpp |
diffstat | 6 files changed, 148 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/data/fileio/FileSource.cpp Fri Oct 17 13:32:55 2008 +0000 +++ b/data/fileio/FileSource.cpp Fri Oct 17 15:26:29 2008 +0000 @@ -421,6 +421,10 @@ void FileSource::cleanup() { + if (m_done) { + delete m_localFile; // does not actually delete the file + m_localFile = 0; + } m_done = true; if (m_http) { QHttp *h = m_http; @@ -434,8 +438,10 @@ f->abort(); f->deleteLater(); } - delete m_localFile; // does not actually delete the file - m_localFile = 0; + if (m_localFile) { + delete m_localFile; // does not actually delete the file + m_localFile = 0; + } } bool @@ -660,7 +666,8 @@ if (error) { #ifdef DEBUG_FILE_SOURCE - std::cerr << "FileSource::done: error is " << error << ", deleting cache file" << std::endl; + std::cerr << "FileSource::done: error is " << error << " (\"" + << m_errorString.toStdString() << "\"), deleting cache file" << std::endl; #endif deleteCacheFile(); } @@ -756,7 +763,7 @@ QString filename = QString::fromLocal8Bit (QCryptographicHash::hash(url.toString().toLocal8Bit(), - QCryptographicHash::Sha1).toBase64()); + QCryptographicHash::Sha1).toHex()); return dir.filePath(filename); }
--- a/rdf/PluginRDFDescription.cpp Fri Oct 17 13:32:55 2008 +0000 +++ b/rdf/PluginRDFDescription.cpp Fri Oct 17 15:26:29 2008 +0000 @@ -159,8 +159,11 @@ if (FileSource::isRemote(url) && FileSource::canHandleScheme(url)) { + + //!!! persistent with expiry - m_source = new FileSource(url); + m_source = new FileSource(url, 0, FileSource::PersistentCache); + if (!m_source->isAvailable()) { delete m_source; m_source = 0;
--- a/rdf/PluginRDFIndexer.cpp Fri Oct 17 13:32:55 2008 +0000 +++ b/rdf/PluginRDFIndexer.cpp Fri Oct 17 15:26:29 2008 +0000 @@ -18,6 +18,7 @@ #include "SimpleSPARQLQuery.h" #include "data/fileio/FileSource.h" +#include "data/fileio/PlaylistFileReader.h" #include "plugin/PluginIdentifier.h" #include "base/Profiler.h" @@ -27,6 +28,9 @@ #include <QFileInfo> #include <QDir> #include <QUrl> +#include <QDateTime> +#include <QSettings> +#include <QFile> #include <iostream> using std::cerr; @@ -95,15 +99,66 @@ PluginRDFIndexer::~PluginRDFIndexer() { + QMutexLocker locker(&m_mutex); + while (!m_sources.empty()) { delete *m_sources.begin(); m_sources.erase(m_sources.begin()); } } +bool +PluginRDFIndexer::indexConfiguredURLs() +{ + std::cerr << "PluginRDFIndexer::indexConfiguredURLs" << std::endl; + + QSettings settings; + settings.beginGroup("RDF"); + + QString indexKey("rdf-indices"); + QStringList indices = settings.value(indexKey).toStringList(); + + for (int i = 0; i < indices.size(); ++i) { + + QString index = indices[i]; + + std::cerr << "PluginRDFIndexer::indexConfiguredURLs: index url is " + << index.toStdString() << std::endl; + + expireCacheMaybe(index); + + FileSource indexSource(index, 0, FileSource::PersistentCache); + if (!indexSource.isAvailable()) continue; + indexSource.waitForData(); + + PlaylistFileReader reader(indexSource); + if (!reader.isOK()) continue; + + PlaylistFileReader::Playlist list = reader.load(); + for (PlaylistFileReader::Playlist::const_iterator j = list.begin(); + j != list.end(); ++j) { + std::cerr << "PluginRDFIndexer::indexConfiguredURLs: url is " + << j->toStdString() << std::endl; + indexURL(*j); + } + } + + QString urlListKey("rdf-urls"); + QStringList urls = settings.value(urlListKey).toStringList(); + + for (int i = 0; i < urls.size(); ++i) { + indexURL(urls[i]); + } + + settings.endGroup(); + return true; +} + QString PluginRDFIndexer::getURIForPluginId(QString pluginId) { + QMutexLocker locker(&m_mutex); + if 
(m_idToUriMap.find(pluginId) == m_idToUriMap.end()) return ""; return m_idToUriMap[pluginId]; } @@ -111,6 +166,8 @@ QString PluginRDFIndexer::getIdForPluginURI(QString uri) { + QMutexLocker locker(&m_mutex); + if (m_uriToIdMap.find(uri) == m_uriToIdMap.end()) { // Haven't found this uri referenced in any document on the @@ -137,6 +194,8 @@ QString PluginRDFIndexer::getDescriptionURLForPluginId(QString pluginId) { + QMutexLocker locker(&m_mutex); + if (m_idToDescriptionMap.find(pluginId) == m_idToDescriptionMap.end()) return ""; return m_idToDescriptionMap[pluginId]; } @@ -144,6 +203,8 @@ QString PluginRDFIndexer::getDescriptionURLForPluginURI(QString uri) { + QMutexLocker locker(&m_mutex); + QString id = getIdForPluginURI(uri); if (id == "") return ""; return getDescriptionURLForPluginId(id); @@ -152,6 +213,8 @@ QStringList PluginRDFIndexer::getIndexedPluginIds() { + QMutexLocker locker(&m_mutex); + QStringList ids; for (StringMap::const_iterator i = m_idToDescriptionMap.begin(); i != m_idToDescriptionMap.end(); ++i) { @@ -168,16 +231,61 @@ return indexURL(urlString); } +void +PluginRDFIndexer::expireCacheMaybe(QString urlString) +{ + QString cacheFile = FileSource::getPersistentCacheFilePath(urlString); + + QSettings settings; + settings.beginGroup("RDF"); + + QString key("rdf-expiry-times"); + + QMap<QString, QVariant> expiryMap = settings.value(key).toMap(); + QDateTime lastExpiry = expiryMap[urlString].toDateTime(); + + if (!QFileInfo(cacheFile).exists()) { + expiryMap[urlString] = QDateTime::currentDateTime(); + settings.setValue(key, expiryMap); + settings.endGroup(); + return; + } + + if (!lastExpiry.isValid() || + (lastExpiry.addDays(2) < QDateTime::currentDateTime())) { + + std::cerr << "Expiring old cache file " << cacheFile.toStdString() + << std::endl; + + if (QFile(cacheFile).remove()) { + + expiryMap[urlString] = QDateTime::currentDateTime(); + settings.setValue(key, expiryMap); + } + } + + settings.endGroup(); +} + bool 
PluginRDFIndexer::indexURL(QString urlString) { Profiler profiler("PluginRDFIndexer::indexURL"); + std::cerr << "PluginRDFIndexer::indexURL(" << urlString.toStdString() << ")" << std::endl; + + QMutexLocker locker(&m_mutex); + QString localString = urlString; if (FileSource::isRemote(urlString) && FileSource::canHandleScheme(urlString)) { + //!!! how do we avoid hammering the server if it doesn't have + //!!! the file, and/or the network if it can't get through? + + expireCacheMaybe(urlString); + FileSource *source = new FileSource (urlString, 0, FileSource::PersistentCache); if (!source->isAvailable()) {
--- a/rdf/PluginRDFIndexer.h Fri Oct 17 13:32:55 2008 +0000 +++ b/rdf/PluginRDFIndexer.h Fri Oct 17 15:26:29 2008 +0000 @@ -18,6 +18,7 @@ #include <QString> #include <QStringList> +#include <QMutex> #include <map> #include <set> @@ -28,6 +29,18 @@ public: static PluginRDFIndexer *getInstance(); + /** + * Index all URLs obtained from index files defined in the current + * settings. This is not done automatically because it may incur + * significant processing and networking effort. It could be + * called from a background thread at startup, for example. + * + * Note that this class has a single mutex, so other functions + * will block if called from a different thread while this one is + * running. + */ + bool indexConfiguredURLs(); + bool indexURL(QString url); // in addition to "installed" URLs QString getURIForPluginId(QString pluginId); @@ -41,6 +54,7 @@ protected: PluginRDFIndexer(); + QMutex m_mutex; std::set<FileSource *> m_sources; typedef std::map<QString, QString> StringMap; StringMap m_uriToIdMap; @@ -48,6 +62,7 @@ StringMap m_idToDescriptionMap; bool indexFile(QString path); static PluginRDFIndexer *m_instance; + void expireCacheMaybe(QString); }; #endif
--- a/rdf/SimpleSPARQLQuery.cpp Fri Oct 17 13:32:55 2008 +0000 +++ b/rdf/SimpleSPARQLQuery.cpp Fri Oct 17 15:26:29 2008 +0000 @@ -22,6 +22,8 @@ #include <rasqal.h> #endif +//#define DEBUG_SIMPLE_SPARQL_QUERY 1 + #include <iostream> using std::cerr; @@ -121,7 +123,9 @@ m_reporter(0), m_cancelled(false) { -// std::cerr << "SimpleSPARQLQuery::Impl: Query is: \"" << query.toStdString() << "\"" << std::endl; +#ifdef DEBUG_SIMPLE_SPARQL_QUERY + std::cerr << "SimpleSPARQLQuery::Impl: Query is: \"" << query.toStdString() << "\"" << std::endl; +#endif } SimpleSPARQLQuery::Impl::~Impl() @@ -234,7 +238,9 @@ QString text = (const char *)rasqal_literal_as_string(literal); +#ifdef DEBUG_SIMPLE_SPARQL_QUERY std::cerr << i << ". " << key.toStdString() << " -> " << text.toStdString() << " (type " << type << ")" << std::endl; +#endif resultmap[key] = Value(type, text); }
--- a/transform/TransformFactory.cpp Fri Oct 17 13:32:55 2008 +0000 +++ b/transform/TransformFactory.cpp Fri Oct 17 15:26:29 2008 +0000 @@ -529,8 +529,9 @@ "TransformFactory::populateUninstalledTransforms"); if (m_uninstalledTransformsPopulated) return; - PluginRDFIndexer::getInstance()->indexURL - ("http://www.vamp-plugins.org/rdf/plugins/vamp-example-plugins"); +// ("http://www.vamp-plugins.org/rdf/plugins/vamp-example-plugins"); + + PluginRDFIndexer::getInstance()->indexConfiguredURLs(); //!!! This will be amazingly slow