# HG changeset patch # User Chris Cannam # Date 1224250375 0 # Node ID 93fb1ebff76bb7e857cf2a620cc4282c02a420e7 # Parent 6441b31b37acdb48fa0e4ea4d83b66c35cf222ad * Add persistent cache file support to FileSource (e.g. for RDF descriptions) * Query RDF plugin data in a background thread on startup diff -r 6441b31b37ac -r 93fb1ebff76b base/TempDirectory.cpp --- a/base/TempDirectory.cpp Thu Oct 16 13:38:33 2008 +0000 +++ b/base/TempDirectory.cpp Fri Oct 17 13:32:55 2008 +0000 @@ -53,12 +53,10 @@ } QString -TempDirectory::getPath() +TempDirectory::getContainingPath() { QMutexLocker locker(&m_mutex); - if (m_tmpdir != "") return m_tmpdir; - QSettings settings; settings.beginGroup("TempDirectory"); QString svDirParent = settings.value("create-in", "$HOME").toString(); @@ -80,7 +78,15 @@ cleanupAbandonedDirectories(svDir); - return createTempDirectoryIn(svDir); + return svDir; +} + +QString +TempDirectory::getPath() +{ + if (m_tmpdir != "") return m_tmpdir; + + return createTempDirectoryIn(getContainingPath()); } QString diff -r 6441b31b37ac -r 93fb1ebff76b base/TempDirectory.h --- a/base/TempDirectory.h Thu Oct 16 13:38:33 2008 +0000 +++ b/base/TempDirectory.h Fri Oct 17 13:32:55 2008 +0000 @@ -38,17 +38,36 @@ virtual ~TempDirectory(); /** + * Return the path of the directory in which the temporary + * directory has been or will be created. This directory is + * particular to this application, although not to this instance + * of it, and it will not be removed when the application exits. + * Persistent cache data or similar may be placed in this + * directory or other, non-temporary subdirectories of it. + * + * If this directory does not exist, it will be created. Throw + * DirectoryCreationFailed if the directory cannot be created. + */ + QString getContainingPath(); + + /** * Create the root temporary directory if necessary, and return - * its path. Throw DirectoryCreationFailed if the directory - * cannot be created. + * its path. This directory will be removed when the application + * exits. + * + * Throw DirectoryCreationFailed if the directory cannot be + * created. */ QString getPath(); /** * Create an immediate subdirectory of the root temporary * directory of the given name, if it doesn't already exist, and - * return its path. Throw DirectoryCreationFailed if the - * directory cannot be created. + * return its path. This directory will be removed when the + * application exits. + * + * Throw DirectoryCreationFailed if the directory cannot be + * created. */ QString getSubDirectoryPath(QString subdir); diff -r 6441b31b37ac -r 93fb1ebff76b data/fileio/FileSource.cpp --- a/data/fileio/FileSource.cpp Thu Oct 16 13:38:33 2008 +0000 +++ b/data/fileio/FileSource.cpp Fri Oct 17 13:32:55 2008 +0000 @@ -24,12 +24,13 @@ #include #include #include +#include #include #include #include -//#define DEBUG_FILE_SOURCE 1 +#define DEBUG_FILE_SOURCE 1 int FileSource::m_count = 0; @@ -46,8 +47,10 @@ QMutex FileSource::m_mapMutex; -FileSource::FileSource(QString fileOrUrl, ProgressReporter *reporter) : +FileSource::FileSource(QString fileOrUrl, ProgressReporter *reporter, + LocalCacheMode cacheMode) : m_url(fileOrUrl), + m_cacheMode(cacheMode), m_ftp(0), m_http(0), m_localFile(0), @@ -60,7 +63,7 @@ m_refCounted(false) { #ifdef DEBUG_FILE_SOURCE - std::cerr << "FileSource::FileSource(" << fileOrUrl.toStdString() << ")" << std::endl; + std::cerr << "FileSource::FileSource(" << fileOrUrl.toStdString() << ", " << cacheMode << ")" << std::endl; #endif if (!canHandleScheme(m_url)) { @@ -110,8 +113,10 @@ } } -FileSource::FileSource(QUrl url, ProgressReporter *reporter) : +FileSource::FileSource(QUrl url, ProgressReporter *reporter, + LocalCacheMode cacheMode) : m_url(url), + m_cacheMode(cacheMode), m_ftp(0), m_http(0), m_localFile(0), @@ -139,6 +144,7 @@ FileSource::FileSource(const FileSource &rf) : QObject(), m_url(rf.m_url), + m_cacheMode(rf.m_cacheMode), m_ftp(0), m_http(0), m_localFile(0), @@ -160,8 +166,14 @@ return; } - if (!isRemote()) { + if (m_cacheMode == PersistentCache) { + m_localFilename = rf.m_localFilename; + + } else if (!isRemote()) { + + m_localFilename = rf.m_localFilename; + } else { QMutexLocker locker(&m_mapMutex); #ifdef DEBUG_FILE_SOURCE @@ -192,7 +204,9 @@ cleanup(); - if (isRemote() && !m_leaveLocalFile) deleteCacheFile(); + if (isRemote() && (m_cacheMode == TemporaryCache) && !m_leaveLocalFile) { + deleteCacheFile(); + } } void @@ -238,7 +252,8 @@ if (createCacheFile()) { #ifdef DEBUG_FILE_SOURCE - std::cerr << "FileSource::init: Already have this one" << std::endl; + std::cerr << "FileSource::init: Already have this one at " + << m_localFilename.toStdString() << std::endl; #endif m_ok = true; if (!QFileInfo(m_localFilename).exists()) { @@ -258,7 +273,8 @@ #ifdef DEBUG_FILE_SOURCE std::cerr << "FileSource::init: Don't have local copy of \"" - << m_url.toString().toStdString() << "\", retrieving" << std::endl; + << m_url.toString().toStdString() << "\", retrieving to " + << m_localFilename.toStdString() << std::endl; #endif if (scheme == "http") { @@ -693,10 +709,15 @@ m_fileCreationMutex.lock(); + // We always delete the file here, even in PersistentCache mode, + // because this function is also used if retrieval failed (in + // which case we want to delete the cache so that subsequent users + // won't trust it). It's up to the calling code to determine + // whether we actually want to delete the cache or not, e.g. on + // destruction. + if (!QFile(m_localFilename).remove()) { -#ifdef DEBUG_FILE_SOURCE std::cerr << "FileSource::deleteCacheFile: ERROR: Failed to delete file \"" << m_localFilename.toStdString() << "\"" << std::endl; -#endif } else { #ifdef DEBUG_FILE_SOURCE std::cerr << "FileSource::deleteCacheFile: Deleted cache file \"" << m_localFilename.toStdString() << "\"" << std::endl; @@ -709,9 +730,46 @@ m_done = true; } +QString +FileSource::getPersistentCacheDirectory() +{ + QDir dir = TempDirectory::getInstance()->getContainingPath(); + + QString cacheDirName("cache"); + + QFileInfo fi(dir.filePath(cacheDirName)); + + if ((fi.exists() && !fi.isDir()) || + (!fi.exists() && !dir.mkdir(cacheDirName))) { + + throw DirectoryCreationFailed(fi.filePath()); + } + + return fi.filePath(); +} + +QString +FileSource::getPersistentCacheFilePath(QUrl url) +{ + QDir dir(getPersistentCacheDirectory()); + + QString filename = + QString::fromLocal8Bit + (QCryptographicHash::hash(url.toString().toLocal8Bit(), + QCryptographicHash::Sha1).toBase64()); + + return dir.filePath(filename); +} + bool FileSource::createCacheFile() { + if (m_cacheMode == PersistentCache) { + m_localFilename = getPersistentCacheFilePath(m_url); + if (QFileInfo(m_localFilename).exists()) return true; + else return false; + } + { QMutexLocker locker(&m_mapMutex); diff -r 6441b31b37ac -r 93fb1ebff76b data/fileio/FileSource.h --- a/data/fileio/FileSource.h Thu Oct 16 13:38:33 2008 +0000 +++ b/data/fileio/FileSource.h Fri Oct 17 13:32:55 2008 +0000 @@ -51,10 +51,12 @@ * pass FileSource objects by value. FileSource only makes sense for * stateless URLs that result in the same data on each request. * - * Cached files share a lifetime with their "owning" FileSource - * objects; when the last FileSource referring to a given URL is - * deleted or goes out of scope, its cached file (if any) is also - * removed. + * Cached files (in their default temporary mode) share a lifetime + * with their "owning" FileSource objects. When the last FileSource + * referring to a given URL is deleted or goes out of scope, its + * cached file (if any) is also removed. You can change this + * behaviour by using persistent cache mode; \see LocalCacheMode for + * details and caveats. */ class FileSource : public QObject { @@ -62,6 +64,36 @@ public: /** + * Type of local cache to be used when retrieving remote files. + * + * Temporary cache files are created when a FileSource object is + * first created for a given URL, and removed when the last extant + * temporary cache mode FileSource object referring to a given URL + * is deleted (i.e. when its reference count is lowered to zero). + * They are also stored in a temporary directory that will be + * deleted when the program exits. + * + * Persistent cache files are created only when first retrieving a + * URL for which no persistent cache already exists, and are never + * deleted (by FileSource anyway). They are stored in a directory + * that is not deleted when the program exits. FileSource creates + * a unique local file name for each source URL, so as long as the + * local cache file remains on disc, the remote URL will not be + * retrieved again during any further run of the program. You can + * find out what local file name will be used for the persistent + * cache of a given URL by calling getPersistentCacheFilePath, if + * you want to do something such as delete it by hand. + * + * Note that FileSource does not cache local files (i.e. does not + * make a copy of files that already appear to be stored on the + * local filesystem) in either mode. + */ + enum LocalCacheMode { + TemporaryCache, + PersistentCache + }; + + /** * Construct a FileSource using the given local file path or URL. * The URL may be raw or encoded. * @@ -69,8 +101,13 @@ * progress status. Note that the progress() signal will also be * emitted regularly during retrieval, even if no reporter is * supplied here. Caller retains ownership of the reporter object. + * + * If LocalCacheMode is PersistentCache, a persistent cache file + * will be used. See LocalCacheMode documentation for details. */ - FileSource(QString fileOrUrl, ProgressReporter *reporter = 0); + FileSource(QString fileOrUrl, + ProgressReporter *reporter = 0, + LocalCacheMode mode = TemporaryCache); /** * Construct a FileSource using the given remote URL. @@ -79,8 +116,13 @@ * progress status. Note that the progress() signal will also be * emitted regularly during retrieval, even if no reporter is * supplied here. Caller retains ownership of the reporter object. + * + * If LocalCacheMode is PersistentCache, a persistent cache file + * will be used. See LocalCacheMode documentation for details. */ - FileSource(QUrl url, ProgressReporter *reporter = 0); + FileSource(QUrl url, + ProgressReporter *reporter = 0, + LocalCacheMode mode = TemporaryCache); FileSource(const FileSource &); @@ -158,7 +200,11 @@ /** * Specify whether any local, cached file should remain on disc * after this FileSource has been destroyed. The default is false - * (cached files share their FileSource owners' lifespans). + * (cached files share their FileSource owners' lifespans). This + * is only meaningful in TemporaryCache mode; even if this setting + * is true, the temporary cache will still be deleted when the + * program exits. Use PersistentCache mode if you want the cache + * to outlast the program. */ void setLeaveLocalFile(bool leave); @@ -174,6 +220,17 @@ */ static bool canHandleScheme(QUrl url); + /** + * Return the path that will be used for the cache file copy of + * the given remote URL by a FileSource object constructed in + * PersistentCache mode. + * + * This method also creates the containing directory for such + * cache files, if it does not already exist, and so may throw + * DirectoryCreationFailed. + */ + static QString getPersistentCacheFilePath(QUrl url); + signals: /** * Emitted during URL retrieval, when the retrieval progress @@ -206,6 +263,7 @@ FileSource &operator=(const FileSource &); // not provided QUrl m_url; + LocalCacheMode m_cacheMode; QFtp *m_ftp; QHttp *m_http; QFile *m_localFile; @@ -237,6 +295,8 @@ bool createCacheFile(); void deleteCacheFile(); + static QString getPersistentCacheDirectory(); + static QMutex m_fileCreationMutex; static int m_count; }; diff -r 6441b31b37ac -r 93fb1ebff76b rdf/PluginRDFIndexer.cpp --- a/rdf/PluginRDFIndexer.cpp Thu Oct 16 13:38:33 2008 +0000 +++ b/rdf/PluginRDFIndexer.cpp Fri Oct 17 13:32:55 2008 +0000 @@ -178,7 +178,8 @@ if (FileSource::isRemote(urlString) && FileSource::canHandleScheme(urlString)) { - FileSource *source = new FileSource(urlString); + FileSource *source = new FileSource + (urlString, 0, FileSource::PersistentCache); if (!source->isAvailable()) { delete source; return false; diff -r 6441b31b37ac -r 93fb1ebff76b transform/TransformFactory.cpp --- a/transform/TransformFactory.cpp Thu Oct 16 13:38:33 2008 +0000 +++ b/transform/TransformFactory.cpp Fri Oct 17 13:32:55 2008 +0000 @@ -35,6 +35,8 @@ #include #include +#include "base/Thread.h" + using std::cerr; using std::endl; @@ -51,6 +53,9 @@ m_transformsPopulated(false), m_uninstalledTransformsPopulated(false) { + UninstalledTransformsPopulateThread *thread = + new UninstalledTransformsPopulateThread(this); + thread->start(); } TransformFactory::~TransformFactory() @@ -60,7 +65,7 @@ TransformList TransformFactory::getAllTransformDescriptions() { - if (!m_transformsPopulated) populateTransforms(); + populateTransforms(); std::set dset; for (TransformDescriptionMap::const_iterator i = m_transforms.begin(); @@ -82,7 +87,7 @@ TransformDescription TransformFactory::getTransformDescription(TransformId id) { - if (!m_transformsPopulated) populateTransforms(); + populateTransforms(); if (m_transforms.find(id) == m_transforms.end()) { return TransformDescription(); @@ -94,7 +99,7 @@ TransformList TransformFactory::getUninstalledTransformDescriptions() { - if (!m_uninstalledTransformsPopulated) populateUninstalledTransforms(); + populateUninstalledTransforms(); std::set dset; for (TransformDescriptionMap::const_iterator i = m_uninstalledTransforms.begin(); @@ -106,7 +111,7 @@ TransformList list; for (std::set::const_iterator i = dset.begin(); i != dset.end(); ++i) { -// cerr << "inserting transform into list: id = " << i->identifier.toStdString() << endl; +// cerr << "inserting transform into uninstalled list: id = " << i->identifier.toStdString() << endl; list.push_back(*i); } @@ -116,7 +121,7 @@ TransformDescription TransformFactory::getUninstalledTransformDescription(TransformId id) { - if (!m_uninstalledTransformsPopulated) populateUninstalledTransforms(); + populateUninstalledTransforms(); if (m_uninstalledTransforms.find(id) == m_uninstalledTransforms.end()) { return TransformDescription(); @@ -128,8 +133,8 @@ TransformFactory::TransformInstallStatus TransformFactory::getTransformInstallStatus(TransformId id) { - if (!m_transformsPopulated) populateTransforms(); - if (!m_uninstalledTransformsPopulated) populateUninstalledTransforms(); + populateTransforms(); + populateUninstalledTransforms(); if (m_transforms.find(id) != m_transforms.end()) { return TransformInstalled; @@ -144,7 +149,7 @@ std::vector TransformFactory::getAllTransformTypes() { - if (!m_transformsPopulated) populateTransforms(); + populateTransforms(); std::set types; for (TransformDescriptionMap::const_iterator i = m_transforms.begin(); @@ -163,7 +168,7 @@ std::vector TransformFactory::getTransformCategories(QString transformType) { - if (!m_transformsPopulated) populateTransforms(); + populateTransforms(); std::set categories; for (TransformDescriptionMap::const_iterator i = m_transforms.begin(); @@ -190,7 +195,7 @@ std::vector TransformFactory::getTransformMakers(QString transformType) { - if (!m_transformsPopulated) populateTransforms(); + populateTransforms(); std::set makers; for (TransformDescriptionMap::const_iterator i = m_transforms.begin(); @@ -217,6 +222,12 @@ void TransformFactory::populateTransforms() { + MutexLocker locker(&m_transformsMutex, + "TransformFactory::populateTransforms"); + if (m_transformsPopulated) { + return; + } + TransformDescriptionMap transforms; populateFeatureExtractionPlugins(transforms); @@ -512,8 +523,14 @@ void TransformFactory::populateUninstalledTransforms() { - if (!m_uninstalledTransforms.empty()) return; - if (m_transforms.empty()) populateTransforms(); + populateTransforms(); + + MutexLocker locker(&m_uninstalledTransformsMutex, + "TransformFactory::populateUninstalledTransforms"); + if (m_uninstalledTransformsPopulated) return; + + PluginRDFIndexer::getInstance()->indexURL + ("http://www.vamp-plugins.org/rdf/plugins/vamp-example-plugins"); //!!! This will be amazingly slow @@ -575,6 +592,8 @@ } m_uninstalledTransformsPopulated = true; + + std::cerr << "populateUninstalledTransforms exiting" << std::endl; } Transform @@ -660,7 +679,7 @@ bool TransformFactory::haveTransform(TransformId identifier) { - if (m_transforms.empty()) populateTransforms(); + populateTransforms(); return (m_transforms.find(identifier) != m_transforms.end()); } @@ -927,7 +946,7 @@ TransformFactory::SearchResults TransformFactory::search(QStringList keywords) { - if (m_transforms.empty()) populateTransforms(); + populateTransforms(); if (keywords.size() > 1) { // Additional score for all keywords in a row @@ -955,7 +974,21 @@ if (match.score > 0) results[i->first] = match; } - if (m_uninstalledTransforms.empty()) populateUninstalledTransforms(); + if (!m_uninstalledTransformsMutex.tryLock()) { + // uninstalled transforms are being populated; this may take some time, + // and they aren't critical + std::cerr << "TransformFactory::search: Uninstalled transforms mutex is held, skipping" << std::endl; + return results; + } + + if (!m_uninstalledTransformsPopulated) { + std::cerr << "WARNING: TransformFactory::search: Uninstalled transforms are not populated yet" << endl + << "and are not being populated either -- was the thread not started correctly?" << endl; + m_uninstalledTransformsMutex.unlock(); + return results; + } + + m_uninstalledTransformsMutex.unlock(); for (TransformDescriptionMap::const_iterator i = m_uninstalledTransforms.begin(); i != m_uninstalledTransforms.end(); ++i) { diff -r 6441b31b37ac -r 93fb1ebff76b transform/TransformFactory.h --- a/transform/TransformFactory.h Thu Oct 16 13:38:33 2008 +0000 +++ b/transform/TransformFactory.h Fri Oct 17 13:32:55 2008 +0000 @@ -24,6 +24,8 @@ #include #include +#include +#include #include #include @@ -192,6 +194,21 @@ Vamp::PluginBase *instantiateDefaultPluginFor(TransformId id, size_t rate); + QMutex m_transformsMutex; + QMutex m_uninstalledTransformsMutex; + + class UninstalledTransformsPopulateThread : public QThread + { + public: + UninstalledTransformsPopulateThread(TransformFactory *factory) : + m_factory(factory) { + } + virtual void run() { + m_factory->populateUninstalledTransforms(); + } + TransformFactory *m_factory; + }; + static TransformFactory *m_instance; };