changeset 460:93fb1ebff76b

* Add persistent cache file support to FileSource (e.g. for RDF descriptions) * Query RDF plugin data in a background thread on startup
author Chris Cannam
date Fri, 17 Oct 2008 13:32:55 +0000
parents 6441b31b37ac
children 2019d89ebcf9
files base/TempDirectory.cpp base/TempDirectory.h data/fileio/FileSource.cpp data/fileio/FileSource.h rdf/PluginRDFIndexer.cpp transform/TransformFactory.cpp transform/TransformFactory.h
diffstat 7 files changed, 235 insertions(+), 41 deletions(-) [+]
line wrap: on
line diff
--- a/base/TempDirectory.cpp	Thu Oct 16 13:38:33 2008 +0000
+++ b/base/TempDirectory.cpp	Fri Oct 17 13:32:55 2008 +0000
@@ -53,12 +53,10 @@
 }
 
 QString
-TempDirectory::getPath()
+TempDirectory::getContainingPath()
 {
     QMutexLocker locker(&m_mutex);
     
-    if (m_tmpdir != "") return m_tmpdir;
-
     QSettings settings;
     settings.beginGroup("TempDirectory");
     QString svDirParent = settings.value("create-in", "$HOME").toString();
@@ -80,7 +78,15 @@
 
     cleanupAbandonedDirectories(svDir);
 
-    return createTempDirectoryIn(svDir);
+    return svDir;
+}    
+
+QString
+TempDirectory::getPath()
+{
+    if (m_tmpdir != "") return m_tmpdir; // a path is already recorded: reuse it
+    // NOTE(review): m_tmpdir is read above without m_mutex held (the locker now lives in getContainingPath) -- confirm this is safe
+    return createTempDirectoryIn(getContainingPath());
 }
 
 QString
--- a/base/TempDirectory.h	Thu Oct 16 13:38:33 2008 +0000
+++ b/base/TempDirectory.h	Fri Oct 17 13:32:55 2008 +0000
@@ -38,17 +38,36 @@
     virtual ~TempDirectory();
 
     /**
+     * Return the path of the directory in which the temporary
+     * directory has been or will be created.  This directory is
+     * particular to this application, although not to this instance
+     * of it, and it will not be removed when the application exits.
+     * Persistent cache data or similar may be placed in this
+     * directory or other, non-temporary subdirectories of it.
+     *
+     * If this directory does not exist, it will be created.  Throw
+     * DirectoryCreationFailed if the directory cannot be created.
+     */
+    QString getContainingPath();
+
+    /**
      * Create the root temporary directory if necessary, and return
-     * its path.  Throw DirectoryCreationFailed if the directory
-     * cannot be created.
+     * its path.  This directory will be removed when the application
+     * exits.
+     *
+     * Throw DirectoryCreationFailed if the directory cannot be
+     * created.
      */
     QString getPath();
 
     /** 
      * Create an immediate subdirectory of the root temporary
      * directory of the given name, if it doesn't already exist, and
-     * return its path.  Throw DirectoryCreationFailed if the
-     * directory cannot be created.
+     * return its path.  This directory will be removed when the
+     * application exits.
+     * 
+     * Throw DirectoryCreationFailed if the directory cannot be
+     * created.
      */
     QString getSubDirectoryPath(QString subdir);
 
--- a/data/fileio/FileSource.cpp	Thu Oct 16 13:38:33 2008 +0000
+++ b/data/fileio/FileSource.cpp	Fri Oct 17 13:32:55 2008 +0000
@@ -24,12 +24,13 @@
 #include <QFileInfo>
 #include <QDir>
 #include <QCoreApplication>
+#include <QCryptographicHash>
 #include <QHttpResponseHeader>
 
 #include <iostream>
 #include <cstdlib>
 
-//#define DEBUG_FILE_SOURCE 1
+#define DEBUG_FILE_SOURCE 1
 
 int
 FileSource::m_count = 0;
@@ -46,8 +47,10 @@
 QMutex
 FileSource::m_mapMutex;
 
-FileSource::FileSource(QString fileOrUrl, ProgressReporter *reporter) :
+FileSource::FileSource(QString fileOrUrl, ProgressReporter *reporter,
+                       LocalCacheMode cacheMode) :
     m_url(fileOrUrl),
+    m_cacheMode(cacheMode),
     m_ftp(0),
     m_http(0),
     m_localFile(0),
@@ -60,7 +63,7 @@
     m_refCounted(false)
 {
 #ifdef DEBUG_FILE_SOURCE
-    std::cerr << "FileSource::FileSource(" << fileOrUrl.toStdString() << ")" << std::endl;
+    std::cerr << "FileSource::FileSource(" << fileOrUrl.toStdString() << ", " << cacheMode << ")" << std::endl;
 #endif
 
     if (!canHandleScheme(m_url)) {
@@ -110,8 +113,10 @@
     }
 }
 
-FileSource::FileSource(QUrl url, ProgressReporter *reporter) :
+FileSource::FileSource(QUrl url, ProgressReporter *reporter,
+                       LocalCacheMode cacheMode) :
     m_url(url),
+    m_cacheMode(cacheMode),
     m_ftp(0),
     m_http(0),
     m_localFile(0),
@@ -139,6 +144,7 @@
 FileSource::FileSource(const FileSource &rf) :
     QObject(),
     m_url(rf.m_url),
+    m_cacheMode(rf.m_cacheMode),
     m_ftp(0),
     m_http(0),
     m_localFile(0),
@@ -160,8 +166,14 @@
         return;
     }
 
-    if (!isRemote()) {
+    if (m_cacheMode == PersistentCache) {
+
         m_localFilename = rf.m_localFilename;
+
+    } else if (!isRemote()) {
+
+        m_localFilename = rf.m_localFilename;
+
     } else {
         QMutexLocker locker(&m_mapMutex);
 #ifdef DEBUG_FILE_SOURCE
@@ -192,7 +204,9 @@
 
     cleanup();
 
-    if (isRemote() && !m_leaveLocalFile) deleteCacheFile();
+    if (isRemote() && (m_cacheMode == TemporaryCache) && !m_leaveLocalFile) {
+        deleteCacheFile();
+    }
 }
 
 void
@@ -238,7 +252,8 @@
 
     if (createCacheFile()) {
 #ifdef DEBUG_FILE_SOURCE
-        std::cerr << "FileSource::init: Already have this one" << std::endl;
+        std::cerr << "FileSource::init: Already have this one at "
+                  << m_localFilename.toStdString() << std::endl;
 #endif
         m_ok = true;
         if (!QFileInfo(m_localFilename).exists()) {
@@ -258,7 +273,8 @@
 
 #ifdef DEBUG_FILE_SOURCE
     std::cerr << "FileSource::init: Don't have local copy of \""
-              << m_url.toString().toStdString() << "\", retrieving" << std::endl;
+              << m_url.toString().toStdString() << "\", retrieving to "
+              << m_localFilename.toStdString() << std::endl;
 #endif
 
     if (scheme == "http") {
@@ -693,10 +709,15 @@
 
     m_fileCreationMutex.lock();
 
+    // We always delete the file here, even in PersistentCache mode,
+    // because this function is also used if retrieval failed (in
+    // which case we want to delete the cache so that subsequent users
+    // won't trust it).  It's up to the calling code to determine
+    // whether we actually want to delete the cache or not, e.g. on
+    // destruction.
+
     if (!QFile(m_localFilename).remove()) {
-#ifdef DEBUG_FILE_SOURCE
         std::cerr << "FileSource::deleteCacheFile: ERROR: Failed to delete file \"" << m_localFilename.toStdString() << "\"" << std::endl;
-#endif
     } else {
 #ifdef DEBUG_FILE_SOURCE
         std::cerr << "FileSource::deleteCacheFile: Deleted cache file \"" << m_localFilename.toStdString() << "\"" << std::endl;
@@ -709,9 +730,46 @@
     m_done = true;
 }
 
+QString
+FileSource::getPersistentCacheDirectory()
+{
+    QDir dir = TempDirectory::getInstance()->getContainingPath();
+    // The persistent cache lives in a "cache" subdirectory of the containing path
+    QString cacheDirName("cache");
+
+    QFileInfo fi(dir.filePath(cacheDirName));
+    // Fail if the name exists but is not a directory, or is absent and mkdir fails
+    if ((fi.exists() && !fi.isDir()) ||
+        (!fi.exists() && !dir.mkdir(cacheDirName))) {
+
+        throw DirectoryCreationFailed(fi.filePath());
+    }
+
+    return fi.filePath();
+}
+
+QString
+FileSource::getPersistentCacheFilePath(QUrl url)
+{
+    QDir dir(getPersistentCacheDirectory());
+
+    // Name the file by the hex SHA-1 digest of the URL.  Base64 must not be
+    // used here: its alphabet includes '/', which is a path separator.
+    QString filename = QString::fromLocal8Bit
+        (QCryptographicHash::hash(url.toString().toLocal8Bit(),
+                                  QCryptographicHash::Sha1).toHex());
+    return dir.filePath(filename);
+}
+
 bool
 FileSource::createCacheFile()
 {
+    if (m_cacheMode == PersistentCache) {
+        m_localFilename = getPersistentCacheFilePath(m_url);
+        if (QFileInfo(m_localFilename).exists()) return true;
+        else return false;
+    }
+
     {
         QMutexLocker locker(&m_mapMutex);
 
--- a/data/fileio/FileSource.h	Thu Oct 16 13:38:33 2008 +0000
+++ b/data/fileio/FileSource.h	Fri Oct 17 13:32:55 2008 +0000
@@ -51,10 +51,12 @@
  * pass FileSource objects by value.  FileSource only makes sense for
  * stateless URLs that result in the same data on each request.
  *
- * Cached files share a lifetime with their "owning" FileSource
- * objects; when the last FileSource referring to a given URL is
- * deleted or goes out of scope, its cached file (if any) is also
- * removed.
+ * Cached files (in their default temporary mode) share a lifetime
+ * with their "owning" FileSource objects.  When the last FileSource
+ * referring to a given URL is deleted or goes out of scope, its
+ * cached file (if any) is also removed.  You can change this
+ * behaviour by using persistent cache mode; \see LocalCacheMode for
+ * details and caveats.
  */
 class FileSource : public QObject
 {
@@ -62,6 +64,36 @@
 
 public:
     /**
+     * Type of local cache to be used when retrieving remote files.
+     *
+     * Temporary cache files are created when a FileSource object is
+     * first created for a given URL, and removed when the last extant
+     * temporary cache mode FileSource object referring to a given URL
+     * is deleted (i.e. when its reference count is lowered to zero).
+     * They are also stored in a temporary directory that will be
+     * deleted when the program exits.
+     *
+     * Persistent cache files are created only when first retrieving a
+     * URL for which no persistent cache already exists, and are never
+     * deleted (by FileSource anyway).  They are stored in a directory
+     * that is not deleted when the program exits.  FileSource creates
+     * a unique local file name for each source URL, so as long as the
+     * local cache file remains on disc, the remote URL will not be
+     * retrieved again during any further run of the program.  You can
+     * find out what local file name will be used for the persistent
+     * cache of a given URL by calling getPersistentCacheFilePath, if
+     * you want to do something such as delete it by hand.
+     *
+     * Note that FileSource does not cache local files (i.e. does not
+     * make a copy of files that already appear to be stored on the
+     * local filesystem) in either mode.
+     */
+    enum LocalCacheMode {
+        TemporaryCache,
+        PersistentCache
+    };
+
+    /**
      * Construct a FileSource using the given local file path or URL.
      * The URL may be raw or encoded.
      *
@@ -69,8 +101,13 @@
      * progress status.  Note that the progress() signal will also be
      * emitted regularly during retrieval, even if no reporter is
      * supplied here.  Caller retains ownership of the reporter object.
+     *
+     * If LocalCacheMode is PersistentCache, a persistent cache file
+     * will be used.  See LocalCacheMode documentation for details.
      */
-    FileSource(QString fileOrUrl, ProgressReporter *reporter = 0);
+    FileSource(QString fileOrUrl,
+               ProgressReporter *reporter = 0,
+               LocalCacheMode mode = TemporaryCache);
 
     /**
      * Construct a FileSource using the given remote URL.
@@ -79,8 +116,13 @@
      * progress status.  Note that the progress() signal will also be
      * emitted regularly during retrieval, even if no reporter is
      * supplied here.  Caller retains ownership of the reporter object.
+     *
+     * If LocalCacheMode is PersistentCache, a persistent cache file
+     * will be used.  See LocalCacheMode documentation for details.
      */
-    FileSource(QUrl url, ProgressReporter *reporter = 0);
+    FileSource(QUrl url,
+               ProgressReporter *reporter = 0,
+               LocalCacheMode mode = TemporaryCache);
 
     FileSource(const FileSource &);
 
@@ -158,7 +200,11 @@
     /**
      * Specify whether any local, cached file should remain on disc
      * after this FileSource has been destroyed.  The default is false
-     * (cached files share their FileSource owners' lifespans).
+     * (cached files share their FileSource owners' lifespans).  This
+     * is only meaningful in TemporaryCache mode; even if this setting
+     * is true, the temporary cache will still be deleted when the
+     * program exits.  Use PersistentCache mode if you want the cache
+     * to outlast the program.
      */
     void setLeaveLocalFile(bool leave);
 
@@ -174,6 +220,17 @@
      */
     static bool canHandleScheme(QUrl url);
 
+    /**
+     * Return the path that will be used for the cache file copy of
+     * the given remote URL by a FileSource object constructed in
+     * PersistentCache mode.
+     *
+     * This method also creates the containing directory for such
+     * cache files, if it does not already exist, and so may throw
+     * DirectoryCreationFailed.
+     */
+    static QString getPersistentCacheFilePath(QUrl url);
+
 signals:
     /**
      * Emitted during URL retrieval, when the retrieval progress
@@ -206,6 +263,7 @@
     FileSource &operator=(const FileSource &); // not provided
 
     QUrl m_url;
+    LocalCacheMode m_cacheMode;
     QFtp *m_ftp;
     QHttp *m_http;
     QFile *m_localFile;
@@ -237,6 +295,8 @@
     bool createCacheFile();
     void deleteCacheFile();
 
+    static QString getPersistentCacheDirectory();
+
     static QMutex m_fileCreationMutex;
     static int m_count;
 };
--- a/rdf/PluginRDFIndexer.cpp	Thu Oct 16 13:38:33 2008 +0000
+++ b/rdf/PluginRDFIndexer.cpp	Fri Oct 17 13:32:55 2008 +0000
@@ -178,7 +178,8 @@
     if (FileSource::isRemote(urlString) &&
         FileSource::canHandleScheme(urlString)) {
 
-        FileSource *source = new FileSource(urlString);
+        FileSource *source = new FileSource
+            (urlString, 0, FileSource::PersistentCache);
         if (!source->isAvailable()) {
             delete source;
             return false;
--- a/transform/TransformFactory.cpp	Thu Oct 16 13:38:33 2008 +0000
+++ b/transform/TransformFactory.cpp	Fri Oct 17 13:32:55 2008 +0000
@@ -35,6 +35,8 @@
 #include <QRegExp>
 #include <QTextStream>
 
+#include "base/Thread.h"
+
 using std::cerr;
 using std::endl;
 
@@ -51,6 +53,9 @@
     m_transformsPopulated(false),
     m_uninstalledTransformsPopulated(false)
 {
+    UninstalledTransformsPopulateThread *thread =
+        new UninstalledTransformsPopulateThread(this);
+    thread->start();
 }
 
 TransformFactory::~TransformFactory()
@@ -60,7 +65,7 @@
 TransformList
 TransformFactory::getAllTransformDescriptions()
 {
-    if (!m_transformsPopulated) populateTransforms();
+    populateTransforms();
 
     std::set<TransformDescription> dset;
     for (TransformDescriptionMap::const_iterator i = m_transforms.begin();
@@ -82,7 +87,7 @@
 TransformDescription
 TransformFactory::getTransformDescription(TransformId id)
 {
-    if (!m_transformsPopulated) populateTransforms();
+    populateTransforms();
 
     if (m_transforms.find(id) == m_transforms.end()) {
         return TransformDescription();
@@ -94,7 +99,7 @@
 TransformList
 TransformFactory::getUninstalledTransformDescriptions()
 {
-    if (!m_uninstalledTransformsPopulated) populateUninstalledTransforms();
+    populateUninstalledTransforms();
     
     std::set<TransformDescription> dset;
     for (TransformDescriptionMap::const_iterator i = m_uninstalledTransforms.begin();
@@ -106,7 +111,7 @@
     TransformList list;
     for (std::set<TransformDescription>::const_iterator i = dset.begin();
 	 i != dset.end(); ++i) {
-//        cerr << "inserting transform into list: id = " << i->identifier.toStdString() << endl;
+//        cerr << "inserting transform into uninstalled list: id = " << i->identifier.toStdString() << endl;
 	list.push_back(*i);
     }
 
@@ -116,7 +121,7 @@
 TransformDescription
 TransformFactory::getUninstalledTransformDescription(TransformId id)
 {
-    if (!m_uninstalledTransformsPopulated) populateUninstalledTransforms();
+    populateUninstalledTransforms();
 
     if (m_uninstalledTransforms.find(id) == m_uninstalledTransforms.end()) {
         return TransformDescription();
@@ -128,8 +133,8 @@
 TransformFactory::TransformInstallStatus
 TransformFactory::getTransformInstallStatus(TransformId id)
 {
-    if (!m_transformsPopulated) populateTransforms();
-    if (!m_uninstalledTransformsPopulated) populateUninstalledTransforms();
+    populateTransforms();
+    populateUninstalledTransforms();
 
     if (m_transforms.find(id) != m_transforms.end()) {
         return TransformInstalled;
@@ -144,7 +149,7 @@
 std::vector<QString>
 TransformFactory::getAllTransformTypes()
 {
-    if (!m_transformsPopulated) populateTransforms();
+    populateTransforms();
 
     std::set<QString> types;
     for (TransformDescriptionMap::const_iterator i = m_transforms.begin();
@@ -163,7 +168,7 @@
 std::vector<QString>
 TransformFactory::getTransformCategories(QString transformType)
 {
-    if (!m_transformsPopulated) populateTransforms();
+    populateTransforms();
 
     std::set<QString> categories;
     for (TransformDescriptionMap::const_iterator i = m_transforms.begin();
@@ -190,7 +195,7 @@
 std::vector<QString>
 TransformFactory::getTransformMakers(QString transformType)
 {
-    if (!m_transformsPopulated) populateTransforms();
+    populateTransforms();
 
     std::set<QString> makers;
     for (TransformDescriptionMap::const_iterator i = m_transforms.begin();
@@ -217,6 +222,12 @@
 void
 TransformFactory::populateTransforms()
 {
+    MutexLocker locker(&m_transformsMutex,
+                       "TransformFactory::populateTransforms");
+    if (m_transformsPopulated) {
+        return;
+    }
+
     TransformDescriptionMap transforms;
 
     populateFeatureExtractionPlugins(transforms);
@@ -512,8 +523,14 @@
 void
 TransformFactory::populateUninstalledTransforms()
 {
-    if (!m_uninstalledTransforms.empty()) return;
-    if (m_transforms.empty()) populateTransforms();
+    populateTransforms();
+
+    MutexLocker locker(&m_uninstalledTransformsMutex,
+                       "TransformFactory::populateUninstalledTransforms");
+    if (m_uninstalledTransformsPopulated) return;
+
+    PluginRDFIndexer::getInstance()->indexURL
+        ("http://www.vamp-plugins.org/rdf/plugins/vamp-example-plugins");
 
     //!!! This will be amazingly slow
 
@@ -575,6 +592,8 @@
     }
 
     m_uninstalledTransformsPopulated = true;
+
+    std::cerr << "populateUninstalledTransforms exiting" << std::endl;
 }
 
 Transform
@@ -660,7 +679,7 @@
 bool
 TransformFactory::haveTransform(TransformId identifier)
 {
-    if (m_transforms.empty()) populateTransforms();
+    populateTransforms();
     return (m_transforms.find(identifier) != m_transforms.end());
 }
 
@@ -927,7 +946,7 @@
 TransformFactory::SearchResults
 TransformFactory::search(QStringList keywords)
 {
-    if (m_transforms.empty()) populateTransforms();
+    populateTransforms();
 
     if (keywords.size() > 1) {
         // Additional score for all keywords in a row
@@ -955,7 +974,21 @@
         if (match.score > 0) results[i->first] = match;
     }
 
-    if (m_uninstalledTransforms.empty()) populateUninstalledTransforms();
+    if (!m_uninstalledTransformsMutex.tryLock()) {
+        // uninstalled transforms are being populated; this may take some time,
+        // and they aren't critical
+        std::cerr << "TransformFactory::search: Uninstalled transforms mutex is held, skipping" << std::endl;
+        return results;
+    }
+
+    if (!m_uninstalledTransformsPopulated) {
+        std::cerr << "WARNING: TransformFactory::search: Uninstalled transforms are not populated yet" << endl
+                  << "and are not being populated either -- was the thread not started correctly?" << endl;
+        m_uninstalledTransformsMutex.unlock();
+        return results;
+    }
+
+    m_uninstalledTransformsMutex.unlock();
 
     for (TransformDescriptionMap::const_iterator i = m_uninstalledTransforms.begin();
          i != m_uninstalledTransforms.end(); ++i) {
--- a/transform/TransformFactory.h	Thu Oct 16 13:38:33 2008 +0000
+++ b/transform/TransformFactory.h	Fri Oct 17 13:32:55 2008 +0000
@@ -24,6 +24,8 @@
 
 #include <QObject>
 #include <QStringList>
+#include <QThread>
+#include <QMutex>
 
 #include <map>
 #include <set>
@@ -192,6 +194,21 @@
 
     Vamp::PluginBase *instantiateDefaultPluginFor(TransformId id, size_t rate);
 
+    QMutex m_transformsMutex;
+    QMutex m_uninstalledTransformsMutex;
+
+    class UninstalledTransformsPopulateThread : public QThread // runs populateUninstalledTransforms() off the calling thread
+    {
+    public:
+        UninstalledTransformsPopulateThread(TransformFactory *factory) :
+            m_factory(factory) {
+        }
+        virtual void run() { // QThread entry point, invoked in the new thread after start()
+            m_factory->populateUninstalledTransforms();
+        }
+        TransformFactory *m_factory; // factory to populate; NOTE(review): presumably must outlive this thread -- confirm
+    };
+
     static TransformFactory *m_instance;
 };