annotate rdf/PluginRDFIndexer.cpp @ 461:2019d89ebcf9

* Some work on querying and cacheing plugin RDF from a central index
author Chris Cannam
date Fri, 17 Oct 2008 15:26:29 +0000
parents 93fb1ebff76b
children c9b055f84326
rev   line source
Chris@439 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@439 2
Chris@439 3 /*
Chris@439 4 Sonic Visualiser
Chris@439 5 An audio file viewer and annotation editor.
Chris@439 6 Centre for Digital Music, Queen Mary, University of London.
Chris@439 7 This file copyright 2008 QMUL.
Chris@439 8
Chris@439 9 This program is free software; you can redistribute it and/or
Chris@439 10 modify it under the terms of the GNU General Public License as
Chris@439 11 published by the Free Software Foundation; either version 2 of the
Chris@439 12 License, or (at your option) any later version. See the file
Chris@439 13 COPYING included with this distribution for more information.
Chris@439 14 */
Chris@439 15
Chris@439 16 #include "PluginRDFIndexer.h"
Chris@439 17
Chris@439 18 #include "SimpleSPARQLQuery.h"
Chris@439 19
Chris@439 20 #include "data/fileio/FileSource.h"
Chris@461 21 #include "data/fileio/PlaylistFileReader.h"
Chris@439 22 #include "plugin/PluginIdentifier.h"
Chris@439 23
Chris@457 24 #include "base/Profiler.h"
Chris@457 25
Chris@439 26 #include <vamp-sdk/PluginHostAdapter.h>
Chris@439 27
Chris@439 28 #include <QFileInfo>
Chris@439 29 #include <QDir>
Chris@439 30 #include <QUrl>
Chris@461 31 #include <QDateTime>
Chris@461 32 #include <QSettings>
Chris@461 33 #include <QFile>
Chris@439 34
Chris@439 35 #include <iostream>
Chris@439 36 using std::cerr;
Chris@439 37 using std::endl;
Chris@439 38 using std::vector;
Chris@439 39 using std::string;
Chris@439 40 using Vamp::PluginHostAdapter;
Chris@439 41
Chris@439 42 PluginRDFIndexer *
Chris@439 43 PluginRDFIndexer::m_instance = 0;
Chris@439 44
Chris@439 45 PluginRDFIndexer *
Chris@439 46 PluginRDFIndexer::getInstance()
Chris@439 47 {
Chris@439 48 if (!m_instance) m_instance = new PluginRDFIndexer();
Chris@439 49 return m_instance;
Chris@439 50 }
Chris@439 51
Chris@439 52 PluginRDFIndexer::PluginRDFIndexer()
Chris@439 53 {
Chris@439 54 vector<string> paths = PluginHostAdapter::getPluginPath();
Chris@439 55
Chris@439 56 QStringList filters;
Chris@439 57 filters << "*.n3";
Chris@439 58 filters << "*.N3";
Chris@439 59 filters << "*.rdf";
Chris@439 60 filters << "*.RDF";
Chris@439 61
Chris@439 62 // Search each Vamp plugin path for a .rdf file that either has
Chris@439 63 // name "soname", "soname:label" or "soname/label" plus RDF
Chris@439 64 // extension. Use that order of preference, and prefer n3 over
Chris@439 65 // rdf extension.
Chris@439 66
Chris@439 67 for (vector<string>::const_iterator i = paths.begin(); i != paths.end(); ++i) {
Chris@439 68
Chris@439 69 QDir dir(i->c_str());
Chris@439 70 if (!dir.exists()) continue;
Chris@439 71
Chris@439 72 QStringList entries = dir.entryList
Chris@439 73 (filters, QDir::Files | QDir::Readable);
Chris@439 74
Chris@439 75 for (QStringList::const_iterator j = entries.begin();
Chris@439 76 j != entries.end(); ++j) {
Chris@439 77 QFileInfo fi(dir.filePath(*j));
Chris@439 78 indexFile(fi.absoluteFilePath());
Chris@439 79 }
Chris@439 80
Chris@439 81 QStringList subdirs = dir.entryList
Chris@439 82 (QDir::AllDirs | QDir::NoDotAndDotDot | QDir::Readable);
Chris@439 83
Chris@439 84 for (QStringList::const_iterator j = subdirs.begin();
Chris@439 85 j != subdirs.end(); ++j) {
Chris@439 86 QDir subdir(dir.filePath(*j));
Chris@439 87 if (subdir.exists()) {
Chris@439 88 entries = subdir.entryList
Chris@439 89 (filters, QDir::Files | QDir::Readable);
Chris@439 90 for (QStringList::const_iterator k = entries.begin();
Chris@439 91 k != entries.end(); ++k) {
Chris@439 92 QFileInfo fi(subdir.filePath(*k));
Chris@439 93 indexFile(fi.absoluteFilePath());
Chris@439 94 }
Chris@439 95 }
Chris@439 96 }
Chris@439 97 }
Chris@439 98 }
Chris@439 99
Chris@439 100 PluginRDFIndexer::~PluginRDFIndexer()
Chris@439 101 {
Chris@461 102 QMutexLocker locker(&m_mutex);
Chris@461 103
Chris@457 104 while (!m_sources.empty()) {
Chris@457 105 delete *m_sources.begin();
Chris@457 106 m_sources.erase(m_sources.begin());
Chris@439 107 }
Chris@439 108 }
Chris@439 109
Chris@461 110 bool
Chris@461 111 PluginRDFIndexer::indexConfiguredURLs()
Chris@461 112 {
Chris@461 113 std::cerr << "PluginRDFIndexer::indexConfiguredURLs" << std::endl;
Chris@461 114
Chris@461 115 QSettings settings;
Chris@461 116 settings.beginGroup("RDF");
Chris@461 117
Chris@461 118 QString indexKey("rdf-indices");
Chris@461 119 QStringList indices = settings.value(indexKey).toStringList();
Chris@461 120
Chris@461 121 for (int i = 0; i < indices.size(); ++i) {
Chris@461 122
Chris@461 123 QString index = indices[i];
Chris@461 124
Chris@461 125 std::cerr << "PluginRDFIndexer::indexConfiguredURLs: index url is "
Chris@461 126 << index.toStdString() << std::endl;
Chris@461 127
Chris@461 128 expireCacheMaybe(index);
Chris@461 129
Chris@461 130 FileSource indexSource(index, 0, FileSource::PersistentCache);
Chris@461 131 if (!indexSource.isAvailable()) continue;
Chris@461 132 indexSource.waitForData();
Chris@461 133
Chris@461 134 PlaylistFileReader reader(indexSource);
Chris@461 135 if (!reader.isOK()) continue;
Chris@461 136
Chris@461 137 PlaylistFileReader::Playlist list = reader.load();
Chris@461 138 for (PlaylistFileReader::Playlist::const_iterator j = list.begin();
Chris@461 139 j != list.end(); ++j) {
Chris@461 140 std::cerr << "PluginRDFIndexer::indexConfiguredURLs: url is "
Chris@461 141 << j->toStdString() << std::endl;
Chris@461 142 indexURL(*j);
Chris@461 143 }
Chris@461 144 }
Chris@461 145
Chris@461 146 QString urlListKey("rdf-urls");
Chris@461 147 QStringList urls = settings.value(urlListKey).toStringList();
Chris@461 148
Chris@461 149 for (int i = 0; i < urls.size(); ++i) {
Chris@461 150 indexURL(urls[i]);
Chris@461 151 }
Chris@461 152
Chris@461 153 settings.endGroup();
Chris@461 154 return true;
Chris@461 155 }
Chris@461 156
Chris@439 157 QString
Chris@439 158 PluginRDFIndexer::getURIForPluginId(QString pluginId)
Chris@439 159 {
Chris@461 160 QMutexLocker locker(&m_mutex);
Chris@461 161
Chris@439 162 if (m_idToUriMap.find(pluginId) == m_idToUriMap.end()) return "";
Chris@439 163 return m_idToUriMap[pluginId];
Chris@439 164 }
Chris@439 165
Chris@439 166 QString
Chris@439 167 PluginRDFIndexer::getIdForPluginURI(QString uri)
Chris@439 168 {
Chris@461 169 QMutexLocker locker(&m_mutex);
Chris@461 170
Chris@439 171 if (m_uriToIdMap.find(uri) == m_uriToIdMap.end()) {
Chris@439 172
Chris@439 173 // Haven't found this uri referenced in any document on the
Chris@439 174 // local filesystem; try resolving the pre-fragment part of
Chris@439 175 // the uri as a document URL and reading that if possible.
Chris@439 176
Chris@439 177 // Because we may want to refer to this document again, we
Chris@439 178 // cache it locally if it turns out to exist.
Chris@439 179
Chris@439 180 cerr << "PluginRDFIndexer::getIdForPluginURI: NOTE: Failed to find a local RDF document describing plugin <" << uri.toStdString() << ">: attempting to retrieve one remotely by guesswork" << endl;
Chris@439 181
Chris@439 182 QString baseUrl = QUrl(uri).toString(QUrl::RemoveFragment);
Chris@439 183
Chris@457 184 indexURL(baseUrl);
Chris@439 185
Chris@439 186 if (m_uriToIdMap.find(uri) == m_uriToIdMap.end()) {
Chris@439 187 m_uriToIdMap[uri] = "";
Chris@439 188 }
Chris@439 189 }
Chris@439 190
Chris@439 191 return m_uriToIdMap[uri];
Chris@439 192 }
Chris@439 193
Chris@439 194 QString
Chris@439 195 PluginRDFIndexer::getDescriptionURLForPluginId(QString pluginId)
Chris@439 196 {
Chris@461 197 QMutexLocker locker(&m_mutex);
Chris@461 198
Chris@439 199 if (m_idToDescriptionMap.find(pluginId) == m_idToDescriptionMap.end()) return "";
Chris@439 200 return m_idToDescriptionMap[pluginId];
Chris@439 201 }
Chris@439 202
Chris@439 203 QString
Chris@439 204 PluginRDFIndexer::getDescriptionURLForPluginURI(QString uri)
Chris@439 205 {
Chris@461 206 QMutexLocker locker(&m_mutex);
Chris@461 207
Chris@439 208 QString id = getIdForPluginURI(uri);
Chris@439 209 if (id == "") return "";
Chris@439 210 return getDescriptionURLForPluginId(id);
Chris@439 211 }
Chris@439 212
Chris@456 213 QStringList
Chris@456 214 PluginRDFIndexer::getIndexedPluginIds()
Chris@456 215 {
Chris@461 216 QMutexLocker locker(&m_mutex);
Chris@461 217
Chris@456 218 QStringList ids;
Chris@456 219 for (StringMap::const_iterator i = m_idToDescriptionMap.begin();
Chris@456 220 i != m_idToDescriptionMap.end(); ++i) {
Chris@456 221 ids.push_back(i->first);
Chris@456 222 }
Chris@456 223 return ids;
Chris@456 224 }
Chris@456 225
Chris@439 226 bool
Chris@439 227 PluginRDFIndexer::indexFile(QString filepath)
Chris@439 228 {
Chris@439 229 QUrl url = QUrl::fromLocalFile(filepath);
Chris@439 230 QString urlString = url.toString();
Chris@439 231 return indexURL(urlString);
Chris@439 232 }
Chris@439 233
Chris@461 234 void
Chris@461 235 PluginRDFIndexer::expireCacheMaybe(QString urlString)
Chris@461 236 {
Chris@461 237 QString cacheFile = FileSource::getPersistentCacheFilePath(urlString);
Chris@461 238
Chris@461 239 QSettings settings;
Chris@461 240 settings.beginGroup("RDF");
Chris@461 241
Chris@461 242 QString key("rdf-expiry-times");
Chris@461 243
Chris@461 244 QMap<QString, QVariant> expiryMap = settings.value(key).toMap();
Chris@461 245 QDateTime lastExpiry = expiryMap[urlString].toDateTime();
Chris@461 246
Chris@461 247 if (!QFileInfo(cacheFile).exists()) {
Chris@461 248 expiryMap[urlString] = QDateTime::currentDateTime();
Chris@461 249 settings.setValue(key, expiryMap);
Chris@461 250 settings.endGroup();
Chris@461 251 return;
Chris@461 252 }
Chris@461 253
Chris@461 254 if (!lastExpiry.isValid() ||
Chris@461 255 (lastExpiry.addDays(2) < QDateTime::currentDateTime())) {
Chris@461 256
Chris@461 257 std::cerr << "Expiring old cache file " << cacheFile.toStdString()
Chris@461 258 << std::endl;
Chris@461 259
Chris@461 260 if (QFile(cacheFile).remove()) {
Chris@461 261
Chris@461 262 expiryMap[urlString] = QDateTime::currentDateTime();
Chris@461 263 settings.setValue(key, expiryMap);
Chris@461 264 }
Chris@461 265 }
Chris@461 266
Chris@461 267 settings.endGroup();
Chris@461 268 }
Chris@461 269
Chris@439 270 bool
Chris@439 271 PluginRDFIndexer::indexURL(QString urlString)
Chris@439 272 {
Chris@457 273 Profiler profiler("PluginRDFIndexer::indexURL");
Chris@457 274
Chris@461 275 std::cerr << "PluginRDFIndexer::indexURL(" << urlString.toStdString() << ")" << std::endl;
Chris@461 276
Chris@461 277 QMutexLocker locker(&m_mutex);
Chris@461 278
Chris@457 279 QString localString = urlString;
Chris@457 280
Chris@457 281 if (FileSource::isRemote(urlString) &&
Chris@457 282 FileSource::canHandleScheme(urlString)) {
Chris@457 283
Chris@461 284 //!!! how do we avoid hammering the server if it doesn't have
Chris@461 285 //!!! the file, and/or the network if it can't get through?
Chris@461 286
Chris@461 287 expireCacheMaybe(urlString);
Chris@461 288
Chris@460 289 FileSource *source = new FileSource
Chris@460 290 (urlString, 0, FileSource::PersistentCache);
Chris@457 291 if (!source->isAvailable()) {
Chris@457 292 delete source;
Chris@457 293 return false;
Chris@457 294 }
Chris@457 295 source->waitForData();
Chris@457 296 localString = QUrl::fromLocalFile(source->getLocalFilename()).toString();
Chris@457 297 m_sources.insert(source);
Chris@457 298 }
Chris@457 299
Chris@439 300 // cerr << "PluginRDFIndexer::indexURL: url = <" << urlString.toStdString() << ">" << endl;
Chris@439 301
Chris@439 302 SimpleSPARQLQuery query
Chris@439 303 (QString
Chris@439 304 (
Chris@439 305 " PREFIX vamp: <http://purl.org/ontology/vamp/> "
Chris@439 306
Chris@439 307 " SELECT ?plugin ?library_id ?plugin_id "
Chris@439 308 " FROM <%1> "
Chris@439 309
Chris@439 310 " WHERE { "
Chris@439 311 " ?plugin a vamp:Plugin . "
Chris@439 312
Chris@439 313 // Make the identifier and library parts optional, so
Chris@439 314 // that we can check and report helpfully if one or both
Chris@439 315 // is absent instead of just getting no results
Chris@439 316
Chris@440 317 //!!! No -- because of rasqal's inability to correctly
Chris@440 318 // handle more than one OPTIONAL graph in a query, let's
Chris@440 319 // make identifier compulsory after all
Chris@440 320 //" OPTIONAL { ?plugin vamp:identifier ?plugin_id } . "
Chris@440 321
Chris@440 322 " ?plugin vamp:identifier ?plugin_id . "
Chris@439 323
Chris@439 324 " OPTIONAL { "
Chris@439 325 " ?library a vamp:PluginLibrary ; "
Chris@439 326 " vamp:available_plugin ?plugin ; "
Chris@439 327 " vamp:identifier ?library_id "
Chris@439 328 " } "
Chris@439 329 " } "
Chris@439 330 )
Chris@457 331 .arg(localString));
Chris@439 332
Chris@439 333 SimpleSPARQLQuery::ResultList results = query.execute();
Chris@439 334
Chris@439 335 if (!query.isOK()) {
Chris@439 336 cerr << "ERROR: PluginRDFIndexer::indexURL: ERROR: Failed to index document at <"
Chris@439 337 << urlString.toStdString() << ">: "
Chris@439 338 << query.getErrorString().toStdString() << endl;
Chris@439 339 return false;
Chris@439 340 }
Chris@439 341
Chris@439 342 if (results.empty()) {
Chris@439 343 cerr << "PluginRDFIndexer::indexURL: NOTE: Document at <"
Chris@439 344 << urlString.toStdString()
Chris@439 345 << "> does not describe any vamp:Plugin resources" << endl;
Chris@439 346 return false;
Chris@439 347 }
Chris@439 348
Chris@439 349 bool foundSomething = false;
Chris@439 350 bool addedSomething = false;
Chris@439 351
Chris@439 352 for (SimpleSPARQLQuery::ResultList::iterator i = results.begin();
Chris@439 353 i != results.end(); ++i) {
Chris@439 354
Chris@439 355 QString pluginUri = (*i)["plugin"].value;
Chris@439 356 QString soname = (*i)["library_id"].value;
Chris@439 357 QString identifier = (*i)["plugin_id"].value;
Chris@439 358
Chris@439 359 if (identifier == "") {
Chris@439 360 cerr << "PluginRDFIndexer::indexURL: NOTE: Document at <"
Chris@439 361 << urlString.toStdString()
Chris@439 362 << "> fails to define any vamp:identifier for plugin <"
Chris@439 363 << pluginUri.toStdString() << ">"
Chris@439 364 << endl;
Chris@439 365 continue;
Chris@439 366 }
Chris@439 367 if (soname == "") {
Chris@439 368 cerr << "PluginRDFIndexer::indexURL: NOTE: Document at <"
Chris@439 369 << urlString.toStdString() << "> does not associate plugin <"
Chris@439 370 << pluginUri.toStdString() << "> with any implementation library"
Chris@439 371 << endl;
Chris@439 372 continue;
Chris@439 373 }
Chris@439 374 /*
Chris@439 375 cerr << "PluginRDFIndexer::indexURL: Document for plugin \""
Chris@439 376 << soname.toStdString() << ":" << identifier.toStdString()
Chris@439 377 << "\" (uri <" << pluginUri.toStdString() << ">) is at url <"
Chris@439 378 << urlString.toStdString() << ">" << endl;
Chris@439 379 */
Chris@439 380 QString pluginId = PluginIdentifier::createIdentifier
Chris@439 381 ("vamp", soname, identifier);
Chris@439 382
Chris@439 383 foundSomething = true;
Chris@439 384
Chris@439 385 if (m_idToDescriptionMap.find(pluginId) != m_idToDescriptionMap.end()) {
Chris@439 386 cerr << "PluginRDFIndexer::indexURL: NOTE: Plugin id \""
Chris@439 387 << pluginId.toStdString() << "\", described in document at <"
Chris@439 388 << urlString.toStdString()
Chris@439 389 << ">, has already been described in document <"
Chris@439 390 << m_idToDescriptionMap[pluginId].toStdString()
Chris@439 391 << ">: ignoring this new description" << endl;
Chris@439 392 continue;
Chris@439 393 }
Chris@439 394
Chris@439 395 m_idToDescriptionMap[pluginId] = urlString;
Chris@439 396 m_idToUriMap[pluginId] = pluginUri;
Chris@439 397
Chris@439 398 addedSomething = true;
Chris@439 399
Chris@439 400 if (pluginUri != "") {
Chris@439 401 if (m_uriToIdMap.find(pluginUri) != m_uriToIdMap.end()) {
Chris@439 402 cerr << "PluginRDFIndexer::indexURL: WARNING: Found multiple plugins with the same URI:" << endl;
Chris@439 403 cerr << " 1. Plugin id \"" << m_uriToIdMap[pluginUri].toStdString() << "\"" << endl;
Chris@439 404 cerr << " described in <" << m_idToDescriptionMap[m_uriToIdMap[pluginUri]].toStdString() << ">" << endl;
Chris@439 405 cerr << " 2. Plugin id \"" << pluginId.toStdString() << "\"" << endl;
Chris@439 406 cerr << " described in <" << urlString.toStdString() << ">" << endl;
Chris@439 407 cerr << "both claim URI <" << pluginUri.toStdString() << ">" << endl;
Chris@439 408 } else {
Chris@439 409 m_uriToIdMap[pluginUri] = pluginId;
Chris@439 410 }
Chris@439 411 }
Chris@439 412 }
Chris@439 413
Chris@439 414 if (!foundSomething) {
Chris@439 415 cerr << "PluginRDFIndexer::indexURL: NOTE: Document at <"
Chris@439 416 << urlString.toStdString()
Chris@439 417 << "> does not sufficiently describe any plugins" << endl;
Chris@439 418 }
Chris@439 419
Chris@439 420 return addedSomething;
Chris@439 421 }
Chris@439 422
Chris@439 423
Chris@439 424