annotate rdf/PluginRDFIndexer.cpp @ 1257:5236543343c3 3.0-integration

A note on cache hit findings
author Chris Cannam
date Thu, 10 Nov 2016 09:23:05 +0000
parents f5cd33909744
children 6a7ea3bd0e10
rev   line source
Chris@439 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@439 2
Chris@439 3 /*
Chris@439 4 Sonic Visualiser
Chris@439 5 An audio file viewer and annotation editor.
Chris@439 6 Centre for Digital Music, Queen Mary, University of London.
Chris@727 7 This file copyright 2008-2012 QMUL.
Chris@439 8
Chris@439 9 This program is free software; you can redistribute it and/or
Chris@439 10 modify it under the terms of the GNU General Public License as
Chris@439 11 published by the Free Software Foundation; either version 2 of the
Chris@439 12 License, or (at your option) any later version. See the file
Chris@439 13 COPYING included with this distribution for more information.
Chris@439 14 */
Chris@439 15
Chris@439 16 #include "PluginRDFIndexer.h"
Chris@439 17
Chris@467 18 #include "data/fileio/CachedFile.h"
Chris@471 19 #include "data/fileio/FileSource.h"
Chris@461 20 #include "data/fileio/PlaylistFileReader.h"
Chris@439 21 #include "plugin/PluginIdentifier.h"
Chris@439 22
Chris@457 23 #include "base/Profiler.h"
Chris@457 24
Chris@475 25 #include <vamp-hostsdk/PluginHostAdapter.h>
Chris@439 26
Chris@725 27 #include <dataquay/BasicStore.h>
Chris@725 28 #include <dataquay/RDFException.h>
Chris@725 29
Chris@439 30 #include <QFileInfo>
Chris@439 31 #include <QDir>
Chris@439 32 #include <QUrl>
Chris@461 33 #include <QDateTime>
Chris@461 34 #include <QSettings>
Chris@461 35 #include <QFile>
Chris@439 36
Chris@439 37 #include <iostream>
Chris@843 38
Chris@439 39 using std::vector;
Chris@439 40 using std::string;
Chris@439 41 using Vamp::PluginHostAdapter;
Chris@439 42
Chris@725 43 using Dataquay::Uri;
Chris@725 44 using Dataquay::Node;
Chris@725 45 using Dataquay::Nodes;
Chris@725 46 using Dataquay::Triple;
Chris@725 47 using Dataquay::Triples;
Chris@725 48 using Dataquay::BasicStore;
Chris@725 49 using Dataquay::RDFException;
Chris@725 50 using Dataquay::RDFDuplicateImportException;
Chris@725 51
Chris@439 52 PluginRDFIndexer *
Chris@439 53 PluginRDFIndexer::m_instance = 0;
Chris@439 54
Chris@439 55 PluginRDFIndexer *
Chris@439 56 PluginRDFIndexer::getInstance()
Chris@439 57 {
Chris@439 58 if (!m_instance) m_instance = new PluginRDFIndexer();
Chris@439 59 return m_instance;
Chris@439 60 }
Chris@439 61
Chris@725 62 PluginRDFIndexer::PluginRDFIndexer() :
Chris@725 63 m_index(new Dataquay::BasicStore)
Chris@439 64 {
Chris@725 65 m_index->addPrefix("vamp", Uri("http://purl.org/ontology/vamp/"));
Chris@725 66 m_index->addPrefix("foaf", Uri("http://xmlns.com/foaf/0.1/"));
Chris@725 67 m_index->addPrefix("dc", Uri("http://purl.org/dc/elements/1.1/"));
Chris@477 68 indexInstalledURLs();
Chris@477 69 }
Chris@477 70
Chris@725 71 const BasicStore *
Chris@725 72 PluginRDFIndexer::getIndex()
Chris@725 73 {
Chris@725 74 return m_index;
Chris@725 75 }
Chris@725 76
Chris@477 77 PluginRDFIndexer::~PluginRDFIndexer()
Chris@477 78 {
Chris@477 79 QMutexLocker locker(&m_mutex);
Chris@477 80 }
Chris@477 81
Chris@477 82 void
Chris@477 83 PluginRDFIndexer::indexInstalledURLs()
Chris@477 84 {
Chris@439 85 vector<string> paths = PluginHostAdapter::getPluginPath();
Chris@439 86
Chris@843 87 // cerr << "\nPluginRDFIndexer::indexInstalledURLs: pid is " << getpid() << endl;
Chris@730 88
Chris@439 89 QStringList filters;
Chris@731 90 filters << "*.ttl";
Chris@731 91 filters << "*.TTL";
Chris@439 92 filters << "*.n3";
Chris@439 93 filters << "*.N3";
Chris@439 94 filters << "*.rdf";
Chris@439 95 filters << "*.RDF";
Chris@439 96
Chris@731 97 // Search each Vamp plugin path for an RDF file that either has
Chris@439 98 // name "soname", "soname:label" or "soname/label" plus RDF
Chris@731 99 // extension. Use that order of preference, and prefer ttl over
Chris@731 100 // n3 over rdf extension.
Chris@439 101
Chris@439 102 for (vector<string>::const_iterator i = paths.begin(); i != paths.end(); ++i) {
Chris@718 103
Chris@439 104 QDir dir(i->c_str());
Chris@439 105 if (!dir.exists()) continue;
Chris@439 106
Chris@439 107 QStringList entries = dir.entryList
Chris@439 108 (filters, QDir::Files | QDir::Readable);
Chris@439 109
Chris@439 110 for (QStringList::const_iterator j = entries.begin();
Chris@439 111 j != entries.end(); ++j) {
Chris@718 112
Chris@439 113 QFileInfo fi(dir.filePath(*j));
Chris@489 114 pullFile(fi.absoluteFilePath());
Chris@439 115 }
Chris@439 116
Chris@439 117 QStringList subdirs = dir.entryList
Chris@439 118 (QDir::AllDirs | QDir::NoDotAndDotDot | QDir::Readable);
Chris@439 119
Chris@439 120 for (QStringList::const_iterator j = subdirs.begin();
Chris@439 121 j != subdirs.end(); ++j) {
Chris@718 122
Chris@439 123 QDir subdir(dir.filePath(*j));
Chris@439 124 if (subdir.exists()) {
Chris@439 125 entries = subdir.entryList
Chris@439 126 (filters, QDir::Files | QDir::Readable);
Chris@439 127 for (QStringList::const_iterator k = entries.begin();
Chris@439 128 k != entries.end(); ++k) {
Chris@439 129 QFileInfo fi(subdir.filePath(*k));
Chris@489 130 pullFile(fi.absoluteFilePath());
Chris@439 131 }
Chris@439 132 }
Chris@439 133 }
Chris@439 134 }
Chris@489 135
Chris@489 136 reindex();
Chris@439 137 }
Chris@439 138
Chris@461 139 bool
Chris@461 140 PluginRDFIndexer::indexConfiguredURLs()
Chris@461 141 {
Chris@690 142 SVDEBUG << "PluginRDFIndexer::indexConfiguredURLs" << endl;
Chris@461 143
Chris@461 144 QSettings settings;
Chris@461 145 settings.beginGroup("RDF");
Chris@461 146
Chris@461 147 QString indexKey("rdf-indices");
Chris@461 148 QStringList indices = settings.value(indexKey).toStringList();
Chris@461 149
Chris@461 150 for (int i = 0; i < indices.size(); ++i) {
Chris@461 151
Chris@461 152 QString index = indices[i];
Chris@461 153
Chris@690 154 SVDEBUG << "PluginRDFIndexer::indexConfiguredURLs: index url is "
Chris@687 155 << index << endl;
Chris@461 156
Chris@467 157 CachedFile cf(index);
Chris@467 158 if (!cf.isOK()) continue;
Chris@467 159
Chris@467 160 FileSource indexSource(cf.getLocalFilename());
Chris@461 161
Chris@461 162 PlaylistFileReader reader(indexSource);
Chris@461 163 if (!reader.isOK()) continue;
Chris@461 164
Chris@461 165 PlaylistFileReader::Playlist list = reader.load();
Chris@461 166 for (PlaylistFileReader::Playlist::const_iterator j = list.begin();
Chris@461 167 j != list.end(); ++j) {
Chris@690 168 SVDEBUG << "PluginRDFIndexer::indexConfiguredURLs: url is "
Chris@844 169 << *j << endl;
Chris@489 170 pullURL(*j);
Chris@461 171 }
Chris@461 172 }
Chris@461 173
Chris@461 174 QString urlListKey("rdf-urls");
Chris@461 175 QStringList urls = settings.value(urlListKey).toStringList();
Chris@461 176
Chris@461 177 for (int i = 0; i < urls.size(); ++i) {
Chris@489 178 pullURL(urls[i]);
Chris@461 179 }
Chris@461 180
Chris@461 181 settings.endGroup();
Chris@489 182 reindex();
Chris@461 183 return true;
Chris@461 184 }
Chris@461 185
Chris@439 186 QString
Chris@439 187 PluginRDFIndexer::getURIForPluginId(QString pluginId)
Chris@439 188 {
Chris@461 189 QMutexLocker locker(&m_mutex);
Chris@461 190
Chris@439 191 if (m_idToUriMap.find(pluginId) == m_idToUriMap.end()) return "";
Chris@439 192 return m_idToUriMap[pluginId];
Chris@439 193 }
Chris@439 194
Chris@439 195 QString
Chris@439 196 PluginRDFIndexer::getIdForPluginURI(QString uri)
Chris@439 197 {
Chris@476 198 m_mutex.lock();
Chris@461 199
Chris@439 200 if (m_uriToIdMap.find(uri) == m_uriToIdMap.end()) {
Chris@439 201
Chris@476 202 m_mutex.unlock();
Chris@476 203
Chris@439 204 // Haven't found this uri referenced in any document on the
Chris@439 205 // local filesystem; try resolving the pre-fragment part of
Chris@439 206 // the uri as a document URL and reading that if possible.
Chris@439 207
Chris@439 208 // Because we may want to refer to this document again, we
Chris@439 209 // cache it locally if it turns out to exist.
Chris@439 210
Chris@686 211 cerr << "PluginRDFIndexer::getIdForPluginURI: NOTE: Failed to find a local RDF document describing plugin <" << uri << ">: attempting to retrieve one remotely by guesswork" << endl;
Chris@439 212
Chris@439 213 QString baseUrl = QUrl(uri).toString(QUrl::RemoveFragment);
Chris@439 214
Chris@457 215 indexURL(baseUrl);
Chris@439 216
Chris@476 217 m_mutex.lock();
Chris@476 218
Chris@439 219 if (m_uriToIdMap.find(uri) == m_uriToIdMap.end()) {
Chris@439 220 m_uriToIdMap[uri] = "";
Chris@439 221 }
Chris@439 222 }
Chris@439 223
Chris@476 224 QString id = m_uriToIdMap[uri];
Chris@476 225 m_mutex.unlock();
Chris@476 226 return id;
Chris@439 227 }
Chris@439 228
Chris@456 229 QStringList
Chris@456 230 PluginRDFIndexer::getIndexedPluginIds()
Chris@456 231 {
Chris@461 232 QMutexLocker locker(&m_mutex);
Chris@461 233
Chris@456 234 QStringList ids;
Chris@489 235 for (StringMap::const_iterator i = m_idToUriMap.begin();
Chris@489 236 i != m_idToUriMap.end(); ++i) {
Chris@456 237 ids.push_back(i->first);
Chris@456 238 }
Chris@456 239 return ids;
Chris@456 240 }
Chris@456 241
Chris@439 242 bool
Chris@489 243 PluginRDFIndexer::pullFile(QString filepath)
Chris@439 244 {
Chris@439 245 QUrl url = QUrl::fromLocalFile(filepath);
Chris@439 246 QString urlString = url.toString();
Chris@489 247 return pullURL(urlString);
Chris@439 248 }
Chris@461 249
Chris@439 250 bool
Chris@439 251 PluginRDFIndexer::indexURL(QString urlString)
Chris@439 252 {
Chris@489 253 bool pulled = pullURL(urlString);
Chris@489 254 if (!pulled) return false;
Chris@489 255 reindex();
Chris@489 256 return true;
Chris@489 257 }
Chris@489 258
Chris@489 259 bool
Chris@489 260 PluginRDFIndexer::pullURL(QString urlString)
Chris@489 261 {
Chris@457 262 Profiler profiler("PluginRDFIndexer::indexURL");
Chris@457 263
Chris@844 264 // cerr << "PluginRDFIndexer::indexURL(" << urlString << ")" << endl;
Chris@461 265
Chris@461 266 QMutexLocker locker(&m_mutex);
Chris@461 267
Chris@725 268 QUrl local = urlString;
Chris@457 269
Chris@457 270 if (FileSource::isRemote(urlString) &&
Chris@457 271 FileSource::canHandleScheme(urlString)) {
Chris@457 272
Chris@520 273 CachedFile cf(urlString, 0, "application/rdf+xml");
Chris@467 274 if (!cf.isOK()) {
Chris@467 275 return false;
Chris@467 276 }
Chris@467 277
Chris@725 278 local = QUrl::fromLocalFile(cf.getLocalFilename());
Chris@725 279
Chris@730 280 } else if (urlString.startsWith("file:")) {
Chris@730 281
Chris@730 282 local = QUrl(urlString);
Chris@730 283
Chris@725 284 } else {
Chris@725 285
Chris@725 286 local = QUrl::fromLocalFile(urlString);
Chris@457 287 }
Chris@457 288
Chris@725 289 try {
Chris@725 290 m_index->import(local, BasicStore::ImportFailOnDuplicates);
Chris@725 291 } catch (RDFDuplicateImportException &e) {
Chris@730 292 cerr << e.what() << endl;
Chris@725 293 cerr << "PluginRDFIndexer::pullURL: Document at " << urlString
Chris@730 294 << " duplicates triples found in earlier loaded document -- skipping it" << endl;
Chris@725 295 return false;
Chris@725 296 } catch (RDFException &e) {
Chris@730 297 cerr << e.what() << endl;
Chris@725 298 cerr << "PluginRDFIndexer::pullURL: Failed to import document from "
Chris@725 299 << urlString << ": " << e.what() << endl;
Chris@725 300 return false;
Chris@725 301 }
Chris@725 302 return true;
Chris@489 303 }
Chris@489 304
Chris@489 305 bool
Chris@489 306 PluginRDFIndexer::reindex()
Chris@489 307 {
Chris@725 308 Triples tt = m_index->match
Chris@730 309 (Triple(Node(), Uri("a"), m_index->expand("vamp:Plugin")));
Chris@730 310 Nodes plugins = tt.subjects();
Chris@439 311
Chris@439 312 bool foundSomething = false;
Chris@439 313 bool addedSomething = false;
Chris@439 314
Chris@725 315 foreach (Node plugin, plugins) {
Chris@725 316
Chris@725 317 if (plugin.type != Node::URI) {
Chris@725 318 cerr << "PluginRDFIndexer::reindex: Plugin has no URI: node is "
Chris@725 319 << plugin << endl;
Chris@439 320 continue;
Chris@439 321 }
Chris@725 322
Chris@730 323 Node idn = m_index->complete
Chris@730 324 (Triple(plugin, m_index->expand("vamp:identifier"), Node()));
Chris@730 325
Chris@730 326 if (idn.type != Node::Literal) {
Chris@725 327 cerr << "PluginRDFIndexer::reindex: Plugin " << plugin
Chris@725 328 << " lacks vamp:identifier literal" << endl;
Chris@439 329 continue;
Chris@439 330 }
Chris@481 331
Chris@730 332 Node libn = m_index->complete
Chris@730 333 (Triple(Node(), m_index->expand("vamp:available_plugin"), plugin));
Chris@481 334
Chris@730 335 if (libn.type != Node::URI) {
Chris@725 336 cerr << "PluginRDFIndexer::reindex: Plugin " << plugin
Chris@725 337 << " is not vamp:available_plugin in any library" << endl;
Chris@481 338 continue;
Chris@481 339 }
Chris@481 340
Chris@730 341 Node son = m_index->complete
Chris@730 342 (Triple(libn, m_index->expand("vamp:identifier"), Node()));
Chris@725 343
Chris@730 344 if (son.type != Node::Literal) {
Chris@730 345 cerr << "PluginRDFIndexer::reindex: Library " << libn
Chris@725 346 << " lacks vamp:identifier for soname" << endl;
Chris@725 347 continue;
Chris@725 348 }
Chris@725 349
Chris@725 350 QString pluginUri = plugin.value;
Chris@730 351 QString identifier = idn.value;
Chris@730 352 QString soname = son.value;
Chris@725 353
Chris@439 354 QString pluginId = PluginIdentifier::createIdentifier
Chris@439 355 ("vamp", soname, identifier);
Chris@439 356
Chris@439 357 foundSomething = true;
Chris@439 358
Chris@489 359 if (m_idToUriMap.find(pluginId) != m_idToUriMap.end()) {
Chris@439 360 continue;
Chris@439 361 }
Chris@439 362
Chris@439 363 m_idToUriMap[pluginId] = pluginUri;
Chris@439 364
Chris@439 365 addedSomething = true;
Chris@439 366
Chris@439 367 if (pluginUri != "") {
Chris@439 368 if (m_uriToIdMap.find(pluginUri) != m_uriToIdMap.end()) {
Chris@718 369 cerr << "PluginRDFIndexer::reindex: WARNING: Found multiple plugins with the same URI:" << endl;
Chris@686 370 cerr << " 1. Plugin id \"" << m_uriToIdMap[pluginUri] << "\"" << endl;
Chris@686 371 cerr << " 2. Plugin id \"" << pluginId << "\"" << endl;
Chris@686 372 cerr << "both claim URI <" << pluginUri << ">" << endl;
Chris@439 373 } else {
Chris@439 374 m_uriToIdMap[pluginUri] = pluginId;
Chris@439 375 }
Chris@439 376 }
Chris@439 377 }
Chris@439 378
Chris@439 379 if (!foundSomething) {
Chris@718 380 cerr << "PluginRDFIndexer::reindex: NOTE: Plugins found, but none sufficiently described" << endl;
Chris@439 381 }
Chris@439 382
Chris@439 383 return addedSomething;
Chris@439 384 }