comparison rdf/PluginRDFIndexer.cpp @ 489:82ab61fa9223

* Reorganise our SPARQL queries on the basis that Redland must be available, rather than merely optional. For anything querying the pool of data about plugins, we use a single datastore and model, which is initialised at the outset by PluginRDFIndexer and then queried directly; for anything that "reads from a file" (e.g. loading annotations), we query directly using Rasqal, going to the datastore when we need additional plugin-related information. This may improve performance, but mostly it simplifies the code and fixes a serious issue with RDF import in the previous versions (namely, that multiple sequential RDF imports would end up sharing the same RDF data pool!).
author Chris Cannam
date Fri, 21 Nov 2008 16:12:29 +0000
parents b13213785a6f
children 1b8c748fd7ea
comparison
equal deleted inserted replaced
488:1c66e199e7d9 489:82ab61fa9223
85 (filters, QDir::Files | QDir::Readable); 85 (filters, QDir::Files | QDir::Readable);
86 86
87 for (QStringList::const_iterator j = entries.begin(); 87 for (QStringList::const_iterator j = entries.begin();
88 j != entries.end(); ++j) { 88 j != entries.end(); ++j) {
89 QFileInfo fi(dir.filePath(*j)); 89 QFileInfo fi(dir.filePath(*j));
90 indexFile(fi.absoluteFilePath()); 90 pullFile(fi.absoluteFilePath());
91 } 91 }
92 92
93 QStringList subdirs = dir.entryList 93 QStringList subdirs = dir.entryList
94 (QDir::AllDirs | QDir::NoDotAndDotDot | QDir::Readable); 94 (QDir::AllDirs | QDir::NoDotAndDotDot | QDir::Readable);
95 95
100 entries = subdir.entryList 100 entries = subdir.entryList
101 (filters, QDir::Files | QDir::Readable); 101 (filters, QDir::Files | QDir::Readable);
102 for (QStringList::const_iterator k = entries.begin(); 102 for (QStringList::const_iterator k = entries.begin();
103 k != entries.end(); ++k) { 103 k != entries.end(); ++k) {
104 QFileInfo fi(subdir.filePath(*k)); 104 QFileInfo fi(subdir.filePath(*k));
105 indexFile(fi.absoluteFilePath()); 105 pullFile(fi.absoluteFilePath());
106 } 106 }
107 } 107 }
108 } 108 }
109 } 109 }
110
111 reindex();
110 } 112 }
111 113
112 bool 114 bool
113 PluginRDFIndexer::indexConfiguredURLs() 115 PluginRDFIndexer::indexConfiguredURLs()
114 { 116 {
138 PlaylistFileReader::Playlist list = reader.load(); 140 PlaylistFileReader::Playlist list = reader.load();
139 for (PlaylistFileReader::Playlist::const_iterator j = list.begin(); 141 for (PlaylistFileReader::Playlist::const_iterator j = list.begin();
140 j != list.end(); ++j) { 142 j != list.end(); ++j) {
141 std::cerr << "PluginRDFIndexer::indexConfiguredURLs: url is " 143 std::cerr << "PluginRDFIndexer::indexConfiguredURLs: url is "
142 << j->toStdString() << std::endl; 144 << j->toStdString() << std::endl;
143 indexURL(*j); 145 pullURL(*j);
144 } 146 }
145 } 147 }
146 148
147 QString urlListKey("rdf-urls"); 149 QString urlListKey("rdf-urls");
148 QStringList urls = settings.value(urlListKey).toStringList(); 150 QStringList urls = settings.value(urlListKey).toStringList();
149 151
150 for (int i = 0; i < urls.size(); ++i) { 152 for (int i = 0; i < urls.size(); ++i) {
151 indexURL(urls[i]); 153 pullURL(urls[i]);
152 } 154 }
153 155
154 settings.endGroup(); 156 settings.endGroup();
157 reindex();
155 return true; 158 return true;
156 } 159 }
157 160
158 QString 161 QString
159 PluginRDFIndexer::getURIForPluginId(QString pluginId) 162 PluginRDFIndexer::getURIForPluginId(QString pluginId)
196 QString id = m_uriToIdMap[uri]; 199 QString id = m_uriToIdMap[uri];
197 m_mutex.unlock(); 200 m_mutex.unlock();
198 return id; 201 return id;
199 } 202 }
200 203
201 QString
202 PluginRDFIndexer::getDescriptionURLForPluginId(QString pluginId)
203 {
204 QMutexLocker locker(&m_mutex);
205
206 if (m_idToDescriptionMap.find(pluginId) == m_idToDescriptionMap.end()) return "";
207 return m_idToDescriptionMap[pluginId];
208 }
209
210 QString
211 PluginRDFIndexer::getDescriptionURLForPluginURI(QString uri)
212 {
213 QMutexLocker locker(&m_mutex);
214
215 QString id = getIdForPluginURI(uri);
216 if (id == "") return "";
217 return getDescriptionURLForPluginId(id);
218 }
219
220 QStringList 204 QStringList
221 PluginRDFIndexer::getIndexedPluginIds() 205 PluginRDFIndexer::getIndexedPluginIds()
222 { 206 {
223 QMutexLocker locker(&m_mutex); 207 QMutexLocker locker(&m_mutex);
224 208
225 QStringList ids; 209 QStringList ids;
226 for (StringMap::const_iterator i = m_idToDescriptionMap.begin(); 210 for (StringMap::const_iterator i = m_idToUriMap.begin();
227 i != m_idToDescriptionMap.end(); ++i) { 211 i != m_idToUriMap.end(); ++i) {
228 ids.push_back(i->first); 212 ids.push_back(i->first);
229 } 213 }
230 return ids; 214 return ids;
231 } 215 }
232 216
233 bool 217 bool
234 PluginRDFIndexer::indexFile(QString filepath) 218 PluginRDFIndexer::pullFile(QString filepath)
235 { 219 {
236 QUrl url = QUrl::fromLocalFile(filepath); 220 QUrl url = QUrl::fromLocalFile(filepath);
237 QString urlString = url.toString(); 221 QString urlString = url.toString();
238 return indexURL(urlString); 222 return pullURL(urlString);
239 } 223 }
240 224
241 bool 225 bool
242 PluginRDFIndexer::indexURL(QString urlString) 226 PluginRDFIndexer::indexURL(QString urlString)
227 {
228 bool pulled = pullURL(urlString);
229 if (!pulled) return false;
230 reindex();
231 return true;
232 }
233
234 bool
235 PluginRDFIndexer::pullURL(QString urlString)
243 { 236 {
244 Profiler profiler("PluginRDFIndexer::indexURL"); 237 Profiler profiler("PluginRDFIndexer::indexURL");
245 238
246 std::cerr << "PluginRDFIndexer::indexURL(" << urlString.toStdString() << ")" << std::endl; 239 std::cerr << "PluginRDFIndexer::indexURL(" << urlString.toStdString() << ")" << std::endl;
247 240
256 if (!cf.isOK()) { 249 if (!cf.isOK()) {
257 return false; 250 return false;
258 } 251 }
259 252
260 localString = QUrl::fromLocalFile(cf.getLocalFilename()).toString(); 253 localString = QUrl::fromLocalFile(cf.getLocalFilename()).toString();
261 // localString = "file://" + cf.getLocalFilename(); //!!! crud - fix! 254 }
262 } 255
263 256 return SimpleSPARQLQuery::addSourceToModel(localString);
264 // cerr << "PluginRDFIndexer::indexURL: url = <" << urlString.toStdString() << ">" << endl; 257 }
265 /*!!! 258
259 bool
260 PluginRDFIndexer::reindex()
261 {
262 SimpleSPARQLQuery::QueryType m = SimpleSPARQLQuery::QueryFromModel;
263
266 SimpleSPARQLQuery query 264 SimpleSPARQLQuery query
267 (localString, 265 (m,
268 QString 266 QString
269 ( 267 (
270 " PREFIX vamp: <http://purl.org/ontology/vamp/> " 268 " PREFIX vamp: <http://purl.org/ontology/vamp/> "
271 269
272 " SELECT ?plugin ?library_id ?plugin_id "
273 " FROM <%1> "
274
275 " WHERE { "
276 " ?plugin a vamp:Plugin . "
277
278 // Make the identifier and library parts optional, so
279 // that we can check and report helpfully if one or both
280 // is absent instead of just getting no results
281
282 //!!! No -- because of rasqal's inability to correctly
283 // handle more than one OPTIONAL graph in a query, let's
284 // make identifier compulsory after all
285 //" OPTIONAL { ?plugin vamp:identifier ?plugin_id } . "
286
287 " ?plugin vamp:identifier ?plugin_id . "
288
289 " OPTIONAL { "
290 " ?library a vamp:PluginLibrary ; "
291 " vamp:available_plugin ?plugin ; "
292 " vamp:identifier ?library_id "
293 " } "
294 " } "
295 )
296 .arg(localString));
297 */
298 SimpleSPARQLQuery query
299 (localString,
300 QString
301 (
302 " PREFIX vamp: <http://purl.org/ontology/vamp/> "
303
304 " SELECT ?plugin ?library ?plugin_id " 270 " SELECT ?plugin ?library ?plugin_id "
305 " FROM <%1> "
306 271
307 " WHERE { " 272 " WHERE { "
308 " ?plugin a vamp:Plugin . " 273 " ?plugin a vamp:Plugin . "
309 " ?plugin vamp:identifier ?plugin_id . " 274 " ?plugin vamp:identifier ?plugin_id . "
310 275
311 " OPTIONAL { " 276 " OPTIONAL { "
312 " ?library vamp:available_plugin ?plugin " 277 " ?library vamp:available_plugin ?plugin "
313 " } " 278 " } "
314 " } " 279 " } "
315 ) 280 ));
316 .arg(localString));
317 281
318 SimpleSPARQLQuery::ResultList results = query.execute(); 282 SimpleSPARQLQuery::ResultList results = query.execute();
319 283
320 if (!query.isOK()) { 284 if (!query.isOK()) {
321 cerr << "ERROR: PluginRDFIndexer::indexURL: ERROR: Failed to index document at <" 285 cerr << "ERROR: PluginRDFIndexer::reindex: ERROR: Failed to query plugins from model: "
322 << urlString.toStdString() << ">: "
323 << query.getErrorString().toStdString() << endl; 286 << query.getErrorString().toStdString() << endl;
324 return false; 287 return false;
325 } 288 }
326 289
327 if (results.empty()) { 290 if (results.empty()) {
328 cerr << "PluginRDFIndexer::indexURL: NOTE: Document at <" 291 cerr << "PluginRDFIndexer::reindex: NOTE: no vamp:Plugin resources found in indexed documents" << endl;
329 << urlString.toStdString()
330 << "> does not describe any vamp:Plugin resources" << endl;
331 return false; 292 return false;
332 } 293 }
333 294
334 bool foundSomething = false; 295 bool foundSomething = false;
335 bool addedSomething = false; 296 bool addedSomething = false;
336 297
337 for (SimpleSPARQLQuery::ResultList::iterator i = results.begin(); 298 for (SimpleSPARQLQuery::ResultList::iterator i = results.begin();
338 i != results.end(); ++i) { 299 i != results.end(); ++i) {
339 300
340 QString pluginUri = (*i)["plugin"].value; 301 QString pluginUri = (*i)["plugin"].value;
341 //!!! QString soname = (*i)["library_id"].value;
342 QString soUri = (*i)["library"].value; 302 QString soUri = (*i)["library"].value;
343 QString identifier = (*i)["plugin_id"].value; 303 QString identifier = (*i)["plugin_id"].value;
344 304
345 if (identifier == "") { 305 if (identifier == "") {
346 cerr << "PluginRDFIndexer::indexURL: NOTE: No vamp:identifier for plugin <" 306 cerr << "PluginRDFIndexer::reindex: NOTE: No vamp:identifier for plugin <"
347 << pluginUri.toStdString() << ">" 307 << pluginUri.toStdString() << ">"
348 << endl; 308 << endl;
349 continue; 309 continue;
350 } 310 }
351 if (soUri == "") { 311 if (soUri == "") {
352 cerr << "PluginRDFIndexer::indexURL: NOTE: No implementation library for plugin <" 312 cerr << "PluginRDFIndexer::reindex: NOTE: No implementation library for plugin <"
353 << pluginUri.toStdString() << ">" 313 << pluginUri.toStdString() << ">"
354 << endl; 314 << endl;
355 continue; 315 continue;
356 } 316 }
357 317
358 QString sonameQuery = 318 QString sonameQuery =
359 QString( 319 QString(
360 " PREFIX vamp: <http://purl.org/ontology/vamp/> " 320 " PREFIX vamp: <http://purl.org/ontology/vamp/> "
361 " SELECT ?library_id " 321 " SELECT ?library_id "
362 " FROM <%1> "
363 " WHERE { " 322 " WHERE { "
364 " <%2> vamp:identifier ?library_id " 323 " <%1> vamp:identifier ?library_id "
365 " } " 324 " } "
366 ) 325 )
367 .arg(localString)
368 .arg(soUri); 326 .arg(soUri);
369 327
370 SimpleSPARQLQuery::Value sonameValue = 328 SimpleSPARQLQuery::Value sonameValue =
371 SimpleSPARQLQuery::singleResultQuery(localString, sonameQuery, "library_id"); 329 SimpleSPARQLQuery::singleResultQuery(m, sonameQuery, "library_id");
372 QString soname = sonameValue.value; 330 QString soname = sonameValue.value;
373 if (soname == "") { 331 if (soname == "") {
374 cerr << "PluginRDFIndexer::indexURL: NOTE: No identifier for library <" 332 cerr << "PluginRDFIndexer::reindex: NOTE: No identifier for library <"
375 << soUri.toStdString() << ">" 333 << soUri.toStdString() << ">"
376 << endl; 334 << endl;
377 continue; 335 continue;
378 } 336 }
379 337
380
381 /*
382 cerr << "PluginRDFIndexer::indexURL: Document for plugin \""
383 << soname.toStdString() << ":" << identifier.toStdString()
384 << "\" (uri <" << pluginUri.toStdString() << ">) is at url <"
385 << urlString.toStdString() << ">" << endl;
386 */
387 QString pluginId = PluginIdentifier::createIdentifier 338 QString pluginId = PluginIdentifier::createIdentifier
388 ("vamp", soname, identifier); 339 ("vamp", soname, identifier);
389 340
390 foundSomething = true; 341 foundSomething = true;
391 342
392 if (m_idToDescriptionMap.find(pluginId) != m_idToDescriptionMap.end()) { 343 if (m_idToUriMap.find(pluginId) != m_idToUriMap.end()) {
393 /*!!!
394
395 This can happen quite legitimately when using an RDF datastore rather
396 than querying individual files, as of course the datastore contains
397 all plugin data found so far, and each time a file is added to it,
398 subsequent queries will return all older plugins as well.
399
400 It would be more efficient to add everything at once and then do all
401 queries, of course.
402
403 cerr << "PluginRDFIndexer::indexURL: NOTE: Plugin id \""
404 << pluginId.toStdString() << "\", described in document at <"
405 << urlString.toStdString()
406 << ">, has already been described in document <"
407 << m_idToDescriptionMap[pluginId].toStdString()
408 << ">: ignoring this new description" << endl;
409 */
410 continue; 344 continue;
411 } 345 }
412 346
413 m_idToDescriptionMap[pluginId] = urlString;
414 m_idToUriMap[pluginId] = pluginUri; 347 m_idToUriMap[pluginId] = pluginUri;
415 348
416 addedSomething = true; 349 addedSomething = true;
417 350
418 if (pluginUri != "") { 351 if (pluginUri != "") {
419 if (m_uriToIdMap.find(pluginUri) != m_uriToIdMap.end()) { 352 if (m_uriToIdMap.find(pluginUri) != m_uriToIdMap.end()) {
420 cerr << "PluginRDFIndexer::indexURL: WARNING: Found multiple plugins with the same URI:" << endl; 353 cerr << "PluginRDFIndexer::reindex: WARNING: Found multiple plugins with the same URI:" << endl;
421 cerr << " 1. Plugin id \"" << m_uriToIdMap[pluginUri].toStdString() << "\"" << endl; 354 cerr << " 1. Plugin id \"" << m_uriToIdMap[pluginUri].toStdString() << "\"" << endl;
422 cerr << " described in <" << m_idToDescriptionMap[m_uriToIdMap[pluginUri]].toStdString() << ">" << endl;
423 cerr << " 2. Plugin id \"" << pluginId.toStdString() << "\"" << endl; 355 cerr << " 2. Plugin id \"" << pluginId.toStdString() << "\"" << endl;
424 cerr << " described in <" << urlString.toStdString() << ">" << endl;
425 cerr << "both claim URI <" << pluginUri.toStdString() << ">" << endl; 356 cerr << "both claim URI <" << pluginUri.toStdString() << ">" << endl;
426 } else { 357 } else {
427 m_uriToIdMap[pluginUri] = pluginId; 358 m_uriToIdMap[pluginUri] = pluginId;
428 } 359 }
429 } 360 }
430 } 361 }
431 362
432 if (!foundSomething) { 363 if (!foundSomething) {
433 cerr << "PluginRDFIndexer::indexURL: NOTE: Document at <" 364 cerr << "PluginRDFIndexer::reindex: NOTE: Plugins found, but none sufficiently described" << endl;
434 << urlString.toStdString()
435 << "> does not sufficiently describe any plugins" << endl;
436 } 365 }
437 366
438 return addedSomething; 367 return addedSomething;
439 } 368 }
440 369