Mercurial > hg > svcore
comparison rdf/PluginRDFIndexer.cpp @ 489:82ab61fa9223
* Reorganise our SPARQL queries on the basis that Redland must be
available, rather than merely optional. So for anything querying the pool
of data about plugins, we use a single datastore and model which
is initialised at the outset by PluginRDFIndexer and then queried
directly; for anything that "reads from a file" (e.g. loading
annotations) we query directly using Rasqal, going to the
datastore when we need additional plugin-related information.
This may improve performance, but mostly it simplifies the code
and fixes a serious issue with RDF import in the previous versions
(namely that multiple sequential RDF imports would end up sharing
the same RDF data pool!)
author | Chris Cannam |
---|---|
date | Fri, 21 Nov 2008 16:12:29 +0000 |
parents | b13213785a6f |
children | 1b8c748fd7ea |
comparison
equal
deleted
inserted
replaced
488:1c66e199e7d9 | 489:82ab61fa9223 |
---|---|
85 (filters, QDir::Files | QDir::Readable); | 85 (filters, QDir::Files | QDir::Readable); |
86 | 86 |
87 for (QStringList::const_iterator j = entries.begin(); | 87 for (QStringList::const_iterator j = entries.begin(); |
88 j != entries.end(); ++j) { | 88 j != entries.end(); ++j) { |
89 QFileInfo fi(dir.filePath(*j)); | 89 QFileInfo fi(dir.filePath(*j)); |
90 indexFile(fi.absoluteFilePath()); | 90 pullFile(fi.absoluteFilePath()); |
91 } | 91 } |
92 | 92 |
93 QStringList subdirs = dir.entryList | 93 QStringList subdirs = dir.entryList |
94 (QDir::AllDirs | QDir::NoDotAndDotDot | QDir::Readable); | 94 (QDir::AllDirs | QDir::NoDotAndDotDot | QDir::Readable); |
95 | 95 |
100 entries = subdir.entryList | 100 entries = subdir.entryList |
101 (filters, QDir::Files | QDir::Readable); | 101 (filters, QDir::Files | QDir::Readable); |
102 for (QStringList::const_iterator k = entries.begin(); | 102 for (QStringList::const_iterator k = entries.begin(); |
103 k != entries.end(); ++k) { | 103 k != entries.end(); ++k) { |
104 QFileInfo fi(subdir.filePath(*k)); | 104 QFileInfo fi(subdir.filePath(*k)); |
105 indexFile(fi.absoluteFilePath()); | 105 pullFile(fi.absoluteFilePath()); |
106 } | 106 } |
107 } | 107 } |
108 } | 108 } |
109 } | 109 } |
110 | |
111 reindex(); | |
110 } | 112 } |
111 | 113 |
112 bool | 114 bool |
113 PluginRDFIndexer::indexConfiguredURLs() | 115 PluginRDFIndexer::indexConfiguredURLs() |
114 { | 116 { |
138 PlaylistFileReader::Playlist list = reader.load(); | 140 PlaylistFileReader::Playlist list = reader.load(); |
139 for (PlaylistFileReader::Playlist::const_iterator j = list.begin(); | 141 for (PlaylistFileReader::Playlist::const_iterator j = list.begin(); |
140 j != list.end(); ++j) { | 142 j != list.end(); ++j) { |
141 std::cerr << "PluginRDFIndexer::indexConfiguredURLs: url is " | 143 std::cerr << "PluginRDFIndexer::indexConfiguredURLs: url is " |
142 << j->toStdString() << std::endl; | 144 << j->toStdString() << std::endl; |
143 indexURL(*j); | 145 pullURL(*j); |
144 } | 146 } |
145 } | 147 } |
146 | 148 |
147 QString urlListKey("rdf-urls"); | 149 QString urlListKey("rdf-urls"); |
148 QStringList urls = settings.value(urlListKey).toStringList(); | 150 QStringList urls = settings.value(urlListKey).toStringList(); |
149 | 151 |
150 for (int i = 0; i < urls.size(); ++i) { | 152 for (int i = 0; i < urls.size(); ++i) { |
151 indexURL(urls[i]); | 153 pullURL(urls[i]); |
152 } | 154 } |
153 | 155 |
154 settings.endGroup(); | 156 settings.endGroup(); |
157 reindex(); | |
155 return true; | 158 return true; |
156 } | 159 } |
157 | 160 |
158 QString | 161 QString |
159 PluginRDFIndexer::getURIForPluginId(QString pluginId) | 162 PluginRDFIndexer::getURIForPluginId(QString pluginId) |
196 QString id = m_uriToIdMap[uri]; | 199 QString id = m_uriToIdMap[uri]; |
197 m_mutex.unlock(); | 200 m_mutex.unlock(); |
198 return id; | 201 return id; |
199 } | 202 } |
200 | 203 |
201 QString | |
202 PluginRDFIndexer::getDescriptionURLForPluginId(QString pluginId) | |
203 { | |
204 QMutexLocker locker(&m_mutex); | |
205 | |
206 if (m_idToDescriptionMap.find(pluginId) == m_idToDescriptionMap.end()) return ""; | |
207 return m_idToDescriptionMap[pluginId]; | |
208 } | |
209 | |
210 QString | |
211 PluginRDFIndexer::getDescriptionURLForPluginURI(QString uri) | |
212 { | |
213 QMutexLocker locker(&m_mutex); | |
214 | |
215 QString id = getIdForPluginURI(uri); | |
216 if (id == "") return ""; | |
217 return getDescriptionURLForPluginId(id); | |
218 } | |
219 | |
220 QStringList | 204 QStringList |
221 PluginRDFIndexer::getIndexedPluginIds() | 205 PluginRDFIndexer::getIndexedPluginIds() |
222 { | 206 { |
223 QMutexLocker locker(&m_mutex); | 207 QMutexLocker locker(&m_mutex); |
224 | 208 |
225 QStringList ids; | 209 QStringList ids; |
226 for (StringMap::const_iterator i = m_idToDescriptionMap.begin(); | 210 for (StringMap::const_iterator i = m_idToUriMap.begin(); |
227 i != m_idToDescriptionMap.end(); ++i) { | 211 i != m_idToUriMap.end(); ++i) { |
228 ids.push_back(i->first); | 212 ids.push_back(i->first); |
229 } | 213 } |
230 return ids; | 214 return ids; |
231 } | 215 } |
232 | 216 |
233 bool | 217 bool |
234 PluginRDFIndexer::indexFile(QString filepath) | 218 PluginRDFIndexer::pullFile(QString filepath) |
235 { | 219 { |
236 QUrl url = QUrl::fromLocalFile(filepath); | 220 QUrl url = QUrl::fromLocalFile(filepath); |
237 QString urlString = url.toString(); | 221 QString urlString = url.toString(); |
238 return indexURL(urlString); | 222 return pullURL(urlString); |
239 } | 223 } |
240 | 224 |
241 bool | 225 bool |
242 PluginRDFIndexer::indexURL(QString urlString) | 226 PluginRDFIndexer::indexURL(QString urlString) |
227 { | |
228 bool pulled = pullURL(urlString); | |
229 if (!pulled) return false; | |
230 reindex(); | |
231 return true; | |
232 } | |
233 | |
234 bool | |
235 PluginRDFIndexer::pullURL(QString urlString) | |
243 { | 236 { |
244 Profiler profiler("PluginRDFIndexer::indexURL"); | 237 Profiler profiler("PluginRDFIndexer::indexURL"); |
245 | 238 |
246 std::cerr << "PluginRDFIndexer::indexURL(" << urlString.toStdString() << ")" << std::endl; | 239 std::cerr << "PluginRDFIndexer::indexURL(" << urlString.toStdString() << ")" << std::endl; |
247 | 240 |
256 if (!cf.isOK()) { | 249 if (!cf.isOK()) { |
257 return false; | 250 return false; |
258 } | 251 } |
259 | 252 |
260 localString = QUrl::fromLocalFile(cf.getLocalFilename()).toString(); | 253 localString = QUrl::fromLocalFile(cf.getLocalFilename()).toString(); |
261 // localString = "file://" + cf.getLocalFilename(); //!!! crud - fix! | 254 } |
262 } | 255 |
263 | 256 return SimpleSPARQLQuery::addSourceToModel(localString); |
264 // cerr << "PluginRDFIndexer::indexURL: url = <" << urlString.toStdString() << ">" << endl; | 257 } |
265 /*!!! | 258 |
259 bool | |
260 PluginRDFIndexer::reindex() | |
261 { | |
262 SimpleSPARQLQuery::QueryType m = SimpleSPARQLQuery::QueryFromModel; | |
263 | |
266 SimpleSPARQLQuery query | 264 SimpleSPARQLQuery query |
267 (localString, | 265 (m, |
268 QString | 266 QString |
269 ( | 267 ( |
270 " PREFIX vamp: <http://purl.org/ontology/vamp/> " | 268 " PREFIX vamp: <http://purl.org/ontology/vamp/> " |
271 | 269 |
272 " SELECT ?plugin ?library_id ?plugin_id " | |
273 " FROM <%1> " | |
274 | |
275 " WHERE { " | |
276 " ?plugin a vamp:Plugin . " | |
277 | |
278 // Make the identifier and library parts optional, so | |
279 // that we can check and report helpfully if one or both | |
280 // is absent instead of just getting no results | |
281 | |
282 //!!! No -- because of rasqal's inability to correctly | |
283 // handle more than one OPTIONAL graph in a query, let's | |
284 // make identifier compulsory after all | |
285 //" OPTIONAL { ?plugin vamp:identifier ?plugin_id } . " | |
286 | |
287 " ?plugin vamp:identifier ?plugin_id . " | |
288 | |
289 " OPTIONAL { " | |
290 " ?library a vamp:PluginLibrary ; " | |
291 " vamp:available_plugin ?plugin ; " | |
292 " vamp:identifier ?library_id " | |
293 " } " | |
294 " } " | |
295 ) | |
296 .arg(localString)); | |
297 */ | |
298 SimpleSPARQLQuery query | |
299 (localString, | |
300 QString | |
301 ( | |
302 " PREFIX vamp: <http://purl.org/ontology/vamp/> " | |
303 | |
304 " SELECT ?plugin ?library ?plugin_id " | 270 " SELECT ?plugin ?library ?plugin_id " |
305 " FROM <%1> " | |
306 | 271 |
307 " WHERE { " | 272 " WHERE { " |
308 " ?plugin a vamp:Plugin . " | 273 " ?plugin a vamp:Plugin . " |
309 " ?plugin vamp:identifier ?plugin_id . " | 274 " ?plugin vamp:identifier ?plugin_id . " |
310 | 275 |
311 " OPTIONAL { " | 276 " OPTIONAL { " |
312 " ?library vamp:available_plugin ?plugin " | 277 " ?library vamp:available_plugin ?plugin " |
313 " } " | 278 " } " |
314 " } " | 279 " } " |
315 ) | 280 )); |
316 .arg(localString)); | |
317 | 281 |
318 SimpleSPARQLQuery::ResultList results = query.execute(); | 282 SimpleSPARQLQuery::ResultList results = query.execute(); |
319 | 283 |
320 if (!query.isOK()) { | 284 if (!query.isOK()) { |
321 cerr << "ERROR: PluginRDFIndexer::indexURL: ERROR: Failed to index document at <" | 285 cerr << "ERROR: PluginRDFIndexer::reindex: ERROR: Failed to query plugins from model: " |
322 << urlString.toStdString() << ">: " | |
323 << query.getErrorString().toStdString() << endl; | 286 << query.getErrorString().toStdString() << endl; |
324 return false; | 287 return false; |
325 } | 288 } |
326 | 289 |
327 if (results.empty()) { | 290 if (results.empty()) { |
328 cerr << "PluginRDFIndexer::indexURL: NOTE: Document at <" | 291 cerr << "PluginRDFIndexer::reindex: NOTE: no vamp:Plugin resources found in indexed documents" << endl; |
329 << urlString.toStdString() | |
330 << "> does not describe any vamp:Plugin resources" << endl; | |
331 return false; | 292 return false; |
332 } | 293 } |
333 | 294 |
334 bool foundSomething = false; | 295 bool foundSomething = false; |
335 bool addedSomething = false; | 296 bool addedSomething = false; |
336 | 297 |
337 for (SimpleSPARQLQuery::ResultList::iterator i = results.begin(); | 298 for (SimpleSPARQLQuery::ResultList::iterator i = results.begin(); |
338 i != results.end(); ++i) { | 299 i != results.end(); ++i) { |
339 | 300 |
340 QString pluginUri = (*i)["plugin"].value; | 301 QString pluginUri = (*i)["plugin"].value; |
341 //!!! QString soname = (*i)["library_id"].value; | |
342 QString soUri = (*i)["library"].value; | 302 QString soUri = (*i)["library"].value; |
343 QString identifier = (*i)["plugin_id"].value; | 303 QString identifier = (*i)["plugin_id"].value; |
344 | 304 |
345 if (identifier == "") { | 305 if (identifier == "") { |
346 cerr << "PluginRDFIndexer::indexURL: NOTE: No vamp:identifier for plugin <" | 306 cerr << "PluginRDFIndexer::reindex: NOTE: No vamp:identifier for plugin <" |
347 << pluginUri.toStdString() << ">" | 307 << pluginUri.toStdString() << ">" |
348 << endl; | 308 << endl; |
349 continue; | 309 continue; |
350 } | 310 } |
351 if (soUri == "") { | 311 if (soUri == "") { |
352 cerr << "PluginRDFIndexer::indexURL: NOTE: No implementation library for plugin <" | 312 cerr << "PluginRDFIndexer::reindex: NOTE: No implementation library for plugin <" |
353 << pluginUri.toStdString() << ">" | 313 << pluginUri.toStdString() << ">" |
354 << endl; | 314 << endl; |
355 continue; | 315 continue; |
356 } | 316 } |
357 | 317 |
358 QString sonameQuery = | 318 QString sonameQuery = |
359 QString( | 319 QString( |
360 " PREFIX vamp: <http://purl.org/ontology/vamp/> " | 320 " PREFIX vamp: <http://purl.org/ontology/vamp/> " |
361 " SELECT ?library_id " | 321 " SELECT ?library_id " |
362 " FROM <%1> " | |
363 " WHERE { " | 322 " WHERE { " |
364 " <%2> vamp:identifier ?library_id " | 323 " <%1> vamp:identifier ?library_id " |
365 " } " | 324 " } " |
366 ) | 325 ) |
367 .arg(localString) | |
368 .arg(soUri); | 326 .arg(soUri); |
369 | 327 |
370 SimpleSPARQLQuery::Value sonameValue = | 328 SimpleSPARQLQuery::Value sonameValue = |
371 SimpleSPARQLQuery::singleResultQuery(localString, sonameQuery, "library_id"); | 329 SimpleSPARQLQuery::singleResultQuery(m, sonameQuery, "library_id"); |
372 QString soname = sonameValue.value; | 330 QString soname = sonameValue.value; |
373 if (soname == "") { | 331 if (soname == "") { |
374 cerr << "PluginRDFIndexer::indexURL: NOTE: No identifier for library <" | 332 cerr << "PluginRDFIndexer::reindex: NOTE: No identifier for library <" |
375 << soUri.toStdString() << ">" | 333 << soUri.toStdString() << ">" |
376 << endl; | 334 << endl; |
377 continue; | 335 continue; |
378 } | 336 } |
379 | 337 |
380 | |
381 /* | |
382 cerr << "PluginRDFIndexer::indexURL: Document for plugin \"" | |
383 << soname.toStdString() << ":" << identifier.toStdString() | |
384 << "\" (uri <" << pluginUri.toStdString() << ">) is at url <" | |
385 << urlString.toStdString() << ">" << endl; | |
386 */ | |
387 QString pluginId = PluginIdentifier::createIdentifier | 338 QString pluginId = PluginIdentifier::createIdentifier |
388 ("vamp", soname, identifier); | 339 ("vamp", soname, identifier); |
389 | 340 |
390 foundSomething = true; | 341 foundSomething = true; |
391 | 342 |
392 if (m_idToDescriptionMap.find(pluginId) != m_idToDescriptionMap.end()) { | 343 if (m_idToUriMap.find(pluginId) != m_idToUriMap.end()) { |
393 /*!!! | |
394 | |
395 This can happen quite legitimately when using an RDF datastore rather | |
396 than querying individual files, as of course the datastore contains | |
397 all plugin data found so far, and each time a file is added to it, | |
398 subsequent queries will return all older plugins as well. | |
399 | |
400 It would be more efficient to add everything at once and then do all | |
401 queries, of course. | |
402 | |
403 cerr << "PluginRDFIndexer::indexURL: NOTE: Plugin id \"" | |
404 << pluginId.toStdString() << "\", described in document at <" | |
405 << urlString.toStdString() | |
406 << ">, has already been described in document <" | |
407 << m_idToDescriptionMap[pluginId].toStdString() | |
408 << ">: ignoring this new description" << endl; | |
409 */ | |
410 continue; | 344 continue; |
411 } | 345 } |
412 | 346 |
413 m_idToDescriptionMap[pluginId] = urlString; | |
414 m_idToUriMap[pluginId] = pluginUri; | 347 m_idToUriMap[pluginId] = pluginUri; |
415 | 348 |
416 addedSomething = true; | 349 addedSomething = true; |
417 | 350 |
418 if (pluginUri != "") { | 351 if (pluginUri != "") { |
419 if (m_uriToIdMap.find(pluginUri) != m_uriToIdMap.end()) { | 352 if (m_uriToIdMap.find(pluginUri) != m_uriToIdMap.end()) { |
420 cerr << "PluginRDFIndexer::indexURL: WARNING: Found multiple plugins with the same URI:" << endl; | 353 cerr << "PluginRDFIndexer::reindex: WARNING: Found multiple plugins with the same URI:" << endl; |
421 cerr << " 1. Plugin id \"" << m_uriToIdMap[pluginUri].toStdString() << "\"" << endl; | 354 cerr << " 1. Plugin id \"" << m_uriToIdMap[pluginUri].toStdString() << "\"" << endl; |
422 cerr << " described in <" << m_idToDescriptionMap[m_uriToIdMap[pluginUri]].toStdString() << ">" << endl; | |
423 cerr << " 2. Plugin id \"" << pluginId.toStdString() << "\"" << endl; | 355 cerr << " 2. Plugin id \"" << pluginId.toStdString() << "\"" << endl; |
424 cerr << " described in <" << urlString.toStdString() << ">" << endl; | |
425 cerr << "both claim URI <" << pluginUri.toStdString() << ">" << endl; | 356 cerr << "both claim URI <" << pluginUri.toStdString() << ">" << endl; |
426 } else { | 357 } else { |
427 m_uriToIdMap[pluginUri] = pluginId; | 358 m_uriToIdMap[pluginUri] = pluginId; |
428 } | 359 } |
429 } | 360 } |
430 } | 361 } |
431 | 362 |
432 if (!foundSomething) { | 363 if (!foundSomething) { |
433 cerr << "PluginRDFIndexer::indexURL: NOTE: Document at <" | 364 cerr << "PluginRDFIndexer::reindex: NOTE: Plugins found, but none sufficiently described" << endl; |
434 << urlString.toStdString() | |
435 << "> does not sufficiently describe any plugins" << endl; | |
436 } | 365 } |
437 | 366 |
438 return addedSomething; | 367 return addedSomething; |
439 } | 368 } |
440 | 369 |