comparison rdf/SimpleSPARQLQuery.cpp @ 481:a82645e788fc

* Auto-select RDF datastore/parsing backend; use trees datastore if available * Make CachedFile remember whether a file has already been successfully located locally (avoiding system call out to look at filesystem)
author Chris Cannam
date Fri, 14 Nov 2008 10:10:05 +0000
parents 3ffce691c9bf
children 82ab61fa9223
comparison
equal deleted inserted replaced
480:3ffce691c9bf 481:a82645e788fc
17 #include "base/ProgressReporter.h" 17 #include "base/ProgressReporter.h"
18 #include "base/Profiler.h" 18 #include "base/Profiler.h"
19 19
20 #include <QMutex> 20 #include <QMutex>
21 #include <QMutexLocker> 21 #include <QMutexLocker>
22 #include <QRegExp>
22 23
23 #include <set> 24 #include <set>
24 25
25 #ifdef USE_NEW_RASQAL_API 26 #ifdef USE_NEW_RASQAL_API
26 #include <rasqal/rasqal.h> 27 #include <rasqal/rasqal.h>
41 42
42 #ifdef USE_NEW_RASQAL_API 43 #ifdef USE_NEW_RASQAL_API
43 class WrasqalWorldWrapper // wrong but wromantic, etc 44 class WrasqalWorldWrapper // wrong but wromantic, etc
44 { 45 {
45 public: 46 public:
46 WrasqalWorldWrapper() : m_world(rasqal_new_world()) { } 47 WrasqalWorldWrapper() :
47 ~WrasqalWorldWrapper() { rasqal_free_world(m_world); } 48 m_world(0)
49 {
50 m_world = rasqal_new_world();
51 if (!m_world) {
52 cerr << "SimpleSPARQLQuery: ERROR: Failed to create RASQAL world!" << endl;
53 return;
54 }
55 /*!!! This appears to be new for 0.9.17?
56 if (rasqal_world_open(m_world)) {
57 cerr << "SimpleSPARQLQuery: ERROR: Failed to open RASQAL world!" << endl;
58 return;
59 }
60 */
61 }
62 ~WrasqalWorldWrapper()
63 {
64 rasqal_free_world(m_world);
65 }
48 66
49 rasqal_world *getWorld() { return m_world; } 67 rasqal_world *getWorld() { return m_world; }
50 const rasqal_world *getWorld() const { return m_world; } 68 const rasqal_world *getWorld() const { return m_world; }
51 69
52 private: 70 private:
60 public: 78 public:
61 WredlandWorldWrapper() : 79 WredlandWorldWrapper() :
62 m_world(0), m_storage(0), m_model(0) 80 m_world(0), m_storage(0), m_model(0)
63 { 81 {
64 m_world = librdf_new_world(); 82 m_world = librdf_new_world();
83 if (!m_world) {
84 cerr << "SimpleSPARQLQuery: ERROR: Failed to create LIBRDF world!" << endl;
85 return;
86 }
65 librdf_world_open(m_world); 87 librdf_world_open(m_world);
66 m_storage = librdf_new_storage(m_world, NULL, NULL, NULL); 88 m_storage = librdf_new_storage(m_world, "trees", NULL, NULL);
67 // m_storage = librdf_new_storage(m_world, "hashes", NULL,
68 //. "hash-type='memory',indexes=1");
69 if (!m_storage) { 89 if (!m_storage) {
70 std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland hashes datastore, falling back to memory store" << std::endl; 90 std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland trees datastore, falling back to memory store" << std::endl;
71 m_storage = librdf_new_storage(m_world, NULL, NULL, NULL); 91 m_storage = librdf_new_storage(m_world, NULL, NULL, NULL);
72 if (!m_storage) { 92 if (!m_storage) {
73 std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland memory datastore" << std::endl; 93 std::cerr << "SimpleSPARQLQuery: ERROR: Failed to initialise Redland memory datastore" << std::endl;
74 return; 94 return;
75 } 95 }
163 ResultList execute(); 183 ResultList execute();
164 184
165 bool isOK() const; 185 bool isOK() const;
166 QString getErrorString() const; 186 QString getErrorString() const;
167 187
168 static void setImplementationPreference 188 static void setBackEnd(SimpleSPARQLQuery::BackEndPreference p) {
169 (SimpleSPARQLQuery::ImplementationPreference p) {
170 m_preference = p; 189 m_preference = p;
171 } 190 }
172 191
173 protected: 192 protected:
174 static void errorHandler(void *, raptor_locator *, const char *); 193 static void errorHandler(void *, raptor_locator *, const char *);
183 202
184 #ifdef HAVE_REDLAND 203 #ifdef HAVE_REDLAND
185 static WredlandWorldWrapper *m_redland; 204 static WredlandWorldWrapper *m_redland;
186 #endif 205 #endif
187 206
188 static SimpleSPARQLQuery::ImplementationPreference m_preference; 207 static SimpleSPARQLQuery::BackEndPreference m_preference;
189 208
190 ResultList executeDirectParser(); 209 ResultList executeDirectParser();
191 ResultList executeDatastore(); 210 ResultList executeDatastore();
192 211
193 QString m_fromUri; 212 QString m_fromUri;
207 WredlandWorldWrapper *SimpleSPARQLQuery::Impl::m_redland = 0; 226 WredlandWorldWrapper *SimpleSPARQLQuery::Impl::m_redland = 0;
208 #endif 227 #endif
209 228
210 QMutex SimpleSPARQLQuery::Impl::m_mutex; 229 QMutex SimpleSPARQLQuery::Impl::m_mutex;
211 230
212 SimpleSPARQLQuery::ImplementationPreference 231 SimpleSPARQLQuery::BackEndPreference
213 SimpleSPARQLQuery::Impl::m_preference = SimpleSPARQLQuery::UseDirectParser; 232 SimpleSPARQLQuery::Impl::m_preference = SimpleSPARQLQuery::AutoSelectBackEnd;
214 233
215 SimpleSPARQLQuery::SimpleSPARQLQuery(QString fromUri, QString query) : 234 SimpleSPARQLQuery::SimpleSPARQLQuery(QString fromUri, QString query) :
216 m_impl(new Impl(fromUri, query)) 235 m_impl(new Impl(fromUri, query))
217 { 236 {
218 } 237 }
251 { 270 {
252 return m_impl->getErrorString(); 271 return m_impl->getErrorString();
253 } 272 }
254 273
255 void 274 void
256 SimpleSPARQLQuery::setImplementationPreference(ImplementationPreference p) 275 SimpleSPARQLQuery::setBackEnd(BackEndPreference p)
257 { 276 {
258 SimpleSPARQLQuery::Impl::setImplementationPreference(p); 277 SimpleSPARQLQuery::Impl::setBackEnd(p);
259 } 278 }
260 279
261 SimpleSPARQLQuery::Impl::Impl(QString fromUri, QString query) : 280 SimpleSPARQLQuery::Impl::Impl(QString fromUri, QString query) :
262 m_fromUri(fromUri), 281 m_fromUri(fromUri),
263 m_query(query), 282 m_query(query),
290 raptor_locator *locator, 309 raptor_locator *locator,
291 const char *message) 310 const char *message)
292 { 311 {
293 SimpleSPARQLQuery::Impl *impl = (SimpleSPARQLQuery::Impl *)data; 312 SimpleSPARQLQuery::Impl *impl = (SimpleSPARQLQuery::Impl *)data;
294 313
295 // char buffer[256]; 314 char buffer[256];
296 // raptor_format_locator(buffer, 255, locator); 315 raptor_format_locator(buffer, 255, locator);
297 // impl->m_errorString = QString("%1 - %2").arg(buffer).arg(message); 316 QString loc(buffer);
298 317 if (loc != "") {
299 impl->m_errorString = message; 318 impl->m_errorString = QString("%1 - %2").arg(loc).arg(message);
319 } else {
320 impl->m_errorString = message;
321 }
300 322
301 cerr << "SimpleSPARQLQuery: ERROR: " << impl->m_errorString.toStdString() << endl; 323 cerr << "SimpleSPARQLQuery: ERROR: " << impl->m_errorString.toStdString() << endl;
302 } 324 }
303 325
304 SimpleSPARQLQuery::ResultList 326 SimpleSPARQLQuery::ResultList
305 SimpleSPARQLQuery::Impl::execute() 327 SimpleSPARQLQuery::Impl::execute()
306 { 328 {
307 ResultList list; 329 ResultList list;
308 330
309 ImplementationPreference preference; 331 BackEndPreference preference;
310 332
311 m_mutex.lock(); 333 m_mutex.lock();
312 334
313 if (m_preference == UseDatastore) { 335 if (m_preference == AutoSelectBackEnd) {
336 #ifdef HAVE_REDLAND
337 // cerr << "librdf version: " << librdf_version_major << "." << librdf_version_minor << "." << librdf_version_release << endl;
338 if (librdf_version_major > 1 ||
339 (librdf_version_major == 1 &&
340 (librdf_version_minor > 0 ||
341 (librdf_version_minor == 0 &&
342 librdf_version_release > 7)))) {
343 cerr << "SimpleSPARQLQuery: Auto-selecting LIBRDF back-end for tree-based storage" << endl;
344 m_preference = DatastoreBackEnd;
345 }
346 #endif
347 if (m_preference == AutoSelectBackEnd) {
348 cerr << "SimpleSPARQLQuery: Auto-selecting RASQAL back-end" << endl;
349 m_preference = DirectParserBackEnd;
350 }
351 }
352
353 if (m_preference == DatastoreBackEnd) {
314 #ifdef HAVE_REDLAND 354 #ifdef HAVE_REDLAND
315 if (!m_redland) { 355 if (!m_redland) {
316 m_redland = new WredlandWorldWrapper(); 356 m_redland = new WredlandWorldWrapper();
317 if (!m_redland->isOK()) { 357 if (!m_redland->isOK()) {
318 cerr << "WARNING: SimpleSPARQLQuery::execute: Failed to initialise Redland datastore, falling back to direct parser implementation" << endl; 358 cerr << "WARNING: SimpleSPARQLQuery::execute: Failed to initialise Redland datastore, falling back to direct parser implementation" << endl;
319 delete m_redland; 359 delete m_redland;
320 m_preference = UseDirectParser; 360 m_preference = DirectParserBackEnd;
321 } 361 }
322 } 362 }
323 #else 363 #else
324 cerr << "WARNING: SimpleSPARQLQuery::execute: Datastore implementation preference indicated, but no datastore compiled in; using direct parser" << endl; 364 cerr << "WARNING: SimpleSPARQLQuery::execute: Datastore implementation preference indicated, but no datastore compiled in; using direct parser" << endl;
325 m_preference = UseDirectParser; 365 m_preference = DirectParserBackEnd;
326 #endif 366 #endif
327 } 367 }
328 368
329 if (m_preference == UseDirectParser) { 369 if (m_preference == DirectParserBackEnd) {
330 #ifdef USE_NEW_RASQAL_API 370 #ifdef USE_NEW_RASQAL_API
331 if (!m_rasqal) m_rasqal = new WrasqalWorldWrapper(); 371 if (!m_rasqal) m_rasqal = new WrasqalWorldWrapper();
332 #else 372 #else
333 if (!m_rasqalInitialised) { 373 if (!m_rasqalInitialised) {
334 rasqal_init(); 374 rasqal_init();
338 } 378 }
339 379
340 preference = m_preference; 380 preference = m_preference;
341 m_mutex.unlock(); 381 m_mutex.unlock();
342 382
343 if (preference == SimpleSPARQLQuery::UseDirectParser) { 383 if (preference == SimpleSPARQLQuery::DirectParserBackEnd) {
344 return executeDirectParser(); 384 return executeDirectParser();
345 } else { 385 } else {
346 return executeDatastore(); 386 return executeDatastore();
347 } 387 }
348 } 388 }
478 Profiler profiler("SimpleSPARQLQuery::executeDatastore"); 518 Profiler profiler("SimpleSPARQLQuery::executeDatastore");
479 519
480 librdf_uri *uri = m_redland->getUri(m_fromUri, m_errorString); 520 librdf_uri *uri = m_redland->getUri(m_fromUri, m_errorString);
481 if (!uri) return list; 521 if (!uri) return list;
482 522
523 #ifdef DEBUG_SIMPLE_SPARQL_QUERY
483 std::cerr << "SimpleSPARQLQuery: Query is: \"" << m_query.toStdString() << "\"" << std::endl; 524 std::cerr << "SimpleSPARQLQuery: Query is: \"" << m_query.toStdString() << "\"" << std::endl;
525 #endif
526 /*!!!
484 static std::map<QString, int> counter; 527 static std::map<QString, int> counter;
485 if (counter.find(m_query) == counter.end()) counter[m_query] = 1; 528 if (counter.find(m_query) == counter.end()) counter[m_query] = 1;
486 else ++counter[m_query]; 529 else ++counter[m_query];
487 std::cerr << "Counter for this query: " << counter[m_query] << std::endl; 530 std::cerr << "Counter for this query: " << counter[m_query] << std::endl;
531 std::cerr << "Base URI is: \"" << m_fromUri.toStdString() << "\"" << std::endl;
532 */
488 533
489 librdf_query *query; 534 librdf_query *query;
490 535
491 { 536 {
492 Profiler p("SimpleSPARQLQuery: Prepare LIBRDF query"); 537 Profiler p("SimpleSPARQLQuery: Prepare LIBRDF query");
493 query = librdf_new_query 538 query = librdf_new_query
494 (m_redland->getWorld(), "sparql", NULL, 539 (m_redland->getWorld(), "sparql", NULL,
495 (const unsigned char *)m_query.toUtf8().data(), uri); 540 (const unsigned char *)m_query.toUtf8().data(), uri);
496 } 541 }
497 std::cerr << "Prepared" << std::endl;
498 542
499 if (!query) { 543 if (!query) {
500 m_errorString = "Failed to construct query"; 544 m_errorString = "Failed to construct query";
501 return list; 545 return list;
502 } 546 }
504 librdf_query_results *results; 548 librdf_query_results *results;
505 { 549 {
506 Profiler p("SimpleSPARQLQuery: Execute LIBRDF query"); 550 Profiler p("SimpleSPARQLQuery: Execute LIBRDF query");
507 results = librdf_query_execute(query, m_redland->getModel()); 551 results = librdf_query_execute(query, m_redland->getModel());
508 } 552 }
509 std::cerr << "Executed" << std::endl;
510 553
511 if (!results) { 554 if (!results) {
512 cerr << "SimpleSPARQLQuery: LIBRDF query failed" << endl; 555 cerr << "SimpleSPARQLQuery: LIBRDF query failed" << endl;
513 librdf_free_query(query); 556 librdf_free_query(query);
514 return list; 557 return list;
545 resultmap[key] = Value(); 588 resultmap[key] = Value();
546 continue; 589 continue;
547 } 590 }
548 591
549 ValueType type = LiteralValue; 592 ValueType type = LiteralValue;
550 if (librdf_node_is_resource(node)) type = URIValue; 593 QString text;
551 else if (librdf_node_is_literal(node)) type = LiteralValue; 594
552 else if (librdf_node_is_blank(node)) type = BlankValue; 595 if (librdf_node_is_resource(node)) {
553 else { 596
597 type = URIValue;
598 librdf_uri *uri = librdf_node_get_uri(node);
599 text = (const char *)librdf_uri_as_string(uri);
600
601 } else if (librdf_node_is_literal(node)) {
602
603 type = LiteralValue;
604 text = (const char *)librdf_node_get_literal_value(node);
605
606 } else if (librdf_node_is_blank(node)) {
607
608 type = BlankValue;
609
610 } else {
611
554 cerr << "SimpleSPARQLQuery: LIBRDF query returned unknown node type (not resource, literal, or blank)" << endl; 612 cerr << "SimpleSPARQLQuery: LIBRDF query returned unknown node type (not resource, literal, or blank)" << endl;
555 resultmap[key] = Value(); 613 }
556 librdf_free_node(node);
557 continue;
558 }
559
560 QString text = (const char *)librdf_node_get_literal_value(node);
561 614
562 #ifdef DEBUG_SIMPLE_SPARQL_QUERY 615 #ifdef DEBUG_SIMPLE_SPARQL_QUERY
563 std::cerr << i << ". " << key.toStdString() << " -> " << text.toStdString() << " (type " << type << ")" << std::endl; 616 cerr << i << ". " << key.toStdString() << " -> " << text.toStdString() << " (type " << type << ")" << endl;
564 #endif 617 #endif
565 618
566 resultmap[key] = Value(type, text); 619 resultmap[key] = Value(type, text);
567 620
568 librdf_free_node(node); 621 librdf_free_node(node);
590 } 643 }
591 644
592 librdf_free_query_results(results); 645 librdf_free_query_results(results);
593 librdf_free_query(query); 646 librdf_free_query(query);
594 647
595 std::cerr << "All results retrieved" << std::endl; 648 #ifdef DEBUG_SIMPLE_SPARQL_QUERY
649 cerr << "All results retrieved (" << resultCount << " of them)" << endl;
650 #endif
596 651
597 return list; 652 return list;
598 #endif 653 #endif
599 } 654 }
600 655