comparison rdf/RDFImporter.cpp @ 726:1bfb40549003 dataquay

Convert RDFImporter to Dataquay
author Chris Cannam
date Fri, 18 May 2012 21:06:45 +0100
parents 1424aa29ae95
children 211efc770335
comparison
equal deleted inserted replaced
725:c789deb83bd4 726:1bfb40549003
19 #include <vector> 19 #include <vector>
20 20
21 #include <iostream> 21 #include <iostream>
22 #include <cmath> 22 #include <cmath>
23 23
24 #include "SimpleSPARQLQuery.h"
25
26 #include "base/ProgressReporter.h" 24 #include "base/ProgressReporter.h"
27 #include "base/RealTime.h" 25 #include "base/RealTime.h"
28 26
29 #include "data/model/SparseOneDimensionalModel.h" 27 #include "data/model/SparseOneDimensionalModel.h"
30 #include "data/model/SparseTimeValueModel.h" 28 #include "data/model/SparseTimeValueModel.h"
36 34
37 #include "data/fileio/FileSource.h" 35 #include "data/fileio/FileSource.h"
38 #include "data/fileio/CachedFile.h" 36 #include "data/fileio/CachedFile.h"
39 #include "data/fileio/FileFinder.h" 37 #include "data/fileio/FileFinder.h"
40 38
39 #include <dataquay/BasicStore.h>
40 #include <dataquay/PropertyObject.h>
41
42 using Dataquay::Uri;
43 using Dataquay::Node;
44 using Dataquay::Nodes;
45 using Dataquay::Triple;
46 using Dataquay::Triples;
47 using Dataquay::BasicStore;
48 using Dataquay::PropertyObject;
49
41 using std::cerr; 50 using std::cerr;
42 using std::endl; 51 using std::endl;
43 52
44 class RDFImporterImpl 53 class RDFImporterImpl
45 { 54 {
53 QString getErrorString() const; 62 QString getErrorString() const;
54 63
55 std::vector<Model *> getDataModels(ProgressReporter *); 64 std::vector<Model *> getDataModels(ProgressReporter *);
56 65
57 protected: 66 protected:
67 BasicStore *m_store;
68
58 QString m_uristring; 69 QString m_uristring;
59 QString m_errorString; 70 QString m_errorString;
60 std::map<QString, Model *> m_audioModelMap; 71 std::map<QString, Model *> m_audioModelMap;
61 int m_sampleRate; 72 int m_sampleRate;
62 73
63 std::map<Model *, std::map<QString, float> > m_labelValueMap; 74 std::map<Model *, std::map<QString, float> > m_labelValueMap;
64 75
65 static bool m_prefixesLoaded;
66 static void loadPrefixes(ProgressReporter *reporter);
67
68 void getDataModelsAudio(std::vector<Model *> &, ProgressReporter *); 76 void getDataModelsAudio(std::vector<Model *> &, ProgressReporter *);
69 void getDataModelsSparse(std::vector<Model *> &, ProgressReporter *); 77 void getDataModelsSparse(std::vector<Model *> &, ProgressReporter *);
70 void getDataModelsDense(std::vector<Model *> &, ProgressReporter *); 78 void getDataModelsDense(std::vector<Model *> &, ProgressReporter *);
71 79
72 void getDenseModelTitle(Model *, QString, QString); 80 void getDenseModelTitle(Model *, QString, QString);
76 int &hopSize, int &width, int &height); 84 int &hopSize, int &width, int &height);
77 85
78 void fillModel(Model *, long, long, bool, std::vector<float> &, QString); 86 void fillModel(Model *, long, long, bool, std::vector<float> &, QString);
79 }; 87 };
80 88
81 bool RDFImporterImpl::m_prefixesLoaded = false;
82
83 QString 89 QString
84 RDFImporter::getKnownExtensions() 90 RDFImporter::getKnownExtensions()
85 { 91 {
86 return "*.rdf *.n3 *.ttl"; 92 return "*.rdf *.n3 *.ttl";
87 } 93 }
119 { 125 {
120 return m_d->getDataModels(r); 126 return m_d->getDataModels(r);
121 } 127 }
122 128
123 RDFImporterImpl::RDFImporterImpl(QString uri, int sampleRate) : 129 RDFImporterImpl::RDFImporterImpl(QString uri, int sampleRate) :
130 m_store(new BasicStore),
124 m_uristring(uri), 131 m_uristring(uri),
125 m_sampleRate(sampleRate) 132 m_sampleRate(sampleRate)
126 { 133 {
134 //!!! retrieve data if remote... then
135
136 m_store->addPrefix("mo", Uri("http://purl.org/ontology/mo/"));
137 m_store->addPrefix("af", Uri("http://purl.org/ontology/af/"));
138 m_store->addPrefix("dc", Uri("http://purl.org/dc/elements/1.1/"));
139 m_store->addPrefix("tl", Uri("http://purl.org/NET/c4dm/timeline.owl#"));
140 m_store->addPrefix("event", Uri("http://purl.org/NET/c4dm/event.owl#"));
141 m_store->addPrefix("rdfs", Uri("http://www.w3.org/2000/01/rdf-schema#"));
142 m_store->import(QUrl::fromLocalFile(uri), BasicStore::ImportIgnoreDuplicates);
127 } 143 }
128 144
129 RDFImporterImpl::~RDFImporterImpl() 145 RDFImporterImpl::~RDFImporterImpl()
130 { 146 {
131 SimpleSPARQLQuery::closeSingleSource(m_uristring); 147 delete m_store;
132 } 148 }
133 149
134 bool 150 bool
135 RDFImporterImpl::isOK() 151 RDFImporterImpl::isOK()
136 { 152 {
144 } 160 }
145 161
146 std::vector<Model *> 162 std::vector<Model *>
147 RDFImporterImpl::getDataModels(ProgressReporter *reporter) 163 RDFImporterImpl::getDataModels(ProgressReporter *reporter)
148 { 164 {
149 loadPrefixes(reporter);
150
151 std::vector<Model *> models; 165 std::vector<Model *> models;
152 166
153 getDataModelsAudio(models, reporter); 167 getDataModelsAudio(models, reporter);
154 168
155 if (m_sampleRate == 0) { 169 if (m_sampleRate == 0) {
183 197
184 void 198 void
185 RDFImporterImpl::getDataModelsAudio(std::vector<Model *> &models, 199 RDFImporterImpl::getDataModelsAudio(std::vector<Model *> &models,
186 ProgressReporter *reporter) 200 ProgressReporter *reporter)
187 { 201 {
188 SimpleSPARQLQuery query 202 Nodes sigs = m_store->match
189 (SimpleSPARQLQuery::QueryFromSingleSource, 203 (Triple(Node(), "a", m_store->expand("mo:Signal"))).a();
190 QString 204
191 ( 205 foreach (Node sig, sigs) {
192 " PREFIX mo: <http://purl.org/ontology/mo/> "
193 " SELECT ?signal ?source FROM <%1> "
194 " WHERE { ?source a mo:AudioFile . "
195 " ?signal a mo:Signal . "
196 " ?source mo:encodes ?signal } "
197 )
198 .arg(m_uristring));
199
200 SimpleSPARQLQuery::ResultList results = query.execute();
201
202 if (results.empty()) {
203
204 SimpleSPARQLQuery query2
205 (SimpleSPARQLQuery::QueryFromSingleSource,
206 QString
207 (
208 " PREFIX mo: <http://purl.org/ontology/mo/> "
209 " SELECT ?signal ?source FROM <%1> "
210 " WHERE { ?signal a mo:Signal ; mo:available_as ?source } "
211 )
212 .arg(m_uristring));
213 206
214 results = query.execute(); 207 Node file = m_store->matchFirst(Triple(Node(), "mo:encodes", sig)).a;
215 } 208 if (file == Node()) {
216 209 file = m_store->matchFirst(Triple(sig, "mo:available_as", Node())).c;
217 for (int i = 0; i < (int)results.size(); ++i) { 210 }
218 211 if (file == Node()) {
219 QString signal = results[i]["signal"].value; 212 std::cerr << "RDFImporterImpl::getDataModelsAudio: ERROR: No source for signal " << sig << std::endl;
220 QString source = results[i]["source"].value; 213 continue;
221 214 }
222 SVDEBUG << "NOTE: Seeking signal source \"" << source << "\"..." << endl; 215
216 QString signal = sig.value;
217 QString source = file.value;
218
219 SVDEBUG << "NOTE: Seeking signal source \"" << source
220 << "\"..." << endl;
223 221
224 FileSource *fs = new FileSource(source, reporter); 222 FileSource *fs = new FileSource(source, reporter);
225 if (fs->isAvailable()) { 223 if (fs->isAvailable()) {
226 SVDEBUG << "NOTE: Source is available: Local filename is \"" 224 SVDEBUG << "NOTE: Source is available: Local filename is \""
227 << fs->getLocalFilename() << "\"..." << endl; 225 << fs->getLocalFilename()
226 << "\"..." << endl;
228 } 227 }
229 228
230 #ifdef NO_SV_GUI 229 #ifdef NO_SV_GUI
231 if (!fs->isAvailable()) { 230 if (!fs->isAvailable()) {
232 m_errorString = QString("Signal source \"%1\" is not available").arg(source); 231 m_errorString = QString("Signal source \"%1\" is not available").arg(source);
233 delete fs; 232 delete fs;
234 continue; 233 continue;
235 } 234 }
236 #else 235 #else
237 if (!fs->isAvailable()) { 236 if (!fs->isAvailable()) {
238 SVDEBUG << "NOTE: Signal source \"" << source << "\" is not available, using file finder..." << endl; 237 SVDEBUG << "NOTE: Signal source \"" << source
238 << "\" is not available, using file finder..." << endl;
239 FileFinder *ff = FileFinder::getInstance(); 239 FileFinder *ff = FileFinder::getInstance();
240 if (ff) { 240 if (ff) {
241 QString path = ff->find(FileFinder::AudioFile, 241 QString path = ff->find(FileFinder::AudioFile,
242 fs->getLocation(), 242 fs->getLocation(),
243 m_uristring); 243 m_uristring);
282 { 282 {
283 if (reporter) { 283 if (reporter) {
284 reporter->setMessage(RDFImporter::tr("Importing dense signal data from RDF...")); 284 reporter->setMessage(RDFImporter::tr("Importing dense signal data from RDF..."));
285 } 285 }
286 286
287 SimpleSPARQLQuery query 287 Nodes sigFeatures = m_store->match
288 (SimpleSPARQLQuery::QueryFromSingleSource, 288 (Triple(Node(), "af:signal_feature", Node())).c();
289 QString 289
290 ( 290 foreach (Node sf, sigFeatures) {
291 " PREFIX mo: <http://purl.org/ontology/mo/>" 291
292 " PREFIX af: <http://purl.org/ontology/af/>" 292 if (sf.type != Node::URI && sf.type != Node::Blank) continue;
293 293
294 " SELECT ?feature ?feature_signal_type ?value " 294 Node t = m_store->matchFirst(Triple(sf, "a", Node())).c;
295 " FROM <%1> " 295 Node v = m_store->matchFirst(Triple(sf, "af:value", Node())).c;
296 296
297 " WHERE { " 297 QString feature = sf.value;
298 298 QString type = t.value;
299 " ?signal af:signal_feature ?feature . " 299 QString value = v.value;
300 300
301 " ?feature a ?feature_signal_type ; " 301 if (type == "" || value == "") continue;
302 " af:value ?value . "
303
304 " } "
305 )
306 .arg(m_uristring));
307
308 SimpleSPARQLQuery::ResultList results = query.execute();
309
310 if (!query.isOK()) {
311 m_errorString = query.getErrorString();
312 return;
313 }
314
315 if (query.wasCancelled()) {
316 m_errorString = "Query cancelled";
317 return;
318 }
319
320 for (int i = 0; i < (int)results.size(); ++i) {
321
322 QString feature = results[i]["feature"].value;
323 QString type = results[i]["feature_signal_type"].value;
324 QString value = results[i]["value"].value;
325 302
326 int sampleRate = 0; 303 int sampleRate = 0;
327 int windowLength = 0; 304 int windowLength = 0;
328 int hopSize = 0; 305 int hopSize = 0;
329 int width = 0; 306 int width = 0;
405 void 382 void
406 RDFImporterImpl::getDenseModelTitle(Model *m, 383 RDFImporterImpl::getDenseModelTitle(Model *m,
407 QString featureUri, 384 QString featureUri,
408 QString featureTypeUri) 385 QString featureTypeUri)
409 { 386 {
410 QString titleQuery = QString 387 Node n = m_store->matchFirst
411 ( 388 (Triple(Uri(featureUri), "dc:title", Node())).c;
412 " PREFIX dc: <http://purl.org/dc/elements/1.1/> " 389
413 " SELECT ?title " 390 if (n.type == Node::Literal && n.value != "") {
414 " FROM <%1> " 391 SVDEBUG << "RDFImporterImpl::getDenseModelTitle: Title (from signal) \"" << n.value << "\"" << endl;
415 " WHERE { " 392 m->setObjectName(n.value);
416 " <%2> dc:title ?title . "
417 " } "
418 ).arg(m_uristring);
419
420 SimpleSPARQLQuery::Value v;
421
422 v = SimpleSPARQLQuery::singleResultQuery
423 (SimpleSPARQLQuery::QueryFromSingleSource,
424 titleQuery.arg(featureUri),
425 "title");
426
427 if (v.value != "") {
428 SVDEBUG << "RDFImporterImpl::getDenseModelTitle: Title (from signal) \"" << v.value << "\"" << endl;
429 m->setObjectName(v.value);
430 return; 393 return;
431 } 394 }
432 395
433 v = SimpleSPARQLQuery::singleResultQuery 396 n = m_store->matchFirst
434 (SimpleSPARQLQuery::QueryFromSingleSource, 397 (Triple(Uri(featureTypeUri), "dc:title", Node())).c;
435 titleQuery.arg(featureTypeUri), 398
436 "title"); 399 if (n.type == Node::Literal && n.value != "") {
437 400 SVDEBUG << "RDFImporterImpl::getDenseModelTitle: Title (from signal type) \"" << n.value << "\"" << endl;
438 if (v.value != "") { 401 m->setObjectName(n.value);
439 SVDEBUG << "RDFImporterImpl::getDenseModelTitle: Title (from signal type) \"" << v.value << "\"" << endl;
440 m->setObjectName(v.value);
441 return; 402 return;
442 } 403 }
443 404
444 SVDEBUG << "RDFImporterImpl::getDenseModelTitle: No title available for feature <" << featureUri << ">" << endl; 405 SVDEBUG << "RDFImporterImpl::getDenseModelTitle: No title available for feature <" << featureUri << ">" << endl;
445 } 406 }
447 void 408 void
448 RDFImporterImpl::getDenseFeatureProperties(QString featureUri, 409 RDFImporterImpl::getDenseFeatureProperties(QString featureUri,
449 int &sampleRate, int &windowLength, 410 int &sampleRate, int &windowLength,
450 int &hopSize, int &width, int &height) 411 int &hopSize, int &width, int &height)
451 { 412 {
452 SimpleSPARQLQuery::QueryType s = SimpleSPARQLQuery::QueryFromSingleSource; 413 Node dim = m_store->matchFirst
453 414 (Triple(Uri(featureUri), "af:dimensions", Node())).c;
454 QString dimensionsQuery 415
455 ( 416 cerr << "Dimensions = \"" << dim.value << "\"" << endl;
456 " PREFIX mo: <http://purl.org/ontology/mo/>" 417
457 " PREFIX af: <http://purl.org/ontology/af/>" 418 if (dim.type == Node::Literal && dim.value != "") {
458 419 QStringList dl = dim.value.split(" ");
459 " SELECT ?dimensions " 420 if (dl.empty()) dl.push_back(dim.value);
460 " FROM <%1> "
461
462 " WHERE { "
463
464 " <%2> af:dimensions ?dimensions . "
465
466 " } "
467 );
468
469 SimpleSPARQLQuery::Value dimensionsValue =
470 SimpleSPARQLQuery::singleResultQuery
471 (s, dimensionsQuery.arg(m_uristring).arg(featureUri), "dimensions");
472
473 cerr << "Dimensions = \"" << dimensionsValue.value << "\""
474 << endl;
475
476 if (dimensionsValue.value != "") {
477 QStringList dl = dimensionsValue.value.split(" ");
478 if (dl.empty()) dl.push_back(dimensionsValue.value);
479 if (dl.size() > 0) height = dl[0].toInt(); 421 if (dl.size() > 0) height = dl[0].toInt();
480 if (dl.size() > 1) width = dl[1].toInt(); 422 if (dl.size() > 1) width = dl[1].toInt();
481 } 423 }
482 424
483 QString queryTemplate 425 // Looking for rate, hop, window from:
484 ( 426 //
485 " PREFIX mo: <http://purl.org/ontology/mo/>" 427 // ?feature mo:time ?time .
486 " PREFIX af: <http://purl.org/ontology/af/>" 428 // ?time a tl:Interval .
487 " PREFIX tl: <http://purl.org/NET/c4dm/timeline.owl#>" 429 // ?time tl:onTimeLine ?timeline .
488 430 // ?map tl:rangeTimeLine ?timeline .
489 " SELECT ?%3 " 431 // ?map tl:sampleRate ?rate .
490 " FROM <%1> " 432 // ?map tl:hopSize ?hop .
491 433 // ?map tl:windowLength ?window .
492 " WHERE { " 434
493 435 Node interval = m_store->matchFirst(Triple(Uri(featureUri), "mo:time", Node())).c;
494 " <%2> mo:time ?time . " 436
495 437 if (!m_store->contains(Triple(interval, "a", m_store->expand("tl:Interval")))) {
496 " ?time a tl:Interval ; " 438 cerr << "RDFImporterImpl::getDenseFeatureProperties: Feature time node "
497 " tl:onTimeLine ?timeline . " 439 << interval << " is not a tl:Interval" << endl;
498 440 return;
499 " ?map tl:rangeTimeLine ?timeline . " 441 }
500 442
501 " ?map tl:%3 ?%3 . " 443 Node tl = m_store->matchFirst(Triple(interval, "tl:onTimeLine", Node())).c;
502 444
503 " } " 445 if (tl == Node()) {
504 ); 446 cerr << "RDFImporterImpl::getDenseFeatureProperties: Interval node "
505 447 << interval << " lacks tl:onTimeLine property" << endl;
506 // Another laborious workaround for rasqal's failure to handle 448 return;
507 // multiple optionals properly 449 }
508 450
509 SimpleSPARQLQuery::Value srValue = 451 Node map = m_store->matchFirst(Triple(Node(), "tl:rangeTimeLine", tl)).a;
510 SimpleSPARQLQuery::singleResultQuery(s, 452
511 queryTemplate 453 if (map == Node()) {
512 .arg(m_uristring).arg(featureUri) 454 cerr << "RDFImporterImpl::getDenseFeatureProperties: No map for "
513 .arg("sampleRate"), 455 << "timeline node " << tl << endl;
514 "sampleRate"); 456 }
515 if (srValue.value != "") { 457
516 sampleRate = srValue.value.toInt(); 458 PropertyObject po(m_store, "tl:", map);
517 } 459
518 460 if (po.hasProperty("sampleRate")) {
519 SimpleSPARQLQuery::Value hopValue = 461 sampleRate = po.getProperty("sampleRate").toInt();
520 SimpleSPARQLQuery::singleResultQuery(s, 462 }
521 queryTemplate 463 if (po.hasProperty("hopSize")) {
522 .arg(m_uristring).arg(featureUri) 464 hopSize = po.getProperty("hopSize").toInt();
523 .arg("hopSize"), 465 }
524 "hopSize"); 466 if (po.hasProperty("windowLength")) {
525 if (srValue.value != "") { 467 windowLength = po.getProperty("windowLength").toInt();
526 hopSize = hopValue.value.toInt();
527 }
528
529 SimpleSPARQLQuery::Value winValue =
530 SimpleSPARQLQuery::singleResultQuery(s,
531 queryTemplate
532 .arg(m_uristring).arg(featureUri)
533 .arg("windowLength"),
534 "windowLength");
535 if (winValue.value != "") {
536 windowLength = winValue.value.toInt();
537 } 468 }
538 469
539 cerr << "sr = " << sampleRate << ", hop = " << hopSize << ", win = " << windowLength << endl; 470 cerr << "sr = " << sampleRate << ", hop = " << hopSize << ", win = " << windowLength << endl;
540 } 471 }
541 472
545 { 476 {
546 if (reporter) { 477 if (reporter) {
547 reporter->setMessage(RDFImporter::tr("Importing event data from RDF...")); 478 reporter->setMessage(RDFImporter::tr("Importing event data from RDF..."));
548 } 479 }
549 480
550 SimpleSPARQLQuery::QueryType s = SimpleSPARQLQuery::QueryFromSingleSource;
551
552 // Our query is intended to retrieve every thing that has a time,
553 // and every feature type and value associated with a thing that
554 // has a time.
555
556 // We will then need to refine this big bag of results into a set
557 // of data models.
558
559 // Results that have different source signals should go into
560 // different models.
561
562 // Results that have different feature types should go into
563 // different models.
564
565 // Results that are sparse should go into different models from
566 // those that are dense (we need to examine the timestamps to
567 // establish this -- if the timestamps are regular, the results
568 // are dense -- so we can't do it as we go along, only after
569 // collecting all results).
570
571 // Timed things that have features associated with them should not
572 // appear directly in any model -- their features should appear
573 // instead -- and these should be different models from those used
574 // for timed things that do not have features.
575
576 // As we load the results, we'll push them into a partially
577 // structured container that maps from source signal (URI as
578 // string) -> feature type (likewise) -> time -> list of values.
579 // If the source signal or feature type is unavailable, the empty
580 // string will do.
581
582 QString prefixes = QString(
583 " PREFIX event: <http://purl.org/NET/c4dm/event.owl#>"
584 " PREFIX tl: <http://purl.org/NET/c4dm/timeline.owl#>"
585 " PREFIX mo: <http://purl.org/ontology/mo/>"
586 " PREFIX af: <http://purl.org/ontology/af/>"
587 " PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>"
588 );
589
590 QString queryString = prefixes + QString(
591
592 " SELECT ?signal ?timed_thing ?timeline ?event_type ?value"
593 " FROM <%1>"
594
595 " WHERE {"
596
597 " ?signal a mo:Signal ."
598
599 " ?signal mo:time ?interval ."
600 " ?interval tl:onTimeLine ?timeline ."
601 " ?time tl:onTimeLine ?timeline ."
602 " ?timed_thing event:time ?time ."
603 " ?timed_thing a ?event_type ."
604
605 " OPTIONAL {"
606 " ?timed_thing af:feature ?value"
607 " }"
608 " }"
609
610 ).arg(m_uristring);
611
612 //!!! NB we're using rather old terminology for these things, apparently:
613 // beginsAt -> start
614 // onTimeLine -> timeline
615
616 QString timeQueryString = prefixes + QString(
617
618 " SELECT ?time FROM <%1> "
619 " WHERE { "
620 " <%2> event:time ?t . "
621 " ?t tl:at ?time . "
622 " } "
623
624 ).arg(m_uristring);
625
626 QString rangeQueryString = prefixes + QString(
627
628 " SELECT ?time ?duration FROM <%1> "
629 " WHERE { "
630 " <%2> event:time ?t . "
631 " ?t tl:beginsAt ?time . "
632 " ?t tl:duration ?duration . "
633 " } "
634
635 ).arg(m_uristring);
636
637 QString labelQueryString = prefixes + QString(
638
639 " SELECT ?label FROM <%1> "
640 " WHERE { "
641 " <%2> rdfs:label ?label . "
642 " } "
643
644 ).arg(m_uristring);
645
646 QString textQueryString = prefixes + QString(
647
648 " SELECT ?label FROM <%1> "
649 " WHERE { "
650 " <%2> af:text ?label . "
651 " } "
652
653 ).arg(m_uristring);
654
655 SimpleSPARQLQuery query(s, queryString);
656 query.setProgressReporter(reporter);
657
658 // cerr << "Query will be: " << queryString << endl;
659
660 SimpleSPARQLQuery::ResultList results = query.execute();
661
662 if (!query.isOK()) {
663 m_errorString = query.getErrorString();
664 return;
665 }
666
667 if (query.wasCancelled()) {
668 m_errorString = "Query cancelled";
669 return;
670 }
671
672 /* 481 /*
673 This function is now only used for sparse data (for dense data 482 This function is only used for sparse data (for dense data we
674 we would be in getDataModelsDense instead). 483 would be in getDataModelsDense instead).
675 484
676 For sparse data, the determining factors in deciding what model 485 Our query is intended to retrieve every thing that has a time,
677 to use are: Do the features have values? and Do the features 486 and every feature type and value associated with a thing that
678 have duration? 487 has a time.
679 488
680 We can run through the results and check off whether we find 489 We will then need to refine this big bag of results into a set
681 values and duration for each of the source+type keys, and then 490 of data models.
682 run through the source+type keys pushing each of the results 491
683 into a suitable model. 492 Results that have different source signals should go into
684 493 different models.
685 Unfortunately, at this point we do not yet have any actual 494
686 timing data (time/duration) -- just the time URI. 495 Results that have different feature types should go into
687 496 different models.
688 What we _could_ do is to create one of each type of model at the
689 start, for each of the source+type keys, and then push each
690 feature into the relevant model depending on what we find out
691 about it. Then return only non-empty models.
692 */ 497 */
498
499 Nodes sigs = m_store->match
500 (Triple(Node(), "a", m_store->expand("mo:Signal"))).a();
693 501
694 // Map from timeline uri to event type to dimensionality to 502 // Map from timeline uri to event type to dimensionality to
695 // presence of duration to model ptr. Whee! 503 // presence of duration to model ptr. Whee!
696 std::map<QString, std::map<QString, std::map<int, std::map<bool, Model *> > > > 504 std::map<QString, std::map<QString, std::map<int, std::map<bool, Model *> > > >
697 modelMap; 505 modelMap;
698 506
699 for (int i = 0; i < (int)results.size(); ++i) { 507 foreach (Node sig, sigs) {
700
701 if (i % 4 == 0) {
702 if (reporter) reporter->setProgress(i/4);
703 }
704
705 QString source = results[i]["signal"].value;
706 QString timeline = results[i]["timeline"].value;
707 QString type = results[i]["event_type"].value;
708 QString thinguri = results[i]["timed_thing"].value;
709 508
710 RealTime time; 509 Node interval = m_store->matchFirst(Triple(sig, "mo:time", Node())).c;
711 RealTime duration; 510 if (interval == Node()) continue;
712 511
713 bool haveTime = false; 512 Node tl = m_store->matchFirst(Triple(interval, "tl:onTimeLine", Node())).c;
714 bool haveDuration = false; 513 if (tl == Node()) continue;
715 514
716 QString label = ""; 515 Nodes times = m_store->match(Triple(Node(), "tl:onTimeLine", tl)).a();
717 bool text = (type.contains("Text") || type.contains("text")); // Ha, ha 516
718 bool note = (type.contains("Note") || type.contains("note")); // Guffaw 517 foreach (Node tn, times) {
719 518
720 if (text) { 519 Nodes timedThings = m_store->match(Triple(Node(), "event:time", tn)).a();
721 label = SimpleSPARQLQuery::singleResultQuery 520
722 (s, textQueryString.arg(thinguri), "label").value; 521 foreach (Node thing, timedThings) {
723 } 522
724 523 Node typ = m_store->matchFirst(Triple(thing, "a", Node())).c;
725 if (label == "") { 524 if (typ == Node()) continue;
726 label = SimpleSPARQLQuery::singleResultQuery 525
727 (s, labelQueryString.arg(thinguri), "label").value; 526 Node valu = m_store->matchFirst(Triple(thing, "af:feature", Node())).c;
728 } 527
729 528 QString source = sig.value;
730 SimpleSPARQLQuery rangeQuery(s, rangeQueryString.arg(thinguri)); 529 QString timeline = tl.value;
731 SimpleSPARQLQuery::ResultList rangeResults = rangeQuery.execute(); 530 QString type = typ.value;
732 if (!rangeResults.empty()) { 531 QString thinguri = thing.value;
733 // std::cerr << rangeResults.size() << " range results" << std::endl; 532
734 time = RealTime::fromXsdDuration 533 /*
735 (rangeResults[0]["time"].value.toStdString()); 534 For sparse data, the determining factors in deciding
736 duration = RealTime::fromXsdDuration 535 what model to use are: Do the features have values?
737 (rangeResults[0]["duration"].value.toStdString()); 536 and Do the features have duration?
738 // std::cerr << "duration string " << rangeResults[0]["duration"].value << std::endl; 537
739 haveTime = true; 538 We can run through the results and check off whether
740 haveDuration = true; 539 we find values and duration for each of the
741 } else { 540 source+type keys, and then run through the
742 QString timestring = SimpleSPARQLQuery::singleResultQuery 541 source+type keys pushing each of the results into a
743 (s, timeQueryString.arg(thinguri), "time").value; 542 suitable model.
744 // SVDEBUG << "timestring = " << timestring << endl; 543
745 if (timestring != "") { 544 Unfortunately, at this point we do not yet have any
746 time = RealTime::fromXsdDuration(timestring.toStdString()); 545 actual timing data (time/duration) -- just the time
747 haveTime = true; 546 URI.
748 } 547
749 } 548 What we _could_ do is to create one of each type of
750 549 model at the start, for each of the source+type
751 QString valuestring = results[i]["value"].value; 550 keys, and then push each feature into the relevant
752 std::vector<float> values; 551 model depending on what we find out about it. Then
753 552 return only non-empty models.
754 if (valuestring != "") { 553 */
755 QStringList vsl = valuestring.split(" ", QString::SkipEmptyParts); 554
756 for (int j = 0; j < vsl.size(); ++j) { 555 QString label = "";
757 bool success = false; 556 bool text = (type.contains("Text") || type.contains("text")); // Ha, ha
758 float v = vsl[j].toFloat(&success); 557 bool note = (type.contains("Note") || type.contains("note")); // Guffaw
759 if (success) values.push_back(v); 558
760 } 559 if (text) {
761 } 560 label = m_store->matchFirst(Triple(thing, "af:text", Node())).c.value;
762 561 }
763 int dimensions = 1; 562
764 if (values.size() == 1) dimensions = 2; 563 if (label == "") {
765 else if (values.size() > 1) dimensions = 3; 564 label = m_store->matchFirst(Triple(thing, "rdfs:label", Node())).c.value;
766 565 }
767 Model *model = 0; 566
768 567 RealTime time;
769 if (modelMap[timeline][type][dimensions].find(haveDuration) == 568 RealTime duration;
770 modelMap[timeline][type][dimensions].end()) { 569
570 bool haveTime = false;
571 bool haveDuration = false;
572
573 Node at = m_store->matchFirst(Triple(tn, "tl:at", Node())).c;
574
575 if (at != Node()) {
576 time = RealTime::fromXsdDuration(at.value.toStdString());
577 haveTime = true;
578 } else {
579 //!!! NB we're using rather old terminology for these things, apparently:
580 // beginsAt -> start
581 // onTimeLine -> timeline
582
583 Node start = m_store->matchFirst(Triple(tn, "tl:beginsAt", Node())).c;
584 Node dur = m_store->matchFirst(Triple(tn, "tl:duration", Node())).c;
585 if (start != Node() && dur != Node()) {
586 time = RealTime::fromXsdDuration
587 (start.value.toStdString());
588 duration = RealTime::fromXsdDuration
589 (dur.value.toStdString());
590 haveTime = haveDuration = true;
591 }
592 }
593
594 QString valuestring = valu.value;
595 std::vector<float> values;
596
597 if (valuestring != "") {
598 QStringList vsl = valuestring.split(" ", QString::SkipEmptyParts);
599 for (int j = 0; j < vsl.size(); ++j) {
600 bool success = false;
601 float v = vsl[j].toFloat(&success);
602 if (success) values.push_back(v);
603 }
604 }
605
606 int dimensions = 1;
607 if (values.size() == 1) dimensions = 2;
608 else if (values.size() > 1) dimensions = 3;
609
610 Model *model = 0;
611
612 if (modelMap[timeline][type][dimensions].find(haveDuration) ==
613 modelMap[timeline][type][dimensions].end()) {
771 614
772 /* 615 /*
773 SVDEBUG << "Creating new model: source = " << source << ", type = " << type << ", dimensions = " 616 SVDEBUG << "Creating new model: source = " << source << ", type = " << type << ", dimensions = "
774 << dimensions << ", haveDuration = " << haveDuration 617 << dimensions << ", haveDuration = " << haveDuration
775 << ", time = " << time << ", duration = " << duration 618 << ", time = " << time << ", duration = " << duration
776 << endl; 619 << endl;
777 */ 620 */
778 621
779 if (!haveDuration) { 622 if (!haveDuration) {
780 623
781 if (dimensions == 1) { 624 if (dimensions == 1) {
782 625 if (text) {
783 if (text) { 626 model = new TextModel(m_sampleRate, 1, false);
784 627 } else {
785 model = new TextModel(m_sampleRate, 1, false); 628 model = new SparseOneDimensionalModel(m_sampleRate, 1, false);
786 629 }
787 } else { 630 } else if (dimensions == 2) {
788 631 if (text) {
789 model = new SparseOneDimensionalModel(m_sampleRate, 1, false); 632 model = new TextModel(m_sampleRate, 1, false);
633 } else {
634 model = new SparseTimeValueModel(m_sampleRate, 1, false);
635 }
636 } else {
637 // We don't have a three-dimensional sparse model,
638 // so use a note model. We do have some logic (in
639 // extractStructure below) for guessing whether
640 // this should after all have been a dense model,
641 // but it's hard to apply it because we don't have
642 // all the necessary timing data yet... hmm
643 model = new NoteModel(m_sampleRate, 1, false);
644 }
645
646 } else { // haveDuration
647
648 if (note || (dimensions > 2)) {
649 model = new NoteModel(m_sampleRate, 1, false);
650 } else {
651 // If our units are frequency or midi pitch, we
652 // should be using a note model... hm
653 model = new RegionModel(m_sampleRate, 1, false);
654 }
790 } 655 }
791 656
792 } else if (dimensions == 2) { 657 model->setRDFTypeURI(type);
793 658
794 if (text) { 659 if (m_audioModelMap.find(source) != m_audioModelMap.end()) {
795 660 std::cerr << "source model for " << model << " is " << m_audioModelMap[source] << std::endl;
796 model = new TextModel(m_sampleRate, 1, false); 661 model->setSourceModel(m_audioModelMap[source]);
797
798 } else {
799
800 model = new SparseTimeValueModel(m_sampleRate, 1, false);
801 } 662 }
802 663
803 } else { 664 QString title = m_store->matchFirst
804 665 (Triple(typ, "dc:title", Node())).a.value;
805 // We don't have a three-dimensional sparse model, 666 if (title == "") {
806 // so use a note model. We do have some logic (in 667 // take it from the end of the event type
807 // extractStructure below) for guessing whether 668 title = type;
808 // this should after all have been a dense model, 669 title.replace(QRegExp("^.*[/#]"), "");
809 // but it's hard to apply it because we don't have 670 }
810 // all the necessary timing data yet... hmm 671 model->setObjectName(title);
811 672
812 model = new NoteModel(m_sampleRate, 1, false); 673 modelMap[timeline][type][dimensions][haveDuration] = model;
813 } 674 models.push_back(model);
814 675 }
815 } else { // haveDuration 676
816 677 model = modelMap[timeline][type][dimensions][haveDuration];
817 if (note || (dimensions > 2)) { 678
818 679 if (model) {
819 model = new NoteModel(m_sampleRate, 1, false); 680 long ftime = RealTime::realTime2Frame(time, m_sampleRate);
820 681 long fduration = RealTime::realTime2Frame(duration, m_sampleRate);
821 } else { 682 fillModel(model, ftime, fduration, haveDuration, values, label);
822 683 }
823 // If our units are frequency or midi pitch, we 684 }
824 // should be using a note model... hm
825
826 model = new RegionModel(m_sampleRate, 1, false);
827 }
828 }
829
830 model->setRDFTypeURI(type);
831
832 if (m_audioModelMap.find(source) != m_audioModelMap.end()) {
833 std::cerr << "source model for " << model << " is " << m_audioModelMap[source] << std::endl;
834 model->setSourceModel(m_audioModelMap[source]);
835 }
836
837 QString titleQuery = QString
838 (
839 " PREFIX dc: <http://purl.org/dc/elements/1.1/> "
840 " SELECT ?title "
841 " FROM <%1> "
842 " WHERE { "
843 " <%2> dc:title ?title . "
844 " } "
845 ).arg(m_uristring).arg(type);
846 QString title = SimpleSPARQLQuery::singleResultQuery
847 (s, titleQuery, "title").value;
848 if (title == "") {
849 // take it from the end of the event type
850 title = type;
851 title.replace(QRegExp("^.*[/#]"), "");
852 }
853 model->setObjectName(title);
854
855 modelMap[timeline][type][dimensions][haveDuration] = model;
856 models.push_back(model);
857 }
858
859 model = modelMap[timeline][type][dimensions][haveDuration];
860
861 if (model) {
862 long ftime = RealTime::realTime2Frame(time, m_sampleRate);
863 long fduration = RealTime::realTime2Frame(duration, m_sampleRate);
864 fillModel(model, ftime, fduration, haveDuration, values, label);
865 } 685 }
866 } 686 }
867 } 687 }
868 688
869 void 689 void
973 RDFImporter::RDFDocumentType 793 RDFImporter::RDFDocumentType
974 RDFImporter::identifyDocumentType(QString url) 794 RDFImporter::identifyDocumentType(QString url)
975 { 795 {
976 bool haveAudio = false; 796 bool haveAudio = false;
977 bool haveAnnotations = false; 797 bool haveAnnotations = false;
978 798 bool haveRDF = false;
979 // This query is not expected to return any values, but if it 799
980 // executes successfully (leaving no error in the error string) 800 BasicStore *store = 0;
981 // then we know we have RDF 801
982 SimpleSPARQLQuery q(SimpleSPARQLQuery::QueryFromSingleSource, 802 // This is not expected to return anything useful, but if it does
983 QString(" SELECT ?x FROM <%1> WHERE { ?x <y> <z> } ") 803 // anything at all then we know we have RDF
984 .arg(url)); 804 try {
985 805 //!!! non-local document?
986 SimpleSPARQLQuery::ResultList r = q.execute(); 806 store = BasicStore::load(QUrl(url));
987 if (!q.isOK()) { 807 Triple t = store->matchFirst(Triple());
988 SimpleSPARQLQuery::closeSingleSource(url); 808 if (t != Triple()) haveRDF = true;
809 } catch (...) {
810 }
811
812 if (!haveRDF) {
813 delete store;
989 return NotRDF; 814 return NotRDF;
990 } 815 }
991 816
817 store->addPrefix("mo", Uri("http://purl.org/ontology/mo/"));
818 store->addPrefix("event", Uri("http://purl.org/NET/c4dm/event.owl#"));
819 store->addPrefix("af", Uri("http://purl.org/ontology/af/"));
820
992 // "MO-conformant" structure for audio files 821 // "MO-conformant" structure for audio files
993 822
994 SimpleSPARQLQuery::Value value = 823 Node n = store->matchFirst(Triple(Node(), "a", store->expand("mo:AudioFile"))).a;
995 SimpleSPARQLQuery::singleResultQuery 824 if (n != Node() && n.type == Node::URI) {
996 (SimpleSPARQLQuery::QueryFromSingleSource,
997 QString
998 (" PREFIX mo: <http://purl.org/ontology/mo/> "
999 " SELECT ?url FROM <%1> "
1000 " WHERE { ?url a mo:AudioFile } "
1001 ).arg(url),
1002 "url");
1003
1004 if (value.type == SimpleSPARQLQuery::URIValue) {
1005 825
1006 haveAudio = true; 826 haveAudio = true;
1007 827
1008 } else { 828 } else {
1009 829
1010 // Sonic Annotator v0.2 and below used to write this structure 830 // Sonic Annotator v0.2 and below used to write this structure
1011 // (which is not properly in conformance with the Music 831 // (which is not properly in conformance with the Music
1012 // Ontology) 832 // Ontology)
1013 833
1014 value = 834 Nodes sigs = store->match(Triple(Node(), "a", store->expand("mo:Signal"))).a();
1015 SimpleSPARQLQuery::singleResultQuery 835 foreach (Node sig, sigs) {
1016 (SimpleSPARQLQuery::QueryFromSingleSource, 836 Node aa = store->matchFirst(Triple(sig, "mo:available_as", Node())).c;
1017 QString 837 if (aa != Node()) {
1018 (" PREFIX mo: <http://purl.org/ontology/mo/> " 838 haveAudio = true;
1019 " SELECT ?url FROM <%1> " 839 break;
1020 " WHERE { ?signal a mo:Signal ; mo:available_as ?url } " 840 }
1021 ).arg(url),
1022 "url");
1023
1024 if (value.type == SimpleSPARQLQuery::URIValue) {
1025 haveAudio = true;
1026 } 841 }
1027 } 842 }
1028 843
1029 SVDEBUG << "NOTE: RDFImporter::identifyDocumentType: haveAudio = " 844 SVDEBUG << "NOTE: RDFImporter::identifyDocumentType: haveAudio = "
1030 << haveAudio << endl; 845 << haveAudio << endl;
1031 846
1032 value = 847 n = store->matchFirst(Triple(Node(), "event:time", Node())).a;
1033 SimpleSPARQLQuery::singleResultQuery 848 if (n != Node()) {
1034 (SimpleSPARQLQuery::QueryFromSingleSource,
1035 QString
1036 (" PREFIX event: <http://purl.org/NET/c4dm/event.owl#> "
1037 " SELECT ?thing FROM <%1> "
1038 " WHERE { ?thing event:time ?time } "
1039 ).arg(url),
1040 "thing");
1041
1042 if (value.type == SimpleSPARQLQuery::URIValue) {
1043 haveAnnotations = true; 849 haveAnnotations = true;
1044 } 850 }
1045 851
1046 if (!haveAnnotations) { 852 if (!haveAnnotations) {
1047 853 n = store->matchFirst(Triple(Node(), "af:signal_feature", Node())).a;
1048 value = 854 if (n != Node()) {
1049 SimpleSPARQLQuery::singleResultQuery
1050 (SimpleSPARQLQuery::QueryFromSingleSource,
1051 QString
1052 (" PREFIX af: <http://purl.org/ontology/af/> "
1053 " SELECT ?thing FROM <%1> "
1054 " WHERE { ?signal af:signal_feature ?thing } "
1055 ).arg(url),
1056 "thing");
1057
1058 if (value.type == SimpleSPARQLQuery::URIValue) {
1059 haveAnnotations = true; 855 haveAnnotations = true;
1060 } 856 }
1061 } 857 }
1062 858
1063 SVDEBUG << "NOTE: RDFImporter::identifyDocumentType: haveAnnotations = " 859 SVDEBUG << "NOTE: RDFImporter::identifyDocumentType: haveAnnotations = "
1064 << haveAnnotations << endl; 860 << haveAnnotations << endl;
1065 861
1066 SimpleSPARQLQuery::closeSingleSource(url); 862 delete store;
1067 863
1068 if (haveAudio) { 864 if (haveAudio) {
1069 if (haveAnnotations) { 865 if (haveAnnotations) {
1070 return AudioRefAndAnnotations; 866 return AudioRefAndAnnotations;
1071 } else { 867 } else {
1080 } 876 }
1081 877
1082 return OtherRDFDocument; 878 return OtherRDFDocument;
1083 } 879 }
1084 880
1085 void
1086 RDFImporterImpl::loadPrefixes(ProgressReporter *reporter)
1087 {
1088 return;
1089 //!!!
1090 if (m_prefixesLoaded) return;
1091 const char *prefixes[] = {
1092 "http://purl.org/NET/c4dm/event.owl",
1093 "http://purl.org/NET/c4dm/timeline.owl",
1094 "http://purl.org/ontology/mo/",
1095 "http://purl.org/ontology/af/",
1096 "http://www.w3.org/2000/01/rdf-schema",
1097 "http://purl.org/dc/elements/1.1/",
1098 };
1099 for (size_t i = 0; i < sizeof(prefixes)/sizeof(prefixes[0]); ++i) {
1100 CachedFile cf(prefixes[i], reporter, "application/rdf+xml");
1101 if (!cf.isOK()) continue;
1102 SimpleSPARQLQuery::addSourceToModel
1103 (QUrl::fromLocalFile(cf.getLocalFilename()).toString());
1104 }
1105 m_prefixesLoaded = true;
1106 }