comparison rdf/RDFImporter.cpp @ 449:a75edaa08d28

* Support importing features from RDF whose times are intervals rather than only instants; import them into region or note models. Sadly this makes RDF import much, much slower, because we need to work around Rasqal's single-OPTIONAL limitation by repeatedly querying each feature for time and range.
* Add segmentation view to region layer, and display label texts.
author Chris Cannam
date Tue, 07 Oct 2008 12:42:17 +0000
parents 2fb0061c5d23
children d8a2c28ba9f6
comparison
equal deleted inserted replaced
448:2fb0061c5d23 449:a75edaa08d28
27 #include "base/RealTime.h" 27 #include "base/RealTime.h"
28 28
29 #include "data/model/SparseOneDimensionalModel.h" 29 #include "data/model/SparseOneDimensionalModel.h"
30 #include "data/model/SparseTimeValueModel.h" 30 #include "data/model/SparseTimeValueModel.h"
31 #include "data/model/EditableDenseThreeDimensionalModel.h" 31 #include "data/model/EditableDenseThreeDimensionalModel.h"
32 #include "data/model/NoteModel.h"
33 #include "data/model/RegionModel.h"
32 34
33 using std::cerr; 35 using std::cerr;
34 using std::endl; 36 using std::endl;
35 37
36 class RDFImporterImpl 38 class RDFImporterImpl
47 protected: 49 protected:
48 QString m_uristring; 50 QString m_uristring;
49 QString m_errorString; 51 QString m_errorString;
50 int m_sampleRate; 52 int m_sampleRate;
51 53
52 typedef std::vector<float> ValueList;
53 typedef std::map<RealTime, ValueList> TimeValueMap;
54 typedef std::map<QString, TimeValueMap> TypeTimeValueMap;
55 typedef std::map<QString, TypeTimeValueMap> SourceTypeTimeValueMap;
56
57 void getDataModelsSparse(std::vector<Model *> &, ProgressReporter *); 54 void getDataModelsSparse(std::vector<Model *> &, ProgressReporter *);
58 void getDataModelsDense(std::vector<Model *> &, ProgressReporter *); 55 void getDataModelsDense(std::vector<Model *> &, ProgressReporter *);
59 56
60 void getDenseFeatureProperties(QString featureUri, 57 void getDenseFeatureProperties(QString featureUri,
61 int &sampleRate, int &windowLength, 58 int &sampleRate, int &windowLength,
62 int &hopSize, int &width, int &height); 59 int &hopSize, int &width, int &height);
63 60
64 void extractStructure(const TimeValueMap &map, bool &sparse, 61
62 void fillModel(Model *, long, long, bool, std::vector<float> &, QString);
63
64 /*
65
66 typedef std::vector<std::pair<RealTime, float> > DurationValueList;
67 typedef std::map<RealTime, DurationValueList> TimeDurationValueMap;
68 typedef std::map<QString, TimeDurationValueMap> TypeTimeDurationValueMap;
69 typedef std::map<QString, TypeTimeDurationValueMap> SourceTypeTimeDurationValueMap;
70
71 void extractStructure(const TimeDurationValueMap &map, bool &sparse,
65 int &minValueCount, int &maxValueCount); 72 int &minValueCount, int &maxValueCount);
66 73
67 void fillModel(SparseOneDimensionalModel *, const TimeValueMap &); 74 void fillModel(SparseOneDimensionalModel *, const TimeDurationValueMap &);
68 void fillModel(SparseTimeValueModel *, const TimeValueMap &); 75 void fillModel(SparseTimeValueModel *, const TimeDurationValueMap &);
69 void fillModel(EditableDenseThreeDimensionalModel *, const TimeValueMap &); 76 void fillModel(EditableDenseThreeDimensionalModel *, const TimeDurationValueMap &);
77 */
70 }; 78 };
71 79
72 80
73 QString 81 QString
74 RDFImporter::getKnownExtensions() 82 RDFImporter::getKnownExtensions()
382 // structured container that maps from source signal (URI as 390 // structured container that maps from source signal (URI as
383 // string) -> feature type (likewise) -> time -> list of values. 391 // string) -> feature type (likewise) -> time -> list of values.
384 // If the source signal or feature type is unavailable, the empty 392 // If the source signal or feature type is unavailable, the empty
385 // string will do. 393 // string will do.
386 394
387 SourceTypeTimeValueMap m; 395 // SourceTypeTimeDurationValueMap m;
388 396
389 QString queryString = QString( 397 QString prefixes = QString(
390
391 " PREFIX event: <http://purl.org/NET/c4dm/event.owl#>" 398 " PREFIX event: <http://purl.org/NET/c4dm/event.owl#>"
392 " PREFIX time: <http://purl.org/NET/c4dm/timeline.owl#>" 399 " PREFIX tl: <http://purl.org/NET/c4dm/timeline.owl#>"
393 " PREFIX mo: <http://purl.org/ontology/mo/>" 400 " PREFIX mo: <http://purl.org/ontology/mo/>"
394 " PREFIX af: <http://purl.org/ontology/af/>" 401 " PREFIX af: <http://purl.org/ontology/af/>"
395 402 " PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>"
396 " SELECT ?signal_source ?time ?event_type ?value" 403 );
404
405 QString queryString = prefixes + QString(
406
407 " SELECT ?signal_source ?timed_thing ?event_type ?value"
397 " FROM <%1>" 408 " FROM <%1>"
398 409
399 " WHERE {" 410 " WHERE {"
400 411
401 " ?signal mo:available_as ?signal_source ." 412 " ?signal mo:available_as ?signal_source ."
402 " ?signal a mo:Signal ." 413 " ?signal a mo:Signal ."
403 414
404 " ?signal mo:time ?interval ." 415 " ?signal mo:time ?interval ."
405 " ?interval time:onTimeLine ?tl ." 416 " ?interval tl:onTimeLine ?tl ."
406 " ?t time:onTimeLine ?tl ." 417 " ?time tl:onTimeLine ?tl ."
407 " ?t time:at ?time ." 418 " ?timed_thing event:time ?time ."
408 " ?timed_thing event:time ?t ."
409 " ?timed_thing a ?event_type ." 419 " ?timed_thing a ?event_type ."
410 420
411 " OPTIONAL {" 421 " OPTIONAL {"
412 " ?timed_thing af:feature ?value" 422 " ?timed_thing af:feature ?value"
413 " }" 423 " }"
414 " }" 424 " }"
415 425
416 ).arg(m_uristring); 426 ).arg(m_uristring);
417 427
428 QString timeQueryString = prefixes + QString(
429
430 " SELECT ?time FROM <%1> "
431 " WHERE { "
432 " <%2> event:time ?t . "
433 " ?t tl:at ?time . "
434 " } "
435
436 ).arg(m_uristring);
437
438 QString rangeQueryString = prefixes + QString(
439
440 " SELECT ?time ?duration FROM <%1> "
441 " WHERE { "
442 " <%2> event:time ?t . "
443 " ?t tl:beginsAt ?time . "
444 " ?t tl:duration ?duration . "
445 " } "
446
447 ).arg(m_uristring);
448
449 QString labelQueryString = prefixes + QString(
450
451 " SELECT ?label FROM <%1> "
452 " WHERE { "
453 " <%2> rdfs:label ?label . "
454 " } "
455
456 ).arg(m_uristring);
457
418 SimpleSPARQLQuery query(queryString); 458 SimpleSPARQLQuery query(queryString);
419 query.setProgressReporter(reporter); 459 query.setProgressReporter(reporter);
420 460
421 cerr << "Query will be: " << queryString.toStdString() << endl; 461 cerr << "Query will be: " << queryString.toStdString() << endl;
422 462
430 if (query.wasCancelled()) { 470 if (query.wasCancelled()) {
431 m_errorString = "Query cancelled"; 471 m_errorString = "Query cancelled";
432 return; 472 return;
433 } 473 }
434 474
475
476
477 /*
478
479 This function is now only used for sparse data (for dense data
480 we would be in getDataModelsDense instead).
481
482 For sparse data, the determining factors in deciding what model
483 to use are: Do the features have values? and Do the features
484 have duration?
485
486 We can run through the results and check off whether we find
487 values and duration for each of the source+type keys, and then
488 run through the source+type keys pushing each of the results
489 into a suitable model.
490
491 Unfortunately, at this point we do not yet have any actual
492 timing data (time/duration) -- just the time URI.
493
494 What we _could_ do is to create one of each type of model at the
495 start, for each of the source+type keys, and then push each
496 feature into the relevant model depending on what we find out
497 about it. Then return only non-empty models.
498
499
500 */
501
502 // Map from signal source to event type to dimensionality to
503 // presence of duration to model ptr. Whee!
504 std::map<QString, std::map<QString, std::map<int, std::map<bool, Model *> > > >
505 modelMap;
506
435 for (int i = 0; i < results.size(); ++i) { 507 for (int i = 0; i < results.size(); ++i) {
436 508
437 QString source = results[i]["signal_source"].value; 509 QString source = results[i]["signal_source"].value;
438 510 QString type = results[i]["event_type"].value;
511 QString thinguri = results[i]["timed_thing"].value;
512
439 RealTime time; 513 RealTime time;
440 QString timestring = results[i]["time"].value; 514 RealTime duration;
441 time = RealTime::fromXsdDuration(timestring.toStdString()); 515
442 cerr << "time = " << time.toString() << " (from xsd:duration \"" 516 bool haveTime = false;
443 << timestring.toStdString() << "\")" << endl; 517 bool haveDuration = false;
444 518
445 QString type = results[i]["event_type"].value; 519 QString label = SimpleSPARQLQuery::singleResultQuery
520 (labelQueryString.arg(thinguri), "label").value;
521
522 QString timestring = SimpleSPARQLQuery::singleResultQuery
523 (timeQueryString.arg(thinguri), "time").value;
524
525 if (timestring != "") {
526
527 time = RealTime::fromXsdDuration(timestring.toStdString());
528 haveTime = true;
529
530 } else {
531
532 SimpleSPARQLQuery rangeQuery(rangeQueryString.arg(thinguri));
533 SimpleSPARQLQuery::ResultList rangeResults = rangeQuery.execute();
534 if (!rangeResults.empty()) {
535 // std::cerr << rangeResults.size() << " range results" << std::endl;
536 time = RealTime::fromXsdDuration
537 (rangeResults[0]["time"].value.toStdString());
538 duration = RealTime::fromXsdDuration
539 (rangeResults[0]["duration"].value.toStdString());
540 // std::cerr << "duration string " << rangeResults[0]["duration"].value.toStdString() << std::endl;
541 haveTime = true;
542 haveDuration = true;
543 }
544 }
446 545
447 QString valuestring = results[i]["value"].value; 546 QString valuestring = results[i]["value"].value;
448 float value = 0.f; 547 std::vector<float> values;
449 bool haveValue = false; 548
450 if (valuestring != "") { 549 if (valuestring != "") {
451 //!!! no -- runner actually writes a "CSV literal" 550 QStringList vsl = valuestring.split(" ", QString::SkipEmptyParts);
452 value = valuestring.toFloat(&haveValue); 551 for (int j = 0; j < vsl.size(); ++j) {
453 cerr << "value = " << value << endl; 552 bool success = false;
454 } 553 float v = vsl[j].toFloat(&success);
455 554 if (success) values.push_back(v);
456 if (haveValue) { 555 }
457 m[source][type][time].push_back(value); 556 }
458 } else if (m[source][type].find(time) == m[source][type].end()) { 557
459 m[source][type][time] = ValueList(); 558 int dimensions = 1;
460 } 559 if (values.size() == 1) dimensions = 2;
461 } 560 else if (values.size() > 1) dimensions = 3;
462 561
463 for (SourceTypeTimeValueMap::const_iterator mi = m.begin(); 562 Model *model = 0;
563
564 if (modelMap[source][type][dimensions].find(haveDuration) ==
565 modelMap[source][type][dimensions].end()) {
566
567 /*
568 std::cerr << "Creating new model: source = " << source.toStdString()
569 << ", type = " << type.toStdString() << ", dimensions = "
570 << dimensions << ", haveDuration = " << haveDuration
571 << ", time = " << time << ", duration = " << duration
572 << std::endl;
573 */
574
575 if (!haveDuration) {
576
577 if (dimensions == 1) {
578
579 // std::cerr << "SparseOneDimensionalModel" << std::endl;
580 model = new SparseOneDimensionalModel(m_sampleRate, 1, false);
581
582 } else if (dimensions == 2) {
583
584 // std::cerr << "SparseTimeValueModel" << std::endl;
585 model = new SparseTimeValueModel(m_sampleRate, 1, false);
586
587 } else {
588
589 // We don't have a three-dimensional sparse model,
590 // so use a note model. We do have some logic (in
591 // extractStructure below) for guessing whether
592 // this should after all have been a dense model,
593 // but it's hard to apply it because we don't have
594 // all the necessary timing data yet... hmm
595
596 // std::cerr << "NoteModel" << std::endl;
597 model = new NoteModel(m_sampleRate, 1, false);
598 }
599
600 } else { // haveDuration
601
602 if (dimensions == 1 || dimensions == 2) {
603
604 // If our units are frequency or midi pitch, we
605 // should be using a note model... hm
606
607 // std::cerr << "RegionModel" << std::endl;
608 model = new RegionModel(m_sampleRate, 1, false);
609
610 } else {
611
612 // We don't have a three-dimensional sparse model,
613 // so use a note model. We do have some logic (in
614 // extractStructure below) for guessing whether
615 // this should after all have been a dense model,
616 // but it's hard to apply it because we don't have
617 // all the necessary timing data yet... hmm
618
619 // std::cerr << "NoteModel" << std::endl;
620 model = new NoteModel(m_sampleRate, 1, false);
621 }
622 }
623
624 modelMap[source][type][dimensions][haveDuration] = model;
625 models.push_back(model);
626 }
627
628 model = modelMap[source][type][dimensions][haveDuration];
629
630 if (model) {
631 long ftime = RealTime::realTime2Frame(time, m_sampleRate);
632 long fduration = RealTime::realTime2Frame(duration, m_sampleRate);
633 fillModel(model, ftime, fduration, haveDuration, values, label);
634 }
635 }
636
637
638 /*
639 for (SourceTypeTimeDurationValueMap::const_iterator mi = m.begin();
464 mi != m.end(); ++mi) { 640 mi != m.end(); ++mi) {
465 641
466 QString source = mi->first; 642 QString source = mi->first;
467 643
468 for (TypeTimeValueMap::const_iterator ttvi = mi->second.begin(); 644 for (TypeTimeDurationValueMap::const_iterator ttvi = mi->second.begin();
469 ttvi != mi->second.end(); ++ttvi) { 645 ttvi != mi->second.end(); ++ttvi) {
470 646
471 QString type = ttvi->first; 647 QString type = ttvi->first;
472 648
473 // Now we need to work out what sort of model to use for 649 // Now we need to work out what sort of model to use for
554 fillModel(model, ttvi->second); 730 fillModel(model, ttvi->second);
555 models.push_back(model); 731 models.push_back(model);
556 } 732 }
557 } 733 }
558 } 734 }
559 } 735 */
560 736 }
737
738 /*
561 void 739 void
562 RDFImporterImpl::extractStructure(const TimeValueMap &tvm, 740 RDFImporterImpl::extractStructure(const TimeDurationValueMap &tvm,
563 bool &sparse, 741 bool &sparse,
564 int &minValueCount, 742 int &minValueCount,
565 int &maxValueCount) 743 int &maxValueCount)
566 { 744 {
567 // These are floats intentionally rather than RealTime -- 745 // These are floats intentionally rather than RealTime --
568 // see logic for handling rounding error below 746 // see logic for handling rounding error below
569 float firstTime = 0.f; 747 float firstTime = 0.f;
570 float timeStep = 0.f; 748 float timeStep = 0.f;
571 bool haveTimeStep = false; 749 bool haveTimeStep = false;
572 750
573 for (TimeValueMap::const_iterator tvi = tvm.begin(); tvi != tvm.end(); ++tvi) { 751 for (TimeDurationValueMap::const_iterator tvi = tvm.begin(); tvi != tvm.end(); ++tvi) {
574 752
575 RealTime time = tvi->first; 753 RealTime time = tvi->first;
576 int valueCount = tvi->second.size(); 754 int valueCount = tvi->second.size();
577 755
578 if (tvi == tvm.begin()) { 756 if (tvi == tvm.begin()) {
605 } 783 }
606 } 784 }
607 } 785 }
608 } 786 }
609 } 787 }
610 788 */
789
790 void
791 RDFImporterImpl::fillModel(Model *model,
792 long ftime,
793 long fduration,
794 bool haveDuration,
795 std::vector<float> &values,
796 QString label)
797 {
798 SparseOneDimensionalModel *sodm =
799 dynamic_cast<SparseOneDimensionalModel *>(model);
800 if (sodm) {
801 SparseOneDimensionalModel::Point point(ftime, label);
802 sodm->addPoint(point);
803 return;
804 }
805
806 SparseTimeValueModel *stvm =
807 dynamic_cast<SparseTimeValueModel *>(model);
808 if (stvm) {
809 SparseTimeValueModel::Point point
810 (ftime, values.empty() ? 0.f : values[0], label);
811 stvm->addPoint(point);
812 return;
813 }
814
815 NoteModel *nm =
816 dynamic_cast<NoteModel *>(model);
817 if (nm) {
818 if (haveDuration) {
819 float value = 0.f, level = 1.f;
820 if (!values.empty()) {
821 value = values[0];
822 if (values.size() > 1) {
823 level = values[1];
824 }
825 }
826 NoteModel::Point point(ftime, value, fduration, level, label);
827 nm->addPoint(point);
828 } else {
829 float value = 0.f, duration = 1.f, level = 1.f;
830 if (!values.empty()) {
831 value = values[0];
832 if (values.size() > 1) {
833 duration = values[1];
834 if (values.size() > 2) {
835 level = values[2];
836 }
837 }
838 }
839 NoteModel::Point point(ftime, value, duration, level, label);
840 nm->addPoint(point);
841 }
842 return;
843 }
844
845 RegionModel *rm =
846 dynamic_cast<RegionModel *>(model);
847 if (rm) {
848 if (haveDuration) {
849 RegionModel::Point point
850 (ftime, values.empty() ? 0.f : values[0], fduration, label);
851 rm->addPoint(point);
852 } else {
853 // This won't actually happen -- we only create region models
854 // if we do have duration -- but just for completeness
855 float value = 0.f, duration = 1.f;
856 if (!values.empty()) {
857 value = values[0];
858 if (values.size() > 1) {
859 duration = values[1];
860 }
861 }
862 RegionModel::Point point(ftime, value, duration, label);
863 rm->addPoint(point);
864 }
865 return;
866 }
867
868 std::cerr << "WARNING: RDFImporterImpl::fillModel: Unknown or unexpected model type" << std::endl;
869 return;
870 }
871
872
873 /*
611 void 874 void
612 RDFImporterImpl::fillModel(SparseOneDimensionalModel *model, 875 RDFImporterImpl::fillModel(SparseOneDimensionalModel *model,
613 const TimeValueMap &tvm) 876 const TimeDurationValueMap &tvm)
614 { 877 {
615 //!!! labels &c not yet handled 878 //!!! labels &c not yet handled
616 879
617 for (TimeValueMap::const_iterator tvi = tvm.begin(); 880 for (TimeDurationValueMap::const_iterator tvi = tvm.begin();
618 tvi != tvm.end(); ++tvi) { 881 tvi != tvm.end(); ++tvi) {
619 882
620 RealTime time = tvi->first; 883 RealTime time = tvi->first;
621 long frame = RealTime::realTime2Frame(time, m_sampleRate); 884 long frame = RealTime::realTime2Frame(time, m_sampleRate);
622 885
626 } 889 }
627 } 890 }
628 891
629 void 892 void
630 RDFImporterImpl::fillModel(SparseTimeValueModel *model, 893 RDFImporterImpl::fillModel(SparseTimeValueModel *model,
631 const TimeValueMap &tvm) 894 const TimeDurationValueMap &tvm)
632 { 895 {
633 //!!! labels &c not yet handled 896 //!!! labels &c not yet handled
634 897
635 for (TimeValueMap::const_iterator tvi = tvm.begin(); 898 for (TimeDurationValueMap::const_iterator tvi = tvm.begin();
636 tvi != tvm.end(); ++tvi) { 899 tvi != tvm.end(); ++tvi) {
637 900
638 RealTime time = tvi->first; 901 RealTime time = tvi->first;
639 long frame = RealTime::realTime2Frame(time, m_sampleRate); 902 long frame = RealTime::realTime2Frame(time, m_sampleRate);
640 903
641 float value = 0.f; 904 float value = 0.f;
642 if (!tvi->second.empty()) value = *tvi->second.begin(); 905 if (!tvi->second.empty()) value = *tvi->second.begin()->second;
643 906
644 SparseTimeValueModel::Point point(frame, value, ""); 907 SparseTimeValueModel::Point point(frame, value, "");
645 908
646 model->addPoint(point); 909 model->addPoint(point);
647 } 910 }
648 } 911 }
649 912
650 void 913 void
651 RDFImporterImpl::fillModel(EditableDenseThreeDimensionalModel *model, 914 RDFImporterImpl::fillModel(EditableDenseThreeDimensionalModel *model,
652 const TimeValueMap &tvm) 915 const TimeDurationValueMap &tvm)
653 { 916 {
654 //!!! labels &c not yet handled 917 //!!! labels &c not yet handled
655 918
656 //!!! start time offset not yet handled 919 //!!! start time offset not yet handled
657 920
658 size_t col = 0; 921 size_t col = 0;
659 922
660 for (TimeValueMap::const_iterator tvi = tvm.begin(); 923 for (TimeDurationValueMap::const_iterator tvi = tvm.begin();
661 tvi != tvm.end(); ++tvi) { 924 tvi != tvm.end(); ++tvi) {
662 925
663 model->setColumn(col++, tvi->second); 926 model->setColumn(col++, tvi->second.second);
664 } 927 }
665 } 928 }
666 929
930 */