annotate rdf/RDFImporter.cpp @ 1852:a454c7477b4f

Be more cautious about firing up an RDF file parser to identify a document - don't do it at all if the document is not apparently text
author Chris Cannam
date Thu, 30 Apr 2020 14:46:07 +0100
parents d484490cdf69
children db489a1ece9b
rev   line source
Chris@439 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@439 2
Chris@439 3 /*
Chris@439 4 Sonic Visualiser
Chris@439 5 An audio file viewer and annotation editor.
Chris@439 6 Centre for Digital Music, Queen Mary, University of London.
Chris@727 7 This file copyright 2008-2012 QMUL.
Chris@439 8
Chris@439 9 This program is free software; you can redistribute it and/or
Chris@439 10 modify it under the terms of the GNU General Public License as
Chris@439 11 published by the Free Software Foundation; either version 2 of the
Chris@439 12 License, or (at your option) any later version. See the file
Chris@439 13 COPYING included with this distribution for more information.
Chris@439 14 */
Chris@439 15
Chris@439 16 #include "RDFImporter.h"
Chris@439 17
Chris@439 18 #include <map>
Chris@439 19 #include <vector>
Chris@439 20
Chris@439 21 #include <iostream>
Chris@439 22 #include <cmath>
Chris@439 23
Chris@439 24 #include "base/ProgressReporter.h"
Chris@439 25 #include "base/RealTime.h"
Chris@1852 26 #include "base/StringBits.h"
Chris@439 27
Chris@439 28 #include "data/model/SparseOneDimensionalModel.h"
Chris@439 29 #include "data/model/SparseTimeValueModel.h"
Chris@439 30 #include "data/model/EditableDenseThreeDimensionalModel.h"
Chris@449 31 #include "data/model/NoteModel.h"
Chris@510 32 #include "data/model/TextModel.h"
Chris@449 33 #include "data/model/RegionModel.h"
Chris@1122 34 #include "data/model/ReadOnlyWaveFileModel.h"
Chris@499 35
Chris@499 36 #include "data/fileio/FileSource.h"
Chris@520 37 #include "data/fileio/CachedFile.h"
Chris@581 38 #include "data/fileio/FileFinder.h"
Chris@522 39
Chris@726 40 #include <dataquay/BasicStore.h>
Chris@726 41 #include <dataquay/PropertyObject.h>
Chris@726 42
Chris@1852 43 #include <QFile>
Chris@1852 44 #include <QXmlInputSource>
Chris@1852 45
Chris@726 46 using Dataquay::Uri;
Chris@726 47 using Dataquay::Node;
Chris@726 48 using Dataquay::Nodes;
Chris@726 49 using Dataquay::Triple;
Chris@726 50 using Dataquay::Triples;
Chris@726 51 using Dataquay::BasicStore;
Chris@726 52 using Dataquay::PropertyObject;
Chris@726 53
Chris@439 54 class RDFImporterImpl
Chris@439 55 {
Chris@439 56 public:
Chris@1040 57 RDFImporterImpl(QString url, sv_samplerate_t sampleRate);
Chris@439 58 virtual ~RDFImporterImpl();
Chris@490 59
Chris@1040 60 void setSampleRate(sv_samplerate_t sampleRate) { m_sampleRate = sampleRate; }
Chris@439 61
Chris@439 62 bool isOK();
Chris@439 63 QString getErrorString() const;
Chris@439 64
Chris@1752 65 std::vector<ModelId> getDataModels(ProgressReporter *);
Chris@439 66
Chris@439 67 protected:
Chris@726 68 BasicStore *m_store;
Chris@730 69 Uri expand(QString s) { return m_store->expand(s); }
Chris@726 70
Chris@439 71 QString m_uristring;
Chris@439 72 QString m_errorString;
Chris@1752 73 std::map<QString, ModelId> m_audioModelMap;
Chris@1040 74 sv_samplerate_t m_sampleRate;
Chris@439 75
Chris@1752 76 std::map<ModelId, std::map<QString, float> > m_labelValueMap;
Chris@617 77
Chris@1752 78 void getDataModelsAudio(std::vector<ModelId> &, ProgressReporter *);
Chris@1752 79 void getDataModelsSparse(std::vector<ModelId> &, ProgressReporter *);
Chris@1752 80 void getDataModelsDense(std::vector<ModelId> &, ProgressReporter *);
Chris@440 81
Chris@1752 82 QString getDenseModelTitle(QString featureUri, QString featureTypeUri);
Chris@493 83
Chris@440 84 void getDenseFeatureProperties(QString featureUri,
Chris@1040 85 sv_samplerate_t &sampleRate, int &windowLength,
Chris@440 86 int &hopSize, int &width, int &height);
Chris@440 87
Chris@1752 88 void fillModel(ModelId, sv_frame_t, sv_frame_t,
Chris@1039 89 bool, std::vector<float> &, QString);
Chris@439 90 };
Chris@439 91
Chris@439 92 QString
Chris@439 93 RDFImporter::getKnownExtensions()
Chris@439 94 {
Chris@439 95 return "*.rdf *.n3 *.ttl";
Chris@439 96 }
Chris@439 97
Chris@1040 98 RDFImporter::RDFImporter(QString url, sv_samplerate_t sampleRate) :
Chris@439 99 m_d(new RDFImporterImpl(url, sampleRate))
Chris@439 100 {
Chris@439 101 }
Chris@439 102
Chris@439 103 RDFImporter::~RDFImporter()
Chris@439 104 {
Chris@439 105 delete m_d;
Chris@439 106 }
Chris@439 107
Chris@490 108 void
Chris@1040 109 RDFImporter::setSampleRate(sv_samplerate_t sampleRate)
Chris@490 110 {
Chris@490 111 m_d->setSampleRate(sampleRate);
Chris@490 112 }
Chris@490 113
Chris@439 114 bool
Chris@439 115 RDFImporter::isOK()
Chris@439 116 {
Chris@439 117 return m_d->isOK();
Chris@439 118 }
Chris@439 119
Chris@439 120 QString
Chris@439 121 RDFImporter::getErrorString() const
Chris@439 122 {
Chris@439 123 return m_d->getErrorString();
Chris@439 124 }
Chris@439 125
Chris@1752 126 std::vector<ModelId>
Chris@439 127 RDFImporter::getDataModels(ProgressReporter *r)
Chris@439 128 {
Chris@439 129 return m_d->getDataModels(r);
Chris@439 130 }
Chris@439 131
Chris@1040 132 RDFImporterImpl::RDFImporterImpl(QString uri, sv_samplerate_t sampleRate) :
Chris@726 133 m_store(new BasicStore),
Chris@439 134 m_uristring(uri),
Chris@439 135 m_sampleRate(sampleRate)
Chris@439 136 {
Chris@726 137 //!!! retrieve data if remote... then
Chris@726 138
Chris@726 139 m_store->addPrefix("mo", Uri("http://purl.org/ontology/mo/"));
Chris@726 140 m_store->addPrefix("af", Uri("http://purl.org/ontology/af/"));
Chris@726 141 m_store->addPrefix("dc", Uri("http://purl.org/dc/elements/1.1/"));
Chris@726 142 m_store->addPrefix("tl", Uri("http://purl.org/NET/c4dm/timeline.owl#"));
Chris@726 143 m_store->addPrefix("event", Uri("http://purl.org/NET/c4dm/event.owl#"));
Chris@726 144 m_store->addPrefix("rdfs", Uri("http://www.w3.org/2000/01/rdf-schema#"));
Chris@727 145
Chris@738 146 try {
Chris@738 147 QUrl url;
Chris@738 148 if (uri.startsWith("file:")) {
Chris@738 149 url = QUrl(uri);
Chris@738 150 } else {
Chris@738 151 url = QUrl::fromLocalFile(uri);
Chris@738 152 }
Chris@738 153 m_store->import(url, BasicStore::ImportIgnoreDuplicates);
Chris@738 154 } catch (std::exception &e) {
Chris@738 155 m_errorString = e.what();
Chris@736 156 }
Chris@439 157 }
Chris@439 158
Chris@439 159 RDFImporterImpl::~RDFImporterImpl()
Chris@439 160 {
Chris@726 161 delete m_store;
Chris@439 162 }
Chris@439 163
Chris@439 164 bool
Chris@439 165 RDFImporterImpl::isOK()
Chris@439 166 {
Chris@439 167 return (m_errorString == "");
Chris@439 168 }
Chris@439 169
Chris@439 170 QString
Chris@439 171 RDFImporterImpl::getErrorString() const
Chris@439 172 {
Chris@439 173 return m_errorString;
Chris@439 174 }
Chris@439 175
Chris@1752 176 std::vector<ModelId>
Chris@439 177 RDFImporterImpl::getDataModels(ProgressReporter *reporter)
Chris@439 178 {
Chris@1752 179 std::vector<ModelId> models;
Chris@439 180
Chris@499 181 getDataModelsAudio(models, reporter);
Chris@499 182
Chris@490 183 if (m_sampleRate == 0) {
Chris@616 184 m_errorString = QString("Invalid audio data model (is audio file format supported?)");
Chris@843 185 cerr << m_errorString << endl;
Chris@490 186 return models;
Chris@490 187 }
Chris@490 188
Chris@508 189 QString error;
Chris@508 190
Chris@522 191 if (m_errorString != "") {
Chris@522 192 error = m_errorString;
Chris@522 193 }
Chris@508 194 m_errorString = "";
Chris@508 195
Chris@440 196 getDataModelsDense(models, reporter);
Chris@440 197
Chris@522 198 if (m_errorString != "") {
Chris@522 199 error = m_errorString;
Chris@522 200 }
Chris@440 201 m_errorString = "";
Chris@440 202
Chris@440 203 getDataModelsSparse(models, reporter);
Chris@440 204
Chris@522 205 if (m_errorString == "" && error != "") {
Chris@522 206 m_errorString = error;
Chris@522 207 }
Chris@440 208
Chris@440 209 return models;
Chris@440 210 }
Chris@440 211
Chris@440 212 void
Chris@1752 213 RDFImporterImpl::getDataModelsAudio(std::vector<ModelId> &models,
Chris@499 214 ProgressReporter *reporter)
Chris@499 215 {
Chris@726 216 Nodes sigs = m_store->match
Chris@730 217 (Triple(Node(), Uri("a"), expand("mo:Signal"))).subjects();
Chris@499 218
Chris@726 219 foreach (Node sig, sigs) {
Chris@726 220
Chris@730 221 Node file = m_store->complete(Triple(Node(), expand("mo:encodes"), sig));
Chris@726 222 if (file == Node()) {
Chris@730 223 file = m_store->complete(Triple(sig, expand("mo:available_as"), Node()));
Chris@726 224 }
Chris@726 225 if (file == Node()) {
Chris@843 226 cerr << "RDFImporterImpl::getDataModelsAudio: ERROR: No source for signal " << sig << endl;
Chris@726 227 continue;
Chris@726 228 }
Chris@499 229
Chris@726 230 QString signal = sig.value;
Chris@726 231 QString source = file.value;
Chris@589 232
Chris@726 233 SVDEBUG << "NOTE: Seeking signal source \"" << source
Chris@726 234 << "\"..." << endl;
Chris@616 235
Chris@522 236 FileSource *fs = new FileSource(source, reporter);
Chris@616 237 if (fs->isAvailable()) {
Chris@690 238 SVDEBUG << "NOTE: Source is available: Local filename is \""
Chris@726 239 << fs->getLocalFilename()
Chris@726 240 << "\"..." << endl;
Chris@616 241 }
Chris@616 242
Chris@522 243 #ifdef NO_SV_GUI
Chris@522 244 if (!fs->isAvailable()) {
Chris@522 245 m_errorString = QString("Signal source \"%1\" is not available").arg(source);
Chris@522 246 delete fs;
Chris@522 247 continue;
Chris@522 248 }
Chris@522 249 #else
Chris@522 250 if (!fs->isAvailable()) {
Chris@726 251 SVDEBUG << "NOTE: Signal source \"" << source
Chris@726 252 << "\" is not available, using file finder..." << endl;
Chris@522 253 FileFinder *ff = FileFinder::getInstance();
Chris@581 254 if (ff) {
Chris@581 255 QString path = ff->find(FileFinder::AudioFile,
Chris@581 256 fs->getLocation(),
Chris@581 257 m_uristring);
Chris@581 258 if (path != "") {
Chris@844 259 cerr << "File finder returns: \"" << path
Chris@843 260 << "\"" << endl;
Chris@522 261 delete fs;
Chris@581 262 fs = new FileSource(path, reporter);
Chris@581 263 if (!fs->isAvailable()) {
Chris@581 264 delete fs;
Chris@581 265 m_errorString = QString("Signal source \"%1\" is not available").arg(source);
Chris@581 266 continue;
Chris@581 267 }
Chris@522 268 }
Chris@499 269 }
Chris@522 270 }
Chris@522 271 #endif
Chris@522 272
Chris@522 273 if (reporter) {
Chris@522 274 reporter->setMessage(RDFImporter::tr("Importing audio referenced in RDF..."));
Chris@522 275 }
Chris@522 276 fs->waitForData();
Chris@1752 277 auto newModel = std::make_shared<ReadOnlyWaveFileModel>
Chris@1752 278 (*fs, m_sampleRate);
Chris@522 279 if (newModel->isOK()) {
Chris@843 280 cerr << "Successfully created wave file model from source at \"" << source << "\"" << endl;
Chris@1752 281 auto modelId = ModelById::add(newModel);
Chris@1752 282 models.push_back(modelId);
Chris@1752 283 m_audioModelMap[signal] = modelId;
Chris@522 284 if (m_sampleRate == 0) {
Chris@522 285 m_sampleRate = newModel->getSampleRate();
Chris@499 286 }
Chris@508 287 } else {
Chris@522 288 m_errorString = QString("Failed to create wave file model from source at \"%1\"").arg(source);
Chris@499 289 }
Chris@522 290 delete fs;
Chris@499 291 }
Chris@499 292 }
Chris@499 293
Chris@499 294 void
Chris@1752 295 RDFImporterImpl::getDataModelsDense(std::vector<ModelId> &models,
Chris@440 296 ProgressReporter *reporter)
Chris@440 297 {
Chris@499 298 if (reporter) {
Chris@499 299 reporter->setMessage(RDFImporter::tr("Importing dense signal data from RDF..."));
Chris@499 300 }
Chris@499 301
Chris@726 302 Nodes sigFeatures = m_store->match
Chris@730 303 (Triple(Node(), expand("af:signal_feature"), Node())).objects();
Chris@440 304
Chris@726 305 foreach (Node sf, sigFeatures) {
Chris@440 306
Chris@726 307 if (sf.type != Node::URI && sf.type != Node::Blank) continue;
Chris@726 308
Chris@730 309 Node t = m_store->complete(Triple(sf, expand("a"), Node()));
Chris@730 310 Node v = m_store->complete(Triple(sf, expand("af:value"), Node()));
Chris@440 311
Chris@726 312 QString feature = sf.value;
Chris@726 313 QString type = t.value;
Chris@726 314 QString value = v.value;
Chris@726 315
Chris@726 316 if (type == "" || value == "") continue;
Chris@440 317
Chris@1040 318 sv_samplerate_t sampleRate = 0;
Chris@440 319 int windowLength = 0;
Chris@440 320 int hopSize = 0;
Chris@440 321 int width = 0;
Chris@440 322 int height = 0;
Chris@440 323 getDenseFeatureProperties
Chris@440 324 (feature, sampleRate, windowLength, hopSize, width, height);
Chris@440 325
Chris@440 326 if (sampleRate != 0 && sampleRate != m_sampleRate) {
Chris@440 327 cerr << "WARNING: Sample rate in dense feature description does not match our underlying rate -- using rate from feature description" << endl;
Chris@440 328 }
Chris@440 329 if (sampleRate == 0) sampleRate = m_sampleRate;
Chris@440 330
Chris@440 331 if (hopSize == 0) {
Chris@440 332 cerr << "WARNING: Dense feature description does not specify a hop size -- assuming 1" << endl;
Chris@440 333 hopSize = 1;
Chris@440 334 }
Chris@440 335
Chris@440 336 if (height == 0) {
Chris@440 337 cerr << "WARNING: Dense feature description does not specify feature signal dimensions -- assuming one-dimensional (height = 1)" << endl;
Chris@440 338 height = 1;
Chris@440 339 }
Chris@440 340
Chris@440 341 QStringList values = value.split(' ', QString::SkipEmptyParts);
Chris@440 342
Chris@440 343 if (values.empty()) {
Chris@440 344 cerr << "WARNING: Dense feature description does not specify any values!" << endl;
Chris@440 345 continue;
Chris@440 346 }
Chris@440 347
Chris@440 348 if (height == 1) {
Chris@440 349
Chris@1752 350 auto m = std::make_shared<SparseTimeValueModel>
Chris@440 351 (sampleRate, hopSize, false);
Chris@440 352
Chris@440 353 for (int j = 0; j < values.size(); ++j) {
Chris@440 354 float f = values[j].toFloat();
Chris@1651 355 Event e(j * hopSize, f, "");
Chris@1651 356 m->add(e);
Chris@440 357 }
Chris@493 358
Chris@1752 359 m->setObjectName(getDenseModelTitle(feature, type));
Chris@558 360 m->setRDFTypeURI(type);
Chris@1752 361 models.push_back(ModelById::add(m));
Chris@440 362
Chris@440 363 } else {
Chris@440 364
Chris@1752 365 auto m = std::make_shared<EditableDenseThreeDimensionalModel>
Chris@1777 366 (sampleRate, hopSize, height, false);
Chris@440 367
Chris@440 368 EditableDenseThreeDimensionalModel::Column column;
Chris@440 369
Chris@440 370 int x = 0;
Chris@440 371
Chris@440 372 for (int j = 0; j < values.size(); ++j) {
Chris@440 373 if (j % height == 0 && !column.empty()) {
Chris@440 374 m->setColumn(x++, column);
Chris@440 375 column.clear();
Chris@440 376 }
Chris@440 377 column.push_back(values[j].toFloat());
Chris@440 378 }
Chris@440 379
Chris@440 380 if (!column.empty()) {
Chris@440 381 m->setColumn(x++, column);
Chris@440 382 }
Chris@440 383
Chris@1752 384 m->setObjectName(getDenseModelTitle(feature, type));
Chris@558 385 m->setRDFTypeURI(type);
Chris@1752 386 models.push_back(ModelById::add(m));
Chris@440 387 }
Chris@440 388 }
Chris@440 389 }
Chris@440 390
Chris@1752 391 QString
Chris@1752 392 RDFImporterImpl::getDenseModelTitle(QString featureUri,
Chris@493 393 QString featureTypeUri)
Chris@493 394 {
Chris@730 395 Node n = m_store->complete
Chris@730 396 (Triple(Uri(featureUri), expand("dc:title"), Node()));
Chris@493 397
Chris@726 398 if (n.type == Node::Literal && n.value != "") {
Chris@726 399 SVDEBUG << "RDFImporterImpl::getDenseModelTitle: Title (from signal) \"" << n.value << "\"" << endl;
Chris@1752 400 return n.value;
Chris@493 401 }
Chris@493 402
Chris@730 403 n = m_store->complete
Chris@730 404 (Triple(Uri(featureTypeUri), expand("dc:title"), Node()));
Chris@726 405
Chris@726 406 if (n.type == Node::Literal && n.value != "") {
Chris@726 407 SVDEBUG << "RDFImporterImpl::getDenseModelTitle: Title (from signal type) \"" << n.value << "\"" << endl;
Chris@1752 408 return n.value;
Chris@493 409 }
Chris@493 410
Chris@690 411 SVDEBUG << "RDFImporterImpl::getDenseModelTitle: No title available for feature <" << featureUri << ">" << endl;
Chris@1752 412 return {};
Chris@493 413 }
Chris@493 414
Chris@493 415 void
Chris@440 416 RDFImporterImpl::getDenseFeatureProperties(QString featureUri,
Chris@1040 417 sv_samplerate_t &sampleRate, int &windowLength,
Chris@440 418 int &hopSize, int &width, int &height)
Chris@440 419 {
Chris@730 420 Node dim = m_store->complete
Chris@730 421 (Triple(Uri(featureUri), expand("af:dimensions"), Node()));
Chris@489 422
Chris@726 423 cerr << "Dimensions = \"" << dim.value << "\"" << endl;
Chris@440 424
Chris@726 425 if (dim.type == Node::Literal && dim.value != "") {
Chris@726 426 QStringList dl = dim.value.split(" ");
Chris@726 427 if (dl.empty()) dl.push_back(dim.value);
Chris@440 428 if (dl.size() > 0) height = dl[0].toInt();
Chris@440 429 if (dl.size() > 1) width = dl[1].toInt();
Chris@440 430 }
Chris@726 431
Chris@726 432 // Looking for rate, hop, window from:
Chris@726 433 //
Chris@726 434 // ?feature mo:time ?time .
Chris@726 435 // ?time a tl:Interval .
Chris@726 436 // ?time tl:onTimeLine ?timeline .
Chris@726 437 // ?map tl:rangeTimeLine ?timeline .
Chris@726 438 // ?map tl:sampleRate ?rate .
Chris@726 439 // ?map tl:hopSize ?hop .
Chris@726 440 // ?map tl:windowLength ?window .
Chris@440 441
Chris@730 442 Node interval = m_store->complete(Triple(Uri(featureUri), expand("mo:time"), Node()));
Chris@440 443
Chris@730 444 if (!m_store->contains(Triple(interval, expand("a"), expand("tl:Interval")))) {
Chris@726 445 cerr << "RDFImporterImpl::getDenseFeatureProperties: Feature time node "
Chris@726 446 << interval << " is not a tl:Interval" << endl;
Chris@726 447 return;
Chris@440 448 }
Chris@440 449
Chris@730 450 Node tl = m_store->complete(Triple(interval, expand("tl:onTimeLine"), Node()));
Chris@726 451
Chris@726 452 if (tl == Node()) {
Chris@726 453 cerr << "RDFImporterImpl::getDenseFeatureProperties: Interval node "
Chris@726 454 << interval << " lacks tl:onTimeLine property" << endl;
Chris@726 455 return;
Chris@440 456 }
Chris@440 457
Chris@730 458 Node map = m_store->complete(Triple(Node(), expand("tl:rangeTimeLine"), tl));
Chris@726 459
Chris@726 460 if (map == Node()) {
Chris@726 461 cerr << "RDFImporterImpl::getDenseFeatureProperties: No map for "
Chris@726 462 << "timeline node " << tl << endl;
Chris@726 463 }
Chris@726 464
Chris@726 465 PropertyObject po(m_store, "tl:", map);
Chris@726 466
Chris@726 467 if (po.hasProperty("sampleRate")) {
Chris@1040 468 sampleRate = po.getProperty("sampleRate").toDouble();
Chris@726 469 }
Chris@726 470 if (po.hasProperty("hopSize")) {
Chris@726 471 hopSize = po.getProperty("hopSize").toInt();
Chris@726 472 }
Chris@726 473 if (po.hasProperty("windowLength")) {
Chris@726 474 windowLength = po.getProperty("windowLength").toInt();
Chris@440 475 }
Chris@440 476
Chris@440 477 cerr << "sr = " << sampleRate << ", hop = " << hopSize << ", win = " << windowLength << endl;
Chris@440 478 }
Chris@440 479
Chris@440 480 void
Chris@1752 481 RDFImporterImpl::getDataModelsSparse(std::vector<ModelId> &models,
Chris@440 482 ProgressReporter *reporter)
Chris@440 483 {
Chris@499 484 if (reporter) {
Chris@499 485 reporter->setMessage(RDFImporter::tr("Importing event data from RDF..."));
Chris@499 486 }
Chris@499 487
Chris@726 488 /*
Chris@726 489 This function is only used for sparse data (for dense data we
Chris@726 490 would be in getDataModelsDense instead).
Chris@489 491
Chris@726 492 Our query is intended to retrieve every thing that has a time,
Chris@726 493 and every feature type and value associated with a thing that
Chris@726 494 has a time.
Chris@439 495
Chris@726 496 We will then need to refine this big bag of results into a set
Chris@726 497 of data models.
Chris@439 498
Chris@726 499 Results that have different source signals should go into
Chris@726 500 different models.
Chris@439 501
Chris@726 502 Results that have different feature types should go into
Chris@726 503 different models.
Chris@726 504 */
Chris@439 505
Chris@726 506 Nodes sigs = m_store->match
Chris@730 507 (Triple(Node(), expand("a"), expand("mo:Signal"))).subjects();
Chris@449 508
Chris@616 509 // Map from timeline uri to event type to dimensionality to
Chris@1752 510 // presence of duration to model id. Whee!
Chris@1752 511 std::map<QString, std::map<QString, std::map<int, std::map<bool, ModelId> > > >
Chris@449 512 modelMap;
Chris@449 513
Chris@726 514 foreach (Node sig, sigs) {
Chris@726 515
Chris@730 516 Node interval = m_store->complete(Triple(sig, expand("mo:time"), Node()));
Chris@726 517 if (interval == Node()) continue;
Chris@439 518
Chris@730 519 Node tl = m_store->complete(Triple(interval, expand("tl:onTimeLine"), Node()));
Chris@726 520 if (tl == Node()) continue;
Chris@499 521
Chris@730 522 Nodes times = m_store->match(Triple(Node(), expand("tl:onTimeLine"), tl)).subjects();
Chris@449 523
Chris@726 524 foreach (Node tn, times) {
Chris@726 525
Chris@730 526 Nodes timedThings = m_store->match(Triple(Node(), expand("event:time"), tn)).subjects();
Chris@439 527
Chris@726 528 foreach (Node thing, timedThings) {
Chris@726 529
Chris@730 530 Node typ = m_store->complete(Triple(thing, expand("a"), Node()));
Chris@726 531 if (typ == Node()) continue;
Chris@439 532
Chris@730 533 Node valu = m_store->complete(Triple(thing, expand("af:feature"), Node()));
Chris@510 534
Chris@726 535 QString source = sig.value;
Chris@726 536 QString timeline = tl.value;
Chris@726 537 QString type = typ.value;
Chris@726 538 QString thinguri = thing.value;
Chris@510 539
Chris@726 540 /*
Chris@726 541 For sparse data, the determining factors in deciding
Chris@726 542 what model to use are: Do the features have values?
Chris@726 543 and Do the features have duration?
Chris@449 544
Chris@726 545 We can run through the results and check off whether
Chris@726 546 we find values and duration for each of the
Chris@726 547 source+type keys, and then run through the
Chris@726 548 source+type keys pushing each of the results into a
Chris@726 549 suitable model.
Chris@439 550
Chris@726 551 Unfortunately, at this point we do not yet have any
Chris@726 552 actual timing data (time/duration) -- just the time
Chris@726 553 URI.
Chris@449 554
Chris@726 555 What we _could_ do is to create one of each type of
Chris@726 556 model at the start, for each of the source+type
Chris@726 557 keys, and then push each feature into the relevant
Chris@726 558 model depending on what we find out about it. Then
Chris@726 559 return only non-empty models.
Chris@726 560 */
Chris@439 561
Chris@726 562 QString label = "";
Chris@726 563 bool text = (type.contains("Text") || type.contains("text")); // Ha, ha
Chris@726 564 bool note = (type.contains("Note") || type.contains("note")); // Guffaw
Chris@449 565
Chris@726 566 if (text) {
Chris@730 567 label = m_store->complete(Triple(thing, expand("af:text"), Node())).value;
Chris@726 568 }
Chris@726 569
Chris@726 570 if (label == "") {
Chris@730 571 label = m_store->complete(Triple(thing, expand("rdfs:label"), Node())).value;
Chris@726 572 }
Chris@449 573
Chris@726 574 RealTime time;
Chris@726 575 RealTime duration;
Chris@726 576
Chris@930 577 // bool haveTime = false;
Chris@726 578 bool haveDuration = false;
Chris@726 579
Chris@730 580 Node at = m_store->complete(Triple(tn, expand("tl:at"), Node()));
Chris@726 581
Chris@726 582 if (at != Node()) {
Chris@726 583 time = RealTime::fromXsdDuration(at.value.toStdString());
Chris@930 584 // haveTime = true;
Chris@726 585 } else {
Chris@726 586 //!!! NB we're using rather old terminology for these things, apparently:
Chris@726 587 // beginsAt -> start
Chris@726 588 // onTimeLine -> timeline
Chris@726 589
Chris@730 590 Node start = m_store->complete(Triple(tn, expand("tl:beginsAt"), Node()));
Chris@730 591 Node dur = m_store->complete(Triple(tn, expand("tl:duration"), Node()));
Chris@726 592 if (start != Node() && dur != Node()) {
Chris@726 593 time = RealTime::fromXsdDuration
Chris@726 594 (start.value.toStdString());
Chris@726 595 duration = RealTime::fromXsdDuration
Chris@726 596 (dur.value.toStdString());
Chris@930 597 // haveTime = haveDuration = true;
Chris@726 598 }
Chris@726 599 }
Chris@726 600
Chris@726 601 QString valuestring = valu.value;
Chris@726 602 std::vector<float> values;
Chris@726 603
Chris@726 604 if (valuestring != "") {
Chris@726 605 QStringList vsl = valuestring.split(" ", QString::SkipEmptyParts);
Chris@726 606 for (int j = 0; j < vsl.size(); ++j) {
Chris@726 607 bool success = false;
Chris@726 608 float v = vsl[j].toFloat(&success);
Chris@726 609 if (success) values.push_back(v);
Chris@726 610 }
Chris@726 611 }
Chris@726 612
Chris@726 613 int dimensions = 1;
Chris@726 614 if (values.size() == 1) dimensions = 2;
Chris@726 615 else if (values.size() > 1) dimensions = 3;
Chris@726 616
Chris@1752 617 ModelId modelId;
Chris@726 618
Chris@726 619 if (modelMap[timeline][type][dimensions].find(haveDuration) ==
Chris@726 620 modelMap[timeline][type][dimensions].end()) {
Chris@449 621
Chris@449 622 /*
Chris@690 623 SVDEBUG << "Creating new model: source = " << source << ", type = " << type << ", dimensions = "
Chris@449 624 << dimensions << ", haveDuration = " << haveDuration
Chris@449 625 << ", time = " << time << ", duration = " << duration
Chris@687 626 << endl;
Chris@449 627 */
Chris@1752 628
Chris@1752 629 Model *model = nullptr;
Chris@1752 630
Chris@726 631 if (!haveDuration) {
Chris@449 632
Chris@726 633 if (dimensions == 1) {
Chris@726 634 if (text) {
Chris@726 635 model = new TextModel(m_sampleRate, 1, false);
Chris@726 636 } else {
Chris@726 637 model = new SparseOneDimensionalModel(m_sampleRate, 1, false);
Chris@726 638 }
Chris@726 639 } else if (dimensions == 2) {
Chris@726 640 if (text) {
Chris@726 641 model = new TextModel(m_sampleRate, 1, false);
Chris@726 642 } else {
Chris@726 643 model = new SparseTimeValueModel(m_sampleRate, 1, false);
Chris@726 644 }
Chris@726 645 } else {
Chris@726 646 // We don't have a three-dimensional sparse model,
Chris@726 647 // so use a note model. We do have some logic (in
Chris@726 648 // extractStructure below) for guessing whether
Chris@726 649 // this should after all have been a dense model,
Chris@726 650 // but it's hard to apply it because we don't have
Chris@726 651 // all the necessary timing data yet... hmm
Chris@726 652 model = new NoteModel(m_sampleRate, 1, false);
Chris@726 653 }
Chris@449 654
Chris@726 655 } else { // haveDuration
Chris@510 656
Chris@726 657 if (note || (dimensions > 2)) {
Chris@726 658 model = new NoteModel(m_sampleRate, 1, false);
Chris@726 659 } else {
Chris@726 660 // If our units are frequency or midi pitch, we
Chris@726 661 // should be using a note model... hm
Chris@726 662 model = new RegionModel(m_sampleRate, 1, false);
Chris@726 663 }
Chris@510 664 }
Chris@449 665
Chris@726 666 model->setRDFTypeURI(type);
Chris@449 667
Chris@726 668 if (m_audioModelMap.find(source) != m_audioModelMap.end()) {
Chris@843 669 cerr << "source model for " << model << " is " << m_audioModelMap[source] << endl;
Chris@1752 670 model->setSourceModel(m_audioModelMap[source]);
Chris@510 671 }
Chris@449 672
Chris@730 673 QString title = m_store->complete
Chris@730 674 (Triple(typ, expand("dc:title"), Node())).value;
Chris@726 675 if (title == "") {
Chris@726 676 // take it from the end of the event type
Chris@726 677 title = type;
Chris@726 678 title.replace(QRegExp("^.*[/#]"), "");
Chris@726 679 }
Chris@726 680 model->setObjectName(title);
Chris@449 681
Chris@1752 682 modelId = ModelById::add(std::shared_ptr<Model>(model));
Chris@1752 683 modelMap[timeline][type][dimensions][haveDuration] = modelId;
Chris@1752 684 models.push_back(modelId);
Chris@449 685 }
Chris@449 686
Chris@1752 687 modelId = modelMap[timeline][type][dimensions][haveDuration];
Chris@449 688
Chris@1752 689 if (!modelId.isNone()) {
Chris@1752 690 sv_frame_t ftime =
Chris@1752 691 RealTime::realTime2Frame(time, m_sampleRate);
Chris@1752 692 sv_frame_t fduration =
Chris@1752 693 RealTime::realTime2Frame(duration, m_sampleRate);
Chris@1752 694 fillModel(modelId, ftime, fduration,
Chris@1752 695 haveDuration, values, label);
Chris@449 696 }
Chris@449 697 }
Chris@439 698 }
Chris@439 699 }
Chris@439 700 }
Chris@439 701
Chris@439 702 void
Chris@1752 703 RDFImporterImpl::fillModel(ModelId modelId,
Chris@1039 704 sv_frame_t ftime,
Chris@1039 705 sv_frame_t fduration,
Chris@449 706 bool haveDuration,
Chris@449 707 std::vector<float> &values,
Chris@449 708 QString label)
Chris@449 709 {
Chris@690 710 // SVDEBUG << "RDFImporterImpl::fillModel: adding point at frame " << ftime << endl;
Chris@492 711
Chris@1752 712 if (auto sodm = ModelById::getAs<SparseOneDimensionalModel>(modelId)) {
Chris@1658 713 Event point(ftime, label);
Chris@1658 714 sodm->add(point);
Chris@449 715 return;
Chris@449 716 }
Chris@449 717
Chris@1752 718 if (auto tm = ModelById::getAs<TextModel>(modelId)) {
Chris@1661 719 Event e
Chris@510 720 (ftime,
Chris@510 721 values.empty() ? 0.5f : values[0] < 0.f ? 0.f : values[0] > 1.f ? 1.f : values[0], // I was young and feckless once too
Chris@510 722 label);
Chris@1661 723 tm->add(e);
Chris@510 724 return;
Chris@510 725 }
Chris@510 726
Chris@1752 727 if (auto stvm = ModelById::getAs<SparseTimeValueModel>(modelId)) {
Chris@1651 728 Event e(ftime, values.empty() ? 0.f : values[0], label);
Chris@1651 729 stvm->add(e);
Chris@449 730 return;
Chris@449 731 }
Chris@449 732
Chris@1752 733 if (auto nm = ModelById::getAs<NoteModel>(modelId)) {
Chris@449 734 if (haveDuration) {
Chris@449 735 float value = 0.f, level = 1.f;
Chris@449 736 if (!values.empty()) {
Chris@449 737 value = values[0];
Chris@449 738 if (values.size() > 1) {
Chris@449 739 level = values[1];
Chris@449 740 }
Chris@449 741 }
Chris@1644 742 Event e(ftime, value, fduration, level, label);
Chris@1644 743 nm->add(e);
Chris@449 744 } else {
Chris@449 745 float value = 0.f, duration = 1.f, level = 1.f;
Chris@449 746 if (!values.empty()) {
Chris@449 747 value = values[0];
Chris@449 748 if (values.size() > 1) {
Chris@449 749 duration = values[1];
Chris@449 750 if (values.size() > 2) {
Chris@449 751 level = values[2];
Chris@449 752 }
Chris@449 753 }
Chris@449 754 }
Chris@1644 755 Event e(ftime, value, sv_frame_t(lrintf(duration)),
Chris@1643 756 level, label);
Chris@1644 757 nm->add(e);
Chris@449 758 }
Chris@449 759 return;
Chris@449 760 }
Chris@449 761
Chris@1752 762 if (auto rm = ModelById::getAs<RegionModel>(modelId)) {
Chris@617 763 float value = 0.f;
Chris@617 764 if (values.empty()) {
Chris@617 765 // no values? map each unique label to a distinct value
Chris@1752 766 if (m_labelValueMap[modelId].find(label) == m_labelValueMap[modelId].end()) {
Chris@1752 767 m_labelValueMap[modelId][label] = rm->getValueMaximum() + 1.f;
Chris@617 768 }
Chris@1752 769 value = m_labelValueMap[modelId][label];
Chris@617 770 } else {
Chris@617 771 value = values[0];
Chris@617 772 }
Chris@449 773 if (haveDuration) {
Chris@1649 774 Event e(ftime, value, fduration, label);
Chris@1649 775 rm->add(e);
Chris@449 776 } else {
Chris@449 777 // This won't actually happen -- we only create region models
Chris@449 778 // if we do have duration -- but just for completeness
Chris@617 779 float duration = 1.f;
Chris@449 780 if (!values.empty()) {
Chris@449 781 value = values[0];
Chris@449 782 if (values.size() > 1) {
Chris@449 783 duration = values[1];
Chris@449 784 }
Chris@449 785 }
Chris@1649 786 Event e(ftime, value, sv_frame_t(lrintf(duration)), label);
Chris@1649 787 rm->add(e);
Chris@449 788 }
Chris@449 789 return;
Chris@449 790 }
Chris@449 791
Chris@843 792 cerr << "WARNING: RDFImporterImpl::fillModel: Unknown or unexpected model type" << endl;
Chris@449 793 return;
Chris@449 794 }
Chris@449 795
Chris@490 796 RDFImporter::RDFDocumentType
Chris@1852 797 RDFImporter::identifyDocumentType(QUrl url)
Chris@490 798 {
Chris@490 799 bool haveAudio = false;
Chris@490 800 bool haveAnnotations = false;
Chris@726 801 bool haveRDF = false;
Chris@449 802
Chris@1852 803 if (!isPlausibleDocumentOfAnyKind(url)) {
Chris@1852 804 return NotRDF;
Chris@1852 805 }
Chris@1852 806
Chris@1582 807 BasicStore *store = nullptr;
Chris@1852 808
Chris@726 809 // This is not expected to return anything useful, but if it does
Chris@726 810 // anything at all then we know we have RDF
Chris@726 811 try {
Chris@1852 812 store = BasicStore::load(url);
Chris@730 813 Triple t = store->matchOnce(Triple());
Chris@726 814 if (t != Triple()) haveRDF = true;
Chris@1471 815 } catch (std::exception &) {
Chris@738 816 // nothing; haveRDF will be false so the next bit catches it
Chris@726 817 }
Chris@726 818
Chris@726 819 if (!haveRDF) {
Chris@726 820 delete store;
Chris@499 821 return NotRDF;
Chris@499 822 }
Chris@499 823
Chris@726 824 store->addPrefix("mo", Uri("http://purl.org/ontology/mo/"));
Chris@726 825 store->addPrefix("event", Uri("http://purl.org/NET/c4dm/event.owl#"));
Chris@726 826 store->addPrefix("af", Uri("http://purl.org/ontology/af/"));
Chris@726 827
Chris@588 828 // "MO-conformant" structure for audio files
Chris@588 829
Chris@730 830 Node n = store->complete(Triple(Node(), Uri("a"), store->expand("mo:AudioFile")));
Chris@726 831 if (n != Node() && n.type == Node::URI) {
Chris@588 832
Chris@490 833 haveAudio = true;
Chris@588 834
Chris@588 835 } else {
Chris@588 836
Chris@588 837 // Sonic Annotator v0.2 and below used to write this structure
Chris@588 838 // (which is not properly in conformance with the Music
Chris@588 839 // Ontology)
Chris@588 840
Chris@730 841 Nodes sigs = store->match(Triple(Node(), Uri("a"), store->expand("mo:Signal"))).subjects();
Chris@726 842 foreach (Node sig, sigs) {
Chris@730 843 Node aa = store->complete(Triple(sig, store->expand("mo:available_as"), Node()));
Chris@726 844 if (aa != Node()) {
Chris@726 845 haveAudio = true;
Chris@726 846 break;
Chris@726 847 }
Chris@588 848 }
Chris@490 849 }
Chris@490 850
Chris@690 851 SVDEBUG << "NOTE: RDFImporter::identifyDocumentType: haveAudio = "
Chris@687 852 << haveAudio << endl;
Chris@616 853
Chris@736 854 // can't call complete() with two Nothing nodes
Chris@736 855 n = store->matchOnce(Triple(Node(), store->expand("event:time"), Node())).c;
Chris@726 856 if (n != Node()) {
Chris@490 857 haveAnnotations = true;
Chris@490 858 }
Chris@490 859
Chris@490 860 if (!haveAnnotations) {
Chris@736 861 // can't call complete() with two Nothing nodes
Chris@736 862 n = store->matchOnce(Triple(Node(), store->expand("af:signal_feature"), Node())).c;
Chris@726 863 if (n != Node()) {
Chris@490 864 haveAnnotations = true;
Chris@490 865 }
Chris@490 866 }
Chris@490 867
Chris@690 868 SVDEBUG << "NOTE: RDFImporter::identifyDocumentType: haveAnnotations = "
Chris@687 869 << haveAnnotations << endl;
Chris@616 870
Chris@726 871 delete store;
Chris@542 872
Chris@490 873 if (haveAudio) {
Chris@490 874 if (haveAnnotations) {
Chris@490 875 return AudioRefAndAnnotations;
Chris@490 876 } else {
Chris@490 877 return AudioRef;
Chris@490 878 }
Chris@490 879 } else {
Chris@490 880 if (haveAnnotations) {
Chris@490 881 return Annotations;
Chris@490 882 } else {
Chris@499 883 return OtherRDFDocument;
Chris@490 884 }
Chris@490 885 }
Chris@492 886
Chris@542 887 return OtherRDFDocument;
Chris@490 888 }
Chris@490 889
Chris@1852 890 bool
Chris@1852 891 RDFImporter::isPlausibleDocumentOfAnyKind(QUrl url)
Chris@1852 892 {
Chris@1852 893 // Return true if the document can be opened and contains some
Chris@1852 894 // sort of text, either UTF-8 (so it could be Turtle) or another
Chris@1852 895 // encoding that is recognised as XML
Chris@1852 896
Chris@1852 897 FileSource source(url);
Chris@1852 898
Chris@1852 899 if (!source.isAvailable()) {
Chris@1852 900 SVDEBUG << "NOTE: RDFImporter::isPlausibleDocumentOfAnyKind: Failed to retrieve document from " << url << endl;
Chris@1852 901 return false;
Chris@1852 902 }
Chris@1852 903
Chris@1852 904 QFile file(source.getLocalFilename());
Chris@1852 905 if (!file.open(QFile::ReadOnly)) {
Chris@1852 906 SVDEBUG << "NOTE: RDFImporter::isPlausibleDocumentOfAnyKind: Failed to open local file from " << source.getLocalFilename() << endl;
Chris@1852 907 return false;
Chris@1852 908 }
Chris@1852 909
Chris@1852 910 QByteArray bytes = file.read(200);
Chris@1852 911
Chris@1852 912 if (StringBits::isValidUtf8(bytes.toStdString(), true)) {
Chris@1852 913 SVDEBUG << "NOTE: RDFImporter::isPlausibleDocumentOfAnyKind: Document appears to be UTF-8" << endl;
Chris@1852 914 return true; // good enough to be worth trying to parse
Chris@1852 915 }
Chris@1852 916
Chris@1852 917 QXmlInputSource xmlSource;
Chris@1852 918 xmlSource.setData(bytes); // guesses text encoding
Chris@1852 919
Chris@1852 920 if (xmlSource.data().startsWith("<?xml")) {
Chris@1852 921 SVDEBUG << "NOTE: RDFImporter::isPlausibleDocumentOfAnyKind: Document appears to be XML" << endl;
Chris@1852 922 return true;
Chris@1852 923 }
Chris@1852 924
Chris@1852 925 SVDEBUG << "NOTE: RDFImporter::isPlausibleDocumentOfAnyKind: Document is not UTF-8 and is not XML, rejecting" << endl;
Chris@1852 926 return false;
Chris@1852 927 }
Chris@1852 928