annotate rdf/RDFImporter.cpp @ 439:beb2948baa77

* Merge revisions 1041 to 1130 from sv-rdf-import branch
author Chris Cannam
date Thu, 18 Sep 2008 12:09:32 +0000 (2008-09-18)
parents
children 5746c559af15
rev   line source
Chris@439 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@439 2
Chris@439 3 /*
Chris@439 4 Sonic Visualiser
Chris@439 5 An audio file viewer and annotation editor.
Chris@439 6 Centre for Digital Music, Queen Mary, University of London.
Chris@439 7 This file copyright 2008 QMUL.
Chris@439 8
Chris@439 9 This program is free software; you can redistribute it and/or
Chris@439 10 modify it under the terms of the GNU General Public License as
Chris@439 11 published by the Free Software Foundation; either version 2 of the
Chris@439 12 License, or (at your option) any later version. See the file
Chris@439 13 COPYING included with this distribution for more information.
Chris@439 14 */
Chris@439 15
Chris@439 16 #include "RDFImporter.h"
Chris@439 17
Chris@439 18 #include <map>
Chris@439 19 #include <vector>
Chris@439 20
Chris@439 21 #include <iostream>
Chris@439 22 #include <cmath>
Chris@439 23
Chris@439 24 #include "SimpleSPARQLQuery.h"
Chris@439 25
Chris@439 26 #include "base/ProgressReporter.h"
Chris@439 27 #include "base/RealTime.h"
Chris@439 28
Chris@439 29 #include "data/model/SparseOneDimensionalModel.h"
Chris@439 30 #include "data/model/SparseTimeValueModel.h"
Chris@439 31 #include "data/model/EditableDenseThreeDimensionalModel.h"
Chris@439 32
Chris@439 33 using std::cerr;
Chris@439 34 using std::endl;
Chris@439 35
Chris@439 36 class RDFImporterImpl
Chris@439 37 {
Chris@439 38 public:
Chris@439 39 RDFImporterImpl(QString url, int sampleRate);
Chris@439 40 virtual ~RDFImporterImpl();
Chris@439 41
Chris@439 42 bool isOK();
Chris@439 43 QString getErrorString() const;
Chris@439 44
Chris@439 45 std::vector<Model *> getDataModels(ProgressReporter *);
Chris@439 46
Chris@439 47 protected:
Chris@439 48 QString m_uristring;
Chris@439 49 QString m_errorString;
Chris@439 50 int m_sampleRate;
Chris@439 51
Chris@439 52 typedef std::vector<float> ValueList;
Chris@439 53 typedef std::map<RealTime, ValueList> TimeValueMap;
Chris@439 54 typedef std::map<QString, TimeValueMap> TypeTimeValueMap;
Chris@439 55 typedef std::map<QString, TypeTimeValueMap> SourceTypeTimeValueMap;
Chris@439 56
Chris@439 57 void extractStructure(const TimeValueMap &map, bool &sparse,
Chris@439 58 int &minValueCount, int &maxValueCount);
Chris@439 59
Chris@439 60 void fillModel(SparseOneDimensionalModel *, const TimeValueMap &);
Chris@439 61 void fillModel(SparseTimeValueModel *, const TimeValueMap &);
Chris@439 62 void fillModel(EditableDenseThreeDimensionalModel *, const TimeValueMap &);
Chris@439 63 };
Chris@439 64
Chris@439 65
Chris@439 66 QString
Chris@439 67 RDFImporter::getKnownExtensions()
Chris@439 68 {
Chris@439 69 return "*.rdf *.n3 *.ttl";
Chris@439 70 }
Chris@439 71
Chris@439 72 RDFImporter::RDFImporter(QString url, int sampleRate) :
Chris@439 73 m_d(new RDFImporterImpl(url, sampleRate))
Chris@439 74 {
Chris@439 75 }
Chris@439 76
Chris@439 77 RDFImporter::~RDFImporter()
Chris@439 78 {
Chris@439 79 delete m_d;
Chris@439 80 }
Chris@439 81
Chris@439 82 bool
Chris@439 83 RDFImporter::isOK()
Chris@439 84 {
Chris@439 85 return m_d->isOK();
Chris@439 86 }
Chris@439 87
Chris@439 88 QString
Chris@439 89 RDFImporter::getErrorString() const
Chris@439 90 {
Chris@439 91 return m_d->getErrorString();
Chris@439 92 }
Chris@439 93
Chris@439 94 std::vector<Model *>
Chris@439 95 RDFImporter::getDataModels(ProgressReporter *r)
Chris@439 96 {
Chris@439 97 return m_d->getDataModels(r);
Chris@439 98 }
Chris@439 99
Chris@439 100 RDFImporterImpl::RDFImporterImpl(QString uri, int sampleRate) :
Chris@439 101 m_uristring(uri),
Chris@439 102 m_sampleRate(sampleRate)
Chris@439 103 {
Chris@439 104 }
Chris@439 105
Chris@439 106 RDFImporterImpl::~RDFImporterImpl()
Chris@439 107 {
Chris@439 108 }
Chris@439 109
Chris@439 110 bool
Chris@439 111 RDFImporterImpl::isOK()
Chris@439 112 {
Chris@439 113 return (m_errorString == "");
Chris@439 114 }
Chris@439 115
Chris@439 116 QString
Chris@439 117 RDFImporterImpl::getErrorString() const
Chris@439 118 {
Chris@439 119 return m_errorString;
Chris@439 120 }
Chris@439 121
Chris@439 122 std::vector<Model *>
Chris@439 123 RDFImporterImpl::getDataModels(ProgressReporter *reporter)
Chris@439 124 {
Chris@439 125 std::vector<Model *> models;
Chris@439 126
Chris@439 127 // Our query is intended to retrieve every thing that has a time,
Chris@439 128 // and every feature type and value associated with a thing that
Chris@439 129 // has a time.
Chris@439 130
Chris@439 131 // We will then need to refine this big bag of results into a set
Chris@439 132 // of data models.
Chris@439 133
Chris@439 134 // Results that have different source signals should go into
Chris@439 135 // different models.
Chris@439 136
Chris@439 137 // Results that have different feature types should go into
Chris@439 138 // different models.
Chris@439 139
Chris@439 140 // Results that are sparse should go into different models from
Chris@439 141 // those that are dense (we need to examine the timestamps to
Chris@439 142 // establish this -- if the timestamps are regular, the results
Chris@439 143 // are dense -- so we can't do it as we go along, only after
Chris@439 144 // collecting all results).
Chris@439 145
Chris@439 146 // Timed things that have features associated with them should not
Chris@439 147 // appear directly in any model -- their features should appear
Chris@439 148 // instead -- and these should be different models from those used
Chris@439 149 // for timed things that do not have features.
Chris@439 150
Chris@439 151 // As we load the results, we'll push them into a partially
Chris@439 152 // structured container that maps from source signal (URI as
Chris@439 153 // string) -> feature type (likewise) -> time -> list of values.
Chris@439 154 // If the source signal or feature type is unavailable, the empty
Chris@439 155 // string will do.
Chris@439 156
Chris@439 157 SourceTypeTimeValueMap m;
Chris@439 158
Chris@439 159 QString queryString = QString(
Chris@439 160
Chris@439 161 " PREFIX event: <http://purl.org/NET/c4dm/event.owl#>"
Chris@439 162 " PREFIX time: <http://purl.org/NET/c4dm/timeline.owl#>"
Chris@439 163 " PREFIX mo: <http://purl.org/ontology/mo/>"
Chris@439 164 " PREFIX af: <http://purl.org/ontology/af/>"
Chris@439 165
Chris@439 166 " SELECT ?signalSource ?time ?eventType ?value"
Chris@439 167 " FROM <%1>"
Chris@439 168
Chris@439 169 " WHERE {"
Chris@439 170 " ?signal mo:available_as ?signalSource ."
Chris@439 171 " ?signal mo:time ?interval ."
Chris@439 172 " ?interval time:onTimeLine ?tl ."
Chris@439 173 " ?t time:onTimeLine ?tl ."
Chris@439 174 " ?t time:at ?time ."
Chris@439 175 " ?timedThing event:time ?t ."
Chris@439 176 " ?timedThing a ?eventType ."
Chris@439 177 " OPTIONAL {"
Chris@439 178 " ?timedThing af:hasFeature ?feature ."
Chris@439 179 " ?feature af:value ?value"
Chris@439 180 " }"
Chris@439 181 " }"
Chris@439 182
Chris@439 183 ).arg(m_uristring);
Chris@439 184
Chris@439 185 SimpleSPARQLQuery query(queryString);
Chris@439 186 query.setProgressReporter(reporter);
Chris@439 187
Chris@439 188 cerr << "Query will be: " << queryString.toStdString() << endl;
Chris@439 189
Chris@439 190 SimpleSPARQLQuery::ResultList results = query.execute();
Chris@439 191
Chris@439 192 if (!query.isOK()) {
Chris@439 193 m_errorString = query.getErrorString();
Chris@439 194 return models;
Chris@439 195 }
Chris@439 196
Chris@439 197 if (query.wasCancelled()) {
Chris@439 198 m_errorString = "Query cancelled";
Chris@439 199 return models;
Chris@439 200 }
Chris@439 201
Chris@439 202 for (int i = 0; i < results.size(); ++i) {
Chris@439 203
Chris@439 204 QString source = results[i]["signalSource"].value;
Chris@439 205
Chris@439 206 QString timestring = results[i]["time"].value;
Chris@439 207 RealTime time;
Chris@439 208 time = RealTime::fromXsdDuration(timestring.toStdString());
Chris@439 209 cerr << "time = " << time.toString() << " (from xsd:duration \""
Chris@439 210 << timestring.toStdString() << "\")" << endl;
Chris@439 211
Chris@439 212 QString type = results[i]["eventType"].value;
Chris@439 213
Chris@439 214 QString valuestring = results[i]["value"].value;
Chris@439 215 float value = 0.f;
Chris@439 216 bool haveValue = false;
Chris@439 217 if (valuestring != "") {
Chris@439 218 value = valuestring.toFloat(&haveValue);
Chris@439 219 cerr << "value = " << value << endl;
Chris@439 220 }
Chris@439 221
Chris@439 222 if (haveValue) {
Chris@439 223 m[source][type][time].push_back(value);
Chris@439 224 } else if (m[source][type].find(time) == m[source][type].end()) {
Chris@439 225 m[source][type][time] = ValueList();
Chris@439 226 }
Chris@439 227 }
Chris@439 228
Chris@439 229 for (SourceTypeTimeValueMap::const_iterator mi = m.begin();
Chris@439 230 mi != m.end(); ++mi) {
Chris@439 231
Chris@439 232 QString source = mi->first;
Chris@439 233
Chris@439 234 for (TypeTimeValueMap::const_iterator ttvi = mi->second.begin();
Chris@439 235 ttvi != mi->second.end(); ++ttvi) {
Chris@439 236
Chris@439 237 QString type = ttvi->first;
Chris@439 238
Chris@439 239 // Now we need to work out what sort of model to use for
Chris@439 240 // this source/type combination. Ultimately we'll
Chris@439 241 // hopefully be able to map directly from the type to the
Chris@439 242 // model on the basis of known structures for the types,
Chris@439 243 // but we also want to be able to handle untyped data
Chris@439 244 // according to its apparent structure so let's do that
Chris@439 245 // first.
Chris@439 246
Chris@439 247 bool sparse = false;
Chris@439 248 int minValueCount = 0, maxValueCount = 0;
Chris@439 249
Chris@439 250 extractStructure(ttvi->second, sparse, minValueCount, maxValueCount);
Chris@439 251
Chris@439 252 cerr << "For source \"" << source.toStdString() << "\", type \""
Chris@439 253 << type.toStdString() << "\" we have sparse = " << sparse
Chris@439 254 << ", min value count = " << minValueCount << ", max = "
Chris@439 255 << maxValueCount << endl;
Chris@439 256
Chris@439 257 // Model allocations:
Chris@439 258 //
Chris@439 259 // Sparse, no values: SparseOneDimensionalModel
Chris@439 260 //
Chris@439 261 // Sparse, always 1 value: SparseTimeValueModel
Chris@439 262 //
Chris@439 263 // Sparse, > 1 value: No standard model for this. If
Chris@439 264 // there are always 2 values, perhaps hack it into
Chris@439 265 // NoteModel for now? Or always use SparseTimeValueModel
Chris@439 266 // and discard all but the first value.
Chris@439 267 //
Chris@439 268 // Dense, no values: Meaningless; no suitable model
Chris@439 269 //
Chris@439 270 // Dense, > 0 values: EditableDenseThreeDimensionalModel
Chris@439 271 //
Chris@439 272 // These should just be our fallback positions; we want to
Chris@439 273 // be reading semantic data from the RDF in order to pick
Chris@439 274 // the right model directly
Chris@439 275
Chris@439 276 enum { SODM, STVM, EDTDM } modelType = SODM;
Chris@439 277
Chris@439 278 if (sparse) {
Chris@439 279 if (maxValueCount == 0) {
Chris@439 280 modelType = SODM;
Chris@439 281 } else if (minValueCount == 1 && maxValueCount == 1) {
Chris@439 282 modelType = STVM;
Chris@439 283 } else {
Chris@439 284 cerr << "WARNING: No suitable model available for sparse data with between " << minValueCount << " and " << maxValueCount << " values" << endl;
Chris@439 285 modelType = STVM;
Chris@439 286 }
Chris@439 287 } else {
Chris@439 288 if (maxValueCount == 0) {
Chris@439 289 cerr << "WARNING: Dense data set with no values is not meaningful, skipping" << endl;
Chris@439 290 continue;
Chris@439 291 } else {
Chris@439 292 modelType = EDTDM;
Chris@439 293 }
Chris@439 294 }
Chris@439 295
Chris@439 296 //!!! set model name &c
Chris@439 297
Chris@439 298 if (modelType == SODM) {
Chris@439 299
Chris@439 300 SparseOneDimensionalModel *model =
Chris@439 301 new SparseOneDimensionalModel(m_sampleRate, 1, false);
Chris@439 302
Chris@439 303 fillModel(model, ttvi->second);
Chris@439 304 models.push_back(model);
Chris@439 305
Chris@439 306 } else if (modelType == STVM) {
Chris@439 307
Chris@439 308 SparseTimeValueModel *model =
Chris@439 309 new SparseTimeValueModel(m_sampleRate, 1, false);
Chris@439 310
Chris@439 311 fillModel(model, ttvi->second);
Chris@439 312 models.push_back(model);
Chris@439 313
Chris@439 314 } else {
Chris@439 315
Chris@439 316 EditableDenseThreeDimensionalModel *model =
Chris@439 317 new EditableDenseThreeDimensionalModel(m_sampleRate, 1, 0,
Chris@439 318 false);
Chris@439 319
Chris@439 320 fillModel(model, ttvi->second);
Chris@439 321 models.push_back(model);
Chris@439 322 }
Chris@439 323 }
Chris@439 324 }
Chris@439 325
Chris@439 326
Chris@439 327 return models;
Chris@439 328 }
Chris@439 329
Chris@439 330 void
Chris@439 331 RDFImporterImpl::extractStructure(const TimeValueMap &tvm,
Chris@439 332 bool &sparse,
Chris@439 333 int &minValueCount,
Chris@439 334 int &maxValueCount)
Chris@439 335 {
Chris@439 336 // These are floats intentionally rather than RealTime --
Chris@439 337 // see logic for handling rounding error below
Chris@439 338 float firstTime = 0.f;
Chris@439 339 float timeStep = 0.f;
Chris@439 340 bool haveTimeStep = false;
Chris@439 341
Chris@439 342 for (TimeValueMap::const_iterator tvi = tvm.begin(); tvi != tvm.end(); ++tvi) {
Chris@439 343
Chris@439 344 RealTime time = tvi->first;
Chris@439 345 int valueCount = tvi->second.size();
Chris@439 346
Chris@439 347 if (tvi == tvm.begin()) {
Chris@439 348
Chris@439 349 minValueCount = valueCount;
Chris@439 350 maxValueCount = valueCount;
Chris@439 351
Chris@439 352 firstTime = time.toDouble();
Chris@439 353
Chris@439 354 } else {
Chris@439 355
Chris@439 356 if (valueCount < minValueCount) minValueCount = valueCount;
Chris@439 357 if (valueCount > maxValueCount) maxValueCount = valueCount;
Chris@439 358
Chris@439 359 if (!haveTimeStep) {
Chris@439 360 timeStep = time.toDouble() - firstTime;
Chris@439 361 if (timeStep == 0.f) sparse = true;
Chris@439 362 haveTimeStep = true;
Chris@439 363 } else if (!sparse) {
Chris@439 364 // test whether this time is within
Chris@439 365 // rounding-error range of being an integer
Chris@439 366 // multiple of some constant away from the
Chris@439 367 // first time
Chris@439 368 float timeAsFloat = time.toDouble();
Chris@439 369 int count = int((timeAsFloat - firstTime) / timeStep + 0.5);
Chris@439 370 float expected = firstTime + (timeStep * count);
Chris@439 371 if (fabsf(expected - timeAsFloat) > 1e-6) {
Chris@439 372 cerr << "Event at " << timeAsFloat << " is not evenly spaced -- would expect it to be " << expected << " for a spacing of " << count << " * " << timeStep << endl;
Chris@439 373 sparse = true;
Chris@439 374 }
Chris@439 375 }
Chris@439 376 }
Chris@439 377 }
Chris@439 378 }
Chris@439 379
Chris@439 380 void
Chris@439 381 RDFImporterImpl::fillModel(SparseOneDimensionalModel *model,
Chris@439 382 const TimeValueMap &tvm)
Chris@439 383 {
Chris@439 384 //!!! labels &c not yet handled
Chris@439 385
Chris@439 386 for (TimeValueMap::const_iterator tvi = tvm.begin();
Chris@439 387 tvi != tvm.end(); ++tvi) {
Chris@439 388
Chris@439 389 RealTime time = tvi->first;
Chris@439 390 long frame = RealTime::realTime2Frame(time, m_sampleRate);
Chris@439 391
Chris@439 392 SparseOneDimensionalModel::Point point(frame);
Chris@439 393
Chris@439 394 model->addPoint(point);
Chris@439 395 }
Chris@439 396 }
Chris@439 397
Chris@439 398 void
Chris@439 399 RDFImporterImpl::fillModel(SparseTimeValueModel *model,
Chris@439 400 const TimeValueMap &tvm)
Chris@439 401 {
Chris@439 402 //!!! labels &c not yet handled
Chris@439 403
Chris@439 404 for (TimeValueMap::const_iterator tvi = tvm.begin();
Chris@439 405 tvi != tvm.end(); ++tvi) {
Chris@439 406
Chris@439 407 RealTime time = tvi->first;
Chris@439 408 long frame = RealTime::realTime2Frame(time, m_sampleRate);
Chris@439 409
Chris@439 410 float value = 0.f;
Chris@439 411 if (!tvi->second.empty()) value = *tvi->second.begin();
Chris@439 412
Chris@439 413 SparseTimeValueModel::Point point(frame, value, "");
Chris@439 414
Chris@439 415 model->addPoint(point);
Chris@439 416 }
Chris@439 417 }
Chris@439 418
Chris@439 419 void
Chris@439 420 RDFImporterImpl::fillModel(EditableDenseThreeDimensionalModel *model,
Chris@439 421 const TimeValueMap &tvm)
Chris@439 422 {
Chris@439 423 //!!! labels &c not yet handled
Chris@439 424
Chris@439 425 //!!! start time offset not yet handled
Chris@439 426
Chris@439 427 size_t col = 0;
Chris@439 428
Chris@439 429 for (TimeValueMap::const_iterator tvi = tvm.begin();
Chris@439 430 tvi != tvm.end(); ++tvi) {
Chris@439 431
Chris@439 432 model->setColumn(col++, tvi->second);
Chris@439 433 }
Chris@439 434 }
Chris@439 435