comparison rdf/RDFImporter.cpp @ 439:beb2948baa77

* Merge revisions 1041 to 1130 from sv-rdf-import branch
author Chris Cannam
date Thu, 18 Sep 2008 12:09:32 +0000
parents
children 5746c559af15
comparison
equal deleted inserted replaced
438:32c399d06374 439:beb2948baa77
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 Sonic Visualiser
5 An audio file viewer and annotation editor.
6 Centre for Digital Music, Queen Mary, University of London.
7 This file copyright 2008 QMUL.
8
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License as
11 published by the Free Software Foundation; either version 2 of the
12 License, or (at your option) any later version. See the file
13 COPYING included with this distribution for more information.
14 */
15
16 #include "RDFImporter.h"
17
18 #include <map>
19 #include <vector>
20
21 #include <iostream>
22 #include <cmath>
23
24 #include "SimpleSPARQLQuery.h"
25
26 #include "base/ProgressReporter.h"
27 #include "base/RealTime.h"
28
29 #include "data/model/SparseOneDimensionalModel.h"
30 #include "data/model/SparseTimeValueModel.h"
31 #include "data/model/EditableDenseThreeDimensionalModel.h"
32
33 using std::cerr;
34 using std::endl;
35
36 class RDFImporterImpl
37 {
38 public:
39 RDFImporterImpl(QString url, int sampleRate);
40 virtual ~RDFImporterImpl();
41
42 bool isOK();
43 QString getErrorString() const;
44
45 std::vector<Model *> getDataModels(ProgressReporter *);
46
47 protected:
48 QString m_uristring;
49 QString m_errorString;
50 int m_sampleRate;
51
52 typedef std::vector<float> ValueList;
53 typedef std::map<RealTime, ValueList> TimeValueMap;
54 typedef std::map<QString, TimeValueMap> TypeTimeValueMap;
55 typedef std::map<QString, TypeTimeValueMap> SourceTypeTimeValueMap;
56
57 void extractStructure(const TimeValueMap &map, bool &sparse,
58 int &minValueCount, int &maxValueCount);
59
60 void fillModel(SparseOneDimensionalModel *, const TimeValueMap &);
61 void fillModel(SparseTimeValueModel *, const TimeValueMap &);
62 void fillModel(EditableDenseThreeDimensionalModel *, const TimeValueMap &);
63 };
64
65
66 QString
67 RDFImporter::getKnownExtensions()
68 {
69 return "*.rdf *.n3 *.ttl";
70 }
71
72 RDFImporter::RDFImporter(QString url, int sampleRate) :
73 m_d(new RDFImporterImpl(url, sampleRate))
74 {
75 }
76
77 RDFImporter::~RDFImporter()
78 {
79 delete m_d;
80 }
81
82 bool
83 RDFImporter::isOK()
84 {
85 return m_d->isOK();
86 }
87
88 QString
89 RDFImporter::getErrorString() const
90 {
91 return m_d->getErrorString();
92 }
93
94 std::vector<Model *>
95 RDFImporter::getDataModels(ProgressReporter *r)
96 {
97 return m_d->getDataModels(r);
98 }
99
100 RDFImporterImpl::RDFImporterImpl(QString uri, int sampleRate) :
101 m_uristring(uri),
102 m_sampleRate(sampleRate)
103 {
104 }
105
106 RDFImporterImpl::~RDFImporterImpl()
107 {
108 }
109
110 bool
111 RDFImporterImpl::isOK()
112 {
113 return (m_errorString == "");
114 }
115
116 QString
117 RDFImporterImpl::getErrorString() const
118 {
119 return m_errorString;
120 }
121
122 std::vector<Model *>
123 RDFImporterImpl::getDataModels(ProgressReporter *reporter)
124 {
125 std::vector<Model *> models;
126
127 // Our query is intended to retrieve every thing that has a time,
128 // and every feature type and value associated with a thing that
129 // has a time.
130
131 // We will then need to refine this big bag of results into a set
132 // of data models.
133
134 // Results that have different source signals should go into
135 // different models.
136
137 // Results that have different feature types should go into
138 // different models.
139
140 // Results that are sparse should go into different models from
141 // those that are dense (we need to examine the timestamps to
142 // establish this -- if the timestamps are regular, the results
143 // are dense -- so we can't do it as we go along, only after
144 // collecting all results).
145
146 // Timed things that have features associated with them should not
147 // appear directly in any model -- their features should appear
148 // instead -- and these should be different models from those used
149 // for timed things that do not have features.
150
151 // As we load the results, we'll push them into a partially
152 // structured container that maps from source signal (URI as
153 // string) -> feature type (likewise) -> time -> list of values.
154 // If the source signal or feature type is unavailable, the empty
155 // string will do.
156
157 SourceTypeTimeValueMap m;
158
159 QString queryString = QString(
160
161 " PREFIX event: <http://purl.org/NET/c4dm/event.owl#>"
162 " PREFIX time: <http://purl.org/NET/c4dm/timeline.owl#>"
163 " PREFIX mo: <http://purl.org/ontology/mo/>"
164 " PREFIX af: <http://purl.org/ontology/af/>"
165
166 " SELECT ?signalSource ?time ?eventType ?value"
167 " FROM <%1>"
168
169 " WHERE {"
170 " ?signal mo:available_as ?signalSource ."
171 " ?signal mo:time ?interval ."
172 " ?interval time:onTimeLine ?tl ."
173 " ?t time:onTimeLine ?tl ."
174 " ?t time:at ?time ."
175 " ?timedThing event:time ?t ."
176 " ?timedThing a ?eventType ."
177 " OPTIONAL {"
178 " ?timedThing af:hasFeature ?feature ."
179 " ?feature af:value ?value"
180 " }"
181 " }"
182
183 ).arg(m_uristring);
184
185 SimpleSPARQLQuery query(queryString);
186 query.setProgressReporter(reporter);
187
188 cerr << "Query will be: " << queryString.toStdString() << endl;
189
190 SimpleSPARQLQuery::ResultList results = query.execute();
191
192 if (!query.isOK()) {
193 m_errorString = query.getErrorString();
194 return models;
195 }
196
197 if (query.wasCancelled()) {
198 m_errorString = "Query cancelled";
199 return models;
200 }
201
202 for (int i = 0; i < results.size(); ++i) {
203
204 QString source = results[i]["signalSource"].value;
205
206 QString timestring = results[i]["time"].value;
207 RealTime time;
208 time = RealTime::fromXsdDuration(timestring.toStdString());
209 cerr << "time = " << time.toString() << " (from xsd:duration \""
210 << timestring.toStdString() << "\")" << endl;
211
212 QString type = results[i]["eventType"].value;
213
214 QString valuestring = results[i]["value"].value;
215 float value = 0.f;
216 bool haveValue = false;
217 if (valuestring != "") {
218 value = valuestring.toFloat(&haveValue);
219 cerr << "value = " << value << endl;
220 }
221
222 if (haveValue) {
223 m[source][type][time].push_back(value);
224 } else if (m[source][type].find(time) == m[source][type].end()) {
225 m[source][type][time] = ValueList();
226 }
227 }
228
229 for (SourceTypeTimeValueMap::const_iterator mi = m.begin();
230 mi != m.end(); ++mi) {
231
232 QString source = mi->first;
233
234 for (TypeTimeValueMap::const_iterator ttvi = mi->second.begin();
235 ttvi != mi->second.end(); ++ttvi) {
236
237 QString type = ttvi->first;
238
239 // Now we need to work out what sort of model to use for
240 // this source/type combination. Ultimately we'll
241 // hopefully be able to map directly from the type to the
242 // model on the basis of known structures for the types,
243 // but we also want to be able to handle untyped data
244 // according to its apparent structure so let's do that
245 // first.
246
247 bool sparse = false;
248 int minValueCount = 0, maxValueCount = 0;
249
250 extractStructure(ttvi->second, sparse, minValueCount, maxValueCount);
251
252 cerr << "For source \"" << source.toStdString() << "\", type \""
253 << type.toStdString() << "\" we have sparse = " << sparse
254 << ", min value count = " << minValueCount << ", max = "
255 << maxValueCount << endl;
256
257 // Model allocations:
258 //
259 // Sparse, no values: SparseOneDimensionalModel
260 //
261 // Sparse, always 1 value: SparseTimeValueModel
262 //
263 // Sparse, > 1 value: No standard model for this. If
264 // there are always 2 values, perhaps hack it into
265 // NoteModel for now? Or always use SparseTimeValueModel
266 // and discard all but the first value.
267 //
268 // Dense, no values: Meaningless; no suitable model
269 //
270 // Dense, > 0 values: EditableDenseThreeDimensionalModel
271 //
272 // These should just be our fallback positions; we want to
273 // be reading semantic data from the RDF in order to pick
274 // the right model directly
275
276 enum { SODM, STVM, EDTDM } modelType = SODM;
277
278 if (sparse) {
279 if (maxValueCount == 0) {
280 modelType = SODM;
281 } else if (minValueCount == 1 && maxValueCount == 1) {
282 modelType = STVM;
283 } else {
284 cerr << "WARNING: No suitable model available for sparse data with between " << minValueCount << " and " << maxValueCount << " values" << endl;
285 modelType = STVM;
286 }
287 } else {
288 if (maxValueCount == 0) {
289 cerr << "WARNING: Dense data set with no values is not meaningful, skipping" << endl;
290 continue;
291 } else {
292 modelType = EDTDM;
293 }
294 }
295
296 //!!! set model name &c
297
298 if (modelType == SODM) {
299
300 SparseOneDimensionalModel *model =
301 new SparseOneDimensionalModel(m_sampleRate, 1, false);
302
303 fillModel(model, ttvi->second);
304 models.push_back(model);
305
306 } else if (modelType == STVM) {
307
308 SparseTimeValueModel *model =
309 new SparseTimeValueModel(m_sampleRate, 1, false);
310
311 fillModel(model, ttvi->second);
312 models.push_back(model);
313
314 } else {
315
316 EditableDenseThreeDimensionalModel *model =
317 new EditableDenseThreeDimensionalModel(m_sampleRate, 1, 0,
318 false);
319
320 fillModel(model, ttvi->second);
321 models.push_back(model);
322 }
323 }
324 }
325
326
327 return models;
328 }
329
330 void
331 RDFImporterImpl::extractStructure(const TimeValueMap &tvm,
332 bool &sparse,
333 int &minValueCount,
334 int &maxValueCount)
335 {
336 // These are floats intentionally rather than RealTime --
337 // see logic for handling rounding error below
338 float firstTime = 0.f;
339 float timeStep = 0.f;
340 bool haveTimeStep = false;
341
342 for (TimeValueMap::const_iterator tvi = tvm.begin(); tvi != tvm.end(); ++tvi) {
343
344 RealTime time = tvi->first;
345 int valueCount = tvi->second.size();
346
347 if (tvi == tvm.begin()) {
348
349 minValueCount = valueCount;
350 maxValueCount = valueCount;
351
352 firstTime = time.toDouble();
353
354 } else {
355
356 if (valueCount < minValueCount) minValueCount = valueCount;
357 if (valueCount > maxValueCount) maxValueCount = valueCount;
358
359 if (!haveTimeStep) {
360 timeStep = time.toDouble() - firstTime;
361 if (timeStep == 0.f) sparse = true;
362 haveTimeStep = true;
363 } else if (!sparse) {
364 // test whether this time is within
365 // rounding-error range of being an integer
366 // multiple of some constant away from the
367 // first time
368 float timeAsFloat = time.toDouble();
369 int count = int((timeAsFloat - firstTime) / timeStep + 0.5);
370 float expected = firstTime + (timeStep * count);
371 if (fabsf(expected - timeAsFloat) > 1e-6) {
372 cerr << "Event at " << timeAsFloat << " is not evenly spaced -- would expect it to be " << expected << " for a spacing of " << count << " * " << timeStep << endl;
373 sparse = true;
374 }
375 }
376 }
377 }
378 }
379
380 void
381 RDFImporterImpl::fillModel(SparseOneDimensionalModel *model,
382 const TimeValueMap &tvm)
383 {
384 //!!! labels &c not yet handled
385
386 for (TimeValueMap::const_iterator tvi = tvm.begin();
387 tvi != tvm.end(); ++tvi) {
388
389 RealTime time = tvi->first;
390 long frame = RealTime::realTime2Frame(time, m_sampleRate);
391
392 SparseOneDimensionalModel::Point point(frame);
393
394 model->addPoint(point);
395 }
396 }
397
398 void
399 RDFImporterImpl::fillModel(SparseTimeValueModel *model,
400 const TimeValueMap &tvm)
401 {
402 //!!! labels &c not yet handled
403
404 for (TimeValueMap::const_iterator tvi = tvm.begin();
405 tvi != tvm.end(); ++tvi) {
406
407 RealTime time = tvi->first;
408 long frame = RealTime::realTime2Frame(time, m_sampleRate);
409
410 float value = 0.f;
411 if (!tvi->second.empty()) value = *tvi->second.begin();
412
413 SparseTimeValueModel::Point point(frame, value, "");
414
415 model->addPoint(point);
416 }
417 }
418
419 void
420 RDFImporterImpl::fillModel(EditableDenseThreeDimensionalModel *model,
421 const TimeValueMap &tvm)
422 {
423 //!!! labels &c not yet handled
424
425 //!!! start time offset not yet handled
426
427 size_t col = 0;
428
429 for (TimeValueMap::const_iterator tvi = tvm.begin();
430 tvi != tvm.end(); ++tvi) {
431
432 model->setColumn(col++, tvi->second);
433 }
434 }
435