Mercurial > hg > svcore
comparison rdf/RDFImporter.cpp @ 439:beb2948baa77
* Merge revisions 1041 to 1130 from sv-rdf-import branch
author | Chris Cannam |
---|---|
date | Thu, 18 Sep 2008 12:09:32 +0000 |
parents | |
children | 5746c559af15 |
comparison
equal
deleted
inserted
replaced
438:32c399d06374 | 439:beb2948baa77 |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 Sonic Visualiser | |
5 An audio file viewer and annotation editor. | |
6 Centre for Digital Music, Queen Mary, University of London. | |
7 This file copyright 2008 QMUL. | |
8 | |
9 This program is free software; you can redistribute it and/or | |
10 modify it under the terms of the GNU General Public License as | |
11 published by the Free Software Foundation; either version 2 of the | |
12 License, or (at your option) any later version. See the file | |
13 COPYING included with this distribution for more information. | |
14 */ | |
15 | |
16 #include "RDFImporter.h" | |
17 | |
18 #include <map> | |
19 #include <vector> | |
20 | |
21 #include <iostream> | |
22 #include <cmath> | |
23 | |
24 #include "SimpleSPARQLQuery.h" | |
25 | |
26 #include "base/ProgressReporter.h" | |
27 #include "base/RealTime.h" | |
28 | |
29 #include "data/model/SparseOneDimensionalModel.h" | |
30 #include "data/model/SparseTimeValueModel.h" | |
31 #include "data/model/EditableDenseThreeDimensionalModel.h" | |
32 | |
33 using std::cerr; | |
34 using std::endl; | |
35 | |
36 class RDFImporterImpl | |
37 { | |
38 public: | |
39 RDFImporterImpl(QString url, int sampleRate); | |
40 virtual ~RDFImporterImpl(); | |
41 | |
42 bool isOK(); | |
43 QString getErrorString() const; | |
44 | |
45 std::vector<Model *> getDataModels(ProgressReporter *); | |
46 | |
47 protected: | |
48 QString m_uristring; | |
49 QString m_errorString; | |
50 int m_sampleRate; | |
51 | |
52 typedef std::vector<float> ValueList; | |
53 typedef std::map<RealTime, ValueList> TimeValueMap; | |
54 typedef std::map<QString, TimeValueMap> TypeTimeValueMap; | |
55 typedef std::map<QString, TypeTimeValueMap> SourceTypeTimeValueMap; | |
56 | |
57 void extractStructure(const TimeValueMap &map, bool &sparse, | |
58 int &minValueCount, int &maxValueCount); | |
59 | |
60 void fillModel(SparseOneDimensionalModel *, const TimeValueMap &); | |
61 void fillModel(SparseTimeValueModel *, const TimeValueMap &); | |
62 void fillModel(EditableDenseThreeDimensionalModel *, const TimeValueMap &); | |
63 }; | |
64 | |
65 | |
66 QString | |
67 RDFImporter::getKnownExtensions() | |
68 { | |
69 return "*.rdf *.n3 *.ttl"; | |
70 } | |
71 | |
72 RDFImporter::RDFImporter(QString url, int sampleRate) : | |
73 m_d(new RDFImporterImpl(url, sampleRate)) | |
74 { | |
75 } | |
76 | |
77 RDFImporter::~RDFImporter() | |
78 { | |
79 delete m_d; | |
80 } | |
81 | |
82 bool | |
83 RDFImporter::isOK() | |
84 { | |
85 return m_d->isOK(); | |
86 } | |
87 | |
88 QString | |
89 RDFImporter::getErrorString() const | |
90 { | |
91 return m_d->getErrorString(); | |
92 } | |
93 | |
94 std::vector<Model *> | |
95 RDFImporter::getDataModels(ProgressReporter *r) | |
96 { | |
97 return m_d->getDataModels(r); | |
98 } | |
99 | |
100 RDFImporterImpl::RDFImporterImpl(QString uri, int sampleRate) : | |
101 m_uristring(uri), | |
102 m_sampleRate(sampleRate) | |
103 { | |
104 } | |
105 | |
106 RDFImporterImpl::~RDFImporterImpl() | |
107 { | |
108 } | |
109 | |
110 bool | |
111 RDFImporterImpl::isOK() | |
112 { | |
113 return (m_errorString == ""); | |
114 } | |
115 | |
116 QString | |
117 RDFImporterImpl::getErrorString() const | |
118 { | |
119 return m_errorString; | |
120 } | |
121 | |
122 std::vector<Model *> | |
123 RDFImporterImpl::getDataModels(ProgressReporter *reporter) | |
124 { | |
125 std::vector<Model *> models; | |
126 | |
127 // Our query is intended to retrieve every thing that has a time, | |
128 // and every feature type and value associated with a thing that | |
129 // has a time. | |
130 | |
131 // We will then need to refine this big bag of results into a set | |
132 // of data models. | |
133 | |
134 // Results that have different source signals should go into | |
135 // different models. | |
136 | |
137 // Results that have different feature types should go into | |
138 // different models. | |
139 | |
140 // Results that are sparse should go into different models from | |
141 // those that are dense (we need to examine the timestamps to | |
142 // establish this -- if the timestamps are regular, the results | |
143 // are dense -- so we can't do it as we go along, only after | |
144 // collecting all results). | |
145 | |
146 // Timed things that have features associated with them should not | |
147 // appear directly in any model -- their features should appear | |
148 // instead -- and these should be different models from those used | |
149 // for timed things that do not have features. | |
150 | |
151 // As we load the results, we'll push them into a partially | |
152 // structured container that maps from source signal (URI as | |
153 // string) -> feature type (likewise) -> time -> list of values. | |
154 // If the source signal or feature type is unavailable, the empty | |
155 // string will do. | |
156 | |
157 SourceTypeTimeValueMap m; | |
158 | |
159 QString queryString = QString( | |
160 | |
161 " PREFIX event: <http://purl.org/NET/c4dm/event.owl#>" | |
162 " PREFIX time: <http://purl.org/NET/c4dm/timeline.owl#>" | |
163 " PREFIX mo: <http://purl.org/ontology/mo/>" | |
164 " PREFIX af: <http://purl.org/ontology/af/>" | |
165 | |
166 " SELECT ?signalSource ?time ?eventType ?value" | |
167 " FROM <%1>" | |
168 | |
169 " WHERE {" | |
170 " ?signal mo:available_as ?signalSource ." | |
171 " ?signal mo:time ?interval ." | |
172 " ?interval time:onTimeLine ?tl ." | |
173 " ?t time:onTimeLine ?tl ." | |
174 " ?t time:at ?time ." | |
175 " ?timedThing event:time ?t ." | |
176 " ?timedThing a ?eventType ." | |
177 " OPTIONAL {" | |
178 " ?timedThing af:hasFeature ?feature ." | |
179 " ?feature af:value ?value" | |
180 " }" | |
181 " }" | |
182 | |
183 ).arg(m_uristring); | |
184 | |
185 SimpleSPARQLQuery query(queryString); | |
186 query.setProgressReporter(reporter); | |
187 | |
188 cerr << "Query will be: " << queryString.toStdString() << endl; | |
189 | |
190 SimpleSPARQLQuery::ResultList results = query.execute(); | |
191 | |
192 if (!query.isOK()) { | |
193 m_errorString = query.getErrorString(); | |
194 return models; | |
195 } | |
196 | |
197 if (query.wasCancelled()) { | |
198 m_errorString = "Query cancelled"; | |
199 return models; | |
200 } | |
201 | |
202 for (int i = 0; i < results.size(); ++i) { | |
203 | |
204 QString source = results[i]["signalSource"].value; | |
205 | |
206 QString timestring = results[i]["time"].value; | |
207 RealTime time; | |
208 time = RealTime::fromXsdDuration(timestring.toStdString()); | |
209 cerr << "time = " << time.toString() << " (from xsd:duration \"" | |
210 << timestring.toStdString() << "\")" << endl; | |
211 | |
212 QString type = results[i]["eventType"].value; | |
213 | |
214 QString valuestring = results[i]["value"].value; | |
215 float value = 0.f; | |
216 bool haveValue = false; | |
217 if (valuestring != "") { | |
218 value = valuestring.toFloat(&haveValue); | |
219 cerr << "value = " << value << endl; | |
220 } | |
221 | |
222 if (haveValue) { | |
223 m[source][type][time].push_back(value); | |
224 } else if (m[source][type].find(time) == m[source][type].end()) { | |
225 m[source][type][time] = ValueList(); | |
226 } | |
227 } | |
228 | |
229 for (SourceTypeTimeValueMap::const_iterator mi = m.begin(); | |
230 mi != m.end(); ++mi) { | |
231 | |
232 QString source = mi->first; | |
233 | |
234 for (TypeTimeValueMap::const_iterator ttvi = mi->second.begin(); | |
235 ttvi != mi->second.end(); ++ttvi) { | |
236 | |
237 QString type = ttvi->first; | |
238 | |
239 // Now we need to work out what sort of model to use for | |
240 // this source/type combination. Ultimately we'll | |
241 // hopefully be able to map directly from the type to the | |
242 // model on the basis of known structures for the types, | |
243 // but we also want to be able to handle untyped data | |
244 // according to its apparent structure so let's do that | |
245 // first. | |
246 | |
247 bool sparse = false; | |
248 int minValueCount = 0, maxValueCount = 0; | |
249 | |
250 extractStructure(ttvi->second, sparse, minValueCount, maxValueCount); | |
251 | |
252 cerr << "For source \"" << source.toStdString() << "\", type \"" | |
253 << type.toStdString() << "\" we have sparse = " << sparse | |
254 << ", min value count = " << minValueCount << ", max = " | |
255 << maxValueCount << endl; | |
256 | |
257 // Model allocations: | |
258 // | |
259 // Sparse, no values: SparseOneDimensionalModel | |
260 // | |
261 // Sparse, always 1 value: SparseTimeValueModel | |
262 // | |
263 // Sparse, > 1 value: No standard model for this. If | |
264 // there are always 2 values, perhaps hack it into | |
265 // NoteModel for now? Or always use SparseTimeValueModel | |
266 // and discard all but the first value. | |
267 // | |
268 // Dense, no values: Meaningless; no suitable model | |
269 // | |
270 // Dense, > 0 values: EditableDenseThreeDimensionalModel | |
271 // | |
272 // These should just be our fallback positions; we want to | |
273 // be reading semantic data from the RDF in order to pick | |
274 // the right model directly | |
275 | |
276 enum { SODM, STVM, EDTDM } modelType = SODM; | |
277 | |
278 if (sparse) { | |
279 if (maxValueCount == 0) { | |
280 modelType = SODM; | |
281 } else if (minValueCount == 1 && maxValueCount == 1) { | |
282 modelType = STVM; | |
283 } else { | |
284 cerr << "WARNING: No suitable model available for sparse data with between " << minValueCount << " and " << maxValueCount << " values" << endl; | |
285 modelType = STVM; | |
286 } | |
287 } else { | |
288 if (maxValueCount == 0) { | |
289 cerr << "WARNING: Dense data set with no values is not meaningful, skipping" << endl; | |
290 continue; | |
291 } else { | |
292 modelType = EDTDM; | |
293 } | |
294 } | |
295 | |
296 //!!! set model name &c | |
297 | |
298 if (modelType == SODM) { | |
299 | |
300 SparseOneDimensionalModel *model = | |
301 new SparseOneDimensionalModel(m_sampleRate, 1, false); | |
302 | |
303 fillModel(model, ttvi->second); | |
304 models.push_back(model); | |
305 | |
306 } else if (modelType == STVM) { | |
307 | |
308 SparseTimeValueModel *model = | |
309 new SparseTimeValueModel(m_sampleRate, 1, false); | |
310 | |
311 fillModel(model, ttvi->second); | |
312 models.push_back(model); | |
313 | |
314 } else { | |
315 | |
316 EditableDenseThreeDimensionalModel *model = | |
317 new EditableDenseThreeDimensionalModel(m_sampleRate, 1, 0, | |
318 false); | |
319 | |
320 fillModel(model, ttvi->second); | |
321 models.push_back(model); | |
322 } | |
323 } | |
324 } | |
325 | |
326 | |
327 return models; | |
328 } | |
329 | |
330 void | |
331 RDFImporterImpl::extractStructure(const TimeValueMap &tvm, | |
332 bool &sparse, | |
333 int &minValueCount, | |
334 int &maxValueCount) | |
335 { | |
336 // These are floats intentionally rather than RealTime -- | |
337 // see logic for handling rounding error below | |
338 float firstTime = 0.f; | |
339 float timeStep = 0.f; | |
340 bool haveTimeStep = false; | |
341 | |
342 for (TimeValueMap::const_iterator tvi = tvm.begin(); tvi != tvm.end(); ++tvi) { | |
343 | |
344 RealTime time = tvi->first; | |
345 int valueCount = tvi->second.size(); | |
346 | |
347 if (tvi == tvm.begin()) { | |
348 | |
349 minValueCount = valueCount; | |
350 maxValueCount = valueCount; | |
351 | |
352 firstTime = time.toDouble(); | |
353 | |
354 } else { | |
355 | |
356 if (valueCount < minValueCount) minValueCount = valueCount; | |
357 if (valueCount > maxValueCount) maxValueCount = valueCount; | |
358 | |
359 if (!haveTimeStep) { | |
360 timeStep = time.toDouble() - firstTime; | |
361 if (timeStep == 0.f) sparse = true; | |
362 haveTimeStep = true; | |
363 } else if (!sparse) { | |
364 // test whether this time is within | |
365 // rounding-error range of being an integer | |
366 // multiple of some constant away from the | |
367 // first time | |
368 float timeAsFloat = time.toDouble(); | |
369 int count = int((timeAsFloat - firstTime) / timeStep + 0.5); | |
370 float expected = firstTime + (timeStep * count); | |
371 if (fabsf(expected - timeAsFloat) > 1e-6) { | |
372 cerr << "Event at " << timeAsFloat << " is not evenly spaced -- would expect it to be " << expected << " for a spacing of " << count << " * " << timeStep << endl; | |
373 sparse = true; | |
374 } | |
375 } | |
376 } | |
377 } | |
378 } | |
379 | |
380 void | |
381 RDFImporterImpl::fillModel(SparseOneDimensionalModel *model, | |
382 const TimeValueMap &tvm) | |
383 { | |
384 //!!! labels &c not yet handled | |
385 | |
386 for (TimeValueMap::const_iterator tvi = tvm.begin(); | |
387 tvi != tvm.end(); ++tvi) { | |
388 | |
389 RealTime time = tvi->first; | |
390 long frame = RealTime::realTime2Frame(time, m_sampleRate); | |
391 | |
392 SparseOneDimensionalModel::Point point(frame); | |
393 | |
394 model->addPoint(point); | |
395 } | |
396 } | |
397 | |
398 void | |
399 RDFImporterImpl::fillModel(SparseTimeValueModel *model, | |
400 const TimeValueMap &tvm) | |
401 { | |
402 //!!! labels &c not yet handled | |
403 | |
404 for (TimeValueMap::const_iterator tvi = tvm.begin(); | |
405 tvi != tvm.end(); ++tvi) { | |
406 | |
407 RealTime time = tvi->first; | |
408 long frame = RealTime::realTime2Frame(time, m_sampleRate); | |
409 | |
410 float value = 0.f; | |
411 if (!tvi->second.empty()) value = *tvi->second.begin(); | |
412 | |
413 SparseTimeValueModel::Point point(frame, value, ""); | |
414 | |
415 model->addPoint(point); | |
416 } | |
417 } | |
418 | |
419 void | |
420 RDFImporterImpl::fillModel(EditableDenseThreeDimensionalModel *model, | |
421 const TimeValueMap &tvm) | |
422 { | |
423 //!!! labels &c not yet handled | |
424 | |
425 //!!! start time offset not yet handled | |
426 | |
427 size_t col = 0; | |
428 | |
429 for (TimeValueMap::const_iterator tvi = tvm.begin(); | |
430 tvi != tvm.end(); ++tvi) { | |
431 | |
432 model->setColumn(col++, tvi->second); | |
433 } | |
434 } | |
435 |