Chris@439: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@439: Chris@439: /* Chris@439: Sonic Visualiser Chris@439: An audio file viewer and annotation editor. Chris@439: Centre for Digital Music, Queen Mary, University of London. Chris@439: This file copyright 2008 QMUL. Chris@439: Chris@439: This program is free software; you can redistribute it and/or Chris@439: modify it under the terms of the GNU General Public License as Chris@439: published by the Free Software Foundation; either version 2 of the Chris@439: License, or (at your option) any later version. See the file Chris@439: COPYING included with this distribution for more information. Chris@439: */ Chris@439: Chris@439: #include "RDFImporter.h" Chris@439: Chris@439: #include Chris@439: #include Chris@439: Chris@439: #include Chris@439: #include Chris@439: Chris@439: #include "SimpleSPARQLQuery.h" Chris@439: Chris@439: #include "base/ProgressReporter.h" Chris@439: #include "base/RealTime.h" Chris@439: Chris@439: #include "data/model/SparseOneDimensionalModel.h" Chris@439: #include "data/model/SparseTimeValueModel.h" Chris@439: #include "data/model/EditableDenseThreeDimensionalModel.h" Chris@439: Chris@439: using std::cerr; Chris@439: using std::endl; Chris@439: Chris@439: class RDFImporterImpl Chris@439: { Chris@439: public: Chris@439: RDFImporterImpl(QString url, int sampleRate); Chris@439: virtual ~RDFImporterImpl(); Chris@439: Chris@439: bool isOK(); Chris@439: QString getErrorString() const; Chris@439: Chris@439: std::vector getDataModels(ProgressReporter *); Chris@439: Chris@439: protected: Chris@439: QString m_uristring; Chris@439: QString m_errorString; Chris@439: int m_sampleRate; Chris@439: Chris@439: typedef std::vector ValueList; Chris@439: typedef std::map TimeValueMap; Chris@439: typedef std::map TypeTimeValueMap; Chris@439: typedef std::map SourceTypeTimeValueMap; Chris@439: Chris@439: void extractStructure(const TimeValueMap &map, bool &sparse, Chris@439: int &minValueCount, int &maxValueCount); Chris@439: Chris@439: void fillModel(SparseOneDimensionalModel *, const TimeValueMap &); Chris@439: void fillModel(SparseTimeValueModel *, const TimeValueMap &); Chris@439: void fillModel(EditableDenseThreeDimensionalModel *, const TimeValueMap &); Chris@439: }; Chris@439: Chris@439: Chris@439: QString Chris@439: RDFImporter::getKnownExtensions() Chris@439: { Chris@439: return "*.rdf *.n3 *.ttl"; Chris@439: } Chris@439: Chris@439: RDFImporter::RDFImporter(QString url, int sampleRate) : Chris@439: m_d(new RDFImporterImpl(url, sampleRate)) Chris@439: { Chris@439: } Chris@439: Chris@439: RDFImporter::~RDFImporter() Chris@439: { Chris@439: delete m_d; Chris@439: } Chris@439: Chris@439: bool Chris@439: RDFImporter::isOK() Chris@439: { Chris@439: return m_d->isOK(); Chris@439: } Chris@439: Chris@439: QString Chris@439: RDFImporter::getErrorString() const Chris@439: { Chris@439: return m_d->getErrorString(); Chris@439: } Chris@439: Chris@439: std::vector Chris@439: RDFImporter::getDataModels(ProgressReporter *r) Chris@439: { Chris@439: return m_d->getDataModels(r); Chris@439: } Chris@439: Chris@439: RDFImporterImpl::RDFImporterImpl(QString uri, int sampleRate) : Chris@439: m_uristring(uri), Chris@439: m_sampleRate(sampleRate) Chris@439: { Chris@439: } Chris@439: Chris@439: RDFImporterImpl::~RDFImporterImpl() Chris@439: { Chris@439: } Chris@439: Chris@439: bool Chris@439: RDFImporterImpl::isOK() Chris@439: { Chris@439: return (m_errorString == ""); Chris@439: } Chris@439: Chris@439: QString Chris@439: RDFImporterImpl::getErrorString() const Chris@439: { Chris@439: return m_errorString; Chris@439: } Chris@439: Chris@439: std::vector Chris@439: RDFImporterImpl::getDataModels(ProgressReporter *reporter) Chris@439: { Chris@439: std::vector models; Chris@439: Chris@439: // Our query is intended to retrieve every thing that has a time, Chris@439: // and every feature type and value associated with a thing that Chris@439: // has a time. Chris@439: Chris@439: // We will then need to refine this big bag of results into a set Chris@439: // of data models. Chris@439: Chris@439: // Results that have different source signals should go into Chris@439: // different models. Chris@439: Chris@439: // Results that have different feature types should go into Chris@439: // different models. Chris@439: Chris@439: // Results that are sparse should go into different models from Chris@439: // those that are dense (we need to examine the timestamps to Chris@439: // establish this -- if the timestamps are regular, the results Chris@439: // are dense -- so we can't do it as we go along, only after Chris@439: // collecting all results). Chris@439: Chris@439: // Timed things that have features associated with them should not Chris@439: // appear directly in any model -- their features should appear Chris@439: // instead -- and these should be different models from those used Chris@439: // for timed things that do not have features. Chris@439: Chris@439: // As we load the results, we'll push them into a partially Chris@439: // structured container that maps from source signal (URI as Chris@439: // string) -> feature type (likewise) -> time -> list of values. Chris@439: // If the source signal or feature type is unavailable, the empty Chris@439: // string will do. Chris@439: Chris@439: SourceTypeTimeValueMap m; Chris@439: Chris@439: QString queryString = QString( Chris@439: Chris@439: " PREFIX event: " Chris@439: " PREFIX time: " Chris@439: " PREFIX mo: " Chris@439: " PREFIX af: " Chris@439: Chris@439: " SELECT ?signalSource ?time ?eventType ?value" Chris@439: " FROM <%1>" Chris@439: Chris@439: " WHERE {" Chris@439: " ?signal mo:available_as ?signalSource ." Chris@439: " ?signal mo:time ?interval ." Chris@439: " ?interval time:onTimeLine ?tl ." Chris@439: " ?t time:onTimeLine ?tl ." Chris@439: " ?t time:at ?time ." Chris@439: " ?timedThing event:time ?t ." Chris@439: " ?timedThing a ?eventType ." Chris@439: " OPTIONAL {" Chris@439: " ?timedThing af:hasFeature ?feature ." Chris@439: " ?feature af:value ?value" Chris@439: " }" Chris@439: " }" Chris@439: Chris@439: ).arg(m_uristring); Chris@439: Chris@439: SimpleSPARQLQuery query(queryString); Chris@439: query.setProgressReporter(reporter); Chris@439: Chris@439: cerr << "Query will be: " << queryString.toStdString() << endl; Chris@439: Chris@439: SimpleSPARQLQuery::ResultList results = query.execute(); Chris@439: Chris@439: if (!query.isOK()) { Chris@439: m_errorString = query.getErrorString(); Chris@439: return models; Chris@439: } Chris@439: Chris@439: if (query.wasCancelled()) { Chris@439: m_errorString = "Query cancelled"; Chris@439: return models; Chris@439: } Chris@439: Chris@439: for (int i = 0; i < results.size(); ++i) { Chris@439: Chris@439: QString source = results[i]["signalSource"].value; Chris@439: Chris@439: QString timestring = results[i]["time"].value; Chris@439: RealTime time; Chris@439: time = RealTime::fromXsdDuration(timestring.toStdString()); Chris@439: cerr << "time = " << time.toString() << " (from xsd:duration \"" Chris@439: << timestring.toStdString() << "\")" << endl; Chris@439: Chris@439: QString type = results[i]["eventType"].value; Chris@439: Chris@439: QString valuestring = results[i]["value"].value; Chris@439: float value = 0.f; Chris@439: bool haveValue = false; Chris@439: if (valuestring != "") { Chris@439: value = valuestring.toFloat(&haveValue); Chris@439: cerr << "value = " << value << endl; Chris@439: } Chris@439: Chris@439: if (haveValue) { Chris@439: m[source][type][time].push_back(value); Chris@439: } else if (m[source][type].find(time) == m[source][type].end()) { Chris@439: m[source][type][time] = ValueList(); Chris@439: } Chris@439: } Chris@439: Chris@439: for (SourceTypeTimeValueMap::const_iterator mi = m.begin(); Chris@439: mi != m.end(); ++mi) { Chris@439: Chris@439: QString source = mi->first; Chris@439: Chris@439: for (TypeTimeValueMap::const_iterator ttvi = mi->second.begin(); Chris@439: ttvi != mi->second.end(); ++ttvi) { Chris@439: Chris@439: QString type = ttvi->first; Chris@439: Chris@439: // Now we need to work out what sort of model to use for Chris@439: // this source/type combination. Ultimately we'll Chris@439: // hopefully be able to map directly from the type to the Chris@439: // model on the basis of known structures for the types, Chris@439: // but we also want to be able to handle untyped data Chris@439: // according to its apparent structure so let's do that Chris@439: // first. Chris@439: Chris@439: bool sparse = false; Chris@439: int minValueCount = 0, maxValueCount = 0; Chris@439: Chris@439: extractStructure(ttvi->second, sparse, minValueCount, maxValueCount); Chris@439: Chris@439: cerr << "For source \"" << source.toStdString() << "\", type \"" Chris@439: << type.toStdString() << "\" we have sparse = " << sparse Chris@439: << ", min value count = " << minValueCount << ", max = " Chris@439: << maxValueCount << endl; Chris@439: Chris@439: // Model allocations: Chris@439: // Chris@439: // Sparse, no values: SparseOneDimensionalModel Chris@439: // Chris@439: // Sparse, always 1 value: SparseTimeValueModel Chris@439: // Chris@439: // Sparse, > 1 value: No standard model for this. If Chris@439: // there are always 2 values, perhaps hack it into Chris@439: // NoteModel for now? Or always use SparseTimeValueModel Chris@439: // and discard all but the first value. Chris@439: // Chris@439: // Dense, no values: Meaningless; no suitable model Chris@439: // Chris@439: // Dense, > 0 values: EditableDenseThreeDimensionalModel Chris@439: // Chris@439: // These should just be our fallback positions; we want to Chris@439: // be reading semantic data from the RDF in order to pick Chris@439: // the right model directly Chris@439: Chris@439: enum { SODM, STVM, EDTDM } modelType = SODM; Chris@439: Chris@439: if (sparse) { Chris@439: if (maxValueCount == 0) { Chris@439: modelType = SODM; Chris@439: } else if (minValueCount == 1 && maxValueCount == 1) { Chris@439: modelType = STVM; Chris@439: } else { Chris@439: cerr << "WARNING: No suitable model available for sparse data with between " << minValueCount << " and " << maxValueCount << " values" << endl; Chris@439: modelType = STVM; Chris@439: } Chris@439: } else { Chris@439: if (maxValueCount == 0) { Chris@439: cerr << "WARNING: Dense data set with no values is not meaningful, skipping" << endl; Chris@439: continue; Chris@439: } else { Chris@439: modelType = EDTDM; Chris@439: } Chris@439: } Chris@439: Chris@439: //!!! set model name &c Chris@439: Chris@439: if (modelType == SODM) { Chris@439: Chris@439: SparseOneDimensionalModel *model = Chris@439: new SparseOneDimensionalModel(m_sampleRate, 1, false); Chris@439: Chris@439: fillModel(model, ttvi->second); Chris@439: models.push_back(model); Chris@439: Chris@439: } else if (modelType == STVM) { Chris@439: Chris@439: SparseTimeValueModel *model = Chris@439: new SparseTimeValueModel(m_sampleRate, 1, false); Chris@439: Chris@439: fillModel(model, ttvi->second); Chris@439: models.push_back(model); Chris@439: Chris@439: } else { Chris@439: Chris@439: EditableDenseThreeDimensionalModel *model = Chris@439: new EditableDenseThreeDimensionalModel(m_sampleRate, 1, 0, Chris@439: false); Chris@439: Chris@439: fillModel(model, ttvi->second); Chris@439: models.push_back(model); Chris@439: } Chris@439: } Chris@439: } Chris@439: Chris@439: Chris@439: return models; Chris@439: } Chris@439: Chris@439: void Chris@439: RDFImporterImpl::extractStructure(const TimeValueMap &tvm, Chris@439: bool &sparse, Chris@439: int &minValueCount, Chris@439: int &maxValueCount) Chris@439: { Chris@439: // These are floats intentionally rather than RealTime -- Chris@439: // see logic for handling rounding error below Chris@439: float firstTime = 0.f; Chris@439: float timeStep = 0.f; Chris@439: bool haveTimeStep = false; Chris@439: Chris@439: for (TimeValueMap::const_iterator tvi = tvm.begin(); tvi != tvm.end(); ++tvi) { Chris@439: Chris@439: RealTime time = tvi->first; Chris@439: int valueCount = tvi->second.size(); Chris@439: Chris@439: if (tvi == tvm.begin()) { Chris@439: Chris@439: minValueCount = valueCount; Chris@439: maxValueCount = valueCount; Chris@439: Chris@439: firstTime = time.toDouble(); Chris@439: Chris@439: } else { Chris@439: Chris@439: if (valueCount < minValueCount) minValueCount = valueCount; Chris@439: if (valueCount > maxValueCount) maxValueCount = valueCount; Chris@439: Chris@439: if (!haveTimeStep) { Chris@439: timeStep = time.toDouble() - firstTime; Chris@439: if (timeStep == 0.f) sparse = true; Chris@439: haveTimeStep = true; Chris@439: } else if (!sparse) { Chris@439: // test whether this time is within Chris@439: // rounding-error range of being an integer Chris@439: // multiple of some constant away from the Chris@439: // first time Chris@439: float timeAsFloat = time.toDouble(); Chris@439: int count = int((timeAsFloat - firstTime) / timeStep + 0.5); Chris@439: float expected = firstTime + (timeStep * count); Chris@439: if (fabsf(expected - timeAsFloat) > 1e-6) { Chris@439: cerr << "Event at " << timeAsFloat << " is not evenly spaced -- would expect it to be " << expected << " for a spacing of " << count << " * " << timeStep << endl; Chris@439: sparse = true; Chris@439: } Chris@439: } Chris@439: } Chris@439: } Chris@439: } Chris@439: Chris@439: void Chris@439: RDFImporterImpl::fillModel(SparseOneDimensionalModel *model, Chris@439: const TimeValueMap &tvm) Chris@439: { Chris@439: //!!! labels &c not yet handled Chris@439: Chris@439: for (TimeValueMap::const_iterator tvi = tvm.begin(); Chris@439: tvi != tvm.end(); ++tvi) { Chris@439: Chris@439: RealTime time = tvi->first; Chris@439: long frame = RealTime::realTime2Frame(time, m_sampleRate); Chris@439: Chris@439: SparseOneDimensionalModel::Point point(frame); Chris@439: Chris@439: model->addPoint(point); Chris@439: } Chris@439: } Chris@439: Chris@439: void Chris@439: RDFImporterImpl::fillModel(SparseTimeValueModel *model, Chris@439: const TimeValueMap &tvm) Chris@439: { Chris@439: //!!! labels &c not yet handled Chris@439: Chris@439: for (TimeValueMap::const_iterator tvi = tvm.begin(); Chris@439: tvi != tvm.end(); ++tvi) { Chris@439: Chris@439: RealTime time = tvi->first; Chris@439: long frame = RealTime::realTime2Frame(time, m_sampleRate); Chris@439: Chris@439: float value = 0.f; Chris@439: if (!tvi->second.empty()) value = *tvi->second.begin(); Chris@439: Chris@439: SparseTimeValueModel::Point point(frame, value, ""); Chris@439: Chris@439: model->addPoint(point); Chris@439: } Chris@439: } Chris@439: Chris@439: void Chris@439: RDFImporterImpl::fillModel(EditableDenseThreeDimensionalModel *model, Chris@439: const TimeValueMap &tvm) Chris@439: { Chris@439: //!!! labels &c not yet handled Chris@439: Chris@439: //!!! start time offset not yet handled Chris@439: Chris@439: size_t col = 0; Chris@439: Chris@439: for (TimeValueMap::const_iterator tvi = tvm.begin(); Chris@439: tvi != tvm.end(); ++tvi) { Chris@439: Chris@439: model->setColumn(col++, tvi->second); Chris@439: } Chris@439: } Chris@439: