# HG changeset patch # User Chris Cannam # Date 1588254367 -3600 # Node ID a454c7477b4f77c3f1a7d69666285eb6327202ce # Parent 91056142abd019533686924e752f3467c79aa9ff Be more cautious about firing up an RDF file parser to identify a document - don't do it at all if the document is not apparently text diff -r 91056142abd0 -r a454c7477b4f rdf/RDFImporter.cpp --- a/rdf/RDFImporter.cpp Thu Apr 30 14:45:24 2020 +0100 +++ b/rdf/RDFImporter.cpp Thu Apr 30 14:46:07 2020 +0100 @@ -23,6 +23,7 @@ #include "base/ProgressReporter.h" #include "base/RealTime.h" +#include "base/StringBits.h" #include "data/model/SparseOneDimensionalModel.h" #include "data/model/SparseTimeValueModel.h" @@ -39,6 +40,9 @@ #include #include +#include +#include + using Dataquay::Uri; using Dataquay::Node; using Dataquay::Nodes; @@ -790,19 +794,22 @@ } RDFImporter::RDFDocumentType -RDFImporter::identifyDocumentType(QString url) +RDFImporter::identifyDocumentType(QUrl url) { bool haveAudio = false; bool haveAnnotations = false; bool haveRDF = false; + if (!isPlausibleDocumentOfAnyKind(url)) { + return NotRDF; + } + BasicStore *store = nullptr; - + // This is not expected to return anything useful, but if it does // anything at all then we know we have RDF try { - //!!! non-local document? - store = BasicStore::load(QUrl(url)); + store = BasicStore::load(url); Triple t = store->matchOnce(Triple()); if (t != Triple()) haveRDF = true; } catch (std::exception &) { @@ -880,3 +887,42 @@ return OtherRDFDocument; } +bool +RDFImporter::isPlausibleDocumentOfAnyKind(QUrl url) +{ + // Return true if the document can be opened and contains some + // sort of text, either UTF-8 (so it could be Turtle) or another + // encoding that is recognised as XML + + FileSource source(url); + + if (!source.isAvailable()) { + SVDEBUG << "NOTE: RDFImporter::isPlausibleDocumentOfAnyKind: Failed to retrieve document from " << url << endl; + return false; + } + + QFile file(source.getLocalFilename()); + if (!file.open(QFile::ReadOnly)) { + SVDEBUG << "NOTE: RDFImporter::isPlausibleDocumentOfAnyKind: Failed to open local file from " << source.getLocalFilename() << endl; + return false; + } + + QByteArray bytes = file.read(200); + + if (StringBits::isValidUtf8(bytes.toStdString(), true)) { + SVDEBUG << "NOTE: RDFImporter::isPlausibleDocumentOfAnyKind: Document appears to be UTF-8" << endl; + return true; // good enough to be worth trying to parse + } + + QXmlInputSource xmlSource; + xmlSource.setData(bytes); // guesses text encoding + + if (xmlSource.data().startsWith("