Mercurial > hg > svcore
comparison rdf/RDFImporter.cpp @ 1855:db489a1ece9b
Pull out text-document check; it's useful elsewhere
author | Chris Cannam |
---|---|
date | Mon, 11 May 2020 17:27:18 +0100 |
parents | a454c7477b4f |
children |
comparison
equal
deleted
inserted
replaced
1854:bde22957545e | 1855:db489a1ece9b |
---|---|
34 #include "data/model/ReadOnlyWaveFileModel.h" | 34 #include "data/model/ReadOnlyWaveFileModel.h" |
35 | 35 |
36 #include "data/fileio/FileSource.h" | 36 #include "data/fileio/FileSource.h" |
37 #include "data/fileio/CachedFile.h" | 37 #include "data/fileio/CachedFile.h" |
38 #include "data/fileio/FileFinder.h" | 38 #include "data/fileio/FileFinder.h" |
39 #include "data/fileio/TextTest.h" | |
39 | 40 |
40 #include <dataquay/BasicStore.h> | 41 #include <dataquay/BasicStore.h> |
41 #include <dataquay/PropertyObject.h> | 42 #include <dataquay/PropertyObject.h> |
42 | |
43 #include <QFile> | |
44 #include <QXmlInputSource> | |
45 | 43 |
46 using Dataquay::Uri; | 44 using Dataquay::Uri; |
47 using Dataquay::Node; | 45 using Dataquay::Node; |
48 using Dataquay::Nodes; | 46 using Dataquay::Nodes; |
49 using Dataquay::Triple; | 47 using Dataquay::Triple; |
888 } | 886 } |
889 | 887 |
890 bool | 888 bool |
891 RDFImporter::isPlausibleDocumentOfAnyKind(QUrl url) | 889 RDFImporter::isPlausibleDocumentOfAnyKind(QUrl url) |
892 { | 890 { |
893 // Return true if the document can be opened and contains some | 891 return TextTest::isApparentTextDocument(FileSource(url)); |
894 // sort of text, either UTF-8 (so it could be Turtle) or another | 892 } |
895 // encoding that is recognised as XML | 893 |
896 | |
897 FileSource source(url); | |
898 | |
899 if (!source.isAvailable()) { | |
900 SVDEBUG << "NOTE: RDFImporter::isPlausibleDocumentOfAnyKind: Failed to retrieve document from " << url << endl; | |
901 return false; | |
902 } | |
903 | |
904 QFile file(source.getLocalFilename()); | |
905 if (!file.open(QFile::ReadOnly)) { | |
906 SVDEBUG << "NOTE: RDFImporter::isPlausibleDocumentOfAnyKind: Failed to open local file from " << source.getLocalFilename() << endl; | |
907 return false; | |
908 } | |
909 | |
910 QByteArray bytes = file.read(200); | |
911 | |
912 if (StringBits::isValidUtf8(bytes.toStdString(), true)) { | |
913 SVDEBUG << "NOTE: RDFImporter::isPlausibleDocumentOfAnyKind: Document appears to be UTF-8" << endl; | |
914 return true; // good enough to be worth trying to parse | |
915 } | |
916 | |
917 QXmlInputSource xmlSource; | |
918 xmlSource.setData(bytes); // guesses text encoding | |
919 | |
920 if (xmlSource.data().startsWith("<?xml")) { | |
921 SVDEBUG << "NOTE: RDFImporter::isPlausibleDocumentOfAnyKind: Document appears to be XML" << endl; | |
922 return true; | |
923 } | |
924 | |
925 SVDEBUG << "NOTE: RDFImporter::isPlausibleDocumentOfAnyKind: Document is not UTF-8 and is not XML, rejecting" << endl; | |
926 return false; | |
927 } | |
928 |