Mercurial > hg > svcore
comparison data/fileio/TextTest.cpp @ 1855:db489a1ece9b
Pull out text-document check; it's useful elsewhere
author | Chris Cannam |
---|---|
date | Mon, 11 May 2020 17:27:18 +0100 |
parents | |
children | 4c5736a517e0 |
comparison
equal
deleted
inserted
replaced
1854:bde22957545e | 1855:db489a1ece9b |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 Sonic Visualiser | |
5 An audio file viewer and annotation editor. | |
6 Centre for Digital Music, Queen Mary, University of London. | |
7 | |
8 This program is free software; you can redistribute it and/or | |
9 modify it under the terms of the GNU General Public License as | |
10 published by the Free Software Foundation; either version 2 of the | |
11 License, or (at your option) any later version. See the file | |
12 COPYING included with this distribution for more information. | |
13 */ | |
14 | |
15 #include "TextTest.h" | |
16 | |
17 #include "base/Debug.h" | |
18 #include "base/StringBits.h" | |
19 | |
20 #include <QFile> | |
21 #include <QXmlInputSource> | |
22 | |
23 bool | |
24 TextTest::isApparentTextDocument(FileSource source) | |
25 { | |
26 // Return true if the document can be opened and contains some | |
27 // sort of text, either UTF-8 (so it could be Turtle) or another | |
28 // encoding that is recognised as XML | |
29 | |
30 if (!source.isAvailable()) { | |
31 SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Failed to retrieve document from " << source.getLocation() << endl; | |
32 return false; | |
33 } | |
34 | |
35 QFile file(source.getLocalFilename()); | |
36 if (!file.open(QFile::ReadOnly)) { | |
37 SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Failed to open local file from " << source.getLocalFilename() << endl; | |
38 return false; | |
39 } | |
40 | |
41 QByteArray bytes = file.read(200); | |
42 | |
43 if (StringBits::isValidUtf8(bytes.toStdString(), true)) { | |
44 SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Document appears to be UTF-8" << endl; | |
45 return true; // good enough to be worth trying to parse | |
46 } | |
47 | |
48 QXmlInputSource xmlSource; | |
49 xmlSource.setData(bytes); // guesses text encoding | |
50 | |
51 if (xmlSource.data().startsWith("<?xml")) { | |
52 SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Document appears to be XML" << endl; | |
53 return true; | |
54 } | |
55 | |
56 SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Document is not UTF-8 and is not XML, rejecting" << endl; | |
57 return false; | |
58 } |