comparison data/fileio/TextTest.cpp @ 1855:db489a1ece9b

Pull out text-document check; it's useful elsewhere
author Chris Cannam
date Mon, 11 May 2020 17:27:18 +0100 (2020-05-11)
parents
children 4c5736a517e0
comparison
equal deleted inserted replaced
1854:bde22957545e 1855:db489a1ece9b
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 Sonic Visualiser
5 An audio file viewer and annotation editor.
6 Centre for Digital Music, Queen Mary, University of London.
7
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2 of the
11 License, or (at your option) any later version. See the file
12 COPYING included with this distribution for more information.
13 */
14
15 #include "TextTest.h"
16
17 #include "base/Debug.h"
18 #include "base/StringBits.h"
19
20 #include <QFile>
21 #include <QXmlInputSource>
22
23 bool
24 TextTest::isApparentTextDocument(FileSource source)
25 {
26 // Return true if the document can be opened and contains some
27 // sort of text, either UTF-8 (so it could be Turtle) or another
28 // encoding that is recognised as XML
29
30 if (!source.isAvailable()) {
31 SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Failed to retrieve document from " << source.getLocation() << endl;
32 return false;
33 }
34
35 QFile file(source.getLocalFilename());
36 if (!file.open(QFile::ReadOnly)) {
37 SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Failed to open local file from " << source.getLocalFilename() << endl;
38 return false;
39 }
40
41 QByteArray bytes = file.read(200);
42
43 if (StringBits::isValidUtf8(bytes.toStdString(), true)) {
44 SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Document appears to be UTF-8" << endl;
45 return true; // good enough to be worth trying to parse
46 }
47
48 QXmlInputSource xmlSource;
49 xmlSource.setData(bytes); // guesses text encoding
50
51 if (xmlSource.data().startsWith("<?xml")) {
52 SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Document appears to be XML" << endl;
53 return true;
54 }
55
56 SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Document is not UTF-8 and is not XML, rejecting" << endl;
57 return false;
58 }