annotate data/fileio/TextTest.cpp @ 1855:db489a1ece9b

Pull out text-document check; it's useful elsewhere
author Chris Cannam
date Mon, 11 May 2020 17:27:18 +0100
parents
children 4c5736a517e0
rev   line source
Chris@1855 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@1855 2
Chris@1855 3 /*
Chris@1855 4 Sonic Visualiser
Chris@1855 5 An audio file viewer and annotation editor.
Chris@1855 6 Centre for Digital Music, Queen Mary, University of London.
Chris@1855 7
Chris@1855 8 This program is free software; you can redistribute it and/or
Chris@1855 9 modify it under the terms of the GNU General Public License as
Chris@1855 10 published by the Free Software Foundation; either version 2 of the
Chris@1855 11 License, or (at your option) any later version. See the file
Chris@1855 12 COPYING included with this distribution for more information.
Chris@1855 13 */
Chris@1855 14
Chris@1855 15 #include "TextTest.h"
Chris@1855 16
Chris@1855 17 #include "base/Debug.h"
Chris@1855 18 #include "base/StringBits.h"
Chris@1855 19
Chris@1855 20 #include <QFile>
Chris@1855 21 #include <QXmlInputSource>
Chris@1855 22
Chris@1855 23 bool
Chris@1855 24 TextTest::isApparentTextDocument(FileSource source)
Chris@1855 25 {
Chris@1855 26 // Return true if the document can be opened and contains some
Chris@1855 27 // sort of text, either UTF-8 (so it could be Turtle) or another
Chris@1855 28 // encoding that is recognised as XML
Chris@1855 29
Chris@1855 30 if (!source.isAvailable()) {
Chris@1855 31 SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Failed to retrieve document from " << source.getLocation() << endl;
Chris@1855 32 return false;
Chris@1855 33 }
Chris@1855 34
Chris@1855 35 QFile file(source.getLocalFilename());
Chris@1855 36 if (!file.open(QFile::ReadOnly)) {
Chris@1855 37 SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Failed to open local file from " << source.getLocalFilename() << endl;
Chris@1855 38 return false;
Chris@1855 39 }
Chris@1855 40
Chris@1855 41 QByteArray bytes = file.read(200);
Chris@1855 42
Chris@1855 43 if (StringBits::isValidUtf8(bytes.toStdString(), true)) {
Chris@1855 44 SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Document appears to be UTF-8" << endl;
Chris@1855 45 return true; // good enough to be worth trying to parse
Chris@1855 46 }
Chris@1855 47
Chris@1855 48 QXmlInputSource xmlSource;
Chris@1855 49 xmlSource.setData(bytes); // guesses text encoding
Chris@1855 50
Chris@1855 51 if (xmlSource.data().startsWith("<?xml")) {
Chris@1855 52 SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Document appears to be XML" << endl;
Chris@1855 53 return true;
Chris@1855 54 }
Chris@1855 55
Chris@1855 56 SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Document is not UTF-8 and is not XML, rejecting" << endl;
Chris@1855 57 return false;
Chris@1855 58 }