Mercurial > hg > svcore
diff data/fileio/TextTest.cpp @ 1855:db489a1ece9b
Pull out text-document check; it's useful elsewhere
author | Chris Cannam |
---|---|
date | Mon, 11 May 2020 17:27:18 +0100 |
parents | |
children | 4c5736a517e0 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data/fileio/TextTest.cpp Mon May 11 17:27:18 2020 +0100 @@ -0,0 +1,58 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Sonic Visualiser + An audio file viewer and annotation editor. + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "TextTest.h" + +#include "base/Debug.h" +#include "base/StringBits.h" + +#include <QFile> +#include <QXmlInputSource> + +bool +TextTest::isApparentTextDocument(FileSource source) +{ + // Return true if the document can be opened and contains some + // sort of text, either UTF-8 (so it could be Turtle) or another + // encoding that is recognised as XML + + if (!source.isAvailable()) { + SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Failed to retrieve document from " << source.getLocation() << endl; + return false; + } + + QFile file(source.getLocalFilename()); + if (!file.open(QFile::ReadOnly)) { + SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Failed to open local file from " << source.getLocalFilename() << endl; + return false; + } + + QByteArray bytes = file.read(200); + + if (StringBits::isValidUtf8(bytes.toStdString(), true)) { + SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Document appears to be UTF-8" << endl; + return true; // good enough to be worth trying to parse + } + + QXmlInputSource xmlSource; + xmlSource.setData(bytes); // guesses text encoding + + if (xmlSource.data().startsWith("<?xml")) { + SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Document appears to be XML" << endl; + return true; + } + + SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Document is not UTF-8 and is not XML, rejecting" << endl; + return false; +}