Mercurial > hg > svcore
view data/fileio/TextTest.cpp @ 1855:db489a1ece9b
Pull out text-document check; it's useful elsewhere
author | Chris Cannam |
---|---|
date | Mon, 11 May 2020 17:27:18 +0100 |
parents | |
children | 4c5736a517e0 |
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ /* Sonic Visualiser An audio file viewer and annotation editor. Centre for Digital Music, Queen Mary, University of London. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. See the file COPYING included with this distribution for more information. */ #include "TextTest.h" #include "base/Debug.h" #include "base/StringBits.h" #include <QFile> #include <QXmlInputSource> bool TextTest::isApparentTextDocument(FileSource source) { // Return true if the document can be opened and contains some // sort of text, either UTF-8 (so it could be Turtle) or another // encoding that is recognised as XML if (!source.isAvailable()) { SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Failed to retrieve document from " << source.getLocation() << endl; return false; } QFile file(source.getLocalFilename()); if (!file.open(QFile::ReadOnly)) { SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Failed to open local file from " << source.getLocalFilename() << endl; return false; } QByteArray bytes = file.read(200); if (StringBits::isValidUtf8(bytes.toStdString(), true)) { SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Document appears to be UTF-8" << endl; return true; // good enough to be worth trying to parse } QXmlInputSource xmlSource; xmlSource.setData(bytes); // guesses text encoding if (xmlSource.data().startsWith("<?xml")) { SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Document appears to be XML" << endl; return true; } SVDEBUG << "NOTE: TextTest::isApparentTextDocument: Document is not UTF-8 and is not XML, rejecting" << endl; return false; }