annotate base/StringBits.h @ 1854:bde22957545e

Add support for doubling escapes for quotes in quoted texts in CSV-like formats on import (similar to how we, and the relevant RFC, do escaping on export now)
author Chris Cannam
date Mon, 11 May 2020 14:43:58 +0100
parents 91056142abd0
children
rev   line source
Chris@629 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@629 2
Chris@629 3 /*
Chris@629 4 Sonic Visualiser
Chris@629 5 An audio file viewer and annotation editor.
Chris@629 6 Centre for Digital Music, Queen Mary, University of London.
Chris@629 7
Chris@629 8 This program is free software; you can redistribute it and/or
Chris@629 9 modify it under the terms of the GNU General Public License as
Chris@629 10 published by the Free Software Foundation; either version 2 of the
Chris@629 11 License, or (at your option) any later version. See the file
Chris@629 12 COPYING included with this distribution for more information.
Chris@629 13 */
Chris@629 14
Chris@629 15 /*
Chris@629 16 This is a modified version of a source file from the
Chris@629 17 Rosegarden MIDI and audio sequencer and notation editor.
Chris@629 18 This file copyright 2000-2010 Chris Cannam.
Chris@629 19 */
Chris@629 20
Chris@1524 21 #ifndef SV_STRING_BITS_H
Chris@1524 22 #define SV_STRING_BITS_H
Chris@629 23
Chris@629 24 #include <QString>
Chris@629 25 #include <QStringList>
Chris@629 26 #include <QChar>
Chris@629 27
Chris@629 28 class StringBits
Chris@629 29 {
Chris@629 30 public:
Chris@629 31 /**
Chris@629 32 * Convert a string to a double using basic "C"-locale syntax,
Chris@629 33 * i.e. always using '.' as a decimal point. We use this as a
Chris@629 34 * fallback when parsing files from an unknown source, if
Chris@629 35 * locale-specific conversion fails. Does not support e notation.
Chris@629 36 * If ok is non-NULL, *ok will be set to true if conversion
Chris@629 37 * succeeds or false otherwise.
Chris@629 38 */
Chris@629 39 static double stringToDoubleLocaleFree(QString s, bool *ok = 0);
Chris@629 40
Chris@1854 41 enum EscapeMode {
Chris@1854 42 EscapeAny, // support both backslash and doubling escapes
Chris@1854 43 EscapeBackslash, // support backslash escapes only
Chris@1854 44 EscapeDoubling, // support doubling escapes ("" for " etc) only
Chris@1854 45 EscapeNone // support no escapes
Chris@1854 46 };
Chris@1854 47
Chris@629 48 /**
Chris@629 49 * Split a string at the given separator character, allowing
Chris@629 50 * quoted sections that contain the separator. If the separator
Chris@629 51 * is ' ', any (amount of) whitespace will be considered as a
Chris@629 52 * single separator. If the separator is another whitespace
Chris@629 53 * character such as '\t', it will be used literally.
Chris@629 54 */
Chris@1854 55 static QStringList splitQuoted(QString s,
Chris@1854 56 QChar separator,
Chris@1854 57 EscapeMode escapeMode = EscapeAny);
Chris@629 58
Chris@629 59 /**
Chris@629 60 * Split a string at the given separator character. If quoted is
Chris@1854 61 * true, do so by calling splitQuoted (above) in EscapeAny escape
Chris@1854 62 * mode. If quoted is false, use QString::split; if separator is
Chris@1854 63 * ' ', use SkipEmptyParts behaviour, otherwise use KeepEmptyParts
Chris@1854 64 * (this is analogous to the behaviour of splitQuoted).
Chris@629 65 */
Chris@1854 66 static QStringList split(QString s,
Chris@1854 67 QChar separator,
Chris@1854 68 bool quoted);
Chris@1833 69
Chris@1833 70 /**
Chris@1833 71 * Join a vector of strings into a single string, with the
Chris@1833 72 * delimiter as the joining string. If a string contains the
Chris@1833 73 * delimiter already, quote it with double-quotes, replacing any
Chris@1833 74 * existing double-quotes within it by a pair of double-quotes, as
Chris@1833 75 * specified in RFC 4180 Common Format and MIME Type for
Chris@1833 76 * Comma-Separated Values (CSV) Files.
Chris@1833 77 */
Chris@1833 78 static QString joinDelimited(QVector<QString> row, QString delimiter);
Chris@1851 79
Chris@1851 80 /**
Chris@1851 81 * Return true if the given byte array contains a valid UTF-8
Chris@1851 82 * sequence, false if not. If isTruncated is true, the byte array
Chris@1851 83 * will be treated as the prefix of a longer byte sequence, and
Chris@1851 84 * any errors resulting from a multibyte code ending prematurely
Chris@1851 85 * at the end of the array will be ignored.
Chris@1851 86 */
Chris@1851 87 static bool isValidUtf8(const std::string &bytes, bool isTruncated);
Chris@629 88 };
Chris@629 89
Chris@629 90 #endif