Chris@629: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@629: Chris@629: /* Chris@629: Sonic Visualiser Chris@629: An audio file viewer and annotation editor. Chris@629: Centre for Digital Music, Queen Mary, University of London. Chris@629: Chris@629: This program is free software; you can redistribute it and/or Chris@629: modify it under the terms of the GNU General Public License as Chris@629: published by the Free Software Foundation; either version 2 of the Chris@629: License, or (at your option) any later version. See the file Chris@629: COPYING included with this distribution for more information. Chris@629: */ Chris@629: Chris@629: /* Chris@629: This is a modified version of a source file from the Chris@629: Rosegarden MIDI and audio sequencer and notation editor. Chris@629: This file copyright 2000-2010 Chris Cannam. Chris@629: */ Chris@629: Chris@629: #include "StringBits.h" Chris@629: Chris@1022: #include "Debug.h" Chris@1022: Chris@1022: using namespace std; Chris@1022: Chris@629: double Chris@629: StringBits::stringToDoubleLocaleFree(QString s, bool *ok) Chris@629: { Chris@629: int dp = 0; Chris@629: int sign = 1; Chris@629: int i = 0; Chris@629: double result = 0.0; Chris@629: int len = s.length(); Chris@629: Chris@629: result = 0.0; Chris@629: Chris@629: if (ok) *ok = true; Chris@629: Chris@629: while (i < len && s[i].isSpace()) ++i; Chris@629: if (i < len && s[i] == '-') sign = -1; Chris@629: Chris@629: while (i < len) { Chris@629: Chris@1429: QChar c = s[i]; Chris@629: Chris@629: if (c.isDigit()) { Chris@629: Chris@629: double d = c.digitValue(); Chris@629: Chris@629: if (dp > 0) { Chris@629: for (int p = dp; p > 0; --p) d /= 10.0; Chris@629: ++dp; Chris@629: } else { Chris@629: result *= 10.0; Chris@629: } Chris@629: Chris@629: result += d; Chris@629: Chris@629: } else if (c == '.') { Chris@629: Chris@629: dp = 1; Chris@629: Chris@629: } else if (ok) { Chris@629: *ok = false; Chris@629: } Chris@629: Chris@629: ++i; Chris@629: } Chris@629: Chris@629: return result * sign; Chris@629: } Chris@629: Chris@629: QStringList Chris@1854: StringBits::splitQuoted(QString s, QChar separator, EscapeMode escapeMode) Chris@629: { Chris@629: QStringList tokens; Chris@629: QString tok; Chris@629: Chris@1854: // beg -> at beginning of line Chris@1854: // sep -> just seen a field separator Chris@1022: // unq -> in an unquoted field Chris@1022: // q1 -> in a single-quoted field Chris@1022: // q2 -> in a double-quoted field Chris@1022: Chris@1854: enum { beg, sep, unq, q1, q2 } mode = beg; Chris@1854: Chris@1854: bool use_doubling = (escapeMode == EscapeDoubling || Chris@1854: escapeMode == EscapeAny); Chris@1854: bool use_backslash = (escapeMode == EscapeBackslash || Chris@1854: escapeMode == EscapeAny); Chris@629: Chris@629: for (int i = 0; i < s.length(); ++i) { Chris@1429: Chris@1429: QChar c = s[i]; Chris@629: Chris@1429: if (c == '\'') { Chris@1429: switch (mode) { Chris@1854: case beg: case sep: mode = q1; break; Chris@1429: case unq: case q2: tok += c; break; Chris@1854: case q1: Chris@1854: if (use_doubling && i+1 < s.length() && s[i+1] == c) { Chris@1854: tok += c; ++i; break; Chris@1854: } else { Chris@1854: mode = unq; break; Chris@1854: } Chris@1429: } Chris@629: Chris@1429: } else if (c == '"') { Chris@1429: switch (mode) { Chris@1854: case beg: case sep: mode = q2; break; Chris@1429: case unq: case q1: tok += c; break; Chris@1854: case q2: Chris@1854: if (use_doubling && i+1 < s.length() && s[i+1] == c) { Chris@1854: tok += c; ++i; break; Chris@1854: } else { Chris@1854: mode = unq; break; Chris@1854: } Chris@1429: } Chris@629: Chris@1429: } else if (c == separator || (separator == ' ' && c.isSpace())) { Chris@1429: switch (mode) { Chris@1854: case beg: mode = sep; tokens << ""; break; Chris@1429: case sep: if (separator != ' ') tokens << ""; break; Chris@1429: case unq: mode = sep; tokens << tok; tok = ""; break; Chris@1429: case q1: case q2: tok += c; break; Chris@1429: } Chris@629: Chris@1854: } else if (c == '\\' && use_backslash) { Chris@1429: if (++i < s.length()) { Chris@1429: c = s[i]; Chris@1429: switch (mode) { Chris@1854: case beg: case sep: mode = unq; tok += c; break; Chris@928: case unq: case q1: case q2: tok += c; break; Chris@1429: } Chris@1429: } Chris@629: Chris@1429: } else { Chris@1429: switch (mode) { Chris@1854: case beg: case sep: mode = unq; tok += c; break; Chris@928: case unq: case q1: case q2: tok += c; break; Chris@1429: } Chris@1429: } Chris@629: } Chris@629: Chris@1854: if (tok != "" || mode != beg) { Chris@1022: if (mode == q1) { Chris@1022: tokens << ("'" + tok); // turns out it wasn't quoted after all Chris@1022: } else if (mode == q2) { Chris@1022: tokens << ("\"" + tok); Chris@1022: } else { Chris@1022: tokens << tok; Chris@1022: } Chris@1022: } Chris@1022: Chris@629: return tokens; Chris@629: } Chris@629: Chris@629: QStringList Chris@629: StringBits::split(QString line, QChar separator, bool quoted) Chris@629: { Chris@629: if (quoted) { Chris@629: return splitQuoted(line, separator); Chris@629: } else { Chris@629: return line.split(separator, Chris@629: separator == ' ' ? QString::SkipEmptyParts : Chris@629: QString::KeepEmptyParts); Chris@629: } Chris@629: } Chris@629: Chris@1833: QString Chris@1833: StringBits::joinDelimited(QVector row, QString delimiter) Chris@1833: { Chris@1833: QString s; Chris@1833: for (auto col: row) { Chris@1833: if (s != "") { Chris@1833: s += delimiter; Chris@1833: } Chris@1833: if (col.contains(delimiter)) { Chris@1833: col.replace("\"", "\"\""); Chris@1833: col = "\"" + col + "\""; Chris@1833: } Chris@1833: s += col; Chris@1833: } Chris@1833: return s; Chris@1833: } Chris@1833: Chris@1851: bool Chris@1851: StringBits::isValidUtf8(const std::string &bytes, bool isTruncated) Chris@1851: { Chris@1851: size_t len = bytes.length(); Chris@1851: size_t mblen = 0; Chris@1851: unsigned char first = '\0'; Chris@1851: Chris@1851: for (size_t i = 0; i < len; ++i) { Chris@1851: Chris@1851: unsigned char c = bytes[i]; Chris@1851: Chris@1851: if (((c & 0xc0) == 0xc0) || !(c & 0x80)) { Chris@1851: Chris@1851: // 11xxxxxx or 0xxxxxxx: first byte of a character sequence Chris@1851: Chris@1851: if (mblen > 0) { Chris@1851: Chris@1851: // have we seen a valid sequence? Chris@1851: size_t length = Chris@1851: (!(first & 0x20)) ? 2 : Chris@1851: (!(first & 0x10)) ? 3 : Chris@1851: (!(first & 0x08)) ? 4 : Chris@1851: (!(first & 0x04)) ? 5 : 0; Chris@1851: Chris@1851: if (length != 0 && mblen != length) { Chris@1851: // previous multibyte sequence had invalid length Chris@1851: return false; Chris@1851: } Chris@1851: } Chris@1851: Chris@1851: mblen = 0; Chris@1851: first = c; Chris@1851: Chris@1851: } else { Chris@1851: Chris@1851: // second or subsequent byte Chris@1851: Chris@1851: if (mblen == 0) { Chris@1851: // ... without a first byte! Chris@1851: return false; Chris@1851: } Chris@1851: } Chris@1851: } Chris@1851: Chris@1851: // at the end Chris@1851: Chris@1851: if (isTruncated) { Chris@1851: // can't trust any errors arising now Chris@1851: return true; Chris@1851: } Chris@1851: Chris@1851: if (mblen > 0) { Chris@1851: Chris@1851: // have we seen a valid sequence? Chris@1851: size_t length = Chris@1851: (!(first & 0x20)) ? 2 : Chris@1851: (!(first & 0x10)) ? 3 : Chris@1851: (!(first & 0x08)) ? 4 : Chris@1851: (!(first & 0x04)) ? 5 : 0; Chris@1851: Chris@1851: if (length != 0 && mblen != length) { Chris@1851: // final multibyte sequence had invalid length Chris@1851: return false; Chris@1851: } Chris@1851: } Chris@1851: Chris@1851: return true; Chris@1851: } Chris@1851: Chris@1851: Chris@1851: Chris@1851: