annotate base/StringBits.cpp @ 1858:14747f24ad04

Use cancellable serialiser; add some more profiling points
author Chris Cannam
date Thu, 14 May 2020 16:36:48 +0100
parents bde22957545e
children
rev   line source
Chris@629 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@629 2
Chris@629 3 /*
Chris@629 4 Sonic Visualiser
Chris@629 5 An audio file viewer and annotation editor.
Chris@629 6 Centre for Digital Music, Queen Mary, University of London.
Chris@629 7
Chris@629 8 This program is free software; you can redistribute it and/or
Chris@629 9 modify it under the terms of the GNU General Public License as
Chris@629 10 published by the Free Software Foundation; either version 2 of the
Chris@629 11 License, or (at your option) any later version. See the file
Chris@629 12 COPYING included with this distribution for more information.
Chris@629 13 */
Chris@629 14
Chris@629 15 /*
Chris@629 16 This is a modified version of a source file from the
Chris@629 17 Rosegarden MIDI and audio sequencer and notation editor.
Chris@629 18 This file copyright 2000-2010 Chris Cannam.
Chris@629 19 */
Chris@629 20
Chris@629 21 #include "StringBits.h"
Chris@629 22
Chris@1022 23 #include "Debug.h"
Chris@1022 24
Chris@1022 25 using namespace std;
Chris@1022 26
Chris@629 27 double
Chris@629 28 StringBits::stringToDoubleLocaleFree(QString s, bool *ok)
Chris@629 29 {
Chris@629 30 int dp = 0;
Chris@629 31 int sign = 1;
Chris@629 32 int i = 0;
Chris@629 33 double result = 0.0;
Chris@629 34 int len = s.length();
Chris@629 35
Chris@629 36 result = 0.0;
Chris@629 37
Chris@629 38 if (ok) *ok = true;
Chris@629 39
Chris@629 40 while (i < len && s[i].isSpace()) ++i;
Chris@629 41 if (i < len && s[i] == '-') sign = -1;
Chris@629 42
Chris@629 43 while (i < len) {
Chris@629 44
Chris@1429 45 QChar c = s[i];
Chris@629 46
Chris@629 47 if (c.isDigit()) {
Chris@629 48
Chris@629 49 double d = c.digitValue();
Chris@629 50
Chris@629 51 if (dp > 0) {
Chris@629 52 for (int p = dp; p > 0; --p) d /= 10.0;
Chris@629 53 ++dp;
Chris@629 54 } else {
Chris@629 55 result *= 10.0;
Chris@629 56 }
Chris@629 57
Chris@629 58 result += d;
Chris@629 59
Chris@629 60 } else if (c == '.') {
Chris@629 61
Chris@629 62 dp = 1;
Chris@629 63
Chris@629 64 } else if (ok) {
Chris@629 65 *ok = false;
Chris@629 66 }
Chris@629 67
Chris@629 68 ++i;
Chris@629 69 }
Chris@629 70
Chris@629 71 return result * sign;
Chris@629 72 }
Chris@629 73
Chris@629 74 QStringList
Chris@1854 75 StringBits::splitQuoted(QString s, QChar separator, EscapeMode escapeMode)
Chris@629 76 {
Chris@629 77 QStringList tokens;
Chris@629 78 QString tok;
Chris@629 79
Chris@1854 80 // beg -> at beginning of line
Chris@1854 81 // sep -> just seen a field separator
Chris@1022 82 // unq -> in an unquoted field
Chris@1022 83 // q1 -> in a single-quoted field
Chris@1022 84 // q2 -> in a double-quoted field
Chris@1022 85
Chris@1854 86 enum { beg, sep, unq, q1, q2 } mode = beg;
Chris@1854 87
Chris@1854 88 bool use_doubling = (escapeMode == EscapeDoubling ||
Chris@1854 89 escapeMode == EscapeAny);
Chris@1854 90 bool use_backslash = (escapeMode == EscapeBackslash ||
Chris@1854 91 escapeMode == EscapeAny);
Chris@629 92
Chris@629 93 for (int i = 0; i < s.length(); ++i) {
Chris@1429 94
Chris@1429 95 QChar c = s[i];
Chris@629 96
Chris@1429 97 if (c == '\'') {
Chris@1429 98 switch (mode) {
Chris@1854 99 case beg: case sep: mode = q1; break;
Chris@1429 100 case unq: case q2: tok += c; break;
Chris@1854 101 case q1:
Chris@1854 102 if (use_doubling && i+1 < s.length() && s[i+1] == c) {
Chris@1854 103 tok += c; ++i; break;
Chris@1854 104 } else {
Chris@1854 105 mode = unq; break;
Chris@1854 106 }
Chris@1429 107 }
Chris@629 108
Chris@1429 109 } else if (c == '"') {
Chris@1429 110 switch (mode) {
Chris@1854 111 case beg: case sep: mode = q2; break;
Chris@1429 112 case unq: case q1: tok += c; break;
Chris@1854 113 case q2:
Chris@1854 114 if (use_doubling && i+1 < s.length() && s[i+1] == c) {
Chris@1854 115 tok += c; ++i; break;
Chris@1854 116 } else {
Chris@1854 117 mode = unq; break;
Chris@1854 118 }
Chris@1429 119 }
Chris@629 120
Chris@1429 121 } else if (c == separator || (separator == ' ' && c.isSpace())) {
Chris@1429 122 switch (mode) {
Chris@1854 123 case beg: mode = sep; tokens << ""; break;
Chris@1429 124 case sep: if (separator != ' ') tokens << ""; break;
Chris@1429 125 case unq: mode = sep; tokens << tok; tok = ""; break;
Chris@1429 126 case q1: case q2: tok += c; break;
Chris@1429 127 }
Chris@629 128
Chris@1854 129 } else if (c == '\\' && use_backslash) {
Chris@1429 130 if (++i < s.length()) {
Chris@1429 131 c = s[i];
Chris@1429 132 switch (mode) {
Chris@1854 133 case beg: case sep: mode = unq; tok += c; break;
Chris@928 134 case unq: case q1: case q2: tok += c; break;
Chris@1429 135 }
Chris@1429 136 }
Chris@629 137
Chris@1429 138 } else {
Chris@1429 139 switch (mode) {
Chris@1854 140 case beg: case sep: mode = unq; tok += c; break;
Chris@928 141 case unq: case q1: case q2: tok += c; break;
Chris@1429 142 }
Chris@1429 143 }
Chris@629 144 }
Chris@629 145
Chris@1854 146 if (tok != "" || mode != beg) {
Chris@1022 147 if (mode == q1) {
Chris@1022 148 tokens << ("'" + tok); // turns out it wasn't quoted after all
Chris@1022 149 } else if (mode == q2) {
Chris@1022 150 tokens << ("\"" + tok);
Chris@1022 151 } else {
Chris@1022 152 tokens << tok;
Chris@1022 153 }
Chris@1022 154 }
Chris@1022 155
Chris@629 156 return tokens;
Chris@629 157 }
Chris@629 158
Chris@629 159 QStringList
Chris@629 160 StringBits::split(QString line, QChar separator, bool quoted)
Chris@629 161 {
Chris@629 162 if (quoted) {
Chris@629 163 return splitQuoted(line, separator);
Chris@629 164 } else {
Chris@629 165 return line.split(separator,
Chris@629 166 separator == ' ' ? QString::SkipEmptyParts :
Chris@629 167 QString::KeepEmptyParts);
Chris@629 168 }
Chris@629 169 }
Chris@629 170
Chris@1833 171 QString
Chris@1833 172 StringBits::joinDelimited(QVector<QString> row, QString delimiter)
Chris@1833 173 {
Chris@1833 174 QString s;
Chris@1833 175 for (auto col: row) {
Chris@1833 176 if (s != "") {
Chris@1833 177 s += delimiter;
Chris@1833 178 }
Chris@1833 179 if (col.contains(delimiter)) {
Chris@1833 180 col.replace("\"", "\"\"");
Chris@1833 181 col = "\"" + col + "\"";
Chris@1833 182 }
Chris@1833 183 s += col;
Chris@1833 184 }
Chris@1833 185 return s;
Chris@1833 186 }
Chris@1833 187
/**
 * Report whether bytes is structurally valid UTF-8: that is, whether
 * every multibyte lead byte is followed by exactly the number of
 * continuation bytes it announces, and no continuation byte appears
 * anywhere else.
 *
 * Sequence lengths follow RFC 3629 (1 to 4 bytes per character), so
 * bytes of the form 11111xxx are rejected. Only the byte structure
 * is examined: overlong encodings and out-of-range code points are
 * not detected.
 *
 * @param bytes        The byte sequence to check.
 * @param isTruncated  If true, bytes is taken to be the prefix of a
 *                     longer sequence, so a multibyte character that
 *                     is still incomplete at the very end is not an
 *                     error. Errors before that point still are.
 * @return             true if no structural error was found.
 */
bool
StringBits::isValidUtf8(const std::string &bytes, bool isTruncated)
{
    // Number of continuation bytes still owed to the multibyte
    // character currently being read; 0 between characters
    size_t pending = 0;

    for (unsigned char c : bytes) {

        if ((c & 0xc0) == 0x80) {

            // 10xxxxxx: second or subsequent byte

            if (pending == 0) {
                // ... without a first byte, or one more than the
                // first byte asked for
                return false;
            }

            --pending;
            continue;
        }

        // 0xxxxxxx or 11xxxxxx: first byte of a character sequence

        if (pending > 0) {
            // previous multibyte sequence was cut short
            return false;
        }

        if (!(c & 0x80)) {
            pending = 0;                 // 0xxxxxxx: single byte
        } else if ((c & 0xe0) == 0xc0) {
            pending = 1;                 // 110xxxxx: 2-byte sequence
        } else if ((c & 0xf0) == 0xe0) {
            pending = 2;                 // 1110xxxx: 3-byte sequence
        } else if ((c & 0xf8) == 0xf0) {
            pending = 3;                 // 11110xxx: 4-byte sequence
        } else {
            // 11111xxx never begins a UTF-8 sequence
            return false;
        }
    }

    // at the end

    if (pending > 0) {
        // final multibyte sequence is incomplete: only acceptable if
        // the caller told us the data may have been cut off
        return isTruncated;
    }

    return true;
}
Chris@1851 256
Chris@1851 257
Chris@1851 258
Chris@1851 259