Chris@629
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@629
|
2
|
Chris@629
|
3 /*
|
Chris@629
|
4 Sonic Visualiser
|
Chris@629
|
5 An audio file viewer and annotation editor.
|
Chris@629
|
6 Centre for Digital Music, Queen Mary, University of London.
|
Chris@629
|
7
|
Chris@629
|
8 This program is free software; you can redistribute it and/or
|
Chris@629
|
9 modify it under the terms of the GNU General Public License as
|
Chris@629
|
10 published by the Free Software Foundation; either version 2 of the
|
Chris@629
|
11 License, or (at your option) any later version. See the file
|
Chris@629
|
12 COPYING included with this distribution for more information.
|
Chris@629
|
13 */
|
Chris@629
|
14
|
Chris@629
|
15 /*
|
Chris@629
|
16 This is a modified version of a source file from the
|
Chris@629
|
17 Rosegarden MIDI and audio sequencer and notation editor.
|
Chris@629
|
18 This file copyright 2000-2010 Chris Cannam.
|
Chris@629
|
19 */
|
Chris@629
|
20
|
Chris@629
|
21 #include "StringBits.h"
|
Chris@629
|
22
|
Chris@1022
|
23 #include "Debug.h"
|
Chris@1022
|
24
|
Chris@1022
|
25 using namespace std;
|
Chris@1022
|
26
|
Chris@629
|
27 double
|
Chris@629
|
28 StringBits::stringToDoubleLocaleFree(QString s, bool *ok)
|
Chris@629
|
29 {
|
Chris@629
|
30 int dp = 0;
|
Chris@629
|
31 int sign = 1;
|
Chris@629
|
32 int i = 0;
|
Chris@629
|
33 double result = 0.0;
|
Chris@629
|
34 int len = s.length();
|
Chris@629
|
35
|
Chris@629
|
36 result = 0.0;
|
Chris@629
|
37
|
Chris@629
|
38 if (ok) *ok = true;
|
Chris@629
|
39
|
Chris@629
|
40 while (i < len && s[i].isSpace()) ++i;
|
Chris@629
|
41 if (i < len && s[i] == '-') sign = -1;
|
Chris@629
|
42
|
Chris@629
|
43 while (i < len) {
|
Chris@629
|
44
|
Chris@1429
|
45 QChar c = s[i];
|
Chris@629
|
46
|
Chris@629
|
47 if (c.isDigit()) {
|
Chris@629
|
48
|
Chris@629
|
49 double d = c.digitValue();
|
Chris@629
|
50
|
Chris@629
|
51 if (dp > 0) {
|
Chris@629
|
52 for (int p = dp; p > 0; --p) d /= 10.0;
|
Chris@629
|
53 ++dp;
|
Chris@629
|
54 } else {
|
Chris@629
|
55 result *= 10.0;
|
Chris@629
|
56 }
|
Chris@629
|
57
|
Chris@629
|
58 result += d;
|
Chris@629
|
59
|
Chris@629
|
60 } else if (c == '.') {
|
Chris@629
|
61
|
Chris@629
|
62 dp = 1;
|
Chris@629
|
63
|
Chris@629
|
64 } else if (ok) {
|
Chris@629
|
65 *ok = false;
|
Chris@629
|
66 }
|
Chris@629
|
67
|
Chris@629
|
68 ++i;
|
Chris@629
|
69 }
|
Chris@629
|
70
|
Chris@629
|
71 return result * sign;
|
Chris@629
|
72 }
|
Chris@629
|
73
|
Chris@629
|
74 QStringList
|
Chris@1854
|
75 StringBits::splitQuoted(QString s, QChar separator, EscapeMode escapeMode)
|
Chris@629
|
76 {
|
Chris@629
|
77 QStringList tokens;
|
Chris@629
|
78 QString tok;
|
Chris@629
|
79
|
Chris@1854
|
80 // beg -> at beginning of line
|
Chris@1854
|
81 // sep -> just seen a field separator
|
Chris@1022
|
82 // unq -> in an unquoted field
|
Chris@1022
|
83 // q1 -> in a single-quoted field
|
Chris@1022
|
84 // q2 -> in a double-quoted field
|
Chris@1022
|
85
|
Chris@1854
|
86 enum { beg, sep, unq, q1, q2 } mode = beg;
|
Chris@1854
|
87
|
Chris@1854
|
88 bool use_doubling = (escapeMode == EscapeDoubling ||
|
Chris@1854
|
89 escapeMode == EscapeAny);
|
Chris@1854
|
90 bool use_backslash = (escapeMode == EscapeBackslash ||
|
Chris@1854
|
91 escapeMode == EscapeAny);
|
Chris@629
|
92
|
Chris@629
|
93 for (int i = 0; i < s.length(); ++i) {
|
Chris@1429
|
94
|
Chris@1429
|
95 QChar c = s[i];
|
Chris@629
|
96
|
Chris@1429
|
97 if (c == '\'') {
|
Chris@1429
|
98 switch (mode) {
|
Chris@1854
|
99 case beg: case sep: mode = q1; break;
|
Chris@1429
|
100 case unq: case q2: tok += c; break;
|
Chris@1854
|
101 case q1:
|
Chris@1854
|
102 if (use_doubling && i+1 < s.length() && s[i+1] == c) {
|
Chris@1854
|
103 tok += c; ++i; break;
|
Chris@1854
|
104 } else {
|
Chris@1854
|
105 mode = unq; break;
|
Chris@1854
|
106 }
|
Chris@1429
|
107 }
|
Chris@629
|
108
|
Chris@1429
|
109 } else if (c == '"') {
|
Chris@1429
|
110 switch (mode) {
|
Chris@1854
|
111 case beg: case sep: mode = q2; break;
|
Chris@1429
|
112 case unq: case q1: tok += c; break;
|
Chris@1854
|
113 case q2:
|
Chris@1854
|
114 if (use_doubling && i+1 < s.length() && s[i+1] == c) {
|
Chris@1854
|
115 tok += c; ++i; break;
|
Chris@1854
|
116 } else {
|
Chris@1854
|
117 mode = unq; break;
|
Chris@1854
|
118 }
|
Chris@1429
|
119 }
|
Chris@629
|
120
|
Chris@1429
|
121 } else if (c == separator || (separator == ' ' && c.isSpace())) {
|
Chris@1429
|
122 switch (mode) {
|
Chris@1854
|
123 case beg: mode = sep; tokens << ""; break;
|
Chris@1429
|
124 case sep: if (separator != ' ') tokens << ""; break;
|
Chris@1429
|
125 case unq: mode = sep; tokens << tok; tok = ""; break;
|
Chris@1429
|
126 case q1: case q2: tok += c; break;
|
Chris@1429
|
127 }
|
Chris@629
|
128
|
Chris@1854
|
129 } else if (c == '\\' && use_backslash) {
|
Chris@1429
|
130 if (++i < s.length()) {
|
Chris@1429
|
131 c = s[i];
|
Chris@1429
|
132 switch (mode) {
|
Chris@1854
|
133 case beg: case sep: mode = unq; tok += c; break;
|
Chris@928
|
134 case unq: case q1: case q2: tok += c; break;
|
Chris@1429
|
135 }
|
Chris@1429
|
136 }
|
Chris@629
|
137
|
Chris@1429
|
138 } else {
|
Chris@1429
|
139 switch (mode) {
|
Chris@1854
|
140 case beg: case sep: mode = unq; tok += c; break;
|
Chris@928
|
141 case unq: case q1: case q2: tok += c; break;
|
Chris@1429
|
142 }
|
Chris@1429
|
143 }
|
Chris@629
|
144 }
|
Chris@629
|
145
|
Chris@1854
|
146 if (tok != "" || mode != beg) {
|
Chris@1022
|
147 if (mode == q1) {
|
Chris@1022
|
148 tokens << ("'" + tok); // turns out it wasn't quoted after all
|
Chris@1022
|
149 } else if (mode == q2) {
|
Chris@1022
|
150 tokens << ("\"" + tok);
|
Chris@1022
|
151 } else {
|
Chris@1022
|
152 tokens << tok;
|
Chris@1022
|
153 }
|
Chris@1022
|
154 }
|
Chris@1022
|
155
|
Chris@629
|
156 return tokens;
|
Chris@629
|
157 }
|
Chris@629
|
158
|
Chris@629
|
159 QStringList
|
Chris@629
|
160 StringBits::split(QString line, QChar separator, bool quoted)
|
Chris@629
|
161 {
|
Chris@629
|
162 if (quoted) {
|
Chris@629
|
163 return splitQuoted(line, separator);
|
Chris@629
|
164 } else {
|
Chris@629
|
165 return line.split(separator,
|
Chris@629
|
166 separator == ' ' ? QString::SkipEmptyParts :
|
Chris@629
|
167 QString::KeepEmptyParts);
|
Chris@629
|
168 }
|
Chris@629
|
169 }
|
Chris@629
|
170
|
Chris@1833
|
171 QString
|
Chris@1833
|
172 StringBits::joinDelimited(QVector<QString> row, QString delimiter)
|
Chris@1833
|
173 {
|
Chris@1833
|
174 QString s;
|
Chris@1833
|
175 for (auto col: row) {
|
Chris@1833
|
176 if (s != "") {
|
Chris@1833
|
177 s += delimiter;
|
Chris@1833
|
178 }
|
Chris@1833
|
179 if (col.contains(delimiter)) {
|
Chris@1833
|
180 col.replace("\"", "\"\"");
|
Chris@1833
|
181 col = "\"" + col + "\"";
|
Chris@1833
|
182 }
|
Chris@1833
|
183 s += col;
|
Chris@1833
|
184 }
|
Chris@1833
|
185 return s;
|
Chris@1833
|
186 }
|
Chris@1833
|
187
|
Chris@1851
|
/**
 * Check whether bytes is structurally valid UTF-8: every continuation
 * byte (10xxxxxx) must belong to a sequence started by a lead byte,
 * and every sequence must contain exactly the number of bytes its
 * lead byte declares.
 *
 * Only the byte structure is examined; the code point values that
 * the sequences encode are not. Lead bytes declaring up to five
 * bytes are length-checked, and lead bytes with a longer prefix are
 * accepted with any number of continuation bytes.
 *
 * @param bytes        The byte string to examine.
 * @param isTruncated  True if bytes may have been cut off at an
 *                     arbitrary point. The final sequence is then
 *                     not checked, since it may be incomplete.
 * @return True if no structural error was found.
 */
bool
StringBits::isValidUtf8(const std::string &bytes, bool isTruncated)
{
    // Number of bytes that a sequence starting with this lead byte
    // should occupy, or 0 if the length is not one we check.
    const auto declaredLength = [](unsigned char lead) -> size_t {
        if (!(lead & 0x80)) return 1;   // 0xxxxxxx
        if (!(lead & 0x20)) return 2;   // 110xxxxx
        if (!(lead & 0x10)) return 3;   // 1110xxxx
        if (!(lead & 0x08)) return 4;   // 11110xxx
        if (!(lead & 0x04)) return 5;   // 111110xx
        return 0;
    };

    size_t seen = 0;      // bytes consumed of the current sequence,
                          // including its lead byte; 0 before any
    size_t expected = 0;  // declared length of the current sequence

    for (unsigned char c : bytes) {

        if ((c & 0xc0) != 0x80) {

            // 11xxxxxx or 0xxxxxxx: first byte of a character
            // sequence. The previous sequence is now complete, so
            // check it had the length it declared.
            if (seen > 0 && expected != 0 && seen != expected) {
                return false;
            }

            expected = declaredLength(c);
            seen = 1;

        } else {

            // 10xxxxxx: second or subsequent byte
            if (seen == 0) {
                // ... without a first byte!
                return false;
            }

            ++seen;
        }
    }

    // at the end

    if (isTruncated) {
        // can't trust any errors arising now
        return true;
    }

    if (seen > 0 && expected != 0 && seen != expected) {
        // final sequence had invalid length
        return false;
    }

    return true;
}
|
Chris@1851
|
256
|
Chris@1851
|
257
|
Chris@1851
|
258
|
Chris@1851
|
259
|