StringBits.cpp
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2 
3 /*
4  Sonic Visualiser
5  An audio file viewer and annotation editor.
6  Centre for Digital Music, Queen Mary, University of London.
7 
8  This program is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 2 of the
11  License, or (at your option) any later version. See the file
12  COPYING included with this distribution for more information.
13 */
14 
15 /*
16  This is a modified version of a source file from the
17  Rosegarden MIDI and audio sequencer and notation editor.
18  This file copyright 2000-2010 Chris Cannam.
19 */
20 
21 #include "StringBits.h"
22 
23 #include "Debug.h"
24 
25 using namespace std;
26 
27 double
29 {
30  int dp = 0;
31  int sign = 1;
32  int i = 0;
33  double result = 0.0;
34  int len = s.length();
35 
36  result = 0.0;
37 
38  if (ok) *ok = true;
39 
40  while (i < len && s[i].isSpace()) ++i;
41  if (i < len && s[i] == '-') sign = -1;
42 
43  while (i < len) {
44 
45  QChar c = s[i];
46 
47  if (c.isDigit()) {
48 
49  double d = c.digitValue();
50 
51  if (dp > 0) {
52  for (int p = dp; p > 0; --p) d /= 10.0;
53  ++dp;
54  } else {
55  result *= 10.0;
56  }
57 
58  result += d;
59 
60  } else if (c == '.') {
61 
62  dp = 1;
63 
64  } else if (ok) {
65  *ok = false;
66  }
67 
68  ++i;
69  }
70 
71  return result * sign;
72 }
73 
74 QStringList
75 StringBits::splitQuoted(QString s, QChar separator, EscapeMode escapeMode)
76 {
77  QStringList tokens;
78  QString tok;
79 
80  // beg -> at beginning of line
81  // sep -> just seen a field separator
82  // unq -> in an unquoted field
83  // q1 -> in a single-quoted field
84  // q2 -> in a double-quoted field
85 
86  enum { beg, sep, unq, q1, q2 } mode = beg;
87 
88  bool use_doubling = (escapeMode == EscapeDoubling ||
89  escapeMode == EscapeAny);
90  bool use_backslash = (escapeMode == EscapeBackslash ||
91  escapeMode == EscapeAny);
92 
93  for (int i = 0; i < s.length(); ++i) {
94 
95  QChar c = s[i];
96 
97  if (c == '\'') {
98  switch (mode) {
99  case beg: case sep: mode = q1; break;
100  case unq: case q2: tok += c; break;
101  case q1:
102  if (use_doubling && i+1 < s.length() && s[i+1] == c) {
103  tok += c; ++i; break;
104  } else {
105  mode = unq; break;
106  }
107  }
108 
109  } else if (c == '"') {
110  switch (mode) {
111  case beg: case sep: mode = q2; break;
112  case unq: case q1: tok += c; break;
113  case q2:
114  if (use_doubling && i+1 < s.length() && s[i+1] == c) {
115  tok += c; ++i; break;
116  } else {
117  mode = unq; break;
118  }
119  }
120 
121  } else if (c == separator || (separator == ' ' && c.isSpace())) {
122  switch (mode) {
123  case beg: mode = sep; tokens << ""; break;
124  case sep: if (separator != ' ') tokens << ""; break;
125  case unq: mode = sep; tokens << tok; tok = ""; break;
126  case q1: case q2: tok += c; break;
127  }
128 
129  } else if (c == '\\' && use_backslash) {
130  if (++i < s.length()) {
131  c = s[i];
132  switch (mode) {
133  case beg: case sep: mode = unq; tok += c; break;
134  case unq: case q1: case q2: tok += c; break;
135  }
136  }
137 
138  } else {
139  switch (mode) {
140  case beg: case sep: mode = unq; tok += c; break;
141  case unq: case q1: case q2: tok += c; break;
142  }
143  }
144  }
145 
146  if (tok != "" || mode != beg) {
147  if (mode == q1) {
148  tokens << ("'" + tok); // turns out it wasn't quoted after all
149  } else if (mode == q2) {
150  tokens << ("\"" + tok);
151  } else {
152  tokens << tok;
153  }
154  }
155 
156  return tokens;
157 }
158 
159 QStringList
160 StringBits::split(QString line, QChar separator, bool quoted)
161 {
162  if (quoted) {
163  return splitQuoted(line, separator);
164  } else {
165  return line.split(separator,
166  separator == ' ' ? QString::SkipEmptyParts :
167  QString::KeepEmptyParts);
168  }
169 }
170 
171 QString
172 StringBits::joinDelimited(QVector<QString> row, QString delimiter)
173 {
174  QString s;
175  for (auto col: row) {
176  if (s != "") {
177  s += delimiter;
178  }
179  if (col.contains(delimiter)) {
180  col.replace("\"", "\"\"");
181  col = "\"" + col + "\"";
182  }
183  s += col;
184  }
185  return s;
186 }
187 
188 bool
189 StringBits::isValidUtf8(const std::string &bytes, bool isTruncated)
190 {
191  size_t len = bytes.length();
192  size_t mblen = 0;
193  unsigned char first = '\0';
194 
195  for (size_t i = 0; i < len; ++i) {
196 
197  unsigned char c = bytes[i];
198 
199  if (((c & 0xc0) == 0xc0) || !(c & 0x80)) {
200 
201  // 11xxxxxx or 0xxxxxxx: first byte of a character sequence
202 
203  if (mblen > 0) {
204 
205  // have we seen a valid sequence?
206  size_t length =
207  (!(first & 0x20)) ? 2 :
208  (!(first & 0x10)) ? 3 :
209  (!(first & 0x08)) ? 4 :
210  (!(first & 0x04)) ? 5 : 0;
211 
212  if (length != 0 && mblen != length) {
213  // previous multibyte sequence had invalid length
214  return false;
215  }
216  }
217 
218  mblen = 0;
219  first = c;
220 
221  } else {
222 
223  // second or subsequent byte
224 
225  if (mblen == 0) {
226  // ... without a first byte!
227  return false;
228  }
229  }
230  }
231 
232  // at the end
233 
234  if (isTruncated) {
235  // can't trust any errors arising now
236  return true;
237  }
238 
239  if (mblen > 0) {
240 
241  // have we seen a valid sequence?
242  size_t length =
243  (!(first & 0x20)) ? 2 :
244  (!(first & 0x10)) ? 3 :
245  (!(first & 0x08)) ? 4 :
246  (!(first & 0x04)) ? 5 : 0;
247 
248  if (length != 0 && mblen != length) {
249  // final multibyte sequence had invalid length
250  return false;
251  }
252  }
253 
254  return true;
255 }
256 
257 
258 
259 
static bool isValidUtf8(const std::string &bytes, bool isTruncated)
Return true if the given byte array contains a valid UTF-8 sequence, false if not.
Definition: StringBits.cpp:189
static QStringList split(QString s, QChar separator, bool quoted)
Split a string at the given separator character.
Definition: StringBits.cpp:160
static QString joinDelimited(QVector< QString > row, QString delimiter)
Join a vector of strings into a single string, with the delimiter as the joining string.
Definition: StringBits.cpp:172
static QStringList splitQuoted(QString s, QChar separator, EscapeMode escapeMode=EscapeAny)
Split a string at the given separator character, allowing quoted sections that contain the separator...
Definition: StringBits.cpp:75
static double stringToDoubleLocaleFree(QString s, bool *ok=0)
Convert a string to a double using basic "C"-locale syntax, i.e. always using '.' as the decimal point.
Definition: StringBits.cpp:28