annotate base/StringBits.cpp @ 1833:21c792334c2e sensible-delimited-data-strings

Rewrite all the DelimitedDataString stuff so as to return vectors of individual cell strings rather than having the classes add the delimiters themselves. Rename accordingly to names based on StringExport. Take advantage of this in the CSV writer code so as to properly quote cells that contain delimiter characters.
author Chris Cannam
date Fri, 03 Apr 2020 17:11:05 +0100
parents 48e9f538e6e9
children 91056142abd0
rev   line source
Chris@629 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@629 2
Chris@629 3 /*
Chris@629 4 Sonic Visualiser
Chris@629 5 An audio file viewer and annotation editor.
Chris@629 6 Centre for Digital Music, Queen Mary, University of London.
Chris@629 7
Chris@629 8 This program is free software; you can redistribute it and/or
Chris@629 9 modify it under the terms of the GNU General Public License as
Chris@629 10 published by the Free Software Foundation; either version 2 of the
Chris@629 11 License, or (at your option) any later version. See the file
Chris@629 12 COPYING included with this distribution for more information.
Chris@629 13 */
Chris@629 14
Chris@629 15 /*
Chris@629 16 This is a modified version of a source file from the
Chris@629 17 Rosegarden MIDI and audio sequencer and notation editor.
Chris@629 18 This file copyright 2000-2010 Chris Cannam.
Chris@629 19 */
Chris@629 20
Chris@629 21 #include "StringBits.h"
Chris@629 22
Chris@1022 23 #include "Debug.h"
Chris@1022 24
Chris@1022 25 using namespace std;
Chris@1022 26
Chris@629 27 double
Chris@629 28 StringBits::stringToDoubleLocaleFree(QString s, bool *ok)
Chris@629 29 {
Chris@629 30 int dp = 0;
Chris@629 31 int sign = 1;
Chris@629 32 int i = 0;
Chris@629 33 double result = 0.0;
Chris@629 34 int len = s.length();
Chris@629 35
Chris@629 36 result = 0.0;
Chris@629 37
Chris@629 38 if (ok) *ok = true;
Chris@629 39
Chris@629 40 while (i < len && s[i].isSpace()) ++i;
Chris@629 41 if (i < len && s[i] == '-') sign = -1;
Chris@629 42
Chris@629 43 while (i < len) {
Chris@629 44
Chris@1429 45 QChar c = s[i];
Chris@629 46
Chris@629 47 if (c.isDigit()) {
Chris@629 48
Chris@629 49 double d = c.digitValue();
Chris@629 50
Chris@629 51 if (dp > 0) {
Chris@629 52 for (int p = dp; p > 0; --p) d /= 10.0;
Chris@629 53 ++dp;
Chris@629 54 } else {
Chris@629 55 result *= 10.0;
Chris@629 56 }
Chris@629 57
Chris@629 58 result += d;
Chris@629 59
Chris@629 60 } else if (c == '.') {
Chris@629 61
Chris@629 62 dp = 1;
Chris@629 63
Chris@629 64 } else if (ok) {
Chris@629 65 *ok = false;
Chris@629 66 }
Chris@629 67
Chris@629 68 ++i;
Chris@629 69 }
Chris@629 70
Chris@629 71 return result * sign;
Chris@629 72 }
Chris@629 73
Chris@629 74 QStringList
Chris@629 75 StringBits::splitQuoted(QString s, QChar separator)
Chris@629 76 {
Chris@629 77 QStringList tokens;
Chris@629 78 QString tok;
Chris@629 79
Chris@1022 80 // sep -> just seen a field separator (or start of line)
Chris@1022 81 // unq -> in an unquoted field
Chris@1022 82 // q1 -> in a single-quoted field
Chris@1022 83 // q2 -> in a double-quoted field
Chris@1022 84
Chris@629 85 enum { sep, unq, q1, q2 } mode = sep;
Chris@629 86
Chris@629 87 for (int i = 0; i < s.length(); ++i) {
Chris@1429 88
Chris@1429 89 QChar c = s[i];
Chris@629 90
Chris@1429 91 if (c == '\'') {
Chris@1429 92 switch (mode) {
Chris@1429 93 case sep: mode = q1; break;
Chris@1429 94 case unq: case q2: tok += c; break;
Chris@1429 95 case q1: mode = unq; break;
Chris@1429 96 }
Chris@629 97
Chris@1429 98 } else if (c == '"') {
Chris@1429 99 switch (mode) {
Chris@1429 100 case sep: mode = q2; break;
Chris@1429 101 case unq: case q1: tok += c; break;
Chris@1429 102 case q2: mode = unq; break;
Chris@1429 103 }
Chris@629 104
Chris@1429 105 } else if (c == separator || (separator == ' ' && c.isSpace())) {
Chris@1429 106 switch (mode) {
Chris@1429 107 case sep: if (separator != ' ') tokens << ""; break;
Chris@1429 108 case unq: mode = sep; tokens << tok; tok = ""; break;
Chris@1429 109 case q1: case q2: tok += c; break;
Chris@1429 110 }
Chris@629 111
Chris@1429 112 } else if (c == '\\') {
Chris@1429 113 if (++i < s.length()) {
Chris@1429 114 c = s[i];
Chris@1429 115 switch (mode) {
Chris@1429 116 case sep: mode = unq; tok += c; break;
Chris@928 117 case unq: case q1: case q2: tok += c; break;
Chris@1429 118 }
Chris@1429 119 }
Chris@629 120
Chris@1429 121 } else {
Chris@1429 122 switch (mode) {
Chris@1429 123 case sep: mode = unq; tok += c; break;
Chris@928 124 case unq: case q1: case q2: tok += c; break;
Chris@1429 125 }
Chris@1429 126 }
Chris@629 127 }
Chris@629 128
Chris@1022 129 if (tok != "" || mode != sep) {
Chris@1022 130 if (mode == q1) {
Chris@1022 131 tokens << ("'" + tok); // turns out it wasn't quoted after all
Chris@1022 132 } else if (mode == q2) {
Chris@1022 133 tokens << ("\"" + tok);
Chris@1022 134 } else {
Chris@1022 135 tokens << tok;
Chris@1022 136 }
Chris@1022 137 }
Chris@1022 138
Chris@629 139 return tokens;
Chris@629 140 }
Chris@629 141
Chris@629 142 QStringList
Chris@629 143 StringBits::split(QString line, QChar separator, bool quoted)
Chris@629 144 {
Chris@629 145 if (quoted) {
Chris@629 146 return splitQuoted(line, separator);
Chris@629 147 } else {
Chris@629 148 return line.split(separator,
Chris@629 149 separator == ' ' ? QString::SkipEmptyParts :
Chris@629 150 QString::KeepEmptyParts);
Chris@629 151 }
Chris@629 152 }
Chris@629 153
Chris@1833 154 QString
Chris@1833 155 StringBits::joinDelimited(QVector<QString> row, QString delimiter)
Chris@1833 156 {
Chris@1833 157 QString s;
Chris@1833 158 for (auto col: row) {
Chris@1833 159 if (s != "") {
Chris@1833 160 s += delimiter;
Chris@1833 161 }
Chris@1833 162 if (col.contains(delimiter)) {
Chris@1833 163 col.replace("\"", "\"\"");
Chris@1833 164 col = "\"" + col + "\"";
Chris@1833 165 }
Chris@1833 166 s += col;
Chris@1833 167 }
Chris@1833 168 return s;
Chris@1833 169 }
Chris@1833 170