comparison data/fileio/CSVFormat.cpp @ 1524:64ef24ebb19c

Some CSV format tests and minor fixes
author Chris Cannam
date Fri, 14 Sep 2018 09:25:17 +0100
parents c1b2eab6ac51
children a92e94215863
comparison
equal deleted inserted replaced
1523:c1b2eab6ac51 1524:64ef24ebb19c
31 m_separator(""), 31 m_separator(""),
32 m_sampleRate(44100), 32 m_sampleRate(44100),
33 m_windowSize(1024), 33 m_windowSize(1024),
34 m_allowQuoting(true) 34 m_allowQuoting(true)
35 { 35 {
36 guessFormatFor(path); 36 (void)guessFormatFor(path);
37 } 37 }
38 38
39 void 39 bool
40 CSVFormat::guessFormatFor(QString path) 40 CSVFormat::guessFormatFor(QString path)
41 { 41 {
42 m_separator = ""; // to prompt guessing for it
43
42 m_modelType = TwoDimensionalModel; 44 m_modelType = TwoDimensionalModel;
43 m_timingType = ExplicitTiming; 45 m_timingType = ExplicitTiming;
44 m_timeUnits = TimeSeconds; 46 m_timeUnits = TimeSeconds;
45 47
46 m_maxExampleCols = 0; 48 m_maxExampleCols = 0;
51 m_columnQualities.clear(); 53 m_columnQualities.clear();
52 m_columnPurposes.clear(); 54 m_columnPurposes.clear();
53 m_prevValues.clear(); 55 m_prevValues.clear();
54 56
55 QFile file(path); 57 QFile file(path);
56 if (!file.exists()) return; 58 if (!file.exists()) {
57 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return; 59 SVCERR << "CSVFormat::guessFormatFor(" << path
60 << "): File does not exist" << endl;
61 return false;
62 }
63 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
64 SVCERR << "CSVFormat::guessFormatFor(" << path
65 << "): File could not be opened for reading" << endl;
66 return false;
67 }
68 SVDEBUG << "CSVFormat::guessFormatFor(" << path << ")" << endl;
58 69
59 QTextStream in(&file); 70 QTextStream in(&file);
60 in.seek(0); 71 in.seek(0);
61 72
62 int lineno = 0; 73 int lineno = 0;
83 if (lineno >= 150) break; 94 if (lineno >= 150) break;
84 } 95 }
85 96
86 guessPurposes(); 97 guessPurposes();
87 guessAudioSampleRange(); 98 guessAudioSampleRange();
99
100 return true;
88 } 101 }
89 102
90 void 103 void
91 CSVFormat::guessSeparator(QString line) 104 CSVFormat::guessSeparator(QString line)
92 { 105 {
93 char candidates[] = { ',', '\t', ' ', '|', '/', ':' }; 106 QString candidates = "\t|,/: ";
94 for (int i = 0; i < int(sizeof(candidates)/sizeof(candidates[0])); ++i) { 107
95 if (StringBits::split(line, candidates[i], m_allowQuoting).size() >= 2) { 108 for (int i = 0; i < candidates.length(); ++i) {
109 auto bits = StringBits::split(line, candidates[i], m_allowQuoting);
110 if (bits.size() >= 2) {
111 SVDEBUG << "Successfully split the line into:" << endl;
112 for (auto b: bits) {
113 SVDEBUG << b << endl;
114 }
96 m_separator = candidates[i]; 115 m_separator = candidates[i];
97 SVDEBUG << "Estimated column separator: '" << m_separator 116 SVDEBUG << "Estimated column separator: '" << m_separator
98 << "'" << endl; 117 << "'" << endl;
99 return; 118 return;
100 } 119 }
102 } 121 }
103 122
104 void 123 void
105 CSVFormat::guessQualities(QString line, int lineno) 124 CSVFormat::guessQualities(QString line, int lineno)
106 { 125 {
107 if (m_separator == "") guessSeparator(line); 126 if (m_separator == "") {
127 guessSeparator(line);
128 }
108 129
109 QStringList list = StringBits::split(line, getSeparator(), m_allowQuoting); 130 QStringList list = StringBits::split(line, getSeparator(), m_allowQuoting);
110 131
111 int cols = list.size(); 132 int cols = list.size();
112 if (lineno == 0 || (cols > m_columnCount)) m_columnCount = cols; 133 if (lineno == 0 || (cols > m_columnCount)) m_columnCount = cols;
165 if (value < -1.f || value > 1.f) { 186 if (value < -1.f || value > 1.f) {
166 small = false; 187 small = false;
167 } 188 }
168 } else { 189 } else {
169 numeric = false; 190 numeric = false;
191
192 // If the column is not numeric, it can't be any of
193 // these things either
194 integral = false;
195 increasing = false;
196 small = false;
197 large = false;
198 signd = false;
170 } 199 }
171 } 200 }
172 201
173 if (numeric) { 202 if (numeric) {
174 203
184 } 213 }
185 } 214 }
186 215
187 m_prevValues[i] = value; 216 m_prevValues[i] = value;
188 } 217 }
189 218
190 m_columnQualities[i] = 219 m_columnQualities[i] =
191 (numeric ? ColumnNumeric : 0) | 220 (numeric ? ColumnNumeric : 0) |
192 (integral ? ColumnIntegral : 0) | 221 (integral ? ColumnIntegral : 0) |
193 (increasing ? ColumnIncreasing : 0) | 222 (increasing ? ColumnIncreasing : 0) |
194 (small ? ColumnSmall : 0) | 223 (small ? ColumnSmall : 0) |