Chris@392
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@392
|
2
|
Chris@392
|
3 /*
|
Chris@392
|
4 Sonic Visualiser
|
Chris@392
|
5 An audio file viewer and annotation editor.
|
Chris@392
|
6 Centre for Digital Music, Queen Mary, University of London.
|
Chris@392
|
7 This file copyright 2006 Chris Cannam.
|
Chris@392
|
8
|
Chris@392
|
9 This program is free software; you can redistribute it and/or
|
Chris@392
|
10 modify it under the terms of the GNU General Public License as
|
Chris@392
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@392
|
12 License, or (at your option) any later version. See the file
|
Chris@392
|
13 COPYING included with this distribution for more information.
|
Chris@392
|
14 */
|
Chris@392
|
15
|
Chris@392
|
16 #include "CSVFormat.h"
|
Chris@392
|
17
|
Chris@629
|
18 #include "base/StringBits.h"
|
Chris@629
|
19
|
Chris@392
|
20 #include <QFile>
|
Chris@392
|
21 #include <QString>
|
Chris@392
|
22 #include <QRegExp>
|
Chris@392
|
23 #include <QStringList>
|
Chris@392
|
24 #include <QTextStream>
|
Chris@392
|
25
|
Chris@392
|
26 #include <iostream>
|
Chris@392
|
27
|
Chris@629
|
28 CSVFormat::CSVFormat(QString path) :
|
Chris@629
|
29 m_separator(""),
|
Chris@392
|
30 m_sampleRate(44100),
|
Chris@392
|
31 m_windowSize(1024),
|
Chris@629
|
32 m_allowQuoting(true)
|
Chris@392
|
33 {
|
Chris@629
|
34 guessFormatFor(path);
|
Chris@629
|
35 }
|
Chris@629
|
36
|
Chris@629
|
37 void
|
Chris@629
|
38 CSVFormat::guessFormatFor(QString path)
|
Chris@629
|
39 {
|
Chris@629
|
40 m_modelType = TwoDimensionalModel;
|
Chris@629
|
41 m_timingType = ExplicitTiming;
|
Chris@629
|
42 m_timeUnits = TimeSeconds;
|
Chris@629
|
43
|
Chris@629
|
44 m_maxExampleCols = 0;
|
Chris@629
|
45 m_columnCount = 0;
|
Chris@629
|
46 m_variableColumnCount = false;
|
Chris@629
|
47
|
Chris@629
|
48 m_example.clear();
|
Chris@629
|
49 m_columnQualities.clear();
|
Chris@629
|
50 m_columnPurposes.clear();
|
Chris@629
|
51 m_prevValues.clear();
|
Chris@629
|
52
|
Chris@629
|
53 QFile file(path);
|
Chris@392
|
54 if (!file.exists()) return;
|
Chris@392
|
55 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return;
|
Chris@392
|
56
|
Chris@392
|
57 QTextStream in(&file);
|
Chris@392
|
58 in.seek(0);
|
Chris@392
|
59
|
Chris@629
|
60 int lineno = 0;
|
Chris@392
|
61
|
Chris@392
|
62 while (!in.atEnd()) {
|
Chris@392
|
63
|
Chris@392
|
64 // See comment about line endings in CSVFileReader::load()
|
Chris@392
|
65
|
Chris@392
|
66 QString chunk = in.readLine();
|
Chris@392
|
67 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
|
Chris@392
|
68
|
Chris@392
|
69 for (size_t li = 0; li < lines.size(); ++li) {
|
Chris@392
|
70
|
Chris@392
|
71 QString line = lines[li];
|
Chris@629
|
72 if (line.startsWith("#") || line == "") continue;
|
Chris@392
|
73
|
Chris@629
|
74 guessQualities(line, lineno);
|
Chris@392
|
75
|
Chris@629
|
76 if (++lineno == 50) break;
|
Chris@629
|
77 }
|
Chris@629
|
78 }
|
Chris@392
|
79
|
Chris@629
|
80 guessPurposes();
|
Chris@629
|
81 }
|
Chris@629
|
82
|
Chris@629
|
83 void
|
Chris@629
|
84 CSVFormat::guessSeparator(QString line)
|
Chris@629
|
85 {
|
Chris@629
|
86 char candidates[] = { ',', '\t', ' ', '|', '/', ':' };
|
Chris@629
|
87 for (int i = 0; i < sizeof(candidates)/sizeof(candidates[0]); ++i) {
|
Chris@629
|
88 if (StringBits::split(line, candidates[i], m_allowQuoting).size() >= 2) {
|
Chris@629
|
89 m_separator = candidates[i];
|
Chris@629
|
90 return;
|
Chris@629
|
91 }
|
Chris@629
|
92 }
|
Chris@629
|
93 m_separator = " ";
|
Chris@629
|
94 }
|
Chris@629
|
95
|
Chris@629
|
96 void
|
Chris@629
|
97 CSVFormat::guessQualities(QString line, int lineno)
|
Chris@629
|
98 {
|
Chris@629
|
99 if (m_separator == "") guessSeparator(line);
|
Chris@629
|
100
|
Chris@629
|
101 QStringList list = StringBits::split(line, m_separator[0], m_allowQuoting);
|
Chris@629
|
102
|
Chris@629
|
103 int cols = list.size();
|
Chris@629
|
104 if (lineno == 0 || (cols < m_columnCount)) m_columnCount = cols;
|
Chris@629
|
105 if (cols != m_columnCount) m_variableColumnCount = true;
|
Chris@629
|
106
|
Chris@629
|
107 // All columns are regarded as having these qualities until we see
|
Chris@629
|
108 // something that indicates otherwise:
|
Chris@629
|
109
|
Chris@629
|
110 ColumnQualities defaultQualities =
|
Chris@629
|
111 ColumnNumeric | ColumnIntegral | ColumnIncreasing;
|
Chris@629
|
112
|
Chris@629
|
113 for (int i = 0; i < cols; ++i) {
|
Chris@629
|
114
|
Chris@629
|
115 while (m_columnQualities.size() <= i) {
|
Chris@629
|
116 m_columnQualities.push_back(defaultQualities);
|
Chris@629
|
117 m_prevValues.push_back(0.f);
|
Chris@629
|
118 }
|
Chris@629
|
119
|
Chris@629
|
120 QString s(list[i]);
|
Chris@629
|
121 bool ok = false;
|
Chris@629
|
122
|
Chris@629
|
123 ColumnQualities qualities = m_columnQualities[i];
|
Chris@629
|
124
|
Chris@629
|
125 bool numeric = (qualities & ColumnNumeric);
|
Chris@629
|
126 bool integral = (qualities & ColumnIntegral);
|
Chris@629
|
127 bool increasing = (qualities & ColumnIncreasing);
|
Chris@629
|
128 bool large = (qualities & ColumnLarge); // this one defaults to off
|
Chris@629
|
129
|
Chris@629
|
130 float value = 0.f;
|
Chris@629
|
131
|
Chris@629
|
132 //!!! how to take into account headers?
|
Chris@629
|
133
|
Chris@629
|
134 if (numeric) {
|
Chris@629
|
135 value = s.toFloat(&ok);
|
Chris@629
|
136 if (!ok) {
|
Chris@629
|
137 value = (float)StringBits::stringToDoubleLocaleFree(s, &ok);
|
Chris@629
|
138 }
|
Chris@629
|
139 if (ok) {
|
Chris@629
|
140 if (lineno < 2 && value > 1000.f) large = true;
|
Chris@629
|
141 } else {
|
Chris@629
|
142 numeric = false;
|
Chris@629
|
143 }
|
Chris@629
|
144 }
|
Chris@629
|
145
|
Chris@629
|
146 if (numeric) {
|
Chris@629
|
147
|
Chris@629
|
148 if (integral) {
|
Chris@629
|
149 if (s.contains('.') || s.contains(',')) {
|
Chris@629
|
150 integral = false;
|
Chris@392
|
151 }
|
Chris@392
|
152 }
|
Chris@392
|
153
|
Chris@629
|
154 if (increasing) {
|
Chris@629
|
155 if (lineno > 0 && value <= m_prevValues[i]) {
|
Chris@629
|
156 increasing = false;
|
Chris@392
|
157 }
|
Chris@392
|
158 }
|
Chris@392
|
159
|
Chris@629
|
160 m_prevValues[i] = value;
|
Chris@629
|
161 }
|
Chris@392
|
162
|
Chris@629
|
163 m_columnQualities[i] =
|
Chris@629
|
164 (numeric ? ColumnNumeric : 0) |
|
Chris@629
|
165 (integral ? ColumnIntegral : 0) |
|
Chris@629
|
166 (increasing ? ColumnIncreasing : 0) |
|
Chris@629
|
167 (large ? ColumnLarge : 0);
|
Chris@629
|
168 }
|
Chris@392
|
169
|
Chris@629
|
170 if (lineno < 10) {
|
Chris@629
|
171 m_example.push_back(list);
|
Chris@629
|
172 if (lineno == 0 || cols > m_maxExampleCols) {
|
Chris@629
|
173 m_maxExampleCols = cols;
|
Chris@392
|
174 }
|
Chris@392
|
175 }
|
Chris@392
|
176
|
Chris@676
|
177 // std::cerr << "Estimated column qualities: ";
|
Chris@676
|
178 // for (int i = 0; i < m_columnCount; ++i) {
|
Chris@676
|
179 // std::cerr << int(m_columnQualities[i]) << " ";
|
Chris@676
|
180 // }
|
Chris@676
|
181 // std::cerr << std::endl;
|
Chris@629
|
182 }
|
Chris@629
|
183
|
Chris@629
|
184 void
|
Chris@629
|
185 CSVFormat::guessPurposes()
|
Chris@629
|
186 {
|
Chris@629
|
187 m_timingType = CSVFormat::ImplicitTiming;
|
Chris@629
|
188 m_timeUnits = CSVFormat::TimeWindows;
|
Chris@392
|
189
|
Chris@629
|
190 int timingColumnCount = 0;
|
Chris@629
|
191
|
Chris@629
|
192 for (int i = 0; i < m_columnCount; ++i) {
|
Chris@629
|
193
|
Chris@629
|
194 ColumnPurpose purpose = ColumnUnknown;
|
Chris@629
|
195 bool primary = (i == 0);
|
Chris@392
|
196
|
Chris@629
|
197 ColumnQualities qualities = m_columnQualities[i];
|
Chris@392
|
198
|
Chris@629
|
199 bool numeric = (qualities & ColumnNumeric);
|
Chris@629
|
200 bool integral = (qualities & ColumnIntegral);
|
Chris@629
|
201 bool increasing = (qualities & ColumnIncreasing);
|
Chris@629
|
202 bool large = (qualities & ColumnLarge);
|
Chris@629
|
203
|
Chris@629
|
204 bool timingColumn = (numeric && increasing);
|
Chris@629
|
205
|
Chris@629
|
206 if (timingColumn) {
|
Chris@629
|
207
|
Chris@629
|
208 ++timingColumnCount;
|
Chris@629
|
209
|
Chris@629
|
210 if (primary) {
|
Chris@629
|
211
|
Chris@629
|
212 purpose = ColumnStartTime;
|
Chris@629
|
213
|
Chris@629
|
214 m_timingType = ExplicitTiming;
|
Chris@629
|
215
|
Chris@629
|
216 if (integral && large) {
|
Chris@629
|
217 m_timeUnits = TimeAudioFrames;
|
Chris@629
|
218 } else {
|
Chris@629
|
219 m_timeUnits = TimeSeconds;
|
Chris@629
|
220 }
|
Chris@629
|
221
|
Chris@629
|
222 } else {
|
Chris@629
|
223
|
Chris@629
|
224 if (timingColumnCount == 2 && m_timingType == ExplicitTiming) {
|
Chris@629
|
225 purpose = ColumnEndTime;
|
Chris@629
|
226 }
|
Chris@629
|
227 }
|
Chris@629
|
228 }
|
Chris@629
|
229
|
Chris@629
|
230 if (purpose == ColumnUnknown) {
|
Chris@629
|
231 if (numeric) {
|
Chris@629
|
232 purpose = ColumnValue;
|
Chris@629
|
233 } else {
|
Chris@629
|
234 purpose = ColumnLabel;
|
Chris@629
|
235 }
|
Chris@629
|
236 }
|
Chris@629
|
237
|
Chris@631
|
238 setColumnPurpose(i, purpose);
|
Chris@629
|
239 }
|
Chris@629
|
240
|
Chris@629
|
241 int valueCount = 0;
|
Chris@629
|
242 for (int i = 0; i < m_columnCount; ++i) {
|
Chris@629
|
243 if (m_columnPurposes[i] == ColumnValue) ++valueCount;
|
Chris@629
|
244 }
|
Chris@629
|
245
|
Chris@630
|
246 if (valueCount == 2 && timingColumnCount == 1) {
|
Chris@630
|
247 // If we have exactly two apparent value columns and only one
|
Chris@630
|
248 // timing column, but one value column is integral and the
|
Chris@630
|
249 // other is not, guess that whichever one matches the integral
|
Chris@630
|
250 // status of the time column is either duration or end time
|
Chris@630
|
251 if (m_timingType == ExplicitTiming) {
|
Chris@630
|
252 int a = -1, b = -1;
|
Chris@630
|
253 for (int i = 0; i < m_columnCount; ++i) {
|
Chris@630
|
254 if (m_columnPurposes[i] == ColumnValue) {
|
Chris@630
|
255 if (a == -1) a = i;
|
Chris@630
|
256 else b = i;
|
Chris@630
|
257 }
|
Chris@630
|
258 }
|
Chris@630
|
259 if ((m_columnQualities[a] & ColumnIntegral) !=
|
Chris@630
|
260 (m_columnQualities[b] & ColumnIntegral)) {
|
Chris@630
|
261 int timecol = a;
|
Chris@630
|
262 if ((m_columnQualities[a] & ColumnIntegral) !=
|
Chris@630
|
263 (m_columnQualities[0] & ColumnIntegral)) {
|
Chris@630
|
264 timecol = b;
|
Chris@630
|
265 }
|
Chris@630
|
266 if (m_columnQualities[timecol] & ColumnIncreasing) {
|
Chris@630
|
267 // This shouldn't happen; should have been settled above
|
Chris@630
|
268 m_columnPurposes[timecol] = ColumnEndTime;
|
Chris@630
|
269 } else {
|
Chris@630
|
270 m_columnPurposes[timecol] = ColumnDuration;
|
Chris@630
|
271 }
|
Chris@630
|
272 --valueCount;
|
Chris@630
|
273 }
|
Chris@630
|
274 }
|
Chris@630
|
275 }
|
Chris@630
|
276
|
Chris@631
|
277 if (timingColumnCount > 1) {
|
Chris@631
|
278 m_modelType = TwoDimensionalModelWithDuration;
|
Chris@392
|
279 } else {
|
Chris@631
|
280 if (valueCount == 0) {
|
Chris@631
|
281 m_modelType = OneDimensionalModel;
|
Chris@631
|
282 } else if (valueCount == 1) {
|
Chris@631
|
283 m_modelType = TwoDimensionalModel;
|
Chris@631
|
284 } else {
|
Chris@631
|
285 m_modelType = ThreeDimensionalModel;
|
Chris@631
|
286 }
|
Chris@629
|
287 }
|
Chris@392
|
288
|
Chris@676
|
289 // std::cerr << "Estimated column purposes: ";
|
Chris@676
|
290 // for (int i = 0; i < m_columnCount; ++i) {
|
Chris@676
|
291 // std::cerr << int(m_columnPurposes[i]) << " ";
|
Chris@676
|
292 // }
|
Chris@676
|
293 // std::cerr << std::endl;
|
Chris@392
|
294
|
Chris@676
|
295 // std::cerr << "Estimated model type: " << m_modelType << std::endl;
|
Chris@676
|
296 // std::cerr << "Estimated timing type: " << m_timingType << std::endl;
|
Chris@676
|
297 // std::cerr << "Estimated units: " << m_timeUnits << std::endl;
|
Chris@392
|
298 }
|
Chris@392
|
299
|
Chris@631
|
300 CSVFormat::ColumnPurpose
|
Chris@631
|
301 CSVFormat::getColumnPurpose(int i)
|
Chris@631
|
302 {
|
Chris@631
|
303 while (m_columnPurposes.size() <= i) {
|
Chris@631
|
304 m_columnPurposes.push_back(ColumnUnknown);
|
Chris@631
|
305 }
|
Chris@631
|
306 return m_columnPurposes[i];
|
Chris@631
|
307 }
|
Chris@629
|
308
|
Chris@631
|
309 CSVFormat::ColumnPurpose
|
Chris@631
|
310 CSVFormat::getColumnPurpose(int i) const
|
Chris@631
|
311 {
|
Chris@668
|
312 if (m_columnPurposes.size() <= i) {
|
Chris@668
|
313 return ColumnUnknown;
|
Chris@668
|
314 }
|
Chris@631
|
315 return m_columnPurposes[i];
|
Chris@631
|
316 }
|
Chris@631
|
317
|
Chris@631
|
318 void
|
Chris@631
|
319 CSVFormat::setColumnPurpose(int i, ColumnPurpose p)
|
Chris@631
|
320 {
|
Chris@631
|
321 while (m_columnPurposes.size() <= i) {
|
Chris@631
|
322 m_columnPurposes.push_back(ColumnUnknown);
|
Chris@631
|
323 }
|
Chris@631
|
324 m_columnPurposes[i] = p;
|
Chris@631
|
325 }
|
Chris@631
|
326
|
Chris@631
|
327
|
Chris@631
|
328
|
Chris@631
|
329
|