Chris@392
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@392
|
2
|
Chris@392
|
3 /*
|
Chris@392
|
4 Sonic Visualiser
|
Chris@392
|
5 An audio file viewer and annotation editor.
|
Chris@392
|
6 Centre for Digital Music, Queen Mary, University of London.
|
Chris@392
|
7 This file copyright 2006 Chris Cannam.
|
Chris@392
|
8
|
Chris@392
|
9 This program is free software; you can redistribute it and/or
|
Chris@392
|
10 modify it under the terms of the GNU General Public License as
|
Chris@392
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@392
|
12 License, or (at your option) any later version. See the file
|
Chris@392
|
13 COPYING included with this distribution for more information.
|
Chris@392
|
14 */
|
Chris@392
|
15
|
Chris@392
|
16 #include "CSVFormat.h"
|
Chris@392
|
17
|
Chris@629
|
18 #include "base/StringBits.h"
|
Chris@629
|
19
|
Chris@392
|
20 #include <QFile>
|
Chris@392
|
21 #include <QString>
|
Chris@392
|
22 #include <QRegExp>
|
Chris@392
|
23 #include <QStringList>
|
Chris@392
|
24 #include <QTextStream>
|
Chris@392
|
25
|
Chris@392
|
26 #include <iostream>
|
Chris@392
|
27
|
Chris@1362
|
28 #include "base/Debug.h"
|
Chris@1362
|
29
|
Chris@629
|
30 CSVFormat::CSVFormat(QString path) :
|
Chris@629
|
31 m_separator(""),
|
Chris@392
|
32 m_sampleRate(44100),
|
Chris@392
|
33 m_windowSize(1024),
|
Chris@629
|
34 m_allowQuoting(true)
|
Chris@392
|
35 {
|
Chris@629
|
36 guessFormatFor(path);
|
Chris@629
|
37 }
|
Chris@629
|
38
|
Chris@629
|
39 void
|
Chris@629
|
40 CSVFormat::guessFormatFor(QString path)
|
Chris@629
|
41 {
|
Chris@629
|
42 m_modelType = TwoDimensionalModel;
|
Chris@629
|
43 m_timingType = ExplicitTiming;
|
Chris@629
|
44 m_timeUnits = TimeSeconds;
|
Chris@629
|
45
|
Chris@629
|
46 m_maxExampleCols = 0;
|
Chris@629
|
47 m_columnCount = 0;
|
Chris@629
|
48 m_variableColumnCount = false;
|
Chris@629
|
49
|
Chris@629
|
50 m_example.clear();
|
Chris@629
|
51 m_columnQualities.clear();
|
Chris@629
|
52 m_columnPurposes.clear();
|
Chris@629
|
53 m_prevValues.clear();
|
Chris@629
|
54
|
Chris@629
|
55 QFile file(path);
|
Chris@392
|
56 if (!file.exists()) return;
|
Chris@392
|
57 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return;
|
Chris@392
|
58
|
Chris@392
|
59 QTextStream in(&file);
|
Chris@392
|
60 in.seek(0);
|
Chris@392
|
61
|
Chris@629
|
62 int lineno = 0;
|
Chris@392
|
63
|
Chris@392
|
64 while (!in.atEnd()) {
|
Chris@392
|
65
|
Chris@392
|
66 // See comment about line endings in CSVFileReader::load()
|
Chris@392
|
67
|
Chris@392
|
68 QString chunk = in.readLine();
|
Chris@392
|
69 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
|
Chris@392
|
70
|
Chris@897
|
71 for (int li = 0; li < lines.size(); ++li) {
|
Chris@392
|
72
|
Chris@392
|
73 QString line = lines[li];
|
Chris@1512
|
74 if (line.startsWith("#") || line == "") {
|
Chris@1512
|
75 continue;
|
Chris@1512
|
76 }
|
Chris@392
|
77
|
Chris@629
|
78 guessQualities(line, lineno);
|
Chris@392
|
79
|
Chris@840
|
80 ++lineno;
|
Chris@629
|
81 }
|
Chris@840
|
82
|
Chris@1512
|
83 if (lineno >= 150) break;
|
Chris@629
|
84 }
|
Chris@392
|
85
|
Chris@629
|
86 guessPurposes();
|
Chris@1515
|
87 guessAudioSampleRange();
|
Chris@629
|
88 }
|
Chris@629
|
89
|
Chris@629
|
90 void
|
Chris@629
|
91 CSVFormat::guessSeparator(QString line)
|
Chris@629
|
92 {
|
Chris@629
|
93 char candidates[] = { ',', '\t', ' ', '|', '/', ':' };
|
Chris@897
|
94 for (int i = 0; i < int(sizeof(candidates)/sizeof(candidates[0])); ++i) {
|
Chris@629
|
95 if (StringBits::split(line, candidates[i], m_allowQuoting).size() >= 2) {
|
Chris@629
|
96 m_separator = candidates[i];
|
Chris@1510
|
97 SVDEBUG << "Estimated column separator: '" << m_separator
|
Chris@1510
|
98 << "'" << endl;
|
Chris@629
|
99 return;
|
Chris@629
|
100 }
|
Chris@629
|
101 }
|
Chris@629
|
102 }
|
Chris@629
|
103
|
Chris@629
|
104 void
|
Chris@629
|
105 CSVFormat::guessQualities(QString line, int lineno)
|
Chris@629
|
106 {
|
Chris@629
|
107 if (m_separator == "") guessSeparator(line);
|
Chris@629
|
108
|
Chris@1362
|
109 QStringList list = StringBits::split(line, getSeparator(), m_allowQuoting);
|
Chris@629
|
110
|
Chris@629
|
111 int cols = list.size();
|
Chris@991
|
112 if (lineno == 0 || (cols > m_columnCount)) m_columnCount = cols;
|
Chris@629
|
113 if (cols != m_columnCount) m_variableColumnCount = true;
|
Chris@629
|
114
|
Chris@629
|
115 // All columns are regarded as having these qualities until we see
|
Chris@629
|
116 // something that indicates otherwise:
|
Chris@629
|
117
|
Chris@629
|
118 ColumnQualities defaultQualities =
|
Chris@1512
|
119 ColumnNumeric | ColumnIntegral | ColumnSmall |
|
Chris@1512
|
120 ColumnIncreasing | ColumnNearEmpty;
|
Chris@629
|
121
|
Chris@629
|
122 for (int i = 0; i < cols; ++i) {
|
Chris@1429
|
123
|
Chris@629
|
124 while (m_columnQualities.size() <= i) {
|
Chris@629
|
125 m_columnQualities.push_back(defaultQualities);
|
Chris@629
|
126 m_prevValues.push_back(0.f);
|
Chris@629
|
127 }
|
Chris@629
|
128
|
Chris@629
|
129 QString s(list[i]);
|
Chris@629
|
130 bool ok = false;
|
Chris@629
|
131
|
Chris@629
|
132 ColumnQualities qualities = m_columnQualities[i];
|
Chris@629
|
133
|
Chris@629
|
134 bool numeric = (qualities & ColumnNumeric);
|
Chris@629
|
135 bool integral = (qualities & ColumnIntegral);
|
Chris@629
|
136 bool increasing = (qualities & ColumnIncreasing);
|
Chris@1512
|
137 bool small = (qualities & ColumnSmall);
|
Chris@629
|
138 bool large = (qualities & ColumnLarge); // this one defaults to off
|
Chris@1512
|
139 bool signd = (qualities & ColumnSigned); // also defaults to off
|
Chris@1021
|
140 bool emptyish = (qualities & ColumnNearEmpty);
|
Chris@629
|
141
|
Chris@1021
|
142 if (lineno > 1 && s.trimmed() != "") {
|
Chris@1021
|
143 emptyish = false;
|
Chris@1021
|
144 }
|
Chris@1021
|
145
|
Chris@629
|
146 float value = 0.f;
|
Chris@629
|
147
|
Chris@629
|
148 //!!! how to take into account headers?
|
Chris@629
|
149
|
Chris@629
|
150 if (numeric) {
|
Chris@629
|
151 value = s.toFloat(&ok);
|
Chris@629
|
152 if (!ok) {
|
Chris@629
|
153 value = (float)StringBits::stringToDoubleLocaleFree(s, &ok);
|
Chris@629
|
154 }
|
Chris@629
|
155 if (ok) {
|
Chris@1512
|
156 if (lineno < 2 && value > 1000.f) {
|
Chris@1512
|
157 large = true;
|
Chris@1512
|
158 }
|
Chris@1512
|
159 if (value < 0.f) {
|
Chris@1512
|
160 signd = true;
|
Chris@1512
|
161 }
|
Chris@1512
|
162 if (value < -1.f || value > 1.f) {
|
Chris@1512
|
163 small = false;
|
Chris@1512
|
164 }
|
Chris@629
|
165 } else {
|
Chris@629
|
166 numeric = false;
|
Chris@629
|
167 }
|
Chris@629
|
168 }
|
Chris@629
|
169
|
Chris@629
|
170 if (numeric) {
|
Chris@629
|
171
|
Chris@629
|
172 if (integral) {
|
Chris@629
|
173 if (s.contains('.') || s.contains(',')) {
|
Chris@629
|
174 integral = false;
|
Chris@392
|
175 }
|
Chris@392
|
176 }
|
Chris@392
|
177
|
Chris@629
|
178 if (increasing) {
|
Chris@629
|
179 if (lineno > 0 && value <= m_prevValues[i]) {
|
Chris@629
|
180 increasing = false;
|
Chris@392
|
181 }
|
Chris@392
|
182 }
|
Chris@392
|
183
|
Chris@629
|
184 m_prevValues[i] = value;
|
Chris@629
|
185 }
|
Chris@392
|
186
|
Chris@629
|
187 m_columnQualities[i] =
|
Chris@629
|
188 (numeric ? ColumnNumeric : 0) |
|
Chris@629
|
189 (integral ? ColumnIntegral : 0) |
|
Chris@629
|
190 (increasing ? ColumnIncreasing : 0) |
|
Chris@1512
|
191 (small ? ColumnSmall : 0) |
|
Chris@1021
|
192 (large ? ColumnLarge : 0) |
|
Chris@1512
|
193 (signd ? ColumnSigned : 0) |
|
Chris@1021
|
194 (emptyish ? ColumnNearEmpty : 0);
|
Chris@629
|
195 }
|
Chris@392
|
196
|
Chris@629
|
197 if (lineno < 10) {
|
Chris@629
|
198 m_example.push_back(list);
|
Chris@629
|
199 if (lineno == 0 || cols > m_maxExampleCols) {
|
Chris@629
|
200 m_maxExampleCols = cols;
|
Chris@392
|
201 }
|
Chris@392
|
202 }
|
Chris@392
|
203
|
Chris@1362
|
204 if (lineno < 10) {
|
Chris@1362
|
205 SVDEBUG << "Estimated column qualities for line " << lineno << " (reporting up to first 10): ";
|
Chris@1362
|
206 for (int i = 0; i < m_columnCount; ++i) {
|
Chris@1362
|
207 SVDEBUG << int(m_columnQualities[i]) << " ";
|
Chris@1362
|
208 }
|
Chris@1362
|
209 SVDEBUG << endl;
|
Chris@1362
|
210 }
|
Chris@629
|
211 }
|
Chris@629
|
212
|
Chris@629
|
213 void
|
Chris@629
|
214 CSVFormat::guessPurposes()
|
Chris@629
|
215 {
|
Chris@629
|
216 m_timingType = CSVFormat::ImplicitTiming;
|
Chris@629
|
217 m_timeUnits = CSVFormat::TimeWindows;
|
Chris@1429
|
218
|
Chris@629
|
219 int timingColumnCount = 0;
|
Chris@1021
|
220
|
Chris@1510
|
221 SVDEBUG << "Estimated column qualities overall: ";
|
Chris@1510
|
222 for (int i = 0; i < m_columnCount; ++i) {
|
Chris@1510
|
223 SVDEBUG << int(m_columnQualities[i]) << " ";
|
Chris@1510
|
224 }
|
Chris@1510
|
225 SVDEBUG << endl;
|
Chris@1510
|
226
|
Chris@1021
|
227 // if our first column has zero or one entries in it and the rest
|
Chris@1021
|
228 // have more, then we'll default to ignoring the first column and
|
Chris@1021
|
229 // counting the next one as primary. (e.g. Sonic Annotator output
|
Chris@1021
|
230 // with filename at start of first column.)
|
Chris@1021
|
231
|
Chris@1021
|
232 int primaryColumnNo = 0;
|
Chris@1021
|
233
|
Chris@1021
|
234 if (m_columnCount >= 2) {
|
Chris@1021
|
235 if ( (m_columnQualities[0] & ColumnNearEmpty) &&
|
Chris@1021
|
236 !(m_columnQualities[1] & ColumnNearEmpty)) {
|
Chris@1021
|
237 primaryColumnNo = 1;
|
Chris@1021
|
238 }
|
Chris@1021
|
239 }
|
Chris@629
|
240
|
Chris@629
|
241 for (int i = 0; i < m_columnCount; ++i) {
|
Chris@629
|
242
|
Chris@629
|
243 ColumnPurpose purpose = ColumnUnknown;
|
Chris@1021
|
244
|
Chris@1021
|
245 if (i < primaryColumnNo) {
|
Chris@1021
|
246 setColumnPurpose(i, purpose);
|
Chris@1021
|
247 continue;
|
Chris@1021
|
248 }
|
Chris@1021
|
249
|
Chris@1021
|
250 bool primary = (i == primaryColumnNo);
|
Chris@392
|
251
|
Chris@629
|
252 ColumnQualities qualities = m_columnQualities[i];
|
Chris@392
|
253
|
Chris@629
|
254 bool numeric = (qualities & ColumnNumeric);
|
Chris@629
|
255 bool integral = (qualities & ColumnIntegral);
|
Chris@629
|
256 bool increasing = (qualities & ColumnIncreasing);
|
Chris@629
|
257 bool large = (qualities & ColumnLarge);
|
Chris@629
|
258
|
Chris@629
|
259 bool timingColumn = (numeric && increasing);
|
Chris@629
|
260
|
Chris@629
|
261 if (timingColumn) {
|
Chris@629
|
262
|
Chris@629
|
263 ++timingColumnCount;
|
Chris@629
|
264
|
Chris@629
|
265 if (primary) {
|
Chris@629
|
266
|
Chris@629
|
267 purpose = ColumnStartTime;
|
Chris@629
|
268
|
Chris@629
|
269 m_timingType = ExplicitTiming;
|
Chris@629
|
270
|
Chris@629
|
271 if (integral && large) {
|
Chris@629
|
272 m_timeUnits = TimeAudioFrames;
|
Chris@629
|
273 } else {
|
Chris@629
|
274 m_timeUnits = TimeSeconds;
|
Chris@629
|
275 }
|
Chris@629
|
276
|
Chris@629
|
277 } else {
|
Chris@629
|
278
|
Chris@629
|
279 if (timingColumnCount == 2 && m_timingType == ExplicitTiming) {
|
Chris@629
|
280 purpose = ColumnEndTime;
|
Chris@629
|
281 }
|
Chris@629
|
282 }
|
Chris@629
|
283 }
|
Chris@629
|
284
|
Chris@629
|
285 if (purpose == ColumnUnknown) {
|
Chris@629
|
286 if (numeric) {
|
Chris@629
|
287 purpose = ColumnValue;
|
Chris@629
|
288 } else {
|
Chris@629
|
289 purpose = ColumnLabel;
|
Chris@629
|
290 }
|
Chris@629
|
291 }
|
Chris@629
|
292
|
Chris@631
|
293 setColumnPurpose(i, purpose);
|
Chris@629
|
294 }
|
Chris@629
|
295
|
Chris@629
|
296 int valueCount = 0;
|
Chris@629
|
297 for (int i = 0; i < m_columnCount; ++i) {
|
Chris@629
|
298 if (m_columnPurposes[i] == ColumnValue) ++valueCount;
|
Chris@629
|
299 }
|
Chris@629
|
300
|
Chris@630
|
301 if (valueCount == 2 && timingColumnCount == 1) {
|
Chris@630
|
302 // If we have exactly two apparent value columns and only one
|
Chris@630
|
303 // timing column, but one value column is integral and the
|
Chris@630
|
304 // other is not, guess that whichever one matches the integral
|
Chris@630
|
305 // status of the time column is either duration or end time
|
Chris@630
|
306 if (m_timingType == ExplicitTiming) {
|
Chris@630
|
307 int a = -1, b = -1;
|
Chris@630
|
308 for (int i = 0; i < m_columnCount; ++i) {
|
Chris@630
|
309 if (m_columnPurposes[i] == ColumnValue) {
|
Chris@630
|
310 if (a == -1) a = i;
|
Chris@630
|
311 else b = i;
|
Chris@630
|
312 }
|
Chris@630
|
313 }
|
Chris@630
|
314 if ((m_columnQualities[a] & ColumnIntegral) !=
|
Chris@630
|
315 (m_columnQualities[b] & ColumnIntegral)) {
|
Chris@630
|
316 int timecol = a;
|
Chris@630
|
317 if ((m_columnQualities[a] & ColumnIntegral) !=
|
Chris@630
|
318 (m_columnQualities[0] & ColumnIntegral)) {
|
Chris@630
|
319 timecol = b;
|
Chris@630
|
320 }
|
Chris@630
|
321 if (m_columnQualities[timecol] & ColumnIncreasing) {
|
Chris@630
|
322 // This shouldn't happen; should have been settled above
|
Chris@630
|
323 m_columnPurposes[timecol] = ColumnEndTime;
|
Chris@630
|
324 } else {
|
Chris@630
|
325 m_columnPurposes[timecol] = ColumnDuration;
|
Chris@630
|
326 }
|
Chris@630
|
327 --valueCount;
|
Chris@630
|
328 }
|
Chris@630
|
329 }
|
Chris@630
|
330 }
|
Chris@630
|
331
|
Chris@631
|
332 if (timingColumnCount > 1) {
|
Chris@631
|
333 m_modelType = TwoDimensionalModelWithDuration;
|
Chris@392
|
334 } else {
|
Chris@631
|
335 if (valueCount == 0) {
|
Chris@631
|
336 m_modelType = OneDimensionalModel;
|
Chris@631
|
337 } else if (valueCount == 1) {
|
Chris@631
|
338 m_modelType = TwoDimensionalModel;
|
Chris@631
|
339 } else {
|
Chris@631
|
340 m_modelType = ThreeDimensionalModel;
|
Chris@631
|
341 }
|
Chris@629
|
342 }
|
Chris@392
|
343
|
Chris@1362
|
344 SVDEBUG << "Estimated column purposes: ";
|
Chris@1362
|
345 for (int i = 0; i < m_columnCount; ++i) {
|
Chris@1362
|
346 SVDEBUG << int(m_columnPurposes[i]) << " ";
|
Chris@1362
|
347 }
|
Chris@1362
|
348 SVDEBUG << endl;
|
Chris@392
|
349
|
Chris@1362
|
350 SVDEBUG << "Estimated model type: " << m_modelType << endl;
|
Chris@1362
|
351 SVDEBUG << "Estimated timing type: " << m_timingType << endl;
|
Chris@1362
|
352 SVDEBUG << "Estimated units: " << m_timeUnits << endl;
|
Chris@392
|
353 }
|
Chris@392
|
354
|
Chris@1515
|
355 void
|
Chris@1515
|
356 CSVFormat::guessAudioSampleRange()
|
Chris@1515
|
357 {
|
Chris@1515
|
358 AudioSampleRange range = SampleRangeSigned1;
|
Chris@1515
|
359
|
Chris@1515
|
360 range = SampleRangeSigned1;
|
Chris@1515
|
361 bool knownSigned = false;
|
Chris@1515
|
362 bool knownNonIntegral = false;
|
Chris@1515
|
363
|
Chris@1515
|
364 for (int i = 0; i < m_columnCount; ++i) {
|
Chris@1515
|
365 if (!(m_columnQualities[i] & ColumnIntegral)) {
|
Chris@1515
|
366 knownNonIntegral = true;
|
Chris@1515
|
367 if (range == SampleRangeUnsigned255 ||
|
Chris@1515
|
368 range == SampleRangeSigned32767) {
|
Chris@1515
|
369 range = SampleRangeOther;
|
Chris@1515
|
370 }
|
Chris@1515
|
371 }
|
Chris@1515
|
372 if (m_columnQualities[i] & ColumnLarge) {
|
Chris@1515
|
373 if (range == SampleRangeSigned1 ||
|
Chris@1515
|
374 range == SampleRangeUnsigned255) {
|
Chris@1515
|
375 if (knownNonIntegral) {
|
Chris@1515
|
376 range = SampleRangeOther;
|
Chris@1515
|
377 } else {
|
Chris@1515
|
378 range = SampleRangeSigned32767;
|
Chris@1515
|
379 }
|
Chris@1515
|
380 }
|
Chris@1515
|
381 }
|
Chris@1515
|
382 if (m_columnQualities[i] & ColumnSigned) {
|
Chris@1515
|
383 knownSigned = true;
|
Chris@1515
|
384 if (range == SampleRangeUnsigned255) {
|
Chris@1515
|
385 range = SampleRangeSigned32767;
|
Chris@1515
|
386 }
|
Chris@1515
|
387 }
|
Chris@1515
|
388 if (!(m_columnQualities[i] & ColumnSmall)) {
|
Chris@1515
|
389 if (range == SampleRangeSigned1) {
|
Chris@1515
|
390 if (knownNonIntegral) {
|
Chris@1515
|
391 range = SampleRangeOther;
|
Chris@1515
|
392 } else if (knownSigned) {
|
Chris@1515
|
393 range = SampleRangeSigned32767;
|
Chris@1515
|
394 } else {
|
Chris@1515
|
395 range = SampleRangeUnsigned255;
|
Chris@1515
|
396 }
|
Chris@1515
|
397 }
|
Chris@1515
|
398 }
|
Chris@1515
|
399 }
|
Chris@1515
|
400
|
Chris@1515
|
401 m_audioSampleRange = range;
|
Chris@1515
|
402 }
|
Chris@1515
|
403
|
Chris@631
|
404 CSVFormat::ColumnPurpose
|
Chris@631
|
405 CSVFormat::getColumnPurpose(int i)
|
Chris@631
|
406 {
|
Chris@631
|
407 while (m_columnPurposes.size() <= i) {
|
Chris@631
|
408 m_columnPurposes.push_back(ColumnUnknown);
|
Chris@631
|
409 }
|
Chris@631
|
410 return m_columnPurposes[i];
|
Chris@631
|
411 }
|
Chris@629
|
412
|
Chris@631
|
413 CSVFormat::ColumnPurpose
|
Chris@631
|
414 CSVFormat::getColumnPurpose(int i) const
|
Chris@631
|
415 {
|
Chris@668
|
416 if (m_columnPurposes.size() <= i) {
|
Chris@668
|
417 return ColumnUnknown;
|
Chris@668
|
418 }
|
Chris@631
|
419 return m_columnPurposes[i];
|
Chris@631
|
420 }
|
Chris@631
|
421
|
Chris@631
|
422 void
|
Chris@631
|
423 CSVFormat::setColumnPurpose(int i, ColumnPurpose p)
|
Chris@631
|
424 {
|
Chris@631
|
425 while (m_columnPurposes.size() <= i) {
|
Chris@631
|
426 m_columnPurposes.push_back(ColumnUnknown);
|
Chris@631
|
427 }
|
Chris@631
|
428 m_columnPurposes[i] = p;
|
Chris@631
|
429 }
|
Chris@631
|
430
|
Chris@631
|
431
|
Chris@631
|
432
|
Chris@631
|
433
|