Chris@148
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@148
|
2
|
Chris@148
|
3 /*
|
Chris@148
|
4 Sonic Visualiser
|
Chris@148
|
5 An audio file viewer and annotation editor.
|
Chris@148
|
6 Centre for Digital Music, Queen Mary, University of London.
|
Chris@148
|
7 This file copyright 2006 Chris Cannam.
|
Chris@148
|
8
|
Chris@148
|
9 This program is free software; you can redistribute it and/or
|
Chris@148
|
10 modify it under the terms of the GNU General Public License as
|
Chris@148
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@148
|
12 License, or (at your option) any later version. See the file
|
Chris@148
|
13 COPYING included with this distribution for more information.
|
Chris@148
|
14 */
|
Chris@148
|
15
|
Chris@148
|
16 #include "CSVFileReader.h"
|
Chris@148
|
17
|
Chris@150
|
18 #include "model/Model.h"
|
Chris@148
|
19 #include "base/RealTime.h"
|
Chris@631
|
20 #include "base/StringBits.h"
|
Chris@148
|
21 #include "model/SparseOneDimensionalModel.h"
|
Chris@148
|
22 #include "model/SparseTimeValueModel.h"
|
Chris@152
|
23 #include "model/EditableDenseThreeDimensionalModel.h"
|
Chris@628
|
24 #include "model/RegionModel.h"
|
Chris@308
|
25 #include "DataFileReaderFactory.h"
|
Chris@148
|
26
|
Chris@148
|
27 #include <QFile>
|
Chris@148
|
28 #include <QString>
|
Chris@148
|
29 #include <QRegExp>
|
Chris@148
|
30 #include <QStringList>
|
Chris@148
|
31 #include <QTextStream>
|
Chris@148
|
32
|
Chris@148
|
33 #include <iostream>
|
Chris@628
|
34 #include <map>
|
Chris@148
|
35
|
Chris@392
|
36 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
|
Chris@392
|
37 size_t mainModelSampleRate) :
|
Chris@392
|
38 m_format(format),
|
Chris@148
|
39 m_file(0),
|
Chris@631
|
40 m_warnings(0),
|
Chris@148
|
41 m_mainModelSampleRate(mainModelSampleRate)
|
Chris@148
|
42 {
|
Chris@148
|
43 m_file = new QFile(path);
|
Chris@148
|
44 bool good = false;
|
Chris@148
|
45
|
Chris@148
|
46 if (!m_file->exists()) {
|
Chris@148
|
47 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
|
Chris@148
|
48 } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
|
Chris@148
|
49 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
|
Chris@148
|
50 } else {
|
Chris@148
|
51 good = true;
|
Chris@148
|
52 }
|
Chris@148
|
53
|
Chris@148
|
54 if (!good) {
|
Chris@148
|
55 delete m_file;
|
Chris@148
|
56 m_file = 0;
|
Chris@148
|
57 }
|
Chris@148
|
58 }
|
Chris@148
|
59
|
Chris@148
|
60 CSVFileReader::~CSVFileReader()
|
Chris@148
|
61 {
|
Chris@690
|
62 SVDEBUG << "CSVFileReader::~CSVFileReader: file is " << m_file << endl;
|
Chris@148
|
63
|
Chris@148
|
64 if (m_file) {
|
Chris@690
|
65 SVDEBUG << "CSVFileReader::CSVFileReader: Closing file" << endl;
|
Chris@148
|
66 m_file->close();
|
Chris@148
|
67 }
|
Chris@148
|
68 delete m_file;
|
Chris@148
|
69 }
|
Chris@148
|
70
|
Chris@148
|
71 bool
|
Chris@148
|
72 CSVFileReader::isOK() const
|
Chris@148
|
73 {
|
Chris@148
|
74 return (m_file != 0);
|
Chris@148
|
75 }
|
Chris@148
|
76
|
Chris@148
|
77 QString
|
Chris@148
|
78 CSVFileReader::getError() const
|
Chris@148
|
79 {
|
Chris@148
|
80 return m_error;
|
Chris@148
|
81 }
|
Chris@148
|
82
|
Chris@631
|
83 size_t
|
Chris@631
|
84 CSVFileReader::convertTimeValue(QString s, int lineno, size_t sampleRate,
|
Chris@631
|
85 size_t windowSize) const
|
Chris@631
|
86 {
|
Chris@631
|
87 QRegExp nonNumericRx("[^0-9eE.,+-]");
|
Chris@631
|
88 unsigned int warnLimit = 10;
|
Chris@631
|
89
|
Chris@631
|
90 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
|
Chris@631
|
91
|
Chris@631
|
92 size_t calculatedFrame = 0;
|
Chris@631
|
93
|
Chris@631
|
94 bool ok = false;
|
Chris@631
|
95 QString numeric = s;
|
Chris@631
|
96 numeric.remove(nonNumericRx);
|
Chris@631
|
97
|
Chris@631
|
98 if (timeUnits == CSVFormat::TimeSeconds) {
|
Chris@631
|
99
|
Chris@631
|
100 double time = numeric.toDouble(&ok);
|
Chris@631
|
101 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
|
Chris@631
|
102 calculatedFrame = int(time * sampleRate + 0.5);
|
Chris@631
|
103
|
Chris@631
|
104 } else {
|
Chris@631
|
105
|
Chris@631
|
106 long n = numeric.toLong(&ok);
|
Chris@631
|
107 if (n >= 0) calculatedFrame = n;
|
Chris@631
|
108
|
Chris@631
|
109 if (timeUnits == CSVFormat::TimeWindows) {
|
Chris@631
|
110 calculatedFrame *= windowSize;
|
Chris@631
|
111 }
|
Chris@631
|
112 }
|
Chris@631
|
113
|
Chris@631
|
114 if (!ok) {
|
Chris@631
|
115 if (m_warnings < warnLimit) {
|
Chris@843
|
116 cerr << "WARNING: CSVFileReader::load: "
|
Chris@631
|
117 << "Bad time format (\"" << s.toStdString()
|
Chris@631
|
118 << "\") in data line "
|
Chris@843
|
119 << lineno+1 << endl;
|
Chris@631
|
120 } else if (m_warnings == warnLimit) {
|
Chris@843
|
121 cerr << "WARNING: Too many warnings" << endl;
|
Chris@631
|
122 }
|
Chris@631
|
123 ++m_warnings;
|
Chris@631
|
124 }
|
Chris@631
|
125
|
Chris@631
|
126 return calculatedFrame;
|
Chris@631
|
127 }
|
Chris@631
|
128
|
Chris@148
|
129 Model *
|
Chris@148
|
130 CSVFileReader::load() const
|
Chris@148
|
131 {
|
Chris@148
|
132 if (!m_file) return 0;
|
Chris@148
|
133
|
Chris@628
|
134 CSVFormat::ModelType modelType = m_format.getModelType();
|
Chris@392
|
135 CSVFormat::TimingType timingType = m_format.getTimingType();
|
Chris@628
|
136 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
|
Chris@392
|
137 size_t sampleRate = m_format.getSampleRate();
|
Chris@392
|
138 size_t windowSize = m_format.getWindowSize();
|
Chris@631
|
139 QChar separator = m_format.getSeparator();
|
Chris@631
|
140 bool allowQuoting = m_format.getAllowQuoting();
|
Chris@148
|
141
|
Chris@392
|
142 if (timingType == CSVFormat::ExplicitTiming) {
|
Chris@611
|
143 if (modelType == CSVFormat::ThreeDimensionalModel) {
|
Chris@611
|
144 // This will be overridden later if more than one line
|
Chris@611
|
145 // appears in our file, but we want to choose a default
|
Chris@611
|
146 // that's likely to be visible
|
Chris@611
|
147 windowSize = 1024;
|
Chris@611
|
148 } else {
|
Chris@611
|
149 windowSize = 1;
|
Chris@611
|
150 }
|
Chris@392
|
151 if (timeUnits == CSVFormat::TimeSeconds) {
|
Chris@148
|
152 sampleRate = m_mainModelSampleRate;
|
Chris@148
|
153 }
|
Chris@148
|
154 }
|
Chris@148
|
155
|
Chris@148
|
156 SparseOneDimensionalModel *model1 = 0;
|
Chris@148
|
157 SparseTimeValueModel *model2 = 0;
|
Chris@628
|
158 RegionModel *model2a = 0;
|
Chris@152
|
159 EditableDenseThreeDimensionalModel *model3 = 0;
|
Chris@148
|
160 Model *model = 0;
|
Chris@148
|
161
|
Chris@148
|
162 QTextStream in(m_file);
|
Chris@148
|
163 in.seek(0);
|
Chris@148
|
164
|
Chris@148
|
165 unsigned int warnings = 0, warnLimit = 10;
|
Chris@148
|
166 unsigned int lineno = 0;
|
Chris@148
|
167
|
Chris@148
|
168 float min = 0.0, max = 0.0;
|
Chris@148
|
169
|
Chris@148
|
170 size_t frameNo = 0;
|
Chris@628
|
171 size_t duration = 0;
|
Chris@631
|
172 size_t endFrame = 0;
|
Chris@631
|
173
|
Chris@631
|
174 bool haveAnyValue = false;
|
Chris@631
|
175 bool haveEndTime = false;
|
Chris@631
|
176
|
Chris@611
|
177 size_t startFrame = 0; // for calculation of dense model resolution
|
Chris@631
|
178 bool firstEverValue = true;
|
Chris@148
|
179
|
Chris@631
|
180 std::map<QString, int> labelCountMap;
|
Chris@631
|
181
|
Chris@676
|
182 int valueColumns = 0;
|
Chris@676
|
183 for (int i = 0; i < m_format.getColumnCount(); ++i) {
|
Chris@676
|
184 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
|
Chris@676
|
185 ++valueColumns;
|
Chris@676
|
186 }
|
Chris@676
|
187 }
|
Chris@676
|
188
|
Chris@148
|
189 while (!in.atEnd()) {
|
Chris@148
|
190
|
Chris@283
|
191 // QTextStream's readLine doesn't cope with old-style Mac
|
Chris@283
|
192 // CR-only line endings. Why did they bother making the class
|
Chris@283
|
193 // cope with more than one sort of line ending, if it still
|
Chris@283
|
194 // can't be configured to cope with all the common sorts?
|
Chris@148
|
195
|
Chris@283
|
196 // For the time being we'll deal with this case (which is
|
Chris@283
|
197 // relatively uncommon for us, but still necessary to handle)
|
Chris@283
|
198 // by reading the entire file using a single readLine, and
|
Chris@283
|
199 // splitting it. For CR and CR/LF line endings this will just
|
Chris@283
|
200 // read a line at a time, and that's obviously OK.
|
Chris@148
|
201
|
Chris@283
|
202 QString chunk = in.readLine();
|
Chris@283
|
203 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
|
Chris@283
|
204
|
Chris@283
|
205 for (size_t li = 0; li < lines.size(); ++li) {
|
Chris@148
|
206
|
Chris@283
|
207 QString line = lines[li];
|
Chris@148
|
208
|
Chris@283
|
209 if (line.startsWith("#")) continue;
|
Chris@283
|
210
|
Chris@631
|
211 QStringList list = StringBits::split(line, separator, allowQuoting);
|
Chris@283
|
212 if (!model) {
|
Chris@283
|
213
|
Chris@283
|
214 switch (modelType) {
|
Chris@283
|
215
|
Chris@392
|
216 case CSVFormat::OneDimensionalModel:
|
Chris@283
|
217 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
|
Chris@283
|
218 model = model1;
|
Chris@283
|
219 break;
|
Chris@148
|
220
|
Chris@392
|
221 case CSVFormat::TwoDimensionalModel:
|
Chris@283
|
222 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
|
Chris@283
|
223 model = model2;
|
Chris@283
|
224 break;
|
Chris@148
|
225
|
Chris@628
|
226 case CSVFormat::TwoDimensionalModelWithDuration:
|
Chris@628
|
227 model2a = new RegionModel(sampleRate, windowSize, false);
|
Chris@628
|
228 model = model2a;
|
Chris@628
|
229 break;
|
Chris@628
|
230
|
Chris@392
|
231 case CSVFormat::ThreeDimensionalModel:
|
Chris@535
|
232 model3 = new EditableDenseThreeDimensionalModel
|
Chris@535
|
233 (sampleRate,
|
Chris@535
|
234 windowSize,
|
Chris@676
|
235 valueColumns,
|
Chris@535
|
236 EditableDenseThreeDimensionalModel::NoCompression);
|
Chris@283
|
237 model = model3;
|
Chris@283
|
238 break;
|
Chris@283
|
239 }
|
Chris@283
|
240 }
|
Chris@148
|
241
|
Chris@631
|
242 float value = 0.f;
|
Chris@631
|
243 QString label = "";
|
Chris@148
|
244
|
Chris@631
|
245 duration = 0.f;
|
Chris@631
|
246 haveEndTime = false;
|
Chris@628
|
247
|
Chris@283
|
248 for (int i = 0; i < list.size(); ++i) {
|
Chris@148
|
249
|
Chris@631
|
250 QString s = list[i];
|
Chris@631
|
251
|
Chris@631
|
252 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
|
Chris@631
|
253
|
Chris@631
|
254 switch (purpose) {
|
Chris@631
|
255
|
Chris@631
|
256 case CSVFormat::ColumnUnknown:
|
Chris@631
|
257 break;
|
Chris@631
|
258
|
Chris@631
|
259 case CSVFormat::ColumnStartTime:
|
Chris@631
|
260 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
|
Chris@631
|
261 break;
|
Chris@631
|
262
|
Chris@631
|
263 case CSVFormat::ColumnEndTime:
|
Chris@631
|
264 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
|
Chris@631
|
265 haveEndTime = true;
|
Chris@631
|
266 break;
|
Chris@631
|
267
|
Chris@631
|
268 case CSVFormat::ColumnDuration:
|
Chris@631
|
269 duration = convertTimeValue(s, lineno, sampleRate, windowSize);
|
Chris@631
|
270 break;
|
Chris@631
|
271
|
Chris@631
|
272 case CSVFormat::ColumnValue:
|
Chris@631
|
273 value = s.toFloat();
|
Chris@631
|
274 haveAnyValue = true;
|
Chris@631
|
275 break;
|
Chris@631
|
276
|
Chris@631
|
277 case CSVFormat::ColumnLabel:
|
Chris@631
|
278 label = s;
|
Chris@631
|
279 ++labelCountMap[label];
|
Chris@631
|
280 break;
|
Chris@283
|
281 }
|
Chris@631
|
282 }
|
Chris@148
|
283
|
Chris@631
|
284 if (haveEndTime) { // ... calculate duration now all cols read
|
Chris@631
|
285 if (endFrame > frameNo) {
|
Chris@631
|
286 duration = endFrame - frameNo;
|
Chris@628
|
287 }
|
Chris@283
|
288 }
|
Chris@148
|
289
|
Chris@392
|
290 if (modelType == CSVFormat::OneDimensionalModel) {
|
Chris@148
|
291
|
Chris@631
|
292 SparseOneDimensionalModel::Point point(frameNo, label);
|
Chris@283
|
293 model1->addPoint(point);
|
Chris@148
|
294
|
Chris@392
|
295 } else if (modelType == CSVFormat::TwoDimensionalModel) {
|
Chris@148
|
296
|
Chris@631
|
297 SparseTimeValueModel::Point point(frameNo, value, label);
|
Chris@283
|
298 model2->addPoint(point);
|
Chris@148
|
299
|
Chris@628
|
300 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
|
Chris@628
|
301
|
Chris@631
|
302 RegionModel::Point point(frameNo, value, duration, label);
|
Chris@628
|
303 model2a->addPoint(point);
|
Chris@628
|
304
|
Chris@392
|
305 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
|
Chris@148
|
306
|
Chris@283
|
307 DenseThreeDimensionalModel::Column values;
|
Chris@148
|
308
|
Chris@631
|
309 for (int i = 0; i < list.size(); ++i) {
|
Chris@148
|
310
|
Chris@676
|
311 if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) {
|
Chris@676
|
312 continue;
|
Chris@676
|
313 }
|
Chris@676
|
314
|
Chris@283
|
315 bool ok = false;
|
Chris@283
|
316 float value = list[i].toFloat(&ok);
|
Chris@611
|
317
|
Chris@676
|
318 values.push_back(value);
|
Chris@148
|
319
|
Chris@631
|
320 if (firstEverValue || value < min) min = value;
|
Chris@631
|
321 if (firstEverValue || value > max) max = value;
|
Chris@676
|
322
|
Chris@631
|
323 if (firstEverValue) {
|
Chris@611
|
324 startFrame = frameNo;
|
Chris@611
|
325 model3->setStartFrame(startFrame);
|
Chris@611
|
326 } else if (lineno == 1 &&
|
Chris@611
|
327 timingType == CSVFormat::ExplicitTiming) {
|
Chris@611
|
328 model3->setResolution(frameNo - startFrame);
|
Chris@611
|
329 }
|
Chris@631
|
330
|
Chris@631
|
331 firstEverValue = false;
|
Chris@148
|
332
|
Chris@283
|
333 if (!ok) {
|
Chris@283
|
334 if (warnings < warnLimit) {
|
Chris@843
|
335 cerr << "WARNING: CSVFileReader::load: "
|
Chris@390
|
336 << "Non-numeric value \""
|
Chris@390
|
337 << list[i].toStdString()
|
Chris@491
|
338 << "\" in data line " << lineno+1
|
Chris@843
|
339 << ":" << endl;
|
Chris@843
|
340 cerr << line << endl;
|
Chris@283
|
341 ++warnings;
|
Chris@283
|
342 } else if (warnings == warnLimit) {
|
Chris@843
|
343 // cerr << "WARNING: Too many warnings" << endl;
|
Chris@283
|
344 }
|
Chris@283
|
345 }
|
Chris@283
|
346 }
|
Chris@148
|
347
|
Chris@690
|
348 // SVDEBUG << "Setting bin values for count " << lineno << ", frame "
|
Chris@687
|
349 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
|
Chris@148
|
350
|
Chris@611
|
351 model3->setColumn(lineno, values);
|
Chris@283
|
352 }
|
Chris@148
|
353
|
Chris@283
|
354 ++lineno;
|
Chris@392
|
355 if (timingType == CSVFormat::ImplicitTiming ||
|
Chris@283
|
356 list.size() == 0) {
|
Chris@283
|
357 frameNo += windowSize;
|
Chris@283
|
358 }
|
Chris@283
|
359 }
|
Chris@148
|
360 }
|
Chris@148
|
361
|
Chris@631
|
362 if (!haveAnyValue) {
|
Chris@631
|
363 if (model2a) {
|
Chris@631
|
364 // assign values for regions based on label frequency; we
|
Chris@631
|
365 // have this in our labelCountMap, sort of
|
Chris@631
|
366
|
Chris@631
|
367 std::map<int, std::map<QString, float> > countLabelValueMap;
|
Chris@631
|
368 for (std::map<QString, int>::iterator i = labelCountMap.begin();
|
Chris@631
|
369 i != labelCountMap.end(); ++i) {
|
Chris@631
|
370 countLabelValueMap[i->second][i->first] = 0.f;
|
Chris@631
|
371 }
|
Chris@631
|
372
|
Chris@631
|
373 float v = 0.f;
|
Chris@631
|
374 for (std::map<int, std::map<QString, float> >::iterator i =
|
Chris@631
|
375 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
|
Chris@631
|
376 --i;
|
Chris@631
|
377 for (std::map<QString, float>::iterator j = i->second.begin();
|
Chris@631
|
378 j != i->second.end(); ++j) {
|
Chris@631
|
379 j->second = v;
|
Chris@631
|
380 v = v + 1.f;
|
Chris@631
|
381 }
|
Chris@631
|
382 }
|
Chris@631
|
383
|
Chris@631
|
384 std::map<RegionModel::Point, RegionModel::Point,
|
Chris@631
|
385 RegionModel::Point::Comparator> pointMap;
|
Chris@631
|
386 for (RegionModel::PointList::const_iterator i =
|
Chris@631
|
387 model2a->getPoints().begin();
|
Chris@631
|
388 i != model2a->getPoints().end(); ++i) {
|
Chris@631
|
389 RegionModel::Point p(*i);
|
Chris@631
|
390 v = countLabelValueMap[labelCountMap[p.label]][p.label];
|
Chris@631
|
391 RegionModel::Point pp(p.frame, v, p.duration, p.label);
|
Chris@631
|
392 pointMap[p] = pp;
|
Chris@631
|
393 }
|
Chris@631
|
394
|
Chris@631
|
395 for (std::map<RegionModel::Point, RegionModel::Point>::iterator i =
|
Chris@631
|
396 pointMap.begin(); i != pointMap.end(); ++i) {
|
Chris@631
|
397 model2a->deletePoint(i->first);
|
Chris@631
|
398 model2a->addPoint(i->second);
|
Chris@631
|
399 }
|
Chris@631
|
400 }
|
Chris@631
|
401 }
|
Chris@631
|
402
|
Chris@392
|
403 if (modelType == CSVFormat::ThreeDimensionalModel) {
|
Chris@148
|
404 model3->setMinimumLevel(min);
|
Chris@148
|
405 model3->setMaximumLevel(max);
|
Chris@148
|
406 }
|
Chris@148
|
407
|
Chris@148
|
408 return model;
|
Chris@148
|
409 }
|
Chris@148
|
410
|