Chris@148
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@148
|
2
|
Chris@148
|
3 /*
|
Chris@148
|
4 Sonic Visualiser
|
Chris@148
|
5 An audio file viewer and annotation editor.
|
Chris@148
|
6 Centre for Digital Music, Queen Mary, University of London.
|
Chris@148
|
7 This file copyright 2006 Chris Cannam.
|
Chris@148
|
8
|
Chris@148
|
9 This program is free software; you can redistribute it and/or
|
Chris@148
|
10 modify it under the terms of the GNU General Public License as
|
Chris@148
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@148
|
12 License, or (at your option) any later version. See the file
|
Chris@148
|
13 COPYING included with this distribution for more information.
|
Chris@148
|
14 */
|
Chris@148
|
15
|
Chris@148
|
16 #include "CSVFileReader.h"
|
Chris@148
|
17
|
Chris@150
|
18 #include "model/Model.h"
|
Chris@148
|
19 #include "base/RealTime.h"
|
Chris@148
|
20 #include "model/SparseOneDimensionalModel.h"
|
Chris@148
|
21 #include "model/SparseTimeValueModel.h"
|
Chris@152
|
22 #include "model/EditableDenseThreeDimensionalModel.h"
|
Chris@628
|
23 #include "model/RegionModel.h"
|
Chris@308
|
24 #include "DataFileReaderFactory.h"
|
Chris@148
|
25
|
Chris@148
|
26 #include <QFile>
|
Chris@148
|
27 #include <QString>
|
Chris@148
|
28 #include <QRegExp>
|
Chris@148
|
29 #include <QStringList>
|
Chris@148
|
30 #include <QTextStream>
|
Chris@148
|
31
|
Chris@148
|
32 #include <iostream>
|
Chris@628
|
33 #include <map>
|
Chris@148
|
34
|
Chris@392
|
35 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
|
Chris@392
|
36 size_t mainModelSampleRate) :
|
Chris@392
|
37 m_format(format),
|
Chris@148
|
38 m_file(0),
|
Chris@148
|
39 m_mainModelSampleRate(mainModelSampleRate)
|
Chris@148
|
40 {
|
Chris@148
|
41 m_file = new QFile(path);
|
Chris@148
|
42 bool good = false;
|
Chris@148
|
43
|
Chris@148
|
44 if (!m_file->exists()) {
|
Chris@148
|
45 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
|
Chris@148
|
46 } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
|
Chris@148
|
47 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
|
Chris@148
|
48 } else {
|
Chris@148
|
49 good = true;
|
Chris@148
|
50 }
|
Chris@148
|
51
|
Chris@148
|
52 if (!good) {
|
Chris@148
|
53 delete m_file;
|
Chris@148
|
54 m_file = 0;
|
Chris@148
|
55 }
|
Chris@148
|
56 }
|
Chris@148
|
57
|
Chris@148
|
58 CSVFileReader::~CSVFileReader()
|
Chris@148
|
59 {
|
Chris@148
|
60 std::cerr << "CSVFileReader::~CSVFileReader: file is " << m_file << std::endl;
|
Chris@148
|
61
|
Chris@148
|
62 if (m_file) {
|
Chris@148
|
63 std::cerr << "CSVFileReader::CSVFileReader: Closing file" << std::endl;
|
Chris@148
|
64 m_file->close();
|
Chris@148
|
65 }
|
Chris@148
|
66 delete m_file;
|
Chris@148
|
67 }
|
Chris@148
|
68
|
Chris@148
|
69 bool
|
Chris@148
|
70 CSVFileReader::isOK() const
|
Chris@148
|
71 {
|
Chris@148
|
72 return (m_file != 0);
|
Chris@148
|
73 }
|
Chris@148
|
74
|
Chris@148
|
75 QString
|
Chris@148
|
76 CSVFileReader::getError() const
|
Chris@148
|
77 {
|
Chris@148
|
78 return m_error;
|
Chris@148
|
79 }
|
Chris@148
|
80
|
Chris@148
|
81 Model *
|
Chris@148
|
82 CSVFileReader::load() const
|
Chris@148
|
83 {
|
Chris@148
|
84 if (!m_file) return 0;
|
Chris@392
|
85 /*!!!
|
Chris@148
|
86 CSVFormatDialog *dialog = new CSVFormatDialog
|
Chris@148
|
87 (0, m_file, m_mainModelSampleRate);
|
Chris@148
|
88
|
Chris@148
|
89 if (dialog->exec() == QDialog::Rejected) {
|
Chris@148
|
90 delete dialog;
|
Chris@308
|
91 throw DataFileReaderFactory::ImportCancelled;
|
Chris@148
|
92 }
|
Chris@392
|
93 */
|
Chris@148
|
94
|
Chris@628
|
95 CSVFormat::ModelType modelType = m_format.getModelType();
|
Chris@392
|
96 CSVFormat::TimingType timingType = m_format.getTimingType();
|
Chris@628
|
97 CSVFormat::DurationType durationType = m_format.getDurationType();
|
Chris@628
|
98 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
|
Chris@392
|
99 QString separator = m_format.getSeparator();
|
Chris@392
|
100 QString::SplitBehavior behaviour = m_format.getSplitBehaviour();
|
Chris@392
|
101 size_t sampleRate = m_format.getSampleRate();
|
Chris@392
|
102 size_t windowSize = m_format.getWindowSize();
|
Chris@148
|
103
|
Chris@392
|
104 if (timingType == CSVFormat::ExplicitTiming) {
|
Chris@611
|
105 if (modelType == CSVFormat::ThreeDimensionalModel) {
|
Chris@611
|
106 // This will be overridden later if more than one line
|
Chris@611
|
107 // appears in our file, but we want to choose a default
|
Chris@611
|
108 // that's likely to be visible
|
Chris@611
|
109 windowSize = 1024;
|
Chris@611
|
110 } else {
|
Chris@611
|
111 windowSize = 1;
|
Chris@611
|
112 }
|
Chris@392
|
113 if (timeUnits == CSVFormat::TimeSeconds) {
|
Chris@148
|
114 sampleRate = m_mainModelSampleRate;
|
Chris@148
|
115 }
|
Chris@148
|
116 }
|
Chris@148
|
117
|
Chris@148
|
118 SparseOneDimensionalModel *model1 = 0;
|
Chris@148
|
119 SparseTimeValueModel *model2 = 0;
|
Chris@628
|
120 RegionModel *model2a = 0;
|
Chris@152
|
121 EditableDenseThreeDimensionalModel *model3 = 0;
|
Chris@148
|
122 Model *model = 0;
|
Chris@148
|
123
|
Chris@148
|
124 QTextStream in(m_file);
|
Chris@148
|
125 in.seek(0);
|
Chris@148
|
126
|
Chris@148
|
127 unsigned int warnings = 0, warnLimit = 10;
|
Chris@148
|
128 unsigned int lineno = 0;
|
Chris@148
|
129
|
Chris@148
|
130 float min = 0.0, max = 0.0;
|
Chris@148
|
131
|
Chris@148
|
132 size_t frameNo = 0;
|
Chris@628
|
133 size_t duration = 0;
|
Chris@611
|
134 size_t startFrame = 0; // for calculation of dense model resolution
|
Chris@148
|
135
|
Chris@628
|
136 std::map<QString, float> labelValueMap;
|
Chris@628
|
137 float syntheticMax = 0.f;
|
Chris@628
|
138
|
Chris@148
|
139 while (!in.atEnd()) {
|
Chris@148
|
140
|
Chris@283
|
141 // QTextStream's readLine doesn't cope with old-style Mac
|
Chris@283
|
142 // CR-only line endings. Why did they bother making the class
|
Chris@283
|
143 // cope with more than one sort of line ending, if it still
|
Chris@283
|
144 // can't be configured to cope with all the common sorts?
|
Chris@148
|
145
|
Chris@283
|
146 // For the time being we'll deal with this case (which is
|
Chris@283
|
147 // relatively uncommon for us, but still necessary to handle)
|
Chris@283
|
148 // by reading the entire file using a single readLine, and
|
Chris@283
|
149 // splitting it. For CR and CR/LF line endings this will just
|
Chris@283
|
150 // read a line at a time, and that's obviously OK.
|
Chris@148
|
151
|
Chris@283
|
152 QString chunk = in.readLine();
|
Chris@283
|
153 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
|
Chris@283
|
154
|
Chris@283
|
155 for (size_t li = 0; li < lines.size(); ++li) {
|
Chris@148
|
156
|
Chris@283
|
157 QString line = lines[li];
|
Chris@148
|
158
|
Chris@283
|
159 if (line.startsWith("#")) continue;
|
Chris@283
|
160
|
Chris@390
|
161 QStringList list = line.split(separator, behaviour);
|
Chris@283
|
162
|
Chris@283
|
163 if (!model) {
|
Chris@283
|
164
|
Chris@283
|
165 switch (modelType) {
|
Chris@283
|
166
|
Chris@392
|
167 case CSVFormat::OneDimensionalModel:
|
Chris@283
|
168 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
|
Chris@283
|
169 model = model1;
|
Chris@283
|
170 break;
|
Chris@148
|
171
|
Chris@392
|
172 case CSVFormat::TwoDimensionalModel:
|
Chris@283
|
173 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
|
Chris@283
|
174 model = model2;
|
Chris@283
|
175 break;
|
Chris@148
|
176
|
Chris@628
|
177 case CSVFormat::TwoDimensionalModelWithDuration:
|
Chris@628
|
178 model2a = new RegionModel(sampleRate, windowSize, false);
|
Chris@628
|
179 model = model2a;
|
Chris@628
|
180 break;
|
Chris@628
|
181
|
Chris@392
|
182 case CSVFormat::ThreeDimensionalModel:
|
Chris@535
|
183 model3 = new EditableDenseThreeDimensionalModel
|
Chris@535
|
184 (sampleRate,
|
Chris@535
|
185 windowSize,
|
Chris@535
|
186 list.size(),
|
Chris@535
|
187 EditableDenseThreeDimensionalModel::NoCompression);
|
Chris@283
|
188 model = model3;
|
Chris@283
|
189 break;
|
Chris@283
|
190 }
|
Chris@283
|
191 }
|
Chris@148
|
192
|
Chris@283
|
193 QStringList tidyList;
|
Chris@390
|
194 QRegExp nonNumericRx("[^0-9eE.,+-]");
|
Chris@148
|
195
|
Chris@628
|
196 float value = 0.f;
|
Chris@628
|
197
|
Chris@283
|
198 for (int i = 0; i < list.size(); ++i) {
|
Chris@148
|
199
|
Chris@283
|
200 QString s(list[i].trimmed());
|
Chris@148
|
201
|
Chris@283
|
202 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
|
Chris@283
|
203 s = s.mid(1, s.length() - 2);
|
Chris@283
|
204 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
|
Chris@283
|
205 s = s.mid(1, s.length() - 2);
|
Chris@283
|
206 }
|
Chris@148
|
207
|
Chris@628
|
208 if (timingType == CSVFormat::ExplicitTiming) {
|
Chris@148
|
209
|
Chris@628
|
210 size_t calculatedFrame = 0;
|
Chris@628
|
211
|
Chris@628
|
212 if (i == 0 ||
|
Chris@628
|
213 (i == 1 &&
|
Chris@628
|
214 modelType == CSVFormat::TwoDimensionalModelWithDuration)) {
|
Chris@628
|
215
|
Chris@628
|
216 bool ok = false;
|
Chris@628
|
217 QString numeric = s;
|
Chris@628
|
218 numeric.remove(nonNumericRx);
|
Chris@628
|
219
|
Chris@628
|
220 if (timeUnits == CSVFormat::TimeSeconds) {
|
Chris@628
|
221
|
Chris@628
|
222 double time = numeric.toDouble(&ok);
|
Chris@628
|
223 calculatedFrame = int(time * sampleRate + 0.5);
|
Chris@628
|
224
|
Chris@628
|
225 } else {
|
Chris@628
|
226
|
Chris@628
|
227 calculatedFrame = numeric.toInt(&ok);
|
Chris@628
|
228
|
Chris@628
|
229 if (timeUnits == CSVFormat::TimeWindows) {
|
Chris@628
|
230 calculatedFrame *= windowSize;
|
Chris@628
|
231 }
|
Chris@628
|
232 }
|
Chris@628
|
233
|
Chris@628
|
234 if (!ok) {
|
Chris@628
|
235 if (warnings < warnLimit) {
|
Chris@628
|
236 std::cerr << "WARNING: CSVFileReader::load: "
|
Chris@628
|
237 << "Bad time format (\"" << s.toStdString()
|
Chris@628
|
238 << "\") in data line "
|
Chris@628
|
239 << lineno+1 << ":" << std::endl;
|
Chris@628
|
240 std::cerr << line.toStdString() << std::endl;
|
Chris@628
|
241 } else if (warnings == warnLimit) {
|
Chris@628
|
242 std::cerr << "WARNING: Too many warnings" << std::endl;
|
Chris@628
|
243 }
|
Chris@628
|
244 ++warnings;
|
Chris@628
|
245 }
|
Chris@628
|
246
|
Chris@628
|
247 if (i == 0) frameNo = calculatedFrame;
|
Chris@628
|
248 else {
|
Chris@628
|
249 if (durationType == CSVFormat::EndTimes) {
|
Chris@628
|
250 duration = calculatedFrame - frameNo;
|
Chris@628
|
251 } else {
|
Chris@628
|
252 duration = calculatedFrame;
|
Chris@628
|
253 }
|
Chris@628
|
254 }
|
Chris@628
|
255
|
Chris@628
|
256 continue;
|
Chris@628
|
257 }
|
Chris@628
|
258 }
|
Chris@628
|
259
|
Chris@628
|
260 if ((i == 1 &&
|
Chris@628
|
261 modelType == CSVFormat::TwoDimensionalModel) ||
|
Chris@628
|
262 (i == 2 &&
|
Chris@628
|
263 modelType == CSVFormat::TwoDimensionalModelWithDuration)) {
|
Chris@283
|
264 bool ok = false;
|
Chris@628
|
265 value = s.toFloat(&ok);
|
Chris@628
|
266 if (!ok) {
|
Chris@628
|
267 // cf. RDFImporter::fillModel
|
Chris@628
|
268 if (labelValueMap.find(s) == labelValueMap.end()) {
|
Chris@628
|
269 syntheticMax = syntheticMax + 1.f;
|
Chris@628
|
270 labelValueMap[s] = syntheticMax;
|
Chris@628
|
271 }
|
Chris@628
|
272 value = labelValueMap[s];
|
Chris@628
|
273 } else {
|
Chris@628
|
274 if (value > syntheticMax) syntheticMax = value;
|
Chris@628
|
275 }
|
Chris@628
|
276 if (i + 1 == list.size()) {
|
Chris@628
|
277 // keep text around for use as label (none other given)
|
Chris@628
|
278 tidyList.push_back(s);
|
Chris@628
|
279 }
|
Chris@628
|
280 continue;
|
Chris@628
|
281 }
|
Chris@148
|
282
|
Chris@628
|
283 tidyList.push_back(s);
|
Chris@283
|
284 }
|
Chris@148
|
285
|
Chris@392
|
286 if (modelType == CSVFormat::OneDimensionalModel) {
|
Chris@148
|
287
|
Chris@283
|
288 SparseOneDimensionalModel::Point point
|
Chris@283
|
289 (frameNo,
|
Chris@283
|
290 tidyList.size() > 0 ? tidyList[tidyList.size()-1] :
|
Chris@491
|
291 QString("%1").arg(lineno+1));
|
Chris@148
|
292
|
Chris@283
|
293 model1->addPoint(point);
|
Chris@148
|
294
|
Chris@392
|
295 } else if (modelType == CSVFormat::TwoDimensionalModel) {
|
Chris@148
|
296
|
Chris@283
|
297 SparseTimeValueModel::Point point
|
Chris@283
|
298 (frameNo,
|
Chris@628
|
299 value,
|
Chris@628
|
300 tidyList.size() > 0 ? tidyList[0] : QString("%1").arg(lineno+1));
|
Chris@148
|
301
|
Chris@283
|
302 model2->addPoint(point);
|
Chris@148
|
303
|
Chris@628
|
304 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
|
Chris@628
|
305
|
Chris@628
|
306 RegionModel::Point point
|
Chris@628
|
307 (frameNo,
|
Chris@628
|
308 value,
|
Chris@628
|
309 duration,
|
Chris@628
|
310 tidyList.size() > 0 ? tidyList[0] : QString("%1").arg(lineno+1));
|
Chris@628
|
311
|
Chris@628
|
312 model2a->addPoint(point);
|
Chris@628
|
313
|
Chris@392
|
314 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
|
Chris@148
|
315
|
Chris@283
|
316 DenseThreeDimensionalModel::Column values;
|
Chris@148
|
317
|
Chris@283
|
318 for (int i = 0; i < tidyList.size(); ++i) {
|
Chris@148
|
319
|
Chris@283
|
320 bool ok = false;
|
Chris@283
|
321 float value = list[i].toFloat(&ok);
|
Chris@611
|
322
|
Chris@611
|
323 if (i > 0 || timingType != CSVFormat::ExplicitTiming) {
|
Chris@611
|
324 values.push_back(value);
|
Chris@611
|
325 }
|
Chris@148
|
326
|
Chris@611
|
327 bool firstEver = (lineno == 0 && i == 0);
|
Chris@611
|
328
|
Chris@611
|
329 if (firstEver || value < min) min = value;
|
Chris@611
|
330 if (firstEver || value > max) max = value;
|
Chris@611
|
331
|
Chris@611
|
332 if (firstEver) {
|
Chris@611
|
333 startFrame = frameNo;
|
Chris@611
|
334 model3->setStartFrame(startFrame);
|
Chris@611
|
335 } else if (lineno == 1 &&
|
Chris@611
|
336 timingType == CSVFormat::ExplicitTiming) {
|
Chris@611
|
337 model3->setResolution(frameNo - startFrame);
|
Chris@611
|
338 }
|
Chris@148
|
339
|
Chris@283
|
340 if (!ok) {
|
Chris@283
|
341 if (warnings < warnLimit) {
|
Chris@283
|
342 std::cerr << "WARNING: CSVFileReader::load: "
|
Chris@390
|
343 << "Non-numeric value \""
|
Chris@390
|
344 << list[i].toStdString()
|
Chris@491
|
345 << "\" in data line " << lineno+1
|
Chris@283
|
346 << ":" << std::endl;
|
Chris@283
|
347 std::cerr << line.toStdString() << std::endl;
|
Chris@283
|
348 ++warnings;
|
Chris@283
|
349 } else if (warnings == warnLimit) {
|
Chris@390
|
350 // std::cerr << "WARNING: Too many warnings" << std::endl;
|
Chris@283
|
351 }
|
Chris@283
|
352 }
|
Chris@283
|
353 }
|
Chris@148
|
354
|
Chris@390
|
355 // std::cerr << "Setting bin values for count " << lineno << ", frame "
|
Chris@390
|
356 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl;
|
Chris@148
|
357
|
Chris@611
|
358 model3->setColumn(lineno, values);
|
Chris@283
|
359 }
|
Chris@148
|
360
|
Chris@283
|
361 ++lineno;
|
Chris@392
|
362 if (timingType == CSVFormat::ImplicitTiming ||
|
Chris@283
|
363 list.size() == 0) {
|
Chris@283
|
364 frameNo += windowSize;
|
Chris@283
|
365 }
|
Chris@283
|
366 }
|
Chris@148
|
367 }
|
Chris@148
|
368
|
Chris@392
|
369 if (modelType == CSVFormat::ThreeDimensionalModel) {
|
Chris@148
|
370 model3->setMinimumLevel(min);
|
Chris@148
|
371 model3->setMaximumLevel(max);
|
Chris@148
|
372 }
|
Chris@148
|
373
|
Chris@148
|
374 return model;
|
Chris@148
|
375 }
|
Chris@148
|
376
|