Chris@148
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@148
|
2
|
Chris@148
|
3 /*
|
Chris@148
|
4 Sonic Visualiser
|
Chris@148
|
5 An audio file viewer and annotation editor.
|
Chris@148
|
6 Centre for Digital Music, Queen Mary, University of London.
|
Chris@148
|
7 This file copyright 2006 Chris Cannam.
|
Chris@148
|
8
|
Chris@148
|
9 This program is free software; you can redistribute it and/or
|
Chris@148
|
10 modify it under the terms of the GNU General Public License as
|
Chris@148
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@148
|
12 License, or (at your option) any later version. See the file
|
Chris@148
|
13 COPYING included with this distribution for more information.
|
Chris@148
|
14 */
|
Chris@148
|
15
|
Chris@148
|
16 #include "CSVFileReader.h"
|
Chris@148
|
17
|
Chris@150
|
18 #include "model/Model.h"
|
Chris@148
|
19 #include "base/RealTime.h"
|
Chris@631
|
20 #include "base/StringBits.h"
|
Chris@148
|
21 #include "model/SparseOneDimensionalModel.h"
|
Chris@148
|
22 #include "model/SparseTimeValueModel.h"
|
Chris@152
|
23 #include "model/EditableDenseThreeDimensionalModel.h"
|
Chris@628
|
24 #include "model/RegionModel.h"
|
Chris@308
|
25 #include "DataFileReaderFactory.h"
|
Chris@148
|
26
|
Chris@148
|
27 #include <QFile>
|
Chris@148
|
28 #include <QString>
|
Chris@148
|
29 #include <QRegExp>
|
Chris@148
|
30 #include <QStringList>
|
Chris@148
|
31 #include <QTextStream>
|
Chris@148
|
32
|
Chris@148
|
33 #include <iostream>
|
Chris@628
|
34 #include <map>
|
Chris@148
|
35
|
Chris@392
|
36 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
|
Chris@392
|
37 size_t mainModelSampleRate) :
|
Chris@392
|
38 m_format(format),
|
Chris@148
|
39 m_file(0),
|
Chris@631
|
40 m_warnings(0),
|
Chris@148
|
41 m_mainModelSampleRate(mainModelSampleRate)
|
Chris@148
|
42 {
|
Chris@148
|
43 m_file = new QFile(path);
|
Chris@148
|
44 bool good = false;
|
Chris@148
|
45
|
Chris@148
|
46 if (!m_file->exists()) {
|
Chris@148
|
47 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
|
Chris@148
|
48 } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
|
Chris@148
|
49 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
|
Chris@148
|
50 } else {
|
Chris@148
|
51 good = true;
|
Chris@148
|
52 }
|
Chris@148
|
53
|
Chris@148
|
54 if (!good) {
|
Chris@148
|
55 delete m_file;
|
Chris@148
|
56 m_file = 0;
|
Chris@148
|
57 }
|
Chris@148
|
58 }
|
Chris@148
|
59
|
Chris@148
|
60 CSVFileReader::~CSVFileReader()
|
Chris@148
|
61 {
|
Chris@148
|
62 std::cerr << "CSVFileReader::~CSVFileReader: file is " << m_file << std::endl;
|
Chris@148
|
63
|
Chris@148
|
64 if (m_file) {
|
Chris@148
|
65 std::cerr << "CSVFileReader::CSVFileReader: Closing file" << std::endl;
|
Chris@148
|
66 m_file->close();
|
Chris@148
|
67 }
|
Chris@148
|
68 delete m_file;
|
Chris@148
|
69 }
|
Chris@148
|
70
|
Chris@148
|
71 bool
|
Chris@148
|
72 CSVFileReader::isOK() const
|
Chris@148
|
73 {
|
Chris@148
|
74 return (m_file != 0);
|
Chris@148
|
75 }
|
Chris@148
|
76
|
Chris@148
|
77 QString
|
Chris@148
|
78 CSVFileReader::getError() const
|
Chris@148
|
79 {
|
Chris@148
|
80 return m_error;
|
Chris@148
|
81 }
|
Chris@148
|
82
|
Chris@631
|
83 size_t
|
Chris@631
|
84 CSVFileReader::convertTimeValue(QString s, int lineno, size_t sampleRate,
|
Chris@631
|
85 size_t windowSize) const
|
Chris@631
|
86 {
|
Chris@631
|
87 QRegExp nonNumericRx("[^0-9eE.,+-]");
|
Chris@631
|
88 unsigned int warnLimit = 10;
|
Chris@631
|
89
|
Chris@631
|
90 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
|
Chris@631
|
91
|
Chris@631
|
92 size_t calculatedFrame = 0;
|
Chris@631
|
93
|
Chris@631
|
94 bool ok = false;
|
Chris@631
|
95 QString numeric = s;
|
Chris@631
|
96 numeric.remove(nonNumericRx);
|
Chris@631
|
97
|
Chris@631
|
98 if (timeUnits == CSVFormat::TimeSeconds) {
|
Chris@631
|
99
|
Chris@631
|
100 double time = numeric.toDouble(&ok);
|
Chris@631
|
101 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
|
Chris@631
|
102 calculatedFrame = int(time * sampleRate + 0.5);
|
Chris@631
|
103
|
Chris@631
|
104 } else {
|
Chris@631
|
105
|
Chris@631
|
106 long n = numeric.toLong(&ok);
|
Chris@631
|
107 if (n >= 0) calculatedFrame = n;
|
Chris@631
|
108
|
Chris@631
|
109 if (timeUnits == CSVFormat::TimeWindows) {
|
Chris@631
|
110 calculatedFrame *= windowSize;
|
Chris@631
|
111 }
|
Chris@631
|
112 }
|
Chris@631
|
113
|
Chris@631
|
114 if (!ok) {
|
Chris@631
|
115 if (m_warnings < warnLimit) {
|
Chris@631
|
116 std::cerr << "WARNING: CSVFileReader::load: "
|
Chris@631
|
117 << "Bad time format (\"" << s.toStdString()
|
Chris@631
|
118 << "\") in data line "
|
Chris@631
|
119 << lineno+1 << std::endl;
|
Chris@631
|
120 } else if (m_warnings == warnLimit) {
|
Chris@631
|
121 std::cerr << "WARNING: Too many warnings" << std::endl;
|
Chris@631
|
122 }
|
Chris@631
|
123 ++m_warnings;
|
Chris@631
|
124 }
|
Chris@631
|
125
|
Chris@631
|
126 return calculatedFrame;
|
Chris@631
|
127 }
|
Chris@631
|
128
|
Chris@148
|
129 Model *
|
Chris@148
|
130 CSVFileReader::load() const
|
Chris@148
|
131 {
|
Chris@148
|
132 if (!m_file) return 0;
|
Chris@148
|
133
|
Chris@628
|
134 CSVFormat::ModelType modelType = m_format.getModelType();
|
Chris@392
|
135 CSVFormat::TimingType timingType = m_format.getTimingType();
|
Chris@628
|
136 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
|
Chris@392
|
137 size_t sampleRate = m_format.getSampleRate();
|
Chris@392
|
138 size_t windowSize = m_format.getWindowSize();
|
Chris@631
|
139 QChar separator = m_format.getSeparator();
|
Chris@631
|
140 bool allowQuoting = m_format.getAllowQuoting();
|
Chris@148
|
141
|
Chris@392
|
142 if (timingType == CSVFormat::ExplicitTiming) {
|
Chris@611
|
143 if (modelType == CSVFormat::ThreeDimensionalModel) {
|
Chris@611
|
144 // This will be overridden later if more than one line
|
Chris@611
|
145 // appears in our file, but we want to choose a default
|
Chris@611
|
146 // that's likely to be visible
|
Chris@611
|
147 windowSize = 1024;
|
Chris@611
|
148 } else {
|
Chris@611
|
149 windowSize = 1;
|
Chris@611
|
150 }
|
Chris@392
|
151 if (timeUnits == CSVFormat::TimeSeconds) {
|
Chris@148
|
152 sampleRate = m_mainModelSampleRate;
|
Chris@148
|
153 }
|
Chris@148
|
154 }
|
Chris@148
|
155
|
Chris@148
|
156 SparseOneDimensionalModel *model1 = 0;
|
Chris@148
|
157 SparseTimeValueModel *model2 = 0;
|
Chris@628
|
158 RegionModel *model2a = 0;
|
Chris@152
|
159 EditableDenseThreeDimensionalModel *model3 = 0;
|
Chris@148
|
160 Model *model = 0;
|
Chris@148
|
161
|
Chris@148
|
162 QTextStream in(m_file);
|
Chris@148
|
163 in.seek(0);
|
Chris@148
|
164
|
Chris@148
|
165 unsigned int warnings = 0, warnLimit = 10;
|
Chris@148
|
166 unsigned int lineno = 0;
|
Chris@148
|
167
|
Chris@148
|
168 float min = 0.0, max = 0.0;
|
Chris@148
|
169
|
Chris@148
|
170 size_t frameNo = 0;
|
Chris@628
|
171 size_t duration = 0;
|
Chris@631
|
172 size_t endFrame = 0;
|
Chris@631
|
173
|
Chris@631
|
174 bool haveAnyValue = false;
|
Chris@631
|
175 bool haveEndTime = false;
|
Chris@631
|
176
|
Chris@611
|
177 size_t startFrame = 0; // for calculation of dense model resolution
|
Chris@631
|
178 bool firstEverValue = true;
|
Chris@148
|
179
|
Chris@631
|
180 std::map<QString, int> labelCountMap;
|
Chris@631
|
181
|
Chris@148
|
182 while (!in.atEnd()) {
|
Chris@148
|
183
|
Chris@283
|
184 // QTextStream's readLine doesn't cope with old-style Mac
|
Chris@283
|
185 // CR-only line endings. Why did they bother making the class
|
Chris@283
|
186 // cope with more than one sort of line ending, if it still
|
Chris@283
|
187 // can't be configured to cope with all the common sorts?
|
Chris@148
|
188
|
Chris@283
|
189 // For the time being we'll deal with this case (which is
|
Chris@283
|
190 // relatively uncommon for us, but still necessary to handle)
|
Chris@283
|
191 // by reading the entire file using a single readLine, and
|
Chris@283
|
192 // splitting it. For CR and CR/LF line endings this will just
|
Chris@283
|
193 // read a line at a time, and that's obviously OK.
|
Chris@148
|
194
|
Chris@283
|
195 QString chunk = in.readLine();
|
Chris@283
|
196 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
|
Chris@283
|
197
|
Chris@283
|
198 for (size_t li = 0; li < lines.size(); ++li) {
|
Chris@148
|
199
|
Chris@283
|
200 QString line = lines[li];
|
Chris@148
|
201
|
Chris@283
|
202 if (line.startsWith("#")) continue;
|
Chris@283
|
203
|
Chris@631
|
204 QStringList list = StringBits::split(line, separator, allowQuoting);
|
Chris@283
|
205 if (!model) {
|
Chris@283
|
206
|
Chris@283
|
207 switch (modelType) {
|
Chris@283
|
208
|
Chris@392
|
209 case CSVFormat::OneDimensionalModel:
|
Chris@283
|
210 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
|
Chris@283
|
211 model = model1;
|
Chris@283
|
212 break;
|
Chris@148
|
213
|
Chris@392
|
214 case CSVFormat::TwoDimensionalModel:
|
Chris@283
|
215 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
|
Chris@283
|
216 model = model2;
|
Chris@283
|
217 break;
|
Chris@148
|
218
|
Chris@628
|
219 case CSVFormat::TwoDimensionalModelWithDuration:
|
Chris@628
|
220 model2a = new RegionModel(sampleRate, windowSize, false);
|
Chris@628
|
221 model = model2a;
|
Chris@628
|
222 break;
|
Chris@628
|
223
|
Chris@392
|
224 case CSVFormat::ThreeDimensionalModel:
|
Chris@535
|
225 model3 = new EditableDenseThreeDimensionalModel
|
Chris@535
|
226 (sampleRate,
|
Chris@535
|
227 windowSize,
|
Chris@535
|
228 list.size(),
|
Chris@535
|
229 EditableDenseThreeDimensionalModel::NoCompression);
|
Chris@283
|
230 model = model3;
|
Chris@283
|
231 break;
|
Chris@283
|
232 }
|
Chris@283
|
233 }
|
Chris@148
|
234
|
Chris@631
|
235 float value = 0.f;
|
Chris@631
|
236 QString label = "";
|
Chris@148
|
237
|
Chris@631
|
238 duration = 0.f;
|
Chris@631
|
239 haveEndTime = false;
|
Chris@628
|
240
|
Chris@283
|
241 for (int i = 0; i < list.size(); ++i) {
|
Chris@148
|
242
|
Chris@631
|
243 QString s = list[i];
|
Chris@631
|
244
|
Chris@631
|
245 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
|
Chris@631
|
246
|
Chris@631
|
247 switch (purpose) {
|
Chris@631
|
248
|
Chris@631
|
249 case CSVFormat::ColumnUnknown:
|
Chris@631
|
250 break;
|
Chris@631
|
251
|
Chris@631
|
252 case CSVFormat::ColumnStartTime:
|
Chris@631
|
253 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
|
Chris@631
|
254 break;
|
Chris@631
|
255
|
Chris@631
|
256 case CSVFormat::ColumnEndTime:
|
Chris@631
|
257 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
|
Chris@631
|
258 haveEndTime = true;
|
Chris@631
|
259 break;
|
Chris@631
|
260
|
Chris@631
|
261 case CSVFormat::ColumnDuration:
|
Chris@631
|
262 duration = convertTimeValue(s, lineno, sampleRate, windowSize);
|
Chris@631
|
263 break;
|
Chris@631
|
264
|
Chris@631
|
265 case CSVFormat::ColumnValue:
|
Chris@631
|
266 value = s.toFloat();
|
Chris@631
|
267 haveAnyValue = true;
|
Chris@631
|
268 break;
|
Chris@631
|
269
|
Chris@631
|
270 case CSVFormat::ColumnLabel:
|
Chris@631
|
271 label = s;
|
Chris@631
|
272 ++labelCountMap[label];
|
Chris@631
|
273 break;
|
Chris@283
|
274 }
|
Chris@631
|
275 }
|
Chris@148
|
276
|
Chris@631
|
277 if (haveEndTime) { // ... calculate duration now all cols read
|
Chris@631
|
278 if (endFrame > frameNo) {
|
Chris@631
|
279 duration = endFrame - frameNo;
|
Chris@628
|
280 }
|
Chris@283
|
281 }
|
Chris@148
|
282
|
Chris@392
|
283 if (modelType == CSVFormat::OneDimensionalModel) {
|
Chris@148
|
284
|
Chris@631
|
285 SparseOneDimensionalModel::Point point(frameNo, label);
|
Chris@283
|
286 model1->addPoint(point);
|
Chris@148
|
287
|
Chris@392
|
288 } else if (modelType == CSVFormat::TwoDimensionalModel) {
|
Chris@148
|
289
|
Chris@631
|
290 SparseTimeValueModel::Point point(frameNo, value, label);
|
Chris@283
|
291 model2->addPoint(point);
|
Chris@148
|
292
|
Chris@628
|
293 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
|
Chris@628
|
294
|
Chris@631
|
295 RegionModel::Point point(frameNo, value, duration, label);
|
Chris@628
|
296 model2a->addPoint(point);
|
Chris@628
|
297
|
Chris@392
|
298 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
|
Chris@148
|
299
|
Chris@283
|
300 DenseThreeDimensionalModel::Column values;
|
Chris@148
|
301
|
Chris@631
|
302 for (int i = 0; i < list.size(); ++i) {
|
Chris@148
|
303
|
Chris@283
|
304 bool ok = false;
|
Chris@283
|
305 float value = list[i].toFloat(&ok);
|
Chris@611
|
306
|
Chris@631
|
307 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
|
Chris@611
|
308 values.push_back(value);
|
Chris@611
|
309 }
|
Chris@148
|
310
|
Chris@631
|
311 if (firstEverValue || value < min) min = value;
|
Chris@631
|
312 if (firstEverValue || value > max) max = value;
|
Chris@611
|
313
|
Chris@631
|
314 if (firstEverValue) {
|
Chris@611
|
315 startFrame = frameNo;
|
Chris@611
|
316 model3->setStartFrame(startFrame);
|
Chris@611
|
317 } else if (lineno == 1 &&
|
Chris@611
|
318 timingType == CSVFormat::ExplicitTiming) {
|
Chris@611
|
319 model3->setResolution(frameNo - startFrame);
|
Chris@611
|
320 }
|
Chris@631
|
321
|
Chris@631
|
322 firstEverValue = false;
|
Chris@148
|
323
|
Chris@283
|
324 if (!ok) {
|
Chris@283
|
325 if (warnings < warnLimit) {
|
Chris@283
|
326 std::cerr << "WARNING: CSVFileReader::load: "
|
Chris@390
|
327 << "Non-numeric value \""
|
Chris@390
|
328 << list[i].toStdString()
|
Chris@491
|
329 << "\" in data line " << lineno+1
|
Chris@283
|
330 << ":" << std::endl;
|
Chris@283
|
331 std::cerr << line.toStdString() << std::endl;
|
Chris@283
|
332 ++warnings;
|
Chris@283
|
333 } else if (warnings == warnLimit) {
|
Chris@390
|
334 // std::cerr << "WARNING: Too many warnings" << std::endl;
|
Chris@283
|
335 }
|
Chris@283
|
336 }
|
Chris@283
|
337 }
|
Chris@148
|
338
|
Chris@390
|
339 // std::cerr << "Setting bin values for count " << lineno << ", frame "
|
Chris@390
|
340 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl;
|
Chris@148
|
341
|
Chris@611
|
342 model3->setColumn(lineno, values);
|
Chris@283
|
343 }
|
Chris@148
|
344
|
Chris@283
|
345 ++lineno;
|
Chris@392
|
346 if (timingType == CSVFormat::ImplicitTiming ||
|
Chris@283
|
347 list.size() == 0) {
|
Chris@283
|
348 frameNo += windowSize;
|
Chris@283
|
349 }
|
Chris@283
|
350 }
|
Chris@148
|
351 }
|
Chris@148
|
352
|
Chris@631
|
353 if (!haveAnyValue) {
|
Chris@631
|
354 if (model2a) {
|
Chris@631
|
355 // assign values for regions based on label frequency; we
|
Chris@631
|
356 // have this in our labelCountMap, sort of
|
Chris@631
|
357
|
Chris@631
|
358 std::map<int, std::map<QString, float> > countLabelValueMap;
|
Chris@631
|
359 for (std::map<QString, int>::iterator i = labelCountMap.begin();
|
Chris@631
|
360 i != labelCountMap.end(); ++i) {
|
Chris@631
|
361 countLabelValueMap[i->second][i->first] = 0.f;
|
Chris@631
|
362 }
|
Chris@631
|
363
|
Chris@631
|
364 float v = 0.f;
|
Chris@631
|
365 for (std::map<int, std::map<QString, float> >::iterator i =
|
Chris@631
|
366 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
|
Chris@631
|
367 --i;
|
Chris@631
|
368 for (std::map<QString, float>::iterator j = i->second.begin();
|
Chris@631
|
369 j != i->second.end(); ++j) {
|
Chris@631
|
370 j->second = v;
|
Chris@631
|
371 v = v + 1.f;
|
Chris@631
|
372 }
|
Chris@631
|
373 }
|
Chris@631
|
374
|
Chris@631
|
375 std::map<RegionModel::Point, RegionModel::Point,
|
Chris@631
|
376 RegionModel::Point::Comparator> pointMap;
|
Chris@631
|
377 for (RegionModel::PointList::const_iterator i =
|
Chris@631
|
378 model2a->getPoints().begin();
|
Chris@631
|
379 i != model2a->getPoints().end(); ++i) {
|
Chris@631
|
380 RegionModel::Point p(*i);
|
Chris@631
|
381 v = countLabelValueMap[labelCountMap[p.label]][p.label];
|
Chris@631
|
382 RegionModel::Point pp(p.frame, v, p.duration, p.label);
|
Chris@631
|
383 pointMap[p] = pp;
|
Chris@631
|
384 }
|
Chris@631
|
385
|
Chris@631
|
386 for (std::map<RegionModel::Point, RegionModel::Point>::iterator i =
|
Chris@631
|
387 pointMap.begin(); i != pointMap.end(); ++i) {
|
Chris@631
|
388 model2a->deletePoint(i->first);
|
Chris@631
|
389 model2a->addPoint(i->second);
|
Chris@631
|
390 }
|
Chris@631
|
391 }
|
Chris@631
|
392 }
|
Chris@631
|
393
|
Chris@392
|
394 if (modelType == CSVFormat::ThreeDimensionalModel) {
|
Chris@148
|
395 model3->setMinimumLevel(min);
|
Chris@148
|
396 model3->setMaximumLevel(max);
|
Chris@148
|
397 }
|
Chris@148
|
398
|
Chris@148
|
399 return model;
|
Chris@148
|
400 }
|
Chris@148
|
401
|