comparison data/fileio/CSVFileReader.cpp @ 742:c10cb8782576 coreaudio_tests

Merge from branch "default"
author Chris Cannam
date Sun, 01 Jul 2012 11:53:00 +0100
parents 1424aa29ae95
children e802e550a1f2
comparison
equal deleted inserted replaced
666:4efa7429cd85 742:c10cb8782576
15 15
16 #include "CSVFileReader.h" 16 #include "CSVFileReader.h"
17 17
18 #include "model/Model.h" 18 #include "model/Model.h"
19 #include "base/RealTime.h" 19 #include "base/RealTime.h"
20 #include "base/StringBits.h"
20 #include "model/SparseOneDimensionalModel.h" 21 #include "model/SparseOneDimensionalModel.h"
21 #include "model/SparseTimeValueModel.h" 22 #include "model/SparseTimeValueModel.h"
22 #include "model/EditableDenseThreeDimensionalModel.h" 23 #include "model/EditableDenseThreeDimensionalModel.h"
24 #include "model/RegionModel.h"
23 #include "DataFileReaderFactory.h" 25 #include "DataFileReaderFactory.h"
24 26
25 #include <QFile> 27 #include <QFile>
26 #include <QString> 28 #include <QString>
27 #include <QRegExp> 29 #include <QRegExp>
28 #include <QStringList> 30 #include <QStringList>
29 #include <QTextStream> 31 #include <QTextStream>
30 32
31 #include <iostream> 33 #include <iostream>
34 #include <map>
32 35
33 CSVFileReader::CSVFileReader(QString path, CSVFormat format, 36 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
34 size_t mainModelSampleRate) : 37 size_t mainModelSampleRate) :
35 m_format(format), 38 m_format(format),
36 m_file(0), 39 m_file(0),
40 m_warnings(0),
37 m_mainModelSampleRate(mainModelSampleRate) 41 m_mainModelSampleRate(mainModelSampleRate)
38 { 42 {
39 m_file = new QFile(path); 43 m_file = new QFile(path);
40 bool good = false; 44 bool good = false;
41 45
53 } 57 }
54 } 58 }
55 59
56 CSVFileReader::~CSVFileReader() 60 CSVFileReader::~CSVFileReader()
57 { 61 {
58 std::cerr << "CSVFileReader::~CSVFileReader: file is " << m_file << std::endl; 62 SVDEBUG << "CSVFileReader::~CSVFileReader: file is " << m_file << endl;
59 63
60 if (m_file) { 64 if (m_file) {
61 std::cerr << "CSVFileReader::CSVFileReader: Closing file" << std::endl; 65 SVDEBUG << "CSVFileReader::CSVFileReader: Closing file" << endl;
62 m_file->close(); 66 m_file->close();
63 } 67 }
64 delete m_file; 68 delete m_file;
65 } 69 }
66 70
74 CSVFileReader::getError() const 78 CSVFileReader::getError() const
75 { 79 {
76 return m_error; 80 return m_error;
77 } 81 }
78 82
83 size_t
84 CSVFileReader::convertTimeValue(QString s, int lineno, size_t sampleRate,
85 size_t windowSize) const
86 {
87 QRegExp nonNumericRx("[^0-9eE.,+-]");
88 unsigned int warnLimit = 10;
89
90 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
91
92 size_t calculatedFrame = 0;
93
94 bool ok = false;
95 QString numeric = s;
96 numeric.remove(nonNumericRx);
97
98 if (timeUnits == CSVFormat::TimeSeconds) {
99
100 double time = numeric.toDouble(&ok);
101 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
102 calculatedFrame = int(time * sampleRate + 0.5);
103
104 } else {
105
106 long n = numeric.toLong(&ok);
107 if (n >= 0) calculatedFrame = n;
108
109 if (timeUnits == CSVFormat::TimeWindows) {
110 calculatedFrame *= windowSize;
111 }
112 }
113
114 if (!ok) {
115 if (m_warnings < warnLimit) {
116 std::cerr << "WARNING: CSVFileReader::load: "
117 << "Bad time format (\"" << s.toStdString()
118 << "\") in data line "
119 << lineno+1 << std::endl;
120 } else if (m_warnings == warnLimit) {
121 std::cerr << "WARNING: Too many warnings" << std::endl;
122 }
123 ++m_warnings;
124 }
125
126 return calculatedFrame;
127 }
128
79 Model * 129 Model *
80 CSVFileReader::load() const 130 CSVFileReader::load() const
81 { 131 {
82 if (!m_file) return 0; 132 if (!m_file) return 0;
83 /*!!! 133
84 CSVFormatDialog *dialog = new CSVFormatDialog 134 CSVFormat::ModelType modelType = m_format.getModelType();
85 (0, m_file, m_mainModelSampleRate);
86
87 if (dialog->exec() == QDialog::Rejected) {
88 delete dialog;
89 throw DataFileReaderFactory::ImportCancelled;
90 }
91 */
92
93 CSVFormat::ModelType modelType = m_format.getModelType();
94 CSVFormat::TimingType timingType = m_format.getTimingType(); 135 CSVFormat::TimingType timingType = m_format.getTimingType();
95 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); 136 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
96 QString separator = m_format.getSeparator();
97 QString::SplitBehavior behaviour = m_format.getSplitBehaviour();
98 size_t sampleRate = m_format.getSampleRate(); 137 size_t sampleRate = m_format.getSampleRate();
99 size_t windowSize = m_format.getWindowSize(); 138 size_t windowSize = m_format.getWindowSize();
139 QChar separator = m_format.getSeparator();
140 bool allowQuoting = m_format.getAllowQuoting();
100 141
101 if (timingType == CSVFormat::ExplicitTiming) { 142 if (timingType == CSVFormat::ExplicitTiming) {
102 if (modelType == CSVFormat::ThreeDimensionalModel) { 143 if (modelType == CSVFormat::ThreeDimensionalModel) {
103 // This will be overridden later if more than one line 144 // This will be overridden later if more than one line
104 // appears in our file, but we want to choose a default 145 // appears in our file, but we want to choose a default
112 } 153 }
113 } 154 }
114 155
115 SparseOneDimensionalModel *model1 = 0; 156 SparseOneDimensionalModel *model1 = 0;
116 SparseTimeValueModel *model2 = 0; 157 SparseTimeValueModel *model2 = 0;
158 RegionModel *model2a = 0;
117 EditableDenseThreeDimensionalModel *model3 = 0; 159 EditableDenseThreeDimensionalModel *model3 = 0;
118 Model *model = 0; 160 Model *model = 0;
119 161
120 QTextStream in(m_file); 162 QTextStream in(m_file);
121 in.seek(0); 163 in.seek(0);
124 unsigned int lineno = 0; 166 unsigned int lineno = 0;
125 167
126 float min = 0.0, max = 0.0; 168 float min = 0.0, max = 0.0;
127 169
128 size_t frameNo = 0; 170 size_t frameNo = 0;
171 size_t duration = 0;
172 size_t endFrame = 0;
173
174 bool haveAnyValue = false;
175 bool haveEndTime = false;
176
129 size_t startFrame = 0; // for calculation of dense model resolution 177 size_t startFrame = 0; // for calculation of dense model resolution
178 bool firstEverValue = true;
179
180 std::map<QString, int> labelCountMap;
181
182 int valueColumns = 0;
183 for (int i = 0; i < m_format.getColumnCount(); ++i) {
184 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
185 ++valueColumns;
186 }
187 }
130 188
131 while (!in.atEnd()) { 189 while (!in.atEnd()) {
132 190
133 // QTextStream's readLine doesn't cope with old-style Mac 191 // QTextStream's readLine doesn't cope with old-style Mac
134 // CR-only line endings. Why did they bother making the class 192 // CR-only line endings. Why did they bother making the class
148 206
149 QString line = lines[li]; 207 QString line = lines[li];
150 208
151 if (line.startsWith("#")) continue; 209 if (line.startsWith("#")) continue;
152 210
153 QStringList list = line.split(separator, behaviour); 211 QStringList list = StringBits::split(line, separator, allowQuoting);
154
155 if (!model) { 212 if (!model) {
156 213
157 switch (modelType) { 214 switch (modelType) {
158 215
159 case CSVFormat::OneDimensionalModel: 216 case CSVFormat::OneDimensionalModel:
162 break; 219 break;
163 220
164 case CSVFormat::TwoDimensionalModel: 221 case CSVFormat::TwoDimensionalModel:
165 model2 = new SparseTimeValueModel(sampleRate, windowSize, false); 222 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
166 model = model2; 223 model = model2;
224 break;
225
226 case CSVFormat::TwoDimensionalModelWithDuration:
227 model2a = new RegionModel(sampleRate, windowSize, false);
228 model = model2a;
167 break; 229 break;
168 230
169 case CSVFormat::ThreeDimensionalModel: 231 case CSVFormat::ThreeDimensionalModel:
170 model3 = new EditableDenseThreeDimensionalModel 232 model3 = new EditableDenseThreeDimensionalModel
171 (sampleRate, 233 (sampleRate,
172 windowSize, 234 windowSize,
173 list.size(), 235 valueColumns,
174 EditableDenseThreeDimensionalModel::NoCompression); 236 EditableDenseThreeDimensionalModel::NoCompression);
175 model = model3; 237 model = model3;
176 break; 238 break;
177 } 239 }
178 } 240 }
179 241
180 QStringList tidyList; 242 float value = 0.f;
181 QRegExp nonNumericRx("[^0-9eE.,+-]"); 243 QString label = "";
244
245 duration = 0.f;
246 haveEndTime = false;
182 247
183 for (int i = 0; i < list.size(); ++i) { 248 for (int i = 0; i < list.size(); ++i) {
184 249
185 QString s(list[i].trimmed()); 250 QString s = list[i];
186 251
187 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { 252 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
188 s = s.mid(1, s.length() - 2); 253
189 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { 254 switch (purpose) {
190 s = s.mid(1, s.length() - 2); 255
256 case CSVFormat::ColumnUnknown:
257 break;
258
259 case CSVFormat::ColumnStartTime:
260 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
261 break;
262
263 case CSVFormat::ColumnEndTime:
264 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
265 haveEndTime = true;
266 break;
267
268 case CSVFormat::ColumnDuration:
269 duration = convertTimeValue(s, lineno, sampleRate, windowSize);
270 break;
271
272 case CSVFormat::ColumnValue:
273 value = s.toFloat();
274 haveAnyValue = true;
275 break;
276
277 case CSVFormat::ColumnLabel:
278 label = s;
279 ++labelCountMap[label];
280 break;
191 } 281 }
192 282 }
193 if (i == 0 && timingType == CSVFormat::ExplicitTiming) { 283
194 284 if (haveEndTime) { // ... calculate duration now all cols read
195 bool ok = false; 285 if (endFrame > frameNo) {
196 QString numeric = s; 286 duration = endFrame - frameNo;
197 numeric.remove(nonNumericRx);
198
199 if (timeUnits == CSVFormat::TimeSeconds) {
200
201 double time = numeric.toDouble(&ok);
202 frameNo = int(time * sampleRate + 0.5);
203
204 } else {
205
206 frameNo = numeric.toInt(&ok);
207
208 if (timeUnits == CSVFormat::TimeWindows) {
209 frameNo *= windowSize;
210 }
211 }
212
213 if (!ok) {
214 if (warnings < warnLimit) {
215 std::cerr << "WARNING: CSVFileReader::load: "
216 << "Bad time format (\"" << s.toStdString()
217 << "\") in data line "
218 << lineno+1 << ":" << std::endl;
219 std::cerr << line.toStdString() << std::endl;
220 } else if (warnings == warnLimit) {
221 std::cerr << "WARNING: Too many warnings" << std::endl;
222 }
223 ++warnings;
224 }
225 } else {
226 tidyList.push_back(s);
227 } 287 }
228 } 288 }
229 289
230 if (modelType == CSVFormat::OneDimensionalModel) { 290 if (modelType == CSVFormat::OneDimensionalModel) {
231 291
232 SparseOneDimensionalModel::Point point 292 SparseOneDimensionalModel::Point point(frameNo, label);
233 (frameNo,
234 tidyList.size() > 0 ? tidyList[tidyList.size()-1] :
235 QString("%1").arg(lineno+1));
236
237 model1->addPoint(point); 293 model1->addPoint(point);
238 294
239 } else if (modelType == CSVFormat::TwoDimensionalModel) { 295 } else if (modelType == CSVFormat::TwoDimensionalModel) {
240 296
241 SparseTimeValueModel::Point point 297 SparseTimeValueModel::Point point(frameNo, value, label);
242 (frameNo,
243 tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0,
244 tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno+1));
245
246 model2->addPoint(point); 298 model2->addPoint(point);
247 299
300 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
301
302 RegionModel::Point point(frameNo, value, duration, label);
303 model2a->addPoint(point);
304
248 } else if (modelType == CSVFormat::ThreeDimensionalModel) { 305 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
249 306
250 DenseThreeDimensionalModel::Column values; 307 DenseThreeDimensionalModel::Column values;
251 308
252 for (int i = 0; i < tidyList.size(); ++i) { 309 for (int i = 0; i < list.size(); ++i) {
310
311 if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) {
312 continue;
313 }
253 314
254 bool ok = false; 315 bool ok = false;
255 float value = list[i].toFloat(&ok); 316 float value = list[i].toFloat(&ok);
256 317
257 if (i > 0 || timingType != CSVFormat::ExplicitTiming) { 318 values.push_back(value);
258 values.push_back(value);
259 }
260 319
261 bool firstEver = (lineno == 0 && i == 0); 320 if (firstEverValue || value < min) min = value;
262 321 if (firstEverValue || value > max) max = value;
263 if (firstEver || value < min) min = value; 322
264 if (firstEver || value > max) max = value; 323 if (firstEverValue) {
265
266 if (firstEver) {
267 startFrame = frameNo; 324 startFrame = frameNo;
268 model3->setStartFrame(startFrame); 325 model3->setStartFrame(startFrame);
269 } else if (lineno == 1 && 326 } else if (lineno == 1 &&
270 timingType == CSVFormat::ExplicitTiming) { 327 timingType == CSVFormat::ExplicitTiming) {
271 model3->setResolution(frameNo - startFrame); 328 model3->setResolution(frameNo - startFrame);
272 } 329 }
330
331 firstEverValue = false;
273 332
274 if (!ok) { 333 if (!ok) {
275 if (warnings < warnLimit) { 334 if (warnings < warnLimit) {
276 std::cerr << "WARNING: CSVFileReader::load: " 335 std::cerr << "WARNING: CSVFileReader::load: "
277 << "Non-numeric value \"" 336 << "Non-numeric value \""
278 << list[i].toStdString() 337 << list[i].toStdString()
279 << "\" in data line " << lineno+1 338 << "\" in data line " << lineno+1
280 << ":" << std::endl; 339 << ":" << std::endl;
281 std::cerr << line.toStdString() << std::endl; 340 std::cerr << line << std::endl;
282 ++warnings; 341 ++warnings;
283 } else if (warnings == warnLimit) { 342 } else if (warnings == warnLimit) {
284 // std::cerr << "WARNING: Too many warnings" << std::endl; 343 // std::cerr << "WARNING: Too many warnings" << std::endl;
285 } 344 }
286 } 345 }
287 } 346 }
288 347
289 // std::cerr << "Setting bin values for count " << lineno << ", frame " 348 // SVDEBUG << "Setting bin values for count " << lineno << ", frame "
290 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl; 349 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
291 350
292 model3->setColumn(lineno, values); 351 model3->setColumn(lineno, values);
293 } 352 }
294 353
295 ++lineno; 354 ++lineno;
298 frameNo += windowSize; 357 frameNo += windowSize;
299 } 358 }
300 } 359 }
301 } 360 }
302 361
362 if (!haveAnyValue) {
363 if (model2a) {
364 // assign values for regions based on label frequency; we
365 // have this in our labelCountMap, sort of
366
367 std::map<int, std::map<QString, float> > countLabelValueMap;
368 for (std::map<QString, int>::iterator i = labelCountMap.begin();
369 i != labelCountMap.end(); ++i) {
370 countLabelValueMap[i->second][i->first] = 0.f;
371 }
372
373 float v = 0.f;
374 for (std::map<int, std::map<QString, float> >::iterator i =
375 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
376 --i;
377 for (std::map<QString, float>::iterator j = i->second.begin();
378 j != i->second.end(); ++j) {
379 j->second = v;
380 v = v + 1.f;
381 }
382 }
383
384 std::map<RegionModel::Point, RegionModel::Point,
385 RegionModel::Point::Comparator> pointMap;
386 for (RegionModel::PointList::const_iterator i =
387 model2a->getPoints().begin();
388 i != model2a->getPoints().end(); ++i) {
389 RegionModel::Point p(*i);
390 v = countLabelValueMap[labelCountMap[p.label]][p.label];
391 RegionModel::Point pp(p.frame, v, p.duration, p.label);
392 pointMap[p] = pp;
393 }
394
395 for (std::map<RegionModel::Point, RegionModel::Point>::iterator i =
396 pointMap.begin(); i != pointMap.end(); ++i) {
397 model2a->deletePoint(i->first);
398 model2a->addPoint(i->second);
399 }
400 }
401 }
402
303 if (modelType == CSVFormat::ThreeDimensionalModel) { 403 if (modelType == CSVFormat::ThreeDimensionalModel) {
304 model3->setMinimumLevel(min); 404 model3->setMinimumLevel(min);
305 model3->setMaximumLevel(max); 405 model3->setMaximumLevel(max);
306 } 406 }
307 407