Mercurial > hg > svcore
comparison data/fileio/CSVFileReader.cpp @ 742:c10cb8782576 coreaudio_tests
Merge from branch "default"
author | Chris Cannam |
---|---|
date | Sun, 01 Jul 2012 11:53:00 +0100 |
parents | 1424aa29ae95 |
children | e802e550a1f2 |
comparison
equal
deleted
inserted
replaced
666:4efa7429cd85 | 742:c10cb8782576 |
---|---|
15 | 15 |
16 #include "CSVFileReader.h" | 16 #include "CSVFileReader.h" |
17 | 17 |
18 #include "model/Model.h" | 18 #include "model/Model.h" |
19 #include "base/RealTime.h" | 19 #include "base/RealTime.h" |
20 #include "base/StringBits.h" | |
20 #include "model/SparseOneDimensionalModel.h" | 21 #include "model/SparseOneDimensionalModel.h" |
21 #include "model/SparseTimeValueModel.h" | 22 #include "model/SparseTimeValueModel.h" |
22 #include "model/EditableDenseThreeDimensionalModel.h" | 23 #include "model/EditableDenseThreeDimensionalModel.h" |
24 #include "model/RegionModel.h" | |
23 #include "DataFileReaderFactory.h" | 25 #include "DataFileReaderFactory.h" |
24 | 26 |
25 #include <QFile> | 27 #include <QFile> |
26 #include <QString> | 28 #include <QString> |
27 #include <QRegExp> | 29 #include <QRegExp> |
28 #include <QStringList> | 30 #include <QStringList> |
29 #include <QTextStream> | 31 #include <QTextStream> |
30 | 32 |
31 #include <iostream> | 33 #include <iostream> |
34 #include <map> | |
32 | 35 |
33 CSVFileReader::CSVFileReader(QString path, CSVFormat format, | 36 CSVFileReader::CSVFileReader(QString path, CSVFormat format, |
34 size_t mainModelSampleRate) : | 37 size_t mainModelSampleRate) : |
35 m_format(format), | 38 m_format(format), |
36 m_file(0), | 39 m_file(0), |
40 m_warnings(0), | |
37 m_mainModelSampleRate(mainModelSampleRate) | 41 m_mainModelSampleRate(mainModelSampleRate) |
38 { | 42 { |
39 m_file = new QFile(path); | 43 m_file = new QFile(path); |
40 bool good = false; | 44 bool good = false; |
41 | 45 |
53 } | 57 } |
54 } | 58 } |
55 | 59 |
56 CSVFileReader::~CSVFileReader() | 60 CSVFileReader::~CSVFileReader() |
57 { | 61 { |
58 std::cerr << "CSVFileReader::~CSVFileReader: file is " << m_file << std::endl; | 62 SVDEBUG << "CSVFileReader::~CSVFileReader: file is " << m_file << endl; |
59 | 63 |
60 if (m_file) { | 64 if (m_file) { |
61 std::cerr << "CSVFileReader::CSVFileReader: Closing file" << std::endl; | 65 SVDEBUG << "CSVFileReader::CSVFileReader: Closing file" << endl; |
62 m_file->close(); | 66 m_file->close(); |
63 } | 67 } |
64 delete m_file; | 68 delete m_file; |
65 } | 69 } |
66 | 70 |
74 CSVFileReader::getError() const | 78 CSVFileReader::getError() const |
75 { | 79 { |
76 return m_error; | 80 return m_error; |
77 } | 81 } |
78 | 82 |
83 size_t | |
84 CSVFileReader::convertTimeValue(QString s, int lineno, size_t sampleRate, | |
85 size_t windowSize) const | |
86 { | |
87 QRegExp nonNumericRx("[^0-9eE.,+-]"); | |
88 unsigned int warnLimit = 10; | |
89 | |
90 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); | |
91 | |
92 size_t calculatedFrame = 0; | |
93 | |
94 bool ok = false; | |
95 QString numeric = s; | |
96 numeric.remove(nonNumericRx); | |
97 | |
98 if (timeUnits == CSVFormat::TimeSeconds) { | |
99 | |
100 double time = numeric.toDouble(&ok); | |
101 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok); | |
102 calculatedFrame = int(time * sampleRate + 0.5); | |
103 | |
104 } else { | |
105 | |
106 long n = numeric.toLong(&ok); | |
107 if (n >= 0) calculatedFrame = n; | |
108 | |
109 if (timeUnits == CSVFormat::TimeWindows) { | |
110 calculatedFrame *= windowSize; | |
111 } | |
112 } | |
113 | |
114 if (!ok) { | |
115 if (m_warnings < warnLimit) { | |
116 std::cerr << "WARNING: CSVFileReader::load: " | |
117 << "Bad time format (\"" << s.toStdString() | |
118 << "\") in data line " | |
119 << lineno+1 << std::endl; | |
120 } else if (m_warnings == warnLimit) { | |
121 std::cerr << "WARNING: Too many warnings" << std::endl; | |
122 } | |
123 ++m_warnings; | |
124 } | |
125 | |
126 return calculatedFrame; | |
127 } | |
128 | |
79 Model * | 129 Model * |
80 CSVFileReader::load() const | 130 CSVFileReader::load() const |
81 { | 131 { |
82 if (!m_file) return 0; | 132 if (!m_file) return 0; |
83 /*!!! | 133 |
84 CSVFormatDialog *dialog = new CSVFormatDialog | 134 CSVFormat::ModelType modelType = m_format.getModelType(); |
85 (0, m_file, m_mainModelSampleRate); | |
86 | |
87 if (dialog->exec() == QDialog::Rejected) { | |
88 delete dialog; | |
89 throw DataFileReaderFactory::ImportCancelled; | |
90 } | |
91 */ | |
92 | |
93 CSVFormat::ModelType modelType = m_format.getModelType(); | |
94 CSVFormat::TimingType timingType = m_format.getTimingType(); | 135 CSVFormat::TimingType timingType = m_format.getTimingType(); |
95 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); | 136 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); |
96 QString separator = m_format.getSeparator(); | |
97 QString::SplitBehavior behaviour = m_format.getSplitBehaviour(); | |
98 size_t sampleRate = m_format.getSampleRate(); | 137 size_t sampleRate = m_format.getSampleRate(); |
99 size_t windowSize = m_format.getWindowSize(); | 138 size_t windowSize = m_format.getWindowSize(); |
139 QChar separator = m_format.getSeparator(); | |
140 bool allowQuoting = m_format.getAllowQuoting(); | |
100 | 141 |
101 if (timingType == CSVFormat::ExplicitTiming) { | 142 if (timingType == CSVFormat::ExplicitTiming) { |
102 if (modelType == CSVFormat::ThreeDimensionalModel) { | 143 if (modelType == CSVFormat::ThreeDimensionalModel) { |
103 // This will be overridden later if more than one line | 144 // This will be overridden later if more than one line |
104 // appears in our file, but we want to choose a default | 145 // appears in our file, but we want to choose a default |
112 } | 153 } |
113 } | 154 } |
114 | 155 |
115 SparseOneDimensionalModel *model1 = 0; | 156 SparseOneDimensionalModel *model1 = 0; |
116 SparseTimeValueModel *model2 = 0; | 157 SparseTimeValueModel *model2 = 0; |
158 RegionModel *model2a = 0; | |
117 EditableDenseThreeDimensionalModel *model3 = 0; | 159 EditableDenseThreeDimensionalModel *model3 = 0; |
118 Model *model = 0; | 160 Model *model = 0; |
119 | 161 |
120 QTextStream in(m_file); | 162 QTextStream in(m_file); |
121 in.seek(0); | 163 in.seek(0); |
124 unsigned int lineno = 0; | 166 unsigned int lineno = 0; |
125 | 167 |
126 float min = 0.0, max = 0.0; | 168 float min = 0.0, max = 0.0; |
127 | 169 |
128 size_t frameNo = 0; | 170 size_t frameNo = 0; |
171 size_t duration = 0; | |
172 size_t endFrame = 0; | |
173 | |
174 bool haveAnyValue = false; | |
175 bool haveEndTime = false; | |
176 | |
129 size_t startFrame = 0; // for calculation of dense model resolution | 177 size_t startFrame = 0; // for calculation of dense model resolution |
178 bool firstEverValue = true; | |
179 | |
180 std::map<QString, int> labelCountMap; | |
181 | |
182 int valueColumns = 0; | |
183 for (int i = 0; i < m_format.getColumnCount(); ++i) { | |
184 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) { | |
185 ++valueColumns; | |
186 } | |
187 } | |
130 | 188 |
131 while (!in.atEnd()) { | 189 while (!in.atEnd()) { |
132 | 190 |
133 // QTextStream's readLine doesn't cope with old-style Mac | 191 // QTextStream's readLine doesn't cope with old-style Mac |
134 // CR-only line endings. Why did they bother making the class | 192 // CR-only line endings. Why did they bother making the class |
148 | 206 |
149 QString line = lines[li]; | 207 QString line = lines[li]; |
150 | 208 |
151 if (line.startsWith("#")) continue; | 209 if (line.startsWith("#")) continue; |
152 | 210 |
153 QStringList list = line.split(separator, behaviour); | 211 QStringList list = StringBits::split(line, separator, allowQuoting); |
154 | |
155 if (!model) { | 212 if (!model) { |
156 | 213 |
157 switch (modelType) { | 214 switch (modelType) { |
158 | 215 |
159 case CSVFormat::OneDimensionalModel: | 216 case CSVFormat::OneDimensionalModel: |
162 break; | 219 break; |
163 | 220 |
164 case CSVFormat::TwoDimensionalModel: | 221 case CSVFormat::TwoDimensionalModel: |
165 model2 = new SparseTimeValueModel(sampleRate, windowSize, false); | 222 model2 = new SparseTimeValueModel(sampleRate, windowSize, false); |
166 model = model2; | 223 model = model2; |
224 break; | |
225 | |
226 case CSVFormat::TwoDimensionalModelWithDuration: | |
227 model2a = new RegionModel(sampleRate, windowSize, false); | |
228 model = model2a; | |
167 break; | 229 break; |
168 | 230 |
169 case CSVFormat::ThreeDimensionalModel: | 231 case CSVFormat::ThreeDimensionalModel: |
170 model3 = new EditableDenseThreeDimensionalModel | 232 model3 = new EditableDenseThreeDimensionalModel |
171 (sampleRate, | 233 (sampleRate, |
172 windowSize, | 234 windowSize, |
173 list.size(), | 235 valueColumns, |
174 EditableDenseThreeDimensionalModel::NoCompression); | 236 EditableDenseThreeDimensionalModel::NoCompression); |
175 model = model3; | 237 model = model3; |
176 break; | 238 break; |
177 } | 239 } |
178 } | 240 } |
179 | 241 |
180 QStringList tidyList; | 242 float value = 0.f; |
181 QRegExp nonNumericRx("[^0-9eE.,+-]"); | 243 QString label = ""; |
244 | |
245 duration = 0.f; | |
246 haveEndTime = false; | |
182 | 247 |
183 for (int i = 0; i < list.size(); ++i) { | 248 for (int i = 0; i < list.size(); ++i) { |
184 | 249 |
185 QString s(list[i].trimmed()); | 250 QString s = list[i]; |
186 | 251 |
187 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { | 252 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i); |
188 s = s.mid(1, s.length() - 2); | 253 |
189 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { | 254 switch (purpose) { |
190 s = s.mid(1, s.length() - 2); | 255 |
256 case CSVFormat::ColumnUnknown: | |
257 break; | |
258 | |
259 case CSVFormat::ColumnStartTime: | |
260 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize); | |
261 break; | |
262 | |
263 case CSVFormat::ColumnEndTime: | |
264 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize); | |
265 haveEndTime = true; | |
266 break; | |
267 | |
268 case CSVFormat::ColumnDuration: | |
269 duration = convertTimeValue(s, lineno, sampleRate, windowSize); | |
270 break; | |
271 | |
272 case CSVFormat::ColumnValue: | |
273 value = s.toFloat(); | |
274 haveAnyValue = true; | |
275 break; | |
276 | |
277 case CSVFormat::ColumnLabel: | |
278 label = s; | |
279 ++labelCountMap[label]; | |
280 break; | |
191 } | 281 } |
192 | 282 } |
193 if (i == 0 && timingType == CSVFormat::ExplicitTiming) { | 283 |
194 | 284 if (haveEndTime) { // ... calculate duration now all cols read |
195 bool ok = false; | 285 if (endFrame > frameNo) { |
196 QString numeric = s; | 286 duration = endFrame - frameNo; |
197 numeric.remove(nonNumericRx); | |
198 | |
199 if (timeUnits == CSVFormat::TimeSeconds) { | |
200 | |
201 double time = numeric.toDouble(&ok); | |
202 frameNo = int(time * sampleRate + 0.5); | |
203 | |
204 } else { | |
205 | |
206 frameNo = numeric.toInt(&ok); | |
207 | |
208 if (timeUnits == CSVFormat::TimeWindows) { | |
209 frameNo *= windowSize; | |
210 } | |
211 } | |
212 | |
213 if (!ok) { | |
214 if (warnings < warnLimit) { | |
215 std::cerr << "WARNING: CSVFileReader::load: " | |
216 << "Bad time format (\"" << s.toStdString() | |
217 << "\") in data line " | |
218 << lineno+1 << ":" << std::endl; | |
219 std::cerr << line.toStdString() << std::endl; | |
220 } else if (warnings == warnLimit) { | |
221 std::cerr << "WARNING: Too many warnings" << std::endl; | |
222 } | |
223 ++warnings; | |
224 } | |
225 } else { | |
226 tidyList.push_back(s); | |
227 } | 287 } |
228 } | 288 } |
229 | 289 |
230 if (modelType == CSVFormat::OneDimensionalModel) { | 290 if (modelType == CSVFormat::OneDimensionalModel) { |
231 | 291 |
232 SparseOneDimensionalModel::Point point | 292 SparseOneDimensionalModel::Point point(frameNo, label); |
233 (frameNo, | |
234 tidyList.size() > 0 ? tidyList[tidyList.size()-1] : | |
235 QString("%1").arg(lineno+1)); | |
236 | |
237 model1->addPoint(point); | 293 model1->addPoint(point); |
238 | 294 |
239 } else if (modelType == CSVFormat::TwoDimensionalModel) { | 295 } else if (modelType == CSVFormat::TwoDimensionalModel) { |
240 | 296 |
241 SparseTimeValueModel::Point point | 297 SparseTimeValueModel::Point point(frameNo, value, label); |
242 (frameNo, | |
243 tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0, | |
244 tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno+1)); | |
245 | |
246 model2->addPoint(point); | 298 model2->addPoint(point); |
247 | 299 |
300 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) { | |
301 | |
302 RegionModel::Point point(frameNo, value, duration, label); | |
303 model2a->addPoint(point); | |
304 | |
248 } else if (modelType == CSVFormat::ThreeDimensionalModel) { | 305 } else if (modelType == CSVFormat::ThreeDimensionalModel) { |
249 | 306 |
250 DenseThreeDimensionalModel::Column values; | 307 DenseThreeDimensionalModel::Column values; |
251 | 308 |
252 for (int i = 0; i < tidyList.size(); ++i) { | 309 for (int i = 0; i < list.size(); ++i) { |
310 | |
311 if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) { | |
312 continue; | |
313 } | |
253 | 314 |
254 bool ok = false; | 315 bool ok = false; |
255 float value = list[i].toFloat(&ok); | 316 float value = list[i].toFloat(&ok); |
256 | 317 |
257 if (i > 0 || timingType != CSVFormat::ExplicitTiming) { | 318 values.push_back(value); |
258 values.push_back(value); | |
259 } | |
260 | 319 |
261 bool firstEver = (lineno == 0 && i == 0); | 320 if (firstEverValue || value < min) min = value; |
262 | 321 if (firstEverValue || value > max) max = value; |
263 if (firstEver || value < min) min = value; | 322 |
264 if (firstEver || value > max) max = value; | 323 if (firstEverValue) { |
265 | |
266 if (firstEver) { | |
267 startFrame = frameNo; | 324 startFrame = frameNo; |
268 model3->setStartFrame(startFrame); | 325 model3->setStartFrame(startFrame); |
269 } else if (lineno == 1 && | 326 } else if (lineno == 1 && |
270 timingType == CSVFormat::ExplicitTiming) { | 327 timingType == CSVFormat::ExplicitTiming) { |
271 model3->setResolution(frameNo - startFrame); | 328 model3->setResolution(frameNo - startFrame); |
272 } | 329 } |
330 | |
331 firstEverValue = false; | |
273 | 332 |
274 if (!ok) { | 333 if (!ok) { |
275 if (warnings < warnLimit) { | 334 if (warnings < warnLimit) { |
276 std::cerr << "WARNING: CSVFileReader::load: " | 335 std::cerr << "WARNING: CSVFileReader::load: " |
277 << "Non-numeric value \"" | 336 << "Non-numeric value \"" |
278 << list[i].toStdString() | 337 << list[i].toStdString() |
279 << "\" in data line " << lineno+1 | 338 << "\" in data line " << lineno+1 |
280 << ":" << std::endl; | 339 << ":" << std::endl; |
281 std::cerr << line.toStdString() << std::endl; | 340 std::cerr << line << std::endl; |
282 ++warnings; | 341 ++warnings; |
283 } else if (warnings == warnLimit) { | 342 } else if (warnings == warnLimit) { |
284 // std::cerr << "WARNING: Too many warnings" << std::endl; | 343 // std::cerr << "WARNING: Too many warnings" << std::endl; |
285 } | 344 } |
286 } | 345 } |
287 } | 346 } |
288 | 347 |
289 // std::cerr << "Setting bin values for count " << lineno << ", frame " | 348 // SVDEBUG << "Setting bin values for count " << lineno << ", frame " |
290 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl; | 349 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl; |
291 | 350 |
292 model3->setColumn(lineno, values); | 351 model3->setColumn(lineno, values); |
293 } | 352 } |
294 | 353 |
295 ++lineno; | 354 ++lineno; |
298 frameNo += windowSize; | 357 frameNo += windowSize; |
299 } | 358 } |
300 } | 359 } |
301 } | 360 } |
302 | 361 |
362 if (!haveAnyValue) { | |
363 if (model2a) { | |
364 // assign values for regions based on label frequency; we | |
365 // have this in our labelCountMap, sort of | |
366 | |
367 std::map<int, std::map<QString, float> > countLabelValueMap; | |
368 for (std::map<QString, int>::iterator i = labelCountMap.begin(); | |
369 i != labelCountMap.end(); ++i) { | |
370 countLabelValueMap[i->second][i->first] = 0.f; | |
371 } | |
372 | |
373 float v = 0.f; | |
374 for (std::map<int, std::map<QString, float> >::iterator i = | |
375 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) { | |
376 --i; | |
377 for (std::map<QString, float>::iterator j = i->second.begin(); | |
378 j != i->second.end(); ++j) { | |
379 j->second = v; | |
380 v = v + 1.f; | |
381 } | |
382 } | |
383 | |
384 std::map<RegionModel::Point, RegionModel::Point, | |
385 RegionModel::Point::Comparator> pointMap; | |
386 for (RegionModel::PointList::const_iterator i = | |
387 model2a->getPoints().begin(); | |
388 i != model2a->getPoints().end(); ++i) { | |
389 RegionModel::Point p(*i); | |
390 v = countLabelValueMap[labelCountMap[p.label]][p.label]; | |
391 RegionModel::Point pp(p.frame, v, p.duration, p.label); | |
392 pointMap[p] = pp; | |
393 } | |
394 | |
395 for (std::map<RegionModel::Point, RegionModel::Point>::iterator i = | |
396 pointMap.begin(); i != pointMap.end(); ++i) { | |
397 model2a->deletePoint(i->first); | |
398 model2a->addPoint(i->second); | |
399 } | |
400 } | |
401 } | |
402 | |
303 if (modelType == CSVFormat::ThreeDimensionalModel) { | 403 if (modelType == CSVFormat::ThreeDimensionalModel) { |
304 model3->setMinimumLevel(min); | 404 model3->setMinimumLevel(min); |
305 model3->setMaximumLevel(max); | 405 model3->setMaximumLevel(max); |
306 } | 406 } |
307 | 407 |