Mercurial > hg > svcore
comparison data/fileio/CSVFileReader.cpp @ 283:7336fe3a7caa
* Fix failure to properly load from text files with old-style Mac line endings
author | Chris Cannam |
---|---|
date | Thu, 09 Aug 2007 10:06:02 +0000 |
parents | 2fc6f3829f04 |
children | 14e0f60435b8 |
comparison
equal
deleted
inserted
replaced
282:e2fdcf9d35c5 | 283:7336fe3a7caa |
---|---|
125 | 125 |
126 size_t frameNo = 0; | 126 size_t frameNo = 0; |
127 | 127 |
128 while (!in.atEnd()) { | 128 while (!in.atEnd()) { |
129 | 129 |
130 QString line = in.readLine().trimmed(); | 130 // QTextStream's readLine doesn't cope with old-style Mac |
131 if (line.startsWith("#") || line.trimmed() == "") continue; | 131 // CR-only line endings. Why did they bother making the class |
132 | 132 // cope with more than one sort of line ending, if it still |
133 QStringList list = line.split(separator); | 133 // can't be configured to cope with all the common sorts? |
134 | 134 |
135 if (!model) { | 135 // For the time being we'll deal with this case (which is |
136 | 136 // relatively uncommon for us, but still necessary to handle) |
137 switch (modelType) { | 137 // by reading the entire file using a single readLine, and |
138 | 138 // splitting it. For LF and CR/LF line endings this will just |
139 case CSVFormatDialog::OneDimensionalModel: | 139 // read a line at a time, and that's obviously OK. |
140 model1 = new SparseOneDimensionalModel(sampleRate, windowSize); | 140 |
141 model = model1; | 141 QString chunk = in.readLine(); |
142 break; | 142 QStringList lines = chunk.split('\r', QString::SkipEmptyParts); |
143 | |
144 for (size_t li = 0; li < lines.size(); ++li) { | |
145 | |
146 QString line = lines[li]; | |
147 | |
148 if (line.startsWith("#")) continue; | |
149 | |
150 QStringList list = line.split(separator, QString::KeepEmptyParts); | |
151 | |
152 if (!model) { | |
153 | |
154 switch (modelType) { | |
155 | |
156 case CSVFormatDialog::OneDimensionalModel: | |
157 model1 = new SparseOneDimensionalModel(sampleRate, windowSize); | |
158 model = model1; | |
159 break; | |
143 | 160 |
144 case CSVFormatDialog::TwoDimensionalModel: | 161 case CSVFormatDialog::TwoDimensionalModel: |
145 model2 = new SparseTimeValueModel(sampleRate, windowSize, false); | 162 model2 = new SparseTimeValueModel(sampleRate, windowSize, false); |
146 model = model2; | 163 model = model2; |
147 break; | 164 break; |
148 | 165 |
149 case CSVFormatDialog::ThreeDimensionalModel: | 166 case CSVFormatDialog::ThreeDimensionalModel: |
150 model3 = new EditableDenseThreeDimensionalModel(sampleRate, | 167 model3 = new EditableDenseThreeDimensionalModel(sampleRate, |
151 windowSize, | 168 windowSize, |
152 list.size()); | 169 list.size()); |
153 model = model3; | 170 model = model3; |
154 break; | 171 break; |
155 } | 172 } |
156 } | 173 } |
157 | 174 |
158 QStringList tidyList; | 175 QStringList tidyList; |
159 QRegExp nonNumericRx("[^0-9.,+-]"); | 176 QRegExp nonNumericRx("[^0-9.,+-]"); |
160 | 177 |
161 for (int i = 0; i < list.size(); ++i) { | 178 for (int i = 0; i < list.size(); ++i) { |
162 | 179 |
163 QString s(list[i].trimmed()); | 180 QString s(list[i].trimmed()); |
164 | 181 |
165 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { | 182 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { |
166 s = s.mid(1, s.length() - 2); | 183 s = s.mid(1, s.length() - 2); |
167 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { | 184 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { |
168 s = s.mid(1, s.length() - 2); | 185 s = s.mid(1, s.length() - 2); |
169 } | 186 } |
170 | 187 |
171 if (i == 0 && timingType == CSVFormatDialog::ExplicitTiming) { | 188 if (i == 0 && timingType == CSVFormatDialog::ExplicitTiming) { |
172 | 189 |
173 bool ok = false; | 190 bool ok = false; |
174 QString numeric = s; | 191 QString numeric = s; |
175 numeric.remove(nonNumericRx); | 192 numeric.remove(nonNumericRx); |
176 | 193 |
177 if (timeUnits == CSVFormatDialog::TimeSeconds) { | 194 if (timeUnits == CSVFormatDialog::TimeSeconds) { |
178 | 195 |
179 double time = numeric.toDouble(&ok); | 196 double time = numeric.toDouble(&ok); |
180 frameNo = int(time * sampleRate + 0.00001); | 197 frameNo = int(time * sampleRate + 0.00001); |
181 | 198 |
182 } else { | 199 } else { |
183 | 200 |
184 frameNo = numeric.toInt(&ok); | 201 frameNo = numeric.toInt(&ok); |
185 | 202 |
186 if (timeUnits == CSVFormatDialog::TimeWindows) { | 203 if (timeUnits == CSVFormatDialog::TimeWindows) { |
187 frameNo *= windowSize; | 204 frameNo *= windowSize; |
188 } | 205 } |
189 } | 206 } |
190 | 207 |
191 if (!ok) { | 208 if (!ok) { |
192 if (warnings < warnLimit) { | 209 if (warnings < warnLimit) { |
193 std::cerr << "WARNING: CSVFileReader::load: " | 210 std::cerr << "WARNING: CSVFileReader::load: " |
194 << "Bad time format (\"" << s.toStdString() | 211 << "Bad time format (\"" << s.toStdString() |
195 << "\") in data line " | 212 << "\") in data line " |
196 << lineno << ":" << std::endl; | 213 << lineno << ":" << std::endl; |
197 std::cerr << line.toStdString() << std::endl; | 214 std::cerr << line.toStdString() << std::endl; |
198 } else if (warnings == warnLimit) { | 215 } else if (warnings == warnLimit) { |
199 std::cerr << "WARNING: Too many warnings" << std::endl; | 216 std::cerr << "WARNING: Too many warnings" << std::endl; |
200 } | 217 } |
201 ++warnings; | 218 ++warnings; |
202 } | 219 } |
203 } else { | 220 } else { |
204 tidyList.push_back(s); | 221 tidyList.push_back(s); |
205 } | 222 } |
206 } | 223 } |
207 | 224 |
208 if (modelType == CSVFormatDialog::OneDimensionalModel) { | 225 if (modelType == CSVFormatDialog::OneDimensionalModel) { |
209 | 226 |
210 SparseOneDimensionalModel::Point point | 227 SparseOneDimensionalModel::Point point |
211 (frameNo, | 228 (frameNo, |
212 tidyList.size() > 0 ? tidyList[tidyList.size()-1] : | 229 tidyList.size() > 0 ? tidyList[tidyList.size()-1] : |
213 QString("%1").arg(lineno)); | 230 QString("%1").arg(lineno)); |
214 | 231 |
215 model1->addPoint(point); | 232 model1->addPoint(point); |
216 | 233 |
217 } else if (modelType == CSVFormatDialog::TwoDimensionalModel) { | 234 } else if (modelType == CSVFormatDialog::TwoDimensionalModel) { |
218 | 235 |
219 SparseTimeValueModel::Point point | 236 SparseTimeValueModel::Point point |
220 (frameNo, | 237 (frameNo, |
221 tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0, | 238 tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0, |
222 tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno)); | 239 tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno)); |
223 | 240 |
224 model2->addPoint(point); | 241 model2->addPoint(point); |
225 | 242 |
226 } else if (modelType == CSVFormatDialog::ThreeDimensionalModel) { | 243 } else if (modelType == CSVFormatDialog::ThreeDimensionalModel) { |
227 | 244 |
228 DenseThreeDimensionalModel::Column values; | 245 DenseThreeDimensionalModel::Column values; |
229 | 246 |
230 for (int i = 0; i < tidyList.size(); ++i) { | 247 for (int i = 0; i < tidyList.size(); ++i) { |
231 | 248 |
232 bool ok = false; | 249 bool ok = false; |
233 float value = list[i].toFloat(&ok); | 250 float value = list[i].toFloat(&ok); |
234 values.push_back(value); | 251 values.push_back(value); |
235 | 252 |
236 if ((lineno == 0 && i == 0) || value < min) min = value; | 253 if ((lineno == 0 && i == 0) || value < min) min = value; |
237 if ((lineno == 0 && i == 0) || value > max) max = value; | 254 if ((lineno == 0 && i == 0) || value > max) max = value; |
238 | 255 |
239 if (!ok) { | 256 if (!ok) { |
240 if (warnings < warnLimit) { | 257 if (warnings < warnLimit) { |
241 std::cerr << "WARNING: CSVFileReader::load: " | 258 std::cerr << "WARNING: CSVFileReader::load: " |
242 << "Non-numeric value in data line " << lineno | 259 << "Non-numeric value in data line " << lineno |
243 << ":" << std::endl; | 260 << ":" << std::endl; |
244 std::cerr << line.toStdString() << std::endl; | 261 std::cerr << line.toStdString() << std::endl; |
245 ++warnings; | 262 ++warnings; |
246 } else if (warnings == warnLimit) { | 263 } else if (warnings == warnLimit) { |
247 std::cerr << "WARNING: Too many warnings" << std::endl; | 264 std::cerr << "WARNING: Too many warnings" << std::endl; |
248 } | 265 } |
249 } | 266 } |
250 } | 267 } |
251 | 268 |
252 std::cerr << "Setting bin values for count " << lineno << ", frame " | 269 std::cerr << "Setting bin values for count " << lineno << ", frame " |
253 << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl; | 270 << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl; |
254 | 271 |
255 model3->setColumn(frameNo / model3->getResolution(), values); | 272 model3->setColumn(frameNo / model3->getResolution(), values); |
256 } | 273 } |
257 | 274 |
258 ++lineno; | 275 ++lineno; |
259 if (timingType == CSVFormatDialog::ImplicitTiming || | 276 if (timingType == CSVFormatDialog::ImplicitTiming || |
260 list.size() == 0) { | 277 list.size() == 0) { |
261 frameNo += windowSize; | 278 frameNo += windowSize; |
262 } | 279 } |
280 } | |
263 } | 281 } |
264 | 282 |
265 if (modelType == CSVFormatDialog::ThreeDimensionalModel) { | 283 if (modelType == CSVFormatDialog::ThreeDimensionalModel) { |
266 model3->setMinimumLevel(min); | 284 model3->setMinimumLevel(min); |
267 model3->setMaximumLevel(max); | 285 model3->setMaximumLevel(max); |
510 float prevPrimary = 0.0; | 528 float prevPrimary = 0.0; |
511 | 529 |
512 m_maxExampleCols = 0; | 530 m_maxExampleCols = 0; |
513 | 531 |
514 while (!in.atEnd()) { | 532 while (!in.atEnd()) { |
515 | 533 |
516 QString line = in.readLine().trimmed(); | 534 // See comment about line endings in load() above |
517 if (line.startsWith("#")) continue; | 535 |
518 | 536 QString chunk = in.readLine(); |
519 if (m_separator == "") { | 537 QStringList lines = chunk.split('\r', QString::SkipEmptyParts); |
520 //!!! to do: ask the user | 538 |
521 if (line.split(",").size() >= 2) m_separator = ","; | 539 for (size_t li = 0; li < lines.size(); ++li) { |
522 else if (line.split("\t").size() >= 2) m_separator = "\t"; | 540 |
523 else if (line.split("|").size() >= 2) m_separator = "|"; | 541 QString line = lines[li]; |
524 else if (line.split("/").size() >= 2) m_separator = "/"; | 542 |
525 else if (line.split(":").size() >= 2) m_separator = ":"; | 543 if (line.startsWith("#")) continue; |
526 else m_separator = " "; | 544 |
527 } | 545 if (m_separator == "") { |
528 | 546 //!!! to do: ask the user |
529 QStringList list = line.split(m_separator); | 547 if (line.split(",").size() >= 2) m_separator = ","; |
530 QStringList tidyList; | 548 else if (line.split("\t").size() >= 2) m_separator = "\t"; |
531 | 549 else if (line.split("|").size() >= 2) m_separator = "|"; |
532 for (int i = 0; i < list.size(); ++i) { | 550 else if (line.split("/").size() >= 2) m_separator = "/"; |
551 else if (line.split(":").size() >= 2) m_separator = ":"; | |
552 else m_separator = " "; | |
553 } | |
554 | |
555 QStringList list = line.split(m_separator); | |
556 QStringList tidyList; | |
557 | |
558 for (int i = 0; i < list.size(); ++i) { | |
533 | 559 |
534 QString s(list[i]); | 560 QString s(list[i]); |
535 bool numeric = false; | 561 bool numeric = false; |
536 | 562 |
537 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { | 563 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { |
538 s = s.mid(1, s.length() - 2); | 564 s = s.mid(1, s.length() - 2); |
539 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { | 565 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { |
540 s = s.mid(1, s.length() - 2); | 566 s = s.mid(1, s.length() - 2); |
541 } else { | 567 } else { |
542 (void)s.toFloat(&numeric); | 568 (void)s.toFloat(&numeric); |
543 } | 569 } |
544 | 570 |
545 tidyList.push_back(s); | 571 tidyList.push_back(s); |
546 | 572 |
547 if (lineno == 0 || (list.size() < itemCount)) { | 573 if (lineno == 0 || (list.size() < itemCount)) { |
548 itemCount = list.size(); | 574 itemCount = list.size(); |
549 } else { | 575 } else { |
550 if (itemCount != list.size()) { | 576 if (itemCount != list.size()) { |
551 variableItemCount = true; | 577 variableItemCount = true; |
552 } | 578 } |
553 } | 579 } |
554 | 580 |
555 if (i == 0) { // primary | 581 if (i == 0) { // primary |
556 | 582 |
557 if (numeric) { | 583 if (numeric) { |
558 | 584 |
559 float primary = s.toFloat(); | 585 float primary = s.toFloat(); |
560 | 586 |
561 if (lineno > 0 && primary <= prevPrimary) { | 587 if (lineno > 0 && primary <= prevPrimary) { |
562 nonIncreasingPrimaries = true; | 588 nonIncreasingPrimaries = true; |
563 } | 589 } |
564 | 590 |
565 if (s.contains(".") || s.contains(",")) { | 591 if (s.contains(".") || s.contains(",")) { |
566 floatPrimaries = true; | 592 floatPrimaries = true; |
567 } | 593 } |
568 | 594 |
569 prevPrimary = primary; | 595 prevPrimary = primary; |
570 | 596 |
571 } else { | 597 } else { |
572 nonNumericPrimaries = true; | 598 nonNumericPrimaries = true; |
573 } | 599 } |
574 } else { // secondary | 600 } else { // secondary |
575 | 601 |
576 if (!numeric) { | 602 if (!numeric) { |
577 if (earliestNonNumericItem < 0 || | 603 if (earliestNonNumericItem < 0 || |
578 i < earliestNonNumericItem) { | 604 i < earliestNonNumericItem) { |
579 earliestNonNumericItem = i; | 605 earliestNonNumericItem = i; |
580 } | 606 } |
581 } | 607 } |
582 } | 608 } |
583 } | 609 } |
584 | 610 |
585 if (lineno < 10) { | 611 if (lineno < 10) { |
586 m_example.push_back(tidyList); | 612 m_example.push_back(tidyList); |
587 if (lineno == 0 || tidyList.size() > m_maxExampleCols) { | 613 if (lineno == 0 || tidyList.size() > m_maxExampleCols) { |
588 m_maxExampleCols = tidyList.size(); | 614 m_maxExampleCols = tidyList.size(); |
589 } | 615 } |
590 } | 616 } |
591 | 617 |
592 ++lineno; | 618 ++lineno; |
593 | 619 |
594 if (lineno == 50) break; | 620 if (lineno == 50) break; |
621 } | |
595 } | 622 } |
596 | 623 |
597 if (nonNumericPrimaries || nonIncreasingPrimaries) { | 624 if (nonNumericPrimaries || nonIncreasingPrimaries) { |
598 | 625 |
599 // Primaries are probably not a series of times | 626 // Primaries are probably not a series of times |