comparison data/fileio/CSVFileReader.cpp @ 283:7336fe3a7caa

* Fix failure to properly load from text files with old-style Mac line endings
author Chris Cannam
date Thu, 09 Aug 2007 10:06:02 +0000
parents 2fc6f3829f04
children 14e0f60435b8
comparison
equal deleted inserted replaced
282:e2fdcf9d35c5 283:7336fe3a7caa
125 125
126 size_t frameNo = 0; 126 size_t frameNo = 0;
127 127
128 while (!in.atEnd()) { 128 while (!in.atEnd()) {
129 129
130 QString line = in.readLine().trimmed(); 130 // QTextStream's readLine doesn't cope with old-style Mac
131 if (line.startsWith("#") || line.trimmed() == "") continue; 131 // CR-only line endings. Why did they bother making the class
132 132 // cope with more than one sort of line ending, if it still
133 QStringList list = line.split(separator); 133 // can't be configured to cope with all the common sorts?
134 134
135 if (!model) { 135 // For the time being we'll deal with this case (which is
136 136 // relatively uncommon for us, but still necessary to handle)
137 switch (modelType) { 137 // by reading the entire file using a single readLine, and
138 138 // splitting it. For LF and CR/LF line endings this will just
139 case CSVFormatDialog::OneDimensionalModel: 139 // read a line at a time, and that's obviously OK.
140 model1 = new SparseOneDimensionalModel(sampleRate, windowSize); 140
141 model = model1; 141 QString chunk = in.readLine();
142 break; 142 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
143
144 for (size_t li = 0; li < lines.size(); ++li) {
145
146 QString line = lines[li];
147
148 if (line.startsWith("#")) continue;
149
150 QStringList list = line.split(separator, QString::KeepEmptyParts);
151
152 if (!model) {
153
154 switch (modelType) {
155
156 case CSVFormatDialog::OneDimensionalModel:
157 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
158 model = model1;
159 break;
143 160
144 case CSVFormatDialog::TwoDimensionalModel: 161 case CSVFormatDialog::TwoDimensionalModel:
145 model2 = new SparseTimeValueModel(sampleRate, windowSize, false); 162 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
146 model = model2; 163 model = model2;
147 break; 164 break;
148 165
149 case CSVFormatDialog::ThreeDimensionalModel: 166 case CSVFormatDialog::ThreeDimensionalModel:
150 model3 = new EditableDenseThreeDimensionalModel(sampleRate, 167 model3 = new EditableDenseThreeDimensionalModel(sampleRate,
151 windowSize, 168 windowSize,
152 list.size()); 169 list.size());
153 model = model3; 170 model = model3;
154 break; 171 break;
155 } 172 }
156 } 173 }
157 174
158 QStringList tidyList; 175 QStringList tidyList;
159 QRegExp nonNumericRx("[^0-9.,+-]"); 176 QRegExp nonNumericRx("[^0-9.,+-]");
160 177
161 for (int i = 0; i < list.size(); ++i) { 178 for (int i = 0; i < list.size(); ++i) {
162 179
163 QString s(list[i].trimmed()); 180 QString s(list[i].trimmed());
164 181
165 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { 182 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
166 s = s.mid(1, s.length() - 2); 183 s = s.mid(1, s.length() - 2);
167 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { 184 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
168 s = s.mid(1, s.length() - 2); 185 s = s.mid(1, s.length() - 2);
169 } 186 }
170 187
171 if (i == 0 && timingType == CSVFormatDialog::ExplicitTiming) { 188 if (i == 0 && timingType == CSVFormatDialog::ExplicitTiming) {
172 189
173 bool ok = false; 190 bool ok = false;
174 QString numeric = s; 191 QString numeric = s;
175 numeric.remove(nonNumericRx); 192 numeric.remove(nonNumericRx);
176 193
177 if (timeUnits == CSVFormatDialog::TimeSeconds) { 194 if (timeUnits == CSVFormatDialog::TimeSeconds) {
178 195
179 double time = numeric.toDouble(&ok); 196 double time = numeric.toDouble(&ok);
180 frameNo = int(time * sampleRate + 0.00001); 197 frameNo = int(time * sampleRate + 0.00001);
181 198
182 } else { 199 } else {
183 200
184 frameNo = numeric.toInt(&ok); 201 frameNo = numeric.toInt(&ok);
185 202
186 if (timeUnits == CSVFormatDialog::TimeWindows) { 203 if (timeUnits == CSVFormatDialog::TimeWindows) {
187 frameNo *= windowSize; 204 frameNo *= windowSize;
188 } 205 }
189 } 206 }
190 207
191 if (!ok) { 208 if (!ok) {
192 if (warnings < warnLimit) { 209 if (warnings < warnLimit) {
193 std::cerr << "WARNING: CSVFileReader::load: " 210 std::cerr << "WARNING: CSVFileReader::load: "
194 << "Bad time format (\"" << s.toStdString() 211 << "Bad time format (\"" << s.toStdString()
195 << "\") in data line " 212 << "\") in data line "
196 << lineno << ":" << std::endl; 213 << lineno << ":" << std::endl;
197 std::cerr << line.toStdString() << std::endl; 214 std::cerr << line.toStdString() << std::endl;
198 } else if (warnings == warnLimit) { 215 } else if (warnings == warnLimit) {
199 std::cerr << "WARNING: Too many warnings" << std::endl; 216 std::cerr << "WARNING: Too many warnings" << std::endl;
200 } 217 }
201 ++warnings; 218 ++warnings;
202 } 219 }
203 } else { 220 } else {
204 tidyList.push_back(s); 221 tidyList.push_back(s);
205 } 222 }
206 } 223 }
207 224
208 if (modelType == CSVFormatDialog::OneDimensionalModel) { 225 if (modelType == CSVFormatDialog::OneDimensionalModel) {
209 226
210 SparseOneDimensionalModel::Point point 227 SparseOneDimensionalModel::Point point
211 (frameNo, 228 (frameNo,
212 tidyList.size() > 0 ? tidyList[tidyList.size()-1] : 229 tidyList.size() > 0 ? tidyList[tidyList.size()-1] :
213 QString("%1").arg(lineno)); 230 QString("%1").arg(lineno));
214 231
215 model1->addPoint(point); 232 model1->addPoint(point);
216 233
217 } else if (modelType == CSVFormatDialog::TwoDimensionalModel) { 234 } else if (modelType == CSVFormatDialog::TwoDimensionalModel) {
218 235
219 SparseTimeValueModel::Point point 236 SparseTimeValueModel::Point point
220 (frameNo, 237 (frameNo,
221 tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0, 238 tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0,
222 tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno)); 239 tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno));
223 240
224 model2->addPoint(point); 241 model2->addPoint(point);
225 242
226 } else if (modelType == CSVFormatDialog::ThreeDimensionalModel) { 243 } else if (modelType == CSVFormatDialog::ThreeDimensionalModel) {
227 244
228 DenseThreeDimensionalModel::Column values; 245 DenseThreeDimensionalModel::Column values;
229 246
230 for (int i = 0; i < tidyList.size(); ++i) { 247 for (int i = 0; i < tidyList.size(); ++i) {
231 248
232 bool ok = false; 249 bool ok = false;
233 float value = list[i].toFloat(&ok); 250 float value = list[i].toFloat(&ok);
234 values.push_back(value); 251 values.push_back(value);
235 252
236 if ((lineno == 0 && i == 0) || value < min) min = value; 253 if ((lineno == 0 && i == 0) || value < min) min = value;
237 if ((lineno == 0 && i == 0) || value > max) max = value; 254 if ((lineno == 0 && i == 0) || value > max) max = value;
238 255
239 if (!ok) { 256 if (!ok) {
240 if (warnings < warnLimit) { 257 if (warnings < warnLimit) {
241 std::cerr << "WARNING: CSVFileReader::load: " 258 std::cerr << "WARNING: CSVFileReader::load: "
242 << "Non-numeric value in data line " << lineno 259 << "Non-numeric value in data line " << lineno
243 << ":" << std::endl; 260 << ":" << std::endl;
244 std::cerr << line.toStdString() << std::endl; 261 std::cerr << line.toStdString() << std::endl;
245 ++warnings; 262 ++warnings;
246 } else if (warnings == warnLimit) { 263 } else if (warnings == warnLimit) {
247 std::cerr << "WARNING: Too many warnings" << std::endl; 264 std::cerr << "WARNING: Too many warnings" << std::endl;
248 } 265 }
249 } 266 }
250 } 267 }
251 268
252 std::cerr << "Setting bin values for count " << lineno << ", frame " 269 std::cerr << "Setting bin values for count " << lineno << ", frame "
253 << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl; 270 << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl;
254 271
255 model3->setColumn(frameNo / model3->getResolution(), values); 272 model3->setColumn(frameNo / model3->getResolution(), values);
256 } 273 }
257 274
258 ++lineno; 275 ++lineno;
259 if (timingType == CSVFormatDialog::ImplicitTiming || 276 if (timingType == CSVFormatDialog::ImplicitTiming ||
260 list.size() == 0) { 277 list.size() == 0) {
261 frameNo += windowSize; 278 frameNo += windowSize;
262 } 279 }
280 }
263 } 281 }
264 282
265 if (modelType == CSVFormatDialog::ThreeDimensionalModel) { 283 if (modelType == CSVFormatDialog::ThreeDimensionalModel) {
266 model3->setMinimumLevel(min); 284 model3->setMinimumLevel(min);
267 model3->setMaximumLevel(max); 285 model3->setMaximumLevel(max);
510 float prevPrimary = 0.0; 528 float prevPrimary = 0.0;
511 529
512 m_maxExampleCols = 0; 530 m_maxExampleCols = 0;
513 531
514 while (!in.atEnd()) { 532 while (!in.atEnd()) {
515 533
516 QString line = in.readLine().trimmed(); 534 // See comment about line endings in load() above
517 if (line.startsWith("#")) continue; 535
518 536 QString chunk = in.readLine();
519 if (m_separator == "") { 537 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
520 //!!! to do: ask the user 538
521 if (line.split(",").size() >= 2) m_separator = ","; 539 for (size_t li = 0; li < lines.size(); ++li) {
522 else if (line.split("\t").size() >= 2) m_separator = "\t"; 540
523 else if (line.split("|").size() >= 2) m_separator = "|"; 541 QString line = lines[li];
524 else if (line.split("/").size() >= 2) m_separator = "/"; 542
525 else if (line.split(":").size() >= 2) m_separator = ":"; 543 if (line.startsWith("#")) continue;
526 else m_separator = " "; 544
527 } 545 if (m_separator == "") {
528 546 //!!! to do: ask the user
529 QStringList list = line.split(m_separator); 547 if (line.split(",").size() >= 2) m_separator = ",";
530 QStringList tidyList; 548 else if (line.split("\t").size() >= 2) m_separator = "\t";
531 549 else if (line.split("|").size() >= 2) m_separator = "|";
532 for (int i = 0; i < list.size(); ++i) { 550 else if (line.split("/").size() >= 2) m_separator = "/";
551 else if (line.split(":").size() >= 2) m_separator = ":";
552 else m_separator = " ";
553 }
554
555 QStringList list = line.split(m_separator);
556 QStringList tidyList;
557
558 for (int i = 0; i < list.size(); ++i) {
533 559
534 QString s(list[i]); 560 QString s(list[i]);
535 bool numeric = false; 561 bool numeric = false;
536 562
537 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { 563 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
538 s = s.mid(1, s.length() - 2); 564 s = s.mid(1, s.length() - 2);
539 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { 565 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
540 s = s.mid(1, s.length() - 2); 566 s = s.mid(1, s.length() - 2);
541 } else { 567 } else {
542 (void)s.toFloat(&numeric); 568 (void)s.toFloat(&numeric);
543 } 569 }
544 570
545 tidyList.push_back(s); 571 tidyList.push_back(s);
546 572
547 if (lineno == 0 || (list.size() < itemCount)) { 573 if (lineno == 0 || (list.size() < itemCount)) {
548 itemCount = list.size(); 574 itemCount = list.size();
549 } else { 575 } else {
550 if (itemCount != list.size()) { 576 if (itemCount != list.size()) {
551 variableItemCount = true; 577 variableItemCount = true;
552 } 578 }
553 } 579 }
554 580
555 if (i == 0) { // primary 581 if (i == 0) { // primary
556 582
557 if (numeric) { 583 if (numeric) {
558 584
559 float primary = s.toFloat(); 585 float primary = s.toFloat();
560 586
561 if (lineno > 0 && primary <= prevPrimary) { 587 if (lineno > 0 && primary <= prevPrimary) {
562 nonIncreasingPrimaries = true; 588 nonIncreasingPrimaries = true;
563 } 589 }
564 590
565 if (s.contains(".") || s.contains(",")) { 591 if (s.contains(".") || s.contains(",")) {
566 floatPrimaries = true; 592 floatPrimaries = true;
567 } 593 }
568 594
569 prevPrimary = primary; 595 prevPrimary = primary;
570 596
571 } else { 597 } else {
572 nonNumericPrimaries = true; 598 nonNumericPrimaries = true;
573 } 599 }
574 } else { // secondary 600 } else { // secondary
575 601
576 if (!numeric) { 602 if (!numeric) {
577 if (earliestNonNumericItem < 0 || 603 if (earliestNonNumericItem < 0 ||
578 i < earliestNonNumericItem) { 604 i < earliestNonNumericItem) {
579 earliestNonNumericItem = i; 605 earliestNonNumericItem = i;
580 } 606 }
581 } 607 }
582 } 608 }
583 } 609 }
584 610
585 if (lineno < 10) { 611 if (lineno < 10) {
586 m_example.push_back(tidyList); 612 m_example.push_back(tidyList);
587 if (lineno == 0 || tidyList.size() > m_maxExampleCols) { 613 if (lineno == 0 || tidyList.size() > m_maxExampleCols) {
588 m_maxExampleCols = tidyList.size(); 614 m_maxExampleCols = tidyList.size();
589 } 615 }
590 } 616 }
591 617
592 ++lineno; 618 ++lineno;
593 619
594 if (lineno == 50) break; 620 if (lineno == 50) break;
621 }
595 } 622 }
596 623
597 if (nonNumericPrimaries || nonIncreasingPrimaries) { 624 if (nonNumericPrimaries || nonIncreasingPrimaries) {
598 625
599 // Primaries are probably not a series of times 626 // Primaries are probably not a series of times