Mercurial > hg > svcore
diff data/fileio/CSVFileReader.cpp @ 283:7336fe3a7caa
* Fix failure to properly load from text files with old-style Mac line endings
author | Chris Cannam |
---|---|
date | Thu, 09 Aug 2007 10:06:02 +0000 |
parents | 2fc6f3829f04 |
children | 14e0f60435b8 |
line wrap: on
line diff
--- a/data/fileio/CSVFileReader.cpp Wed Aug 08 12:03:35 2007 +0000 +++ b/data/fileio/CSVFileReader.cpp Thu Aug 09 10:06:02 2007 +0000 @@ -127,139 +127,157 @@ while (!in.atEnd()) { - QString line = in.readLine().trimmed(); - if (line.startsWith("#") || line.trimmed() == "") continue; + // QTextStream's readLine doesn't cope with old-style Mac + // CR-only line endings. Why did they bother making the class + // cope with more than one sort of line ending, if it still + // can't be configured to cope with all the common sorts? - QStringList list = line.split(separator); + // For the time being we'll deal with this case (which is + // relatively uncommon for us, but still necessary to handle) + // by reading the entire file using a single readLine, and + // splitting it. For CR and CR/LF line endings this will just + // read a line at a time, and that's obviously OK. - if (!model) { + QString chunk = in.readLine(); + QStringList lines = chunk.split('\r', QString::SkipEmptyParts); + + for (size_t li = 0; li < lines.size(); ++li) { - switch (modelType) { + QString line = lines[li]; - case CSVFormatDialog::OneDimensionalModel: - model1 = new SparseOneDimensionalModel(sampleRate, windowSize); - model = model1; - break; + if (line.startsWith("#")) continue; + + QStringList list = line.split(separator, QString::KeepEmptyParts); + + if (!model) { + + switch (modelType) { + + case CSVFormatDialog::OneDimensionalModel: + model1 = new SparseOneDimensionalModel(sampleRate, windowSize); + model = model1; + break; - case CSVFormatDialog::TwoDimensionalModel: - model2 = new SparseTimeValueModel(sampleRate, windowSize, false); - model = model2; - break; + case CSVFormatDialog::TwoDimensionalModel: + model2 = new SparseTimeValueModel(sampleRate, windowSize, false); + model = model2; + break; - case CSVFormatDialog::ThreeDimensionalModel: - model3 = new EditableDenseThreeDimensionalModel(sampleRate, - windowSize, - list.size()); - model = model3; - break; - } - } + case CSVFormatDialog::ThreeDimensionalModel: + model3 = new EditableDenseThreeDimensionalModel(sampleRate, + windowSize, + list.size()); + model = model3; + break; + } + } - QStringList tidyList; - QRegExp nonNumericRx("[^0-9.,+-]"); + QStringList tidyList; + QRegExp nonNumericRx("[^0-9.,+-]"); - for (int i = 0; i < list.size(); ++i) { + for (int i = 0; i < list.size(); ++i) { - QString s(list[i].trimmed()); + QString s(list[i].trimmed()); - if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { - s = s.mid(1, s.length() - 2); - } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { - s = s.mid(1, s.length() - 2); - } + if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { + s = s.mid(1, s.length() - 2); + } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { + s = s.mid(1, s.length() - 2); + } - if (i == 0 && timingType == CSVFormatDialog::ExplicitTiming) { + if (i == 0 && timingType == CSVFormatDialog::ExplicitTiming) { - bool ok = false; - QString numeric = s; - numeric.remove(nonNumericRx); + bool ok = false; + QString numeric = s; + numeric.remove(nonNumericRx); - if (timeUnits == CSVFormatDialog::TimeSeconds) { + if (timeUnits == CSVFormatDialog::TimeSeconds) { - double time = numeric.toDouble(&ok); - frameNo = int(time * sampleRate + 0.00001); + double time = numeric.toDouble(&ok); + frameNo = int(time * sampleRate + 0.00001); - } else { + } else { - frameNo = numeric.toInt(&ok); + frameNo = numeric.toInt(&ok); - if (timeUnits == CSVFormatDialog::TimeWindows) { - frameNo *= windowSize; - } - } + if (timeUnits == CSVFormatDialog::TimeWindows) { + frameNo *= windowSize; + } + } - if (!ok) { - if (warnings < warnLimit) { - std::cerr << "WARNING: CSVFileReader::load: " - << "Bad time format (\"" << s.toStdString() - << "\") in data line " - << lineno << ":" << std::endl; - std::cerr << line.toStdString() << std::endl; - } else if (warnings == warnLimit) { - std::cerr << "WARNING: Too many warnings" << std::endl; - } - ++warnings; - } - } else { - tidyList.push_back(s); - } - } + if (!ok) { + if (warnings < warnLimit) { + std::cerr << "WARNING: CSVFileReader::load: " + << "Bad time format (\"" << s.toStdString() + << "\") in data line " + << lineno << ":" << std::endl; + std::cerr << line.toStdString() << std::endl; + } else if (warnings == warnLimit) { + std::cerr << "WARNING: Too many warnings" << std::endl; + } + ++warnings; + } + } else { + tidyList.push_back(s); + } + } - if (modelType == CSVFormatDialog::OneDimensionalModel) { + if (modelType == CSVFormatDialog::OneDimensionalModel) { - SparseOneDimensionalModel::Point point - (frameNo, - tidyList.size() > 0 ? tidyList[tidyList.size()-1] : - QString("%1").arg(lineno)); + SparseOneDimensionalModel::Point point + (frameNo, + tidyList.size() > 0 ? tidyList[tidyList.size()-1] : + QString("%1").arg(lineno)); - model1->addPoint(point); + model1->addPoint(point); - } else if (modelType == CSVFormatDialog::TwoDimensionalModel) { + } else if (modelType == CSVFormatDialog::TwoDimensionalModel) { - SparseTimeValueModel::Point point - (frameNo, - tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0, - tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno)); + SparseTimeValueModel::Point point + (frameNo, + tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0, + tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno)); - model2->addPoint(point); + model2->addPoint(point); - } else if (modelType == CSVFormatDialog::ThreeDimensionalModel) { + } else if (modelType == CSVFormatDialog::ThreeDimensionalModel) { - DenseThreeDimensionalModel::Column values; + DenseThreeDimensionalModel::Column values; - for (int i = 0; i < tidyList.size(); ++i) { + for (int i = 0; i < tidyList.size(); ++i) { - bool ok = false; - float value = list[i].toFloat(&ok); - values.push_back(value); + bool ok = false; + float value = list[i].toFloat(&ok); + values.push_back(value); - if ((lineno == 0 && i == 0) || value < min) min = value; - if ((lineno == 0 && i == 0) || value > max) max = value; + if ((lineno == 0 && i == 0) || value < min) min = value; + if ((lineno == 0 && i == 0) || value > max) max = value; - if (!ok) { - if (warnings < warnLimit) { - std::cerr << "WARNING: CSVFileReader::load: " - << "Non-numeric value in data line " << lineno - << ":" << std::endl; - std::cerr << line.toStdString() << std::endl; - ++warnings; - } else if (warnings == warnLimit) { - std::cerr << "WARNING: Too many warnings" << std::endl; - } - } - } + if (!ok) { + if (warnings < warnLimit) { + std::cerr << "WARNING: CSVFileReader::load: " + << "Non-numeric value in data line " << lineno + << ":" << std::endl; + std::cerr << line.toStdString() << std::endl; + ++warnings; + } else if (warnings == warnLimit) { + std::cerr << "WARNING: Too many warnings" << std::endl; + } + } + } - std::cerr << "Setting bin values for count " << lineno << ", frame " - << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl; + std::cerr << "Setting bin values for count " << lineno << ", frame " + << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl; - model3->setColumn(frameNo / model3->getResolution(), values); - } + model3->setColumn(frameNo / model3->getResolution(), values); + } - ++lineno; - if (timingType == CSVFormatDialog::ImplicitTiming || - list.size() == 0) { - frameNo += windowSize; - } + ++lineno; + if (timingType == CSVFormatDialog::ImplicitTiming || + list.size() == 0) { + frameNo += windowSize; + } + } } if (modelType == CSVFormatDialog::ThreeDimensionalModel) { @@ -512,86 +530,95 @@ m_maxExampleCols = 0; while (!in.atEnd()) { - - QString line = in.readLine().trimmed(); - if (line.startsWith("#")) continue; - if (m_separator == "") { - //!!! to do: ask the user - if (line.split(",").size() >= 2) m_separator = ","; - else if (line.split("\t").size() >= 2) m_separator = "\t"; - else if (line.split("|").size() >= 2) m_separator = "|"; - else if (line.split("/").size() >= 2) m_separator = "/"; - else if (line.split(":").size() >= 2) m_separator = ":"; - else m_separator = " "; - } + // See comment about line endings in load() above - QStringList list = line.split(m_separator); - QStringList tidyList; + QString chunk = in.readLine(); + QStringList lines = chunk.split('\r', QString::SkipEmptyParts); - for (int i = 0; i < list.size(); ++i) { + for (size_t li = 0; li < lines.size(); ++li) { + + QString line = lines[li]; + + if (line.startsWith("#")) continue; + + if (m_separator == "") { + //!!! to do: ask the user + if (line.split(",").size() >= 2) m_separator = ","; + else if (line.split("\t").size() >= 2) m_separator = "\t"; + else if (line.split("|").size() >= 2) m_separator = "|"; + else if (line.split("/").size() >= 2) m_separator = "/"; + else if (line.split(":").size() >= 2) m_separator = ":"; + else m_separator = " "; + } + + QStringList list = line.split(m_separator); + QStringList tidyList; + + for (int i = 0; i < list.size(); ++i) { - QString s(list[i]); - bool numeric = false; + QString s(list[i]); + bool numeric = false; - if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { - s = s.mid(1, s.length() - 2); - } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { - s = s.mid(1, s.length() - 2); - } else { - (void)s.toFloat(&numeric); - } + if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { + s = s.mid(1, s.length() - 2); + } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { + s = s.mid(1, s.length() - 2); + } else { + (void)s.toFloat(&numeric); + } - tidyList.push_back(s); + tidyList.push_back(s); - if (lineno == 0 || (list.size() < itemCount)) { - itemCount = list.size(); - } else { - if (itemCount != list.size()) { - variableItemCount = true; - } - } + if (lineno == 0 || (list.size() < itemCount)) { + itemCount = list.size(); + } else { + if (itemCount != list.size()) { + variableItemCount = true; + } + } - if (i == 0) { // primary + if (i == 0) { // primary - if (numeric) { + if (numeric) { - float primary = s.toFloat(); + float primary = s.toFloat(); - if (lineno > 0 && primary <= prevPrimary) { - nonIncreasingPrimaries = true; - } + if (lineno > 0 && primary <= prevPrimary) { + nonIncreasingPrimaries = true; + } - if (s.contains(".") || s.contains(",")) { - floatPrimaries = true; - } + if (s.contains(".") || s.contains(",")) { + floatPrimaries = true; + } - prevPrimary = primary; + prevPrimary = primary; - } else { - nonNumericPrimaries = true; - } - } else { // secondary + } else { + nonNumericPrimaries = true; + } + } else { // secondary - if (!numeric) { - if (earliestNonNumericItem < 0 || - i < earliestNonNumericItem) { - earliestNonNumericItem = i; - } - } - } - } + if (!numeric) { + if (earliestNonNumericItem < 0 || + i < earliestNonNumericItem) { + earliestNonNumericItem = i; + } + } + } + } - if (lineno < 10) { - m_example.push_back(tidyList); - if (lineno == 0 || tidyList.size() > m_maxExampleCols) { - m_maxExampleCols = tidyList.size(); - } - } + if (lineno < 10) { + m_example.push_back(tidyList); + if (lineno == 0 || tidyList.size() > m_maxExampleCols) { + m_maxExampleCols = tidyList.size(); + } + } - ++lineno; + ++lineno; - if (lineno == 50) break; + if (lineno == 50) break; + } } if (nonNumericPrimaries || nonIncreasingPrimaries) {