annotate data/fileio/CSVFileReader.cpp @ 624:076dcd2ce209

* Truncate long output lines
author Chris Cannam
date Fri, 21 May 2010 10:20:52 +0000
parents dd97f7b3d120
children 001db550bd48
rev   line source
Chris@148 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@148 2
Chris@148 3 /*
Chris@148 4 Sonic Visualiser
Chris@148 5 An audio file viewer and annotation editor.
Chris@148 6 Centre for Digital Music, Queen Mary, University of London.
Chris@148 7 This file copyright 2006 Chris Cannam.
Chris@148 8
Chris@148 9 This program is free software; you can redistribute it and/or
Chris@148 10 modify it under the terms of the GNU General Public License as
Chris@148 11 published by the Free Software Foundation; either version 2 of the
Chris@148 12 License, or (at your option) any later version. See the file
Chris@148 13 COPYING included with this distribution for more information.
Chris@148 14 */
Chris@148 15
Chris@148 16 #include "CSVFileReader.h"
Chris@148 17
Chris@150 18 #include "model/Model.h"
Chris@148 19 #include "base/RealTime.h"
Chris@148 20 #include "model/SparseOneDimensionalModel.h"
Chris@148 21 #include "model/SparseTimeValueModel.h"
Chris@152 22 #include "model/EditableDenseThreeDimensionalModel.h"
Chris@308 23 #include "DataFileReaderFactory.h"
Chris@148 24
Chris@148 25 #include <QFile>
Chris@148 26 #include <QString>
Chris@148 27 #include <QRegExp>
Chris@148 28 #include <QStringList>
Chris@148 29 #include <QTextStream>
Chris@148 30
Chris@148 31 #include <iostream>
Chris@148 32
Chris@392 33 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
Chris@392 34 size_t mainModelSampleRate) :
Chris@392 35 m_format(format),
Chris@148 36 m_file(0),
Chris@148 37 m_mainModelSampleRate(mainModelSampleRate)
Chris@148 38 {
Chris@148 39 m_file = new QFile(path);
Chris@148 40 bool good = false;
Chris@148 41
Chris@148 42 if (!m_file->exists()) {
Chris@148 43 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
Chris@148 44 } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@148 45 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
Chris@148 46 } else {
Chris@148 47 good = true;
Chris@148 48 }
Chris@148 49
Chris@148 50 if (!good) {
Chris@148 51 delete m_file;
Chris@148 52 m_file = 0;
Chris@148 53 }
Chris@148 54 }
Chris@148 55
Chris@148 56 CSVFileReader::~CSVFileReader()
Chris@148 57 {
Chris@148 58 std::cerr << "CSVFileReader::~CSVFileReader: file is " << m_file << std::endl;
Chris@148 59
Chris@148 60 if (m_file) {
Chris@148 61 std::cerr << "CSVFileReader::CSVFileReader: Closing file" << std::endl;
Chris@148 62 m_file->close();
Chris@148 63 }
Chris@148 64 delete m_file;
Chris@148 65 }
Chris@148 66
Chris@148 67 bool
Chris@148 68 CSVFileReader::isOK() const
Chris@148 69 {
Chris@148 70 return (m_file != 0);
Chris@148 71 }
Chris@148 72
Chris@148 73 QString
Chris@148 74 CSVFileReader::getError() const
Chris@148 75 {
Chris@148 76 return m_error;
Chris@148 77 }
Chris@148 78
Chris@148 79 Model *
Chris@148 80 CSVFileReader::load() const
Chris@148 81 {
Chris@148 82 if (!m_file) return 0;
Chris@392 83 /*!!!
Chris@148 84 CSVFormatDialog *dialog = new CSVFormatDialog
Chris@148 85 (0, m_file, m_mainModelSampleRate);
Chris@148 86
Chris@148 87 if (dialog->exec() == QDialog::Rejected) {
Chris@148 88 delete dialog;
Chris@308 89 throw DataFileReaderFactory::ImportCancelled;
Chris@148 90 }
Chris@392 91 */
Chris@148 92
Chris@392 93 CSVFormat::ModelType modelType = m_format.getModelType();
Chris@392 94 CSVFormat::TimingType timingType = m_format.getTimingType();
Chris@392 95 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@392 96 QString separator = m_format.getSeparator();
Chris@392 97 QString::SplitBehavior behaviour = m_format.getSplitBehaviour();
Chris@392 98 size_t sampleRate = m_format.getSampleRate();
Chris@392 99 size_t windowSize = m_format.getWindowSize();
Chris@148 100
Chris@392 101 if (timingType == CSVFormat::ExplicitTiming) {
Chris@611 102 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@611 103 // This will be overridden later if more than one line
Chris@611 104 // appears in our file, but we want to choose a default
Chris@611 105 // that's likely to be visible
Chris@611 106 windowSize = 1024;
Chris@611 107 } else {
Chris@611 108 windowSize = 1;
Chris@611 109 }
Chris@392 110 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@148 111 sampleRate = m_mainModelSampleRate;
Chris@148 112 }
Chris@148 113 }
Chris@148 114
Chris@148 115 SparseOneDimensionalModel *model1 = 0;
Chris@148 116 SparseTimeValueModel *model2 = 0;
Chris@152 117 EditableDenseThreeDimensionalModel *model3 = 0;
Chris@148 118 Model *model = 0;
Chris@148 119
Chris@148 120 QTextStream in(m_file);
Chris@148 121 in.seek(0);
Chris@148 122
Chris@148 123 unsigned int warnings = 0, warnLimit = 10;
Chris@148 124 unsigned int lineno = 0;
Chris@148 125
Chris@148 126 float min = 0.0, max = 0.0;
Chris@148 127
Chris@148 128 size_t frameNo = 0;
Chris@611 129 size_t startFrame = 0; // for calculation of dense model resolution
Chris@148 130
Chris@148 131 while (!in.atEnd()) {
Chris@148 132
Chris@283 133 // QTextStream's readLine doesn't cope with old-style Mac
Chris@283 134 // CR-only line endings. Why did they bother making the class
Chris@283 135 // cope with more than one sort of line ending, if it still
Chris@283 136 // can't be configured to cope with all the common sorts?
Chris@148 137
Chris@283 138 // For the time being we'll deal with this case (which is
Chris@283 139 // relatively uncommon for us, but still necessary to handle)
Chris@283 140 // by reading the entire file using a single readLine, and
Chris@283 141 // splitting it. For CR and CR/LF line endings this will just
Chris@283 142 // read a line at a time, and that's obviously OK.
Chris@148 143
Chris@283 144 QString chunk = in.readLine();
Chris@283 145 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@283 146
Chris@283 147 for (size_t li = 0; li < lines.size(); ++li) {
Chris@148 148
Chris@283 149 QString line = lines[li];
Chris@148 150
Chris@283 151 if (line.startsWith("#")) continue;
Chris@283 152
Chris@390 153 QStringList list = line.split(separator, behaviour);
Chris@283 154
Chris@283 155 if (!model) {
Chris@283 156
Chris@283 157 switch (modelType) {
Chris@283 158
Chris@392 159 case CSVFormat::OneDimensionalModel:
Chris@283 160 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
Chris@283 161 model = model1;
Chris@283 162 break;
Chris@148 163
Chris@392 164 case CSVFormat::TwoDimensionalModel:
Chris@283 165 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
Chris@283 166 model = model2;
Chris@283 167 break;
Chris@148 168
Chris@392 169 case CSVFormat::ThreeDimensionalModel:
Chris@535 170 model3 = new EditableDenseThreeDimensionalModel
Chris@535 171 (sampleRate,
Chris@535 172 windowSize,
Chris@535 173 list.size(),
Chris@535 174 EditableDenseThreeDimensionalModel::NoCompression);
Chris@283 175 model = model3;
Chris@283 176 break;
Chris@283 177 }
Chris@283 178 }
Chris@148 179
Chris@283 180 QStringList tidyList;
Chris@390 181 QRegExp nonNumericRx("[^0-9eE.,+-]");
Chris@148 182
Chris@283 183 for (int i = 0; i < list.size(); ++i) {
Chris@148 184
Chris@283 185 QString s(list[i].trimmed());
Chris@148 186
Chris@283 187 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
Chris@283 188 s = s.mid(1, s.length() - 2);
Chris@283 189 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
Chris@283 190 s = s.mid(1, s.length() - 2);
Chris@283 191 }
Chris@148 192
Chris@392 193 if (i == 0 && timingType == CSVFormat::ExplicitTiming) {
Chris@148 194
Chris@283 195 bool ok = false;
Chris@283 196 QString numeric = s;
Chris@283 197 numeric.remove(nonNumericRx);
Chris@148 198
Chris@392 199 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@148 200
Chris@283 201 double time = numeric.toDouble(&ok);
Chris@491 202 frameNo = int(time * sampleRate + 0.5);
Chris@148 203
Chris@283 204 } else {
Chris@148 205
Chris@283 206 frameNo = numeric.toInt(&ok);
Chris@148 207
Chris@392 208 if (timeUnits == CSVFormat::TimeWindows) {
Chris@283 209 frameNo *= windowSize;
Chris@283 210 }
Chris@283 211 }
Chris@148 212
Chris@283 213 if (!ok) {
Chris@283 214 if (warnings < warnLimit) {
Chris@283 215 std::cerr << "WARNING: CSVFileReader::load: "
Chris@283 216 << "Bad time format (\"" << s.toStdString()
Chris@283 217 << "\") in data line "
Chris@491 218 << lineno+1 << ":" << std::endl;
Chris@283 219 std::cerr << line.toStdString() << std::endl;
Chris@283 220 } else if (warnings == warnLimit) {
Chris@283 221 std::cerr << "WARNING: Too many warnings" << std::endl;
Chris@283 222 }
Chris@283 223 ++warnings;
Chris@283 224 }
Chris@283 225 } else {
Chris@283 226 tidyList.push_back(s);
Chris@283 227 }
Chris@283 228 }
Chris@148 229
Chris@392 230 if (modelType == CSVFormat::OneDimensionalModel) {
Chris@148 231
Chris@283 232 SparseOneDimensionalModel::Point point
Chris@283 233 (frameNo,
Chris@283 234 tidyList.size() > 0 ? tidyList[tidyList.size()-1] :
Chris@491 235 QString("%1").arg(lineno+1));
Chris@148 236
Chris@283 237 model1->addPoint(point);
Chris@148 238
Chris@392 239 } else if (modelType == CSVFormat::TwoDimensionalModel) {
Chris@148 240
Chris@283 241 SparseTimeValueModel::Point point
Chris@283 242 (frameNo,
Chris@283 243 tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0,
Chris@491 244 tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno+1));
Chris@148 245
Chris@283 246 model2->addPoint(point);
Chris@148 247
Chris@392 248 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 249
Chris@283 250 DenseThreeDimensionalModel::Column values;
Chris@148 251
Chris@283 252 for (int i = 0; i < tidyList.size(); ++i) {
Chris@148 253
Chris@283 254 bool ok = false;
Chris@283 255 float value = list[i].toFloat(&ok);
Chris@611 256
Chris@611 257 if (i > 0 || timingType != CSVFormat::ExplicitTiming) {
Chris@611 258 values.push_back(value);
Chris@611 259 }
Chris@148 260
Chris@611 261 bool firstEver = (lineno == 0 && i == 0);
Chris@611 262
Chris@611 263 if (firstEver || value < min) min = value;
Chris@611 264 if (firstEver || value > max) max = value;
Chris@611 265
Chris@611 266 if (firstEver) {
Chris@611 267 startFrame = frameNo;
Chris@611 268 model3->setStartFrame(startFrame);
Chris@611 269 } else if (lineno == 1 &&
Chris@611 270 timingType == CSVFormat::ExplicitTiming) {
Chris@611 271 model3->setResolution(frameNo - startFrame);
Chris@611 272 }
Chris@148 273
Chris@283 274 if (!ok) {
Chris@283 275 if (warnings < warnLimit) {
Chris@283 276 std::cerr << "WARNING: CSVFileReader::load: "
Chris@390 277 << "Non-numeric value \""
Chris@390 278 << list[i].toStdString()
Chris@491 279 << "\" in data line " << lineno+1
Chris@283 280 << ":" << std::endl;
Chris@283 281 std::cerr << line.toStdString() << std::endl;
Chris@283 282 ++warnings;
Chris@283 283 } else if (warnings == warnLimit) {
Chris@390 284 // std::cerr << "WARNING: Too many warnings" << std::endl;
Chris@283 285 }
Chris@283 286 }
Chris@283 287 }
Chris@148 288
Chris@390 289 // std::cerr << "Setting bin values for count " << lineno << ", frame "
Chris@390 290 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl;
Chris@148 291
Chris@611 292 model3->setColumn(lineno, values);
Chris@283 293 }
Chris@148 294
Chris@283 295 ++lineno;
Chris@392 296 if (timingType == CSVFormat::ImplicitTiming ||
Chris@283 297 list.size() == 0) {
Chris@283 298 frameNo += windowSize;
Chris@283 299 }
Chris@283 300 }
Chris@148 301 }
Chris@148 302
Chris@392 303 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 304 model3->setMinimumLevel(min);
Chris@148 305 model3->setMaximumLevel(max);
Chris@148 306 }
Chris@148 307
Chris@148 308 return model;
Chris@148 309 }
Chris@148 310