annotate data/fileio/CSVFileReader.cpp @ 826:f9f178efd6b8 tonioni

Line endings
author Chris Cannam
date Wed, 17 Jul 2013 15:45:29 +0100
parents 1424aa29ae95
children e802e550a1f2
rev   line source
Chris@148 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@148 2
Chris@148 3 /*
Chris@148 4 Sonic Visualiser
Chris@148 5 An audio file viewer and annotation editor.
Chris@148 6 Centre for Digital Music, Queen Mary, University of London.
Chris@148 7 This file copyright 2006 Chris Cannam.
Chris@148 8
Chris@148 9 This program is free software; you can redistribute it and/or
Chris@148 10 modify it under the terms of the GNU General Public License as
Chris@148 11 published by the Free Software Foundation; either version 2 of the
Chris@148 12 License, or (at your option) any later version. See the file
Chris@148 13 COPYING included with this distribution for more information.
Chris@148 14 */
Chris@148 15
Chris@148 16 #include "CSVFileReader.h"
Chris@148 17
Chris@150 18 #include "model/Model.h"
Chris@148 19 #include "base/RealTime.h"
Chris@631 20 #include "base/StringBits.h"
Chris@148 21 #include "model/SparseOneDimensionalModel.h"
Chris@148 22 #include "model/SparseTimeValueModel.h"
Chris@152 23 #include "model/EditableDenseThreeDimensionalModel.h"
Chris@628 24 #include "model/RegionModel.h"
Chris@308 25 #include "DataFileReaderFactory.h"
Chris@148 26
Chris@148 27 #include <QFile>
Chris@148 28 #include <QString>
Chris@148 29 #include <QRegExp>
Chris@148 30 #include <QStringList>
Chris@148 31 #include <QTextStream>
Chris@148 32
Chris@148 33 #include <iostream>
Chris@628 34 #include <map>
Chris@148 35
Chris@392 36 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
Chris@392 37 size_t mainModelSampleRate) :
Chris@392 38 m_format(format),
Chris@148 39 m_file(0),
Chris@631 40 m_warnings(0),
Chris@148 41 m_mainModelSampleRate(mainModelSampleRate)
Chris@148 42 {
Chris@148 43 m_file = new QFile(path);
Chris@148 44 bool good = false;
Chris@148 45
Chris@148 46 if (!m_file->exists()) {
Chris@148 47 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
Chris@148 48 } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@148 49 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
Chris@148 50 } else {
Chris@148 51 good = true;
Chris@148 52 }
Chris@148 53
Chris@148 54 if (!good) {
Chris@148 55 delete m_file;
Chris@148 56 m_file = 0;
Chris@148 57 }
Chris@148 58 }
Chris@148 59
Chris@148 60 CSVFileReader::~CSVFileReader()
Chris@148 61 {
Chris@690 62 SVDEBUG << "CSVFileReader::~CSVFileReader: file is " << m_file << endl;
Chris@148 63
Chris@148 64 if (m_file) {
Chris@690 65 SVDEBUG << "CSVFileReader::CSVFileReader: Closing file" << endl;
Chris@148 66 m_file->close();
Chris@148 67 }
Chris@148 68 delete m_file;
Chris@148 69 }
Chris@148 70
Chris@148 71 bool
Chris@148 72 CSVFileReader::isOK() const
Chris@148 73 {
Chris@148 74 return (m_file != 0);
Chris@148 75 }
Chris@148 76
Chris@148 77 QString
Chris@148 78 CSVFileReader::getError() const
Chris@148 79 {
Chris@148 80 return m_error;
Chris@148 81 }
Chris@148 82
Chris@631 83 size_t
Chris@631 84 CSVFileReader::convertTimeValue(QString s, int lineno, size_t sampleRate,
Chris@631 85 size_t windowSize) const
Chris@631 86 {
Chris@631 87 QRegExp nonNumericRx("[^0-9eE.,+-]");
Chris@631 88 unsigned int warnLimit = 10;
Chris@631 89
Chris@631 90 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@631 91
Chris@631 92 size_t calculatedFrame = 0;
Chris@631 93
Chris@631 94 bool ok = false;
Chris@631 95 QString numeric = s;
Chris@631 96 numeric.remove(nonNumericRx);
Chris@631 97
Chris@631 98 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@631 99
Chris@631 100 double time = numeric.toDouble(&ok);
Chris@631 101 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@631 102 calculatedFrame = int(time * sampleRate + 0.5);
Chris@631 103
Chris@631 104 } else {
Chris@631 105
Chris@631 106 long n = numeric.toLong(&ok);
Chris@631 107 if (n >= 0) calculatedFrame = n;
Chris@631 108
Chris@631 109 if (timeUnits == CSVFormat::TimeWindows) {
Chris@631 110 calculatedFrame *= windowSize;
Chris@631 111 }
Chris@631 112 }
Chris@631 113
Chris@631 114 if (!ok) {
Chris@631 115 if (m_warnings < warnLimit) {
Chris@631 116 std::cerr << "WARNING: CSVFileReader::load: "
Chris@631 117 << "Bad time format (\"" << s.toStdString()
Chris@631 118 << "\") in data line "
Chris@631 119 << lineno+1 << std::endl;
Chris@631 120 } else if (m_warnings == warnLimit) {
Chris@631 121 std::cerr << "WARNING: Too many warnings" << std::endl;
Chris@631 122 }
Chris@631 123 ++m_warnings;
Chris@631 124 }
Chris@631 125
Chris@631 126 return calculatedFrame;
Chris@631 127 }
Chris@631 128
Chris@148 129 Model *
Chris@148 130 CSVFileReader::load() const
Chris@148 131 {
Chris@148 132 if (!m_file) return 0;
Chris@148 133
Chris@628 134 CSVFormat::ModelType modelType = m_format.getModelType();
Chris@392 135 CSVFormat::TimingType timingType = m_format.getTimingType();
Chris@628 136 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@392 137 size_t sampleRate = m_format.getSampleRate();
Chris@392 138 size_t windowSize = m_format.getWindowSize();
Chris@631 139 QChar separator = m_format.getSeparator();
Chris@631 140 bool allowQuoting = m_format.getAllowQuoting();
Chris@148 141
Chris@392 142 if (timingType == CSVFormat::ExplicitTiming) {
Chris@611 143 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@611 144 // This will be overridden later if more than one line
Chris@611 145 // appears in our file, but we want to choose a default
Chris@611 146 // that's likely to be visible
Chris@611 147 windowSize = 1024;
Chris@611 148 } else {
Chris@611 149 windowSize = 1;
Chris@611 150 }
Chris@392 151 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@148 152 sampleRate = m_mainModelSampleRate;
Chris@148 153 }
Chris@148 154 }
Chris@148 155
Chris@148 156 SparseOneDimensionalModel *model1 = 0;
Chris@148 157 SparseTimeValueModel *model2 = 0;
Chris@628 158 RegionModel *model2a = 0;
Chris@152 159 EditableDenseThreeDimensionalModel *model3 = 0;
Chris@148 160 Model *model = 0;
Chris@148 161
Chris@148 162 QTextStream in(m_file);
Chris@148 163 in.seek(0);
Chris@148 164
Chris@148 165 unsigned int warnings = 0, warnLimit = 10;
Chris@148 166 unsigned int lineno = 0;
Chris@148 167
Chris@148 168 float min = 0.0, max = 0.0;
Chris@148 169
Chris@148 170 size_t frameNo = 0;
Chris@628 171 size_t duration = 0;
Chris@631 172 size_t endFrame = 0;
Chris@631 173
Chris@631 174 bool haveAnyValue = false;
Chris@631 175 bool haveEndTime = false;
Chris@631 176
Chris@611 177 size_t startFrame = 0; // for calculation of dense model resolution
Chris@631 178 bool firstEverValue = true;
Chris@148 179
Chris@631 180 std::map<QString, int> labelCountMap;
Chris@631 181
Chris@676 182 int valueColumns = 0;
Chris@676 183 for (int i = 0; i < m_format.getColumnCount(); ++i) {
Chris@676 184 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
Chris@676 185 ++valueColumns;
Chris@676 186 }
Chris@676 187 }
Chris@676 188
Chris@148 189 while (!in.atEnd()) {
Chris@148 190
Chris@283 191 // QTextStream's readLine doesn't cope with old-style Mac
Chris@283 192 // CR-only line endings. Why did they bother making the class
Chris@283 193 // cope with more than one sort of line ending, if it still
Chris@283 194 // can't be configured to cope with all the common sorts?
Chris@148 195
Chris@283 196 // For the time being we'll deal with this case (which is
Chris@283 197 // relatively uncommon for us, but still necessary to handle)
Chris@283 198 // by reading the entire file using a single readLine, and
Chris@283 199 // splitting it. For CR and CR/LF line endings this will just
Chris@283 200 // read a line at a time, and that's obviously OK.
Chris@148 201
Chris@283 202 QString chunk = in.readLine();
Chris@283 203 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@283 204
Chris@283 205 for (size_t li = 0; li < lines.size(); ++li) {
Chris@148 206
Chris@283 207 QString line = lines[li];
Chris@148 208
Chris@283 209 if (line.startsWith("#")) continue;
Chris@283 210
Chris@631 211 QStringList list = StringBits::split(line, separator, allowQuoting);
Chris@283 212 if (!model) {
Chris@283 213
Chris@283 214 switch (modelType) {
Chris@283 215
Chris@392 216 case CSVFormat::OneDimensionalModel:
Chris@283 217 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
Chris@283 218 model = model1;
Chris@283 219 break;
Chris@148 220
Chris@392 221 case CSVFormat::TwoDimensionalModel:
Chris@283 222 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
Chris@283 223 model = model2;
Chris@283 224 break;
Chris@148 225
Chris@628 226 case CSVFormat::TwoDimensionalModelWithDuration:
Chris@628 227 model2a = new RegionModel(sampleRate, windowSize, false);
Chris@628 228 model = model2a;
Chris@628 229 break;
Chris@628 230
Chris@392 231 case CSVFormat::ThreeDimensionalModel:
Chris@535 232 model3 = new EditableDenseThreeDimensionalModel
Chris@535 233 (sampleRate,
Chris@535 234 windowSize,
Chris@676 235 valueColumns,
Chris@535 236 EditableDenseThreeDimensionalModel::NoCompression);
Chris@283 237 model = model3;
Chris@283 238 break;
Chris@283 239 }
Chris@283 240 }
Chris@148 241
Chris@631 242 float value = 0.f;
Chris@631 243 QString label = "";
Chris@148 244
Chris@631 245 duration = 0.f;
Chris@631 246 haveEndTime = false;
Chris@628 247
Chris@283 248 for (int i = 0; i < list.size(); ++i) {
Chris@148 249
Chris@631 250 QString s = list[i];
Chris@631 251
Chris@631 252 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
Chris@631 253
Chris@631 254 switch (purpose) {
Chris@631 255
Chris@631 256 case CSVFormat::ColumnUnknown:
Chris@631 257 break;
Chris@631 258
Chris@631 259 case CSVFormat::ColumnStartTime:
Chris@631 260 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 261 break;
Chris@631 262
Chris@631 263 case CSVFormat::ColumnEndTime:
Chris@631 264 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 265 haveEndTime = true;
Chris@631 266 break;
Chris@631 267
Chris@631 268 case CSVFormat::ColumnDuration:
Chris@631 269 duration = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 270 break;
Chris@631 271
Chris@631 272 case CSVFormat::ColumnValue:
Chris@631 273 value = s.toFloat();
Chris@631 274 haveAnyValue = true;
Chris@631 275 break;
Chris@631 276
Chris@631 277 case CSVFormat::ColumnLabel:
Chris@631 278 label = s;
Chris@631 279 ++labelCountMap[label];
Chris@631 280 break;
Chris@283 281 }
Chris@631 282 }
Chris@148 283
Chris@631 284 if (haveEndTime) { // ... calculate duration now all cols read
Chris@631 285 if (endFrame > frameNo) {
Chris@631 286 duration = endFrame - frameNo;
Chris@628 287 }
Chris@283 288 }
Chris@148 289
Chris@392 290 if (modelType == CSVFormat::OneDimensionalModel) {
Chris@148 291
Chris@631 292 SparseOneDimensionalModel::Point point(frameNo, label);
Chris@283 293 model1->addPoint(point);
Chris@148 294
Chris@392 295 } else if (modelType == CSVFormat::TwoDimensionalModel) {
Chris@148 296
Chris@631 297 SparseTimeValueModel::Point point(frameNo, value, label);
Chris@283 298 model2->addPoint(point);
Chris@148 299
Chris@628 300 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
Chris@628 301
Chris@631 302 RegionModel::Point point(frameNo, value, duration, label);
Chris@628 303 model2a->addPoint(point);
Chris@628 304
Chris@392 305 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 306
Chris@283 307 DenseThreeDimensionalModel::Column values;
Chris@148 308
Chris@631 309 for (int i = 0; i < list.size(); ++i) {
Chris@148 310
Chris@676 311 if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) {
Chris@676 312 continue;
Chris@676 313 }
Chris@676 314
Chris@283 315 bool ok = false;
Chris@283 316 float value = list[i].toFloat(&ok);
Chris@611 317
Chris@676 318 values.push_back(value);
Chris@148 319
Chris@631 320 if (firstEverValue || value < min) min = value;
Chris@631 321 if (firstEverValue || value > max) max = value;
Chris@676 322
Chris@631 323 if (firstEverValue) {
Chris@611 324 startFrame = frameNo;
Chris@611 325 model3->setStartFrame(startFrame);
Chris@611 326 } else if (lineno == 1 &&
Chris@611 327 timingType == CSVFormat::ExplicitTiming) {
Chris@611 328 model3->setResolution(frameNo - startFrame);
Chris@611 329 }
Chris@631 330
Chris@631 331 firstEverValue = false;
Chris@148 332
Chris@283 333 if (!ok) {
Chris@283 334 if (warnings < warnLimit) {
Chris@283 335 std::cerr << "WARNING: CSVFileReader::load: "
Chris@390 336 << "Non-numeric value \""
Chris@390 337 << list[i].toStdString()
Chris@491 338 << "\" in data line " << lineno+1
Chris@283 339 << ":" << std::endl;
Chris@686 340 std::cerr << line << std::endl;
Chris@283 341 ++warnings;
Chris@283 342 } else if (warnings == warnLimit) {
Chris@390 343 // std::cerr << "WARNING: Too many warnings" << std::endl;
Chris@283 344 }
Chris@283 345 }
Chris@283 346 }
Chris@148 347
Chris@690 348 // SVDEBUG << "Setting bin values for count " << lineno << ", frame "
Chris@687 349 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
Chris@148 350
Chris@611 351 model3->setColumn(lineno, values);
Chris@283 352 }
Chris@148 353
Chris@283 354 ++lineno;
Chris@392 355 if (timingType == CSVFormat::ImplicitTiming ||
Chris@283 356 list.size() == 0) {
Chris@283 357 frameNo += windowSize;
Chris@283 358 }
Chris@283 359 }
Chris@148 360 }
Chris@148 361
Chris@631 362 if (!haveAnyValue) {
Chris@631 363 if (model2a) {
Chris@631 364 // assign values for regions based on label frequency; we
Chris@631 365 // have this in our labelCountMap, sort of
Chris@631 366
Chris@631 367 std::map<int, std::map<QString, float> > countLabelValueMap;
Chris@631 368 for (std::map<QString, int>::iterator i = labelCountMap.begin();
Chris@631 369 i != labelCountMap.end(); ++i) {
Chris@631 370 countLabelValueMap[i->second][i->first] = 0.f;
Chris@631 371 }
Chris@631 372
Chris@631 373 float v = 0.f;
Chris@631 374 for (std::map<int, std::map<QString, float> >::iterator i =
Chris@631 375 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
Chris@631 376 --i;
Chris@631 377 for (std::map<QString, float>::iterator j = i->second.begin();
Chris@631 378 j != i->second.end(); ++j) {
Chris@631 379 j->second = v;
Chris@631 380 v = v + 1.f;
Chris@631 381 }
Chris@631 382 }
Chris@631 383
Chris@631 384 std::map<RegionModel::Point, RegionModel::Point,
Chris@631 385 RegionModel::Point::Comparator> pointMap;
Chris@631 386 for (RegionModel::PointList::const_iterator i =
Chris@631 387 model2a->getPoints().begin();
Chris@631 388 i != model2a->getPoints().end(); ++i) {
Chris@631 389 RegionModel::Point p(*i);
Chris@631 390 v = countLabelValueMap[labelCountMap[p.label]][p.label];
Chris@631 391 RegionModel::Point pp(p.frame, v, p.duration, p.label);
Chris@631 392 pointMap[p] = pp;
Chris@631 393 }
Chris@631 394
Chris@631 395 for (std::map<RegionModel::Point, RegionModel::Point>::iterator i =
Chris@631 396 pointMap.begin(); i != pointMap.end(); ++i) {
Chris@631 397 model2a->deletePoint(i->first);
Chris@631 398 model2a->addPoint(i->second);
Chris@631 399 }
Chris@631 400 }
Chris@631 401 }
Chris@631 402
Chris@392 403 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 404 model3->setMinimumLevel(min);
Chris@148 405 model3->setMaximumLevel(max);
Chris@148 406 }
Chris@148 407
Chris@148 408 return model;
Chris@148 409 }
Chris@148 410