annotate data/fileio/CSVFileReader.cpp @ 997:2104ea2204d2

Separate out stdout ability (not all writers that support one-file will necessarily want to support it, e.g. for binary formats)
author Chris Cannam
date Mon, 13 Oct 2014 10:56:16 +0100
parents dc1695b90a58
children e369dd281cf2
rev   line source
Chris@148 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@148 2
Chris@148 3 /*
Chris@148 4 Sonic Visualiser
Chris@148 5 An audio file viewer and annotation editor.
Chris@148 6 Centre for Digital Music, Queen Mary, University of London.
Chris@148 7 This file copyright 2006 Chris Cannam.
Chris@148 8
Chris@148 9 This program is free software; you can redistribute it and/or
Chris@148 10 modify it under the terms of the GNU General Public License as
Chris@148 11 published by the Free Software Foundation; either version 2 of the
Chris@148 12 License, or (at your option) any later version. See the file
Chris@148 13 COPYING included with this distribution for more information.
Chris@148 14 */
Chris@148 15
Chris@148 16 #include "CSVFileReader.h"
Chris@148 17
Chris@150 18 #include "model/Model.h"
Chris@148 19 #include "base/RealTime.h"
Chris@631 20 #include "base/StringBits.h"
Chris@148 21 #include "model/SparseOneDimensionalModel.h"
Chris@148 22 #include "model/SparseTimeValueModel.h"
Chris@152 23 #include "model/EditableDenseThreeDimensionalModel.h"
Chris@628 24 #include "model/RegionModel.h"
Chris@897 25 #include "model/NoteModel.h"
Chris@308 26 #include "DataFileReaderFactory.h"
Chris@148 27
Chris@148 28 #include <QFile>
Chris@148 29 #include <QString>
Chris@148 30 #include <QRegExp>
Chris@148 31 #include <QStringList>
Chris@148 32 #include <QTextStream>
Chris@148 33
Chris@148 34 #include <iostream>
Chris@628 35 #include <map>
Chris@148 36
Chris@392 37 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
Chris@929 38 int mainModelSampleRate) :
Chris@392 39 m_format(format),
Chris@148 40 m_file(0),
Chris@631 41 m_warnings(0),
Chris@148 42 m_mainModelSampleRate(mainModelSampleRate)
Chris@148 43 {
Chris@148 44 m_file = new QFile(path);
Chris@148 45 bool good = false;
Chris@148 46
Chris@148 47 if (!m_file->exists()) {
Chris@148 48 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
Chris@148 49 } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@148 50 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
Chris@148 51 } else {
Chris@148 52 good = true;
Chris@148 53 }
Chris@148 54
Chris@148 55 if (!good) {
Chris@148 56 delete m_file;
Chris@148 57 m_file = 0;
Chris@148 58 }
Chris@148 59 }
Chris@148 60
Chris@148 61 CSVFileReader::~CSVFileReader()
Chris@148 62 {
Chris@690 63 SVDEBUG << "CSVFileReader::~CSVFileReader: file is " << m_file << endl;
Chris@148 64
Chris@148 65 if (m_file) {
Chris@690 66 SVDEBUG << "CSVFileReader::CSVFileReader: Closing file" << endl;
Chris@148 67 m_file->close();
Chris@148 68 }
Chris@148 69 delete m_file;
Chris@148 70 }
Chris@148 71
Chris@148 72 bool
Chris@148 73 CSVFileReader::isOK() const
Chris@148 74 {
Chris@148 75 return (m_file != 0);
Chris@148 76 }
Chris@148 77
Chris@148 78 QString
Chris@148 79 CSVFileReader::getError() const
Chris@148 80 {
Chris@148 81 return m_error;
Chris@148 82 }
Chris@148 83
Chris@929 84 int
Chris@929 85 CSVFileReader::convertTimeValue(QString s, int lineno, int sampleRate,
Chris@929 86 int windowSize) const
Chris@631 87 {
Chris@631 88 QRegExp nonNumericRx("[^0-9eE.,+-]");
Chris@897 89 int warnLimit = 10;
Chris@631 90
Chris@631 91 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@631 92
Chris@929 93 int calculatedFrame = 0;
Chris@631 94
Chris@631 95 bool ok = false;
Chris@631 96 QString numeric = s;
Chris@631 97 numeric.remove(nonNumericRx);
Chris@631 98
Chris@631 99 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@631 100
Chris@631 101 double time = numeric.toDouble(&ok);
Chris@631 102 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@631 103 calculatedFrame = int(time * sampleRate + 0.5);
Chris@990 104
Chris@990 105 } else if (timeUnits == CSVFormat::TimeMilliseconds) {
Chris@990 106
Chris@990 107 double time = numeric.toDouble(&ok);
Chris@990 108 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@990 109 calculatedFrame = int((time / 1000.0) * sampleRate + 0.5);
Chris@631 110
Chris@631 111 } else {
Chris@631 112
Chris@631 113 long n = numeric.toLong(&ok);
Chris@631 114 if (n >= 0) calculatedFrame = n;
Chris@631 115
Chris@631 116 if (timeUnits == CSVFormat::TimeWindows) {
Chris@631 117 calculatedFrame *= windowSize;
Chris@631 118 }
Chris@631 119 }
Chris@631 120
Chris@631 121 if (!ok) {
Chris@631 122 if (m_warnings < warnLimit) {
Chris@843 123 cerr << "WARNING: CSVFileReader::load: "
Chris@844 124 << "Bad time format (\"" << s
Chris@631 125 << "\") in data line "
Chris@843 126 << lineno+1 << endl;
Chris@631 127 } else if (m_warnings == warnLimit) {
Chris@843 128 cerr << "WARNING: Too many warnings" << endl;
Chris@631 129 }
Chris@631 130 ++m_warnings;
Chris@631 131 }
Chris@631 132
Chris@631 133 return calculatedFrame;
Chris@631 134 }
Chris@631 135
Chris@148 136 Model *
Chris@148 137 CSVFileReader::load() const
Chris@148 138 {
Chris@148 139 if (!m_file) return 0;
Chris@148 140
Chris@628 141 CSVFormat::ModelType modelType = m_format.getModelType();
Chris@392 142 CSVFormat::TimingType timingType = m_format.getTimingType();
Chris@628 143 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@929 144 int sampleRate = m_format.getSampleRate();
Chris@929 145 int windowSize = m_format.getWindowSize();
Chris@631 146 QChar separator = m_format.getSeparator();
Chris@631 147 bool allowQuoting = m_format.getAllowQuoting();
Chris@148 148
Chris@392 149 if (timingType == CSVFormat::ExplicitTiming) {
Chris@611 150 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@611 151 // This will be overridden later if more than one line
Chris@611 152 // appears in our file, but we want to choose a default
Chris@611 153 // that's likely to be visible
Chris@611 154 windowSize = 1024;
Chris@611 155 } else {
Chris@611 156 windowSize = 1;
Chris@611 157 }
Chris@990 158 if (timeUnits == CSVFormat::TimeSeconds ||
Chris@990 159 timeUnits == CSVFormat::TimeMilliseconds) {
Chris@148 160 sampleRate = m_mainModelSampleRate;
Chris@148 161 }
Chris@148 162 }
Chris@148 163
Chris@148 164 SparseOneDimensionalModel *model1 = 0;
Chris@148 165 SparseTimeValueModel *model2 = 0;
Chris@628 166 RegionModel *model2a = 0;
Chris@897 167 NoteModel *model2b = 0;
Chris@152 168 EditableDenseThreeDimensionalModel *model3 = 0;
Chris@148 169 Model *model = 0;
Chris@148 170
Chris@148 171 QTextStream in(m_file);
Chris@148 172 in.seek(0);
Chris@148 173
Chris@148 174 unsigned int warnings = 0, warnLimit = 10;
Chris@148 175 unsigned int lineno = 0;
Chris@148 176
Chris@148 177 float min = 0.0, max = 0.0;
Chris@148 178
Chris@929 179 int frameNo = 0;
Chris@929 180 int duration = 0;
Chris@929 181 int endFrame = 0;
Chris@631 182
Chris@631 183 bool haveAnyValue = false;
Chris@631 184 bool haveEndTime = false;
Chris@897 185 bool pitchLooksLikeMIDI = true;
Chris@631 186
Chris@929 187 int startFrame = 0; // for calculation of dense model resolution
Chris@631 188 bool firstEverValue = true;
Chris@148 189
Chris@631 190 std::map<QString, int> labelCountMap;
Chris@631 191
Chris@676 192 int valueColumns = 0;
Chris@676 193 for (int i = 0; i < m_format.getColumnCount(); ++i) {
Chris@676 194 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
Chris@676 195 ++valueColumns;
Chris@676 196 }
Chris@676 197 }
Chris@676 198
Chris@148 199 while (!in.atEnd()) {
Chris@148 200
Chris@283 201 // QTextStream's readLine doesn't cope with old-style Mac
Chris@283 202 // CR-only line endings. Why did they bother making the class
Chris@283 203 // cope with more than one sort of line ending, if it still
Chris@283 204 // can't be configured to cope with all the common sorts?
Chris@148 205
Chris@283 206 // For the time being we'll deal with this case (which is
Chris@283 207 // relatively uncommon for us, but still necessary to handle)
Chris@283 208 // by reading the entire file using a single readLine, and
Chris@283 209 // splitting it. For CR and CR/LF line endings this will just
Chris@283 210 // read a line at a time, and that's obviously OK.
Chris@148 211
Chris@283 212 QString chunk = in.readLine();
Chris@283 213 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@283 214
Chris@897 215 for (int li = 0; li < lines.size(); ++li) {
Chris@148 216
Chris@283 217 QString line = lines[li];
Chris@148 218
Chris@283 219 if (line.startsWith("#")) continue;
Chris@283 220
Chris@631 221 QStringList list = StringBits::split(line, separator, allowQuoting);
Chris@283 222 if (!model) {
Chris@283 223
Chris@283 224 switch (modelType) {
Chris@283 225
Chris@392 226 case CSVFormat::OneDimensionalModel:
Chris@283 227 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
Chris@283 228 model = model1;
Chris@283 229 break;
Chris@148 230
Chris@392 231 case CSVFormat::TwoDimensionalModel:
Chris@283 232 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
Chris@283 233 model = model2;
Chris@283 234 break;
Chris@148 235
Chris@628 236 case CSVFormat::TwoDimensionalModelWithDuration:
Chris@628 237 model2a = new RegionModel(sampleRate, windowSize, false);
Chris@628 238 model = model2a;
Chris@628 239 break;
Chris@628 240
Chris@897 241 case CSVFormat::TwoDimensionalModelWithDurationAndPitch:
Chris@897 242 model2b = new NoteModel(sampleRate, windowSize, false);
Chris@897 243 model = model2b;
Chris@897 244 break;
Chris@897 245
Chris@392 246 case CSVFormat::ThreeDimensionalModel:
Chris@535 247 model3 = new EditableDenseThreeDimensionalModel
Chris@535 248 (sampleRate,
Chris@535 249 windowSize,
Chris@676 250 valueColumns,
Chris@535 251 EditableDenseThreeDimensionalModel::NoCompression);
Chris@283 252 model = model3;
Chris@283 253 break;
Chris@283 254 }
Chris@283 255 }
Chris@148 256
Chris@631 257 float value = 0.f;
Chris@897 258 float pitch = 0.f;
Chris@631 259 QString label = "";
Chris@148 260
Chris@631 261 duration = 0.f;
Chris@631 262 haveEndTime = false;
Chris@628 263
Chris@283 264 for (int i = 0; i < list.size(); ++i) {
Chris@148 265
Chris@631 266 QString s = list[i];
Chris@631 267
Chris@631 268 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
Chris@631 269
Chris@631 270 switch (purpose) {
Chris@631 271
Chris@631 272 case CSVFormat::ColumnUnknown:
Chris@631 273 break;
Chris@631 274
Chris@631 275 case CSVFormat::ColumnStartTime:
Chris@631 276 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 277 break;
Chris@631 278
Chris@631 279 case CSVFormat::ColumnEndTime:
Chris@631 280 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 281 haveEndTime = true;
Chris@631 282 break;
Chris@631 283
Chris@631 284 case CSVFormat::ColumnDuration:
Chris@631 285 duration = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 286 break;
Chris@631 287
Chris@631 288 case CSVFormat::ColumnValue:
Chris@631 289 value = s.toFloat();
Chris@631 290 haveAnyValue = true;
Chris@631 291 break;
Chris@631 292
Chris@897 293 case CSVFormat::ColumnPitch:
Chris@897 294 pitch = s.toFloat();
Chris@897 295 if (pitch < 0.f || pitch > 127.f) {
Chris@897 296 pitchLooksLikeMIDI = false;
Chris@897 297 }
Chris@897 298 break;
Chris@897 299
Chris@631 300 case CSVFormat::ColumnLabel:
Chris@631 301 label = s;
Chris@631 302 ++labelCountMap[label];
Chris@631 303 break;
Chris@283 304 }
Chris@631 305 }
Chris@148 306
Chris@631 307 if (haveEndTime) { // ... calculate duration now all cols read
Chris@631 308 if (endFrame > frameNo) {
Chris@631 309 duration = endFrame - frameNo;
Chris@628 310 }
Chris@283 311 }
Chris@148 312
Chris@392 313 if (modelType == CSVFormat::OneDimensionalModel) {
Chris@148 314
Chris@631 315 SparseOneDimensionalModel::Point point(frameNo, label);
Chris@283 316 model1->addPoint(point);
Chris@148 317
Chris@392 318 } else if (modelType == CSVFormat::TwoDimensionalModel) {
Chris@148 319
Chris@631 320 SparseTimeValueModel::Point point(frameNo, value, label);
Chris@283 321 model2->addPoint(point);
Chris@148 322
Chris@628 323 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
Chris@628 324
Chris@631 325 RegionModel::Point point(frameNo, value, duration, label);
Chris@628 326 model2a->addPoint(point);
Chris@628 327
Chris@897 328 } else if (modelType == CSVFormat::TwoDimensionalModelWithDurationAndPitch) {
Chris@897 329
Chris@897 330 float level = ((value >= 0.f && value <= 1.f) ? value : 1.f);
Chris@897 331 NoteModel::Point point(frameNo, pitch, duration, level, label);
Chris@897 332 model2b->addPoint(point);
Chris@897 333
Chris@392 334 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 335
Chris@283 336 DenseThreeDimensionalModel::Column values;
Chris@148 337
Chris@631 338 for (int i = 0; i < list.size(); ++i) {
Chris@148 339
Chris@676 340 if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) {
Chris@676 341 continue;
Chris@676 342 }
Chris@676 343
Chris@283 344 bool ok = false;
Chris@283 345 float value = list[i].toFloat(&ok);
Chris@611 346
Chris@676 347 values.push_back(value);
Chris@148 348
Chris@631 349 if (firstEverValue || value < min) min = value;
Chris@631 350 if (firstEverValue || value > max) max = value;
Chris@676 351
Chris@631 352 if (firstEverValue) {
Chris@611 353 startFrame = frameNo;
Chris@611 354 model3->setStartFrame(startFrame);
Chris@611 355 } else if (lineno == 1 &&
Chris@611 356 timingType == CSVFormat::ExplicitTiming) {
Chris@611 357 model3->setResolution(frameNo - startFrame);
Chris@611 358 }
Chris@631 359
Chris@631 360 firstEverValue = false;
Chris@148 361
Chris@283 362 if (!ok) {
Chris@283 363 if (warnings < warnLimit) {
Chris@843 364 cerr << "WARNING: CSVFileReader::load: "
Chris@390 365 << "Non-numeric value \""
Chris@844 366 << list[i]
Chris@491 367 << "\" in data line " << lineno+1
Chris@843 368 << ":" << endl;
Chris@843 369 cerr << line << endl;
Chris@283 370 ++warnings;
Chris@283 371 } else if (warnings == warnLimit) {
Chris@843 372 // cerr << "WARNING: Too many warnings" << endl;
Chris@283 373 }
Chris@283 374 }
Chris@283 375 }
Chris@148 376
Chris@690 377 // SVDEBUG << "Setting bin values for count " << lineno << ", frame "
Chris@687 378 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
Chris@148 379
Chris@611 380 model3->setColumn(lineno, values);
Chris@283 381 }
Chris@148 382
Chris@283 383 ++lineno;
Chris@392 384 if (timingType == CSVFormat::ImplicitTiming ||
Chris@283 385 list.size() == 0) {
Chris@283 386 frameNo += windowSize;
Chris@283 387 }
Chris@283 388 }
Chris@148 389 }
Chris@148 390
Chris@631 391 if (!haveAnyValue) {
Chris@631 392 if (model2a) {
Chris@631 393 // assign values for regions based on label frequency; we
Chris@631 394 // have this in our labelCountMap, sort of
Chris@631 395
Chris@631 396 std::map<int, std::map<QString, float> > countLabelValueMap;
Chris@631 397 for (std::map<QString, int>::iterator i = labelCountMap.begin();
Chris@631 398 i != labelCountMap.end(); ++i) {
Chris@631 399 countLabelValueMap[i->second][i->first] = 0.f;
Chris@631 400 }
Chris@631 401
Chris@631 402 float v = 0.f;
Chris@631 403 for (std::map<int, std::map<QString, float> >::iterator i =
Chris@631 404 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
Chris@631 405 --i;
Chris@631 406 for (std::map<QString, float>::iterator j = i->second.begin();
Chris@631 407 j != i->second.end(); ++j) {
Chris@631 408 j->second = v;
Chris@631 409 v = v + 1.f;
Chris@631 410 }
Chris@631 411 }
Chris@631 412
Chris@631 413 std::map<RegionModel::Point, RegionModel::Point,
Chris@631 414 RegionModel::Point::Comparator> pointMap;
Chris@631 415 for (RegionModel::PointList::const_iterator i =
Chris@631 416 model2a->getPoints().begin();
Chris@631 417 i != model2a->getPoints().end(); ++i) {
Chris@631 418 RegionModel::Point p(*i);
Chris@631 419 v = countLabelValueMap[labelCountMap[p.label]][p.label];
Chris@631 420 RegionModel::Point pp(p.frame, v, p.duration, p.label);
Chris@631 421 pointMap[p] = pp;
Chris@631 422 }
Chris@631 423
Chris@631 424 for (std::map<RegionModel::Point, RegionModel::Point>::iterator i =
Chris@631 425 pointMap.begin(); i != pointMap.end(); ++i) {
Chris@631 426 model2a->deletePoint(i->first);
Chris@631 427 model2a->addPoint(i->second);
Chris@631 428 }
Chris@631 429 }
Chris@631 430 }
Chris@631 431
Chris@897 432 if (model2b) {
Chris@897 433 if (pitchLooksLikeMIDI) {
Chris@897 434 model2b->setScaleUnits("MIDI Pitch");
Chris@897 435 } else {
Chris@897 436 model2b->setScaleUnits("Hz");
Chris@897 437 }
Chris@897 438 }
Chris@897 439
Chris@961 440 if (model3) {
Chris@148 441 model3->setMinimumLevel(min);
Chris@148 442 model3->setMaximumLevel(max);
Chris@148 443 }
Chris@148 444
Chris@148 445 return model;
Chris@148 446 }
Chris@148 447