annotate data/fileio/CSVFileReader.cpp @ 1346:75ad55315db4 3.0-integration

More work on getting tests (especially file encoding ones) running on Windows. Various problems here to do with interaction with test filenames in Hg repos
author Chris Cannam
date Fri, 06 Jan 2017 15:44:55 +0000
parents 815f82508f96
children 87ae75da6527
rev   line source
Chris@148 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@148 2
Chris@148 3 /*
Chris@148 4 Sonic Visualiser
Chris@148 5 An audio file viewer and annotation editor.
Chris@148 6 Centre for Digital Music, Queen Mary, University of London.
Chris@148 7 This file copyright 2006 Chris Cannam.
Chris@148 8
Chris@148 9 This program is free software; you can redistribute it and/or
Chris@148 10 modify it under the terms of the GNU General Public License as
Chris@148 11 published by the Free Software Foundation; either version 2 of the
Chris@148 12 License, or (at your option) any later version. See the file
Chris@148 13 COPYING included with this distribution for more information.
Chris@148 14 */
Chris@148 15
Chris@148 16 #include "CSVFileReader.h"
Chris@148 17
Chris@150 18 #include "model/Model.h"
Chris@148 19 #include "base/RealTime.h"
Chris@631 20 #include "base/StringBits.h"
Chris@148 21 #include "model/SparseOneDimensionalModel.h"
Chris@148 22 #include "model/SparseTimeValueModel.h"
Chris@152 23 #include "model/EditableDenseThreeDimensionalModel.h"
Chris@628 24 #include "model/RegionModel.h"
Chris@897 25 #include "model/NoteModel.h"
Chris@308 26 #include "DataFileReaderFactory.h"
Chris@148 27
Chris@148 28 #include <QFile>
Chris@1030 29 #include <QFileInfo>
Chris@148 30 #include <QString>
Chris@148 31 #include <QRegExp>
Chris@148 32 #include <QStringList>
Chris@148 33 #include <QTextStream>
Chris@148 34
Chris@148 35 #include <iostream>
Chris@628 36 #include <map>
Chris@148 37
Chris@1113 38 using namespace std;
Chris@1113 39
Chris@392 40 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
Chris@1047 41 sv_samplerate_t mainModelSampleRate) :
Chris@392 42 m_format(format),
Chris@1009 43 m_device(0),
Chris@1009 44 m_ownDevice(true),
Chris@631 45 m_warnings(0),
Chris@148 46 m_mainModelSampleRate(mainModelSampleRate)
Chris@148 47 {
Chris@1009 48 QFile *file = new QFile(path);
Chris@148 49 bool good = false;
Chris@148 50
Chris@1009 51 if (!file->exists()) {
Chris@148 52 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
Chris@1009 53 } else if (!file->open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@148 54 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
Chris@148 55 } else {
Chris@148 56 good = true;
Chris@148 57 }
Chris@148 58
Chris@1009 59 if (good) {
Chris@1009 60 m_device = file;
Chris@1030 61 m_filename = QFileInfo(path).fileName();
Chris@1009 62 } else {
Chris@1009 63 delete file;
Chris@148 64 }
Chris@148 65 }
Chris@148 66
Chris@1009 67 CSVFileReader::CSVFileReader(QIODevice *device, CSVFormat format,
Chris@1047 68 sv_samplerate_t mainModelSampleRate) :
Chris@1009 69 m_format(format),
Chris@1009 70 m_device(device),
Chris@1009 71 m_ownDevice(false),
Chris@1009 72 m_warnings(0),
Chris@1009 73 m_mainModelSampleRate(mainModelSampleRate)
Chris@1009 74 {
Chris@1009 75 }
Chris@1009 76
Chris@148 77 CSVFileReader::~CSVFileReader()
Chris@148 78 {
Chris@1009 79 SVDEBUG << "CSVFileReader::~CSVFileReader: device is " << m_device << endl;
Chris@148 80
Chris@1009 81 if (m_device && m_ownDevice) {
Chris@1009 82 SVDEBUG << "CSVFileReader::CSVFileReader: Closing device" << endl;
Chris@1009 83 m_device->close();
Chris@1009 84 delete m_device;
Chris@148 85 }
Chris@148 86 }
Chris@148 87
Chris@148 88 bool
Chris@148 89 CSVFileReader::isOK() const
Chris@148 90 {
Chris@1009 91 return (m_device != 0);
Chris@148 92 }
Chris@148 93
Chris@148 94 QString
Chris@148 95 CSVFileReader::getError() const
Chris@148 96 {
Chris@148 97 return m_error;
Chris@148 98 }
Chris@148 99
Chris@1038 100 sv_frame_t
Chris@1047 101 CSVFileReader::convertTimeValue(QString s, int lineno,
Chris@1047 102 sv_samplerate_t sampleRate,
Chris@929 103 int windowSize) const
Chris@631 104 {
Chris@631 105 QRegExp nonNumericRx("[^0-9eE.,+-]");
Chris@897 106 int warnLimit = 10;
Chris@631 107
Chris@631 108 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@631 109
Chris@1038 110 sv_frame_t calculatedFrame = 0;
Chris@631 111
Chris@631 112 bool ok = false;
Chris@631 113 QString numeric = s;
Chris@631 114 numeric.remove(nonNumericRx);
Chris@631 115
Chris@631 116 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@631 117
Chris@631 118 double time = numeric.toDouble(&ok);
Chris@631 119 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@1038 120 calculatedFrame = sv_frame_t(time * sampleRate + 0.5);
Chris@990 121
Chris@990 122 } else if (timeUnits == CSVFormat::TimeMilliseconds) {
Chris@990 123
Chris@990 124 double time = numeric.toDouble(&ok);
Chris@990 125 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@1038 126 calculatedFrame = sv_frame_t((time / 1000.0) * sampleRate + 0.5);
Chris@631 127
Chris@631 128 } else {
Chris@631 129
Chris@631 130 long n = numeric.toLong(&ok);
Chris@631 131 if (n >= 0) calculatedFrame = n;
Chris@631 132
Chris@631 133 if (timeUnits == CSVFormat::TimeWindows) {
Chris@631 134 calculatedFrame *= windowSize;
Chris@631 135 }
Chris@631 136 }
Chris@631 137
Chris@631 138 if (!ok) {
Chris@631 139 if (m_warnings < warnLimit) {
Chris@843 140 cerr << "WARNING: CSVFileReader::load: "
Chris@844 141 << "Bad time format (\"" << s
Chris@631 142 << "\") in data line "
Chris@843 143 << lineno+1 << endl;
Chris@631 144 } else if (m_warnings == warnLimit) {
Chris@843 145 cerr << "WARNING: Too many warnings" << endl;
Chris@631 146 }
Chris@631 147 ++m_warnings;
Chris@631 148 }
Chris@631 149
Chris@631 150 return calculatedFrame;
Chris@631 151 }
Chris@631 152
Chris@148 153 Model *
Chris@148 154 CSVFileReader::load() const
Chris@148 155 {
Chris@1009 156 if (!m_device) return 0;
Chris@148 157
Chris@628 158 CSVFormat::ModelType modelType = m_format.getModelType();
Chris@392 159 CSVFormat::TimingType timingType = m_format.getTimingType();
Chris@628 160 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@1047 161 sv_samplerate_t sampleRate = m_format.getSampleRate();
Chris@929 162 int windowSize = m_format.getWindowSize();
Chris@631 163 QChar separator = m_format.getSeparator();
Chris@631 164 bool allowQuoting = m_format.getAllowQuoting();
Chris@148 165
Chris@392 166 if (timingType == CSVFormat::ExplicitTiming) {
Chris@611 167 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@611 168 // This will be overridden later if more than one line
Chris@611 169 // appears in our file, but we want to choose a default
Chris@611 170 // that's likely to be visible
Chris@611 171 windowSize = 1024;
Chris@611 172 } else {
Chris@611 173 windowSize = 1;
Chris@611 174 }
Chris@990 175 if (timeUnits == CSVFormat::TimeSeconds ||
Chris@990 176 timeUnits == CSVFormat::TimeMilliseconds) {
Chris@148 177 sampleRate = m_mainModelSampleRate;
Chris@148 178 }
Chris@148 179 }
Chris@148 180
Chris@148 181 SparseOneDimensionalModel *model1 = 0;
Chris@148 182 SparseTimeValueModel *model2 = 0;
Chris@628 183 RegionModel *model2a = 0;
Chris@897 184 NoteModel *model2b = 0;
Chris@152 185 EditableDenseThreeDimensionalModel *model3 = 0;
Chris@148 186 Model *model = 0;
Chris@148 187
Chris@1009 188 QTextStream in(m_device);
Chris@148 189
Chris@148 190 unsigned int warnings = 0, warnLimit = 10;
Chris@148 191 unsigned int lineno = 0;
Chris@148 192
Chris@148 193 float min = 0.0, max = 0.0;
Chris@148 194
Chris@1038 195 sv_frame_t frameNo = 0;
Chris@1038 196 sv_frame_t duration = 0;
Chris@1038 197 sv_frame_t endFrame = 0;
Chris@631 198
Chris@631 199 bool haveAnyValue = false;
Chris@631 200 bool haveEndTime = false;
Chris@897 201 bool pitchLooksLikeMIDI = true;
Chris@631 202
Chris@1038 203 sv_frame_t startFrame = 0; // for calculation of dense model resolution
Chris@631 204 bool firstEverValue = true;
Chris@148 205
Chris@1113 206 map<QString, int> labelCountMap;
Chris@631 207
Chris@676 208 int valueColumns = 0;
Chris@676 209 for (int i = 0; i < m_format.getColumnCount(); ++i) {
Chris@676 210 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
Chris@676 211 ++valueColumns;
Chris@676 212 }
Chris@676 213 }
Chris@676 214
Chris@148 215 while (!in.atEnd()) {
Chris@148 216
Chris@283 217 // QTextStream's readLine doesn't cope with old-style Mac
Chris@283 218 // CR-only line endings. Why did they bother making the class
Chris@283 219 // cope with more than one sort of line ending, if it still
Chris@283 220 // can't be configured to cope with all the common sorts?
Chris@148 221
Chris@283 222 // For the time being we'll deal with this case (which is
Chris@283 223 // relatively uncommon for us, but still necessary to handle)
Chris@283 224 // by reading the entire file using a single readLine, and
Chris@283 225 // splitting it. For CR and CR/LF line endings this will just
Chris@283 226 // read a line at a time, and that's obviously OK.
Chris@148 227
Chris@283 228 QString chunk = in.readLine();
Chris@283 229 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@283 230
Chris@897 231 for (int li = 0; li < lines.size(); ++li) {
Chris@148 232
Chris@283 233 QString line = lines[li];
Chris@1009 234
Chris@283 235 if (line.startsWith("#")) continue;
Chris@283 236
Chris@631 237 QStringList list = StringBits::split(line, separator, allowQuoting);
Chris@283 238 if (!model) {
Chris@283 239
Chris@283 240 switch (modelType) {
Chris@283 241
Chris@392 242 case CSVFormat::OneDimensionalModel:
Chris@283 243 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
Chris@283 244 model = model1;
Chris@283 245 break;
Chris@148 246
Chris@392 247 case CSVFormat::TwoDimensionalModel:
Chris@283 248 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
Chris@283 249 model = model2;
Chris@283 250 break;
Chris@148 251
Chris@628 252 case CSVFormat::TwoDimensionalModelWithDuration:
Chris@628 253 model2a = new RegionModel(sampleRate, windowSize, false);
Chris@628 254 model = model2a;
Chris@628 255 break;
Chris@628 256
Chris@897 257 case CSVFormat::TwoDimensionalModelWithDurationAndPitch:
Chris@897 258 model2b = new NoteModel(sampleRate, windowSize, false);
Chris@897 259 model = model2b;
Chris@897 260 break;
Chris@897 261
Chris@392 262 case CSVFormat::ThreeDimensionalModel:
Chris@535 263 model3 = new EditableDenseThreeDimensionalModel
Chris@535 264 (sampleRate,
Chris@535 265 windowSize,
Chris@676 266 valueColumns,
Chris@535 267 EditableDenseThreeDimensionalModel::NoCompression);
Chris@283 268 model = model3;
Chris@283 269 break;
Chris@283 270 }
Chris@1030 271
Chris@1030 272 if (model) {
Chris@1030 273 if (m_filename != "") {
Chris@1030 274 model->setObjectName(m_filename);
Chris@1030 275 }
Chris@1030 276 }
Chris@283 277 }
Chris@148 278
Chris@631 279 float value = 0.f;
Chris@897 280 float pitch = 0.f;
Chris@631 281 QString label = "";
Chris@148 282
Chris@631 283 duration = 0.f;
Chris@631 284 haveEndTime = false;
Chris@628 285
Chris@283 286 for (int i = 0; i < list.size(); ++i) {
Chris@148 287
Chris@631 288 QString s = list[i];
Chris@631 289
Chris@631 290 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
Chris@631 291
Chris@631 292 switch (purpose) {
Chris@631 293
Chris@631 294 case CSVFormat::ColumnUnknown:
Chris@631 295 break;
Chris@631 296
Chris@631 297 case CSVFormat::ColumnStartTime:
Chris@631 298 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 299 break;
Chris@631 300
Chris@631 301 case CSVFormat::ColumnEndTime:
Chris@631 302 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 303 haveEndTime = true;
Chris@631 304 break;
Chris@631 305
Chris@631 306 case CSVFormat::ColumnDuration:
Chris@631 307 duration = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 308 break;
Chris@631 309
Chris@631 310 case CSVFormat::ColumnValue:
Chris@631 311 value = s.toFloat();
Chris@631 312 haveAnyValue = true;
Chris@631 313 break;
Chris@631 314
Chris@897 315 case CSVFormat::ColumnPitch:
Chris@897 316 pitch = s.toFloat();
Chris@897 317 if (pitch < 0.f || pitch > 127.f) {
Chris@897 318 pitchLooksLikeMIDI = false;
Chris@897 319 }
Chris@897 320 break;
Chris@897 321
Chris@631 322 case CSVFormat::ColumnLabel:
Chris@631 323 label = s;
Chris@631 324 break;
Chris@283 325 }
Chris@631 326 }
Chris@148 327
Chris@1113 328 ++labelCountMap[label];
Chris@1113 329
Chris@631 330 if (haveEndTime) { // ... calculate duration now all cols read
Chris@631 331 if (endFrame > frameNo) {
Chris@631 332 duration = endFrame - frameNo;
Chris@628 333 }
Chris@283 334 }
Chris@148 335
Chris@392 336 if (modelType == CSVFormat::OneDimensionalModel) {
Chris@148 337
Chris@631 338 SparseOneDimensionalModel::Point point(frameNo, label);
Chris@283 339 model1->addPoint(point);
Chris@148 340
Chris@392 341 } else if (modelType == CSVFormat::TwoDimensionalModel) {
Chris@148 342
Chris@631 343 SparseTimeValueModel::Point point(frameNo, value, label);
Chris@283 344 model2->addPoint(point);
Chris@148 345
Chris@628 346 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
Chris@628 347
Chris@631 348 RegionModel::Point point(frameNo, value, duration, label);
Chris@628 349 model2a->addPoint(point);
Chris@628 350
Chris@897 351 } else if (modelType == CSVFormat::TwoDimensionalModelWithDurationAndPitch) {
Chris@897 352
Chris@897 353 float level = ((value >= 0.f && value <= 1.f) ? value : 1.f);
Chris@897 354 NoteModel::Point point(frameNo, pitch, duration, level, label);
Chris@897 355 model2b->addPoint(point);
Chris@897 356
Chris@392 357 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 358
Chris@283 359 DenseThreeDimensionalModel::Column values;
Chris@148 360
Chris@631 361 for (int i = 0; i < list.size(); ++i) {
Chris@148 362
Chris@676 363 if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) {
Chris@676 364 continue;
Chris@676 365 }
Chris@676 366
Chris@283 367 bool ok = false;
Chris@283 368 float value = list[i].toFloat(&ok);
Chris@611 369
Chris@676 370 values.push_back(value);
Chris@148 371
Chris@631 372 if (firstEverValue || value < min) min = value;
Chris@631 373 if (firstEverValue || value > max) max = value;
Chris@676 374
Chris@631 375 if (firstEverValue) {
Chris@611 376 startFrame = frameNo;
Chris@611 377 model3->setStartFrame(startFrame);
Chris@611 378 } else if (lineno == 1 &&
Chris@611 379 timingType == CSVFormat::ExplicitTiming) {
Chris@1038 380 model3->setResolution(int(frameNo - startFrame));
Chris@611 381 }
Chris@631 382
Chris@631 383 firstEverValue = false;
Chris@148 384
Chris@283 385 if (!ok) {
Chris@283 386 if (warnings < warnLimit) {
Chris@843 387 cerr << "WARNING: CSVFileReader::load: "
Chris@390 388 << "Non-numeric value \""
Chris@844 389 << list[i]
Chris@491 390 << "\" in data line " << lineno+1
Chris@843 391 << ":" << endl;
Chris@843 392 cerr << line << endl;
Chris@283 393 ++warnings;
Chris@283 394 } else if (warnings == warnLimit) {
Chris@843 395 // cerr << "WARNING: Too many warnings" << endl;
Chris@283 396 }
Chris@283 397 }
Chris@283 398 }
Chris@148 399
Chris@690 400 // SVDEBUG << "Setting bin values for count " << lineno << ", frame "
Chris@687 401 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
Chris@148 402
Chris@611 403 model3->setColumn(lineno, values);
Chris@283 404 }
Chris@148 405
Chris@283 406 ++lineno;
Chris@392 407 if (timingType == CSVFormat::ImplicitTiming ||
Chris@283 408 list.size() == 0) {
Chris@283 409 frameNo += windowSize;
Chris@283 410 }
Chris@283 411 }
Chris@148 412 }
Chris@148 413
Chris@631 414 if (!haveAnyValue) {
Chris@631 415 if (model2a) {
Chris@631 416 // assign values for regions based on label frequency; we
Chris@631 417 // have this in our labelCountMap, sort of
Chris@631 418
Chris@1113 419 map<int, map<QString, float> > countLabelValueMap;
Chris@1113 420 for (map<QString, int>::iterator i = labelCountMap.begin();
Chris@631 421 i != labelCountMap.end(); ++i) {
Chris@1113 422 countLabelValueMap[i->second][i->first] = -1.f;
Chris@631 423 }
Chris@631 424
Chris@631 425 float v = 0.f;
Chris@1113 426 for (map<int, map<QString, float> >::iterator i =
Chris@631 427 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
Chris@631 428 --i;
Chris@1113 429 cerr << "count -> " << i->first << endl;
Chris@1113 430 for (map<QString, float>::iterator j = i->second.begin();
Chris@631 431 j != i->second.end(); ++j) {
Chris@631 432 j->second = v;
Chris@1113 433 cerr << "label -> " << j->first << ", value " << v << endl;
Chris@631 434 v = v + 1.f;
Chris@631 435 }
Chris@631 436 }
Chris@631 437
Chris@1113 438 map<RegionModel::Point, RegionModel::Point,
Chris@631 439 RegionModel::Point::Comparator> pointMap;
Chris@631 440 for (RegionModel::PointList::const_iterator i =
Chris@631 441 model2a->getPoints().begin();
Chris@631 442 i != model2a->getPoints().end(); ++i) {
Chris@631 443 RegionModel::Point p(*i);
Chris@1113 444 int count = labelCountMap[p.label];
Chris@1113 445 v = countLabelValueMap[count][p.label];
Chris@1113 446 cerr << "mapping from label \"" << p.label << "\" (count " << count << ") to value " << v << endl;
Chris@631 447 RegionModel::Point pp(p.frame, v, p.duration, p.label);
Chris@631 448 pointMap[p] = pp;
Chris@631 449 }
Chris@631 450
Chris@1113 451 for (map<RegionModel::Point, RegionModel::Point>::iterator i =
Chris@631 452 pointMap.begin(); i != pointMap.end(); ++i) {
Chris@1113 453 // There could be duplicate regions; if so replace
Chris@1113 454 // them all -- but we need to check we're not
Chris@1113 455 // replacing a region by itself (or else this will
Chris@1113 456 // never terminate)
Chris@1113 457 if (i->first.value == i->second.value) {
Chris@1113 458 continue;
Chris@1113 459 }
Chris@1113 460 while (model2a->containsPoint(i->first)) {
Chris@1113 461 model2a->deletePoint(i->first);
Chris@1113 462 model2a->addPoint(i->second);
Chris@1113 463 }
Chris@631 464 }
Chris@631 465 }
Chris@631 466 }
Chris@631 467
Chris@897 468 if (model2b) {
Chris@897 469 if (pitchLooksLikeMIDI) {
Chris@897 470 model2b->setScaleUnits("MIDI Pitch");
Chris@897 471 } else {
Chris@897 472 model2b->setScaleUnits("Hz");
Chris@897 473 }
Chris@897 474 }
Chris@897 475
Chris@961 476 if (model3) {
Chris@148 477 model3->setMinimumLevel(min);
Chris@148 478 model3->setMaximumLevel(max);
Chris@148 479 }
Chris@148 480
Chris@148 481 return model;
Chris@148 482 }
Chris@148 483