annotate data/fileio/CSVFileReader.cpp @ 1496:fde8c497373f

Avoid crashing if an effects plugin can't be instantiated and so the output vector is empty in the transformer's run() method
author Chris Cannam
date Mon, 13 Aug 2018 15:25:32 +0100
parents 48e9f538e6e9
children 53fa8d57b728
rev   line source
Chris@148 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@148 2
Chris@148 3 /*
Chris@148 4 Sonic Visualiser
Chris@148 5 An audio file viewer and annotation editor.
Chris@148 6 Centre for Digital Music, Queen Mary, University of London.
Chris@148 7 This file copyright 2006 Chris Cannam.
Chris@148 8
Chris@148 9 This program is free software; you can redistribute it and/or
Chris@148 10 modify it under the terms of the GNU General Public License as
Chris@148 11 published by the Free Software Foundation; either version 2 of the
Chris@148 12 License, or (at your option) any later version. See the file
Chris@148 13 COPYING included with this distribution for more information.
Chris@148 14 */
Chris@148 15
Chris@148 16 #include "CSVFileReader.h"
Chris@148 17
Chris@150 18 #include "model/Model.h"
Chris@148 19 #include "base/RealTime.h"
Chris@631 20 #include "base/StringBits.h"
Chris@148 21 #include "model/SparseOneDimensionalModel.h"
Chris@148 22 #include "model/SparseTimeValueModel.h"
Chris@152 23 #include "model/EditableDenseThreeDimensionalModel.h"
Chris@628 24 #include "model/RegionModel.h"
Chris@897 25 #include "model/NoteModel.h"
Chris@308 26 #include "DataFileReaderFactory.h"
Chris@148 27
Chris@148 28 #include <QFile>
Chris@1030 29 #include <QFileInfo>
Chris@148 30 #include <QString>
Chris@148 31 #include <QRegExp>
Chris@148 32 #include <QStringList>
Chris@148 33 #include <QTextStream>
Chris@148 34
Chris@148 35 #include <iostream>
Chris@628 36 #include <map>
Chris@1428 37 #include <string>
Chris@148 38
Chris@1113 39 using namespace std;
Chris@1113 40
Chris@392 41 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
Chris@1047 42 sv_samplerate_t mainModelSampleRate) :
Chris@392 43 m_format(format),
Chris@1009 44 m_device(0),
Chris@1009 45 m_ownDevice(true),
Chris@631 46 m_warnings(0),
Chris@148 47 m_mainModelSampleRate(mainModelSampleRate)
Chris@148 48 {
Chris@1009 49 QFile *file = new QFile(path);
Chris@148 50 bool good = false;
Chris@148 51
Chris@1009 52 if (!file->exists()) {
Chris@1429 53 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
Chris@1009 54 } else if (!file->open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@1429 55 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
Chris@148 56 } else {
Chris@1429 57 good = true;
Chris@148 58 }
Chris@148 59
Chris@1009 60 if (good) {
Chris@1009 61 m_device = file;
Chris@1030 62 m_filename = QFileInfo(path).fileName();
Chris@1009 63 } else {
Chris@1429 64 delete file;
Chris@148 65 }
Chris@148 66 }
Chris@148 67
Chris@1009 68 CSVFileReader::CSVFileReader(QIODevice *device, CSVFormat format,
Chris@1047 69 sv_samplerate_t mainModelSampleRate) :
Chris@1009 70 m_format(format),
Chris@1009 71 m_device(device),
Chris@1009 72 m_ownDevice(false),
Chris@1009 73 m_warnings(0),
Chris@1009 74 m_mainModelSampleRate(mainModelSampleRate)
Chris@1009 75 {
Chris@1009 76 }
Chris@1009 77
Chris@148 78 CSVFileReader::~CSVFileReader()
Chris@148 79 {
Chris@1009 80 SVDEBUG << "CSVFileReader::~CSVFileReader: device is " << m_device << endl;
Chris@148 81
Chris@1009 82 if (m_device && m_ownDevice) {
Chris@1009 83 SVDEBUG << "CSVFileReader::CSVFileReader: Closing device" << endl;
Chris@1009 84 m_device->close();
Chris@1009 85 delete m_device;
Chris@148 86 }
Chris@148 87 }
Chris@148 88
Chris@148 89 bool
Chris@148 90 CSVFileReader::isOK() const
Chris@148 91 {
Chris@1009 92 return (m_device != 0);
Chris@148 93 }
Chris@148 94
Chris@148 95 QString
Chris@148 96 CSVFileReader::getError() const
Chris@148 97 {
Chris@148 98 return m_error;
Chris@148 99 }
Chris@148 100
Chris@1038 101 sv_frame_t
Chris@1047 102 CSVFileReader::convertTimeValue(QString s, int lineno,
Chris@1047 103 sv_samplerate_t sampleRate,
Chris@929 104 int windowSize) const
Chris@631 105 {
Chris@631 106 QRegExp nonNumericRx("[^0-9eE.,+-]");
Chris@897 107 int warnLimit = 10;
Chris@631 108
Chris@631 109 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@631 110
Chris@1038 111 sv_frame_t calculatedFrame = 0;
Chris@631 112
Chris@631 113 bool ok = false;
Chris@631 114 QString numeric = s;
Chris@631 115 numeric.remove(nonNumericRx);
Chris@631 116
Chris@631 117 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@631 118
Chris@631 119 double time = numeric.toDouble(&ok);
Chris@631 120 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@1038 121 calculatedFrame = sv_frame_t(time * sampleRate + 0.5);
Chris@990 122
Chris@990 123 } else if (timeUnits == CSVFormat::TimeMilliseconds) {
Chris@990 124
Chris@990 125 double time = numeric.toDouble(&ok);
Chris@990 126 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@1038 127 calculatedFrame = sv_frame_t((time / 1000.0) * sampleRate + 0.5);
Chris@631 128
Chris@631 129 } else {
Chris@631 130
Chris@631 131 long n = numeric.toLong(&ok);
Chris@631 132 if (n >= 0) calculatedFrame = n;
Chris@631 133
Chris@631 134 if (timeUnits == CSVFormat::TimeWindows) {
Chris@631 135 calculatedFrame *= windowSize;
Chris@631 136 }
Chris@631 137 }
Chris@631 138
Chris@631 139 if (!ok) {
Chris@631 140 if (m_warnings < warnLimit) {
Chris@1428 141 SVCERR << "WARNING: CSVFileReader::load: "
Chris@844 142 << "Bad time format (\"" << s
Chris@631 143 << "\") in data line "
Chris@843 144 << lineno+1 << endl;
Chris@631 145 } else if (m_warnings == warnLimit) {
Chris@1428 146 SVCERR << "WARNING: Too many warnings" << endl;
Chris@631 147 }
Chris@631 148 ++m_warnings;
Chris@631 149 }
Chris@631 150
Chris@631 151 return calculatedFrame;
Chris@631 152 }
Chris@631 153
Chris@148 154 Model *
Chris@148 155 CSVFileReader::load() const
Chris@148 156 {
Chris@1009 157 if (!m_device) return 0;
Chris@148 158
Chris@628 159 CSVFormat::ModelType modelType = m_format.getModelType();
Chris@392 160 CSVFormat::TimingType timingType = m_format.getTimingType();
Chris@628 161 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@1047 162 sv_samplerate_t sampleRate = m_format.getSampleRate();
Chris@929 163 int windowSize = m_format.getWindowSize();
Chris@631 164 QChar separator = m_format.getSeparator();
Chris@631 165 bool allowQuoting = m_format.getAllowQuoting();
Chris@148 166
Chris@392 167 if (timingType == CSVFormat::ExplicitTiming) {
Chris@611 168 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@611 169 // This will be overridden later if more than one line
Chris@611 170 // appears in our file, but we want to choose a default
Chris@611 171 // that's likely to be visible
Chris@611 172 windowSize = 1024;
Chris@611 173 } else {
Chris@611 174 windowSize = 1;
Chris@611 175 }
Chris@1429 176 if (timeUnits == CSVFormat::TimeSeconds ||
Chris@990 177 timeUnits == CSVFormat::TimeMilliseconds) {
Chris@1429 178 sampleRate = m_mainModelSampleRate;
Chris@1429 179 }
Chris@148 180 }
Chris@148 181
Chris@148 182 SparseOneDimensionalModel *model1 = 0;
Chris@148 183 SparseTimeValueModel *model2 = 0;
Chris@628 184 RegionModel *model2a = 0;
Chris@897 185 NoteModel *model2b = 0;
Chris@152 186 EditableDenseThreeDimensionalModel *model3 = 0;
Chris@148 187 Model *model = 0;
Chris@148 188
Chris@1009 189 QTextStream in(m_device);
Chris@148 190
Chris@148 191 unsigned int warnings = 0, warnLimit = 10;
Chris@148 192 unsigned int lineno = 0;
Chris@148 193
Chris@148 194 float min = 0.0, max = 0.0;
Chris@148 195
Chris@1038 196 sv_frame_t frameNo = 0;
Chris@1038 197 sv_frame_t duration = 0;
Chris@1038 198 sv_frame_t endFrame = 0;
Chris@631 199
Chris@631 200 bool haveAnyValue = false;
Chris@631 201 bool haveEndTime = false;
Chris@897 202 bool pitchLooksLikeMIDI = true;
Chris@631 203
Chris@1038 204 sv_frame_t startFrame = 0; // for calculation of dense model resolution
Chris@631 205 bool firstEverValue = true;
Chris@148 206
Chris@1113 207 map<QString, int> labelCountMap;
Chris@631 208
Chris@676 209 int valueColumns = 0;
Chris@676 210 for (int i = 0; i < m_format.getColumnCount(); ++i) {
Chris@676 211 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
Chris@676 212 ++valueColumns;
Chris@676 213 }
Chris@676 214 }
Chris@676 215
Chris@148 216 while (!in.atEnd()) {
Chris@148 217
Chris@283 218 // QTextStream's readLine doesn't cope with old-style Mac
Chris@283 219 // CR-only line endings. Why did they bother making the class
Chris@283 220 // cope with more than one sort of line ending, if it still
Chris@283 221 // can't be configured to cope with all the common sorts?
Chris@148 222
Chris@283 223 // For the time being we'll deal with this case (which is
Chris@283 224 // relatively uncommon for us, but still necessary to handle)
Chris@283 225 // by reading the entire file using a single readLine, and
Chris@283 226 // splitting it. For CR and CR/LF line endings this will just
Chris@283 227 // read a line at a time, and that's obviously OK.
Chris@148 228
Chris@283 229 QString chunk = in.readLine();
Chris@283 230 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@283 231
Chris@897 232 for (int li = 0; li < lines.size(); ++li) {
Chris@148 233
Chris@283 234 QString line = lines[li];
Chris@1009 235
Chris@283 236 if (line.startsWith("#")) continue;
Chris@283 237
Chris@631 238 QStringList list = StringBits::split(line, separator, allowQuoting);
Chris@283 239 if (!model) {
Chris@283 240
Chris@283 241 switch (modelType) {
Chris@283 242
Chris@392 243 case CSVFormat::OneDimensionalModel:
Chris@283 244 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
Chris@283 245 model = model1;
Chris@283 246 break;
Chris@1429 247
Chris@392 248 case CSVFormat::TwoDimensionalModel:
Chris@283 249 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
Chris@283 250 model = model2;
Chris@283 251 break;
Chris@1429 252
Chris@628 253 case CSVFormat::TwoDimensionalModelWithDuration:
Chris@628 254 model2a = new RegionModel(sampleRate, windowSize, false);
Chris@628 255 model = model2a;
Chris@628 256 break;
Chris@1429 257
Chris@897 258 case CSVFormat::TwoDimensionalModelWithDurationAndPitch:
Chris@897 259 model2b = new NoteModel(sampleRate, windowSize, false);
Chris@897 260 model = model2b;
Chris@897 261 break;
Chris@1429 262
Chris@392 263 case CSVFormat::ThreeDimensionalModel:
Chris@535 264 model3 = new EditableDenseThreeDimensionalModel
Chris@535 265 (sampleRate,
Chris@535 266 windowSize,
Chris@676 267 valueColumns,
Chris@535 268 EditableDenseThreeDimensionalModel::NoCompression);
Chris@283 269 model = model3;
Chris@283 270 break;
Chris@283 271 }
Chris@1030 272
Chris@1030 273 if (model) {
Chris@1030 274 if (m_filename != "") {
Chris@1030 275 model->setObjectName(m_filename);
Chris@1030 276 }
Chris@1030 277 }
Chris@283 278 }
Chris@148 279
Chris@631 280 float value = 0.f;
Chris@897 281 float pitch = 0.f;
Chris@631 282 QString label = "";
Chris@148 283
Chris@631 284 duration = 0.f;
Chris@631 285 haveEndTime = false;
Chris@628 286
Chris@283 287 for (int i = 0; i < list.size(); ++i) {
Chris@148 288
Chris@631 289 QString s = list[i];
Chris@631 290
Chris@631 291 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
Chris@631 292
Chris@631 293 switch (purpose) {
Chris@631 294
Chris@631 295 case CSVFormat::ColumnUnknown:
Chris@631 296 break;
Chris@631 297
Chris@631 298 case CSVFormat::ColumnStartTime:
Chris@631 299 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 300 break;
Chris@631 301
Chris@631 302 case CSVFormat::ColumnEndTime:
Chris@631 303 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 304 haveEndTime = true;
Chris@631 305 break;
Chris@631 306
Chris@631 307 case CSVFormat::ColumnDuration:
Chris@631 308 duration = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 309 break;
Chris@631 310
Chris@631 311 case CSVFormat::ColumnValue:
Chris@631 312 value = s.toFloat();
Chris@631 313 haveAnyValue = true;
Chris@631 314 break;
Chris@631 315
Chris@897 316 case CSVFormat::ColumnPitch:
Chris@897 317 pitch = s.toFloat();
Chris@897 318 if (pitch < 0.f || pitch > 127.f) {
Chris@897 319 pitchLooksLikeMIDI = false;
Chris@897 320 }
Chris@897 321 break;
Chris@897 322
Chris@631 323 case CSVFormat::ColumnLabel:
Chris@631 324 label = s;
Chris@631 325 break;
Chris@283 326 }
Chris@631 327 }
Chris@148 328
Chris@1113 329 ++labelCountMap[label];
Chris@1113 330
Chris@631 331 if (haveEndTime) { // ... calculate duration now all cols read
Chris@631 332 if (endFrame > frameNo) {
Chris@631 333 duration = endFrame - frameNo;
Chris@628 334 }
Chris@283 335 }
Chris@148 336
Chris@392 337 if (modelType == CSVFormat::OneDimensionalModel) {
Chris@1429 338
Chris@631 339 SparseOneDimensionalModel::Point point(frameNo, label);
Chris@283 340 model1->addPoint(point);
Chris@148 341
Chris@392 342 } else if (modelType == CSVFormat::TwoDimensionalModel) {
Chris@148 343
Chris@631 344 SparseTimeValueModel::Point point(frameNo, value, label);
Chris@283 345 model2->addPoint(point);
Chris@148 346
Chris@628 347 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
Chris@628 348
Chris@631 349 RegionModel::Point point(frameNo, value, duration, label);
Chris@628 350 model2a->addPoint(point);
Chris@628 351
Chris@897 352 } else if (modelType == CSVFormat::TwoDimensionalModelWithDurationAndPitch) {
Chris@897 353
Chris@897 354 float level = ((value >= 0.f && value <= 1.f) ? value : 1.f);
Chris@897 355 NoteModel::Point point(frameNo, pitch, duration, level, label);
Chris@897 356 model2b->addPoint(point);
Chris@897 357
Chris@392 358 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 359
Chris@283 360 DenseThreeDimensionalModel::Column values;
Chris@148 361
Chris@631 362 for (int i = 0; i < list.size(); ++i) {
Chris@148 363
Chris@676 364 if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) {
Chris@676 365 continue;
Chris@676 366 }
Chris@676 367
Chris@283 368 bool ok = false;
Chris@283 369 float value = list[i].toFloat(&ok);
Chris@611 370
Chris@676 371 values.push_back(value);
Chris@1429 372
Chris@631 373 if (firstEverValue || value < min) min = value;
Chris@631 374 if (firstEverValue || value > max) max = value;
Chris@676 375
Chris@631 376 if (firstEverValue) {
Chris@611 377 startFrame = frameNo;
Chris@611 378 model3->setStartFrame(startFrame);
Chris@611 379 } else if (lineno == 1 &&
Chris@611 380 timingType == CSVFormat::ExplicitTiming) {
Chris@1038 381 model3->setResolution(int(frameNo - startFrame));
Chris@611 382 }
Chris@631 383
Chris@631 384 firstEverValue = false;
Chris@148 385
Chris@283 386 if (!ok) {
Chris@283 387 if (warnings < warnLimit) {
Chris@1428 388 SVCERR << "WARNING: CSVFileReader::load: "
Chris@390 389 << "Non-numeric value \""
Chris@844 390 << list[i]
Chris@491 391 << "\" in data line " << lineno+1
Chris@843 392 << ":" << endl;
Chris@1428 393 SVCERR << line << endl;
Chris@283 394 ++warnings;
Chris@283 395 } else if (warnings == warnLimit) {
Chris@1428 396 // SVCERR << "WARNING: Too many warnings" << endl;
Chris@283 397 }
Chris@283 398 }
Chris@283 399 }
Chris@1429 400
Chris@690 401 // SVDEBUG << "Setting bin values for count " << lineno << ", frame "
Chris@687 402 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
Chris@148 403
Chris@611 404 model3->setColumn(lineno, values);
Chris@283 405 }
Chris@148 406
Chris@283 407 ++lineno;
Chris@392 408 if (timingType == CSVFormat::ImplicitTiming ||
Chris@283 409 list.size() == 0) {
Chris@283 410 frameNo += windowSize;
Chris@283 411 }
Chris@283 412 }
Chris@148 413 }
Chris@148 414
Chris@631 415 if (!haveAnyValue) {
Chris@631 416 if (model2a) {
Chris@631 417 // assign values for regions based on label frequency; we
Chris@631 418 // have this in our labelCountMap, sort of
Chris@631 419
Chris@1113 420 map<int, map<QString, float> > countLabelValueMap;
Chris@1113 421 for (map<QString, int>::iterator i = labelCountMap.begin();
Chris@631 422 i != labelCountMap.end(); ++i) {
Chris@1113 423 countLabelValueMap[i->second][i->first] = -1.f;
Chris@631 424 }
Chris@631 425
Chris@631 426 float v = 0.f;
Chris@1113 427 for (map<int, map<QString, float> >::iterator i =
Chris@631 428 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
Chris@631 429 --i;
Chris@1428 430 SVCERR << "count -> " << i->first << endl;
Chris@1113 431 for (map<QString, float>::iterator j = i->second.begin();
Chris@631 432 j != i->second.end(); ++j) {
Chris@631 433 j->second = v;
Chris@1428 434 SVCERR << "label -> " << j->first << ", value " << v << endl;
Chris@631 435 v = v + 1.f;
Chris@631 436 }
Chris@631 437 }
Chris@631 438
Chris@1113 439 map<RegionModel::Point, RegionModel::Point,
Chris@631 440 RegionModel::Point::Comparator> pointMap;
Chris@631 441 for (RegionModel::PointList::const_iterator i =
Chris@631 442 model2a->getPoints().begin();
Chris@631 443 i != model2a->getPoints().end(); ++i) {
Chris@631 444 RegionModel::Point p(*i);
Chris@1113 445 int count = labelCountMap[p.label];
Chris@1113 446 v = countLabelValueMap[count][p.label];
Chris@1428 447 // SVCERR << "mapping from label \"" << p.label << "\" (count " << count << ") to value " << v << endl;
Chris@631 448 RegionModel::Point pp(p.frame, v, p.duration, p.label);
Chris@631 449 pointMap[p] = pp;
Chris@631 450 }
Chris@631 451
Chris@1113 452 for (map<RegionModel::Point, RegionModel::Point>::iterator i =
Chris@631 453 pointMap.begin(); i != pointMap.end(); ++i) {
Chris@1113 454 // There could be duplicate regions; if so replace
Chris@1113 455 // them all -- but we need to check we're not
Chris@1113 456 // replacing a region by itself (or else this will
Chris@1113 457 // never terminate)
Chris@1113 458 if (i->first.value == i->second.value) {
Chris@1113 459 continue;
Chris@1113 460 }
Chris@1113 461 while (model2a->containsPoint(i->first)) {
Chris@1113 462 model2a->deletePoint(i->first);
Chris@1113 463 model2a->addPoint(i->second);
Chris@1113 464 }
Chris@631 465 }
Chris@631 466 }
Chris@631 467 }
Chris@631 468
Chris@897 469 if (model2b) {
Chris@897 470 if (pitchLooksLikeMIDI) {
Chris@897 471 model2b->setScaleUnits("MIDI Pitch");
Chris@897 472 } else {
Chris@897 473 model2b->setScaleUnits("Hz");
Chris@897 474 }
Chris@897 475 }
Chris@897 476
Chris@961 477 if (model3) {
Chris@1429 478 model3->setMinimumLevel(min);
Chris@1429 479 model3->setMaximumLevel(max);
Chris@148 480 }
Chris@148 481
Chris@148 482 return model;
Chris@148 483 }
Chris@148 484