annotate data/fileio/CSVFileReader.cpp @ 1499:68a0abfe7263

Merge
author Chris Cannam
date Mon, 13 Aug 2018 15:37:41 +0100
parents 48e9f538e6e9
children 53fa8d57b728
rev   line source
Chris@148 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@148 2
Chris@148 3 /*
Chris@148 4 Sonic Visualiser
Chris@148 5 An audio file viewer and annotation editor.
Chris@148 6 Centre for Digital Music, Queen Mary, University of London.
Chris@148 7 This file copyright 2006 Chris Cannam.
Chris@148 8
Chris@148 9 This program is free software; you can redistribute it and/or
Chris@148 10 modify it under the terms of the GNU General Public License as
Chris@148 11 published by the Free Software Foundation; either version 2 of the
Chris@148 12 License, or (at your option) any later version. See the file
Chris@148 13 COPYING included with this distribution for more information.
Chris@148 14 */
Chris@148 15
Chris@148 16 #include "CSVFileReader.h"
Chris@148 17
Chris@150 18 #include "model/Model.h"
Chris@148 19 #include "base/RealTime.h"
Chris@631 20 #include "base/StringBits.h"
Chris@148 21 #include "model/SparseOneDimensionalModel.h"
Chris@148 22 #include "model/SparseTimeValueModel.h"
Chris@152 23 #include "model/EditableDenseThreeDimensionalModel.h"
Chris@628 24 #include "model/RegionModel.h"
Chris@897 25 #include "model/NoteModel.h"
Chris@308 26 #include "DataFileReaderFactory.h"
Chris@148 27
Chris@148 28 #include <QFile>
Chris@1030 29 #include <QFileInfo>
Chris@148 30 #include <QString>
Chris@148 31 #include <QRegExp>
Chris@148 32 #include <QStringList>
Chris@148 33 #include <QTextStream>
Chris@148 34
Chris@148 35 #include <iostream>
Chris@628 36 #include <map>
Chris@1428 37 #include <string>
Chris@148 38
Chris@1113 39 using namespace std;
Chris@1113 40
Chris@392 41 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
Chris@1047 42 sv_samplerate_t mainModelSampleRate) :
Chris@392 43 m_format(format),
Chris@1009 44 m_device(0),
Chris@1009 45 m_ownDevice(true),
Chris@631 46 m_warnings(0),
Chris@148 47 m_mainModelSampleRate(mainModelSampleRate)
Chris@148 48 {
Chris@1009 49 QFile *file = new QFile(path);
Chris@148 50 bool good = false;
Chris@148 51
Chris@1009 52 if (!file->exists()) {
Chris@1429 53 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
Chris@1009 54 } else if (!file->open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@1429 55 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
Chris@148 56 } else {
Chris@1429 57 good = true;
Chris@148 58 }
Chris@148 59
Chris@1009 60 if (good) {
Chris@1009 61 m_device = file;
Chris@1030 62 m_filename = QFileInfo(path).fileName();
Chris@1009 63 } else {
Chris@1429 64 delete file;
Chris@148 65 }
Chris@148 66 }
Chris@148 67
Chris@1009 68 CSVFileReader::CSVFileReader(QIODevice *device, CSVFormat format,
Chris@1047 69 sv_samplerate_t mainModelSampleRate) :
Chris@1009 70 m_format(format),
Chris@1009 71 m_device(device),
Chris@1009 72 m_ownDevice(false),
Chris@1009 73 m_warnings(0),
Chris@1009 74 m_mainModelSampleRate(mainModelSampleRate)
Chris@1009 75 {
Chris@1009 76 }
Chris@1009 77
Chris@148 78 CSVFileReader::~CSVFileReader()
Chris@148 79 {
Chris@1009 80 SVDEBUG << "CSVFileReader::~CSVFileReader: device is " << m_device << endl;
Chris@148 81
Chris@1009 82 if (m_device && m_ownDevice) {
Chris@1009 83 SVDEBUG << "CSVFileReader::CSVFileReader: Closing device" << endl;
Chris@1009 84 m_device->close();
Chris@1009 85 delete m_device;
Chris@148 86 }
Chris@148 87 }
Chris@148 88
Chris@148 89 bool
Chris@148 90 CSVFileReader::isOK() const
Chris@148 91 {
Chris@1009 92 return (m_device != 0);
Chris@148 93 }
Chris@148 94
Chris@148 95 QString
Chris@148 96 CSVFileReader::getError() const
Chris@148 97 {
Chris@148 98 return m_error;
Chris@148 99 }
Chris@148 100
Chris@1038 101 sv_frame_t
Chris@1047 102 CSVFileReader::convertTimeValue(QString s, int lineno,
Chris@1047 103 sv_samplerate_t sampleRate,
Chris@929 104 int windowSize) const
Chris@631 105 {
Chris@631 106 QRegExp nonNumericRx("[^0-9eE.,+-]");
Chris@897 107 int warnLimit = 10;
Chris@631 108
Chris@631 109 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@631 110
Chris@1038 111 sv_frame_t calculatedFrame = 0;
Chris@631 112
Chris@631 113 bool ok = false;
Chris@631 114 QString numeric = s;
Chris@631 115 numeric.remove(nonNumericRx);
Chris@631 116
Chris@631 117 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@631 118
Chris@631 119 double time = numeric.toDouble(&ok);
Chris@631 120 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@1038 121 calculatedFrame = sv_frame_t(time * sampleRate + 0.5);
Chris@990 122
Chris@990 123 } else if (timeUnits == CSVFormat::TimeMilliseconds) {
Chris@990 124
Chris@990 125 double time = numeric.toDouble(&ok);
Chris@990 126 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@1038 127 calculatedFrame = sv_frame_t((time / 1000.0) * sampleRate + 0.5);
Chris@631 128
Chris@631 129 } else {
Chris@631 130
Chris@631 131 long n = numeric.toLong(&ok);
Chris@631 132 if (n >= 0) calculatedFrame = n;
Chris@631 133
Chris@631 134 if (timeUnits == CSVFormat::TimeWindows) {
Chris@631 135 calculatedFrame *= windowSize;
Chris@631 136 }
Chris@631 137 }
Chris@631 138
Chris@631 139 if (!ok) {
Chris@631 140 if (m_warnings < warnLimit) {
Chris@1428 141 SVCERR << "WARNING: CSVFileReader::load: "
Chris@844 142 << "Bad time format (\"" << s
Chris@631 143 << "\") in data line "
Chris@843 144 << lineno+1 << endl;
Chris@631 145 } else if (m_warnings == warnLimit) {
Chris@1428 146 SVCERR << "WARNING: Too many warnings" << endl;
Chris@631 147 }
Chris@631 148 ++m_warnings;
Chris@631 149 }
Chris@631 150
Chris@631 151 return calculatedFrame;
Chris@631 152 }
Chris@631 153
Chris@148 154 Model *
Chris@148 155 CSVFileReader::load() const
Chris@148 156 {
Chris@1009 157 if (!m_device) return 0;
Chris@148 158
Chris@628 159 CSVFormat::ModelType modelType = m_format.getModelType();
Chris@392 160 CSVFormat::TimingType timingType = m_format.getTimingType();
Chris@628 161 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@1047 162 sv_samplerate_t sampleRate = m_format.getSampleRate();
Chris@929 163 int windowSize = m_format.getWindowSize();
Chris@631 164 QChar separator = m_format.getSeparator();
Chris@631 165 bool allowQuoting = m_format.getAllowQuoting();
Chris@148 166
Chris@392 167 if (timingType == CSVFormat::ExplicitTiming) {
Chris@611 168 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@611 169 // This will be overridden later if more than one line
Chris@611 170 // appears in our file, but we want to choose a default
Chris@611 171 // that's likely to be visible
Chris@611 172 windowSize = 1024;
Chris@611 173 } else {
Chris@611 174 windowSize = 1;
Chris@611 175 }
Chris@1429 176 if (timeUnits == CSVFormat::TimeSeconds ||
Chris@990 177 timeUnits == CSVFormat::TimeMilliseconds) {
Chris@1429 178 sampleRate = m_mainModelSampleRate;
Chris@1429 179 }
Chris@148 180 }
Chris@148 181
Chris@148 182 SparseOneDimensionalModel *model1 = 0;
Chris@148 183 SparseTimeValueModel *model2 = 0;
Chris@628 184 RegionModel *model2a = 0;
Chris@897 185 NoteModel *model2b = 0;
Chris@152 186 EditableDenseThreeDimensionalModel *model3 = 0;
Chris@148 187 Model *model = 0;
Chris@148 188
Chris@1009 189 QTextStream in(m_device);
Chris@148 190
Chris@148 191 unsigned int warnings = 0, warnLimit = 10;
Chris@148 192 unsigned int lineno = 0;
Chris@148 193
Chris@148 194 float min = 0.0, max = 0.0;
Chris@148 195
Chris@1038 196 sv_frame_t frameNo = 0;
Chris@1038 197 sv_frame_t duration = 0;
Chris@1038 198 sv_frame_t endFrame = 0;
Chris@631 199
Chris@631 200 bool haveAnyValue = false;
Chris@631 201 bool haveEndTime = false;
Chris@897 202 bool pitchLooksLikeMIDI = true;
Chris@631 203
Chris@1038 204 sv_frame_t startFrame = 0; // for calculation of dense model resolution
Chris@631 205 bool firstEverValue = true;
Chris@148 206
Chris@1113 207 map<QString, int> labelCountMap;
Chris@631 208
Chris@676 209 int valueColumns = 0;
Chris@676 210 for (int i = 0; i < m_format.getColumnCount(); ++i) {
Chris@676 211 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
Chris@676 212 ++valueColumns;
Chris@676 213 }
Chris@676 214 }
Chris@676 215
Chris@148 216 while (!in.atEnd()) {
Chris@148 217
Chris@283 218 // QTextStream's readLine doesn't cope with old-style Mac
Chris@283 219 // CR-only line endings. Why did they bother making the class
Chris@283 220 // cope with more than one sort of line ending, if it still
Chris@283 221 // can't be configured to cope with all the common sorts?
Chris@148 222
Chris@283 223 // For the time being we'll deal with this case (which is
Chris@283 224 // relatively uncommon for us, but still necessary to handle)
Chris@283 225 // by reading the entire file using a single readLine, and
Chris@283 226 // splitting it. For CR and CR/LF line endings this will just
Chris@283 227 // read a line at a time, and that's obviously OK.
Chris@148 228
Chris@283 229 QString chunk = in.readLine();
Chris@283 230 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@283 231
Chris@897 232 for (int li = 0; li < lines.size(); ++li) {
Chris@148 233
Chris@283 234 QString line = lines[li];
Chris@1009 235
Chris@283 236 if (line.startsWith("#")) continue;
Chris@283 237
Chris@631 238 QStringList list = StringBits::split(line, separator, allowQuoting);
Chris@283 239 if (!model) {
Chris@283 240
Chris@283 241 switch (modelType) {
Chris@283 242
Chris@392 243 case CSVFormat::OneDimensionalModel:
Chris@283 244 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
Chris@283 245 model = model1;
Chris@283 246 break;
Chris@1429 247
Chris@392 248 case CSVFormat::TwoDimensionalModel:
Chris@283 249 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
Chris@283 250 model = model2;
Chris@283 251 break;
Chris@1429 252
Chris@628 253 case CSVFormat::TwoDimensionalModelWithDuration:
Chris@628 254 model2a = new RegionModel(sampleRate, windowSize, false);
Chris@628 255 model = model2a;
Chris@628 256 break;
Chris@1429 257
Chris@897 258 case CSVFormat::TwoDimensionalModelWithDurationAndPitch:
Chris@897 259 model2b = new NoteModel(sampleRate, windowSize, false);
Chris@897 260 model = model2b;
Chris@897 261 break;
Chris@1429 262
Chris@392 263 case CSVFormat::ThreeDimensionalModel:
Chris@535 264 model3 = new EditableDenseThreeDimensionalModel
Chris@535 265 (sampleRate,
Chris@535 266 windowSize,
Chris@676 267 valueColumns,
Chris@535 268 EditableDenseThreeDimensionalModel::NoCompression);
Chris@283 269 model = model3;
Chris@283 270 break;
Chris@283 271 }
Chris@1030 272
Chris@1030 273 if (model) {
Chris@1030 274 if (m_filename != "") {
Chris@1030 275 model->setObjectName(m_filename);
Chris@1030 276 }
Chris@1030 277 }
Chris@283 278 }
Chris@148 279
Chris@631 280 float value = 0.f;
Chris@897 281 float pitch = 0.f;
Chris@631 282 QString label = "";
Chris@148 283
Chris@631 284 duration = 0.f;
Chris@631 285 haveEndTime = false;
Chris@628 286
Chris@283 287 for (int i = 0; i < list.size(); ++i) {
Chris@148 288
Chris@631 289 QString s = list[i];
Chris@631 290
Chris@631 291 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
Chris@631 292
Chris@631 293 switch (purpose) {
Chris@631 294
Chris@631 295 case CSVFormat::ColumnUnknown:
Chris@631 296 break;
Chris@631 297
Chris@631 298 case CSVFormat::ColumnStartTime:
Chris@631 299 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 300 break;
Chris@631 301
Chris@631 302 case CSVFormat::ColumnEndTime:
Chris@631 303 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 304 haveEndTime = true;
Chris@631 305 break;
Chris@631 306
Chris@631 307 case CSVFormat::ColumnDuration:
Chris@631 308 duration = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 309 break;
Chris@631 310
Chris@631 311 case CSVFormat::ColumnValue:
Chris@631 312 value = s.toFloat();
Chris@631 313 haveAnyValue = true;
Chris@631 314 break;
Chris@631 315
Chris@897 316 case CSVFormat::ColumnPitch:
Chris@897 317 pitch = s.toFloat();
Chris@897 318 if (pitch < 0.f || pitch > 127.f) {
Chris@897 319 pitchLooksLikeMIDI = false;
Chris@897 320 }
Chris@897 321 break;
Chris@897 322
Chris@631 323 case CSVFormat::ColumnLabel:
Chris@631 324 label = s;
Chris@631 325 break;
Chris@283 326 }
Chris@631 327 }
Chris@148 328
Chris@1113 329 ++labelCountMap[label];
Chris@1113 330
Chris@631 331 if (haveEndTime) { // ... calculate duration now all cols read
Chris@631 332 if (endFrame > frameNo) {
Chris@631 333 duration = endFrame - frameNo;
Chris@628 334 }
Chris@283 335 }
Chris@148 336
Chris@392 337 if (modelType == CSVFormat::OneDimensionalModel) {
Chris@1429 338
Chris@631 339 SparseOneDimensionalModel::Point point(frameNo, label);
Chris@283 340 model1->addPoint(point);
Chris@148 341
Chris@392 342 } else if (modelType == CSVFormat::TwoDimensionalModel) {
Chris@148 343
Chris@631 344 SparseTimeValueModel::Point point(frameNo, value, label);
Chris@283 345 model2->addPoint(point);
Chris@148 346
Chris@628 347 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
Chris@628 348
Chris@631 349 RegionModel::Point point(frameNo, value, duration, label);
Chris@628 350 model2a->addPoint(point);
Chris@628 351
Chris@897 352 } else if (modelType == CSVFormat::TwoDimensionalModelWithDurationAndPitch) {
Chris@897 353
Chris@897 354 float level = ((value >= 0.f && value <= 1.f) ? value : 1.f);
Chris@897 355 NoteModel::Point point(frameNo, pitch, duration, level, label);
Chris@897 356 model2b->addPoint(point);
Chris@897 357
Chris@392 358 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 359
Chris@283 360 DenseThreeDimensionalModel::Column values;
Chris@148 361
Chris@631 362 for (int i = 0; i < list.size(); ++i) {
Chris@148 363
Chris@676 364 if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) {
Chris@676 365 continue;
Chris@676 366 }
Chris@676 367
Chris@283 368 bool ok = false;
Chris@283 369 float value = list[i].toFloat(&ok);
Chris@611 370
Chris@676 371 values.push_back(value);
Chris@1429 372
Chris@631 373 if (firstEverValue || value < min) min = value;
Chris@631 374 if (firstEverValue || value > max) max = value;
Chris@676 375
Chris@631 376 if (firstEverValue) {
Chris@611 377 startFrame = frameNo;
Chris@611 378 model3->setStartFrame(startFrame);
Chris@611 379 } else if (lineno == 1 &&
Chris@611 380 timingType == CSVFormat::ExplicitTiming) {
Chris@1038 381 model3->setResolution(int(frameNo - startFrame));
Chris@611 382 }
Chris@631 383
Chris@631 384 firstEverValue = false;
Chris@148 385
Chris@283 386 if (!ok) {
Chris@283 387 if (warnings < warnLimit) {
Chris@1428 388 SVCERR << "WARNING: CSVFileReader::load: "
Chris@390 389 << "Non-numeric value \""
Chris@844 390 << list[i]
Chris@491 391 << "\" in data line " << lineno+1
Chris@843 392 << ":" << endl;
Chris@1428 393 SVCERR << line << endl;
Chris@283 394 ++warnings;
Chris@283 395 } else if (warnings == warnLimit) {
Chris@1428 396 // SVCERR << "WARNING: Too many warnings" << endl;
Chris@283 397 }
Chris@283 398 }
Chris@283 399 }
Chris@1429 400
Chris@690 401 // SVDEBUG << "Setting bin values for count " << lineno << ", frame "
Chris@687 402 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
Chris@148 403
Chris@611 404 model3->setColumn(lineno, values);
Chris@283 405 }
Chris@148 406
Chris@283 407 ++lineno;
Chris@392 408 if (timingType == CSVFormat::ImplicitTiming ||
Chris@283 409 list.size() == 0) {
Chris@283 410 frameNo += windowSize;
Chris@283 411 }
Chris@283 412 }
Chris@148 413 }
Chris@148 414
Chris@631 415 if (!haveAnyValue) {
Chris@631 416 if (model2a) {
Chris@631 417 // assign values for regions based on label frequency; we
Chris@631 418 // have this in our labelCountMap, sort of
Chris@631 419
Chris@1113 420 map<int, map<QString, float> > countLabelValueMap;
Chris@1113 421 for (map<QString, int>::iterator i = labelCountMap.begin();
Chris@631 422 i != labelCountMap.end(); ++i) {
Chris@1113 423 countLabelValueMap[i->second][i->first] = -1.f;
Chris@631 424 }
Chris@631 425
Chris@631 426 float v = 0.f;
Chris@1113 427 for (map<int, map<QString, float> >::iterator i =
Chris@631 428 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
Chris@631 429 --i;
Chris@1428 430 SVCERR << "count -> " << i->first << endl;
Chris@1113 431 for (map<QString, float>::iterator j = i->second.begin();
Chris@631 432 j != i->second.end(); ++j) {
Chris@631 433 j->second = v;
Chris@1428 434 SVCERR << "label -> " << j->first << ", value " << v << endl;
Chris@631 435 v = v + 1.f;
Chris@631 436 }
Chris@631 437 }
Chris@631 438
Chris@1113 439 map<RegionModel::Point, RegionModel::Point,
Chris@631 440 RegionModel::Point::Comparator> pointMap;
Chris@631 441 for (RegionModel::PointList::const_iterator i =
Chris@631 442 model2a->getPoints().begin();
Chris@631 443 i != model2a->getPoints().end(); ++i) {
Chris@631 444 RegionModel::Point p(*i);
Chris@1113 445 int count = labelCountMap[p.label];
Chris@1113 446 v = countLabelValueMap[count][p.label];
Chris@1428 447 // SVCERR << "mapping from label \"" << p.label << "\" (count " << count << ") to value " << v << endl;
Chris@631 448 RegionModel::Point pp(p.frame, v, p.duration, p.label);
Chris@631 449 pointMap[p] = pp;
Chris@631 450 }
Chris@631 451
Chris@1113 452 for (map<RegionModel::Point, RegionModel::Point>::iterator i =
Chris@631 453 pointMap.begin(); i != pointMap.end(); ++i) {
Chris@1113 454 // There could be duplicate regions; if so replace
Chris@1113 455 // them all -- but we need to check we're not
Chris@1113 456 // replacing a region by itself (or else this will
Chris@1113 457 // never terminate)
Chris@1113 458 if (i->first.value == i->second.value) {
Chris@1113 459 continue;
Chris@1113 460 }
Chris@1113 461 while (model2a->containsPoint(i->first)) {
Chris@1113 462 model2a->deletePoint(i->first);
Chris@1113 463 model2a->addPoint(i->second);
Chris@1113 464 }
Chris@631 465 }
Chris@631 466 }
Chris@631 467 }
Chris@631 468
Chris@897 469 if (model2b) {
Chris@897 470 if (pitchLooksLikeMIDI) {
Chris@897 471 model2b->setScaleUnits("MIDI Pitch");
Chris@897 472 } else {
Chris@897 473 model2b->setScaleUnits("Hz");
Chris@897 474 }
Chris@897 475 }
Chris@897 476
Chris@961 477 if (model3) {
Chris@1429 478 model3->setMinimumLevel(min);
Chris@1429 479 model3->setMaximumLevel(max);
Chris@148 480 }
Chris@148 481
Chris@148 482 return model;
Chris@148 483 }
Chris@148 484