annotate data/fileio/CSVFileReader.cpp @ 1298:a1af054d8f75 3.0-integration

Avoid being locale-specific in XSD parse
author Chris Cannam
date Fri, 25 Nov 2016 14:26:24 +0000
parents 815f82508f96
children 87ae75da6527
rev   line source
Chris@148 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@148 2
Chris@148 3 /*
Chris@148 4 Sonic Visualiser
Chris@148 5 An audio file viewer and annotation editor.
Chris@148 6 Centre for Digital Music, Queen Mary, University of London.
Chris@148 7 This file copyright 2006 Chris Cannam.
Chris@148 8
Chris@148 9 This program is free software; you can redistribute it and/or
Chris@148 10 modify it under the terms of the GNU General Public License as
Chris@148 11 published by the Free Software Foundation; either version 2 of the
Chris@148 12 License, or (at your option) any later version. See the file
Chris@148 13 COPYING included with this distribution for more information.
Chris@148 14 */
Chris@148 15
Chris@148 16 #include "CSVFileReader.h"
Chris@148 17
Chris@150 18 #include "model/Model.h"
Chris@148 19 #include "base/RealTime.h"
Chris@631 20 #include "base/StringBits.h"
Chris@148 21 #include "model/SparseOneDimensionalModel.h"
Chris@148 22 #include "model/SparseTimeValueModel.h"
Chris@152 23 #include "model/EditableDenseThreeDimensionalModel.h"
Chris@628 24 #include "model/RegionModel.h"
Chris@897 25 #include "model/NoteModel.h"
Chris@308 26 #include "DataFileReaderFactory.h"
Chris@148 27
Chris@148 28 #include <QFile>
Chris@1030 29 #include <QFileInfo>
Chris@148 30 #include <QString>
Chris@148 31 #include <QRegExp>
Chris@148 32 #include <QStringList>
Chris@148 33 #include <QTextStream>
Chris@148 34
Chris@148 35 #include <iostream>
Chris@628 36 #include <map>
Chris@148 37
Chris@1113 38 using namespace std;
Chris@1113 39
Chris@392 40 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
Chris@1047 41 sv_samplerate_t mainModelSampleRate) :
Chris@392 42 m_format(format),
Chris@1009 43 m_device(0),
Chris@1009 44 m_ownDevice(true),
Chris@631 45 m_warnings(0),
Chris@148 46 m_mainModelSampleRate(mainModelSampleRate)
Chris@148 47 {
Chris@1009 48 QFile *file = new QFile(path);
Chris@148 49 bool good = false;
Chris@148 50
Chris@1009 51 if (!file->exists()) {
Chris@148 52 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
Chris@1009 53 } else if (!file->open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@148 54 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
Chris@148 55 } else {
Chris@148 56 good = true;
Chris@148 57 }
Chris@148 58
Chris@1009 59 if (good) {
Chris@1009 60 m_device = file;
Chris@1030 61 m_filename = QFileInfo(path).fileName();
Chris@1009 62 } else {
Chris@1009 63 delete file;
Chris@148 64 }
Chris@148 65 }
Chris@148 66
Chris@1009 67 CSVFileReader::CSVFileReader(QIODevice *device, CSVFormat format,
Chris@1047 68 sv_samplerate_t mainModelSampleRate) :
Chris@1009 69 m_format(format),
Chris@1009 70 m_device(device),
Chris@1009 71 m_ownDevice(false),
Chris@1009 72 m_warnings(0),
Chris@1009 73 m_mainModelSampleRate(mainModelSampleRate)
Chris@1009 74 {
Chris@1009 75 }
Chris@1009 76
Chris@148 77 CSVFileReader::~CSVFileReader()
Chris@148 78 {
Chris@1009 79 SVDEBUG << "CSVFileReader::~CSVFileReader: device is " << m_device << endl;
Chris@148 80
Chris@1009 81 if (m_device && m_ownDevice) {
Chris@1009 82 SVDEBUG << "CSVFileReader::CSVFileReader: Closing device" << endl;
Chris@1009 83 m_device->close();
Chris@1009 84 delete m_device;
Chris@148 85 }
Chris@148 86 }
Chris@148 87
Chris@148 88 bool
Chris@148 89 CSVFileReader::isOK() const
Chris@148 90 {
Chris@1009 91 return (m_device != 0);
Chris@148 92 }
Chris@148 93
Chris@148 94 QString
Chris@148 95 CSVFileReader::getError() const
Chris@148 96 {
Chris@148 97 return m_error;
Chris@148 98 }
Chris@148 99
Chris@1038 100 sv_frame_t
Chris@1047 101 CSVFileReader::convertTimeValue(QString s, int lineno,
Chris@1047 102 sv_samplerate_t sampleRate,
Chris@929 103 int windowSize) const
Chris@631 104 {
Chris@631 105 QRegExp nonNumericRx("[^0-9eE.,+-]");
Chris@897 106 int warnLimit = 10;
Chris@631 107
Chris@631 108 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@631 109
Chris@1038 110 sv_frame_t calculatedFrame = 0;
Chris@631 111
Chris@631 112 bool ok = false;
Chris@631 113 QString numeric = s;
Chris@631 114 numeric.remove(nonNumericRx);
Chris@631 115
Chris@631 116 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@631 117
Chris@631 118 double time = numeric.toDouble(&ok);
Chris@631 119 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@1038 120 calculatedFrame = sv_frame_t(time * sampleRate + 0.5);
Chris@990 121
Chris@990 122 } else if (timeUnits == CSVFormat::TimeMilliseconds) {
Chris@990 123
Chris@990 124 double time = numeric.toDouble(&ok);
Chris@990 125 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@1038 126 calculatedFrame = sv_frame_t((time / 1000.0) * sampleRate + 0.5);
Chris@631 127
Chris@631 128 } else {
Chris@631 129
Chris@631 130 long n = numeric.toLong(&ok);
Chris@631 131 if (n >= 0) calculatedFrame = n;
Chris@631 132
Chris@631 133 if (timeUnits == CSVFormat::TimeWindows) {
Chris@631 134 calculatedFrame *= windowSize;
Chris@631 135 }
Chris@631 136 }
Chris@631 137
Chris@631 138 if (!ok) {
Chris@631 139 if (m_warnings < warnLimit) {
Chris@843 140 cerr << "WARNING: CSVFileReader::load: "
Chris@844 141 << "Bad time format (\"" << s
Chris@631 142 << "\") in data line "
Chris@843 143 << lineno+1 << endl;
Chris@631 144 } else if (m_warnings == warnLimit) {
Chris@843 145 cerr << "WARNING: Too many warnings" << endl;
Chris@631 146 }
Chris@631 147 ++m_warnings;
Chris@631 148 }
Chris@631 149
Chris@631 150 return calculatedFrame;
Chris@631 151 }
Chris@631 152
Chris@148 153 Model *
Chris@148 154 CSVFileReader::load() const
Chris@148 155 {
Chris@1009 156 if (!m_device) return 0;
Chris@148 157
Chris@628 158 CSVFormat::ModelType modelType = m_format.getModelType();
Chris@392 159 CSVFormat::TimingType timingType = m_format.getTimingType();
Chris@628 160 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@1047 161 sv_samplerate_t sampleRate = m_format.getSampleRate();
Chris@929 162 int windowSize = m_format.getWindowSize();
Chris@631 163 QChar separator = m_format.getSeparator();
Chris@631 164 bool allowQuoting = m_format.getAllowQuoting();
Chris@148 165
Chris@392 166 if (timingType == CSVFormat::ExplicitTiming) {
Chris@611 167 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@611 168 // This will be overridden later if more than one line
Chris@611 169 // appears in our file, but we want to choose a default
Chris@611 170 // that's likely to be visible
Chris@611 171 windowSize = 1024;
Chris@611 172 } else {
Chris@611 173 windowSize = 1;
Chris@611 174 }
Chris@990 175 if (timeUnits == CSVFormat::TimeSeconds ||
Chris@990 176 timeUnits == CSVFormat::TimeMilliseconds) {
Chris@148 177 sampleRate = m_mainModelSampleRate;
Chris@148 178 }
Chris@148 179 }
Chris@148 180
Chris@148 181 SparseOneDimensionalModel *model1 = 0;
Chris@148 182 SparseTimeValueModel *model2 = 0;
Chris@628 183 RegionModel *model2a = 0;
Chris@897 184 NoteModel *model2b = 0;
Chris@152 185 EditableDenseThreeDimensionalModel *model3 = 0;
Chris@148 186 Model *model = 0;
Chris@148 187
Chris@1009 188 QTextStream in(m_device);
Chris@148 189
Chris@148 190 unsigned int warnings = 0, warnLimit = 10;
Chris@148 191 unsigned int lineno = 0;
Chris@148 192
Chris@148 193 float min = 0.0, max = 0.0;
Chris@148 194
Chris@1038 195 sv_frame_t frameNo = 0;
Chris@1038 196 sv_frame_t duration = 0;
Chris@1038 197 sv_frame_t endFrame = 0;
Chris@631 198
Chris@631 199 bool haveAnyValue = false;
Chris@631 200 bool haveEndTime = false;
Chris@897 201 bool pitchLooksLikeMIDI = true;
Chris@631 202
Chris@1038 203 sv_frame_t startFrame = 0; // for calculation of dense model resolution
Chris@631 204 bool firstEverValue = true;
Chris@148 205
Chris@1113 206 map<QString, int> labelCountMap;
Chris@631 207
Chris@676 208 int valueColumns = 0;
Chris@676 209 for (int i = 0; i < m_format.getColumnCount(); ++i) {
Chris@676 210 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
Chris@676 211 ++valueColumns;
Chris@676 212 }
Chris@676 213 }
Chris@676 214
Chris@148 215 while (!in.atEnd()) {
Chris@148 216
Chris@283 217 // QTextStream's readLine doesn't cope with old-style Mac
Chris@283 218 // CR-only line endings. Why did they bother making the class
Chris@283 219 // cope with more than one sort of line ending, if it still
Chris@283 220 // can't be configured to cope with all the common sorts?
Chris@148 221
Chris@283 222 // For the time being we'll deal with this case (which is
Chris@283 223 // relatively uncommon for us, but still necessary to handle)
Chris@283 224 // by reading the entire file using a single readLine, and
Chris@283 225 // splitting it. For CR and CR/LF line endings this will just
Chris@283 226 // read a line at a time, and that's obviously OK.
Chris@148 227
Chris@283 228 QString chunk = in.readLine();
Chris@283 229 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@283 230
Chris@897 231 for (int li = 0; li < lines.size(); ++li) {
Chris@148 232
Chris@283 233 QString line = lines[li];
Chris@1009 234
Chris@283 235 if (line.startsWith("#")) continue;
Chris@283 236
Chris@631 237 QStringList list = StringBits::split(line, separator, allowQuoting);
Chris@283 238 if (!model) {
Chris@283 239
Chris@283 240 switch (modelType) {
Chris@283 241
Chris@392 242 case CSVFormat::OneDimensionalModel:
Chris@283 243 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
Chris@283 244 model = model1;
Chris@283 245 break;
Chris@148 246
Chris@392 247 case CSVFormat::TwoDimensionalModel:
Chris@283 248 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
Chris@283 249 model = model2;
Chris@283 250 break;
Chris@148 251
Chris@628 252 case CSVFormat::TwoDimensionalModelWithDuration:
Chris@628 253 model2a = new RegionModel(sampleRate, windowSize, false);
Chris@628 254 model = model2a;
Chris@628 255 break;
Chris@628 256
Chris@897 257 case CSVFormat::TwoDimensionalModelWithDurationAndPitch:
Chris@897 258 model2b = new NoteModel(sampleRate, windowSize, false);
Chris@897 259 model = model2b;
Chris@897 260 break;
Chris@897 261
Chris@392 262 case CSVFormat::ThreeDimensionalModel:
Chris@535 263 model3 = new EditableDenseThreeDimensionalModel
Chris@535 264 (sampleRate,
Chris@535 265 windowSize,
Chris@676 266 valueColumns,
Chris@535 267 EditableDenseThreeDimensionalModel::NoCompression);
Chris@283 268 model = model3;
Chris@283 269 break;
Chris@283 270 }
Chris@1030 271
Chris@1030 272 if (model) {
Chris@1030 273 if (m_filename != "") {
Chris@1030 274 model->setObjectName(m_filename);
Chris@1030 275 }
Chris@1030 276 }
Chris@283 277 }
Chris@148 278
Chris@631 279 float value = 0.f;
Chris@897 280 float pitch = 0.f;
Chris@631 281 QString label = "";
Chris@148 282
Chris@631 283 duration = 0.f;
Chris@631 284 haveEndTime = false;
Chris@628 285
Chris@283 286 for (int i = 0; i < list.size(); ++i) {
Chris@148 287
Chris@631 288 QString s = list[i];
Chris@631 289
Chris@631 290 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
Chris@631 291
Chris@631 292 switch (purpose) {
Chris@631 293
Chris@631 294 case CSVFormat::ColumnUnknown:
Chris@631 295 break;
Chris@631 296
Chris@631 297 case CSVFormat::ColumnStartTime:
Chris@631 298 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 299 break;
Chris@631 300
Chris@631 301 case CSVFormat::ColumnEndTime:
Chris@631 302 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 303 haveEndTime = true;
Chris@631 304 break;
Chris@631 305
Chris@631 306 case CSVFormat::ColumnDuration:
Chris@631 307 duration = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 308 break;
Chris@631 309
Chris@631 310 case CSVFormat::ColumnValue:
Chris@631 311 value = s.toFloat();
Chris@631 312 haveAnyValue = true;
Chris@631 313 break;
Chris@631 314
Chris@897 315 case CSVFormat::ColumnPitch:
Chris@897 316 pitch = s.toFloat();
Chris@897 317 if (pitch < 0.f || pitch > 127.f) {
Chris@897 318 pitchLooksLikeMIDI = false;
Chris@897 319 }
Chris@897 320 break;
Chris@897 321
Chris@631 322 case CSVFormat::ColumnLabel:
Chris@631 323 label = s;
Chris@631 324 break;
Chris@283 325 }
Chris@631 326 }
Chris@148 327
Chris@1113 328 ++labelCountMap[label];
Chris@1113 329
Chris@631 330 if (haveEndTime) { // ... calculate duration now all cols read
Chris@631 331 if (endFrame > frameNo) {
Chris@631 332 duration = endFrame - frameNo;
Chris@628 333 }
Chris@283 334 }
Chris@148 335
Chris@392 336 if (modelType == CSVFormat::OneDimensionalModel) {
Chris@148 337
Chris@631 338 SparseOneDimensionalModel::Point point(frameNo, label);
Chris@283 339 model1->addPoint(point);
Chris@148 340
Chris@392 341 } else if (modelType == CSVFormat::TwoDimensionalModel) {
Chris@148 342
Chris@631 343 SparseTimeValueModel::Point point(frameNo, value, label);
Chris@283 344 model2->addPoint(point);
Chris@148 345
Chris@628 346 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
Chris@628 347
Chris@631 348 RegionModel::Point point(frameNo, value, duration, label);
Chris@628 349 model2a->addPoint(point);
Chris@628 350
Chris@897 351 } else if (modelType == CSVFormat::TwoDimensionalModelWithDurationAndPitch) {
Chris@897 352
Chris@897 353 float level = ((value >= 0.f && value <= 1.f) ? value : 1.f);
Chris@897 354 NoteModel::Point point(frameNo, pitch, duration, level, label);
Chris@897 355 model2b->addPoint(point);
Chris@897 356
Chris@392 357 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 358
Chris@283 359 DenseThreeDimensionalModel::Column values;
Chris@148 360
Chris@631 361 for (int i = 0; i < list.size(); ++i) {
Chris@148 362
Chris@676 363 if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) {
Chris@676 364 continue;
Chris@676 365 }
Chris@676 366
Chris@283 367 bool ok = false;
Chris@283 368 float value = list[i].toFloat(&ok);
Chris@611 369
Chris@676 370 values.push_back(value);
Chris@148 371
Chris@631 372 if (firstEverValue || value < min) min = value;
Chris@631 373 if (firstEverValue || value > max) max = value;
Chris@676 374
Chris@631 375 if (firstEverValue) {
Chris@611 376 startFrame = frameNo;
Chris@611 377 model3->setStartFrame(startFrame);
Chris@611 378 } else if (lineno == 1 &&
Chris@611 379 timingType == CSVFormat::ExplicitTiming) {
Chris@1038 380 model3->setResolution(int(frameNo - startFrame));
Chris@611 381 }
Chris@631 382
Chris@631 383 firstEverValue = false;
Chris@148 384
Chris@283 385 if (!ok) {
Chris@283 386 if (warnings < warnLimit) {
Chris@843 387 cerr << "WARNING: CSVFileReader::load: "
Chris@390 388 << "Non-numeric value \""
Chris@844 389 << list[i]
Chris@491 390 << "\" in data line " << lineno+1
Chris@843 391 << ":" << endl;
Chris@843 392 cerr << line << endl;
Chris@283 393 ++warnings;
Chris@283 394 } else if (warnings == warnLimit) {
Chris@843 395 // cerr << "WARNING: Too many warnings" << endl;
Chris@283 396 }
Chris@283 397 }
Chris@283 398 }
Chris@148 399
Chris@690 400 // SVDEBUG << "Setting bin values for count " << lineno << ", frame "
Chris@687 401 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
Chris@148 402
Chris@611 403 model3->setColumn(lineno, values);
Chris@283 404 }
Chris@148 405
Chris@283 406 ++lineno;
Chris@392 407 if (timingType == CSVFormat::ImplicitTiming ||
Chris@283 408 list.size() == 0) {
Chris@283 409 frameNo += windowSize;
Chris@283 410 }
Chris@283 411 }
Chris@148 412 }
Chris@148 413
Chris@631 414 if (!haveAnyValue) {
Chris@631 415 if (model2a) {
Chris@631 416 // assign values for regions based on label frequency; we
Chris@631 417 // have this in our labelCountMap, sort of
Chris@631 418
Chris@1113 419 map<int, map<QString, float> > countLabelValueMap;
Chris@1113 420 for (map<QString, int>::iterator i = labelCountMap.begin();
Chris@631 421 i != labelCountMap.end(); ++i) {
Chris@1113 422 countLabelValueMap[i->second][i->first] = -1.f;
Chris@631 423 }
Chris@631 424
Chris@631 425 float v = 0.f;
Chris@1113 426 for (map<int, map<QString, float> >::iterator i =
Chris@631 427 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
Chris@631 428 --i;
Chris@1113 429 cerr << "count -> " << i->first << endl;
Chris@1113 430 for (map<QString, float>::iterator j = i->second.begin();
Chris@631 431 j != i->second.end(); ++j) {
Chris@631 432 j->second = v;
Chris@1113 433 cerr << "label -> " << j->first << ", value " << v << endl;
Chris@631 434 v = v + 1.f;
Chris@631 435 }
Chris@631 436 }
Chris@631 437
Chris@1113 438 map<RegionModel::Point, RegionModel::Point,
Chris@631 439 RegionModel::Point::Comparator> pointMap;
Chris@631 440 for (RegionModel::PointList::const_iterator i =
Chris@631 441 model2a->getPoints().begin();
Chris@631 442 i != model2a->getPoints().end(); ++i) {
Chris@631 443 RegionModel::Point p(*i);
Chris@1113 444 int count = labelCountMap[p.label];
Chris@1113 445 v = countLabelValueMap[count][p.label];
Chris@1113 446 cerr << "mapping from label \"" << p.label << "\" (count " << count << ") to value " << v << endl;
Chris@631 447 RegionModel::Point pp(p.frame, v, p.duration, p.label);
Chris@631 448 pointMap[p] = pp;
Chris@631 449 }
Chris@631 450
Chris@1113 451 for (map<RegionModel::Point, RegionModel::Point>::iterator i =
Chris@631 452 pointMap.begin(); i != pointMap.end(); ++i) {
Chris@1113 453 // There could be duplicate regions; if so replace
Chris@1113 454 // them all -- but we need to check we're not
Chris@1113 455 // replacing a region by itself (or else this will
Chris@1113 456 // never terminate)
Chris@1113 457 if (i->first.value == i->second.value) {
Chris@1113 458 continue;
Chris@1113 459 }
Chris@1113 460 while (model2a->containsPoint(i->first)) {
Chris@1113 461 model2a->deletePoint(i->first);
Chris@1113 462 model2a->addPoint(i->second);
Chris@1113 463 }
Chris@631 464 }
Chris@631 465 }
Chris@631 466 }
Chris@631 467
Chris@897 468 if (model2b) {
Chris@897 469 if (pitchLooksLikeMIDI) {
Chris@897 470 model2b->setScaleUnits("MIDI Pitch");
Chris@897 471 } else {
Chris@897 472 model2b->setScaleUnits("Hz");
Chris@897 473 }
Chris@897 474 }
Chris@897 475
Chris@961 476 if (model3) {
Chris@148 477 model3->setMinimumLevel(min);
Chris@148 478 model3->setMaximumLevel(max);
Chris@148 479 }
Chris@148 480
Chris@148 481 return model;
Chris@148 482 }
Chris@148 483