annotate data/fileio/CSVFileReader.cpp @ 1020:9c00e7944bf2

Merge
author Chris Cannam
date Thu, 27 Nov 2014 17:51:21 +0000
parents e369dd281cf2
children 920699b6989d
rev   line source
Chris@148 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@148 2
Chris@148 3 /*
Chris@148 4 Sonic Visualiser
Chris@148 5 An audio file viewer and annotation editor.
Chris@148 6 Centre for Digital Music, Queen Mary, University of London.
Chris@148 7 This file copyright 2006 Chris Cannam.
Chris@148 8
Chris@148 9 This program is free software; you can redistribute it and/or
Chris@148 10 modify it under the terms of the GNU General Public License as
Chris@148 11 published by the Free Software Foundation; either version 2 of the
Chris@148 12 License, or (at your option) any later version. See the file
Chris@148 13 COPYING included with this distribution for more information.
Chris@148 14 */
Chris@148 15
Chris@148 16 #include "CSVFileReader.h"
Chris@148 17
Chris@150 18 #include "model/Model.h"
Chris@148 19 #include "base/RealTime.h"
Chris@631 20 #include "base/StringBits.h"
Chris@148 21 #include "model/SparseOneDimensionalModel.h"
Chris@148 22 #include "model/SparseTimeValueModel.h"
Chris@152 23 #include "model/EditableDenseThreeDimensionalModel.h"
Chris@628 24 #include "model/RegionModel.h"
Chris@897 25 #include "model/NoteModel.h"
Chris@308 26 #include "DataFileReaderFactory.h"
Chris@148 27
Chris@148 28 #include <QFile>
Chris@148 29 #include <QString>
Chris@148 30 #include <QRegExp>
Chris@148 31 #include <QStringList>
Chris@148 32 #include <QTextStream>
Chris@148 33
Chris@148 34 #include <iostream>
Chris@628 35 #include <map>
Chris@148 36
Chris@392 37 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
Chris@929 38 int mainModelSampleRate) :
Chris@392 39 m_format(format),
Chris@1009 40 m_device(0),
Chris@1009 41 m_ownDevice(true),
Chris@631 42 m_warnings(0),
Chris@148 43 m_mainModelSampleRate(mainModelSampleRate)
Chris@148 44 {
Chris@1009 45 QFile *file = new QFile(path);
Chris@148 46 bool good = false;
Chris@148 47
Chris@1009 48 if (!file->exists()) {
Chris@148 49 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
Chris@1009 50 } else if (!file->open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@148 51 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
Chris@148 52 } else {
Chris@148 53 good = true;
Chris@148 54 }
Chris@148 55
Chris@1009 56 if (good) {
Chris@1009 57 m_device = file;
Chris@1009 58 } else {
Chris@1009 59 delete file;
Chris@148 60 }
Chris@148 61 }
Chris@148 62
Chris@1009 63 CSVFileReader::CSVFileReader(QIODevice *device, CSVFormat format,
Chris@1009 64 int mainModelSampleRate) :
Chris@1009 65 m_format(format),
Chris@1009 66 m_device(device),
Chris@1009 67 m_ownDevice(false),
Chris@1009 68 m_warnings(0),
Chris@1009 69 m_mainModelSampleRate(mainModelSampleRate)
Chris@1009 70 {
Chris@1009 71 }
Chris@1009 72
Chris@148 73 CSVFileReader::~CSVFileReader()
Chris@148 74 {
Chris@1009 75 SVDEBUG << "CSVFileReader::~CSVFileReader: device is " << m_device << endl;
Chris@148 76
Chris@1009 77 if (m_device && m_ownDevice) {
Chris@1009 78 SVDEBUG << "CSVFileReader::CSVFileReader: Closing device" << endl;
Chris@1009 79 m_device->close();
Chris@1009 80 delete m_device;
Chris@148 81 }
Chris@148 82 }
Chris@148 83
Chris@148 84 bool
Chris@148 85 CSVFileReader::isOK() const
Chris@148 86 {
Chris@1009 87 return (m_device != 0);
Chris@148 88 }
Chris@148 89
Chris@148 90 QString
Chris@148 91 CSVFileReader::getError() const
Chris@148 92 {
Chris@148 93 return m_error;
Chris@148 94 }
Chris@148 95
Chris@929 96 int
Chris@929 97 CSVFileReader::convertTimeValue(QString s, int lineno, int sampleRate,
Chris@929 98 int windowSize) const
Chris@631 99 {
Chris@631 100 QRegExp nonNumericRx("[^0-9eE.,+-]");
Chris@897 101 int warnLimit = 10;
Chris@631 102
Chris@631 103 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@631 104
Chris@929 105 int calculatedFrame = 0;
Chris@631 106
Chris@631 107 bool ok = false;
Chris@631 108 QString numeric = s;
Chris@631 109 numeric.remove(nonNumericRx);
Chris@631 110
Chris@631 111 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@631 112
Chris@631 113 double time = numeric.toDouble(&ok);
Chris@631 114 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@631 115 calculatedFrame = int(time * sampleRate + 0.5);
Chris@990 116
Chris@990 117 } else if (timeUnits == CSVFormat::TimeMilliseconds) {
Chris@990 118
Chris@990 119 double time = numeric.toDouble(&ok);
Chris@990 120 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@990 121 calculatedFrame = int((time / 1000.0) * sampleRate + 0.5);
Chris@631 122
Chris@631 123 } else {
Chris@631 124
Chris@631 125 long n = numeric.toLong(&ok);
Chris@631 126 if (n >= 0) calculatedFrame = n;
Chris@631 127
Chris@631 128 if (timeUnits == CSVFormat::TimeWindows) {
Chris@631 129 calculatedFrame *= windowSize;
Chris@631 130 }
Chris@631 131 }
Chris@631 132
Chris@631 133 if (!ok) {
Chris@631 134 if (m_warnings < warnLimit) {
Chris@843 135 cerr << "WARNING: CSVFileReader::load: "
Chris@844 136 << "Bad time format (\"" << s
Chris@631 137 << "\") in data line "
Chris@843 138 << lineno+1 << endl;
Chris@631 139 } else if (m_warnings == warnLimit) {
Chris@843 140 cerr << "WARNING: Too many warnings" << endl;
Chris@631 141 }
Chris@631 142 ++m_warnings;
Chris@631 143 }
Chris@631 144
Chris@631 145 return calculatedFrame;
Chris@631 146 }
Chris@631 147
Chris@148 148 Model *
Chris@148 149 CSVFileReader::load() const
Chris@148 150 {
Chris@1009 151 if (!m_device) return 0;
Chris@148 152
Chris@628 153 CSVFormat::ModelType modelType = m_format.getModelType();
Chris@392 154 CSVFormat::TimingType timingType = m_format.getTimingType();
Chris@628 155 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@929 156 int sampleRate = m_format.getSampleRate();
Chris@929 157 int windowSize = m_format.getWindowSize();
Chris@631 158 QChar separator = m_format.getSeparator();
Chris@631 159 bool allowQuoting = m_format.getAllowQuoting();
Chris@148 160
Chris@392 161 if (timingType == CSVFormat::ExplicitTiming) {
Chris@611 162 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@611 163 // This will be overridden later if more than one line
Chris@611 164 // appears in our file, but we want to choose a default
Chris@611 165 // that's likely to be visible
Chris@611 166 windowSize = 1024;
Chris@611 167 } else {
Chris@611 168 windowSize = 1;
Chris@611 169 }
Chris@990 170 if (timeUnits == CSVFormat::TimeSeconds ||
Chris@990 171 timeUnits == CSVFormat::TimeMilliseconds) {
Chris@148 172 sampleRate = m_mainModelSampleRate;
Chris@148 173 }
Chris@148 174 }
Chris@148 175
Chris@148 176 SparseOneDimensionalModel *model1 = 0;
Chris@148 177 SparseTimeValueModel *model2 = 0;
Chris@628 178 RegionModel *model2a = 0;
Chris@897 179 NoteModel *model2b = 0;
Chris@152 180 EditableDenseThreeDimensionalModel *model3 = 0;
Chris@148 181 Model *model = 0;
Chris@148 182
Chris@1009 183 QTextStream in(m_device);
Chris@148 184
Chris@148 185 unsigned int warnings = 0, warnLimit = 10;
Chris@148 186 unsigned int lineno = 0;
Chris@148 187
Chris@148 188 float min = 0.0, max = 0.0;
Chris@148 189
Chris@929 190 int frameNo = 0;
Chris@929 191 int duration = 0;
Chris@929 192 int endFrame = 0;
Chris@631 193
Chris@631 194 bool haveAnyValue = false;
Chris@631 195 bool haveEndTime = false;
Chris@897 196 bool pitchLooksLikeMIDI = true;
Chris@631 197
Chris@929 198 int startFrame = 0; // for calculation of dense model resolution
Chris@631 199 bool firstEverValue = true;
Chris@148 200
Chris@631 201 std::map<QString, int> labelCountMap;
Chris@631 202
Chris@676 203 int valueColumns = 0;
Chris@676 204 for (int i = 0; i < m_format.getColumnCount(); ++i) {
Chris@676 205 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
Chris@676 206 ++valueColumns;
Chris@676 207 }
Chris@676 208 }
Chris@676 209
Chris@148 210 while (!in.atEnd()) {
Chris@148 211
Chris@283 212 // QTextStream's readLine doesn't cope with old-style Mac
Chris@283 213 // CR-only line endings. Why did they bother making the class
Chris@283 214 // cope with more than one sort of line ending, if it still
Chris@283 215 // can't be configured to cope with all the common sorts?
Chris@148 216
Chris@283 217 // For the time being we'll deal with this case (which is
Chris@283 218 // relatively uncommon for us, but still necessary to handle)
Chris@283 219 // by reading the entire file using a single readLine, and
Chris@283 220 // splitting it. For CR and CR/LF line endings this will just
Chris@283 221 // read a line at a time, and that's obviously OK.
Chris@148 222
Chris@283 223 QString chunk = in.readLine();
Chris@283 224 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@283 225
Chris@897 226 for (int li = 0; li < lines.size(); ++li) {
Chris@148 227
Chris@283 228 QString line = lines[li];
Chris@1009 229
Chris@283 230 if (line.startsWith("#")) continue;
Chris@283 231
Chris@631 232 QStringList list = StringBits::split(line, separator, allowQuoting);
Chris@283 233 if (!model) {
Chris@283 234
Chris@283 235 switch (modelType) {
Chris@283 236
Chris@392 237 case CSVFormat::OneDimensionalModel:
Chris@283 238 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
Chris@283 239 model = model1;
Chris@283 240 break;
Chris@148 241
Chris@392 242 case CSVFormat::TwoDimensionalModel:
Chris@283 243 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
Chris@283 244 model = model2;
Chris@283 245 break;
Chris@148 246
Chris@628 247 case CSVFormat::TwoDimensionalModelWithDuration:
Chris@628 248 model2a = new RegionModel(sampleRate, windowSize, false);
Chris@628 249 model = model2a;
Chris@628 250 break;
Chris@628 251
Chris@897 252 case CSVFormat::TwoDimensionalModelWithDurationAndPitch:
Chris@897 253 model2b = new NoteModel(sampleRate, windowSize, false);
Chris@897 254 model = model2b;
Chris@897 255 break;
Chris@897 256
Chris@392 257 case CSVFormat::ThreeDimensionalModel:
Chris@535 258 model3 = new EditableDenseThreeDimensionalModel
Chris@535 259 (sampleRate,
Chris@535 260 windowSize,
Chris@676 261 valueColumns,
Chris@535 262 EditableDenseThreeDimensionalModel::NoCompression);
Chris@283 263 model = model3;
Chris@283 264 break;
Chris@283 265 }
Chris@283 266 }
Chris@148 267
Chris@631 268 float value = 0.f;
Chris@897 269 float pitch = 0.f;
Chris@631 270 QString label = "";
Chris@148 271
Chris@631 272 duration = 0.f;
Chris@631 273 haveEndTime = false;
Chris@628 274
Chris@283 275 for (int i = 0; i < list.size(); ++i) {
Chris@148 276
Chris@631 277 QString s = list[i];
Chris@631 278
Chris@631 279 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
Chris@631 280
Chris@631 281 switch (purpose) {
Chris@631 282
Chris@631 283 case CSVFormat::ColumnUnknown:
Chris@631 284 break;
Chris@631 285
Chris@631 286 case CSVFormat::ColumnStartTime:
Chris@631 287 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 288 break;
Chris@631 289
Chris@631 290 case CSVFormat::ColumnEndTime:
Chris@631 291 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 292 haveEndTime = true;
Chris@631 293 break;
Chris@631 294
Chris@631 295 case CSVFormat::ColumnDuration:
Chris@631 296 duration = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 297 break;
Chris@631 298
Chris@631 299 case CSVFormat::ColumnValue:
Chris@631 300 value = s.toFloat();
Chris@631 301 haveAnyValue = true;
Chris@631 302 break;
Chris@631 303
Chris@897 304 case CSVFormat::ColumnPitch:
Chris@897 305 pitch = s.toFloat();
Chris@897 306 if (pitch < 0.f || pitch > 127.f) {
Chris@897 307 pitchLooksLikeMIDI = false;
Chris@897 308 }
Chris@897 309 break;
Chris@897 310
Chris@631 311 case CSVFormat::ColumnLabel:
Chris@631 312 label = s;
Chris@631 313 ++labelCountMap[label];
Chris@631 314 break;
Chris@283 315 }
Chris@631 316 }
Chris@148 317
Chris@631 318 if (haveEndTime) { // ... calculate duration now all cols read
Chris@631 319 if (endFrame > frameNo) {
Chris@631 320 duration = endFrame - frameNo;
Chris@628 321 }
Chris@283 322 }
Chris@148 323
Chris@392 324 if (modelType == CSVFormat::OneDimensionalModel) {
Chris@148 325
Chris@631 326 SparseOneDimensionalModel::Point point(frameNo, label);
Chris@283 327 model1->addPoint(point);
Chris@148 328
Chris@392 329 } else if (modelType == CSVFormat::TwoDimensionalModel) {
Chris@148 330
Chris@631 331 SparseTimeValueModel::Point point(frameNo, value, label);
Chris@283 332 model2->addPoint(point);
Chris@148 333
Chris@628 334 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
Chris@628 335
Chris@631 336 RegionModel::Point point(frameNo, value, duration, label);
Chris@628 337 model2a->addPoint(point);
Chris@628 338
Chris@897 339 } else if (modelType == CSVFormat::TwoDimensionalModelWithDurationAndPitch) {
Chris@897 340
Chris@897 341 float level = ((value >= 0.f && value <= 1.f) ? value : 1.f);
Chris@897 342 NoteModel::Point point(frameNo, pitch, duration, level, label);
Chris@897 343 model2b->addPoint(point);
Chris@897 344
Chris@392 345 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 346
Chris@283 347 DenseThreeDimensionalModel::Column values;
Chris@148 348
Chris@631 349 for (int i = 0; i < list.size(); ++i) {
Chris@148 350
Chris@676 351 if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) {
Chris@676 352 continue;
Chris@676 353 }
Chris@676 354
Chris@283 355 bool ok = false;
Chris@283 356 float value = list[i].toFloat(&ok);
Chris@611 357
Chris@676 358 values.push_back(value);
Chris@148 359
Chris@631 360 if (firstEverValue || value < min) min = value;
Chris@631 361 if (firstEverValue || value > max) max = value;
Chris@676 362
Chris@631 363 if (firstEverValue) {
Chris@611 364 startFrame = frameNo;
Chris@611 365 model3->setStartFrame(startFrame);
Chris@611 366 } else if (lineno == 1 &&
Chris@611 367 timingType == CSVFormat::ExplicitTiming) {
Chris@611 368 model3->setResolution(frameNo - startFrame);
Chris@611 369 }
Chris@631 370
Chris@631 371 firstEverValue = false;
Chris@148 372
Chris@283 373 if (!ok) {
Chris@283 374 if (warnings < warnLimit) {
Chris@843 375 cerr << "WARNING: CSVFileReader::load: "
Chris@390 376 << "Non-numeric value \""
Chris@844 377 << list[i]
Chris@491 378 << "\" in data line " << lineno+1
Chris@843 379 << ":" << endl;
Chris@843 380 cerr << line << endl;
Chris@283 381 ++warnings;
Chris@283 382 } else if (warnings == warnLimit) {
Chris@843 383 // cerr << "WARNING: Too many warnings" << endl;
Chris@283 384 }
Chris@283 385 }
Chris@283 386 }
Chris@148 387
Chris@690 388 // SVDEBUG << "Setting bin values for count " << lineno << ", frame "
Chris@687 389 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
Chris@148 390
Chris@611 391 model3->setColumn(lineno, values);
Chris@283 392 }
Chris@148 393
Chris@283 394 ++lineno;
Chris@392 395 if (timingType == CSVFormat::ImplicitTiming ||
Chris@283 396 list.size() == 0) {
Chris@283 397 frameNo += windowSize;
Chris@283 398 }
Chris@283 399 }
Chris@148 400 }
Chris@148 401
Chris@631 402 if (!haveAnyValue) {
Chris@631 403 if (model2a) {
Chris@631 404 // assign values for regions based on label frequency; we
Chris@631 405 // have this in our labelCountMap, sort of
Chris@631 406
Chris@631 407 std::map<int, std::map<QString, float> > countLabelValueMap;
Chris@631 408 for (std::map<QString, int>::iterator i = labelCountMap.begin();
Chris@631 409 i != labelCountMap.end(); ++i) {
Chris@631 410 countLabelValueMap[i->second][i->first] = 0.f;
Chris@631 411 }
Chris@631 412
Chris@631 413 float v = 0.f;
Chris@631 414 for (std::map<int, std::map<QString, float> >::iterator i =
Chris@631 415 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
Chris@631 416 --i;
Chris@631 417 for (std::map<QString, float>::iterator j = i->second.begin();
Chris@631 418 j != i->second.end(); ++j) {
Chris@631 419 j->second = v;
Chris@631 420 v = v + 1.f;
Chris@631 421 }
Chris@631 422 }
Chris@631 423
Chris@631 424 std::map<RegionModel::Point, RegionModel::Point,
Chris@631 425 RegionModel::Point::Comparator> pointMap;
Chris@631 426 for (RegionModel::PointList::const_iterator i =
Chris@631 427 model2a->getPoints().begin();
Chris@631 428 i != model2a->getPoints().end(); ++i) {
Chris@631 429 RegionModel::Point p(*i);
Chris@631 430 v = countLabelValueMap[labelCountMap[p.label]][p.label];
Chris@631 431 RegionModel::Point pp(p.frame, v, p.duration, p.label);
Chris@631 432 pointMap[p] = pp;
Chris@631 433 }
Chris@631 434
Chris@631 435 for (std::map<RegionModel::Point, RegionModel::Point>::iterator i =
Chris@631 436 pointMap.begin(); i != pointMap.end(); ++i) {
Chris@631 437 model2a->deletePoint(i->first);
Chris@631 438 model2a->addPoint(i->second);
Chris@631 439 }
Chris@631 440 }
Chris@631 441 }
Chris@631 442
Chris@897 443 if (model2b) {
Chris@897 444 if (pitchLooksLikeMIDI) {
Chris@897 445 model2b->setScaleUnits("MIDI Pitch");
Chris@897 446 } else {
Chris@897 447 model2b->setScaleUnits("Hz");
Chris@897 448 }
Chris@897 449 }
Chris@897 450
Chris@961 451 if (model3) {
Chris@148 452 model3->setMinimumLevel(min);
Chris@148 453 model3->setMaximumLevel(max);
Chris@148 454 }
Chris@148 455
Chris@148 456 return model;
Chris@148 457 }
Chris@148 458