annotate data/fileio/CSVFileReader.cpp @ 1072:882d448c8a6d

Fix #1222 CSV export of time-instants layer omits last point
author Chris Cannam
date Fri, 15 May 2015 09:15:57 +0100
parents 26cf6d5251ec
children ed207f89aaef e22bfe8ca248
rev   line source
Chris@148 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@148 2
Chris@148 3 /*
Chris@148 4 Sonic Visualiser
Chris@148 5 An audio file viewer and annotation editor.
Chris@148 6 Centre for Digital Music, Queen Mary, University of London.
Chris@148 7 This file copyright 2006 Chris Cannam.
Chris@148 8
Chris@148 9 This program is free software; you can redistribute it and/or
Chris@148 10 modify it under the terms of the GNU General Public License as
Chris@148 11 published by the Free Software Foundation; either version 2 of the
Chris@148 12 License, or (at your option) any later version. See the file
Chris@148 13 COPYING included with this distribution for more information.
Chris@148 14 */
Chris@148 15
Chris@148 16 #include "CSVFileReader.h"
Chris@148 17
Chris@150 18 #include "model/Model.h"
Chris@148 19 #include "base/RealTime.h"
Chris@631 20 #include "base/StringBits.h"
Chris@148 21 #include "model/SparseOneDimensionalModel.h"
Chris@148 22 #include "model/SparseTimeValueModel.h"
Chris@152 23 #include "model/EditableDenseThreeDimensionalModel.h"
Chris@628 24 #include "model/RegionModel.h"
Chris@897 25 #include "model/NoteModel.h"
Chris@308 26 #include "DataFileReaderFactory.h"
Chris@148 27
Chris@148 28 #include <QFile>
Chris@1030 29 #include <QFileInfo>
Chris@148 30 #include <QString>
Chris@148 31 #include <QRegExp>
Chris@148 32 #include <QStringList>
Chris@148 33 #include <QTextStream>
Chris@148 34
Chris@148 35 #include <iostream>
Chris@628 36 #include <map>
Chris@148 37
Chris@392 38 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
Chris@1047 39 sv_samplerate_t mainModelSampleRate) :
Chris@392 40 m_format(format),
Chris@1009 41 m_device(0),
Chris@1009 42 m_ownDevice(true),
Chris@631 43 m_warnings(0),
Chris@148 44 m_mainModelSampleRate(mainModelSampleRate)
Chris@148 45 {
Chris@1009 46 QFile *file = new QFile(path);
Chris@148 47 bool good = false;
Chris@148 48
Chris@1009 49 if (!file->exists()) {
Chris@148 50 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
Chris@1009 51 } else if (!file->open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@148 52 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
Chris@148 53 } else {
Chris@148 54 good = true;
Chris@148 55 }
Chris@148 56
Chris@1009 57 if (good) {
Chris@1009 58 m_device = file;
Chris@1030 59 m_filename = QFileInfo(path).fileName();
Chris@1009 60 } else {
Chris@1009 61 delete file;
Chris@148 62 }
Chris@148 63 }
Chris@148 64
Chris@1009 65 CSVFileReader::CSVFileReader(QIODevice *device, CSVFormat format,
Chris@1047 66 sv_samplerate_t mainModelSampleRate) :
Chris@1009 67 m_format(format),
Chris@1009 68 m_device(device),
Chris@1009 69 m_ownDevice(false),
Chris@1009 70 m_warnings(0),
Chris@1009 71 m_mainModelSampleRate(mainModelSampleRate)
Chris@1009 72 {
Chris@1009 73 }
Chris@1009 74
Chris@148 75 CSVFileReader::~CSVFileReader()
Chris@148 76 {
Chris@1009 77 SVDEBUG << "CSVFileReader::~CSVFileReader: device is " << m_device << endl;
Chris@148 78
Chris@1009 79 if (m_device && m_ownDevice) {
Chris@1009 80 SVDEBUG << "CSVFileReader::CSVFileReader: Closing device" << endl;
Chris@1009 81 m_device->close();
Chris@1009 82 delete m_device;
Chris@148 83 }
Chris@148 84 }
Chris@148 85
Chris@148 86 bool
Chris@148 87 CSVFileReader::isOK() const
Chris@148 88 {
Chris@1009 89 return (m_device != 0);
Chris@148 90 }
Chris@148 91
Chris@148 92 QString
Chris@148 93 CSVFileReader::getError() const
Chris@148 94 {
Chris@148 95 return m_error;
Chris@148 96 }
Chris@148 97
Chris@1038 98 sv_frame_t
Chris@1047 99 CSVFileReader::convertTimeValue(QString s, int lineno,
Chris@1047 100 sv_samplerate_t sampleRate,
Chris@929 101 int windowSize) const
Chris@631 102 {
Chris@631 103 QRegExp nonNumericRx("[^0-9eE.,+-]");
Chris@897 104 int warnLimit = 10;
Chris@631 105
Chris@631 106 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@631 107
Chris@1038 108 sv_frame_t calculatedFrame = 0;
Chris@631 109
Chris@631 110 bool ok = false;
Chris@631 111 QString numeric = s;
Chris@631 112 numeric.remove(nonNumericRx);
Chris@631 113
Chris@631 114 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@631 115
Chris@631 116 double time = numeric.toDouble(&ok);
Chris@631 117 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@1038 118 calculatedFrame = sv_frame_t(time * sampleRate + 0.5);
Chris@990 119
Chris@990 120 } else if (timeUnits == CSVFormat::TimeMilliseconds) {
Chris@990 121
Chris@990 122 double time = numeric.toDouble(&ok);
Chris@990 123 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@1038 124 calculatedFrame = sv_frame_t((time / 1000.0) * sampleRate + 0.5);
Chris@631 125
Chris@631 126 } else {
Chris@631 127
Chris@631 128 long n = numeric.toLong(&ok);
Chris@631 129 if (n >= 0) calculatedFrame = n;
Chris@631 130
Chris@631 131 if (timeUnits == CSVFormat::TimeWindows) {
Chris@631 132 calculatedFrame *= windowSize;
Chris@631 133 }
Chris@631 134 }
Chris@631 135
Chris@631 136 if (!ok) {
Chris@631 137 if (m_warnings < warnLimit) {
Chris@843 138 cerr << "WARNING: CSVFileReader::load: "
Chris@844 139 << "Bad time format (\"" << s
Chris@631 140 << "\") in data line "
Chris@843 141 << lineno+1 << endl;
Chris@631 142 } else if (m_warnings == warnLimit) {
Chris@843 143 cerr << "WARNING: Too many warnings" << endl;
Chris@631 144 }
Chris@631 145 ++m_warnings;
Chris@631 146 }
Chris@631 147
Chris@631 148 return calculatedFrame;
Chris@631 149 }
Chris@631 150
Chris@148 151 Model *
Chris@148 152 CSVFileReader::load() const
Chris@148 153 {
Chris@1009 154 if (!m_device) return 0;
Chris@148 155
Chris@628 156 CSVFormat::ModelType modelType = m_format.getModelType();
Chris@392 157 CSVFormat::TimingType timingType = m_format.getTimingType();
Chris@628 158 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@1047 159 sv_samplerate_t sampleRate = m_format.getSampleRate();
Chris@929 160 int windowSize = m_format.getWindowSize();
Chris@631 161 QChar separator = m_format.getSeparator();
Chris@631 162 bool allowQuoting = m_format.getAllowQuoting();
Chris@148 163
Chris@392 164 if (timingType == CSVFormat::ExplicitTiming) {
Chris@611 165 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@611 166 // This will be overridden later if more than one line
Chris@611 167 // appears in our file, but we want to choose a default
Chris@611 168 // that's likely to be visible
Chris@611 169 windowSize = 1024;
Chris@611 170 } else {
Chris@611 171 windowSize = 1;
Chris@611 172 }
Chris@990 173 if (timeUnits == CSVFormat::TimeSeconds ||
Chris@990 174 timeUnits == CSVFormat::TimeMilliseconds) {
Chris@148 175 sampleRate = m_mainModelSampleRate;
Chris@148 176 }
Chris@148 177 }
Chris@148 178
Chris@148 179 SparseOneDimensionalModel *model1 = 0;
Chris@148 180 SparseTimeValueModel *model2 = 0;
Chris@628 181 RegionModel *model2a = 0;
Chris@897 182 NoteModel *model2b = 0;
Chris@152 183 EditableDenseThreeDimensionalModel *model3 = 0;
Chris@148 184 Model *model = 0;
Chris@148 185
Chris@1009 186 QTextStream in(m_device);
Chris@148 187
Chris@148 188 unsigned int warnings = 0, warnLimit = 10;
Chris@148 189 unsigned int lineno = 0;
Chris@148 190
Chris@148 191 float min = 0.0, max = 0.0;
Chris@148 192
Chris@1038 193 sv_frame_t frameNo = 0;
Chris@1038 194 sv_frame_t duration = 0;
Chris@1038 195 sv_frame_t endFrame = 0;
Chris@631 196
Chris@631 197 bool haveAnyValue = false;
Chris@631 198 bool haveEndTime = false;
Chris@897 199 bool pitchLooksLikeMIDI = true;
Chris@631 200
Chris@1038 201 sv_frame_t startFrame = 0; // for calculation of dense model resolution
Chris@631 202 bool firstEverValue = true;
Chris@148 203
Chris@631 204 std::map<QString, int> labelCountMap;
Chris@631 205
Chris@676 206 int valueColumns = 0;
Chris@676 207 for (int i = 0; i < m_format.getColumnCount(); ++i) {
Chris@676 208 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
Chris@676 209 ++valueColumns;
Chris@676 210 }
Chris@676 211 }
Chris@676 212
Chris@148 213 while (!in.atEnd()) {
Chris@148 214
Chris@283 215 // QTextStream's readLine doesn't cope with old-style Mac
Chris@283 216 // CR-only line endings. Why did they bother making the class
Chris@283 217 // cope with more than one sort of line ending, if it still
Chris@283 218 // can't be configured to cope with all the common sorts?
Chris@148 219
Chris@283 220 // For the time being we'll deal with this case (which is
Chris@283 221 // relatively uncommon for us, but still necessary to handle)
Chris@283 222 // by reading the entire file using a single readLine, and
Chris@283 223 // splitting it. For CR and CR/LF line endings this will just
Chris@283 224 // read a line at a time, and that's obviously OK.
Chris@148 225
Chris@283 226 QString chunk = in.readLine();
Chris@283 227 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@283 228
Chris@897 229 for (int li = 0; li < lines.size(); ++li) {
Chris@148 230
Chris@283 231 QString line = lines[li];
Chris@1009 232
Chris@283 233 if (line.startsWith("#")) continue;
Chris@283 234
Chris@631 235 QStringList list = StringBits::split(line, separator, allowQuoting);
Chris@283 236 if (!model) {
Chris@283 237
Chris@283 238 switch (modelType) {
Chris@283 239
Chris@392 240 case CSVFormat::OneDimensionalModel:
Chris@283 241 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
Chris@283 242 model = model1;
Chris@283 243 break;
Chris@148 244
Chris@392 245 case CSVFormat::TwoDimensionalModel:
Chris@283 246 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
Chris@283 247 model = model2;
Chris@283 248 break;
Chris@148 249
Chris@628 250 case CSVFormat::TwoDimensionalModelWithDuration:
Chris@628 251 model2a = new RegionModel(sampleRate, windowSize, false);
Chris@628 252 model = model2a;
Chris@628 253 break;
Chris@628 254
Chris@897 255 case CSVFormat::TwoDimensionalModelWithDurationAndPitch:
Chris@897 256 model2b = new NoteModel(sampleRate, windowSize, false);
Chris@897 257 model = model2b;
Chris@897 258 break;
Chris@897 259
Chris@392 260 case CSVFormat::ThreeDimensionalModel:
Chris@535 261 model3 = new EditableDenseThreeDimensionalModel
Chris@535 262 (sampleRate,
Chris@535 263 windowSize,
Chris@676 264 valueColumns,
Chris@535 265 EditableDenseThreeDimensionalModel::NoCompression);
Chris@283 266 model = model3;
Chris@283 267 break;
Chris@283 268 }
Chris@1030 269
Chris@1030 270 if (model) {
Chris@1030 271 if (m_filename != "") {
Chris@1030 272 model->setObjectName(m_filename);
Chris@1030 273 }
Chris@1030 274 }
Chris@283 275 }
Chris@148 276
Chris@631 277 float value = 0.f;
Chris@897 278 float pitch = 0.f;
Chris@631 279 QString label = "";
Chris@148 280
Chris@631 281 duration = 0.f;
Chris@631 282 haveEndTime = false;
Chris@628 283
Chris@283 284 for (int i = 0; i < list.size(); ++i) {
Chris@148 285
Chris@631 286 QString s = list[i];
Chris@631 287
Chris@631 288 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
Chris@631 289
Chris@631 290 switch (purpose) {
Chris@631 291
Chris@631 292 case CSVFormat::ColumnUnknown:
Chris@631 293 break;
Chris@631 294
Chris@631 295 case CSVFormat::ColumnStartTime:
Chris@631 296 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 297 break;
Chris@631 298
Chris@631 299 case CSVFormat::ColumnEndTime:
Chris@631 300 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 301 haveEndTime = true;
Chris@631 302 break;
Chris@631 303
Chris@631 304 case CSVFormat::ColumnDuration:
Chris@631 305 duration = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 306 break;
Chris@631 307
Chris@631 308 case CSVFormat::ColumnValue:
Chris@631 309 value = s.toFloat();
Chris@631 310 haveAnyValue = true;
Chris@631 311 break;
Chris@631 312
Chris@897 313 case CSVFormat::ColumnPitch:
Chris@897 314 pitch = s.toFloat();
Chris@897 315 if (pitch < 0.f || pitch > 127.f) {
Chris@897 316 pitchLooksLikeMIDI = false;
Chris@897 317 }
Chris@897 318 break;
Chris@897 319
Chris@631 320 case CSVFormat::ColumnLabel:
Chris@631 321 label = s;
Chris@631 322 ++labelCountMap[label];
Chris@631 323 break;
Chris@283 324 }
Chris@631 325 }
Chris@148 326
Chris@631 327 if (haveEndTime) { // ... calculate duration now all cols read
Chris@631 328 if (endFrame > frameNo) {
Chris@631 329 duration = endFrame - frameNo;
Chris@628 330 }
Chris@283 331 }
Chris@148 332
Chris@392 333 if (modelType == CSVFormat::OneDimensionalModel) {
Chris@148 334
Chris@631 335 SparseOneDimensionalModel::Point point(frameNo, label);
Chris@283 336 model1->addPoint(point);
Chris@148 337
Chris@392 338 } else if (modelType == CSVFormat::TwoDimensionalModel) {
Chris@148 339
Chris@631 340 SparseTimeValueModel::Point point(frameNo, value, label);
Chris@283 341 model2->addPoint(point);
Chris@148 342
Chris@628 343 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
Chris@628 344
Chris@631 345 RegionModel::Point point(frameNo, value, duration, label);
Chris@628 346 model2a->addPoint(point);
Chris@628 347
Chris@897 348 } else if (modelType == CSVFormat::TwoDimensionalModelWithDurationAndPitch) {
Chris@897 349
Chris@897 350 float level = ((value >= 0.f && value <= 1.f) ? value : 1.f);
Chris@897 351 NoteModel::Point point(frameNo, pitch, duration, level, label);
Chris@897 352 model2b->addPoint(point);
Chris@897 353
Chris@392 354 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 355
Chris@283 356 DenseThreeDimensionalModel::Column values;
Chris@148 357
Chris@631 358 for (int i = 0; i < list.size(); ++i) {
Chris@148 359
Chris@676 360 if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) {
Chris@676 361 continue;
Chris@676 362 }
Chris@676 363
Chris@283 364 bool ok = false;
Chris@283 365 float value = list[i].toFloat(&ok);
Chris@611 366
Chris@676 367 values.push_back(value);
Chris@148 368
Chris@631 369 if (firstEverValue || value < min) min = value;
Chris@631 370 if (firstEverValue || value > max) max = value;
Chris@676 371
Chris@631 372 if (firstEverValue) {
Chris@611 373 startFrame = frameNo;
Chris@611 374 model3->setStartFrame(startFrame);
Chris@611 375 } else if (lineno == 1 &&
Chris@611 376 timingType == CSVFormat::ExplicitTiming) {
Chris@1038 377 model3->setResolution(int(frameNo - startFrame));
Chris@611 378 }
Chris@631 379
Chris@631 380 firstEverValue = false;
Chris@148 381
Chris@283 382 if (!ok) {
Chris@283 383 if (warnings < warnLimit) {
Chris@843 384 cerr << "WARNING: CSVFileReader::load: "
Chris@390 385 << "Non-numeric value \""
Chris@844 386 << list[i]
Chris@491 387 << "\" in data line " << lineno+1
Chris@843 388 << ":" << endl;
Chris@843 389 cerr << line << endl;
Chris@283 390 ++warnings;
Chris@283 391 } else if (warnings == warnLimit) {
Chris@843 392 // cerr << "WARNING: Too many warnings" << endl;
Chris@283 393 }
Chris@283 394 }
Chris@283 395 }
Chris@148 396
Chris@690 397 // SVDEBUG << "Setting bin values for count " << lineno << ", frame "
Chris@687 398 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
Chris@148 399
Chris@611 400 model3->setColumn(lineno, values);
Chris@283 401 }
Chris@148 402
Chris@283 403 ++lineno;
Chris@392 404 if (timingType == CSVFormat::ImplicitTiming ||
Chris@283 405 list.size() == 0) {
Chris@283 406 frameNo += windowSize;
Chris@283 407 }
Chris@283 408 }
Chris@148 409 }
Chris@148 410
Chris@631 411 if (!haveAnyValue) {
Chris@631 412 if (model2a) {
Chris@631 413 // assign values for regions based on label frequency; we
Chris@631 414 // have this in our labelCountMap, sort of
Chris@631 415
Chris@631 416 std::map<int, std::map<QString, float> > countLabelValueMap;
Chris@631 417 for (std::map<QString, int>::iterator i = labelCountMap.begin();
Chris@631 418 i != labelCountMap.end(); ++i) {
Chris@631 419 countLabelValueMap[i->second][i->first] = 0.f;
Chris@631 420 }
Chris@631 421
Chris@631 422 float v = 0.f;
Chris@631 423 for (std::map<int, std::map<QString, float> >::iterator i =
Chris@631 424 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
Chris@631 425 --i;
Chris@631 426 for (std::map<QString, float>::iterator j = i->second.begin();
Chris@631 427 j != i->second.end(); ++j) {
Chris@631 428 j->second = v;
Chris@631 429 v = v + 1.f;
Chris@631 430 }
Chris@631 431 }
Chris@631 432
Chris@631 433 std::map<RegionModel::Point, RegionModel::Point,
Chris@631 434 RegionModel::Point::Comparator> pointMap;
Chris@631 435 for (RegionModel::PointList::const_iterator i =
Chris@631 436 model2a->getPoints().begin();
Chris@631 437 i != model2a->getPoints().end(); ++i) {
Chris@631 438 RegionModel::Point p(*i);
Chris@631 439 v = countLabelValueMap[labelCountMap[p.label]][p.label];
Chris@631 440 RegionModel::Point pp(p.frame, v, p.duration, p.label);
Chris@631 441 pointMap[p] = pp;
Chris@631 442 }
Chris@631 443
Chris@631 444 for (std::map<RegionModel::Point, RegionModel::Point>::iterator i =
Chris@631 445 pointMap.begin(); i != pointMap.end(); ++i) {
Chris@631 446 model2a->deletePoint(i->first);
Chris@631 447 model2a->addPoint(i->second);
Chris@631 448 }
Chris@631 449 }
Chris@631 450 }
Chris@631 451
Chris@897 452 if (model2b) {
Chris@897 453 if (pitchLooksLikeMIDI) {
Chris@897 454 model2b->setScaleUnits("MIDI Pitch");
Chris@897 455 } else {
Chris@897 456 model2b->setScaleUnits("Hz");
Chris@897 457 }
Chris@897 458 }
Chris@897 459
Chris@961 460 if (model3) {
Chris@148 461 model3->setMinimumLevel(min);
Chris@148 462 model3->setMaximumLevel(max);
Chris@148 463 }
Chris@148 464
Chris@148 465 return model;
Chris@148 466 }
Chris@148 467