annotate data/fileio/CSVFileReader.cpp @ 1008:d9e0e59a1581

When using an aggregate model to pass data to a transform, zero-pad the shorter input to the duration of the longer rather than truncating the longer. (This is better behaviour for e.g. MATCH, and in any case the code was previously truncating incorrectly and ending up with garbage data at the end.)
author Chris Cannam
date Fri, 14 Nov 2014 13:51:33 +0000
parents dc1695b90a58
children e369dd281cf2
rev   line source
Chris@148 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@148 2
Chris@148 3 /*
Chris@148 4 Sonic Visualiser
Chris@148 5 An audio file viewer and annotation editor.
Chris@148 6 Centre for Digital Music, Queen Mary, University of London.
Chris@148 7 This file copyright 2006 Chris Cannam.
Chris@148 8
Chris@148 9 This program is free software; you can redistribute it and/or
Chris@148 10 modify it under the terms of the GNU General Public License as
Chris@148 11 published by the Free Software Foundation; either version 2 of the
Chris@148 12 License, or (at your option) any later version. See the file
Chris@148 13 COPYING included with this distribution for more information.
Chris@148 14 */
Chris@148 15
Chris@148 16 #include "CSVFileReader.h"
Chris@148 17
Chris@150 18 #include "model/Model.h"
Chris@148 19 #include "base/RealTime.h"
Chris@631 20 #include "base/StringBits.h"
Chris@148 21 #include "model/SparseOneDimensionalModel.h"
Chris@148 22 #include "model/SparseTimeValueModel.h"
Chris@152 23 #include "model/EditableDenseThreeDimensionalModel.h"
Chris@628 24 #include "model/RegionModel.h"
Chris@897 25 #include "model/NoteModel.h"
Chris@308 26 #include "DataFileReaderFactory.h"
Chris@148 27
Chris@148 28 #include <QFile>
Chris@148 29 #include <QString>
Chris@148 30 #include <QRegExp>
Chris@148 31 #include <QStringList>
Chris@148 32 #include <QTextStream>
Chris@148 33
Chris@148 34 #include <iostream>
Chris@628 35 #include <map>
Chris@148 36
Chris@392 37 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
Chris@929 38 int mainModelSampleRate) :
Chris@392 39 m_format(format),
Chris@148 40 m_file(0),
Chris@631 41 m_warnings(0),
Chris@148 42 m_mainModelSampleRate(mainModelSampleRate)
Chris@148 43 {
Chris@148 44 m_file = new QFile(path);
Chris@148 45 bool good = false;
Chris@148 46
Chris@148 47 if (!m_file->exists()) {
Chris@148 48 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
Chris@148 49 } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@148 50 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
Chris@148 51 } else {
Chris@148 52 good = true;
Chris@148 53 }
Chris@148 54
Chris@148 55 if (!good) {
Chris@148 56 delete m_file;
Chris@148 57 m_file = 0;
Chris@148 58 }
Chris@148 59 }
Chris@148 60
Chris@148 61 CSVFileReader::~CSVFileReader()
Chris@148 62 {
Chris@690 63 SVDEBUG << "CSVFileReader::~CSVFileReader: file is " << m_file << endl;
Chris@148 64
Chris@148 65 if (m_file) {
Chris@690 66 SVDEBUG << "CSVFileReader::CSVFileReader: Closing file" << endl;
Chris@148 67 m_file->close();
Chris@148 68 }
Chris@148 69 delete m_file;
Chris@148 70 }
Chris@148 71
Chris@148 72 bool
Chris@148 73 CSVFileReader::isOK() const
Chris@148 74 {
Chris@148 75 return (m_file != 0);
Chris@148 76 }
Chris@148 77
Chris@148 78 QString
Chris@148 79 CSVFileReader::getError() const
Chris@148 80 {
Chris@148 81 return m_error;
Chris@148 82 }
Chris@148 83
Chris@929 84 int
Chris@929 85 CSVFileReader::convertTimeValue(QString s, int lineno, int sampleRate,
Chris@929 86 int windowSize) const
Chris@631 87 {
Chris@631 88 QRegExp nonNumericRx("[^0-9eE.,+-]");
Chris@897 89 int warnLimit = 10;
Chris@631 90
Chris@631 91 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@631 92
Chris@929 93 int calculatedFrame = 0;
Chris@631 94
Chris@631 95 bool ok = false;
Chris@631 96 QString numeric = s;
Chris@631 97 numeric.remove(nonNumericRx);
Chris@631 98
Chris@631 99 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@631 100
Chris@631 101 double time = numeric.toDouble(&ok);
Chris@631 102 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@631 103 calculatedFrame = int(time * sampleRate + 0.5);
Chris@990 104
Chris@990 105 } else if (timeUnits == CSVFormat::TimeMilliseconds) {
Chris@990 106
Chris@990 107 double time = numeric.toDouble(&ok);
Chris@990 108 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@990 109 calculatedFrame = int((time / 1000.0) * sampleRate + 0.5);
Chris@631 110
Chris@631 111 } else {
Chris@631 112
Chris@631 113 long n = numeric.toLong(&ok);
Chris@631 114 if (n >= 0) calculatedFrame = n;
Chris@631 115
Chris@631 116 if (timeUnits == CSVFormat::TimeWindows) {
Chris@631 117 calculatedFrame *= windowSize;
Chris@631 118 }
Chris@631 119 }
Chris@631 120
Chris@631 121 if (!ok) {
Chris@631 122 if (m_warnings < warnLimit) {
Chris@843 123 cerr << "WARNING: CSVFileReader::load: "
Chris@844 124 << "Bad time format (\"" << s
Chris@631 125 << "\") in data line "
Chris@843 126 << lineno+1 << endl;
Chris@631 127 } else if (m_warnings == warnLimit) {
Chris@843 128 cerr << "WARNING: Too many warnings" << endl;
Chris@631 129 }
Chris@631 130 ++m_warnings;
Chris@631 131 }
Chris@631 132
Chris@631 133 return calculatedFrame;
Chris@631 134 }
Chris@631 135
Chris@148 136 Model *
Chris@148 137 CSVFileReader::load() const
Chris@148 138 {
Chris@148 139 if (!m_file) return 0;
Chris@148 140
Chris@628 141 CSVFormat::ModelType modelType = m_format.getModelType();
Chris@392 142 CSVFormat::TimingType timingType = m_format.getTimingType();
Chris@628 143 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@929 144 int sampleRate = m_format.getSampleRate();
Chris@929 145 int windowSize = m_format.getWindowSize();
Chris@631 146 QChar separator = m_format.getSeparator();
Chris@631 147 bool allowQuoting = m_format.getAllowQuoting();
Chris@148 148
Chris@392 149 if (timingType == CSVFormat::ExplicitTiming) {
Chris@611 150 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@611 151 // This will be overridden later if more than one line
Chris@611 152 // appears in our file, but we want to choose a default
Chris@611 153 // that's likely to be visible
Chris@611 154 windowSize = 1024;
Chris@611 155 } else {
Chris@611 156 windowSize = 1;
Chris@611 157 }
Chris@990 158 if (timeUnits == CSVFormat::TimeSeconds ||
Chris@990 159 timeUnits == CSVFormat::TimeMilliseconds) {
Chris@148 160 sampleRate = m_mainModelSampleRate;
Chris@148 161 }
Chris@148 162 }
Chris@148 163
Chris@148 164 SparseOneDimensionalModel *model1 = 0;
Chris@148 165 SparseTimeValueModel *model2 = 0;
Chris@628 166 RegionModel *model2a = 0;
Chris@897 167 NoteModel *model2b = 0;
Chris@152 168 EditableDenseThreeDimensionalModel *model3 = 0;
Chris@148 169 Model *model = 0;
Chris@148 170
Chris@148 171 QTextStream in(m_file);
Chris@148 172 in.seek(0);
Chris@148 173
Chris@148 174 unsigned int warnings = 0, warnLimit = 10;
Chris@148 175 unsigned int lineno = 0;
Chris@148 176
Chris@148 177 float min = 0.0, max = 0.0;
Chris@148 178
Chris@929 179 int frameNo = 0;
Chris@929 180 int duration = 0;
Chris@929 181 int endFrame = 0;
Chris@631 182
Chris@631 183 bool haveAnyValue = false;
Chris@631 184 bool haveEndTime = false;
Chris@897 185 bool pitchLooksLikeMIDI = true;
Chris@631 186
Chris@929 187 int startFrame = 0; // for calculation of dense model resolution
Chris@631 188 bool firstEverValue = true;
Chris@148 189
Chris@631 190 std::map<QString, int> labelCountMap;
Chris@631 191
Chris@676 192 int valueColumns = 0;
Chris@676 193 for (int i = 0; i < m_format.getColumnCount(); ++i) {
Chris@676 194 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
Chris@676 195 ++valueColumns;
Chris@676 196 }
Chris@676 197 }
Chris@676 198
Chris@148 199 while (!in.atEnd()) {
Chris@148 200
Chris@283 201 // QTextStream's readLine doesn't cope with old-style Mac
Chris@283 202 // CR-only line endings. Why did they bother making the class
Chris@283 203 // cope with more than one sort of line ending, if it still
Chris@283 204 // can't be configured to cope with all the common sorts?
Chris@148 205
Chris@283 206 // For the time being we'll deal with this case (which is
Chris@283 207 // relatively uncommon for us, but still necessary to handle)
Chris@283 208 // by reading the entire file using a single readLine, and
Chris@283 209 // splitting it. For CR and CR/LF line endings this will just
Chris@283 210 // read a line at a time, and that's obviously OK.
Chris@148 211
Chris@283 212 QString chunk = in.readLine();
Chris@283 213 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@283 214
Chris@897 215 for (int li = 0; li < lines.size(); ++li) {
Chris@148 216
Chris@283 217 QString line = lines[li];
Chris@148 218
Chris@283 219 if (line.startsWith("#")) continue;
Chris@283 220
Chris@631 221 QStringList list = StringBits::split(line, separator, allowQuoting);
Chris@283 222 if (!model) {
Chris@283 223
Chris@283 224 switch (modelType) {
Chris@283 225
Chris@392 226 case CSVFormat::OneDimensionalModel:
Chris@283 227 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
Chris@283 228 model = model1;
Chris@283 229 break;
Chris@148 230
Chris@392 231 case CSVFormat::TwoDimensionalModel:
Chris@283 232 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
Chris@283 233 model = model2;
Chris@283 234 break;
Chris@148 235
Chris@628 236 case CSVFormat::TwoDimensionalModelWithDuration:
Chris@628 237 model2a = new RegionModel(sampleRate, windowSize, false);
Chris@628 238 model = model2a;
Chris@628 239 break;
Chris@628 240
Chris@897 241 case CSVFormat::TwoDimensionalModelWithDurationAndPitch:
Chris@897 242 model2b = new NoteModel(sampleRate, windowSize, false);
Chris@897 243 model = model2b;
Chris@897 244 break;
Chris@897 245
Chris@392 246 case CSVFormat::ThreeDimensionalModel:
Chris@535 247 model3 = new EditableDenseThreeDimensionalModel
Chris@535 248 (sampleRate,
Chris@535 249 windowSize,
Chris@676 250 valueColumns,
Chris@535 251 EditableDenseThreeDimensionalModel::NoCompression);
Chris@283 252 model = model3;
Chris@283 253 break;
Chris@283 254 }
Chris@283 255 }
Chris@148 256
Chris@631 257 float value = 0.f;
Chris@897 258 float pitch = 0.f;
Chris@631 259 QString label = "";
Chris@148 260
Chris@631 261 duration = 0.f;
Chris@631 262 haveEndTime = false;
Chris@628 263
Chris@283 264 for (int i = 0; i < list.size(); ++i) {
Chris@148 265
Chris@631 266 QString s = list[i];
Chris@631 267
Chris@631 268 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
Chris@631 269
Chris@631 270 switch (purpose) {
Chris@631 271
Chris@631 272 case CSVFormat::ColumnUnknown:
Chris@631 273 break;
Chris@631 274
Chris@631 275 case CSVFormat::ColumnStartTime:
Chris@631 276 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 277 break;
Chris@631 278
Chris@631 279 case CSVFormat::ColumnEndTime:
Chris@631 280 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 281 haveEndTime = true;
Chris@631 282 break;
Chris@631 283
Chris@631 284 case CSVFormat::ColumnDuration:
Chris@631 285 duration = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 286 break;
Chris@631 287
Chris@631 288 case CSVFormat::ColumnValue:
Chris@631 289 value = s.toFloat();
Chris@631 290 haveAnyValue = true;
Chris@631 291 break;
Chris@631 292
Chris@897 293 case CSVFormat::ColumnPitch:
Chris@897 294 pitch = s.toFloat();
Chris@897 295 if (pitch < 0.f || pitch > 127.f) {
Chris@897 296 pitchLooksLikeMIDI = false;
Chris@897 297 }
Chris@897 298 break;
Chris@897 299
Chris@631 300 case CSVFormat::ColumnLabel:
Chris@631 301 label = s;
Chris@631 302 ++labelCountMap[label];
Chris@631 303 break;
Chris@283 304 }
Chris@631 305 }
Chris@148 306
Chris@631 307 if (haveEndTime) { // ... calculate duration now all cols read
Chris@631 308 if (endFrame > frameNo) {
Chris@631 309 duration = endFrame - frameNo;
Chris@628 310 }
Chris@283 311 }
Chris@148 312
Chris@392 313 if (modelType == CSVFormat::OneDimensionalModel) {
Chris@148 314
Chris@631 315 SparseOneDimensionalModel::Point point(frameNo, label);
Chris@283 316 model1->addPoint(point);
Chris@148 317
Chris@392 318 } else if (modelType == CSVFormat::TwoDimensionalModel) {
Chris@148 319
Chris@631 320 SparseTimeValueModel::Point point(frameNo, value, label);
Chris@283 321 model2->addPoint(point);
Chris@148 322
Chris@628 323 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
Chris@628 324
Chris@631 325 RegionModel::Point point(frameNo, value, duration, label);
Chris@628 326 model2a->addPoint(point);
Chris@628 327
Chris@897 328 } else if (modelType == CSVFormat::TwoDimensionalModelWithDurationAndPitch) {
Chris@897 329
Chris@897 330 float level = ((value >= 0.f && value <= 1.f) ? value : 1.f);
Chris@897 331 NoteModel::Point point(frameNo, pitch, duration, level, label);
Chris@897 332 model2b->addPoint(point);
Chris@897 333
Chris@392 334 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 335
Chris@283 336 DenseThreeDimensionalModel::Column values;
Chris@148 337
Chris@631 338 for (int i = 0; i < list.size(); ++i) {
Chris@148 339
Chris@676 340 if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) {
Chris@676 341 continue;
Chris@676 342 }
Chris@676 343
Chris@283 344 bool ok = false;
Chris@283 345 float value = list[i].toFloat(&ok);
Chris@611 346
Chris@676 347 values.push_back(value);
Chris@148 348
Chris@631 349 if (firstEverValue || value < min) min = value;
Chris@631 350 if (firstEverValue || value > max) max = value;
Chris@676 351
Chris@631 352 if (firstEverValue) {
Chris@611 353 startFrame = frameNo;
Chris@611 354 model3->setStartFrame(startFrame);
Chris@611 355 } else if (lineno == 1 &&
Chris@611 356 timingType == CSVFormat::ExplicitTiming) {
Chris@611 357 model3->setResolution(frameNo - startFrame);
Chris@611 358 }
Chris@631 359
Chris@631 360 firstEverValue = false;
Chris@148 361
Chris@283 362 if (!ok) {
Chris@283 363 if (warnings < warnLimit) {
Chris@843 364 cerr << "WARNING: CSVFileReader::load: "
Chris@390 365 << "Non-numeric value \""
Chris@844 366 << list[i]
Chris@491 367 << "\" in data line " << lineno+1
Chris@843 368 << ":" << endl;
Chris@843 369 cerr << line << endl;
Chris@283 370 ++warnings;
Chris@283 371 } else if (warnings == warnLimit) {
Chris@843 372 // cerr << "WARNING: Too many warnings" << endl;
Chris@283 373 }
Chris@283 374 }
Chris@283 375 }
Chris@148 376
Chris@690 377 // SVDEBUG << "Setting bin values for count " << lineno << ", frame "
Chris@687 378 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
Chris@148 379
Chris@611 380 model3->setColumn(lineno, values);
Chris@283 381 }
Chris@148 382
Chris@283 383 ++lineno;
Chris@392 384 if (timingType == CSVFormat::ImplicitTiming ||
Chris@283 385 list.size() == 0) {
Chris@283 386 frameNo += windowSize;
Chris@283 387 }
Chris@283 388 }
Chris@148 389 }
Chris@148 390
Chris@631 391 if (!haveAnyValue) {
Chris@631 392 if (model2a) {
Chris@631 393 // assign values for regions based on label frequency; we
Chris@631 394 // have this in our labelCountMap, sort of
Chris@631 395
Chris@631 396 std::map<int, std::map<QString, float> > countLabelValueMap;
Chris@631 397 for (std::map<QString, int>::iterator i = labelCountMap.begin();
Chris@631 398 i != labelCountMap.end(); ++i) {
Chris@631 399 countLabelValueMap[i->second][i->first] = 0.f;
Chris@631 400 }
Chris@631 401
Chris@631 402 float v = 0.f;
Chris@631 403 for (std::map<int, std::map<QString, float> >::iterator i =
Chris@631 404 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
Chris@631 405 --i;
Chris@631 406 for (std::map<QString, float>::iterator j = i->second.begin();
Chris@631 407 j != i->second.end(); ++j) {
Chris@631 408 j->second = v;
Chris@631 409 v = v + 1.f;
Chris@631 410 }
Chris@631 411 }
Chris@631 412
Chris@631 413 std::map<RegionModel::Point, RegionModel::Point,
Chris@631 414 RegionModel::Point::Comparator> pointMap;
Chris@631 415 for (RegionModel::PointList::const_iterator i =
Chris@631 416 model2a->getPoints().begin();
Chris@631 417 i != model2a->getPoints().end(); ++i) {
Chris@631 418 RegionModel::Point p(*i);
Chris@631 419 v = countLabelValueMap[labelCountMap[p.label]][p.label];
Chris@631 420 RegionModel::Point pp(p.frame, v, p.duration, p.label);
Chris@631 421 pointMap[p] = pp;
Chris@631 422 }
Chris@631 423
Chris@631 424 for (std::map<RegionModel::Point, RegionModel::Point>::iterator i =
Chris@631 425 pointMap.begin(); i != pointMap.end(); ++i) {
Chris@631 426 model2a->deletePoint(i->first);
Chris@631 427 model2a->addPoint(i->second);
Chris@631 428 }
Chris@631 429 }
Chris@631 430 }
Chris@631 431
Chris@897 432 if (model2b) {
Chris@897 433 if (pitchLooksLikeMIDI) {
Chris@897 434 model2b->setScaleUnits("MIDI Pitch");
Chris@897 435 } else {
Chris@897 436 model2b->setScaleUnits("Hz");
Chris@897 437 }
Chris@897 438 }
Chris@897 439
Chris@961 440 if (model3) {
Chris@148 441 model3->setMinimumLevel(min);
Chris@148 442 model3->setMaximumLevel(max);
Chris@148 443 }
Chris@148 444
Chris@148 445 return model;
Chris@148 446 }
Chris@148 447