annotate data/fileio/CSVFileReader.cpp @ 985:f073d924a7c3

Fix #1058 clicking row in Layer Edit dialog when colour 3d plot layer active jumps to wrong frame (was using sample rate where resolution intended)
author Chris Cannam
date Tue, 16 Sep 2014 10:29:19 +0100
parents fae5689f7e27
children dc1695b90a58
rev   line source
Chris@148 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@148 2
Chris@148 3 /*
Chris@148 4 Sonic Visualiser
Chris@148 5 An audio file viewer and annotation editor.
Chris@148 6 Centre for Digital Music, Queen Mary, University of London.
Chris@148 7 This file copyright 2006 Chris Cannam.
Chris@148 8
Chris@148 9 This program is free software; you can redistribute it and/or
Chris@148 10 modify it under the terms of the GNU General Public License as
Chris@148 11 published by the Free Software Foundation; either version 2 of the
Chris@148 12 License, or (at your option) any later version. See the file
Chris@148 13 COPYING included with this distribution for more information.
Chris@148 14 */
Chris@148 15
Chris@148 16 #include "CSVFileReader.h"
Chris@148 17
Chris@150 18 #include "model/Model.h"
Chris@148 19 #include "base/RealTime.h"
Chris@631 20 #include "base/StringBits.h"
Chris@148 21 #include "model/SparseOneDimensionalModel.h"
Chris@148 22 #include "model/SparseTimeValueModel.h"
Chris@152 23 #include "model/EditableDenseThreeDimensionalModel.h"
Chris@628 24 #include "model/RegionModel.h"
Chris@897 25 #include "model/NoteModel.h"
Chris@308 26 #include "DataFileReaderFactory.h"
Chris@148 27
Chris@148 28 #include <QFile>
Chris@148 29 #include <QString>
Chris@148 30 #include <QRegExp>
Chris@148 31 #include <QStringList>
Chris@148 32 #include <QTextStream>
Chris@148 33
Chris@148 34 #include <iostream>
Chris@628 35 #include <map>
Chris@148 36
Chris@392 37 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
Chris@929 38 int mainModelSampleRate) :
Chris@392 39 m_format(format),
Chris@148 40 m_file(0),
Chris@631 41 m_warnings(0),
Chris@148 42 m_mainModelSampleRate(mainModelSampleRate)
Chris@148 43 {
Chris@148 44 m_file = new QFile(path);
Chris@148 45 bool good = false;
Chris@148 46
Chris@148 47 if (!m_file->exists()) {
Chris@148 48 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
Chris@148 49 } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@148 50 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
Chris@148 51 } else {
Chris@148 52 good = true;
Chris@148 53 }
Chris@148 54
Chris@148 55 if (!good) {
Chris@148 56 delete m_file;
Chris@148 57 m_file = 0;
Chris@148 58 }
Chris@148 59 }
Chris@148 60
Chris@148 61 CSVFileReader::~CSVFileReader()
Chris@148 62 {
Chris@690 63 SVDEBUG << "CSVFileReader::~CSVFileReader: file is " << m_file << endl;
Chris@148 64
Chris@148 65 if (m_file) {
Chris@690 66 SVDEBUG << "CSVFileReader::CSVFileReader: Closing file" << endl;
Chris@148 67 m_file->close();
Chris@148 68 }
Chris@148 69 delete m_file;
Chris@148 70 }
Chris@148 71
Chris@148 72 bool
Chris@148 73 CSVFileReader::isOK() const
Chris@148 74 {
Chris@148 75 return (m_file != 0);
Chris@148 76 }
Chris@148 77
Chris@148 78 QString
Chris@148 79 CSVFileReader::getError() const
Chris@148 80 {
Chris@148 81 return m_error;
Chris@148 82 }
Chris@148 83
Chris@929 84 int
Chris@929 85 CSVFileReader::convertTimeValue(QString s, int lineno, int sampleRate,
Chris@929 86 int windowSize) const
Chris@631 87 {
Chris@631 88 QRegExp nonNumericRx("[^0-9eE.,+-]");
Chris@897 89 int warnLimit = 10;
Chris@631 90
Chris@631 91 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@631 92
Chris@929 93 int calculatedFrame = 0;
Chris@631 94
Chris@631 95 bool ok = false;
Chris@631 96 QString numeric = s;
Chris@631 97 numeric.remove(nonNumericRx);
Chris@631 98
Chris@631 99 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@631 100
Chris@631 101 double time = numeric.toDouble(&ok);
Chris@631 102 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@631 103 calculatedFrame = int(time * sampleRate + 0.5);
Chris@631 104
Chris@631 105 } else {
Chris@631 106
Chris@631 107 long n = numeric.toLong(&ok);
Chris@631 108 if (n >= 0) calculatedFrame = n;
Chris@631 109
Chris@631 110 if (timeUnits == CSVFormat::TimeWindows) {
Chris@631 111 calculatedFrame *= windowSize;
Chris@631 112 }
Chris@631 113 }
Chris@631 114
Chris@631 115 if (!ok) {
Chris@631 116 if (m_warnings < warnLimit) {
Chris@843 117 cerr << "WARNING: CSVFileReader::load: "
Chris@844 118 << "Bad time format (\"" << s
Chris@631 119 << "\") in data line "
Chris@843 120 << lineno+1 << endl;
Chris@631 121 } else if (m_warnings == warnLimit) {
Chris@843 122 cerr << "WARNING: Too many warnings" << endl;
Chris@631 123 }
Chris@631 124 ++m_warnings;
Chris@631 125 }
Chris@631 126
Chris@631 127 return calculatedFrame;
Chris@631 128 }
Chris@631 129
Chris@148 130 Model *
Chris@148 131 CSVFileReader::load() const
Chris@148 132 {
Chris@148 133 if (!m_file) return 0;
Chris@148 134
Chris@628 135 CSVFormat::ModelType modelType = m_format.getModelType();
Chris@392 136 CSVFormat::TimingType timingType = m_format.getTimingType();
Chris@628 137 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@929 138 int sampleRate = m_format.getSampleRate();
Chris@929 139 int windowSize = m_format.getWindowSize();
Chris@631 140 QChar separator = m_format.getSeparator();
Chris@631 141 bool allowQuoting = m_format.getAllowQuoting();
Chris@148 142
Chris@392 143 if (timingType == CSVFormat::ExplicitTiming) {
Chris@611 144 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@611 145 // This will be overridden later if more than one line
Chris@611 146 // appears in our file, but we want to choose a default
Chris@611 147 // that's likely to be visible
Chris@611 148 windowSize = 1024;
Chris@611 149 } else {
Chris@611 150 windowSize = 1;
Chris@611 151 }
Chris@392 152 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@148 153 sampleRate = m_mainModelSampleRate;
Chris@148 154 }
Chris@148 155 }
Chris@148 156
Chris@148 157 SparseOneDimensionalModel *model1 = 0;
Chris@148 158 SparseTimeValueModel *model2 = 0;
Chris@628 159 RegionModel *model2a = 0;
Chris@897 160 NoteModel *model2b = 0;
Chris@152 161 EditableDenseThreeDimensionalModel *model3 = 0;
Chris@148 162 Model *model = 0;
Chris@148 163
Chris@148 164 QTextStream in(m_file);
Chris@148 165 in.seek(0);
Chris@148 166
Chris@148 167 unsigned int warnings = 0, warnLimit = 10;
Chris@148 168 unsigned int lineno = 0;
Chris@148 169
Chris@148 170 float min = 0.0, max = 0.0;
Chris@148 171
Chris@929 172 int frameNo = 0;
Chris@929 173 int duration = 0;
Chris@929 174 int endFrame = 0;
Chris@631 175
Chris@631 176 bool haveAnyValue = false;
Chris@631 177 bool haveEndTime = false;
Chris@897 178 bool pitchLooksLikeMIDI = true;
Chris@631 179
Chris@929 180 int startFrame = 0; // for calculation of dense model resolution
Chris@631 181 bool firstEverValue = true;
Chris@148 182
Chris@631 183 std::map<QString, int> labelCountMap;
Chris@631 184
Chris@676 185 int valueColumns = 0;
Chris@676 186 for (int i = 0; i < m_format.getColumnCount(); ++i) {
Chris@676 187 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
Chris@676 188 ++valueColumns;
Chris@676 189 }
Chris@676 190 }
Chris@676 191
Chris@148 192 while (!in.atEnd()) {
Chris@148 193
Chris@283 194 // QTextStream's readLine doesn't cope with old-style Mac
Chris@283 195 // CR-only line endings. Why did they bother making the class
Chris@283 196 // cope with more than one sort of line ending, if it still
Chris@283 197 // can't be configured to cope with all the common sorts?
Chris@148 198
Chris@283 199 // For the time being we'll deal with this case (which is
Chris@283 200 // relatively uncommon for us, but still necessary to handle)
Chris@283 201 // by reading the entire file using a single readLine, and
Chris@283 202 // splitting it. For CR and CR/LF line endings this will just
Chris@283 203 // read a line at a time, and that's obviously OK.
Chris@148 204
Chris@283 205 QString chunk = in.readLine();
Chris@283 206 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@283 207
Chris@897 208 for (int li = 0; li < lines.size(); ++li) {
Chris@148 209
Chris@283 210 QString line = lines[li];
Chris@148 211
Chris@283 212 if (line.startsWith("#")) continue;
Chris@283 213
Chris@631 214 QStringList list = StringBits::split(line, separator, allowQuoting);
Chris@283 215 if (!model) {
Chris@283 216
Chris@283 217 switch (modelType) {
Chris@283 218
Chris@392 219 case CSVFormat::OneDimensionalModel:
Chris@283 220 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
Chris@283 221 model = model1;
Chris@283 222 break;
Chris@148 223
Chris@392 224 case CSVFormat::TwoDimensionalModel:
Chris@283 225 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
Chris@283 226 model = model2;
Chris@283 227 break;
Chris@148 228
Chris@628 229 case CSVFormat::TwoDimensionalModelWithDuration:
Chris@628 230 model2a = new RegionModel(sampleRate, windowSize, false);
Chris@628 231 model = model2a;
Chris@628 232 break;
Chris@628 233
Chris@897 234 case CSVFormat::TwoDimensionalModelWithDurationAndPitch:
Chris@897 235 model2b = new NoteModel(sampleRate, windowSize, false);
Chris@897 236 model = model2b;
Chris@897 237 break;
Chris@897 238
Chris@392 239 case CSVFormat::ThreeDimensionalModel:
Chris@535 240 model3 = new EditableDenseThreeDimensionalModel
Chris@535 241 (sampleRate,
Chris@535 242 windowSize,
Chris@676 243 valueColumns,
Chris@535 244 EditableDenseThreeDimensionalModel::NoCompression);
Chris@283 245 model = model3;
Chris@283 246 break;
Chris@283 247 }
Chris@283 248 }
Chris@148 249
Chris@631 250 float value = 0.f;
Chris@897 251 float pitch = 0.f;
Chris@631 252 QString label = "";
Chris@148 253
Chris@631 254 duration = 0.f;
Chris@631 255 haveEndTime = false;
Chris@628 256
Chris@283 257 for (int i = 0; i < list.size(); ++i) {
Chris@148 258
Chris@631 259 QString s = list[i];
Chris@631 260
Chris@631 261 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
Chris@631 262
Chris@631 263 switch (purpose) {
Chris@631 264
Chris@631 265 case CSVFormat::ColumnUnknown:
Chris@631 266 break;
Chris@631 267
Chris@631 268 case CSVFormat::ColumnStartTime:
Chris@631 269 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 270 break;
Chris@631 271
Chris@631 272 case CSVFormat::ColumnEndTime:
Chris@631 273 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 274 haveEndTime = true;
Chris@631 275 break;
Chris@631 276
Chris@631 277 case CSVFormat::ColumnDuration:
Chris@631 278 duration = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 279 break;
Chris@631 280
Chris@631 281 case CSVFormat::ColumnValue:
Chris@631 282 value = s.toFloat();
Chris@631 283 haveAnyValue = true;
Chris@631 284 break;
Chris@631 285
Chris@897 286 case CSVFormat::ColumnPitch:
Chris@897 287 pitch = s.toFloat();
Chris@897 288 if (pitch < 0.f || pitch > 127.f) {
Chris@897 289 pitchLooksLikeMIDI = false;
Chris@897 290 }
Chris@897 291 break;
Chris@897 292
Chris@631 293 case CSVFormat::ColumnLabel:
Chris@631 294 label = s;
Chris@631 295 ++labelCountMap[label];
Chris@631 296 break;
Chris@283 297 }
Chris@631 298 }
Chris@148 299
Chris@631 300 if (haveEndTime) { // ... calculate duration now all cols read
Chris@631 301 if (endFrame > frameNo) {
Chris@631 302 duration = endFrame - frameNo;
Chris@628 303 }
Chris@283 304 }
Chris@148 305
Chris@392 306 if (modelType == CSVFormat::OneDimensionalModel) {
Chris@148 307
Chris@631 308 SparseOneDimensionalModel::Point point(frameNo, label);
Chris@283 309 model1->addPoint(point);
Chris@148 310
Chris@392 311 } else if (modelType == CSVFormat::TwoDimensionalModel) {
Chris@148 312
Chris@631 313 SparseTimeValueModel::Point point(frameNo, value, label);
Chris@283 314 model2->addPoint(point);
Chris@148 315
Chris@628 316 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
Chris@628 317
Chris@631 318 RegionModel::Point point(frameNo, value, duration, label);
Chris@628 319 model2a->addPoint(point);
Chris@628 320
Chris@897 321 } else if (modelType == CSVFormat::TwoDimensionalModelWithDurationAndPitch) {
Chris@897 322
Chris@897 323 float level = ((value >= 0.f && value <= 1.f) ? value : 1.f);
Chris@897 324 NoteModel::Point point(frameNo, pitch, duration, level, label);
Chris@897 325 model2b->addPoint(point);
Chris@897 326
Chris@392 327 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 328
Chris@283 329 DenseThreeDimensionalModel::Column values;
Chris@148 330
Chris@631 331 for (int i = 0; i < list.size(); ++i) {
Chris@148 332
Chris@676 333 if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) {
Chris@676 334 continue;
Chris@676 335 }
Chris@676 336
Chris@283 337 bool ok = false;
Chris@283 338 float value = list[i].toFloat(&ok);
Chris@611 339
Chris@676 340 values.push_back(value);
Chris@148 341
Chris@631 342 if (firstEverValue || value < min) min = value;
Chris@631 343 if (firstEverValue || value > max) max = value;
Chris@676 344
Chris@631 345 if (firstEverValue) {
Chris@611 346 startFrame = frameNo;
Chris@611 347 model3->setStartFrame(startFrame);
Chris@611 348 } else if (lineno == 1 &&
Chris@611 349 timingType == CSVFormat::ExplicitTiming) {
Chris@611 350 model3->setResolution(frameNo - startFrame);
Chris@611 351 }
Chris@631 352
Chris@631 353 firstEverValue = false;
Chris@148 354
Chris@283 355 if (!ok) {
Chris@283 356 if (warnings < warnLimit) {
Chris@843 357 cerr << "WARNING: CSVFileReader::load: "
Chris@390 358 << "Non-numeric value \""
Chris@844 359 << list[i]
Chris@491 360 << "\" in data line " << lineno+1
Chris@843 361 << ":" << endl;
Chris@843 362 cerr << line << endl;
Chris@283 363 ++warnings;
Chris@283 364 } else if (warnings == warnLimit) {
Chris@843 365 // cerr << "WARNING: Too many warnings" << endl;
Chris@283 366 }
Chris@283 367 }
Chris@283 368 }
Chris@148 369
Chris@690 370 // SVDEBUG << "Setting bin values for count " << lineno << ", frame "
Chris@687 371 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
Chris@148 372
Chris@611 373 model3->setColumn(lineno, values);
Chris@283 374 }
Chris@148 375
Chris@283 376 ++lineno;
Chris@392 377 if (timingType == CSVFormat::ImplicitTiming ||
Chris@283 378 list.size() == 0) {
Chris@283 379 frameNo += windowSize;
Chris@283 380 }
Chris@283 381 }
Chris@148 382 }
Chris@148 383
Chris@631 384 if (!haveAnyValue) {
Chris@631 385 if (model2a) {
Chris@631 386 // assign values for regions based on label frequency; we
Chris@631 387 // have this in our labelCountMap, sort of
Chris@631 388
Chris@631 389 std::map<int, std::map<QString, float> > countLabelValueMap;
Chris@631 390 for (std::map<QString, int>::iterator i = labelCountMap.begin();
Chris@631 391 i != labelCountMap.end(); ++i) {
Chris@631 392 countLabelValueMap[i->second][i->first] = 0.f;
Chris@631 393 }
Chris@631 394
Chris@631 395 float v = 0.f;
Chris@631 396 for (std::map<int, std::map<QString, float> >::iterator i =
Chris@631 397 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
Chris@631 398 --i;
Chris@631 399 for (std::map<QString, float>::iterator j = i->second.begin();
Chris@631 400 j != i->second.end(); ++j) {
Chris@631 401 j->second = v;
Chris@631 402 v = v + 1.f;
Chris@631 403 }
Chris@631 404 }
Chris@631 405
Chris@631 406 std::map<RegionModel::Point, RegionModel::Point,
Chris@631 407 RegionModel::Point::Comparator> pointMap;
Chris@631 408 for (RegionModel::PointList::const_iterator i =
Chris@631 409 model2a->getPoints().begin();
Chris@631 410 i != model2a->getPoints().end(); ++i) {
Chris@631 411 RegionModel::Point p(*i);
Chris@631 412 v = countLabelValueMap[labelCountMap[p.label]][p.label];
Chris@631 413 RegionModel::Point pp(p.frame, v, p.duration, p.label);
Chris@631 414 pointMap[p] = pp;
Chris@631 415 }
Chris@631 416
Chris@631 417 for (std::map<RegionModel::Point, RegionModel::Point>::iterator i =
Chris@631 418 pointMap.begin(); i != pointMap.end(); ++i) {
Chris@631 419 model2a->deletePoint(i->first);
Chris@631 420 model2a->addPoint(i->second);
Chris@631 421 }
Chris@631 422 }
Chris@631 423 }
Chris@631 424
Chris@897 425 if (model2b) {
Chris@897 426 if (pitchLooksLikeMIDI) {
Chris@897 427 model2b->setScaleUnits("MIDI Pitch");
Chris@897 428 } else {
Chris@897 429 model2b->setScaleUnits("Hz");
Chris@897 430 }
Chris@897 431 }
Chris@897 432
Chris@961 433 if (model3) {
Chris@148 434 model3->setMinimumLevel(min);
Chris@148 435 model3->setMaximumLevel(max);
Chris@148 436 }
Chris@148 437
Chris@148 438 return model;
Chris@148 439 }
Chris@148 440