annotate data/fileio/CSVFileReader.cpp @ 634:773fc0e43feb

* First bits of works matching
author Chris Cannam
date Fri, 26 Mar 2010 13:53:31 +0000
parents 3a5ee4b6c9ad
children 611a4fa14dde
rev   line source
Chris@148 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@148 2
Chris@148 3 /*
Chris@148 4 Sonic Visualiser
Chris@148 5 An audio file viewer and annotation editor.
Chris@148 6 Centre for Digital Music, Queen Mary, University of London.
Chris@148 7 This file copyright 2006 Chris Cannam.
Chris@148 8
Chris@148 9 This program is free software; you can redistribute it and/or
Chris@148 10 modify it under the terms of the GNU General Public License as
Chris@148 11 published by the Free Software Foundation; either version 2 of the
Chris@148 12 License, or (at your option) any later version. See the file
Chris@148 13 COPYING included with this distribution for more information.
Chris@148 14 */
Chris@148 15
Chris@148 16 #include "CSVFileReader.h"
Chris@148 17
Chris@150 18 #include "model/Model.h"
Chris@148 19 #include "base/RealTime.h"
Chris@631 20 #include "base/StringBits.h"
Chris@148 21 #include "model/SparseOneDimensionalModel.h"
Chris@148 22 #include "model/SparseTimeValueModel.h"
Chris@152 23 #include "model/EditableDenseThreeDimensionalModel.h"
Chris@628 24 #include "model/RegionModel.h"
Chris@308 25 #include "DataFileReaderFactory.h"
Chris@148 26
Chris@148 27 #include <QFile>
Chris@148 28 #include <QString>
Chris@148 29 #include <QRegExp>
Chris@148 30 #include <QStringList>
Chris@148 31 #include <QTextStream>
Chris@148 32
Chris@148 33 #include <iostream>
Chris@628 34 #include <map>
Chris@148 35
Chris@392 36 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
Chris@392 37 size_t mainModelSampleRate) :
Chris@392 38 m_format(format),
Chris@148 39 m_file(0),
Chris@631 40 m_warnings(0),
Chris@148 41 m_mainModelSampleRate(mainModelSampleRate)
Chris@148 42 {
Chris@148 43 m_file = new QFile(path);
Chris@148 44 bool good = false;
Chris@148 45
Chris@148 46 if (!m_file->exists()) {
Chris@148 47 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
Chris@148 48 } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@148 49 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
Chris@148 50 } else {
Chris@148 51 good = true;
Chris@148 52 }
Chris@148 53
Chris@148 54 if (!good) {
Chris@148 55 delete m_file;
Chris@148 56 m_file = 0;
Chris@148 57 }
Chris@148 58 }
Chris@148 59
Chris@148 60 CSVFileReader::~CSVFileReader()
Chris@148 61 {
Chris@148 62 std::cerr << "CSVFileReader::~CSVFileReader: file is " << m_file << std::endl;
Chris@148 63
Chris@148 64 if (m_file) {
Chris@148 65 std::cerr << "CSVFileReader::CSVFileReader: Closing file" << std::endl;
Chris@148 66 m_file->close();
Chris@148 67 }
Chris@148 68 delete m_file;
Chris@148 69 }
Chris@148 70
Chris@148 71 bool
Chris@148 72 CSVFileReader::isOK() const
Chris@148 73 {
Chris@148 74 return (m_file != 0);
Chris@148 75 }
Chris@148 76
Chris@148 77 QString
Chris@148 78 CSVFileReader::getError() const
Chris@148 79 {
Chris@148 80 return m_error;
Chris@148 81 }
Chris@148 82
Chris@631 83 size_t
Chris@631 84 CSVFileReader::convertTimeValue(QString s, int lineno, size_t sampleRate,
Chris@631 85 size_t windowSize) const
Chris@631 86 {
Chris@631 87 QRegExp nonNumericRx("[^0-9eE.,+-]");
Chris@631 88 unsigned int warnLimit = 10;
Chris@631 89
Chris@631 90 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@631 91
Chris@631 92 size_t calculatedFrame = 0;
Chris@631 93
Chris@631 94 bool ok = false;
Chris@631 95 QString numeric = s;
Chris@631 96 numeric.remove(nonNumericRx);
Chris@631 97
Chris@631 98 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@631 99
Chris@631 100 double time = numeric.toDouble(&ok);
Chris@631 101 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@631 102 calculatedFrame = int(time * sampleRate + 0.5);
Chris@631 103
Chris@631 104 } else {
Chris@631 105
Chris@631 106 long n = numeric.toLong(&ok);
Chris@631 107 if (n >= 0) calculatedFrame = n;
Chris@631 108
Chris@631 109 if (timeUnits == CSVFormat::TimeWindows) {
Chris@631 110 calculatedFrame *= windowSize;
Chris@631 111 }
Chris@631 112 }
Chris@631 113
Chris@631 114 if (!ok) {
Chris@631 115 if (m_warnings < warnLimit) {
Chris@631 116 std::cerr << "WARNING: CSVFileReader::load: "
Chris@631 117 << "Bad time format (\"" << s.toStdString()
Chris@631 118 << "\") in data line "
Chris@631 119 << lineno+1 << std::endl;
Chris@631 120 } else if (m_warnings == warnLimit) {
Chris@631 121 std::cerr << "WARNING: Too many warnings" << std::endl;
Chris@631 122 }
Chris@631 123 ++m_warnings;
Chris@631 124 }
Chris@631 125
Chris@631 126 return calculatedFrame;
Chris@631 127 }
Chris@631 128
Chris@148 129 Model *
Chris@148 130 CSVFileReader::load() const
Chris@148 131 {
Chris@148 132 if (!m_file) return 0;
Chris@148 133
Chris@628 134 CSVFormat::ModelType modelType = m_format.getModelType();
Chris@392 135 CSVFormat::TimingType timingType = m_format.getTimingType();
Chris@628 136 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@392 137 size_t sampleRate = m_format.getSampleRate();
Chris@392 138 size_t windowSize = m_format.getWindowSize();
Chris@631 139 QChar separator = m_format.getSeparator();
Chris@631 140 bool allowQuoting = m_format.getAllowQuoting();
Chris@148 141
Chris@392 142 if (timingType == CSVFormat::ExplicitTiming) {
Chris@611 143 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@611 144 // This will be overridden later if more than one line
Chris@611 145 // appears in our file, but we want to choose a default
Chris@611 146 // that's likely to be visible
Chris@611 147 windowSize = 1024;
Chris@611 148 } else {
Chris@611 149 windowSize = 1;
Chris@611 150 }
Chris@392 151 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@148 152 sampleRate = m_mainModelSampleRate;
Chris@148 153 }
Chris@148 154 }
Chris@148 155
Chris@148 156 SparseOneDimensionalModel *model1 = 0;
Chris@148 157 SparseTimeValueModel *model2 = 0;
Chris@628 158 RegionModel *model2a = 0;
Chris@152 159 EditableDenseThreeDimensionalModel *model3 = 0;
Chris@148 160 Model *model = 0;
Chris@148 161
Chris@148 162 QTextStream in(m_file);
Chris@148 163 in.seek(0);
Chris@148 164
Chris@148 165 unsigned int warnings = 0, warnLimit = 10;
Chris@148 166 unsigned int lineno = 0;
Chris@148 167
Chris@148 168 float min = 0.0, max = 0.0;
Chris@148 169
Chris@148 170 size_t frameNo = 0;
Chris@628 171 size_t duration = 0;
Chris@631 172 size_t endFrame = 0;
Chris@631 173
Chris@631 174 bool haveAnyValue = false;
Chris@631 175 bool haveEndTime = false;
Chris@631 176
Chris@611 177 size_t startFrame = 0; // for calculation of dense model resolution
Chris@631 178 bool firstEverValue = true;
Chris@148 179
Chris@631 180 std::map<QString, int> labelCountMap;
Chris@631 181
Chris@148 182 while (!in.atEnd()) {
Chris@148 183
Chris@283 184 // QTextStream's readLine doesn't cope with old-style Mac
Chris@283 185 // CR-only line endings. Why did they bother making the class
Chris@283 186 // cope with more than one sort of line ending, if it still
Chris@283 187 // can't be configured to cope with all the common sorts?
Chris@148 188
Chris@283 189 // For the time being we'll deal with this case (which is
Chris@283 190 // relatively uncommon for us, but still necessary to handle)
Chris@283 191 // by reading the entire file using a single readLine, and
Chris@283 192 // splitting it. For CR and CR/LF line endings this will just
Chris@283 193 // read a line at a time, and that's obviously OK.
Chris@148 194
Chris@283 195 QString chunk = in.readLine();
Chris@283 196 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@283 197
Chris@283 198 for (size_t li = 0; li < lines.size(); ++li) {
Chris@148 199
Chris@283 200 QString line = lines[li];
Chris@148 201
Chris@283 202 if (line.startsWith("#")) continue;
Chris@283 203
Chris@631 204 QStringList list = StringBits::split(line, separator, allowQuoting);
Chris@283 205 if (!model) {
Chris@283 206
Chris@283 207 switch (modelType) {
Chris@283 208
Chris@392 209 case CSVFormat::OneDimensionalModel:
Chris@283 210 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
Chris@283 211 model = model1;
Chris@283 212 break;
Chris@148 213
Chris@392 214 case CSVFormat::TwoDimensionalModel:
Chris@283 215 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
Chris@283 216 model = model2;
Chris@283 217 break;
Chris@148 218
Chris@628 219 case CSVFormat::TwoDimensionalModelWithDuration:
Chris@628 220 model2a = new RegionModel(sampleRate, windowSize, false);
Chris@628 221 model = model2a;
Chris@628 222 break;
Chris@628 223
Chris@392 224 case CSVFormat::ThreeDimensionalModel:
Chris@535 225 model3 = new EditableDenseThreeDimensionalModel
Chris@535 226 (sampleRate,
Chris@535 227 windowSize,
Chris@535 228 list.size(),
Chris@535 229 EditableDenseThreeDimensionalModel::NoCompression);
Chris@283 230 model = model3;
Chris@283 231 break;
Chris@283 232 }
Chris@283 233 }
Chris@148 234
Chris@631 235 float value = 0.f;
Chris@631 236 QString label = "";
Chris@148 237
Chris@631 238 duration = 0.f;
Chris@631 239 haveEndTime = false;
Chris@628 240
Chris@283 241 for (int i = 0; i < list.size(); ++i) {
Chris@148 242
Chris@631 243 QString s = list[i];
Chris@631 244
Chris@631 245 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
Chris@631 246
Chris@631 247 switch (purpose) {
Chris@631 248
Chris@631 249 case CSVFormat::ColumnUnknown:
Chris@631 250 break;
Chris@631 251
Chris@631 252 case CSVFormat::ColumnStartTime:
Chris@631 253 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 254 break;
Chris@631 255
Chris@631 256 case CSVFormat::ColumnEndTime:
Chris@631 257 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 258 haveEndTime = true;
Chris@631 259 break;
Chris@631 260
Chris@631 261 case CSVFormat::ColumnDuration:
Chris@631 262 duration = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 263 break;
Chris@631 264
Chris@631 265 case CSVFormat::ColumnValue:
Chris@631 266 value = s.toFloat();
Chris@631 267 haveAnyValue = true;
Chris@631 268 break;
Chris@631 269
Chris@631 270 case CSVFormat::ColumnLabel:
Chris@631 271 label = s;
Chris@631 272 ++labelCountMap[label];
Chris@631 273 break;
Chris@283 274 }
Chris@631 275 }
Chris@148 276
Chris@631 277 if (haveEndTime) { // ... calculate duration now all cols read
Chris@631 278 if (endFrame > frameNo) {
Chris@631 279 duration = endFrame - frameNo;
Chris@628 280 }
Chris@283 281 }
Chris@148 282
Chris@392 283 if (modelType == CSVFormat::OneDimensionalModel) {
Chris@148 284
Chris@631 285 SparseOneDimensionalModel::Point point(frameNo, label);
Chris@283 286 model1->addPoint(point);
Chris@148 287
Chris@392 288 } else if (modelType == CSVFormat::TwoDimensionalModel) {
Chris@148 289
Chris@631 290 SparseTimeValueModel::Point point(frameNo, value, label);
Chris@283 291 model2->addPoint(point);
Chris@148 292
Chris@628 293 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
Chris@628 294
Chris@631 295 RegionModel::Point point(frameNo, value, duration, label);
Chris@628 296 model2a->addPoint(point);
Chris@628 297
Chris@392 298 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 299
Chris@283 300 DenseThreeDimensionalModel::Column values;
Chris@148 301
Chris@631 302 for (int i = 0; i < list.size(); ++i) {
Chris@148 303
Chris@283 304 bool ok = false;
Chris@283 305 float value = list[i].toFloat(&ok);
Chris@611 306
Chris@631 307 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
Chris@611 308 values.push_back(value);
Chris@611 309 }
Chris@148 310
Chris@631 311 if (firstEverValue || value < min) min = value;
Chris@631 312 if (firstEverValue || value > max) max = value;
Chris@611 313
Chris@631 314 if (firstEverValue) {
Chris@611 315 startFrame = frameNo;
Chris@611 316 model3->setStartFrame(startFrame);
Chris@611 317 } else if (lineno == 1 &&
Chris@611 318 timingType == CSVFormat::ExplicitTiming) {
Chris@611 319 model3->setResolution(frameNo - startFrame);
Chris@611 320 }
Chris@631 321
Chris@631 322 firstEverValue = false;
Chris@148 323
Chris@283 324 if (!ok) {
Chris@283 325 if (warnings < warnLimit) {
Chris@283 326 std::cerr << "WARNING: CSVFileReader::load: "
Chris@390 327 << "Non-numeric value \""
Chris@390 328 << list[i].toStdString()
Chris@491 329 << "\" in data line " << lineno+1
Chris@283 330 << ":" << std::endl;
Chris@283 331 std::cerr << line.toStdString() << std::endl;
Chris@283 332 ++warnings;
Chris@283 333 } else if (warnings == warnLimit) {
Chris@390 334 // std::cerr << "WARNING: Too many warnings" << std::endl;
Chris@283 335 }
Chris@283 336 }
Chris@283 337 }
Chris@148 338
Chris@390 339 // std::cerr << "Setting bin values for count " << lineno << ", frame "
Chris@390 340 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl;
Chris@148 341
Chris@611 342 model3->setColumn(lineno, values);
Chris@283 343 }
Chris@148 344
Chris@283 345 ++lineno;
Chris@392 346 if (timingType == CSVFormat::ImplicitTiming ||
Chris@283 347 list.size() == 0) {
Chris@283 348 frameNo += windowSize;
Chris@283 349 }
Chris@283 350 }
Chris@148 351 }
Chris@148 352
Chris@631 353 if (!haveAnyValue) {
Chris@631 354 if (model2a) {
Chris@631 355 // assign values for regions based on label frequency; we
Chris@631 356 // have this in our labelCountMap, sort of
Chris@631 357
Chris@631 358 std::map<int, std::map<QString, float> > countLabelValueMap;
Chris@631 359 for (std::map<QString, int>::iterator i = labelCountMap.begin();
Chris@631 360 i != labelCountMap.end(); ++i) {
Chris@631 361 countLabelValueMap[i->second][i->first] = 0.f;
Chris@631 362 }
Chris@631 363
Chris@631 364 float v = 0.f;
Chris@631 365 for (std::map<int, std::map<QString, float> >::iterator i =
Chris@631 366 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
Chris@631 367 --i;
Chris@631 368 for (std::map<QString, float>::iterator j = i->second.begin();
Chris@631 369 j != i->second.end(); ++j) {
Chris@631 370 j->second = v;
Chris@631 371 v = v + 1.f;
Chris@631 372 }
Chris@631 373 }
Chris@631 374
Chris@631 375 std::map<RegionModel::Point, RegionModel::Point,
Chris@631 376 RegionModel::Point::Comparator> pointMap;
Chris@631 377 for (RegionModel::PointList::const_iterator i =
Chris@631 378 model2a->getPoints().begin();
Chris@631 379 i != model2a->getPoints().end(); ++i) {
Chris@631 380 RegionModel::Point p(*i);
Chris@631 381 v = countLabelValueMap[labelCountMap[p.label]][p.label];
Chris@631 382 RegionModel::Point pp(p.frame, v, p.duration, p.label);
Chris@631 383 pointMap[p] = pp;
Chris@631 384 }
Chris@631 385
Chris@631 386 for (std::map<RegionModel::Point, RegionModel::Point>::iterator i =
Chris@631 387 pointMap.begin(); i != pointMap.end(); ++i) {
Chris@631 388 model2a->deletePoint(i->first);
Chris@631 389 model2a->addPoint(i->second);
Chris@631 390 }
Chris@631 391 }
Chris@631 392 }
Chris@631 393
Chris@392 394 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 395 model3->setMinimumLevel(min);
Chris@148 396 model3->setMaximumLevel(max);
Chris@148 397 }
Chris@148 398
Chris@148 399 return model;
Chris@148 400 }
Chris@148 401