annotate data/fileio/CSVFileReader.cpp @ 823:f0558e69a074

Rename Resampling- to DecodingWavFileReader, and use it whenever we have an audio file that is not quickly seekable using libsndfile. Avoids very slow performance when analysing ogg files.
author Chris Cannam
date Wed, 17 Jul 2013 15:40:01 +0100
parents 1424aa29ae95
children e802e550a1f2
rev   line source
Chris@148 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@148 2
Chris@148 3 /*
Chris@148 4 Sonic Visualiser
Chris@148 5 An audio file viewer and annotation editor.
Chris@148 6 Centre for Digital Music, Queen Mary, University of London.
Chris@148 7 This file copyright 2006 Chris Cannam.
Chris@148 8
Chris@148 9 This program is free software; you can redistribute it and/or
Chris@148 10 modify it under the terms of the GNU General Public License as
Chris@148 11 published by the Free Software Foundation; either version 2 of the
Chris@148 12 License, or (at your option) any later version. See the file
Chris@148 13 COPYING included with this distribution for more information.
Chris@148 14 */
Chris@148 15
Chris@148 16 #include "CSVFileReader.h"
Chris@148 17
Chris@150 18 #include "model/Model.h"
Chris@148 19 #include "base/RealTime.h"
Chris@631 20 #include "base/StringBits.h"
Chris@148 21 #include "model/SparseOneDimensionalModel.h"
Chris@148 22 #include "model/SparseTimeValueModel.h"
Chris@152 23 #include "model/EditableDenseThreeDimensionalModel.h"
Chris@628 24 #include "model/RegionModel.h"
Chris@308 25 #include "DataFileReaderFactory.h"
Chris@148 26
Chris@148 27 #include <QFile>
Chris@148 28 #include <QString>
Chris@148 29 #include <QRegExp>
Chris@148 30 #include <QStringList>
Chris@148 31 #include <QTextStream>
Chris@148 32
Chris@148 33 #include <iostream>
Chris@628 34 #include <map>
Chris@148 35
Chris@392 36 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
Chris@392 37 size_t mainModelSampleRate) :
Chris@392 38 m_format(format),
Chris@148 39 m_file(0),
Chris@631 40 m_warnings(0),
Chris@148 41 m_mainModelSampleRate(mainModelSampleRate)
Chris@148 42 {
Chris@148 43 m_file = new QFile(path);
Chris@148 44 bool good = false;
Chris@148 45
Chris@148 46 if (!m_file->exists()) {
Chris@148 47 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
Chris@148 48 } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@148 49 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
Chris@148 50 } else {
Chris@148 51 good = true;
Chris@148 52 }
Chris@148 53
Chris@148 54 if (!good) {
Chris@148 55 delete m_file;
Chris@148 56 m_file = 0;
Chris@148 57 }
Chris@148 58 }
Chris@148 59
Chris@148 60 CSVFileReader::~CSVFileReader()
Chris@148 61 {
Chris@690 62 SVDEBUG << "CSVFileReader::~CSVFileReader: file is " << m_file << endl;
Chris@148 63
Chris@148 64 if (m_file) {
Chris@690 65 SVDEBUG << "CSVFileReader::CSVFileReader: Closing file" << endl;
Chris@148 66 m_file->close();
Chris@148 67 }
Chris@148 68 delete m_file;
Chris@148 69 }
Chris@148 70
Chris@148 71 bool
Chris@148 72 CSVFileReader::isOK() const
Chris@148 73 {
Chris@148 74 return (m_file != 0);
Chris@148 75 }
Chris@148 76
Chris@148 77 QString
Chris@148 78 CSVFileReader::getError() const
Chris@148 79 {
Chris@148 80 return m_error;
Chris@148 81 }
Chris@148 82
Chris@631 83 size_t
Chris@631 84 CSVFileReader::convertTimeValue(QString s, int lineno, size_t sampleRate,
Chris@631 85 size_t windowSize) const
Chris@631 86 {
Chris@631 87 QRegExp nonNumericRx("[^0-9eE.,+-]");
Chris@631 88 unsigned int warnLimit = 10;
Chris@631 89
Chris@631 90 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@631 91
Chris@631 92 size_t calculatedFrame = 0;
Chris@631 93
Chris@631 94 bool ok = false;
Chris@631 95 QString numeric = s;
Chris@631 96 numeric.remove(nonNumericRx);
Chris@631 97
Chris@631 98 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@631 99
Chris@631 100 double time = numeric.toDouble(&ok);
Chris@631 101 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@631 102 calculatedFrame = int(time * sampleRate + 0.5);
Chris@631 103
Chris@631 104 } else {
Chris@631 105
Chris@631 106 long n = numeric.toLong(&ok);
Chris@631 107 if (n >= 0) calculatedFrame = n;
Chris@631 108
Chris@631 109 if (timeUnits == CSVFormat::TimeWindows) {
Chris@631 110 calculatedFrame *= windowSize;
Chris@631 111 }
Chris@631 112 }
Chris@631 113
Chris@631 114 if (!ok) {
Chris@631 115 if (m_warnings < warnLimit) {
Chris@631 116 std::cerr << "WARNING: CSVFileReader::load: "
Chris@631 117 << "Bad time format (\"" << s.toStdString()
Chris@631 118 << "\") in data line "
Chris@631 119 << lineno+1 << std::endl;
Chris@631 120 } else if (m_warnings == warnLimit) {
Chris@631 121 std::cerr << "WARNING: Too many warnings" << std::endl;
Chris@631 122 }
Chris@631 123 ++m_warnings;
Chris@631 124 }
Chris@631 125
Chris@631 126 return calculatedFrame;
Chris@631 127 }
Chris@631 128
Chris@148 129 Model *
Chris@148 130 CSVFileReader::load() const
Chris@148 131 {
Chris@148 132 if (!m_file) return 0;
Chris@148 133
Chris@628 134 CSVFormat::ModelType modelType = m_format.getModelType();
Chris@392 135 CSVFormat::TimingType timingType = m_format.getTimingType();
Chris@628 136 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@392 137 size_t sampleRate = m_format.getSampleRate();
Chris@392 138 size_t windowSize = m_format.getWindowSize();
Chris@631 139 QChar separator = m_format.getSeparator();
Chris@631 140 bool allowQuoting = m_format.getAllowQuoting();
Chris@148 141
Chris@392 142 if (timingType == CSVFormat::ExplicitTiming) {
Chris@611 143 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@611 144 // This will be overridden later if more than one line
Chris@611 145 // appears in our file, but we want to choose a default
Chris@611 146 // that's likely to be visible
Chris@611 147 windowSize = 1024;
Chris@611 148 } else {
Chris@611 149 windowSize = 1;
Chris@611 150 }
Chris@392 151 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@148 152 sampleRate = m_mainModelSampleRate;
Chris@148 153 }
Chris@148 154 }
Chris@148 155
Chris@148 156 SparseOneDimensionalModel *model1 = 0;
Chris@148 157 SparseTimeValueModel *model2 = 0;
Chris@628 158 RegionModel *model2a = 0;
Chris@152 159 EditableDenseThreeDimensionalModel *model3 = 0;
Chris@148 160 Model *model = 0;
Chris@148 161
Chris@148 162 QTextStream in(m_file);
Chris@148 163 in.seek(0);
Chris@148 164
Chris@148 165 unsigned int warnings = 0, warnLimit = 10;
Chris@148 166 unsigned int lineno = 0;
Chris@148 167
Chris@148 168 float min = 0.0, max = 0.0;
Chris@148 169
Chris@148 170 size_t frameNo = 0;
Chris@628 171 size_t duration = 0;
Chris@631 172 size_t endFrame = 0;
Chris@631 173
Chris@631 174 bool haveAnyValue = false;
Chris@631 175 bool haveEndTime = false;
Chris@631 176
Chris@611 177 size_t startFrame = 0; // for calculation of dense model resolution
Chris@631 178 bool firstEverValue = true;
Chris@148 179
Chris@631 180 std::map<QString, int> labelCountMap;
Chris@631 181
Chris@676 182 int valueColumns = 0;
Chris@676 183 for (int i = 0; i < m_format.getColumnCount(); ++i) {
Chris@676 184 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
Chris@676 185 ++valueColumns;
Chris@676 186 }
Chris@676 187 }
Chris@676 188
Chris@148 189 while (!in.atEnd()) {
Chris@148 190
Chris@283 191 // QTextStream's readLine doesn't cope with old-style Mac
Chris@283 192 // CR-only line endings. Why did they bother making the class
Chris@283 193 // cope with more than one sort of line ending, if it still
Chris@283 194 // can't be configured to cope with all the common sorts?
Chris@148 195
Chris@283 196 // For the time being we'll deal with this case (which is
Chris@283 197 // relatively uncommon for us, but still necessary to handle)
Chris@283 198 // by reading the entire file using a single readLine, and
Chris@283 199 // splitting it. For CR and CR/LF line endings this will just
Chris@283 200 // read a line at a time, and that's obviously OK.
Chris@148 201
Chris@283 202 QString chunk = in.readLine();
Chris@283 203 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@283 204
Chris@283 205 for (size_t li = 0; li < lines.size(); ++li) {
Chris@148 206
Chris@283 207 QString line = lines[li];
Chris@148 208
Chris@283 209 if (line.startsWith("#")) continue;
Chris@283 210
Chris@631 211 QStringList list = StringBits::split(line, separator, allowQuoting);
Chris@283 212 if (!model) {
Chris@283 213
Chris@283 214 switch (modelType) {
Chris@283 215
Chris@392 216 case CSVFormat::OneDimensionalModel:
Chris@283 217 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
Chris@283 218 model = model1;
Chris@283 219 break;
Chris@148 220
Chris@392 221 case CSVFormat::TwoDimensionalModel:
Chris@283 222 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
Chris@283 223 model = model2;
Chris@283 224 break;
Chris@148 225
Chris@628 226 case CSVFormat::TwoDimensionalModelWithDuration:
Chris@628 227 model2a = new RegionModel(sampleRate, windowSize, false);
Chris@628 228 model = model2a;
Chris@628 229 break;
Chris@628 230
Chris@392 231 case CSVFormat::ThreeDimensionalModel:
Chris@535 232 model3 = new EditableDenseThreeDimensionalModel
Chris@535 233 (sampleRate,
Chris@535 234 windowSize,
Chris@676 235 valueColumns,
Chris@535 236 EditableDenseThreeDimensionalModel::NoCompression);
Chris@283 237 model = model3;
Chris@283 238 break;
Chris@283 239 }
Chris@283 240 }
Chris@148 241
Chris@631 242 float value = 0.f;
Chris@631 243 QString label = "";
Chris@148 244
Chris@631 245 duration = 0.f;
Chris@631 246 haveEndTime = false;
Chris@628 247
Chris@283 248 for (int i = 0; i < list.size(); ++i) {
Chris@148 249
Chris@631 250 QString s = list[i];
Chris@631 251
Chris@631 252 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
Chris@631 253
Chris@631 254 switch (purpose) {
Chris@631 255
Chris@631 256 case CSVFormat::ColumnUnknown:
Chris@631 257 break;
Chris@631 258
Chris@631 259 case CSVFormat::ColumnStartTime:
Chris@631 260 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 261 break;
Chris@631 262
Chris@631 263 case CSVFormat::ColumnEndTime:
Chris@631 264 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 265 haveEndTime = true;
Chris@631 266 break;
Chris@631 267
Chris@631 268 case CSVFormat::ColumnDuration:
Chris@631 269 duration = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631 270 break;
Chris@631 271
Chris@631 272 case CSVFormat::ColumnValue:
Chris@631 273 value = s.toFloat();
Chris@631 274 haveAnyValue = true;
Chris@631 275 break;
Chris@631 276
Chris@631 277 case CSVFormat::ColumnLabel:
Chris@631 278 label = s;
Chris@631 279 ++labelCountMap[label];
Chris@631 280 break;
Chris@283 281 }
Chris@631 282 }
Chris@148 283
Chris@631 284 if (haveEndTime) { // ... calculate duration now all cols read
Chris@631 285 if (endFrame > frameNo) {
Chris@631 286 duration = endFrame - frameNo;
Chris@628 287 }
Chris@283 288 }
Chris@148 289
Chris@392 290 if (modelType == CSVFormat::OneDimensionalModel) {
Chris@148 291
Chris@631 292 SparseOneDimensionalModel::Point point(frameNo, label);
Chris@283 293 model1->addPoint(point);
Chris@148 294
Chris@392 295 } else if (modelType == CSVFormat::TwoDimensionalModel) {
Chris@148 296
Chris@631 297 SparseTimeValueModel::Point point(frameNo, value, label);
Chris@283 298 model2->addPoint(point);
Chris@148 299
Chris@628 300 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
Chris@628 301
Chris@631 302 RegionModel::Point point(frameNo, value, duration, label);
Chris@628 303 model2a->addPoint(point);
Chris@628 304
Chris@392 305 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 306
Chris@283 307 DenseThreeDimensionalModel::Column values;
Chris@148 308
Chris@631 309 for (int i = 0; i < list.size(); ++i) {
Chris@148 310
Chris@676 311 if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) {
Chris@676 312 continue;
Chris@676 313 }
Chris@676 314
Chris@283 315 bool ok = false;
Chris@283 316 float value = list[i].toFloat(&ok);
Chris@611 317
Chris@676 318 values.push_back(value);
Chris@148 319
Chris@631 320 if (firstEverValue || value < min) min = value;
Chris@631 321 if (firstEverValue || value > max) max = value;
Chris@676 322
Chris@631 323 if (firstEverValue) {
Chris@611 324 startFrame = frameNo;
Chris@611 325 model3->setStartFrame(startFrame);
Chris@611 326 } else if (lineno == 1 &&
Chris@611 327 timingType == CSVFormat::ExplicitTiming) {
Chris@611 328 model3->setResolution(frameNo - startFrame);
Chris@611 329 }
Chris@631 330
Chris@631 331 firstEverValue = false;
Chris@148 332
Chris@283 333 if (!ok) {
Chris@283 334 if (warnings < warnLimit) {
Chris@283 335 std::cerr << "WARNING: CSVFileReader::load: "
Chris@390 336 << "Non-numeric value \""
Chris@390 337 << list[i].toStdString()
Chris@491 338 << "\" in data line " << lineno+1
Chris@283 339 << ":" << std::endl;
Chris@686 340 std::cerr << line << std::endl;
Chris@283 341 ++warnings;
Chris@283 342 } else if (warnings == warnLimit) {
Chris@390 343 // std::cerr << "WARNING: Too many warnings" << std::endl;
Chris@283 344 }
Chris@283 345 }
Chris@283 346 }
Chris@148 347
Chris@690 348 // SVDEBUG << "Setting bin values for count " << lineno << ", frame "
Chris@687 349 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
Chris@148 350
Chris@611 351 model3->setColumn(lineno, values);
Chris@283 352 }
Chris@148 353
Chris@283 354 ++lineno;
Chris@392 355 if (timingType == CSVFormat::ImplicitTiming ||
Chris@283 356 list.size() == 0) {
Chris@283 357 frameNo += windowSize;
Chris@283 358 }
Chris@283 359 }
Chris@148 360 }
Chris@148 361
Chris@631 362 if (!haveAnyValue) {
Chris@631 363 if (model2a) {
Chris@631 364 // assign values for regions based on label frequency; we
Chris@631 365 // have this in our labelCountMap, sort of
Chris@631 366
Chris@631 367 std::map<int, std::map<QString, float> > countLabelValueMap;
Chris@631 368 for (std::map<QString, int>::iterator i = labelCountMap.begin();
Chris@631 369 i != labelCountMap.end(); ++i) {
Chris@631 370 countLabelValueMap[i->second][i->first] = 0.f;
Chris@631 371 }
Chris@631 372
Chris@631 373 float v = 0.f;
Chris@631 374 for (std::map<int, std::map<QString, float> >::iterator i =
Chris@631 375 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
Chris@631 376 --i;
Chris@631 377 for (std::map<QString, float>::iterator j = i->second.begin();
Chris@631 378 j != i->second.end(); ++j) {
Chris@631 379 j->second = v;
Chris@631 380 v = v + 1.f;
Chris@631 381 }
Chris@631 382 }
Chris@631 383
Chris@631 384 std::map<RegionModel::Point, RegionModel::Point,
Chris@631 385 RegionModel::Point::Comparator> pointMap;
Chris@631 386 for (RegionModel::PointList::const_iterator i =
Chris@631 387 model2a->getPoints().begin();
Chris@631 388 i != model2a->getPoints().end(); ++i) {
Chris@631 389 RegionModel::Point p(*i);
Chris@631 390 v = countLabelValueMap[labelCountMap[p.label]][p.label];
Chris@631 391 RegionModel::Point pp(p.frame, v, p.duration, p.label);
Chris@631 392 pointMap[p] = pp;
Chris@631 393 }
Chris@631 394
Chris@631 395 for (std::map<RegionModel::Point, RegionModel::Point>::iterator i =
Chris@631 396 pointMap.begin(); i != pointMap.end(); ++i) {
Chris@631 397 model2a->deletePoint(i->first);
Chris@631 398 model2a->addPoint(i->second);
Chris@631 399 }
Chris@631 400 }
Chris@631 401 }
Chris@631 402
Chris@392 403 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 404 model3->setMinimumLevel(min);
Chris@148 405 model3->setMaximumLevel(max);
Chris@148 406 }
Chris@148 407
Chris@148 408 return model;
Chris@148 409 }
Chris@148 410