annotate data/fileio/CSVFileReader.cpp @ 558:1d7ebc05157e

* Some fairly simplistic code to set up layer type properties based on RDF data about feature types (both when running transforms and when importing features from RDF files).
author Chris Cannam
date Thu, 12 Feb 2009 15:26:43 +0000
parents 3ccf48fb81d6
children dd97f7b3d120
rev   line source
Chris@148 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@148 2
Chris@148 3 /*
Chris@148 4 Sonic Visualiser
Chris@148 5 An audio file viewer and annotation editor.
Chris@148 6 Centre for Digital Music, Queen Mary, University of London.
Chris@148 7 This file copyright 2006 Chris Cannam.
Chris@148 8
Chris@148 9 This program is free software; you can redistribute it and/or
Chris@148 10 modify it under the terms of the GNU General Public License as
Chris@148 11 published by the Free Software Foundation; either version 2 of the
Chris@148 12 License, or (at your option) any later version. See the file
Chris@148 13 COPYING included with this distribution for more information.
Chris@148 14 */
Chris@148 15
Chris@148 16 #include "CSVFileReader.h"
Chris@148 17
Chris@150 18 #include "model/Model.h"
Chris@148 19 #include "base/RealTime.h"
Chris@148 20 #include "model/SparseOneDimensionalModel.h"
Chris@148 21 #include "model/SparseTimeValueModel.h"
Chris@152 22 #include "model/EditableDenseThreeDimensionalModel.h"
Chris@308 23 #include "DataFileReaderFactory.h"
Chris@148 24
Chris@148 25 #include <QFile>
Chris@148 26 #include <QString>
Chris@148 27 #include <QRegExp>
Chris@148 28 #include <QStringList>
Chris@148 29 #include <QTextStream>
Chris@148 30
Chris@148 31 #include <iostream>
Chris@148 32
Chris@392 33 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
Chris@392 34 size_t mainModelSampleRate) :
Chris@392 35 m_format(format),
Chris@148 36 m_file(0),
Chris@148 37 m_mainModelSampleRate(mainModelSampleRate)
Chris@148 38 {
Chris@148 39 m_file = new QFile(path);
Chris@148 40 bool good = false;
Chris@148 41
Chris@148 42 if (!m_file->exists()) {
Chris@148 43 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
Chris@148 44 } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@148 45 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
Chris@148 46 } else {
Chris@148 47 good = true;
Chris@148 48 }
Chris@148 49
Chris@148 50 if (!good) {
Chris@148 51 delete m_file;
Chris@148 52 m_file = 0;
Chris@148 53 }
Chris@148 54 }
Chris@148 55
Chris@148 56 CSVFileReader::~CSVFileReader()
Chris@148 57 {
Chris@148 58 std::cerr << "CSVFileReader::~CSVFileReader: file is " << m_file << std::endl;
Chris@148 59
Chris@148 60 if (m_file) {
Chris@148 61 std::cerr << "CSVFileReader::CSVFileReader: Closing file" << std::endl;
Chris@148 62 m_file->close();
Chris@148 63 }
Chris@148 64 delete m_file;
Chris@148 65 }
Chris@148 66
Chris@148 67 bool
Chris@148 68 CSVFileReader::isOK() const
Chris@148 69 {
Chris@148 70 return (m_file != 0);
Chris@148 71 }
Chris@148 72
Chris@148 73 QString
Chris@148 74 CSVFileReader::getError() const
Chris@148 75 {
Chris@148 76 return m_error;
Chris@148 77 }
Chris@148 78
Chris@148 79 Model *
Chris@148 80 CSVFileReader::load() const
Chris@148 81 {
Chris@148 82 if (!m_file) return 0;
Chris@392 83 /*!!!
Chris@148 84 CSVFormatDialog *dialog = new CSVFormatDialog
Chris@148 85 (0, m_file, m_mainModelSampleRate);
Chris@148 86
Chris@148 87 if (dialog->exec() == QDialog::Rejected) {
Chris@148 88 delete dialog;
Chris@308 89 throw DataFileReaderFactory::ImportCancelled;
Chris@148 90 }
Chris@392 91 */
Chris@148 92
Chris@392 93 CSVFormat::ModelType modelType = m_format.getModelType();
Chris@392 94 CSVFormat::TimingType timingType = m_format.getTimingType();
Chris@392 95 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@392 96 QString separator = m_format.getSeparator();
Chris@392 97 QString::SplitBehavior behaviour = m_format.getSplitBehaviour();
Chris@392 98 size_t sampleRate = m_format.getSampleRate();
Chris@392 99 size_t windowSize = m_format.getWindowSize();
Chris@148 100
Chris@392 101 if (timingType == CSVFormat::ExplicitTiming) {
Chris@148 102 windowSize = 1;
Chris@392 103 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@148 104 sampleRate = m_mainModelSampleRate;
Chris@148 105 }
Chris@148 106 }
Chris@148 107
Chris@148 108 SparseOneDimensionalModel *model1 = 0;
Chris@148 109 SparseTimeValueModel *model2 = 0;
Chris@152 110 EditableDenseThreeDimensionalModel *model3 = 0;
Chris@148 111 Model *model = 0;
Chris@148 112
Chris@148 113 QTextStream in(m_file);
Chris@148 114 in.seek(0);
Chris@148 115
Chris@148 116 unsigned int warnings = 0, warnLimit = 10;
Chris@148 117 unsigned int lineno = 0;
Chris@148 118
Chris@148 119 float min = 0.0, max = 0.0;
Chris@148 120
Chris@148 121 size_t frameNo = 0;
Chris@148 122
Chris@148 123 while (!in.atEnd()) {
Chris@148 124
Chris@283 125 // QTextStream's readLine doesn't cope with old-style Mac
Chris@283 126 // CR-only line endings. Why did they bother making the class
Chris@283 127 // cope with more than one sort of line ending, if it still
Chris@283 128 // can't be configured to cope with all the common sorts?
Chris@148 129
Chris@283 130 // For the time being we'll deal with this case (which is
Chris@283 131 // relatively uncommon for us, but still necessary to handle)
Chris@283 132 // by reading the entire file using a single readLine, and
Chris@283 133 // splitting it. For CR and CR/LF line endings this will just
Chris@283 134 // read a line at a time, and that's obviously OK.
Chris@148 135
Chris@283 136 QString chunk = in.readLine();
Chris@283 137 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@283 138
Chris@283 139 for (size_t li = 0; li < lines.size(); ++li) {
Chris@148 140
Chris@283 141 QString line = lines[li];
Chris@148 142
Chris@283 143 if (line.startsWith("#")) continue;
Chris@283 144
Chris@390 145 QStringList list = line.split(separator, behaviour);
Chris@283 146
Chris@283 147 if (!model) {
Chris@283 148
Chris@283 149 switch (modelType) {
Chris@283 150
Chris@392 151 case CSVFormat::OneDimensionalModel:
Chris@283 152 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
Chris@283 153 model = model1;
Chris@283 154 break;
Chris@148 155
Chris@392 156 case CSVFormat::TwoDimensionalModel:
Chris@283 157 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
Chris@283 158 model = model2;
Chris@283 159 break;
Chris@148 160
Chris@392 161 case CSVFormat::ThreeDimensionalModel:
Chris@535 162 model3 = new EditableDenseThreeDimensionalModel
Chris@535 163 (sampleRate,
Chris@535 164 windowSize,
Chris@535 165 list.size(),
Chris@535 166 EditableDenseThreeDimensionalModel::NoCompression);
Chris@283 167 model = model3;
Chris@283 168 break;
Chris@283 169 }
Chris@283 170 }
Chris@148 171
Chris@283 172 QStringList tidyList;
Chris@390 173 QRegExp nonNumericRx("[^0-9eE.,+-]");
Chris@148 174
Chris@283 175 for (int i = 0; i < list.size(); ++i) {
Chris@148 176
Chris@283 177 QString s(list[i].trimmed());
Chris@148 178
Chris@283 179 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
Chris@283 180 s = s.mid(1, s.length() - 2);
Chris@283 181 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
Chris@283 182 s = s.mid(1, s.length() - 2);
Chris@283 183 }
Chris@148 184
Chris@392 185 if (i == 0 && timingType == CSVFormat::ExplicitTiming) {
Chris@148 186
Chris@283 187 bool ok = false;
Chris@283 188 QString numeric = s;
Chris@283 189 numeric.remove(nonNumericRx);
Chris@148 190
Chris@392 191 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@148 192
Chris@283 193 double time = numeric.toDouble(&ok);
Chris@491 194 frameNo = int(time * sampleRate + 0.5);
Chris@148 195
Chris@283 196 } else {
Chris@148 197
Chris@283 198 frameNo = numeric.toInt(&ok);
Chris@148 199
Chris@392 200 if (timeUnits == CSVFormat::TimeWindows) {
Chris@283 201 frameNo *= windowSize;
Chris@283 202 }
Chris@283 203 }
Chris@148 204
Chris@283 205 if (!ok) {
Chris@283 206 if (warnings < warnLimit) {
Chris@283 207 std::cerr << "WARNING: CSVFileReader::load: "
Chris@283 208 << "Bad time format (\"" << s.toStdString()
Chris@283 209 << "\") in data line "
Chris@491 210 << lineno+1 << ":" << std::endl;
Chris@283 211 std::cerr << line.toStdString() << std::endl;
Chris@283 212 } else if (warnings == warnLimit) {
Chris@283 213 std::cerr << "WARNING: Too many warnings" << std::endl;
Chris@283 214 }
Chris@283 215 ++warnings;
Chris@283 216 }
Chris@283 217 } else {
Chris@283 218 tidyList.push_back(s);
Chris@283 219 }
Chris@283 220 }
Chris@148 221
Chris@392 222 if (modelType == CSVFormat::OneDimensionalModel) {
Chris@148 223
Chris@283 224 SparseOneDimensionalModel::Point point
Chris@283 225 (frameNo,
Chris@283 226 tidyList.size() > 0 ? tidyList[tidyList.size()-1] :
Chris@491 227 QString("%1").arg(lineno+1));
Chris@148 228
Chris@283 229 model1->addPoint(point);
Chris@148 230
Chris@392 231 } else if (modelType == CSVFormat::TwoDimensionalModel) {
Chris@148 232
Chris@283 233 SparseTimeValueModel::Point point
Chris@283 234 (frameNo,
Chris@283 235 tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0,
Chris@491 236 tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno+1));
Chris@148 237
Chris@283 238 model2->addPoint(point);
Chris@148 239
Chris@392 240 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 241
Chris@283 242 DenseThreeDimensionalModel::Column values;
Chris@148 243
Chris@283 244 for (int i = 0; i < tidyList.size(); ++i) {
Chris@148 245
Chris@283 246 bool ok = false;
Chris@283 247 float value = list[i].toFloat(&ok);
Chris@283 248 values.push_back(value);
Chris@148 249
Chris@283 250 if ((lineno == 0 && i == 0) || value < min) min = value;
Chris@283 251 if ((lineno == 0 && i == 0) || value > max) max = value;
Chris@148 252
Chris@283 253 if (!ok) {
Chris@283 254 if (warnings < warnLimit) {
Chris@283 255 std::cerr << "WARNING: CSVFileReader::load: "
Chris@390 256 << "Non-numeric value \""
Chris@390 257 << list[i].toStdString()
Chris@491 258 << "\" in data line " << lineno+1
Chris@283 259 << ":" << std::endl;
Chris@283 260 std::cerr << line.toStdString() << std::endl;
Chris@283 261 ++warnings;
Chris@283 262 } else if (warnings == warnLimit) {
Chris@390 263 // std::cerr << "WARNING: Too many warnings" << std::endl;
Chris@283 264 }
Chris@283 265 }
Chris@283 266 }
Chris@148 267
Chris@390 268 // std::cerr << "Setting bin values for count " << lineno << ", frame "
Chris@390 269 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl;
Chris@148 270
Chris@283 271 model3->setColumn(frameNo / model3->getResolution(), values);
Chris@283 272 }
Chris@148 273
Chris@283 274 ++lineno;
Chris@392 275 if (timingType == CSVFormat::ImplicitTiming ||
Chris@283 276 list.size() == 0) {
Chris@283 277 frameNo += windowSize;
Chris@283 278 }
Chris@283 279 }
Chris@148 280 }
Chris@148 281
Chris@392 282 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 283 model3->setMinimumLevel(min);
Chris@148 284 model3->setMaximumLevel(max);
Chris@148 285 }
Chris@148 286
Chris@148 287 return model;
Chris@148 288 }
Chris@148 289