annotate data/fileio/CSVFileReader.cpp @ 498:fdf5930b7ccc

* Bring FeatureWriter and RDFFeatureWriter into the fold (from Runner) so that we can use them to export features from SV as well
author Chris Cannam
date Fri, 28 Nov 2008 13:47:11 +0000
parents 6f8ee19984ad
children 3ccf48fb81d6
rev   line source
Chris@148 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@148 2
Chris@148 3 /*
Chris@148 4 Sonic Visualiser
Chris@148 5 An audio file viewer and annotation editor.
Chris@148 6 Centre for Digital Music, Queen Mary, University of London.
Chris@148 7 This file copyright 2006 Chris Cannam.
Chris@148 8
Chris@148 9 This program is free software; you can redistribute it and/or
Chris@148 10 modify it under the terms of the GNU General Public License as
Chris@148 11 published by the Free Software Foundation; either version 2 of the
Chris@148 12 License, or (at your option) any later version. See the file
Chris@148 13 COPYING included with this distribution for more information.
Chris@148 14 */
Chris@148 15
Chris@148 16 #include "CSVFileReader.h"
Chris@148 17
Chris@150 18 #include "model/Model.h"
Chris@148 19 #include "base/RealTime.h"
Chris@148 20 #include "model/SparseOneDimensionalModel.h"
Chris@148 21 #include "model/SparseTimeValueModel.h"
Chris@152 22 #include "model/EditableDenseThreeDimensionalModel.h"
Chris@308 23 #include "DataFileReaderFactory.h"
Chris@148 24
Chris@148 25 #include <QFile>
Chris@148 26 #include <QString>
Chris@148 27 #include <QRegExp>
Chris@148 28 #include <QStringList>
Chris@148 29 #include <QTextStream>
Chris@148 30
Chris@148 31 #include <iostream>
Chris@148 32
Chris@392 33 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
Chris@392 34 size_t mainModelSampleRate) :
Chris@392 35 m_format(format),
Chris@148 36 m_file(0),
Chris@148 37 m_mainModelSampleRate(mainModelSampleRate)
Chris@148 38 {
Chris@148 39 m_file = new QFile(path);
Chris@148 40 bool good = false;
Chris@148 41
Chris@148 42 if (!m_file->exists()) {
Chris@148 43 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
Chris@148 44 } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@148 45 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
Chris@148 46 } else {
Chris@148 47 good = true;
Chris@148 48 }
Chris@148 49
Chris@148 50 if (!good) {
Chris@148 51 delete m_file;
Chris@148 52 m_file = 0;
Chris@148 53 }
Chris@148 54 }
Chris@148 55
Chris@148 56 CSVFileReader::~CSVFileReader()
Chris@148 57 {
Chris@148 58 std::cerr << "CSVFileReader::~CSVFileReader: file is " << m_file << std::endl;
Chris@148 59
Chris@148 60 if (m_file) {
Chris@148 61 std::cerr << "CSVFileReader::CSVFileReader: Closing file" << std::endl;
Chris@148 62 m_file->close();
Chris@148 63 }
Chris@148 64 delete m_file;
Chris@148 65 }
Chris@148 66
Chris@148 67 bool
Chris@148 68 CSVFileReader::isOK() const
Chris@148 69 {
Chris@148 70 return (m_file != 0);
Chris@148 71 }
Chris@148 72
Chris@148 73 QString
Chris@148 74 CSVFileReader::getError() const
Chris@148 75 {
Chris@148 76 return m_error;
Chris@148 77 }
Chris@148 78
Chris@148 79 Model *
Chris@148 80 CSVFileReader::load() const
Chris@148 81 {
Chris@148 82 if (!m_file) return 0;
Chris@392 83 /*!!!
Chris@148 84 CSVFormatDialog *dialog = new CSVFormatDialog
Chris@148 85 (0, m_file, m_mainModelSampleRate);
Chris@148 86
Chris@148 87 if (dialog->exec() == QDialog::Rejected) {
Chris@148 88 delete dialog;
Chris@308 89 throw DataFileReaderFactory::ImportCancelled;
Chris@148 90 }
Chris@392 91 */
Chris@148 92
Chris@392 93 CSVFormat::ModelType modelType = m_format.getModelType();
Chris@392 94 CSVFormat::TimingType timingType = m_format.getTimingType();
Chris@392 95 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@392 96 QString separator = m_format.getSeparator();
Chris@392 97 QString::SplitBehavior behaviour = m_format.getSplitBehaviour();
Chris@392 98 size_t sampleRate = m_format.getSampleRate();
Chris@392 99 size_t windowSize = m_format.getWindowSize();
Chris@148 100
Chris@392 101 if (timingType == CSVFormat::ExplicitTiming) {
Chris@148 102 windowSize = 1;
Chris@392 103 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@148 104 sampleRate = m_mainModelSampleRate;
Chris@148 105 }
Chris@148 106 }
Chris@148 107
Chris@148 108 SparseOneDimensionalModel *model1 = 0;
Chris@148 109 SparseTimeValueModel *model2 = 0;
Chris@152 110 EditableDenseThreeDimensionalModel *model3 = 0;
Chris@148 111 Model *model = 0;
Chris@148 112
Chris@148 113 QTextStream in(m_file);
Chris@148 114 in.seek(0);
Chris@148 115
Chris@148 116 unsigned int warnings = 0, warnLimit = 10;
Chris@148 117 unsigned int lineno = 0;
Chris@148 118
Chris@148 119 float min = 0.0, max = 0.0;
Chris@148 120
Chris@148 121 size_t frameNo = 0;
Chris@148 122
Chris@148 123 while (!in.atEnd()) {
Chris@148 124
Chris@283 125 // QTextStream's readLine doesn't cope with old-style Mac
Chris@283 126 // CR-only line endings. Why did they bother making the class
Chris@283 127 // cope with more than one sort of line ending, if it still
Chris@283 128 // can't be configured to cope with all the common sorts?
Chris@148 129
Chris@283 130 // For the time being we'll deal with this case (which is
Chris@283 131 // relatively uncommon for us, but still necessary to handle)
Chris@283 132 // by reading the entire file using a single readLine, and
Chris@283 133 // splitting it. For CR and CR/LF line endings this will just
Chris@283 134 // read a line at a time, and that's obviously OK.
Chris@148 135
Chris@283 136 QString chunk = in.readLine();
Chris@283 137 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@283 138
Chris@283 139 for (size_t li = 0; li < lines.size(); ++li) {
Chris@148 140
Chris@283 141 QString line = lines[li];
Chris@148 142
Chris@283 143 if (line.startsWith("#")) continue;
Chris@283 144
Chris@390 145 QStringList list = line.split(separator, behaviour);
Chris@283 146
Chris@283 147 if (!model) {
Chris@283 148
Chris@283 149 switch (modelType) {
Chris@283 150
Chris@392 151 case CSVFormat::OneDimensionalModel:
Chris@283 152 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
Chris@283 153 model = model1;
Chris@283 154 break;
Chris@148 155
Chris@392 156 case CSVFormat::TwoDimensionalModel:
Chris@283 157 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
Chris@283 158 model = model2;
Chris@283 159 break;
Chris@148 160
Chris@392 161 case CSVFormat::ThreeDimensionalModel:
Chris@283 162 model3 = new EditableDenseThreeDimensionalModel(sampleRate,
Chris@283 163 windowSize,
Chris@283 164 list.size());
Chris@283 165 model = model3;
Chris@283 166 break;
Chris@283 167 }
Chris@283 168 }
Chris@148 169
Chris@283 170 QStringList tidyList;
Chris@390 171 QRegExp nonNumericRx("[^0-9eE.,+-]");
Chris@148 172
Chris@283 173 for (int i = 0; i < list.size(); ++i) {
Chris@148 174
Chris@283 175 QString s(list[i].trimmed());
Chris@148 176
Chris@283 177 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
Chris@283 178 s = s.mid(1, s.length() - 2);
Chris@283 179 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
Chris@283 180 s = s.mid(1, s.length() - 2);
Chris@283 181 }
Chris@148 182
Chris@392 183 if (i == 0 && timingType == CSVFormat::ExplicitTiming) {
Chris@148 184
Chris@283 185 bool ok = false;
Chris@283 186 QString numeric = s;
Chris@283 187 numeric.remove(nonNumericRx);
Chris@148 188
Chris@392 189 if (timeUnits == CSVFormat::TimeSeconds) {
Chris@148 190
Chris@283 191 double time = numeric.toDouble(&ok);
Chris@491 192 frameNo = int(time * sampleRate + 0.5);
Chris@148 193
Chris@283 194 } else {
Chris@148 195
Chris@283 196 frameNo = numeric.toInt(&ok);
Chris@148 197
Chris@392 198 if (timeUnits == CSVFormat::TimeWindows) {
Chris@283 199 frameNo *= windowSize;
Chris@283 200 }
Chris@283 201 }
Chris@148 202
Chris@283 203 if (!ok) {
Chris@283 204 if (warnings < warnLimit) {
Chris@283 205 std::cerr << "WARNING: CSVFileReader::load: "
Chris@283 206 << "Bad time format (\"" << s.toStdString()
Chris@283 207 << "\") in data line "
Chris@491 208 << lineno+1 << ":" << std::endl;
Chris@283 209 std::cerr << line.toStdString() << std::endl;
Chris@283 210 } else if (warnings == warnLimit) {
Chris@283 211 std::cerr << "WARNING: Too many warnings" << std::endl;
Chris@283 212 }
Chris@283 213 ++warnings;
Chris@283 214 }
Chris@283 215 } else {
Chris@283 216 tidyList.push_back(s);
Chris@283 217 }
Chris@283 218 }
Chris@148 219
Chris@392 220 if (modelType == CSVFormat::OneDimensionalModel) {
Chris@148 221
Chris@283 222 SparseOneDimensionalModel::Point point
Chris@283 223 (frameNo,
Chris@283 224 tidyList.size() > 0 ? tidyList[tidyList.size()-1] :
Chris@491 225 QString("%1").arg(lineno+1));
Chris@148 226
Chris@283 227 model1->addPoint(point);
Chris@148 228
Chris@392 229 } else if (modelType == CSVFormat::TwoDimensionalModel) {
Chris@148 230
Chris@283 231 SparseTimeValueModel::Point point
Chris@283 232 (frameNo,
Chris@283 233 tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0,
Chris@491 234 tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno+1));
Chris@148 235
Chris@283 236 model2->addPoint(point);
Chris@148 237
Chris@392 238 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 239
Chris@283 240 DenseThreeDimensionalModel::Column values;
Chris@148 241
Chris@283 242 for (int i = 0; i < tidyList.size(); ++i) {
Chris@148 243
Chris@283 244 bool ok = false;
Chris@283 245 float value = list[i].toFloat(&ok);
Chris@283 246 values.push_back(value);
Chris@148 247
Chris@283 248 if ((lineno == 0 && i == 0) || value < min) min = value;
Chris@283 249 if ((lineno == 0 && i == 0) || value > max) max = value;
Chris@148 250
Chris@283 251 if (!ok) {
Chris@283 252 if (warnings < warnLimit) {
Chris@283 253 std::cerr << "WARNING: CSVFileReader::load: "
Chris@390 254 << "Non-numeric value \""
Chris@390 255 << list[i].toStdString()
Chris@491 256 << "\" in data line " << lineno+1
Chris@283 257 << ":" << std::endl;
Chris@283 258 std::cerr << line.toStdString() << std::endl;
Chris@283 259 ++warnings;
Chris@283 260 } else if (warnings == warnLimit) {
Chris@390 261 // std::cerr << "WARNING: Too many warnings" << std::endl;
Chris@283 262 }
Chris@283 263 }
Chris@283 264 }
Chris@148 265
Chris@390 266 // std::cerr << "Setting bin values for count " << lineno << ", frame "
Chris@390 267 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl;
Chris@148 268
Chris@283 269 model3->setColumn(frameNo / model3->getResolution(), values);
Chris@283 270 }
Chris@148 271
Chris@283 272 ++lineno;
Chris@392 273 if (timingType == CSVFormat::ImplicitTiming ||
Chris@283 274 list.size() == 0) {
Chris@283 275 frameNo += windowSize;
Chris@283 276 }
Chris@283 277 }
Chris@148 278 }
Chris@148 279
Chris@392 280 if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148 281 model3->setMinimumLevel(min);
Chris@148 282 model3->setMaximumLevel(max);
Chris@148 283 }
Chris@148 284
Chris@148 285 return model;
Chris@148 286 }
Chris@148 287