Chris@148: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@148: Chris@148: /* Chris@148: Sonic Visualiser Chris@148: An audio file viewer and annotation editor. Chris@148: Centre for Digital Music, Queen Mary, University of London. Chris@148: This file copyright 2006 Chris Cannam. Chris@148: Chris@148: This program is free software; you can redistribute it and/or Chris@148: modify it under the terms of the GNU General Public License as Chris@148: published by the Free Software Foundation; either version 2 of the Chris@148: License, or (at your option) any later version. See the file Chris@148: COPYING included with this distribution for more information. Chris@148: */ Chris@148: Chris@148: #include "CSVFileReader.h" Chris@148: Chris@150: #include "model/Model.h" Chris@148: #include "base/RealTime.h" Chris@148: #include "model/SparseOneDimensionalModel.h" Chris@148: #include "model/SparseTimeValueModel.h" Chris@152: #include "model/EditableDenseThreeDimensionalModel.h" Chris@628: #include "model/RegionModel.h" Chris@308: #include "DataFileReaderFactory.h" Chris@148: Chris@148: #include Chris@148: #include Chris@148: #include Chris@148: #include Chris@148: #include Chris@148: Chris@148: #include Chris@628: #include Chris@148: Chris@392: CSVFileReader::CSVFileReader(QString path, CSVFormat format, Chris@392: size_t mainModelSampleRate) : Chris@392: m_format(format), Chris@148: m_file(0), Chris@148: m_mainModelSampleRate(mainModelSampleRate) Chris@148: { Chris@148: m_file = new QFile(path); Chris@148: bool good = false; Chris@148: Chris@148: if (!m_file->exists()) { Chris@148: m_error = QFile::tr("File \"%1\" does not exist").arg(path); Chris@148: } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) { Chris@148: m_error = QFile::tr("Failed to open file \"%1\"").arg(path); Chris@148: } else { Chris@148: good = true; Chris@148: } Chris@148: Chris@148: if (!good) { Chris@148: delete m_file; Chris@148: m_file = 0; Chris@148: } Chris@148: } Chris@148: Chris@148: CSVFileReader::~CSVFileReader() Chris@148: { Chris@148: std::cerr << "CSVFileReader::~CSVFileReader: file is " << m_file << std::endl; Chris@148: Chris@148: if (m_file) { Chris@148: std::cerr << "CSVFileReader::CSVFileReader: Closing file" << std::endl; Chris@148: m_file->close(); Chris@148: } Chris@148: delete m_file; Chris@148: } Chris@148: Chris@148: bool Chris@148: CSVFileReader::isOK() const Chris@148: { Chris@148: return (m_file != 0); Chris@148: } Chris@148: Chris@148: QString Chris@148: CSVFileReader::getError() const Chris@148: { Chris@148: return m_error; Chris@148: } Chris@148: Chris@148: Model * Chris@148: CSVFileReader::load() const Chris@148: { Chris@148: if (!m_file) return 0; Chris@392: /*!!! Chris@148: CSVFormatDialog *dialog = new CSVFormatDialog Chris@148: (0, m_file, m_mainModelSampleRate); Chris@148: Chris@148: if (dialog->exec() == QDialog::Rejected) { Chris@148: delete dialog; Chris@308: throw DataFileReaderFactory::ImportCancelled; Chris@148: } Chris@392: */ Chris@148: Chris@628: CSVFormat::ModelType modelType = m_format.getModelType(); Chris@392: CSVFormat::TimingType timingType = m_format.getTimingType(); Chris@628: CSVFormat::DurationType durationType = m_format.getDurationType(); Chris@628: CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); Chris@392: QString separator = m_format.getSeparator(); Chris@392: QString::SplitBehavior behaviour = m_format.getSplitBehaviour(); Chris@392: size_t sampleRate = m_format.getSampleRate(); Chris@392: size_t windowSize = m_format.getWindowSize(); Chris@148: Chris@392: if (timingType == CSVFormat::ExplicitTiming) { Chris@611: if (modelType == CSVFormat::ThreeDimensionalModel) { Chris@611: // This will be overridden later if more than one line Chris@611: // appears in our file, but we want to choose a default Chris@611: // that's likely to be visible Chris@611: windowSize = 1024; Chris@611: } else { Chris@611: windowSize = 1; Chris@611: } Chris@392: if (timeUnits == CSVFormat::TimeSeconds) { Chris@148: sampleRate = m_mainModelSampleRate; Chris@148: } Chris@148: } Chris@148: Chris@148: SparseOneDimensionalModel *model1 = 0; Chris@148: SparseTimeValueModel *model2 = 0; Chris@628: RegionModel *model2a = 0; Chris@152: EditableDenseThreeDimensionalModel *model3 = 0; Chris@148: Model *model = 0; Chris@148: Chris@148: QTextStream in(m_file); Chris@148: in.seek(0); Chris@148: Chris@148: unsigned int warnings = 0, warnLimit = 10; Chris@148: unsigned int lineno = 0; Chris@148: Chris@148: float min = 0.0, max = 0.0; Chris@148: Chris@148: size_t frameNo = 0; Chris@628: size_t duration = 0; Chris@611: size_t startFrame = 0; // for calculation of dense model resolution Chris@148: Chris@628: std::map labelValueMap; Chris@628: float syntheticMax = 0.f; Chris@628: Chris@148: while (!in.atEnd()) { Chris@148: Chris@283: // QTextStream's readLine doesn't cope with old-style Mac Chris@283: // CR-only line endings. Why did they bother making the class Chris@283: // cope with more than one sort of line ending, if it still Chris@283: // can't be configured to cope with all the common sorts? Chris@148: Chris@283: // For the time being we'll deal with this case (which is Chris@283: // relatively uncommon for us, but still necessary to handle) Chris@283: // by reading the entire file using a single readLine, and Chris@283: // splitting it. For CR and CR/LF line endings this will just Chris@283: // read a line at a time, and that's obviously OK. Chris@148: Chris@283: QString chunk = in.readLine(); Chris@283: QStringList lines = chunk.split('\r', QString::SkipEmptyParts); Chris@283: Chris@283: for (size_t li = 0; li < lines.size(); ++li) { Chris@148: Chris@283: QString line = lines[li]; Chris@148: Chris@283: if (line.startsWith("#")) continue; Chris@283: Chris@390: QStringList list = line.split(separator, behaviour); Chris@283: Chris@283: if (!model) { Chris@283: Chris@283: switch (modelType) { Chris@283: Chris@392: case CSVFormat::OneDimensionalModel: Chris@283: model1 = new SparseOneDimensionalModel(sampleRate, windowSize); Chris@283: model = model1; Chris@283: break; Chris@148: Chris@392: case CSVFormat::TwoDimensionalModel: Chris@283: model2 = new SparseTimeValueModel(sampleRate, windowSize, false); Chris@283: model = model2; Chris@283: break; Chris@148: Chris@628: case CSVFormat::TwoDimensionalModelWithDuration: Chris@628: model2a = new RegionModel(sampleRate, windowSize, false); Chris@628: model = model2a; Chris@628: break; Chris@628: Chris@392: case CSVFormat::ThreeDimensionalModel: Chris@535: model3 = new EditableDenseThreeDimensionalModel Chris@535: (sampleRate, Chris@535: windowSize, Chris@535: list.size(), Chris@535: EditableDenseThreeDimensionalModel::NoCompression); Chris@283: model = model3; Chris@283: break; Chris@283: } Chris@283: } Chris@148: Chris@283: QStringList tidyList; Chris@390: QRegExp nonNumericRx("[^0-9eE.,+-]"); Chris@148: Chris@628: float value = 0.f; Chris@628: Chris@283: for (int i = 0; i < list.size(); ++i) { Chris@148: Chris@283: QString s(list[i].trimmed()); Chris@148: Chris@283: if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { Chris@283: s = s.mid(1, s.length() - 2); Chris@283: } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { Chris@283: s = s.mid(1, s.length() - 2); Chris@283: } Chris@148: Chris@628: if (timingType == CSVFormat::ExplicitTiming) { Chris@148: Chris@628: size_t calculatedFrame = 0; Chris@628: Chris@628: if (i == 0 || Chris@628: (i == 1 && Chris@628: modelType == CSVFormat::TwoDimensionalModelWithDuration)) { Chris@628: Chris@628: bool ok = false; Chris@628: QString numeric = s; Chris@628: numeric.remove(nonNumericRx); Chris@628: Chris@628: if (timeUnits == CSVFormat::TimeSeconds) { Chris@628: Chris@628: double time = numeric.toDouble(&ok); Chris@628: calculatedFrame = int(time * sampleRate + 0.5); Chris@628: Chris@628: } else { Chris@628: Chris@628: calculatedFrame = numeric.toInt(&ok); Chris@628: Chris@628: if (timeUnits == CSVFormat::TimeWindows) { Chris@628: calculatedFrame *= windowSize; Chris@628: } Chris@628: } Chris@628: Chris@628: if (!ok) { Chris@628: if (warnings < warnLimit) { Chris@628: std::cerr << "WARNING: CSVFileReader::load: " Chris@628: << "Bad time format (\"" << s.toStdString() Chris@628: << "\") in data line " Chris@628: << lineno+1 << ":" << std::endl; Chris@628: std::cerr << line.toStdString() << std::endl; Chris@628: } else if (warnings == warnLimit) { Chris@628: std::cerr << "WARNING: Too many warnings" << std::endl; Chris@628: } Chris@628: ++warnings; Chris@628: } Chris@628: Chris@628: if (i == 0) frameNo = calculatedFrame; Chris@628: else { Chris@628: if (durationType == CSVFormat::EndTimes) { Chris@628: duration = calculatedFrame - frameNo; Chris@628: } else { Chris@628: duration = calculatedFrame; Chris@628: } Chris@628: } Chris@628: Chris@628: continue; Chris@628: } Chris@628: } Chris@628: Chris@628: if ((i == 1 && Chris@628: modelType == CSVFormat::TwoDimensionalModel) || Chris@628: (i == 2 && Chris@628: modelType == CSVFormat::TwoDimensionalModelWithDuration)) { Chris@283: bool ok = false; Chris@628: value = s.toFloat(&ok); Chris@628: if (!ok) { Chris@628: // cf. RDFImporter::fillModel Chris@628: if (labelValueMap.find(s) == labelValueMap.end()) { Chris@628: syntheticMax = syntheticMax + 1.f; Chris@628: labelValueMap[s] = syntheticMax; Chris@628: } Chris@628: value = labelValueMap[s]; Chris@628: } else { Chris@628: if (value > syntheticMax) syntheticMax = value; Chris@628: } Chris@628: if (i + 1 == list.size()) { Chris@628: // keep text around for use as label (none other given) Chris@628: tidyList.push_back(s); Chris@628: } Chris@628: continue; Chris@628: } Chris@148: Chris@628: tidyList.push_back(s); Chris@283: } Chris@148: Chris@392: if (modelType == CSVFormat::OneDimensionalModel) { Chris@148: Chris@283: SparseOneDimensionalModel::Point point Chris@283: (frameNo, Chris@283: tidyList.size() > 0 ? tidyList[tidyList.size()-1] : Chris@491: QString("%1").arg(lineno+1)); Chris@148: Chris@283: model1->addPoint(point); Chris@148: Chris@392: } else if (modelType == CSVFormat::TwoDimensionalModel) { Chris@148: Chris@283: SparseTimeValueModel::Point point Chris@283: (frameNo, Chris@628: value, Chris@628: tidyList.size() > 0 ? tidyList[0] : QString("%1").arg(lineno+1)); Chris@148: Chris@283: model2->addPoint(point); Chris@148: Chris@628: } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) { Chris@628: Chris@628: RegionModel::Point point Chris@628: (frameNo, Chris@628: value, Chris@628: duration, Chris@628: tidyList.size() > 0 ? tidyList[0] : QString("%1").arg(lineno+1)); Chris@628: Chris@628: model2a->addPoint(point); Chris@628: Chris@392: } else if (modelType == CSVFormat::ThreeDimensionalModel) { Chris@148: Chris@283: DenseThreeDimensionalModel::Column values; Chris@148: Chris@283: for (int i = 0; i < tidyList.size(); ++i) { Chris@148: Chris@283: bool ok = false; Chris@283: float value = list[i].toFloat(&ok); Chris@611: Chris@611: if (i > 0 || timingType != CSVFormat::ExplicitTiming) { Chris@611: values.push_back(value); Chris@611: } Chris@148: Chris@611: bool firstEver = (lineno == 0 && i == 0); Chris@611: Chris@611: if (firstEver || value < min) min = value; Chris@611: if (firstEver || value > max) max = value; Chris@611: Chris@611: if (firstEver) { Chris@611: startFrame = frameNo; Chris@611: model3->setStartFrame(startFrame); Chris@611: } else if (lineno == 1 && Chris@611: timingType == CSVFormat::ExplicitTiming) { Chris@611: model3->setResolution(frameNo - startFrame); Chris@611: } Chris@148: Chris@283: if (!ok) { Chris@283: if (warnings < warnLimit) { Chris@283: std::cerr << "WARNING: CSVFileReader::load: " Chris@390: << "Non-numeric value \"" Chris@390: << list[i].toStdString() Chris@491: << "\" in data line " << lineno+1 Chris@283: << ":" << std::endl; Chris@283: std::cerr << line.toStdString() << std::endl; Chris@283: ++warnings; Chris@283: } else if (warnings == warnLimit) { Chris@390: // std::cerr << "WARNING: Too many warnings" << std::endl; Chris@283: } Chris@283: } Chris@283: } Chris@148: Chris@390: // std::cerr << "Setting bin values for count " << lineno << ", frame " Chris@390: // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl; Chris@148: Chris@611: model3->setColumn(lineno, values); Chris@283: } Chris@148: Chris@283: ++lineno; Chris@392: if (timingType == CSVFormat::ImplicitTiming || Chris@283: list.size() == 0) { Chris@283: frameNo += windowSize; Chris@283: } Chris@283: } Chris@148: } Chris@148: Chris@392: if (modelType == CSVFormat::ThreeDimensionalModel) { Chris@148: model3->setMinimumLevel(min); Chris@148: model3->setMaximumLevel(max); Chris@148: } Chris@148: Chris@148: return model; Chris@148: } Chris@148: