Chris@148: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@148: Chris@148: /* Chris@148: Sonic Visualiser Chris@148: An audio file viewer and annotation editor. Chris@148: Centre for Digital Music, Queen Mary, University of London. Chris@148: This file copyright 2006 Chris Cannam. Chris@148: Chris@148: This program is free software; you can redistribute it and/or Chris@148: modify it under the terms of the GNU General Public License as Chris@148: published by the Free Software Foundation; either version 2 of the Chris@148: License, or (at your option) any later version. See the file Chris@148: COPYING included with this distribution for more information. Chris@148: */ Chris@148: Chris@148: #include "CSVFileReader.h" Chris@148: Chris@150: #include "model/Model.h" Chris@148: #include "base/RealTime.h" Chris@631: #include "base/StringBits.h" Chris@148: #include "model/SparseOneDimensionalModel.h" Chris@148: #include "model/SparseTimeValueModel.h" Chris@152: #include "model/EditableDenseThreeDimensionalModel.h" Chris@628: #include "model/RegionModel.h" Chris@308: #include "DataFileReaderFactory.h" Chris@148: Chris@148: #include Chris@148: #include Chris@148: #include Chris@148: #include Chris@148: #include Chris@148: Chris@148: #include Chris@628: #include Chris@148: Chris@392: CSVFileReader::CSVFileReader(QString path, CSVFormat format, Chris@392: size_t mainModelSampleRate) : Chris@392: m_format(format), Chris@148: m_file(0), Chris@631: m_warnings(0), Chris@148: m_mainModelSampleRate(mainModelSampleRate) Chris@148: { Chris@148: m_file = new QFile(path); Chris@148: bool good = false; Chris@148: Chris@148: if (!m_file->exists()) { Chris@148: m_error = QFile::tr("File \"%1\" does not exist").arg(path); Chris@148: } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) { Chris@148: m_error = QFile::tr("Failed to open file \"%1\"").arg(path); Chris@148: } else { Chris@148: good = true; Chris@148: } Chris@148: Chris@148: if (!good) { Chris@148: delete m_file; Chris@148: m_file = 0; Chris@148: } Chris@148: } Chris@148: Chris@148: CSVFileReader::~CSVFileReader() Chris@148: { Chris@690: SVDEBUG << "CSVFileReader::~CSVFileReader: file is " << m_file << endl; Chris@148: Chris@148: if (m_file) { Chris@690: SVDEBUG << "CSVFileReader::CSVFileReader: Closing file" << endl; Chris@148: m_file->close(); Chris@148: } Chris@148: delete m_file; Chris@148: } Chris@148: Chris@148: bool Chris@148: CSVFileReader::isOK() const Chris@148: { Chris@148: return (m_file != 0); Chris@148: } Chris@148: Chris@148: QString Chris@148: CSVFileReader::getError() const Chris@148: { Chris@148: return m_error; Chris@148: } Chris@148: Chris@631: size_t Chris@631: CSVFileReader::convertTimeValue(QString s, int lineno, size_t sampleRate, Chris@631: size_t windowSize) const Chris@631: { Chris@631: QRegExp nonNumericRx("[^0-9eE.,+-]"); Chris@631: unsigned int warnLimit = 10; Chris@631: Chris@631: CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); Chris@631: Chris@631: size_t calculatedFrame = 0; Chris@631: Chris@631: bool ok = false; Chris@631: QString numeric = s; Chris@631: numeric.remove(nonNumericRx); Chris@631: Chris@631: if (timeUnits == CSVFormat::TimeSeconds) { Chris@631: Chris@631: double time = numeric.toDouble(&ok); Chris@631: if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok); Chris@631: calculatedFrame = int(time * sampleRate + 0.5); Chris@631: Chris@631: } else { Chris@631: Chris@631: long n = numeric.toLong(&ok); Chris@631: if (n >= 0) calculatedFrame = n; Chris@631: Chris@631: if (timeUnits == CSVFormat::TimeWindows) { Chris@631: calculatedFrame *= windowSize; Chris@631: } Chris@631: } Chris@631: Chris@631: if (!ok) { Chris@631: if (m_warnings < warnLimit) { Chris@631: std::cerr << "WARNING: CSVFileReader::load: " Chris@631: << "Bad time format (\"" << s.toStdString() Chris@631: << "\") in data line " Chris@631: << lineno+1 << std::endl; Chris@631: } else if (m_warnings == warnLimit) { Chris@631: std::cerr << "WARNING: Too many warnings" << std::endl; Chris@631: } Chris@631: ++m_warnings; Chris@631: } Chris@631: Chris@631: return calculatedFrame; Chris@631: } Chris@631: Chris@148: Model * Chris@148: CSVFileReader::load() const Chris@148: { Chris@148: if (!m_file) return 0; Chris@148: Chris@628: CSVFormat::ModelType modelType = m_format.getModelType(); Chris@392: CSVFormat::TimingType timingType = m_format.getTimingType(); Chris@628: CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); Chris@392: size_t sampleRate = m_format.getSampleRate(); Chris@392: size_t windowSize = m_format.getWindowSize(); Chris@631: QChar separator = m_format.getSeparator(); Chris@631: bool allowQuoting = m_format.getAllowQuoting(); Chris@148: Chris@392: if (timingType == CSVFormat::ExplicitTiming) { Chris@611: if (modelType == CSVFormat::ThreeDimensionalModel) { Chris@611: // This will be overridden later if more than one line Chris@611: // appears in our file, but we want to choose a default Chris@611: // that's likely to be visible Chris@611: windowSize = 1024; Chris@611: } else { Chris@611: windowSize = 1; Chris@611: } Chris@392: if (timeUnits == CSVFormat::TimeSeconds) { Chris@148: sampleRate = m_mainModelSampleRate; Chris@148: } Chris@148: } Chris@148: Chris@148: SparseOneDimensionalModel *model1 = 0; Chris@148: SparseTimeValueModel *model2 = 0; Chris@628: RegionModel *model2a = 0; Chris@152: EditableDenseThreeDimensionalModel *model3 = 0; Chris@148: Model *model = 0; Chris@148: Chris@148: QTextStream in(m_file); Chris@148: in.seek(0); Chris@148: Chris@148: unsigned int warnings = 0, warnLimit = 10; Chris@148: unsigned int lineno = 0; Chris@148: Chris@148: float min = 0.0, max = 0.0; Chris@148: Chris@148: size_t frameNo = 0; Chris@628: size_t duration = 0; Chris@631: size_t endFrame = 0; Chris@631: Chris@631: bool haveAnyValue = false; Chris@631: bool haveEndTime = false; Chris@631: Chris@611: size_t startFrame = 0; // for calculation of dense model resolution Chris@631: bool firstEverValue = true; Chris@148: Chris@631: std::map labelCountMap; Chris@631: Chris@676: int valueColumns = 0; Chris@676: for (int i = 0; i < m_format.getColumnCount(); ++i) { Chris@676: if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) { Chris@676: ++valueColumns; Chris@676: } Chris@676: } Chris@676: Chris@148: while (!in.atEnd()) { Chris@148: Chris@283: // QTextStream's readLine doesn't cope with old-style Mac Chris@283: // CR-only line endings. Why did they bother making the class Chris@283: // cope with more than one sort of line ending, if it still Chris@283: // can't be configured to cope with all the common sorts? Chris@148: Chris@283: // For the time being we'll deal with this case (which is Chris@283: // relatively uncommon for us, but still necessary to handle) Chris@283: // by reading the entire file using a single readLine, and Chris@283: // splitting it. For CR and CR/LF line endings this will just Chris@283: // read a line at a time, and that's obviously OK. Chris@148: Chris@283: QString chunk = in.readLine(); Chris@283: QStringList lines = chunk.split('\r', QString::SkipEmptyParts); Chris@283: Chris@283: for (size_t li = 0; li < lines.size(); ++li) { Chris@148: Chris@283: QString line = lines[li]; Chris@148: Chris@283: if (line.startsWith("#")) continue; Chris@283: Chris@631: QStringList list = StringBits::split(line, separator, allowQuoting); Chris@283: if (!model) { Chris@283: Chris@283: switch (modelType) { Chris@283: Chris@392: case CSVFormat::OneDimensionalModel: Chris@283: model1 = new SparseOneDimensionalModel(sampleRate, windowSize); Chris@283: model = model1; Chris@283: break; Chris@148: Chris@392: case CSVFormat::TwoDimensionalModel: Chris@283: model2 = new SparseTimeValueModel(sampleRate, windowSize, false); Chris@283: model = model2; Chris@283: break; Chris@148: Chris@628: case CSVFormat::TwoDimensionalModelWithDuration: Chris@628: model2a = new RegionModel(sampleRate, windowSize, false); Chris@628: model = model2a; Chris@628: break; Chris@628: Chris@392: case CSVFormat::ThreeDimensionalModel: Chris@535: model3 = new EditableDenseThreeDimensionalModel Chris@535: (sampleRate, Chris@535: windowSize, Chris@676: valueColumns, Chris@535: EditableDenseThreeDimensionalModel::NoCompression); Chris@283: model = model3; Chris@283: break; Chris@283: } Chris@283: } Chris@148: Chris@631: float value = 0.f; Chris@631: QString label = ""; Chris@148: Chris@631: duration = 0.f; Chris@631: haveEndTime = false; Chris@628: Chris@283: for (int i = 0; i < list.size(); ++i) { Chris@148: Chris@631: QString s = list[i]; Chris@631: Chris@631: CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i); Chris@631: Chris@631: switch (purpose) { Chris@631: Chris@631: case CSVFormat::ColumnUnknown: Chris@631: break; Chris@631: Chris@631: case CSVFormat::ColumnStartTime: Chris@631: frameNo = convertTimeValue(s, lineno, sampleRate, windowSize); Chris@631: break; Chris@631: Chris@631: case CSVFormat::ColumnEndTime: Chris@631: endFrame = convertTimeValue(s, lineno, sampleRate, windowSize); Chris@631: haveEndTime = true; Chris@631: break; Chris@631: Chris@631: case CSVFormat::ColumnDuration: Chris@631: duration = convertTimeValue(s, lineno, sampleRate, windowSize); Chris@631: break; Chris@631: Chris@631: case CSVFormat::ColumnValue: Chris@631: value = s.toFloat(); Chris@631: haveAnyValue = true; Chris@631: break; Chris@631: Chris@631: case CSVFormat::ColumnLabel: Chris@631: label = s; Chris@631: ++labelCountMap[label]; Chris@631: break; Chris@283: } Chris@631: } Chris@148: Chris@631: if (haveEndTime) { // ... calculate duration now all cols read Chris@631: if (endFrame > frameNo) { Chris@631: duration = endFrame - frameNo; Chris@628: } Chris@283: } Chris@148: Chris@392: if (modelType == CSVFormat::OneDimensionalModel) { Chris@148: Chris@631: SparseOneDimensionalModel::Point point(frameNo, label); Chris@283: model1->addPoint(point); Chris@148: Chris@392: } else if (modelType == CSVFormat::TwoDimensionalModel) { Chris@148: Chris@631: SparseTimeValueModel::Point point(frameNo, value, label); Chris@283: model2->addPoint(point); Chris@148: Chris@628: } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) { Chris@628: Chris@631: RegionModel::Point point(frameNo, value, duration, label); Chris@628: model2a->addPoint(point); Chris@628: Chris@392: } else if (modelType == CSVFormat::ThreeDimensionalModel) { Chris@148: Chris@283: DenseThreeDimensionalModel::Column values; Chris@148: Chris@631: for (int i = 0; i < list.size(); ++i) { Chris@148: Chris@676: if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) { Chris@676: continue; Chris@676: } Chris@676: Chris@283: bool ok = false; Chris@283: float value = list[i].toFloat(&ok); Chris@611: Chris@676: values.push_back(value); Chris@148: Chris@631: if (firstEverValue || value < min) min = value; Chris@631: if (firstEverValue || value > max) max = value; Chris@676: Chris@631: if (firstEverValue) { Chris@611: startFrame = frameNo; Chris@611: model3->setStartFrame(startFrame); Chris@611: } else if (lineno == 1 && Chris@611: timingType == CSVFormat::ExplicitTiming) { Chris@611: model3->setResolution(frameNo - startFrame); Chris@611: } Chris@631: Chris@631: firstEverValue = false; Chris@148: Chris@283: if (!ok) { Chris@283: if (warnings < warnLimit) { Chris@283: std::cerr << "WARNING: CSVFileReader::load: " Chris@390: << "Non-numeric value \"" Chris@390: << list[i].toStdString() Chris@491: << "\" in data line " << lineno+1 Chris@283: << ":" << std::endl; Chris@686: std::cerr << line << std::endl; Chris@283: ++warnings; Chris@283: } else if (warnings == warnLimit) { Chris@390: // std::cerr << "WARNING: Too many warnings" << std::endl; Chris@283: } Chris@283: } Chris@283: } Chris@148: Chris@690: // SVDEBUG << "Setting bin values for count " << lineno << ", frame " Chris@687: // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl; Chris@148: Chris@611: model3->setColumn(lineno, values); Chris@283: } Chris@148: Chris@283: ++lineno; Chris@392: if (timingType == CSVFormat::ImplicitTiming || Chris@283: list.size() == 0) { Chris@283: frameNo += windowSize; Chris@283: } Chris@283: } Chris@148: } Chris@148: Chris@631: if (!haveAnyValue) { Chris@631: if (model2a) { Chris@631: // assign values for regions based on label frequency; we Chris@631: // have this in our labelCountMap, sort of Chris@631: Chris@631: std::map > countLabelValueMap; Chris@631: for (std::map::iterator i = labelCountMap.begin(); Chris@631: i != labelCountMap.end(); ++i) { Chris@631: countLabelValueMap[i->second][i->first] = 0.f; Chris@631: } Chris@631: Chris@631: float v = 0.f; Chris@631: for (std::map >::iterator i = Chris@631: countLabelValueMap.end(); i != countLabelValueMap.begin(); ) { Chris@631: --i; Chris@631: for (std::map::iterator j = i->second.begin(); Chris@631: j != i->second.end(); ++j) { Chris@631: j->second = v; Chris@631: v = v + 1.f; Chris@631: } Chris@631: } Chris@631: Chris@631: std::map pointMap; Chris@631: for (RegionModel::PointList::const_iterator i = Chris@631: model2a->getPoints().begin(); Chris@631: i != model2a->getPoints().end(); ++i) { Chris@631: RegionModel::Point p(*i); Chris@631: v = countLabelValueMap[labelCountMap[p.label]][p.label]; Chris@631: RegionModel::Point pp(p.frame, v, p.duration, p.label); Chris@631: pointMap[p] = pp; Chris@631: } Chris@631: Chris@631: for (std::map::iterator i = Chris@631: pointMap.begin(); i != pointMap.end(); ++i) { Chris@631: model2a->deletePoint(i->first); Chris@631: model2a->addPoint(i->second); Chris@631: } Chris@631: } Chris@631: } Chris@631: Chris@392: if (modelType == CSVFormat::ThreeDimensionalModel) { Chris@148: model3->setMinimumLevel(min); Chris@148: model3->setMaximumLevel(max); Chris@148: } Chris@148: Chris@148: return model; Chris@148: } Chris@148: