Mercurial > hg > svcore
diff data/fileio/CSVFileReader.cpp @ 742:c10cb8782576 coreaudio_tests
Merge from branch "default"
author | Chris Cannam |
---|---|
date | Sun, 01 Jul 2012 11:53:00 +0100 |
parents | 1424aa29ae95 |
children | e802e550a1f2 |
line wrap: on
line diff
--- a/data/fileio/CSVFileReader.cpp Mon Nov 29 12:45:39 2010 +0000 +++ b/data/fileio/CSVFileReader.cpp Sun Jul 01 11:53:00 2012 +0100 @@ -17,9 +17,11 @@ #include "model/Model.h" #include "base/RealTime.h" +#include "base/StringBits.h" #include "model/SparseOneDimensionalModel.h" #include "model/SparseTimeValueModel.h" #include "model/EditableDenseThreeDimensionalModel.h" +#include "model/RegionModel.h" #include "DataFileReaderFactory.h" #include <QFile> @@ -29,11 +31,13 @@ #include <QTextStream> #include <iostream> +#include <map> CSVFileReader::CSVFileReader(QString path, CSVFormat format, size_t mainModelSampleRate) : m_format(format), m_file(0), + m_warnings(0), m_mainModelSampleRate(mainModelSampleRate) { m_file = new QFile(path); @@ -55,10 +59,10 @@ CSVFileReader::~CSVFileReader() { - std::cerr << "CSVFileReader::~CSVFileReader: file is " << m_file << std::endl; + SVDEBUG << "CSVFileReader::~CSVFileReader: file is " << m_file << endl; if (m_file) { - std::cerr << "CSVFileReader::CSVFileReader: Closing file" << std::endl; + SVDEBUG << "CSVFileReader::CSVFileReader: Closing file" << endl; m_file->close(); } delete m_file; @@ -76,27 +80,64 @@ return m_error; } +size_t +CSVFileReader::convertTimeValue(QString s, int lineno, size_t sampleRate, + size_t windowSize) const +{ + QRegExp nonNumericRx("[^0-9eE.,+-]"); + unsigned int warnLimit = 10; + + CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); + + size_t calculatedFrame = 0; + + bool ok = false; + QString numeric = s; + numeric.remove(nonNumericRx); + + if (timeUnits == CSVFormat::TimeSeconds) { + + double time = numeric.toDouble(&ok); + if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok); + calculatedFrame = int(time * sampleRate + 0.5); + + } else { + + long n = numeric.toLong(&ok); + if (n >= 0) calculatedFrame = n; + + if (timeUnits == CSVFormat::TimeWindows) { + calculatedFrame *= windowSize; + } + } + + if (!ok) { + if (m_warnings < warnLimit) { + std::cerr << "WARNING: CSVFileReader::load: " + << "Bad time format (\"" << s.toStdString() + << "\") in data line " + << lineno+1 << std::endl; + } else if (m_warnings == warnLimit) { + std::cerr << "WARNING: Too many warnings" << std::endl; + } + ++m_warnings; + } + + return calculatedFrame; +} + Model * CSVFileReader::load() const { if (!m_file) return 0; -/*!!! - CSVFormatDialog *dialog = new CSVFormatDialog - (0, m_file, m_mainModelSampleRate); - if (dialog->exec() == QDialog::Rejected) { - delete dialog; - throw DataFileReaderFactory::ImportCancelled; - } -*/ - - CSVFormat::ModelType modelType = m_format.getModelType(); + CSVFormat::ModelType modelType = m_format.getModelType(); CSVFormat::TimingType timingType = m_format.getTimingType(); - CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); - QString separator = m_format.getSeparator(); - QString::SplitBehavior behaviour = m_format.getSplitBehaviour(); + CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); size_t sampleRate = m_format.getSampleRate(); size_t windowSize = m_format.getWindowSize(); + QChar separator = m_format.getSeparator(); + bool allowQuoting = m_format.getAllowQuoting(); if (timingType == CSVFormat::ExplicitTiming) { if (modelType == CSVFormat::ThreeDimensionalModel) { @@ -114,6 +155,7 @@ SparseOneDimensionalModel *model1 = 0; SparseTimeValueModel *model2 = 0; + RegionModel *model2a = 0; EditableDenseThreeDimensionalModel *model3 = 0; Model *model = 0; @@ -126,7 +168,23 @@ float min = 0.0, max = 0.0; size_t frameNo = 0; + size_t duration = 0; + size_t endFrame = 0; + + bool haveAnyValue = false; + bool haveEndTime = false; + size_t startFrame = 0; // for calculation of dense model resolution + bool firstEverValue = true; + + std::map<QString, int> labelCountMap; + + int valueColumns = 0; + for (int i = 0; i < m_format.getColumnCount(); ++i) { + if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) { + ++valueColumns; + } + } while (!in.atEnd()) { @@ -150,8 +208,7 @@ if (line.startsWith("#")) continue; - QStringList list = line.split(separator, behaviour); - + QStringList list = StringBits::split(line, separator, allowQuoting); if (!model) { switch (modelType) { @@ -166,110 +223,112 @@ model = model2; break; + case CSVFormat::TwoDimensionalModelWithDuration: + model2a = new RegionModel(sampleRate, windowSize, false); + model = model2a; + break; + case CSVFormat::ThreeDimensionalModel: model3 = new EditableDenseThreeDimensionalModel (sampleRate, windowSize, - list.size(), + valueColumns, EditableDenseThreeDimensionalModel::NoCompression); model = model3; break; } } - QStringList tidyList; - QRegExp nonNumericRx("[^0-9eE.,+-]"); + float value = 0.f; + QString label = ""; + + duration = 0.f; + haveEndTime = false; for (int i = 0; i < list.size(); ++i) { - - QString s(list[i].trimmed()); - if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { - s = s.mid(1, s.length() - 2); - } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { - s = s.mid(1, s.length() - 2); + QString s = list[i]; + + CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i); + + switch (purpose) { + + case CSVFormat::ColumnUnknown: + break; + + case CSVFormat::ColumnStartTime: + frameNo = convertTimeValue(s, lineno, sampleRate, windowSize); + break; + + case CSVFormat::ColumnEndTime: + endFrame = convertTimeValue(s, lineno, sampleRate, windowSize); + haveEndTime = true; + break; + + case CSVFormat::ColumnDuration: + duration = convertTimeValue(s, lineno, sampleRate, windowSize); + break; + + case CSVFormat::ColumnValue: + value = s.toFloat(); + haveAnyValue = true; + break; + + case CSVFormat::ColumnLabel: + label = s; + ++labelCountMap[label]; + break; } + } - if (i == 0 && timingType == CSVFormat::ExplicitTiming) { - - bool ok = false; - QString numeric = s; - numeric.remove(nonNumericRx); - - if (timeUnits == CSVFormat::TimeSeconds) { - - double time = numeric.toDouble(&ok); - frameNo = int(time * sampleRate + 0.5); - - } else { - - frameNo = numeric.toInt(&ok); - - if (timeUnits == CSVFormat::TimeWindows) { - frameNo *= windowSize; - } - } - - if (!ok) { - if (warnings < warnLimit) { - std::cerr << "WARNING: CSVFileReader::load: " - << "Bad time format (\"" << s.toStdString() - << "\") in data line " - << lineno+1 << ":" << std::endl; - std::cerr << line.toStdString() << std::endl; - } else if (warnings == warnLimit) { - std::cerr << "WARNING: Too many warnings" << std::endl; - } - ++warnings; - } - } else { - tidyList.push_back(s); + if (haveEndTime) { // ... calculate duration now all cols read + if (endFrame > frameNo) { + duration = endFrame - frameNo; } } if (modelType == CSVFormat::OneDimensionalModel) { - SparseOneDimensionalModel::Point point - (frameNo, - tidyList.size() > 0 ? tidyList[tidyList.size()-1] : - QString("%1").arg(lineno+1)); - + SparseOneDimensionalModel::Point point(frameNo, label); model1->addPoint(point); } else if (modelType == CSVFormat::TwoDimensionalModel) { - SparseTimeValueModel::Point point - (frameNo, - tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0, - tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno+1)); + SparseTimeValueModel::Point point(frameNo, value, label); + model2->addPoint(point); - model2->addPoint(point); + } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) { + + RegionModel::Point point(frameNo, value, duration, label); + model2a->addPoint(point); } else if (modelType == CSVFormat::ThreeDimensionalModel) { DenseThreeDimensionalModel::Column values; - for (int i = 0; i < tidyList.size(); ++i) { + for (int i = 0; i < list.size(); ++i) { + + if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) { + continue; + } bool ok = false; float value = list[i].toFloat(&ok); - if (i > 0 || timingType != CSVFormat::ExplicitTiming) { - values.push_back(value); - } + values.push_back(value); - bool firstEver = (lineno == 0 && i == 0); - - if (firstEver || value < min) min = value; - if (firstEver || value > max) max = value; - - if (firstEver) { + if (firstEverValue || value < min) min = value; + if (firstEverValue || value > max) max = value; + + if (firstEverValue) { startFrame = frameNo; model3->setStartFrame(startFrame); } else if (lineno == 1 && timingType == CSVFormat::ExplicitTiming) { model3->setResolution(frameNo - startFrame); } + + firstEverValue = false; if (!ok) { if (warnings < warnLimit) { @@ -278,7 +337,7 @@ << list[i].toStdString() << "\" in data line " << lineno+1 << ":" << std::endl; - std::cerr << line.toStdString() << std::endl; + std::cerr << line << std::endl; ++warnings; } else if (warnings == warnLimit) { // std::cerr << "WARNING: Too many warnings" << std::endl; @@ -286,8 +345,8 @@ } } -// std::cerr << "Setting bin values for count " << lineno << ", frame " -// << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl; +// SVDEBUG << "Setting bin values for count " << lineno << ", frame " +// << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl; model3->setColumn(lineno, values); } @@ -300,6 +359,47 @@ } } + if (!haveAnyValue) { + if (model2a) { + // assign values for regions based on label frequency; we + // have this in our labelCountMap, sort of + + std::map<int, std::map<QString, float> > countLabelValueMap; + for (std::map<QString, int>::iterator i = labelCountMap.begin(); + i != labelCountMap.end(); ++i) { + countLabelValueMap[i->second][i->first] = 0.f; + } + + float v = 0.f; + for (std::map<int, std::map<QString, float> >::iterator i = + countLabelValueMap.end(); i != countLabelValueMap.begin(); ) { + --i; + for (std::map<QString, float>::iterator j = i->second.begin(); + j != i->second.end(); ++j) { + j->second = v; + v = v + 1.f; + } + } + + std::map<RegionModel::Point, RegionModel::Point, + RegionModel::Point::Comparator> pointMap; + for (RegionModel::PointList::const_iterator i = + model2a->getPoints().begin(); + i != model2a->getPoints().end(); ++i) { + RegionModel::Point p(*i); + v = countLabelValueMap[labelCountMap[p.label]][p.label]; + RegionModel::Point pp(p.frame, v, p.duration, p.label); + pointMap[p] = pp; + } + + for (std::map<RegionModel::Point, RegionModel::Point>::iterator i = + pointMap.begin(); i != pointMap.end(); ++i) { + model2a->deletePoint(i->first); + model2a->addPoint(i->second); + } + } + } + if (modelType == CSVFormat::ThreeDimensionalModel) { model3->setMinimumLevel(min); model3->setMaximumLevel(max);