# HG changeset patch # User Chris Cannam # Date 1278598948 0 # Node ID 001db550bd48b606d1cb56875b09f77ab02650d8 # Parent 080d8bdd8762e4e2ed4d5be38fd31de72551974d * Add option to import time+duration (or time+endtime) from CSV files (importing to Region layers) * Fix ffwd/rwd in Region layers so as to behave like time-value layers diff -r 080d8bdd8762 -r 001db550bd48 data/fileio/CSVFileReader.cpp --- a/data/fileio/CSVFileReader.cpp Mon Jul 05 11:54:19 2010 +0000 +++ b/data/fileio/CSVFileReader.cpp Thu Jul 08 14:22:28 2010 +0000 @@ -20,6 +20,7 @@ #include "model/SparseOneDimensionalModel.h" #include "model/SparseTimeValueModel.h" #include "model/EditableDenseThreeDimensionalModel.h" +#include "model/RegionModel.h" #include "DataFileReaderFactory.h" #include @@ -29,6 +30,7 @@ #include #include +#include CSVFileReader::CSVFileReader(QString path, CSVFormat format, size_t mainModelSampleRate) : @@ -90,9 +92,10 @@ } */ - CSVFormat::ModelType modelType = m_format.getModelType(); + CSVFormat::ModelType modelType = m_format.getModelType(); CSVFormat::TimingType timingType = m_format.getTimingType(); - CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); + CSVFormat::DurationType durationType = m_format.getDurationType(); + CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); QString separator = m_format.getSeparator(); QString::SplitBehavior behaviour = m_format.getSplitBehaviour(); size_t sampleRate = m_format.getSampleRate(); @@ -114,6 +117,7 @@ SparseOneDimensionalModel *model1 = 0; SparseTimeValueModel *model2 = 0; + RegionModel *model2a = 0; EditableDenseThreeDimensionalModel *model3 = 0; Model *model = 0; @@ -126,8 +130,12 @@ float min = 0.0, max = 0.0; size_t frameNo = 0; + size_t duration = 0; size_t startFrame = 0; // for calculation of dense model resolution + std::map labelValueMap; + float syntheticMax = 0.f; + while (!in.atEnd()) { // QTextStream's readLine doesn't cope with old-style Mac @@ -166,6 +174,11 @@ model = model2; break; + case CSVFormat::TwoDimensionalModelWithDuration: + model2a = new RegionModel(sampleRate, windowSize, false); + model = model2a; + break; + case CSVFormat::ThreeDimensionalModel: model3 = new EditableDenseThreeDimensionalModel (sampleRate, @@ -180,6 +193,8 @@ QStringList tidyList; QRegExp nonNumericRx("[^0-9eE.,+-]"); + float value = 0.f; + for (int i = 0; i < list.size(); ++i) { QString s(list[i].trimmed()); @@ -190,41 +205,82 @@ s = s.mid(1, s.length() - 2); } - if (i == 0 && timingType == CSVFormat::ExplicitTiming) { + if (timingType == CSVFormat::ExplicitTiming) { + size_t calculatedFrame = 0; + + if (i == 0 || + (i == 1 && + modelType == CSVFormat::TwoDimensionalModelWithDuration)) { + + bool ok = false; + QString numeric = s; + numeric.remove(nonNumericRx); + + if (timeUnits == CSVFormat::TimeSeconds) { + + double time = numeric.toDouble(&ok); + calculatedFrame = int(time * sampleRate + 0.5); + + } else { + + calculatedFrame = numeric.toInt(&ok); + + if (timeUnits == CSVFormat::TimeWindows) { + calculatedFrame *= windowSize; + } + } + + if (!ok) { + if (warnings < warnLimit) { + std::cerr << "WARNING: CSVFileReader::load: " + << "Bad time format (\"" << s.toStdString() + << "\") in data line " + << lineno+1 << ":" << std::endl; + std::cerr << line.toStdString() << std::endl; + } else if (warnings == warnLimit) { + std::cerr << "WARNING: Too many warnings" << std::endl; + } + ++warnings; + } + + if (i == 0) frameNo = calculatedFrame; + else { + if (durationType == CSVFormat::EndTimes) { + duration = calculatedFrame - frameNo; + } else { + duration = calculatedFrame; + } + } + + continue; + } + } + + if ((i == 1 && + modelType == CSVFormat::TwoDimensionalModel) || + (i == 2 && + modelType == CSVFormat::TwoDimensionalModelWithDuration)) { bool ok = false; - QString numeric = s; - numeric.remove(nonNumericRx); + value = s.toFloat(&ok); + if (!ok) { + // cf. RDFImporter::fillModel + if (labelValueMap.find(s) == labelValueMap.end()) { + syntheticMax = syntheticMax + 1.f; + labelValueMap[s] = syntheticMax; + } + value = labelValueMap[s]; + } else { + if (value > syntheticMax) syntheticMax = value; + } + if (i + 1 == list.size()) { + // keep text around for use as label (none other given) + tidyList.push_back(s); + } + continue; + } - if (timeUnits == CSVFormat::TimeSeconds) { - - double time = numeric.toDouble(&ok); - frameNo = int(time * sampleRate + 0.5); - - } else { - - frameNo = numeric.toInt(&ok); - - if (timeUnits == CSVFormat::TimeWindows) { - frameNo *= windowSize; - } - } - - if (!ok) { - if (warnings < warnLimit) { - std::cerr << "WARNING: CSVFileReader::load: " - << "Bad time format (\"" << s.toStdString() - << "\") in data line " - << lineno+1 << ":" << std::endl; - std::cerr << line.toStdString() << std::endl; - } else if (warnings == warnLimit) { - std::cerr << "WARNING: Too many warnings" << std::endl; - } - ++warnings; - } - } else { - tidyList.push_back(s); - } + tidyList.push_back(s); } if (modelType == CSVFormat::OneDimensionalModel) { @@ -240,11 +296,21 @@ SparseTimeValueModel::Point point (frameNo, - tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0, - tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno+1)); + value, + tidyList.size() > 0 ? tidyList[0] : QString("%1").arg(lineno+1)); model2->addPoint(point); + } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) { + + RegionModel::Point point + (frameNo, + value, + duration, + tidyList.size() > 0 ? tidyList[0] : QString("%1").arg(lineno+1)); + + model2a->addPoint(point); + } else if (modelType == CSVFormat::ThreeDimensionalModel) { DenseThreeDimensionalModel::Column values; diff -r 080d8bdd8762 -r 001db550bd48 data/fileio/CSVFormat.cpp --- a/data/fileio/CSVFormat.cpp Mon Jul 05 11:54:19 2010 +0000 +++ b/data/fileio/CSVFormat.cpp Thu Jul 08 14:22:28 2010 +0000 @@ -26,6 +26,7 @@ CSVFormat::CSVFormat(QString filename) : m_modelType(TwoDimensionalModel), m_timingType(ExplicitTiming), + m_durationType(Durations), m_timeUnits(TimeSeconds), m_separator(","), m_sampleRate(44100), @@ -43,6 +44,7 @@ unsigned int lineno = 0; bool nonIncreasingPrimaries = false; + bool nonIncreasingSecondaries = false; bool nonNumericPrimaries = false; bool floatPrimaries = false; bool variableItemCount = false; @@ -50,6 +52,7 @@ int earliestNonNumericItem = -1; float prevPrimary = 0.0; + float prevSecondary = 0.0; m_maxExampleCols = 0; m_separator = ""; @@ -137,6 +140,12 @@ i < earliestNonNumericItem) { earliestNonNumericItem = i; } + } else if (i == 1) { + float secondary = s.toFloat(); + if (lineno > 0 && secondary <= prevSecondary) { + nonIncreasingSecondaries = true; + } + prevSecondary = secondary; } } } @@ -193,10 +202,17 @@ } else { m_modelType = CSVFormat::ThreeDimensionalModel; } + + if (nonIncreasingSecondaries) { + m_durationType = Durations; + } else { + m_durationType = EndTimes; + } } std::cerr << "Estimated model type: " << m_modelType << std::endl; std::cerr << "Estimated timing type: " << m_timingType << std::endl; + std::cerr << "Estimated duration type: " << m_durationType << std::endl; std::cerr << "Estimated units: " << m_timeUnits << std::endl; } diff -r 080d8bdd8762 -r 001db550bd48 data/fileio/CSVFormat.h --- a/data/fileio/CSVFormat.h Mon Jul 05 11:54:19 2010 +0000 +++ b/data/fileio/CSVFormat.h Thu Jul 08 14:22:28 2010 +0000 @@ -25,6 +25,7 @@ enum ModelType { OneDimensionalModel, TwoDimensionalModel, + TwoDimensionalModelWithDuration, ThreeDimensionalModel }; @@ -32,6 +33,11 @@ ExplicitTiming, ImplicitTiming }; + + enum DurationType { + Durations, + EndTimes + }; enum TimeUnits { TimeSeconds, @@ -44,28 +50,31 @@ CSVFormat() : // arbitrary defaults m_modelType(TwoDimensionalModel), m_timingType(ExplicitTiming), + m_durationType(Durations), m_timeUnits(TimeSeconds), m_separator(","), m_sampleRate(44100), m_windowSize(1024), m_behaviour(QString::KeepEmptyParts) { } - - ModelType getModelType() const { return m_modelType; } - TimingType getTimingType() const { return m_timingType; } - TimeUnits getTimeUnits() const { return m_timeUnits; } - QString getSeparator() const { return m_separator; } - size_t getSampleRate() const { return m_sampleRate; } - size_t getWindowSize() const { return m_windowSize; } + + ModelType getModelType() const { return m_modelType; } + TimingType getTimingType() const { return m_timingType; } + DurationType getDurationType() const { return m_durationType; } + TimeUnits getTimeUnits() const { return m_timeUnits; } + QString getSeparator() const { return m_separator; } + size_t getSampleRate() const { return m_sampleRate; } + size_t getWindowSize() const { return m_windowSize; } QString::SplitBehavior getSplitBehaviour() const { return m_behaviour; } - void setModelType(ModelType t) { m_modelType = t; } - void setTimingType(TimingType t) { m_timingType = t; } - void setTimeUnits(TimeUnits t) { m_timeUnits = t; } - void setSeparator(QString s) { m_separator = s; } - void setSampleRate(size_t r) { m_sampleRate = r; } - void setWindowSize(size_t s) { m_windowSize = s; } + void setModelType(ModelType t) { m_modelType = t; } + void setTimingType(TimingType t) { m_timingType = t; } + void setDurationType(DurationType t) { m_durationType = t; } + void setTimeUnits(TimeUnits t) { m_timeUnits = t; } + void setSeparator(QString s) { m_separator = s; } + void setSampleRate(size_t r) { m_sampleRate = r; } + void setWindowSize(size_t s) { m_windowSize = s; } void setSplitBehaviour(QString::SplitBehavior b) { m_behaviour = b; } @@ -74,12 +83,13 @@ int getMaxExampleCols() const { return m_maxExampleCols; } protected: - ModelType m_modelType; - TimingType m_timingType; - TimeUnits m_timeUnits; - QString m_separator; - size_t m_sampleRate; - size_t m_windowSize; + ModelType m_modelType; + TimingType m_timingType; + DurationType m_durationType; + TimeUnits m_timeUnits; + QString m_separator; + size_t m_sampleRate; + size_t m_windowSize; QString::SplitBehavior m_behaviour;