Mercurial > hg > svcore
diff data/fileio/CSVFormat.h @ 629:35499d48a5d1
* Start overhauling CSV parser to associate purposes with columns en route to its guesses; add some string manipulation code
author | Chris Cannam |
---|---|
date | Thu, 15 Jul 2010 15:27:21 +0000 |
parents | 001db550bd48 |
children | 11a664058dd8 |
line wrap: on
line diff
--- a/data/fileio/CSVFormat.h Thu Jul 08 14:22:28 2010 +0000
+++ b/data/fileio/CSVFormat.h Thu Jul 15 15:27:21 2010 +0000
@@ -45,7 +45,22 @@
         TimeWindows
     };
 
-    CSVFormat(QString path); // guess format
+    enum ColumnPurpose {
+        ColumnUnknown,
+        ColumnStartTime,
+        ColumnEndTime,
+        ColumnDuration,
+        ColumnValue,
+        ColumnLabel
+    };
+
+    enum ColumnQuality {
+        ColumnNumeric = 0x1,
+        ColumnIntegral = 0x2,
+        ColumnIncreasing = 0x4,
+        ColumnLarge = 0x8
+    };
+    typedef unsigned int ColumnQualities;
 
     CSVFormat() : // arbitrary defaults
         m_modelType(TwoDimensionalModel),
@@ -55,8 +70,23 @@
         m_separator(","),
         m_sampleRate(44100),
         m_windowSize(1024),
-        m_behaviour(QString::KeepEmptyParts)
+        m_columnCount(0),
+        m_variableColumnCount(false),
+        m_behaviour(QString::KeepEmptyParts),
+        m_allowQuoting(true),
+        m_maxExampleCols(0)
     { }
+
+    CSVFormat(QString path); // guess format
+
+    /**
+     * Guess the format of the given CSV file, setting the fields in
+     * this object accordingly. If the current separator is the empty
+     * string, the separator character will also be guessed; otherwise
+     * the current separator will be used. The other properties of
+     * this object will be set according to guesses from the file.
+     */
+    void guessFormatFor(QString path);
 
     ModelType getModelType() const { return m_modelType; }
     TimingType getTimingType() const { return m_timingType; }
@@ -65,8 +95,9 @@
     QString getSeparator() const { return m_separator; }
     size_t getSampleRate() const { return m_sampleRate; }
     size_t getWindowSize() const { return m_windowSize; }
-    
+
     QString::SplitBehavior getSplitBehaviour() const { return m_behaviour; }
+    QList<ColumnPurpose> getColumnPurposes() const { return m_columnPurposes; }
 
     void setModelType(ModelType t) { m_modelType = t; }
     void setTimingType(TimingType t) { m_timingType = t; }
@@ -77,8 +108,12 @@
     void setWindowSize(size_t s) { m_windowSize = s; }
 
     void setSplitBehaviour(QString::SplitBehavior b) { m_behaviour = b; }
+    void setColumnPurposes(QList<ColumnPurpose> cl) { m_columnPurposes = cl; }
 
-    // only valid if constructor that guesses format was used:
+    // read-only; only valid if format has been guessed:
+    QList<ColumnQualities> getColumnQualities() const { return m_columnQualities; }
+
+    // read-only; only valid if format has been guessed:
     QList<QStringList> getExample() const { return m_example; }
     int getMaxExampleCols() const { return m_maxExampleCols; }
 
@@ -91,10 +126,26 @@
     size_t m_sampleRate;
     size_t m_windowSize;
 
+    int m_columnCount;
+    bool m_variableColumnCount;
+
+    QList<ColumnQualities> m_columnQualities;
+    QList<ColumnPurpose> m_columnPurposes;
+
+    QList<float> m_prevValues;
+
     QString::SplitBehavior m_behaviour;
+    bool m_allowQuoting;
 
     QList<QStringList> m_example;
     int m_maxExampleCols;
+
+    void guessSeparator(QString line);
+    void guessQualities(QString line, int lineno);
+    void guessPurposes();
+
+    void guessFormatFor_Old(QString path);
+
 };
 
 #endif