comparison data/fileio/CSVFormat.h @ 629:35499d48a5d1

* Start overhauling CSV parser to associate purposes with columns en route to its guesses; add some string manipulation code
author Chris Cannam
date Thu, 15 Jul 2010 15:27:21 +0000
parents 001db550bd48
children 11a664058dd8
comparison
equal deleted inserted replaced
628:001db550bd48 629:35499d48a5d1
43 TimeSeconds, 43 TimeSeconds,
44 TimeAudioFrames, 44 TimeAudioFrames,
45 TimeWindows 45 TimeWindows
46 }; 46 };
47 47
48 CSVFormat(QString path); // guess format 48 enum ColumnPurpose {
49 ColumnUnknown,
50 ColumnStartTime,
51 ColumnEndTime,
52 ColumnDuration,
53 ColumnValue,
54 ColumnLabel
55 };
56
57 enum ColumnQuality {
58 ColumnNumeric = 0x1,
59 ColumnIntegral = 0x2,
60 ColumnIncreasing = 0x4,
61 ColumnLarge = 0x8
62 };
63 typedef unsigned int ColumnQualities;
49 64
50 CSVFormat() : // arbitrary defaults 65 CSVFormat() : // arbitrary defaults
51 m_modelType(TwoDimensionalModel), 66 m_modelType(TwoDimensionalModel),
52 m_timingType(ExplicitTiming), 67 m_timingType(ExplicitTiming),
53 m_durationType(Durations), 68 m_durationType(Durations),
54 m_timeUnits(TimeSeconds), 69 m_timeUnits(TimeSeconds),
55 m_separator(","), 70 m_separator(","),
56 m_sampleRate(44100), 71 m_sampleRate(44100),
57 m_windowSize(1024), 72 m_windowSize(1024),
58 m_behaviour(QString::KeepEmptyParts) 73 m_columnCount(0),
74 m_variableColumnCount(false),
75 m_behaviour(QString::KeepEmptyParts),
76 m_allowQuoting(true),
77 m_maxExampleCols(0)
59 { } 78 { }
79
80 CSVFormat(QString path); // guess format
81
82 /**
83 * Guess the format of the given CSV file, setting the fields in
84 * this object accordingly. If the current separator is the empty
85 * string, the separator character will also be guessed; otherwise
86 * the current separator will be used. The other properties of
87 * this object will be set according to guesses from the file.
88 */
89 void guessFormatFor(QString path);
60 90
61 ModelType getModelType() const { return m_modelType; } 91 ModelType getModelType() const { return m_modelType; }
62 TimingType getTimingType() const { return m_timingType; } 92 TimingType getTimingType() const { return m_timingType; }
63 DurationType getDurationType() const { return m_durationType; } 93 DurationType getDurationType() const { return m_durationType; }
64 TimeUnits getTimeUnits() const { return m_timeUnits; } 94 TimeUnits getTimeUnits() const { return m_timeUnits; }
65 QString getSeparator() const { return m_separator; } 95 QString getSeparator() const { return m_separator; }
66 size_t getSampleRate() const { return m_sampleRate; } 96 size_t getSampleRate() const { return m_sampleRate; }
67 size_t getWindowSize() const { return m_windowSize; } 97 size_t getWindowSize() const { return m_windowSize; }
68 98
69 QString::SplitBehavior getSplitBehaviour() const { return m_behaviour; } 99 QString::SplitBehavior getSplitBehaviour() const { return m_behaviour; }
100 QList<ColumnPurpose> getColumnPurposes() const { return m_columnPurposes; }
70 101
71 void setModelType(ModelType t) { m_modelType = t; } 102 void setModelType(ModelType t) { m_modelType = t; }
72 void setTimingType(TimingType t) { m_timingType = t; } 103 void setTimingType(TimingType t) { m_timingType = t; }
73 void setDurationType(DurationType t) { m_durationType = t; } 104 void setDurationType(DurationType t) { m_durationType = t; }
74 void setTimeUnits(TimeUnits t) { m_timeUnits = t; } 105 void setTimeUnits(TimeUnits t) { m_timeUnits = t; }
75 void setSeparator(QString s) { m_separator = s; } 106 void setSeparator(QString s) { m_separator = s; }
76 void setSampleRate(size_t r) { m_sampleRate = r; } 107 void setSampleRate(size_t r) { m_sampleRate = r; }
77 void setWindowSize(size_t s) { m_windowSize = s; } 108 void setWindowSize(size_t s) { m_windowSize = s; }
78 109
79 void setSplitBehaviour(QString::SplitBehavior b) { m_behaviour = b; } 110 void setSplitBehaviour(QString::SplitBehavior b) { m_behaviour = b; }
111 void setColumnPurposes(QList<ColumnPurpose> cl) { m_columnPurposes = cl; }
80 112
81 // only valid if constructor that guesses format was used: 113 // read-only; only valid if format has been guessed:
114 QList<ColumnQualities> getColumnQualities() const { return m_columnQualities; }
115
116 // read-only; only valid if format has been guessed:
82 QList<QStringList> getExample() const { return m_example; } 117 QList<QStringList> getExample() const { return m_example; }
83 int getMaxExampleCols() const { return m_maxExampleCols; } 118 int getMaxExampleCols() const { return m_maxExampleCols; }
84 119
85 protected: 120 protected:
86 ModelType m_modelType; 121 ModelType m_modelType;
89 TimeUnits m_timeUnits; 124 TimeUnits m_timeUnits;
90 QString m_separator; 125 QString m_separator;
91 size_t m_sampleRate; 126 size_t m_sampleRate;
92 size_t m_windowSize; 127 size_t m_windowSize;
93 128
129 int m_columnCount;
130 bool m_variableColumnCount;
131
132 QList<ColumnQualities> m_columnQualities;
133 QList<ColumnPurpose> m_columnPurposes;
134
135 QList<float> m_prevValues;
136
94 QString::SplitBehavior m_behaviour; 137 QString::SplitBehavior m_behaviour;
138 bool m_allowQuoting;
95 139
96 QList<QStringList> m_example; 140 QList<QStringList> m_example;
97 int m_maxExampleCols; 141 int m_maxExampleCols;
142
143 void guessSeparator(QString line);
144 void guessQualities(QString line, int lineno);
145 void guessPurposes();
146
147 void guessFormatFor_Old(QString path);
148
98 }; 149 };
99 150
100 #endif 151 #endif