Mercurial > hg > svcore
comparison data/fileio/CSVFormat.cpp @ 1021:1888ca033a84
If the first column of a CSV file has at most one non-empty value in its first 5 rows, ignore it by default
author | Chris Cannam |
---|---|
date | Mon, 01 Dec 2014 10:18:55 +0000 |
parents | 1974859baba5 |
children | 1bf38a4b91c4 |
comparison
equal
deleted
inserted
replaced
1020:9c00e7944bf2 | 1021:1888ca033a84 |
---|---|
108 | 108 |
109 // All columns are regarded as having these qualities until we see | 109 // All columns are regarded as having these qualities until we see |
110 // something that indicates otherwise: | 110 // something that indicates otherwise: |
111 | 111 |
112 ColumnQualities defaultQualities = | 112 ColumnQualities defaultQualities = |
113 ColumnNumeric | ColumnIntegral | ColumnIncreasing; | 113 ColumnNumeric | ColumnIntegral | ColumnIncreasing | ColumnNearEmpty; |
114 | 114 |
115 for (int i = 0; i < cols; ++i) { | 115 for (int i = 0; i < cols; ++i) { |
116 | 116 |
117 while (m_columnQualities.size() <= i) { | 117 while (m_columnQualities.size() <= i) { |
118 m_columnQualities.push_back(defaultQualities); | 118 m_columnQualities.push_back(defaultQualities); |
126 | 126 |
127 bool numeric = (qualities & ColumnNumeric); | 127 bool numeric = (qualities & ColumnNumeric); |
128 bool integral = (qualities & ColumnIntegral); | 128 bool integral = (qualities & ColumnIntegral); |
129 bool increasing = (qualities & ColumnIncreasing); | 129 bool increasing = (qualities & ColumnIncreasing); |
130 bool large = (qualities & ColumnLarge); // this one defaults to off | 130 bool large = (qualities & ColumnLarge); // this one defaults to off |
131 | 131 bool emptyish = (qualities & ColumnNearEmpty); |
132 | |
133 if (lineno > 1 && s.trimmed() != "") { | |
134 emptyish = false; | |
135 } | |
136 | |
132 float value = 0.f; | 137 float value = 0.f; |
133 | 138 |
134 //!!! how to take into account headers? | 139 //!!! how to take into account headers? |
135 | 140 |
136 if (numeric) { | 141 if (numeric) { |
164 | 169 |
165 m_columnQualities[i] = | 170 m_columnQualities[i] = |
166 (numeric ? ColumnNumeric : 0) | | 171 (numeric ? ColumnNumeric : 0) | |
167 (integral ? ColumnIntegral : 0) | | 172 (integral ? ColumnIntegral : 0) | |
168 (increasing ? ColumnIncreasing : 0) | | 173 (increasing ? ColumnIncreasing : 0) | |
169 (large ? ColumnLarge : 0); | 174 (large ? ColumnLarge : 0) | |
175 (emptyish ? ColumnNearEmpty : 0); | |
170 } | 176 } |
171 | 177 |
172 if (lineno < 10) { | 178 if (lineno < 10) { |
173 m_example.push_back(list); | 179 m_example.push_back(list); |
174 if (lineno == 0 || cols > m_maxExampleCols) { | 180 if (lineno == 0 || cols > m_maxExampleCols) { |
188 { | 194 { |
189 m_timingType = CSVFormat::ImplicitTiming; | 195 m_timingType = CSVFormat::ImplicitTiming; |
190 m_timeUnits = CSVFormat::TimeWindows; | 196 m_timeUnits = CSVFormat::TimeWindows; |
191 | 197 |
192 int timingColumnCount = 0; | 198 int timingColumnCount = 0; |
199 | |
200 // if our first column has zero or one entries in it and the rest | |
201 // have more, then we'll default to ignoring the first column and | |
202 // counting the next one as primary. (e.g. Sonic Annotator output | |
203 // with filename at start of first column.) | |
204 | |
205 int primaryColumnNo = 0; | |
206 | |
207 if (m_columnCount >= 2) { | |
208 if ( (m_columnQualities[0] & ColumnNearEmpty) && | |
209 !(m_columnQualities[1] & ColumnNearEmpty)) { | |
210 primaryColumnNo = 1; | |
211 } | |
212 } | |
193 | 213 |
194 for (int i = 0; i < m_columnCount; ++i) { | 214 for (int i = 0; i < m_columnCount; ++i) { |
195 | 215 |
196 ColumnPurpose purpose = ColumnUnknown; | 216 ColumnPurpose purpose = ColumnUnknown; |
197 bool primary = (i == 0); | 217 |
218 if (i < primaryColumnNo) { | |
219 setColumnPurpose(i, purpose); | |
220 continue; | |
221 } | |
222 | |
223 bool primary = (i == primaryColumnNo); | |
198 | 224 |
199 ColumnQualities qualities = m_columnQualities[i]; | 225 ColumnQualities qualities = m_columnQualities[i]; |
200 | 226 |
201 bool numeric = (qualities & ColumnNumeric); | 227 bool numeric = (qualities & ColumnNumeric); |
202 bool integral = (qualities & ColumnIntegral); | 228 bool integral = (qualities & ColumnIntegral); |