comparison data/fileio/CSVFormat.cpp @ 1021:1888ca033a84

If the first column of a CSV file has zero or one non-empty values in its first 5 rows, then ignore it by default
author Chris Cannam
date Mon, 01 Dec 2014 10:18:55 +0000
parents 1974859baba5
children 1bf38a4b91c4
comparison
equal deleted inserted replaced
1020:9c00e7944bf2 1021:1888ca033a84
108 108
109 // All columns are regarded as having these qualities until we see 109 // All columns are regarded as having these qualities until we see
110 // something that indicates otherwise: 110 // something that indicates otherwise:
111 111
112 ColumnQualities defaultQualities = 112 ColumnQualities defaultQualities =
113 ColumnNumeric | ColumnIntegral | ColumnIncreasing; 113 ColumnNumeric | ColumnIntegral | ColumnIncreasing | ColumnNearEmpty;
114 114
115 for (int i = 0; i < cols; ++i) { 115 for (int i = 0; i < cols; ++i) {
116 116
117 while (m_columnQualities.size() <= i) { 117 while (m_columnQualities.size() <= i) {
118 m_columnQualities.push_back(defaultQualities); 118 m_columnQualities.push_back(defaultQualities);
126 126
127 bool numeric = (qualities & ColumnNumeric); 127 bool numeric = (qualities & ColumnNumeric);
128 bool integral = (qualities & ColumnIntegral); 128 bool integral = (qualities & ColumnIntegral);
129 bool increasing = (qualities & ColumnIncreasing); 129 bool increasing = (qualities & ColumnIncreasing);
130 bool large = (qualities & ColumnLarge); // this one defaults to off 130 bool large = (qualities & ColumnLarge); // this one defaults to off
131 131 bool emptyish = (qualities & ColumnNearEmpty);
132
133 if (lineno > 1 && s.trimmed() != "") {
134 emptyish = false;
135 }
136
132 float value = 0.f; 137 float value = 0.f;
133 138
134 //!!! how to take into account headers? 139 //!!! how to take into account headers?
135 140
136 if (numeric) { 141 if (numeric) {
164 169
165 m_columnQualities[i] = 170 m_columnQualities[i] =
166 (numeric ? ColumnNumeric : 0) | 171 (numeric ? ColumnNumeric : 0) |
167 (integral ? ColumnIntegral : 0) | 172 (integral ? ColumnIntegral : 0) |
168 (increasing ? ColumnIncreasing : 0) | 173 (increasing ? ColumnIncreasing : 0) |
169 (large ? ColumnLarge : 0); 174 (large ? ColumnLarge : 0) |
175 (emptyish ? ColumnNearEmpty : 0);
170 } 176 }
171 177
172 if (lineno < 10) { 178 if (lineno < 10) {
173 m_example.push_back(list); 179 m_example.push_back(list);
174 if (lineno == 0 || cols > m_maxExampleCols) { 180 if (lineno == 0 || cols > m_maxExampleCols) {
188 { 194 {
189 m_timingType = CSVFormat::ImplicitTiming; 195 m_timingType = CSVFormat::ImplicitTiming;
190 m_timeUnits = CSVFormat::TimeWindows; 196 m_timeUnits = CSVFormat::TimeWindows;
191 197
192 int timingColumnCount = 0; 198 int timingColumnCount = 0;
199
200 // if our first column has zero or one entries in it and the rest
201 // have more, then we'll default to ignoring the first column and
202 // counting the next one as primary. (e.g. Sonic Annotator output
203 // with filename at start of first column.)
204
205 int primaryColumnNo = 0;
206
207 if (m_columnCount >= 2) {
208 if ( (m_columnQualities[0] & ColumnNearEmpty) &&
209 !(m_columnQualities[1] & ColumnNearEmpty)) {
210 primaryColumnNo = 1;
211 }
212 }
193 213
194 for (int i = 0; i < m_columnCount; ++i) { 214 for (int i = 0; i < m_columnCount; ++i) {
195 215
196 ColumnPurpose purpose = ColumnUnknown; 216 ColumnPurpose purpose = ColumnUnknown;
197 bool primary = (i == 0); 217
218 if (i < primaryColumnNo) {
219 setColumnPurpose(i, purpose);
220 continue;
221 }
222
223 bool primary = (i == primaryColumnNo);
198 224
199 ColumnQualities qualities = m_columnQualities[i]; 225 ColumnQualities qualities = m_columnQualities[i];
200 226
201 bool numeric = (qualities & ColumnNumeric); 227 bool numeric = (qualities & ColumnNumeric);
202 bool integral = (qualities & ColumnIntegral); 228 bool integral = (qualities & ColumnIntegral);