comparison data/fileio/CSVFormat.cpp @ 1362:1bf38a4b91c4 3.0-integration

When importing CSV, if the first line doesn't contain a separator, leave the separator indeterminate until we arrive at a line that does. Fixes the inability to correctly load CSV files in which some lines have more columns than others.
author Chris Cannam
date Tue, 10 Jan 2017 14:18:34 +0000
parents 1888ca033a84
children 48e9f538e6e9
comparison
equal deleted inserted replaced
1361:49b43306778b 1362:1bf38a4b91c4
23 #include <QStringList> 23 #include <QStringList>
24 #include <QTextStream> 24 #include <QTextStream>
25 25
26 #include <iostream> 26 #include <iostream>
27 27
28 #include "base/Debug.h"
29
28 CSVFormat::CSVFormat(QString path) : 30 CSVFormat::CSVFormat(QString path) :
29 m_separator(""), 31 m_separator(""),
30 m_sampleRate(44100), 32 m_sampleRate(44100),
31 m_windowSize(1024), 33 m_windowSize(1024),
32 m_allowQuoting(true) 34 m_allowQuoting(true)
90 if (StringBits::split(line, candidates[i], m_allowQuoting).size() >= 2) { 92 if (StringBits::split(line, candidates[i], m_allowQuoting).size() >= 2) {
91 m_separator = candidates[i]; 93 m_separator = candidates[i];
92 return; 94 return;
93 } 95 }
94 } 96 }
95 m_separator = " ";
96 } 97 }
97 98
98 void 99 void
99 CSVFormat::guessQualities(QString line, int lineno) 100 CSVFormat::guessQualities(QString line, int lineno)
100 { 101 {
101 if (m_separator == "") guessSeparator(line); 102 if (m_separator == "") guessSeparator(line);
102 103
103 QStringList list = StringBits::split(line, m_separator[0], m_allowQuoting); 104 QStringList list = StringBits::split(line, getSeparator(), m_allowQuoting);
104 105
105 int cols = list.size(); 106 int cols = list.size();
106 if (lineno == 0 || (cols > m_columnCount)) m_columnCount = cols; 107 if (lineno == 0 || (cols > m_columnCount)) m_columnCount = cols;
107 if (cols != m_columnCount) m_variableColumnCount = true; 108 if (cols != m_columnCount) m_variableColumnCount = true;
108 109
180 if (lineno == 0 || cols > m_maxExampleCols) { 181 if (lineno == 0 || cols > m_maxExampleCols) {
181 m_maxExampleCols = cols; 182 m_maxExampleCols = cols;
182 } 183 }
183 } 184 }
184 185
185 // cerr << "Estimated column qualities: "; 186 if (lineno < 10) {
186 // for (int i = 0; i < m_columnCount; ++i) { 187 SVDEBUG << "Estimated column qualities for line " << lineno << " (reporting up to first 10): ";
187 // cerr << int(m_columnQualities[i]) << " "; 188 for (int i = 0; i < m_columnCount; ++i) {
188 // } 189 SVDEBUG << int(m_columnQualities[i]) << " ";
189 // cerr << endl; 190 }
191 SVDEBUG << endl;
192 }
190 } 193 }
191 194
192 void 195 void
193 CSVFormat::guessPurposes() 196 CSVFormat::guessPurposes()
194 { 197 {
312 } else { 315 } else {
313 m_modelType = ThreeDimensionalModel; 316 m_modelType = ThreeDimensionalModel;
314 } 317 }
315 } 318 }
316 319
317 // cerr << "Estimated column purposes: "; 320 SVDEBUG << "Estimated column purposes: ";
318 // for (int i = 0; i < m_columnCount; ++i) { 321 for (int i = 0; i < m_columnCount; ++i) {
319 // cerr << int(m_columnPurposes[i]) << " "; 322 SVDEBUG << int(m_columnPurposes[i]) << " ";
320 // } 323 }
321 // cerr << endl; 324 SVDEBUG << endl;
322 325
323 // cerr << "Estimated model type: " << m_modelType << endl; 326 SVDEBUG << "Estimated model type: " << m_modelType << endl;
324 // cerr << "Estimated timing type: " << m_timingType << endl; 327 SVDEBUG << "Estimated timing type: " << m_timingType << endl;
325 // cerr << "Estimated units: " << m_timeUnits << endl; 328 SVDEBUG << "Estimated units: " << m_timeUnits << endl;
326 } 329 }
327 330
328 CSVFormat::ColumnPurpose 331 CSVFormat::ColumnPurpose
329 CSVFormat::getColumnPurpose(int i) 332 CSVFormat::getColumnPurpose(int i)
330 { 333 {