# HG changeset patch # User Chris Cannam # Date 1547044790 0 # Node ID 9570ef94eaa3e99558a7639c417bb8f4eed75331 # Parent 07f23b90701a1711e762a4091e45be28cec5d737 Add mechanism to retrieve the set of plausible separators found in CSV-like file when guessing its format diff -r 07f23b90701a -r 9570ef94eaa3 data/fileio/CSVFormat.cpp --- a/data/fileio/CSVFormat.cpp Wed Nov 14 15:46:35 2018 +0000 +++ b/data/fileio/CSVFormat.cpp Wed Jan 09 14:39:50 2019 +0000 @@ -39,8 +39,6 @@ bool CSVFormat::guessFormatFor(QString path) { - m_separator = ""; // to prompt guessing for it - m_modelType = TwoDimensionalModel; m_timingType = ExplicitTiming; m_timeUnits = TimeSeconds; @@ -108,14 +106,12 @@ for (int i = 0; i < candidates.length(); ++i) { auto bits = StringBits::split(line, candidates[i], m_allowQuoting); if (bits.size() >= 2) { - SVDEBUG << "Successfully split the line into:" << endl; - for (auto b: bits) { - SVDEBUG << b << endl; + m_plausibleSeparators.insert(candidates[i]); + if (m_separator == "") { + m_separator = candidates[i]; + SVDEBUG << "Estimated column separator: '" << m_separator + << "'" << endl; } - m_separator = candidates[i]; - SVDEBUG << "Estimated column separator: '" << m_separator - << "'" << endl; - return; } } } @@ -123,9 +119,7 @@ void CSVFormat::guessQualities(QString line, int lineno) { - if (m_separator == "") { - guessSeparator(line); - } + guessSeparator(line); QStringList list = StringBits::split(line, getSeparator(), m_allowQuoting); diff -r 07f23b90701a -r 9570ef94eaa3 data/fileio/CSVFormat.h --- a/data/fileio/CSVFormat.h Wed Nov 14 15:46:35 2018 +0000 +++ b/data/fileio/CSVFormat.h Wed Jan 09 14:39:50 2019 +0000 @@ -19,6 +19,8 @@ #include #include +#include + #include "base/BaseTypes.h" class CSVFormat @@ -77,7 +79,7 @@ m_modelType(TwoDimensionalModel), m_timingType(ExplicitTiming), m_timeUnits(TimeSeconds), - m_separator(","), + m_separator(""), m_sampleRate(44100), m_windowSize(1024), m_columnCount(0), @@ -120,9 +122,13 @@ AudioSampleRange getAudioSampleRange() const { return m_audioSampleRange; } bool getAllowQuoting() const { return m_allowQuoting; } QChar getSeparator() const { - if (m_separator == "") return ' '; + if (m_separator == "") return ','; else return m_separator[0]; } + // set rather than QSet to ensure a fixed order + std::set getPlausibleSeparators() const { + return m_plausibleSeparators; + } void setModelType(ModelType t) { m_modelType = t; } void setTimingType(TimingType t) { m_timingType = t; } @@ -157,7 +163,8 @@ ModelType m_modelType; TimingType m_timingType; TimeUnits m_timeUnits; - QString m_separator; + QString m_separator; // "" or a single char - basically QChar option + std::set m_plausibleSeparators; sv_samplerate_t m_sampleRate; int m_windowSize; diff -r 07f23b90701a -r 9570ef94eaa3 data/fileio/test/CSVFormatTest.h --- a/data/fileio/test/CSVFormatTest.h Wed Nov 14 15:46:35 2018 +0000 +++ b/data/fileio/test/CSVFormatTest.h Wed Jan 09 14:39:50 2019 +0000 @@ -91,6 +91,18 @@ QCOMPARE(f.getColumnCount(), 3); } + void plausibleSeparators() { + CSVFormat f; + QVERIFY(f.guessFormatFor(csvDir.filePath("separator-many.csv"))); + std::set p; + p.insert(QChar('|')); + p.insert(QChar(',')); + p.insert(QChar(':')); + p.insert(QChar(' ')); + std::set actual = f.getPlausibleSeparators(); + QCOMPARE(actual, p); + } + void comment() { CSVFormat f; QVERIFY(f.guessFormatFor(csvDir.filePath("comment.csv"))); diff -r 07f23b90701a -r 9570ef94eaa3 data/fileio/test/csv/separator-many.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data/fileio/test/csv/separator-many.csv Wed Jan 09 14:39:50 2019 +0000 @@ -0,0 +1,4 @@ +This thing|That thing|The other thing +1|12,4|16,3 +2|14,2|And:another|column +3|16,1|1901|