Mercurial > hg > svcore
changeset 1585:9570ef94eaa3
Add mechanism to retrieve the set of plausible separators found in a CSV-like file when guessing its format
author | Chris Cannam |
---|---|
date | Wed, 09 Jan 2019 14:39:50 +0000 (2019-01-09) |
parents | 07f23b90701a |
children | 841b2a3e606d |
files | data/fileio/CSVFormat.cpp data/fileio/CSVFormat.h data/fileio/test/CSVFormatTest.h data/fileio/test/csv/separator-many.csv |
diffstat | 4 files changed, 32 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
--- a/data/fileio/CSVFormat.cpp Wed Nov 14 15:46:35 2018 +0000 +++ b/data/fileio/CSVFormat.cpp Wed Jan 09 14:39:50 2019 +0000 @@ -39,8 +39,6 @@ bool CSVFormat::guessFormatFor(QString path) { - m_separator = ""; // to prompt guessing for it - m_modelType = TwoDimensionalModel; m_timingType = ExplicitTiming; m_timeUnits = TimeSeconds; @@ -108,14 +106,12 @@ for (int i = 0; i < candidates.length(); ++i) { auto bits = StringBits::split(line, candidates[i], m_allowQuoting); if (bits.size() >= 2) { - SVDEBUG << "Successfully split the line into:" << endl; - for (auto b: bits) { - SVDEBUG << b << endl; + m_plausibleSeparators.insert(candidates[i]); + if (m_separator == "") { + m_separator = candidates[i]; + SVDEBUG << "Estimated column separator: '" << m_separator + << "'" << endl; } - m_separator = candidates[i]; - SVDEBUG << "Estimated column separator: '" << m_separator - << "'" << endl; - return; } } } @@ -123,9 +119,7 @@ void CSVFormat::guessQualities(QString line, int lineno) { - if (m_separator == "") { - guessSeparator(line); - } + guessSeparator(line); QStringList list = StringBits::split(line, getSeparator(), m_allowQuoting);
--- a/data/fileio/CSVFormat.h Wed Nov 14 15:46:35 2018 +0000 +++ b/data/fileio/CSVFormat.h Wed Jan 09 14:39:50 2019 +0000 @@ -19,6 +19,8 @@ #include <QString> #include <QStringList> +#include <set> + #include "base/BaseTypes.h" class CSVFormat @@ -77,7 +79,7 @@ m_modelType(TwoDimensionalModel), m_timingType(ExplicitTiming), m_timeUnits(TimeSeconds), - m_separator(","), + m_separator(""), m_sampleRate(44100), m_windowSize(1024), m_columnCount(0), @@ -120,9 +122,13 @@ AudioSampleRange getAudioSampleRange() const { return m_audioSampleRange; } bool getAllowQuoting() const { return m_allowQuoting; } QChar getSeparator() const { - if (m_separator == "") return ' '; + if (m_separator == "") return ','; else return m_separator[0]; } + // set rather than QSet to ensure a fixed order + std::set<QChar> getPlausibleSeparators() const { + return m_plausibleSeparators; + } void setModelType(ModelType t) { m_modelType = t; } void setTimingType(TimingType t) { m_timingType = t; } @@ -157,7 +163,8 @@ ModelType m_modelType; TimingType m_timingType; TimeUnits m_timeUnits; - QString m_separator; + QString m_separator; // "" or a single char - basically QChar option + std::set<QChar> m_plausibleSeparators; sv_samplerate_t m_sampleRate; int m_windowSize;
--- a/data/fileio/test/CSVFormatTest.h Wed Nov 14 15:46:35 2018 +0000 +++ b/data/fileio/test/CSVFormatTest.h Wed Jan 09 14:39:50 2019 +0000 @@ -91,6 +91,18 @@ QCOMPARE(f.getColumnCount(), 3); } + void plausibleSeparators() { + CSVFormat f; + QVERIFY(f.guessFormatFor(csvDir.filePath("separator-many.csv"))); + std::set<QChar> p; + p.insert(QChar('|')); + p.insert(QChar(',')); + p.insert(QChar(':')); + p.insert(QChar(' ')); + std::set<QChar> actual = f.getPlausibleSeparators(); + QCOMPARE(actual, p); + } + void comment() { CSVFormat f; QVERIFY(f.guessFormatFor(csvDir.filePath("comment.csv")));