# HG changeset patch # User Chris Cannam # Date 1536913517 -3600 # Node ID 64ef24ebb19c0c221301a4b9b46809262d21a1a4 # Parent c1b2eab6ac51345b1febf6576c39b48809f693a3 Some CSV format tests and minor fixes diff -r c1b2eab6ac51 -r 64ef24ebb19c base/StringBits.h --- a/base/StringBits.h Wed Sep 12 18:49:32 2018 +0100 +++ b/base/StringBits.h Fri Sep 14 09:25:17 2018 +0100 @@ -18,8 +18,8 @@ This file copyright 2000-2010 Chris Cannam. */ -#ifndef _STRING_BITS_H_ -#define _STRING_BITS_H_ +#ifndef SV_STRING_BITS_H +#define SV_STRING_BITS_H #include #include diff -r c1b2eab6ac51 -r 64ef24ebb19c base/test/TestStringBits.h --- a/base/test/TestStringBits.h Wed Sep 12 18:49:32 2018 +0100 +++ b/base/test/TestStringBits.h Fri Sep 14 09:25:17 2018 +0100 @@ -128,6 +128,34 @@ testSplitQuoted(in, out); } + void snested3() { + QString in = "'aa bb cc\"' dd"; + QStringList out; + out << "aa bb cc\"" << "dd"; + testSplitQuoted(in, out); + } + + void snested3a() { + QString in = "\"aa bb cc'\" dd"; + QStringList out; + out << "aa bb cc'" << "dd"; + testSplitQuoted(in, out); + } + + void snested4() { + QString in = "'aa \"bb cc\" dd'"; + QStringList out; + out << "aa \"bb cc\" dd"; + testSplitQuoted(in, out); + } + + void snested4a() { + QString in = "\"aa 'bb cc' dd\""; + QStringList out; + out << "aa 'bb cc' dd"; + testSplitQuoted(in, out); + } + void qquoted() { QString in = "a'a 'bb' \\\"cc\" dd\\\""; QStringList out; @@ -135,6 +163,19 @@ testSplitQuoted(in, out); } + void qspace() { + QString in = "\"a a\":\"b:b\":\"c d\""; + QStringList out1; + // Can't start a quote in the middle of a bare field - they + // are handled only if the first character in the field is a + // quote. 
Otherwise we'd have trouble with apostrophes etc + out1 << "a a:\"b:b\":\"c" << "d\""; + QCOMPARE(StringBits::splitQuoted(in, ' '), out1); + QStringList out2; + out2 << "a a" << "b:b" << "c d"; + QCOMPARE(StringBits::splitQuoted(in, ':'), out2); + } + void multispace() { QString in = " a'a \\' 'bb' ' \\\"cc\" ' dd\\\" '"; QStringList out; diff -r c1b2eab6ac51 -r 64ef24ebb19c data/fileio/CSVFormat.cpp --- a/data/fileio/CSVFormat.cpp Wed Sep 12 18:49:32 2018 +0100 +++ b/data/fileio/CSVFormat.cpp Fri Sep 14 09:25:17 2018 +0100 @@ -33,12 +33,14 @@ m_windowSize(1024), m_allowQuoting(true) { - guessFormatFor(path); + (void)guessFormatFor(path); } -void +bool CSVFormat::guessFormatFor(QString path) { + m_separator = ""; // to prompt guessing for it + m_modelType = TwoDimensionalModel; m_timingType = ExplicitTiming; m_timeUnits = TimeSeconds; @@ -53,8 +55,17 @@ m_prevValues.clear(); QFile file(path); - if (!file.exists()) return; - if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return; + if (!file.exists()) { + SVCERR << "CSVFormat::guessFormatFor(" << path + << "): File does not exist" << endl; + return false; + } + if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) { + SVCERR << "CSVFormat::guessFormatFor(" << path + << "): File could not be opened for reading" << endl; + return false; + } + SVDEBUG << "CSVFormat::guessFormatFor(" << path << ")" << endl; QTextStream in(&file); in.seek(0); @@ -85,14 +96,22 @@ guessPurposes(); guessAudioSampleRange(); + + return true; } void CSVFormat::guessSeparator(QString line) { - char candidates[] = { ',', '\t', ' ', '|', '/', ':' }; - for (int i = 0; i < int(sizeof(candidates)/sizeof(candidates[0])); ++i) { - if (StringBits::split(line, candidates[i], m_allowQuoting).size() >= 2) { + QString candidates = "\t|,/: "; + + for (int i = 0; i < candidates.length(); ++i) { + auto bits = StringBits::split(line, candidates[i], m_allowQuoting); + if (bits.size() >= 2) { + SVDEBUG << "Successfully split the line into:" << endl; + 
for (auto b: bits) { + SVDEBUG << b << endl; + } m_separator = candidates[i]; SVDEBUG << "Estimated column separator: '" << m_separator << "'" << endl; @@ -104,7 +123,9 @@ void CSVFormat::guessQualities(QString line, int lineno) { - if (m_separator == "") guessSeparator(line); + if (m_separator == "") { + guessSeparator(line); + } QStringList list = StringBits::split(line, getSeparator(), m_allowQuoting); @@ -167,6 +188,14 @@ } } else { numeric = false; + + // If the column is not numeric, it can't be any of + // these things either + integral = false; + increasing = false; + small = false; + large = false; + signd = false; } } @@ -186,7 +215,7 @@ m_prevValues[i] = value; } - + m_columnQualities[i] = (numeric ? ColumnNumeric : 0) | (integral ? ColumnIntegral : 0) | diff -r c1b2eab6ac51 -r 64ef24ebb19c data/fileio/CSVFormat.h --- a/data/fileio/CSVFormat.h Wed Sep 12 18:49:32 2018 +0100 +++ b/data/fileio/CSVFormat.h Fri Sep 14 09:25:17 2018 +0100 @@ -95,8 +95,21 @@ * string, the separator character will also be guessed; otherwise * the current separator will be used. The other properties of * this object will be set according to guesses from the file. + * + * The properties that are guessed from the file contents are: + * separator, column count, variable-column-count flag, audio + * sample range, timing type, time units, column qualities, column + * purposes, and model type. The sample rate and window size + * cannot be guessed and will not be changed by this function. + * Note also that this function will never guess WaveFileModel for + * the model type. + * + * Return false if there is some fundamental error, e.g. the file + * could not be opened at all. Return true otherwise. Note that + * this function returns true even if the file doesn't appear to + * make much sense as a data format. 
*/ - void guessFormatFor(QString path); + bool guessFormatFor(QString path); ModelType getModelType() const { return m_modelType; } TimingType getTimingType() const { return m_timingType; } diff -r c1b2eab6ac51 -r 64ef24ebb19c data/fileio/test/CSVFormatTest.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data/fileio/test/CSVFormatTest.h Fri Sep 14 09:25:17 2018 +0100 @@ -0,0 +1,130 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Sonic Visualiser + An audio file viewer and annotation editor. + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef TEST_CSV_FORMAT_H +#define TEST_CSV_FORMAT_H + +// Tests for the code that guesses the most likely format for parsing a CSV file + +#include "../CSVFormat.h" + +#include "base/Debug.h" + +#include + +#include +#include +#include + +#include + +using namespace std; + +class CSVFormatTest : public QObject +{ + Q_OBJECT + +private: + QDir csvDir; + +public: + CSVFormatTest(QString base) { + if (base == "") { + base = "svcore/data/fileio/test"; + } + csvDir = QDir(base + "/csv"); + } + +private slots: + void init() { + if (!csvDir.exists()) { + SVCERR << "ERROR: CSV test file directory \"" << csvDir.absolutePath() << "\" does not exist" << endl; + QVERIFY2(csvDir.exists(), "CSV test file directory not found"); + } + } + + void separatorComma() { + CSVFormat f; + QVERIFY(f.guessFormatFor(csvDir.filePath("separator-comma.csv"))); + QCOMPARE(f.getSeparator(), QChar(',')); + QCOMPARE(f.getColumnCount(), 3); + } + + void separatorTab() { + CSVFormat f; + QVERIFY(f.guessFormatFor(csvDir.filePath("separator-tab.csv"))); + 
QCOMPARE(f.getSeparator(), QChar('\t')); + QCOMPARE(f.getColumnCount(), 3); + } + + void separatorPipe() { + CSVFormat f; + QVERIFY(f.guessFormatFor(csvDir.filePath("separator-pipe.csv"))); + QCOMPARE(f.getSeparator(), QChar('|')); + // differs from the others + QCOMPARE(f.getColumnCount(), 4); + } + + void separatorSpace() { + CSVFormat f; + QVERIFY(f.guessFormatFor(csvDir.filePath("separator-space.csv"))); + QCOMPARE(f.getSeparator(), QChar(' ')); + // NB fields are separated by 1 or more spaces, not necessarily exactly 1 + QCOMPARE(f.getColumnCount(), 3); + } + + void separatorColon() { + CSVFormat f; + QVERIFY(f.guessFormatFor(csvDir.filePath("separator-colon.csv"))); + QCOMPARE(f.getSeparator(), QChar(':')); + QCOMPARE(f.getColumnCount(), 3); + } + + void comment() { + CSVFormat f; + QVERIFY(f.guessFormatFor(csvDir.filePath("comment.csv"))); + QCOMPARE(f.getSeparator(), QChar(',')); + QCOMPARE(f.getColumnCount(), 4); + } + + void qualities() { + CSVFormat f; + QVERIFY(f.guessFormatFor(csvDir.filePath("column-qualities.csv"))); + QCOMPARE(f.getSeparator(), QChar(',')); + QCOMPARE(f.getColumnCount(), 7); + QList<CSVFormat::ColumnQualities> q = f.getColumnQualities(); + QList<CSVFormat::ColumnQualities> expected; + expected << 0; + expected << CSVFormat::ColumnQualities(CSVFormat::ColumnNumeric | + CSVFormat::ColumnIntegral | + CSVFormat::ColumnIncreasing); + expected << CSVFormat::ColumnQualities(CSVFormat::ColumnNumeric | + CSVFormat::ColumnIntegral | + CSVFormat::ColumnIncreasing | + CSVFormat::ColumnLarge); + expected << CSVFormat::ColumnQualities(CSVFormat::ColumnNumeric); + expected << CSVFormat::ColumnQualities(CSVFormat::ColumnNumeric | + CSVFormat::ColumnIncreasing); + expected << CSVFormat::ColumnQualities(CSVFormat::ColumnNumeric | + CSVFormat::ColumnSmall | + CSVFormat::ColumnSigned); + expected << CSVFormat::ColumnQualities(CSVFormat::ColumnNumeric | + CSVFormat::ColumnIntegral | + CSVFormat::ColumnIncreasing | + CSVFormat::ColumnNearEmpty); + QCOMPARE(q, expected); + } +}; + +#endif diff -r c1b2eab6ac51
-r 64ef24ebb19c data/fileio/test/csv/column-qualities.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data/fileio/test/csv/column-qualities.csv Fri Sep 14 09:25:17 2018 +0100 @@ -0,0 +1,9 @@ +Text only,4,1024,45.6,45.7,-0.001,987 +Blah,5,2048,45.1,45.9,0.0123, + +# Include the odd blank line, space, and comment + + +Parp, 6, 3072 , 44.7 ,52.1, 0.26, + +Toot,7,4096,42.2,57.9,0.0, diff -r c1b2eab6ac51 -r 64ef24ebb19c data/fileio/test/csv/comment.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data/fileio/test/csv/comment.csv Fri Sep 14 09:25:17 2018 +0100 @@ -0,0 +1,5 @@ +# This is a comment +# This is a comment with various | possible | but not real separators in it +This is,the first,of the,real data lines +# This,is,one,that,would,cause,more,columns,to,be,counted,if,it,were,real +This is the,second diff -r c1b2eab6ac51 -r 64ef24ebb19c data/fileio/test/csv/separator-colon.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data/fileio/test/csv/separator-colon.csv Fri Sep 14 09:25:17 2018 +0100 @@ -0,0 +1,4 @@ +"This thing":"That thing":"The other thing" +1:12,4:16,3 +2:14,2 +3:16,1:1901 diff -r c1b2eab6ac51 -r 64ef24ebb19c data/fileio/test/csv/separator-comma.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data/fileio/test/csv/separator-comma.csv Fri Sep 14 09:25:17 2018 +0100 @@ -0,0 +1,4 @@ +This thing,That thing,The other thing +1,12.4,16.3 +2,14.2 +3,16.1,"This, that\", and the other" diff -r c1b2eab6ac51 -r 64ef24ebb19c data/fileio/test/csv/separator-pipe.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data/fileio/test/csv/separator-pipe.csv Fri Sep 14 09:25:17 2018 +0100 @@ -0,0 +1,4 @@ +This thing|That thing|The other thing +1|12,4|16,3 +2|14,2|And another|column +3|16,1|1901|"Not another|column - we have four columns, not five" diff -r c1b2eab6ac51 -r 64ef24ebb19c data/fileio/test/csv/separator-space.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data/fileio/test/csv/separator-space.csv Fri Sep 14 09:25:17 2018 +0100 
@@ -0,0 +1,4 @@ +"This thing" "That thing" "The other thing" +1 12,4 16,3 +2 14,2 +3 16,1 1901 diff -r c1b2eab6ac51 -r 64ef24ebb19c data/fileio/test/csv/separator-tab.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data/fileio/test/csv/separator-tab.csv Fri Sep 14 09:25:17 2018 +0100 @@ -0,0 +1,4 @@ +This thing That thing The other thing +1 12,4 16,3 +2 14,2 +3 16,1 1901 diff -r c1b2eab6ac51 -r 64ef24ebb19c data/fileio/test/files.pri --- a/data/fileio/test/files.pri Wed Sep 12 18:49:32 2018 +0100 +++ b/data/fileio/test/files.pri Fri Sep 14 09:25:17 2018 +0100 @@ -6,6 +6,7 @@ AudioTestData.h \ EncodingTest.h \ MIDIFileReaderTest.h \ + CSVFormatTest.h \ CSVStreamWriterTest.h TEST_SOURCES += \ diff -r c1b2eab6ac51 -r 64ef24ebb19c data/fileio/test/svcore-data-fileio-test.cpp --- a/data/fileio/test/svcore-data-fileio-test.cpp Wed Sep 12 18:49:32 2018 +0100 +++ b/data/fileio/test/svcore-data-fileio-test.cpp Fri Sep 14 09:25:17 2018 +0100 @@ -16,6 +16,7 @@ #include "AudioFileWriterTest.h" #include "EncodingTest.h" #include "MIDIFileReaderTest.h" +#include "CSVFormatTest.h" #include "CSVStreamWriterTest.h" #include @@ -40,7 +41,7 @@ QCoreApplication app(argc, argv); app.setOrganizationName("sonic-visualiser"); - app.setApplicationName("test-fileio"); + app.setApplicationName("test-svcore-data-fileio"); if (testDir != "") { SVCERR << "Setting test directory base path to \"" << testDir << "\"" << endl; @@ -71,16 +72,22 @@ } { + CSVFormatTest t(testDir); + if (QTest::qExec(&t, argc, argv) == 0) ++good; + else ++bad; + } + + { CSVStreamWriterTest t; if (QTest::qExec(&t, argc, argv) == 0) ++good; else ++bad; } if (bad > 0) { - SVCERR << "\n********* " << bad << " test suite(s) failed!\n" << endl; + SVCERR << "\n********* " << bad << " test suite(s) failed!\n" << endl; return 1; } else { - SVCERR << "All tests passed" << endl; + SVCERR << "All tests passed" << endl; return 0; } }