changeset 629:35499d48a5d1

* Start overhauling CSV parser to associate purposes with columns en route to its guesses; add some string manipulation code
author Chris Cannam
date Thu, 15 Jul 2010 15:27:21 +0000
parents 001db550bd48
children 11a664058dd8
files base/StringBits.cpp base/StringBits.h base/base.pro data/fileio/CSVFormat.cpp data/fileio/CSVFormat.h
diffstat 5 files changed, 529 insertions(+), 150 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/base/StringBits.cpp	Thu Jul 15 15:27:21 2010 +0000
@@ -0,0 +1,211 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Sonic Visualiser
+    An audio file viewer and annotation editor.
+    Centre for Digital Music, Queen Mary, University of London.
+    
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+*/
+
+/*
+   This is a modified version of a source file from the 
+   Rosegarden MIDI and audio sequencer and notation editor.
+   This file copyright 2000-2010 Chris Cannam.
+*/
+
+#include "StringBits.h"
+
+double
+StringBits::stringToDoubleLocaleFree(QString s, bool *ok)
+{
+    // Locale-independent parse: optional leading whitespace, an optional
+    // '-', then digits with '.' as the decimal point.  No exponents.  If
+    // ok is non-NULL it ends up false when any other character is met;
+    // the value built from the digits seen is returned regardless.
+    int dp = 0;   // 0 until '.' is seen, then the next decimal place
+    int sign = 1;
+    int i = 0;
+    double result = 0.0;
+    const int len = s.length();
+
+    if (ok) *ok = true;
+
+    while (i < len && s[i].isSpace()) ++i;
+
+    // Step past the sign as well as recording it; left in place it
+    // would reach the loop below, which rejects it as an invalid
+    // character and so reports failure for every negative number.
+    if (i < len && s[i] == '-') {
+        sign = -1;
+        ++i;
+    }
+
+    for (; i < len; ++i) {
+        const QChar c = s[i];
+        if (c.isDigit()) {
+            double d = c.digitValue();
+            if (dp > 0) {
+                // Scale the digit down to its decimal place.
+                for (int p = dp; p > 0; --p) d /= 10.0;
+                ++dp;
+            } else {
+                result *= 10.0;
+            }
+            result += d;
+        } else if (c == '.') {
+            dp = 1;
+        } else if (ok) {
+            *ok = false;
+        }
+    }
+
+    return result * sign;
+}
+    
+QStringList
+StringBits::splitQuoted(QString s, QChar separator)
+{
+    QStringList tokens;
+    QString tok;
+
+    // sep: between fields; unq: in an unquoted field, or just past the
+    // closing quote of a quoted one; q1/q2: inside '...' / "...".
+    enum { sep, unq, q1, q2 } mode = sep;
+
+    for (int i = 0; i < s.length(); ++i) {
+        QChar c = s[i];
+
+        if (c == '\'') {
+            switch (mode) {
+            case sep: mode = q1; break;
+            case unq: case q2: tok += c; break;
+            // A closing quote only ends the quoted section; the field is
+            // emitted by the next separator or at end of string.  Emitting
+            // it here made that separator add a spurious empty field, so
+            // that "a","b" split at ',' came out as (a;;b).
+            case q1: mode = unq; break;
+            }
+
+        } else if (c == '"') {
+            switch (mode) {
+            case sep: mode = q2; break;
+            case unq: case q1: tok += c; break;
+            case q2: mode = unq; break; // closing quote, as for q1 above
+            }
+
+        } else if (c == separator || (separator == ' ' && c.isSpace())) {
+            switch (mode) {
+            case sep: if (separator != ' ') tokens << ""; break; // empty field
+            case unq: mode = sep; tokens << tok; tok = ""; break;
+            case q1: case q2: tok += c; break; // separator is literal in quotes
+            }
+
+        } else {
+            // A backslash makes the next character literal; one at the very
+            // end of the string is dropped.
+            if (c == '\\') {
+                if (++i >= s.length()) break;
+                c = s[i];
+            }
+            if (mode == sep) mode = unq;
+            tok += c;
+        }
+    }
+
+    if (tok != "" || mode != sep) tokens << tok;
+    return tokens;
+}
+
+/*
+
+void testSplit()
+{
+    QStringList tests;
+    tests << "a b c d";
+    tests << "a \"b c\" d";
+    tests << "a 'b c' d";
+    tests << "a \"b c\\\" d\"";
+    tests << "a 'b c\\' d'";
+    tests << "a \"b c' d\"";
+    tests << "a 'b c\" d'";
+    tests << "aa 'bb cc\" dd'";
+    tests << "a'a 'bb' \\\"cc\" dd\\\"";
+    tests << "  a'a \\\'	 'bb'	 \'	\\\"cc\" ' dd\\\" '";
+
+    for (int j = 0; j < tests.size(); ++j) {
+	cout << endl;
+	cout << tests[j].toStdString() << endl;
+	cout << "->" << endl << "(";
+	QStringList l = splitQuoted(tests[j], ' ');
+	for (int i = 0; i < l.size(); ++i) {
+	    if (i > 0) cout << ";";
+	    cout << l[i].toStdString();
+	}
+	cout << ")" << endl;
+    }
+}
+
+*/
+
+/* 
+   Results:
+
+a b c d
+->     
+(a;b;c;d)
+
+a "b c" d
+->       
+(a;b c;d)
+
+a 'b c' d
+->       
+(a;b c;d)
+
+a "b c\" d"
+->         
+(a;b c" d) 
+
+a 'b c\' d'
+->         
+(a;b c' d) 
+
+a "b c' d"
+->        
+(a;b c' d)
+
+a 'b c" d'
+->        
+(a;b c" d)
+
+aa 'bb cc" dd'
+->            
+(aa;bb cc" dd)
+
+a'a 'bb' \"cc" dd\"
+->                 
+(a'a;bb;"cc";dd")  
+
+  a'a \'         'bb'    '      \"cc" ' dd\" '
+->                                            
+(a'a;';bb;      "cc" ;dd";)
+
+*/
+
+QStringList
+StringBits::split(QString line, QChar separator, bool quoted)
+{
+    // Quoted splitting is delegated to splitQuoted.  Otherwise empty fields
+    // are dropped only for ' ', mirroring splitQuoted's whitespace handling.
+    if (quoted) return splitQuoted(line, separator);
+
+    const QString::SplitBehavior behaviour =
+        (separator == ' ') ? QString::SkipEmptyParts : QString::KeepEmptyParts;
+    return line.split(separator, behaviour);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/base/StringBits.h	Thu Jul 15 15:27:21 2010 +0000
@@ -0,0 +1,60 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Sonic Visualiser
+    An audio file viewer and annotation editor.
+    Centre for Digital Music, Queen Mary, University of London.
+    
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+*/
+
+/*
+   This is a modified version of a source file from the 
+   Rosegarden MIDI and audio sequencer and notation editor.
+   This file copyright 2000-2010 Chris Cannam.
+*/
+
+#ifndef _STRING_BITS_H_
+#define _STRING_BITS_H_
+
+#include <QString>
+#include <QStringList>
+#include <QChar>
+
+class StringBits
+{
+public:
+    /**
+     * Convert a string to a double using basic "C"-locale syntax,
+     * i.e. always using '.' as the decimal point.  Intended as a
+     * fallback when parsing files from an unknown source, if
+     * locale-specific conversion fails.  Leading whitespace is
+     * skipped; e notation is not supported.  If ok is non-NULL, *ok
+     * is set to true if conversion succeeds or false otherwise.
+     */
+    static double stringToDoubleLocaleFree(QString s, bool *ok = 0);
+
+    /**
+     * Split a string at the given separator character.  Sections
+     * quoted with ' or " may contain the separator, and a backslash
+     * makes the following character literal.  If the separator is
+     * ' ', any run of whitespace counts as a single separator; any
+     * other separator, '\t' included, is matched literally.
+     */
+    static QStringList splitQuoted(QString s, QChar separator);
+
+    /**
+     * Split a string at the given separator character.  If quoted is
+     * true, do so by calling splitQuoted (above).  If quoted is
+     * false, use QString::split, with SkipEmptyParts behaviour when
+     * separator is ' ' and KeepEmptyParts otherwise (analogous to
+     * the way splitQuoted treats its separator).
+     */
+    static QStringList split(QString s, QChar separator, bool quoted);
+};
+
+#endif
--- a/base/base.pro	Thu Jul 08 14:22:28 2010 +0000
+++ b/base/base.pro	Thu Jul 15 15:27:21 2010 +0000
@@ -39,6 +39,7 @@
            Selection.h \
            Serialiser.h \
            StorageAdviser.h \
+           StringBits.h \
            TempDirectory.h \
            TextMatcher.h \
            Thread.h \
@@ -67,6 +68,7 @@
            Selection.cpp \
            Serialiser.cpp \
            StorageAdviser.cpp \
+           StringBits.cpp \
            TempDirectory.cpp \
            TextMatcher.cpp \
            Thread.cpp \
--- a/data/fileio/CSVFormat.cpp	Thu Jul 08 14:22:28 2010 +0000
+++ b/data/fileio/CSVFormat.cpp	Thu Jul 15 15:27:21 2010 +0000
@@ -15,6 +15,8 @@
 
 #include "CSVFormat.h"
 
+#include "base/StringBits.h"
+
 #include <QFile>
 #include <QString>
 #include <QRegExp>
@@ -23,39 +25,41 @@
 
 #include <iostream>
 
-CSVFormat::CSVFormat(QString filename) :
-    m_modelType(TwoDimensionalModel),
-    m_timingType(ExplicitTiming),
-    m_durationType(Durations),
-    m_timeUnits(TimeSeconds),
-    m_separator(","),
+CSVFormat::CSVFormat(QString path) :
+    m_separator(""),
     m_sampleRate(44100),
     m_windowSize(1024),
-    m_behaviour(QString::KeepEmptyParts),
-    m_maxExampleCols(0)
+    m_allowQuoting(true)
 {
-    QFile file(filename);
+    guessFormatFor(path);
+}
+
+void
+CSVFormat::guessFormatFor(QString path)
+{
+    m_modelType = TwoDimensionalModel;
+    m_timingType = ExplicitTiming;
+    m_durationType = Durations;
+    m_timeUnits = TimeSeconds;
+    m_behaviour = QString::KeepEmptyParts;
+
+    m_maxExampleCols = 0;
+    m_columnCount = 0;
+    m_variableColumnCount = false;
+
+    m_example.clear();
+    m_columnQualities.clear();
+    m_columnPurposes.clear();
+    m_prevValues.clear();
+
+    QFile file(path);
     if (!file.exists()) return;
     if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return;
 
     QTextStream in(&file);
     in.seek(0);
 
-    unsigned int lineno = 0;
-
-    bool nonIncreasingPrimaries = false;
-    bool nonIncreasingSecondaries = false;
-    bool nonNumericPrimaries = false;
-    bool floatPrimaries = false;
-    bool variableItemCount = false;
-    int itemCount = 1;
-    int earliestNonNumericItem = -1;
-
-    float prevPrimary = 0.0;
-    float prevSecondary = 0.0;
-
-    m_maxExampleCols = 0;
-    m_separator = "";
+    int lineno = 0;
 
     while (!in.atEnd()) {
 
@@ -67,148 +71,198 @@
         for (size_t li = 0; li < lines.size(); ++li) {
 
             QString line = lines[li];
+            if (line.startsWith("#") || line == "") continue;
 
-            if (line.startsWith("#")) continue;
+            guessQualities(line, lineno);
 
-            m_behaviour = QString::KeepEmptyParts;
+            if (++lineno == 50) break;
+        }
+    }
 
-            if (m_separator == "") {
-                //!!! to do: ask the user
-                if (line.split(",").size() >= 2) m_separator = ",";
-                else if (line.split("\t").size() >= 2) m_separator = "\t";
-                else if (line.split("|").size() >= 2) m_separator = "|";
-                else if (line.split("/").size() >= 2) m_separator = "/";
-                else if (line.split(":").size() >= 2) m_separator = ":";
-                else {
-                    m_separator = " ";
-                    m_behaviour = QString::SkipEmptyParts;
+    guessPurposes();
+}
+
+void
+CSVFormat::guessSeparator(QString line)
+{
+    // First candidate (in priority order) giving >= 2 fields wins; else a space.
+    const QString candidates(",\t |/:");
+    int i = 0;
+    while (i < candidates.size() &&
+           StringBits::split(line, candidates[i], m_allowQuoting).size() < 2) {
+        ++i;
+    }
+    m_separator = (i < candidates.size()) ? QString(candidates[i]) : QString(" ");
+}
+
+void
+CSVFormat::guessQualities(QString line, int lineno)
+{
+    if (m_separator == "") guessSeparator(line);
+
+    QStringList list = StringBits::split(line, m_separator[0], m_allowQuoting);
+
+    int cols = list.size();
+    if (lineno == 0 || (cols < m_columnCount)) m_columnCount = cols;
+    if (cols != m_columnCount) m_variableColumnCount = true;
+
+    // All columns are regarded as having these qualities until we see
+    // something that indicates otherwise:
+
+    ColumnQualities defaultQualities =
+        ColumnNumeric | ColumnIntegral | ColumnIncreasing;
+    
+    for (int i = 0; i < cols; ++i) {
+	    
+        while (m_columnQualities.size() <= i) {
+            m_columnQualities.push_back(defaultQualities);
+            m_prevValues.push_back(0.f);
+        }
+
+        QString s(list[i]);
+        bool ok = false;
+
+        ColumnQualities qualities = m_columnQualities[i];
+
+        bool numeric    = (qualities & ColumnNumeric);
+        bool integral   = (qualities & ColumnIntegral);
+        bool increasing = (qualities & ColumnIncreasing);
+        bool large      = (qualities & ColumnLarge); // this one defaults to off
+
+        float value = 0.f;
+
+        //!!! how to take into account headers?
+
+        if (numeric) {
+            value = s.toFloat(&ok);
+            if (!ok) {
+                value = (float)StringBits::stringToDoubleLocaleFree(s, &ok);
+            }
+            if (ok) {
+                if (lineno < 2 && value > 1000.f) large = true;
+            } else {
+                numeric = false;
+            }
+        }
+
+        if (numeric) {
+
+            if (integral) {
+                if (s.contains('.') || s.contains(',')) {
+                    integral = false;
                 }
             }
 
-//            std::cerr << "separator = \"" << m_separator.toStdString() << "\"" << std::endl;
-
-            QStringList list = line.split(m_separator, m_behaviour);
-            QStringList tidyList;
-
-            for (int i = 0; i < list.size(); ++i) {
-	    
-                QString s(list[i]);
-                bool numeric = false;
-
-                if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
-                    s = s.mid(1, s.length() - 2);
-                } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
-                    s = s.mid(1, s.length() - 2);
-                } else {
-                    float f = s.toFloat(&numeric);
-//                    std::cerr << "converted \"" << s.toStdString() << "\" to float, got " << f << " and success = " << numeric << std::endl;
-                }
-
-                tidyList.push_back(s);
-
-                if (lineno == 0 || (list.size() < itemCount)) {
-                    itemCount = list.size();
-                } else {
-                    if (itemCount != list.size()) {
-                        variableItemCount = true;
-                    }
-                }
-	    
-                if (i == 0) { // primary
-
-                    if (numeric) {
-
-                        float primary = s.toFloat();
-
-                        if (lineno > 0 && primary <= prevPrimary) {
-                            nonIncreasingPrimaries = true;
-                        }
-
-                        if (s.contains(".") || s.contains(",")) {
-                            floatPrimaries = true;
-                        }
-
-                        prevPrimary = primary;
-
-                    } else {
-                        nonNumericPrimaries = true;
-                    }
-                } else { // secondary
-
-                    if (!numeric) {
-                        if (earliestNonNumericItem < 0 ||
-                            i < earliestNonNumericItem) {
-                            earliestNonNumericItem = i;
-                        }
-                    } else if (i == 1) {
-                        float secondary = s.toFloat();
-                        if (lineno > 0 && secondary <= prevSecondary) {
-                            nonIncreasingSecondaries = true;
-                        }
-                        prevSecondary = secondary;
-                    }
+            if (increasing) {
+                if (lineno > 0 && value <= m_prevValues[i]) {
+                    increasing = false;
                 }
             }
 
-            if (lineno < 10) {
-                m_example.push_back(tidyList);
-                if (lineno == 0 || tidyList.size() > m_maxExampleCols) {
-                    m_maxExampleCols = tidyList.size();
-                }
-            }
+            m_prevValues[i] = value;
+        }
 
-            ++lineno;
+        m_columnQualities[i] =
+            (numeric    ? ColumnNumeric : 0) |
+            (integral   ? ColumnIntegral : 0) |
+            (increasing ? ColumnIncreasing : 0) |
+            (large      ? ColumnLarge : 0);
+    }
 
-            if (lineno == 50) break;
+    if (lineno < 10) {
+        m_example.push_back(list);
+        if (lineno == 0 || cols > m_maxExampleCols) {
+            m_maxExampleCols = cols;
         }
     }
 
-    if (nonNumericPrimaries || nonIncreasingPrimaries) {
+    std::cerr << "Estimated column qualities: ";
+    for (int i = 0; i < m_columnCount; ++i) {
+        std::cerr << int(m_columnQualities[i]) << " ";
+    }
+    std::cerr << std::endl;
+}
+
+void
+CSVFormat::guessPurposes()
+{
+    while (m_columnPurposes.size() <= m_columnCount) {
+        m_columnPurposes.push_back(ColumnUnknown);
+    }
+
+    m_timingType = CSVFormat::ImplicitTiming;
+    m_timeUnits = CSVFormat::TimeWindows;
 	
-	// Primaries are probably not a series of times
+    int timingColumnCount = 0;
+    
+    for (int i = 0; i < m_columnCount; ++i) {
+        
+        ColumnPurpose purpose = ColumnUnknown;
+        bool primary = (i == 0);
 
-	m_timingType = CSVFormat::ImplicitTiming;
-	m_timeUnits = CSVFormat::TimeWindows;
-	
-	if (nonNumericPrimaries) {
-	    m_modelType = CSVFormat::OneDimensionalModel;
-	} else if (itemCount == 1 || variableItemCount ||
-		   (earliestNonNumericItem != -1)) {
-	    m_modelType = CSVFormat::TwoDimensionalModel;
-	} else {
-	    m_modelType = CSVFormat::ThreeDimensionalModel;
-	}
+        ColumnQualities qualities = m_columnQualities[i];
 
+        bool numeric    = (qualities & ColumnNumeric);
+        bool integral   = (qualities & ColumnIntegral);
+        bool increasing = (qualities & ColumnIncreasing);
+        bool large      = (qualities & ColumnLarge);
+
+        bool timingColumn = (numeric && increasing);
+
+        if (timingColumn) {
+
+            ++timingColumnCount;
+                              
+            if (primary) {
+
+                purpose = ColumnStartTime;
+
+                m_timingType = ExplicitTiming;
+
+                if (integral && large) {
+                    m_timeUnits = TimeAudioFrames;
+                } else {
+                    m_timeUnits = TimeSeconds;
+                }
+
+            } else {
+
+                if (timingColumnCount == 2 && m_timingType == ExplicitTiming) {
+                    purpose = ColumnEndTime;
+                    m_durationType = EndTimes;
+                }
+            }
+        }
+
+        if (purpose == ColumnUnknown) {
+            if (numeric) {
+                purpose = ColumnValue;
+            } else {
+                purpose = ColumnLabel;
+            }
+        }
+
+        m_columnPurposes[i] = purpose;
+    }            
+
+    int valueCount = 0;
+    for (int i = 0; i < m_columnCount; ++i) {
+        if (m_columnPurposes[i] == ColumnValue) ++valueCount;
+    }
+
+    if (valueCount == 0) {
+        m_modelType = OneDimensionalModel;
+    } else if (valueCount == 1) {
+        m_modelType = TwoDimensionalModel;
     } else {
+        m_modelType = ThreeDimensionalModel;
+    }
 
-	// Increasing numeric primaries -- likely to be time
-
-	m_timingType = CSVFormat::ExplicitTiming;
-
-	if (floatPrimaries) {
-	    m_timeUnits = CSVFormat::TimeSeconds;
-	} else {
-	    m_timeUnits = CSVFormat::TimeAudioFrames;
-	}
-
-	if (itemCount == 1) {
-	    m_modelType = CSVFormat::OneDimensionalModel;
-	} else if (variableItemCount || (earliestNonNumericItem != -1)) {
-	    if (earliestNonNumericItem != -1 && earliestNonNumericItem < 2) {
-		m_modelType = CSVFormat::OneDimensionalModel;
-	    } else {
-		m_modelType = CSVFormat::TwoDimensionalModel;
-	    }
-	} else {
-	    m_modelType = CSVFormat::ThreeDimensionalModel;
-	}
-
-        if (nonIncreasingSecondaries) {
-            m_durationType = Durations;
-        } else {
-            m_durationType = EndTimes;
-        }
+    std::cerr << "Estimated column purposes: ";
+    for (int i = 0; i < m_columnCount; ++i) {
+        std::cerr << int(m_columnPurposes[i]) << " ";
     }
+    std::cerr << std::endl;
 
     std::cerr << "Estimated model type: " << m_modelType << std::endl;
     std::cerr << "Estimated timing type: " << m_timingType << std::endl;
@@ -216,3 +270,4 @@
     std::cerr << "Estimated units: " << m_timeUnits << std::endl;
 }
 
+
--- a/data/fileio/CSVFormat.h	Thu Jul 08 14:22:28 2010 +0000
+++ b/data/fileio/CSVFormat.h	Thu Jul 15 15:27:21 2010 +0000
@@ -45,7 +45,22 @@
 	TimeWindows
     };
 
-    CSVFormat(QString path); // guess format
+    enum ColumnPurpose {
+        ColumnUnknown,   // initial state, before a purpose has been chosen
+        ColumnStartTime, // guessed for a numeric, increasing first column
+        ColumnEndTime,   // guessed for the second numeric, increasing column when the first is a start time
+        ColumnDuration,  // NOTE(review): guessPurposes() never picks this -- confirm intended use
+        ColumnValue,     // guessed for any other numeric column
+        ColumnLabel      // guessed for any non-numeric column
+    };
+
+    enum ColumnQuality {
+        ColumnNumeric    = 0x1, // every value examined so far parsed as a number
+        ColumnIntegral   = 0x2, // no numeric value examined so far contained '.' or ','
+        ColumnIncreasing = 0x4, // each numeric value exceeded the one on the line before
+        ColumnLarge      = 0x8  // a value above 1000 appeared in the first two data lines
+    };
+    typedef unsigned int ColumnQualities; // bitwise OR of ColumnQuality flags
 
     CSVFormat() : // arbitrary defaults
         m_modelType(TwoDimensionalModel),
@@ -55,8 +70,23 @@
         m_separator(","),
         m_sampleRate(44100),
         m_windowSize(1024),
-        m_behaviour(QString::KeepEmptyParts)
+        m_columnCount(0),
+        m_variableColumnCount(false),
+        m_behaviour(QString::KeepEmptyParts),
+        m_allowQuoting(true),
+        m_maxExampleCols(0)
     { }
+
+    CSVFormat(QString path); // guess format
+
+    /**
+     * Guess the format of the given CSV file, setting the fields in
+     * this object accordingly.  If the current separator is the empty
+     * string, the separator character will also be guessed; otherwise
+     * the current separator will be used.  The other properties of
+     * this object will be set according to guesses from the file.
+     */
+    void guessFormatFor(QString path);
  
     ModelType    getModelType()     const { return m_modelType;     }
     TimingType   getTimingType()    const { return m_timingType;    }
@@ -65,8 +95,9 @@
     QString      getSeparator()     const { return m_separator;     }
     size_t       getSampleRate()    const { return m_sampleRate;    }
     size_t       getWindowSize()    const { return m_windowSize;    }
-
+    
     QString::SplitBehavior getSplitBehaviour() const { return m_behaviour; }
+    QList<ColumnPurpose> getColumnPurposes() const { return m_columnPurposes; }
 	
     void setModelType(ModelType t)        { m_modelType    = t; }
     void setTimingType(TimingType t)      { m_timingType   = t; }
@@ -77,8 +108,12 @@
     void setWindowSize(size_t s)          { m_windowSize   = s; }
 
     void setSplitBehaviour(QString::SplitBehavior b) { m_behaviour = b; }
+    void setColumnPurposes(QList<ColumnPurpose> cl) { m_columnPurposes = cl; }
     
-    // only valid if constructor that guesses format was used:
+    // read-only; only valid if format has been guessed:
+    QList<ColumnQualities> getColumnQualities() const { return m_columnQualities; }
+
+    // read-only; only valid if format has been guessed:
     QList<QStringList> getExample() const { return m_example; }
     int getMaxExampleCols() const { return m_maxExampleCols; }
 
@@ -91,10 +126,26 @@
     size_t       m_sampleRate;
     size_t       m_windowSize;
 
+    int          m_columnCount;
+    bool         m_variableColumnCount;
+
+    QList<ColumnQualities> m_columnQualities;
+    QList<ColumnPurpose> m_columnPurposes;
+
+    QList<float> m_prevValues;
+
     QString::SplitBehavior m_behaviour;
+    bool m_allowQuoting;
 
     QList<QStringList> m_example;
     int m_maxExampleCols;
+
+    void guessSeparator(QString line);
+    void guessQualities(QString line, int lineno);
+    void guessPurposes();
+
+    void guessFormatFor_Old(QString path);
+ 
 };
 
 #endif