changeset 1870:1b8c4ee06f6d csv-import-headers

Detect presence of header row in CSV format guesser; use headings to inform our guesses about column purposes; test this
author Chris Cannam
date Wed, 17 Jun 2020 18:01:00 +0100 (2020-06-17)
parents 44dba7cd9ec3
children bed42ce4d3ab
files data/fileio/CSVFormat.cpp data/fileio/CSVFormat.h data/fileio/test/CSVFormatTest.h data/fileio/test/csv/model-type-1d-samples-header.csv data/fileio/test/csv/model-type-1d-seconds-header.csv data/fileio/test/csv/model-type-2d-duration-samples-header.csv data/fileio/test/csv/model-type-2d-duration-seconds-header.csv data/fileio/test/csv/model-type-2d-endtime-samples-header.csv data/fileio/test/csv/model-type-2d-endtime-seconds-header.csv data/fileio/test/csv/model-type-2d-implicit-header.csv data/fileio/test/csv/model-type-2d-samples-header.csv data/fileio/test/csv/model-type-2d-seconds-header.csv data/fileio/test/csv/model-type-3d-implicit-header.csv data/fileio/test/csv/model-type-3d-samples-header.csv data/fileio/test/csv/model-type-3d-seconds-header.csv
diffstat 15 files changed, 402 insertions(+), 58 deletions(-) [+]
line wrap: on
line diff
--- a/data/fileio/CSVFormat.cpp	Tue Jun 16 15:15:57 2020 +0100
+++ b/data/fileio/CSVFormat.cpp	Wed Jun 17 18:01:00 2020 +0100
@@ -31,7 +31,9 @@
     m_separator(""),
     m_sampleRate(44100),
     m_windowSize(1024),
-    m_allowQuoting(true)
+    m_headerStatus(HeaderUnknown),
+    m_allowQuoting(true),
+    m_maxExampleCols(0)
 {
     (void)guessFormatFor(path);
 }
@@ -124,8 +126,18 @@
     QStringList list = StringBits::split(line, getSeparator(), m_allowQuoting);
 
     int cols = list.size();
-    if (lineno == 0 || (cols > m_columnCount)) m_columnCount = cols;
-    if (cols != m_columnCount) m_variableColumnCount = true;
+
+    int firstLine = 0;
+    if (m_headerStatus == HeaderPresent) {
+        firstLine = 1;
+    }
+    
+    if (lineno == firstLine || (cols > m_columnCount)) {
+        m_columnCount = cols;
+    }
+    if (cols != m_columnCount) {
+        m_variableColumnCount = true;
+    }
 
     // All columns are regarded as having these qualities until we see
     // something that indicates otherwise:
@@ -137,10 +149,10 @@
     for (int i = 0; i < cols; ++i) {
 
         SVDEBUG << "line no " << lineno << ": column " << i << " contains: \"" << list[i] << "\"" << endl;
-        
-        while (m_columnQualities.size() <= i) {
-            m_columnQualities.push_back(defaultQualities);
-            m_prevValues.push_back(0.f);
+
+        if (m_columnQualities.find(i) == m_columnQualities.end()) {
+            m_columnQualities[i] = defaultQualities;
+            m_prevValues[i] = 0.f;
         }
 
         QString s(list[i]);
@@ -161,21 +173,19 @@
 
         if (s.trimmed() != "") {
         
-            if (lineno > 1) {
+            if (lineno > firstLine) {
                 emptyish = false;
             }
         
             float value = 0.f;
 
-            //!!! how to take into account headers?
-
             if (numeric) {
                 value = s.toFloat(&ok);
                 if (!ok) {
                     value = (float)StringBits::stringToDoubleLocaleFree(s, &ok);
                 }
                 if (ok) {
-                    if (lineno < 2 && value > 1000.f) {
+                    if (lineno < firstLine + 2 && value > 1000.f) {
                         large = true;
                     }
                     if (value < 0.f) {
@@ -206,7 +216,7 @@
                 }
 
                 if (increasing) {
-                    if (lineno > 0 && value <= m_prevValues[i]) {
+                    if (lineno > firstLine && value <= m_prevValues[i]) {
                         increasing = false;
                     }
                 }
@@ -225,19 +235,55 @@
             (emptyish   ? ColumnNearEmpty : 0);
     }
 
-    if (lineno < 10) {
+    if (lineno == 0 && m_headerStatus == HeaderUnknown) {
+        // If we have at least one column, and every column's qualities
+        // are exactly ColumnNearEmpty (i.e. the cell on this first line
+        // is neither empty nor numeric), then we probably have a header row
+        bool couldBeHeader = (cols > 0);
+        std::map<int, QString> headings;
+        for (int i = 0; i < cols; ++i) {
+            if (m_columnQualities[i] != ColumnNearEmpty) {
+                couldBeHeader = false;
+            } else {
+                headings[i] = list[i].trimmed().toLower();
+            }
+        }
+        if (couldBeHeader) {
+            m_headerStatus = HeaderPresent;
+            m_columnHeadings = headings;
+        } else {
+            m_headerStatus = HeaderAbsent;
+        }
+    }
+
+    if (lineno == 0 && m_headerStatus == HeaderPresent) {
+        // Start again with the qualities:
+        m_columnQualities.clear();
+        m_prevValues.clear();
+    } else if (lineno < firstLine + 10) {
+        // Not a header row, so keep it as one of the example rows
         m_example.push_back(list);
-        if (lineno == 0 || cols > m_maxExampleCols) {
+        if (lineno == firstLine || cols > m_maxExampleCols) {
             m_maxExampleCols = cols;
         }
     }
 
-    if (lineno < 10) {
+    if (lineno < firstLine + 10) {
         SVDEBUG << "Estimated column qualities for line " << lineno << " (reporting up to first 10): ";
-        for (int i = 0; i < m_columnCount; ++i) {
-            SVDEBUG << int(m_columnQualities[i]) << " ";
+        if (lineno == 0 && m_headerStatus == HeaderPresent &&
+            m_columnCount > 0 && m_columnQualities.empty()) {
+            SVDEBUG << "[whole line classified as a header row]";
+        } else {
+            for (int i = 0; i < cols; ++i) {
+                if (m_columnQualities.find(i) == m_columnQualities.end()) {
+                    SVDEBUG << "(not set) ";
+                } else {
+                    SVDEBUG << int(m_columnQualities[i]) << " ";
+                }
+            }
         }
         SVDEBUG << endl;
+        SVDEBUG << "Estimated header status: " << m_headerStatus << endl;
     }
 }
 
@@ -252,7 +298,11 @@
 
     SVDEBUG << "Estimated column qualities overall: ";
     for (int i = 0; i < m_columnCount; ++i) {
-        SVDEBUG << int(m_columnQualities[i]) << " ";
+        if (m_columnQualities.find(i) == m_columnQualities.end()) {
+            SVDEBUG << "(not set) ";
+        } else {
+            SVDEBUG << int(m_columnQualities[i]) << " ";
+        }
     }
     SVDEBUG << endl;
 
@@ -290,33 +340,56 @@
 
         bool timingColumn = (numeric && increasing);
 
+        QString heading;
+        if (m_columnHeadings.find(i) != m_columnHeadings.end()) {
+            heading = m_columnHeadings[i];
+        }
+        
+        if (heading == "time" || heading == "frame" ||
+            heading == "duration" || heading == "endtime") {
+            timingColumn = true;
+        }
+
+        if (heading == "value" || heading == "height" || heading == "label") {
+            timingColumn = false;
+        }
+        
         if (timingColumn) {
 
             ++timingColumnCount;
+
+            if (heading == "endtime") {
+
+                purpose = ColumnEndTime;
+                haveDurationOrEndTime = true;
+
+            } else if (heading == "duration") {
+
+                purpose = ColumnDuration;
+                haveDurationOrEndTime = true;
                               
-            if (primary) {
+            } else if (primary || heading == "time" || heading == "frame") {
 
                 purpose = ColumnStartTime;
-
                 m_timingType = ExplicitTiming;
 
-                if (integral && large) {
+                if ((integral && large) || heading == "frame") {
                     m_timeUnits = TimeAudioFrames;
                 } else {
                     m_timeUnits = TimeSeconds;
                 }
 
-            } else {
-
-                if (timingColumnCount == 2 && m_timingType == ExplicitTiming) {
-                    purpose = ColumnEndTime;
-                    haveDurationOrEndTime = true;
-                }
+            } else if (timingColumnCount == 2 &&
+                       m_timingType == ExplicitTiming) {
+                purpose = ColumnEndTime;
+                haveDurationOrEndTime = true;
             }
         }
 
         if (purpose == ColumnUnknown) {
-            if (numeric) {
+            if (heading == "label") {
+                purpose = ColumnLabel;
+            } else if (numeric || heading == "value" || heading == "height") {
                 purpose = ColumnValue;
             } else {
                 purpose = ColumnLabel;
@@ -328,7 +401,9 @@
 
     int valueCount = 0;
     for (int i = 0; i < m_columnCount; ++i) {
-        if (m_columnPurposes[i] == ColumnValue) ++valueCount;
+        if (m_columnPurposes[i] == ColumnValue) {
+            ++valueCount;
+        }
     }
 
     if (valueCount == 2 && timingColumnCount == 1) {
@@ -455,33 +530,51 @@
     m_audioSampleRange = range;
 }
 
-CSVFormat::ColumnPurpose
-CSVFormat::getColumnPurpose(int i)
+QList<CSVFormat::ColumnPurpose>
+CSVFormat::getColumnPurposes() const
 {
-    while (m_columnPurposes.size() <= i) {
-        m_columnPurposes.push_back(ColumnUnknown);
+    QList<ColumnPurpose> purposes;
+    for (int i = 0; i < m_columnCount; ++i) {
+        purposes.push_back(getColumnPurpose(i));
     }
-    return m_columnPurposes[i];
+    return purposes;
+}
+
+void
+CSVFormat::setColumnPurposes(QList<ColumnPurpose> cl)
+{
+    m_columnPurposes.clear();
+    for (int i = 0; in_range_for(cl, i); ++i) {
+        m_columnPurposes[i] = cl[i];
+    }
 }
 
 CSVFormat::ColumnPurpose
 CSVFormat::getColumnPurpose(int i) const
 {
-    if (m_columnPurposes.size() <= i) {
+    if (m_columnPurposes.find(i) == m_columnPurposes.end()) {
         return ColumnUnknown;
+    } else {
+        return m_columnPurposes.at(i);
     }
-    return m_columnPurposes[i];
 }
 
 void
 CSVFormat::setColumnPurpose(int i, ColumnPurpose p)
 {
-    while (m_columnPurposes.size() <= i) {
-        m_columnPurposes.push_back(ColumnUnknown);
-    }
     m_columnPurposes[i] = p;
 }
 
-
-
-
+QList<CSVFormat::ColumnQualities>
+CSVFormat::getColumnQualities() const
+{
+    QList<ColumnQualities> qualities;
+    for (int i = 0; i < m_columnCount; ++i) {
+        if (m_columnQualities.find(i) == m_columnQualities.end()) {
+            qualities.push_back(0);
+        } else {
+            qualities.push_back(m_columnQualities.at(i));
+        }
+    }
+    return qualities;
+}
--- a/data/fileio/CSVFormat.h	Tue Jun 16 15:15:57 2020 +0100
+++ b/data/fileio/CSVFormat.h	Wed Jun 17 18:01:00 2020 +0100
@@ -20,6 +20,7 @@
 #include <QStringList>
 
 #include <set>
+#include <map>
 
 #include "base/BaseTypes.h"
 
@@ -58,6 +59,12 @@
         ColumnLabel
     };
 
+    enum HeaderStatus {
+        HeaderUnknown = 0,
+        HeaderAbsent  = 1,
+        HeaderPresent = 2
+    };
+    
     enum ColumnQuality {
         ColumnNumeric    = 1,   // No non-numeric values were seen in sample
         ColumnIntegral   = 2,   // All sampled values were integers
@@ -83,6 +90,7 @@
         m_separator(""),
         m_sampleRate(44100),
         m_windowSize(1024),
+        m_headerStatus(HeaderUnknown),
         m_columnCount(0),
         m_variableColumnCount(false),
         m_audioSampleRange(SampleRangeOther),
@@ -122,6 +130,7 @@
     int          getColumnCount()   const { return m_columnCount;   }
     AudioSampleRange getAudioSampleRange() const { return m_audioSampleRange; }
     bool         getAllowQuoting()  const { return m_allowQuoting;  }
+    HeaderStatus getHeaderStatus()  const { return m_headerStatus; }
     QChar        getSeparator()     const { 
         if (m_separator == "") return ',';
         else return m_separator[0];
@@ -140,24 +149,19 @@
     void setColumnCount(int c)            { m_columnCount  = c; }
     void setAudioSampleRange(AudioSampleRange r) { m_audioSampleRange = r; }
     void setAllowQuoting(bool q)          { m_allowQuoting = q; }
+    void setHeaderStatus(HeaderStatus s)  { m_headerStatus = s; }
 
-    QList<ColumnPurpose> getColumnPurposes() const { return m_columnPurposes; }
-    void setColumnPurposes(QList<ColumnPurpose> cl) { m_columnPurposes = cl; }
+    QList<ColumnPurpose> getColumnPurposes() const;
+    void setColumnPurposes(QList<ColumnPurpose> cl);
 
-    ColumnPurpose getColumnPurpose(int i);
     ColumnPurpose getColumnPurpose(int i) const;
     void setColumnPurpose(int i, ColumnPurpose p);
     
-    // read-only; only valid if format has been guessed:
-    const QList<ColumnQualities> &getColumnQualities() const {
-        return m_columnQualities;
-    }
+    // only valid if format has been guessed:
+    QList<ColumnQualities> getColumnQualities() const;
 
-    // read-only; only valid if format has been guessed:
-    const QList<QStringList> &getExample() const {
-        return m_example;
-    }
-    
+    // only valid if format has been guessed:
+    QList<QStringList> getExample() const { return m_example; }
     int getMaxExampleCols() const { return m_maxExampleCols; }
         
 protected:
@@ -168,17 +172,19 @@
     std::set<QChar> m_plausibleSeparators;
     sv_samplerate_t m_sampleRate;
     int          m_windowSize;
+    HeaderStatus m_headerStatus;
 
     int          m_columnCount;
     bool         m_variableColumnCount;
 
-    QList<ColumnQualities> m_columnQualities;
-    QList<ColumnPurpose> m_columnPurposes;
+    std::map<int, ColumnQualities> m_columnQualities;
+    std::map<int, ColumnPurpose> m_columnPurposes;
+    std::map<int, QString> m_columnHeadings;
 
+    std::map<int, float> m_prevValues;
+    
     AudioSampleRange m_audioSampleRange;
 
-    QList<float> m_prevValues;
-
     bool m_allowQuoting;
 
     QList<QStringList> m_example;
--- a/data/fileio/test/CSVFormatTest.h	Tue Jun 16 15:15:57 2020 +0100
+++ b/data/fileio/test/CSVFormatTest.h	Wed Jun 17 18:01:00 2020 +0100
@@ -105,6 +105,7 @@
     
     void comment() {
         CSVFormat f;
+        f.setHeaderStatus(CSVFormat::HeaderAbsent);
         QVERIFY(f.guessFormatFor(csvDir.filePath("comment.csv")));
         QCOMPARE(f.getSeparator(), QChar(','));
         QCOMPARE(f.getColumnCount(), 4);
@@ -142,6 +143,18 @@
         CSVFormat f;
         QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-1d-samples.csv")));
         QCOMPARE(f.getColumnCount(), 1);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderAbsent);
+        QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
+        QCOMPARE(f.getTimingType(), CSVFormat::ExplicitTiming);
+        QCOMPARE(f.getTimeUnits(), CSVFormat::TimeAudioFrames);
+        QCOMPARE(f.getModelType(), CSVFormat::OneDimensionalModel);
+    }
+
+    void modelType1DSamplesWithHeader() {
+        CSVFormat f;
+        QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-1d-samples-header.csv")));
+        QCOMPARE(f.getColumnCount(), 1);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderPresent);
         QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
         QCOMPARE(f.getTimingType(), CSVFormat::ExplicitTiming);
         QCOMPARE(f.getTimeUnits(), CSVFormat::TimeAudioFrames);
@@ -152,6 +165,19 @@
         CSVFormat f;
         QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-1d-seconds.csv")));
         QCOMPARE(f.getColumnCount(), 2);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderAbsent);
+        QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
+        QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnLabel);
+        QCOMPARE(f.getTimingType(), CSVFormat::ExplicitTiming);
+        QCOMPARE(f.getTimeUnits(), CSVFormat::TimeSeconds);
+        QCOMPARE(f.getModelType(), CSVFormat::OneDimensionalModel);
+    }
+
+    void modelType1DSecondsWithHeader() {
+        CSVFormat f;
+        QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-1d-seconds-header.csv")));
+        QCOMPARE(f.getColumnCount(), 2);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderPresent);
         QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
         QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnLabel);
         QCOMPARE(f.getTimingType(), CSVFormat::ExplicitTiming);
@@ -163,6 +189,19 @@
         CSVFormat f;
         QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-2d-samples.csv")));
         QCOMPARE(f.getColumnCount(), 2);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderAbsent);
+        QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
+        QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnValue);
+        QCOMPARE(f.getTimingType(), CSVFormat::ExplicitTiming);
+        QCOMPARE(f.getTimeUnits(), CSVFormat::TimeAudioFrames);
+        QCOMPARE(f.getModelType(), CSVFormat::TwoDimensionalModel);
+    }
+
+    void modelType2DSamplesWithHeader() {
+        CSVFormat f;
+        QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-2d-samples-header.csv")));
+        QCOMPARE(f.getColumnCount(), 2);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderPresent);
         QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
         QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnValue);
         QCOMPARE(f.getTimingType(), CSVFormat::ExplicitTiming);
@@ -174,6 +213,19 @@
         CSVFormat f;
         QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-2d-seconds.csv")));
         QCOMPARE(f.getColumnCount(), 2);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderAbsent);
+        QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
+        QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnValue);
+        QCOMPARE(f.getTimingType(), CSVFormat::ExplicitTiming);
+        QCOMPARE(f.getTimeUnits(), CSVFormat::TimeSeconds);
+        QCOMPARE(f.getModelType(), CSVFormat::TwoDimensionalModel);
+    }
+ 
+    void modelType2DSecondsWithHeader() {
+        CSVFormat f;
+        QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-2d-seconds-header.csv")));
+        QCOMPARE(f.getColumnCount(), 2);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderPresent);
         QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
         QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnValue);
         QCOMPARE(f.getTimingType(), CSVFormat::ExplicitTiming);
@@ -185,6 +237,16 @@
         CSVFormat f;
         QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-2d-implicit.csv")));
         QCOMPARE(f.getColumnCount(), 1);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderAbsent);
+        QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnValue);
+        QCOMPARE(f.getTimingType(), CSVFormat::ImplicitTiming);
+    }
+    
+    void modelType2DImplicitWithHeader() {
+        CSVFormat f;
+        QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-2d-implicit-header.csv")));
+        QCOMPARE(f.getColumnCount(), 2);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderPresent);
         QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnValue);
         QCOMPARE(f.getTimingType(), CSVFormat::ImplicitTiming);
     }
@@ -193,6 +255,7 @@
         CSVFormat f;
         QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-2d-endtime-samples.csv")));
         QCOMPARE(f.getColumnCount(), 3);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderAbsent);
         QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
         QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnEndTime);
         QCOMPARE(f.getColumnPurpose(2), CSVFormat::ColumnValue);
@@ -201,10 +264,24 @@
         QCOMPARE(f.getModelType(), CSVFormat::TwoDimensionalModelWithDuration);
     }
     
+    void modelType2DEndTimeSamplesWithHeader() {
+        CSVFormat f;
+        QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-2d-endtime-samples-header.csv")));
+        QCOMPARE(f.getColumnCount(), 3);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderPresent);
+        QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
+        QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(2), CSVFormat::ColumnEndTime);
+        QCOMPARE(f.getTimingType(), CSVFormat::ExplicitTiming);
+        QCOMPARE(f.getTimeUnits(), CSVFormat::TimeAudioFrames);
+        QCOMPARE(f.getModelType(), CSVFormat::TwoDimensionalModelWithDuration);
+    }
+    
     void modelType2DEndTimeSeconds() {
         CSVFormat f;
         QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-2d-endtime-seconds.csv")));
         QCOMPARE(f.getColumnCount(), 3);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderAbsent);
         QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
         QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnEndTime);
         QCOMPARE(f.getColumnPurpose(2), CSVFormat::ColumnValue);
@@ -213,10 +290,24 @@
         QCOMPARE(f.getModelType(), CSVFormat::TwoDimensionalModelWithDuration);
     }
     
+    void modelType2DEndTimeSecondsWithHeader() {
+        CSVFormat f;
+        QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-2d-endtime-seconds-header.csv")));
+        QCOMPARE(f.getColumnCount(), 3);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderPresent);
+        QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
+        QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(2), CSVFormat::ColumnEndTime);
+        QCOMPARE(f.getTimingType(), CSVFormat::ExplicitTiming);
+        QCOMPARE(f.getTimeUnits(), CSVFormat::TimeSeconds);
+        QCOMPARE(f.getModelType(), CSVFormat::TwoDimensionalModelWithDuration);
+    }
+    
     void modelType2DDurationSamples() {
         CSVFormat f;
         QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-2d-duration-samples.csv")));
         QCOMPARE(f.getColumnCount(), 3);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderAbsent);
         QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
         QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnDuration);
         QCOMPARE(f.getColumnPurpose(2), CSVFormat::ColumnValue);
@@ -224,11 +315,25 @@
         QCOMPARE(f.getTimeUnits(), CSVFormat::TimeAudioFrames);
         QCOMPARE(f.getModelType(), CSVFormat::TwoDimensionalModelWithDuration);
     }
+    
+    void modelType2DDurationSamplesWithHeader() {
+        CSVFormat f;
+        QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-2d-duration-samples-header.csv")));
+        QCOMPARE(f.getColumnCount(), 3);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderPresent);
+        QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
+        QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(2), CSVFormat::ColumnDuration);
+        QCOMPARE(f.getTimingType(), CSVFormat::ExplicitTiming);
+        QCOMPARE(f.getTimeUnits(), CSVFormat::TimeAudioFrames);
+        QCOMPARE(f.getModelType(), CSVFormat::TwoDimensionalModelWithDuration);
+    }
         
     void modelType2DDurationSeconds() {
         CSVFormat f;
         QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-2d-duration-seconds.csv")));
         QCOMPARE(f.getColumnCount(), 3);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderAbsent);
         QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
         QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnDuration);
         QCOMPARE(f.getColumnPurpose(2), CSVFormat::ColumnValue);
@@ -237,10 +342,41 @@
         QCOMPARE(f.getModelType(), CSVFormat::TwoDimensionalModelWithDuration);
     }
         
+    void modelType2DDurationSecondsWithHeader() {
+        CSVFormat f;
+        QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-2d-duration-seconds-header.csv")));
+        QCOMPARE(f.getColumnCount(), 3);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderPresent);
+        QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
+        QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(2), CSVFormat::ColumnDuration);
+        QCOMPARE(f.getTimingType(), CSVFormat::ExplicitTiming);
+        QCOMPARE(f.getTimeUnits(), CSVFormat::TimeSeconds);
+        QCOMPARE(f.getModelType(), CSVFormat::TwoDimensionalModelWithDuration);
+    }
+        
     void modelType3DSamples() {
         CSVFormat f;
         QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-3d-samples.csv")));
         QCOMPARE(f.getColumnCount(), 7);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderAbsent);
+        QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
+        QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(2), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(3), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(4), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(5), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(6), CSVFormat::ColumnValue);
+        QCOMPARE(f.getTimingType(), CSVFormat::ExplicitTiming);
+        QCOMPARE(f.getTimeUnits(), CSVFormat::TimeAudioFrames);
+        QCOMPARE(f.getModelType(), CSVFormat::ThreeDimensionalModel);
+    }
+        
+    void modelType3DSamplesWithHeader() {
+        CSVFormat f;
+        QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-3d-samples-header.csv")));
+        QCOMPARE(f.getColumnCount(), 7);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderPresent);
         QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
         QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnValue);
         QCOMPARE(f.getColumnPurpose(2), CSVFormat::ColumnValue);
@@ -257,6 +393,24 @@
         CSVFormat f;
         QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-3d-seconds.csv")));
         QCOMPARE(f.getColumnCount(), 7);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderAbsent);
+        QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
+        QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(2), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(3), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(4), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(5), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(6), CSVFormat::ColumnValue);
+        QCOMPARE(f.getTimingType(), CSVFormat::ExplicitTiming);
+        QCOMPARE(f.getTimeUnits(), CSVFormat::TimeSeconds);
+        QCOMPARE(f.getModelType(), CSVFormat::ThreeDimensionalModel);
+    }
+         
+    void modelType3DSecondsWithHeader() {
+        CSVFormat f;
+        QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-3d-seconds-header.csv")));
+        QCOMPARE(f.getColumnCount(), 7);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderPresent);
         QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnStartTime);
         QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnValue);
         QCOMPARE(f.getColumnPurpose(2), CSVFormat::ColumnValue);
@@ -273,6 +427,22 @@
         CSVFormat f;
         QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-3d-implicit.csv")));
         QCOMPARE(f.getColumnCount(), 6);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderAbsent);
+        QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(2), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(3), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(4), CSVFormat::ColumnValue);
+        QCOMPARE(f.getColumnPurpose(5), CSVFormat::ColumnValue);
+        QCOMPARE(f.getTimingType(), CSVFormat::ImplicitTiming);
+        QCOMPARE(f.getModelType(), CSVFormat::ThreeDimensionalModel);
+    }
+         
+    void modelType3DImplicitWithHeader() {
+        CSVFormat f;
+        QVERIFY(f.guessFormatFor(csvDir.filePath("model-type-3d-implicit-header.csv")));
+        QCOMPARE(f.getColumnCount(), 6);
+        QCOMPARE(f.getHeaderStatus(), CSVFormat::HeaderPresent);
         QCOMPARE(f.getColumnPurpose(0), CSVFormat::ColumnValue);
         QCOMPARE(f.getColumnPurpose(1), CSVFormat::ColumnValue);
         QCOMPARE(f.getColumnPurpose(2), CSVFormat::ColumnValue);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/fileio/test/csv/model-type-1d-samples-header.csv	Wed Jun 17 18:01:00 2020 +0100
@@ -0,0 +1,6 @@
+FRAME,LABEL
+45678
+123239
+320130
+452103
+620301
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/fileio/test/csv/model-type-1d-seconds-header.csv	Wed Jun 17 18:01:00 2020 +0100
@@ -0,0 +1,6 @@
+TIME,LABEL
+3.200000000,1
+4.400000000,2
+5.500000000,3
+6.300000000,4
+7.800000000,5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/fileio/test/csv/model-type-2d-duration-samples-header.csv	Wed Jun 17 18:01:00 2020 +0100
@@ -0,0 +1,6 @@
+FRAME,VALUE,DURATION,LABEL
+45678,4,123
+123239,4.2,4214
+320130,0.4,12312
+452103,3.8,4123
+620301,-2.3,987654
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/fileio/test/csv/model-type-2d-duration-seconds-header.csv	Wed Jun 17 18:01:00 2020 +0100
@@ -0,0 +1,6 @@
+TIME,VALUE,DURATION,LABEL
+1.100000000,620,1.4
+2.200000000,880,3.2
+3.300000000,440,3.5
+4.400000000,213,4.5
+5.500000000,123,6.1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/fileio/test/csv/model-type-2d-endtime-samples-header.csv	Wed Jun 17 18:01:00 2020 +0100
@@ -0,0 +1,6 @@
+FRAME,VALUE,ENDFRAME,LABEL
+45678,4,49000
+123239,4.2,330123
+320130,0.4,350000
+452103,3.8,540325
+620301,-2.3,850000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/fileio/test/csv/model-type-2d-endtime-seconds-header.csv	Wed Jun 17 18:01:00 2020 +0100
@@ -0,0 +1,6 @@
+TIME,VALUE,ENDTIME,LABEL
+1.100000000,4,1.4
+2.200000000,4.2,5.1
+3.300000000,0.4,4.5
+4.400000000,3.8,4.6
+5.500000000,-2.3,5.51
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/fileio/test/csv/model-type-2d-implicit-header.csv	Wed Jun 17 18:01:00 2020 +0100
@@ -0,0 +1,6 @@
+VALUE,LABEL
+4
+4.2
+0.4,A label
+3.8
+-2.3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/fileio/test/csv/model-type-2d-samples-header.csv	Wed Jun 17 18:01:00 2020 +0100
@@ -0,0 +1,6 @@
+FRAME,VALUE,LABEL
+45678,4
+123239,4.2
+320130,0.4
+452103,3.8
+620301,-2.3
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/fileio/test/csv/model-type-2d-seconds-header.csv	Wed Jun 17 18:01:00 2020 +0100
@@ -0,0 +1,6 @@
+TIME,VALUE,LABEL
+1.100000000,4
+2.200000000,4.2
+3.300000000,0.4
+4.400000000,3.8
+5.500000000,-2.3
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/fileio/test/csv/model-type-3d-implicit-header.csv	Wed Jun 17 18:01:00 2020 +0100
@@ -0,0 +1,7 @@
+BIN 1,BIN 2,BIN 3,BIN 4,BIN 5,BIN 6
+143,2,-1.3,0,0,1
+0.2,0.1,-3,0,0.1,0.143
+0.143,0.2,-3.1,0,0,0.1
+2,1,-0.3,0,1,143
+0,0,0.1,0.143,0.2,-3.1
+0,1,143,2,1,-0.3
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/fileio/test/csv/model-type-3d-samples-header.csv	Wed Jun 17 18:01:00 2020 +0100
@@ -0,0 +1,7 @@
+FRAME,BIN 1,BIN 2,BIN 3,BIN 4,BIN 5,BIN 6
+22050,143,2,-1.3,0,0,1
+44100,0.2,0.1,-3,0,0.1,0.143
+66150,0.143,0.2,-3.1,0,0,0.1
+88200,2,1,-0.3,0,1,143
+110250,0,0,0.1,0.143,0.2,-3.1
+132300,0,1,143,2,1,-0.3
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/fileio/test/csv/model-type-3d-seconds-header.csv	Wed Jun 17 18:01:00 2020 +0100
@@ -0,0 +1,7 @@
+TIME,BIN 1,BIN 2,BIN 3,BIN 4,BIN 5,BIN 6
+1.100000000,143,2,-1.3,0,0,1
+2.200000000,0.2,0.1,-3,0,0.1,0.143
+3.300000000,0.143,0.2,-3.1,0,0,0.1
+4.400000000,2,1,-0.3,0,1,143
+5.500000000,0,0,0.1,0.143,0.2,-3.1
+6.600000000,0,1,143,2,1,-0.3
\ No newline at end of file