changeset 1021:1888ca033a84

If the first column of a CSV file contains at most one non-empty value within the first 5 rows, ignore that column by default
author Chris Cannam
date Mon, 01 Dec 2014 10:18:55 +0000 (2014-12-01)
parents 9c00e7944bf2
children eecf544bed92
files data/fileio/CSVFormat.cpp data/fileio/CSVFormat.h
diffstat 2 files changed, 34 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/data/fileio/CSVFormat.cpp	Thu Nov 27 17:51:21 2014 +0000
+++ b/data/fileio/CSVFormat.cpp	Mon Dec 01 10:18:55 2014 +0000
@@ -110,7 +110,7 @@
     // something that indicates otherwise:
 
     ColumnQualities defaultQualities =
-        ColumnNumeric | ColumnIntegral | ColumnIncreasing;
+        ColumnNumeric | ColumnIntegral | ColumnIncreasing | ColumnNearEmpty;
     
     for (int i = 0; i < cols; ++i) {
 	    
@@ -128,7 +128,12 @@
         bool integral   = (qualities & ColumnIntegral);
         bool increasing = (qualities & ColumnIncreasing);
         bool large      = (qualities & ColumnLarge); // this one defaults to off
+        bool emptyish   = (qualities & ColumnNearEmpty);
 
+        if (lineno > 1 && s.trimmed() != "") {
+            emptyish = false;
+        }
+        
         float value = 0.f;
 
         //!!! how to take into account headers?
@@ -166,7 +171,8 @@
             (numeric    ? ColumnNumeric : 0) |
             (integral   ? ColumnIntegral : 0) |
             (increasing ? ColumnIncreasing : 0) |
-            (large      ? ColumnLarge : 0);
+            (large      ? ColumnLarge : 0) |
+            (emptyish   ? ColumnNearEmpty : 0);
     }
 
     if (lineno < 10) {
@@ -190,11 +196,31 @@
     m_timeUnits = CSVFormat::TimeWindows;
 	
     int timingColumnCount = 0;
+
+    // if our first column has zero or one entries in it and the rest
+    // have more, then we'll default to ignoring the first column and
+    // counting the next one as primary. (e.g. Sonic Annotator output
+    // with filename at start of first column.)
+
+    int primaryColumnNo = 0;
+
+    if (m_columnCount >= 2) {
+        if ( (m_columnQualities[0] & ColumnNearEmpty) &&
+            !(m_columnQualities[1] & ColumnNearEmpty)) {
+            primaryColumnNo = 1;
+        }
+    }
     
     for (int i = 0; i < m_columnCount; ++i) {
         
         ColumnPurpose purpose = ColumnUnknown;
-        bool primary = (i == 0);
+
+        if (i < primaryColumnNo) {
+            setColumnPurpose(i, purpose);
+            continue;
+        }
+        
+        bool primary = (i == primaryColumnNo);
 
         ColumnQualities qualities = m_columnQualities[i];
 
--- a/data/fileio/CSVFormat.h	Thu Nov 27 17:51:21 2014 +0000
+++ b/data/fileio/CSVFormat.h	Mon Dec 01 10:18:55 2014 +0000
@@ -53,10 +53,11 @@
     };
 
     enum ColumnQuality {
-        ColumnNumeric    = 0x1,
-        ColumnIntegral   = 0x2,
-        ColumnIncreasing = 0x4,
-        ColumnLarge      = 0x8
+        ColumnNumeric    = 1,
+        ColumnIntegral   = 2,
+        ColumnIncreasing = 4,
+        ColumnLarge      = 8,
+        ColumnNearEmpty  = 16,
     };
     typedef unsigned int ColumnQualities;