Mercurial > hg > svcore
changeset 1021:1888ca033a84
If the first column of a CSV file contains zero or one non-empty values within the first 5 rows, ignore that column by default
author | Chris Cannam |
---|---|
date | Mon, 01 Dec 2014 10:18:55 +0000 (2014-12-01) |
parents | 9c00e7944bf2 |
children | eecf544bed92 |
files | data/fileio/CSVFormat.cpp data/fileio/CSVFormat.h |
diffstat | 2 files changed, 34 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/data/fileio/CSVFormat.cpp Thu Nov 27 17:51:21 2014 +0000 +++ b/data/fileio/CSVFormat.cpp Mon Dec 01 10:18:55 2014 +0000 @@ -110,7 +110,7 @@ // something that indicates otherwise: ColumnQualities defaultQualities = - ColumnNumeric | ColumnIntegral | ColumnIncreasing; + ColumnNumeric | ColumnIntegral | ColumnIncreasing | ColumnNearEmpty; for (int i = 0; i < cols; ++i) { @@ -128,7 +128,12 @@ bool integral = (qualities & ColumnIntegral); bool increasing = (qualities & ColumnIncreasing); bool large = (qualities & ColumnLarge); // this one defaults to off + bool emptyish = (qualities & ColumnNearEmpty); + if (lineno > 1 && s.trimmed() != "") { + emptyish = false; + } + float value = 0.f; //!!! how to take into account headers? @@ -166,7 +171,8 @@ (numeric ? ColumnNumeric : 0) | (integral ? ColumnIntegral : 0) | (increasing ? ColumnIncreasing : 0) | - (large ? ColumnLarge : 0); + (large ? ColumnLarge : 0) | + (emptyish ? ColumnNearEmpty : 0); } if (lineno < 10) { @@ -190,11 +196,31 @@ m_timeUnits = CSVFormat::TimeWindows; int timingColumnCount = 0; + + // if our first column has zero or one entries in it and the rest + // have more, then we'll default to ignoring the first column and + // counting the next one as primary. (e.g. Sonic Annotator output + // with filename at start of first column.) + + int primaryColumnNo = 0; + + if (m_columnCount >= 2) { + if ( (m_columnQualities[0] & ColumnNearEmpty) && + !(m_columnQualities[1] & ColumnNearEmpty)) { + primaryColumnNo = 1; + } + } for (int i = 0; i < m_columnCount; ++i) { ColumnPurpose purpose = ColumnUnknown; - bool primary = (i == 0); + + if (i < primaryColumnNo) { + setColumnPurpose(i, purpose); + continue; + } + + bool primary = (i == primaryColumnNo); ColumnQualities qualities = m_columnQualities[i];
--- a/data/fileio/CSVFormat.h Thu Nov 27 17:51:21 2014 +0000 +++ b/data/fileio/CSVFormat.h Mon Dec 01 10:18:55 2014 +0000 @@ -53,10 +53,11 @@ }; enum ColumnQuality { - ColumnNumeric = 0x1, - ColumnIntegral = 0x2, - ColumnIncreasing = 0x4, - ColumnLarge = 0x8 + ColumnNumeric = 1, + ColumnIntegral = 2, + ColumnIncreasing = 4, + ColumnLarge = 8, + ColumnNearEmpty = 16, }; typedef unsigned int ColumnQualities;