diff data/fileio/CSVFormat.cpp @ 1527:710e6250a401 zoom

Merge from default branch
author Chris Cannam
date Mon, 17 Sep 2018 13:51:14 +0100
parents a92e94215863
children 9570ef94eaa3
line wrap: on
line diff
--- a/data/fileio/CSVFormat.cpp	Mon Dec 12 15:18:52 2016 +0000
+++ b/data/fileio/CSVFormat.cpp	Mon Sep 17 13:51:14 2018 +0100
@@ -25,18 +25,22 @@
 
 #include <iostream>
 
+#include "base/Debug.h"
+
 CSVFormat::CSVFormat(QString path) :
     m_separator(""),
     m_sampleRate(44100),
     m_windowSize(1024),
     m_allowQuoting(true)
 {
-    guessFormatFor(path);
+    (void)guessFormatFor(path);
 }
 
-void
+bool
 CSVFormat::guessFormatFor(QString path)
 {
+    m_separator = ""; // to prompt guessing for it
+
     m_modelType = TwoDimensionalModel;
     m_timingType = ExplicitTiming;
     m_timeUnits = TimeSeconds;
@@ -51,8 +55,17 @@
     m_prevValues.clear();
 
     QFile file(path);
-    if (!file.exists()) return;
-    if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return;
+    if (!file.exists()) {
+        SVCERR << "CSVFormat::guessFormatFor(" << path
+               << "): File does not exist" << endl;
+        return false;
+    }
+    if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
+        SVCERR << "CSVFormat::guessFormatFor(" << path
+               << "): File could not be opened for reading" << endl;
+        return false;
+    }
+    SVDEBUG << "CSVFormat::guessFormatFor(" << path << ")" << endl;
 
     QTextStream in(&file);
     in.seek(0);
@@ -69,38 +82,52 @@
         for (int li = 0; li < lines.size(); ++li) {
 
             QString line = lines[li];
-            if (line.startsWith("#") || line == "") continue;
+            if (line.startsWith("#") || line == "") {
+                continue;
+            }
 
             guessQualities(line, lineno);
 
             ++lineno;
         }
 
-        if (lineno >= 50) break;
+        if (lineno >= 150) break;
     }
 
     guessPurposes();
+    guessAudioSampleRange();
+
+    return true;
 }
 
 void
 CSVFormat::guessSeparator(QString line)
 {
-    char candidates[] = { ',', '\t', ' ', '|', '/', ':' };
-    for (int i = 0; i < int(sizeof(candidates)/sizeof(candidates[0])); ++i) {
-        if (StringBits::split(line, candidates[i], m_allowQuoting).size() >= 2) {
+    QString candidates = "\t|,/: ";
+
+    for (int i = 0; i < candidates.length(); ++i) {
+        auto bits = StringBits::split(line, candidates[i], m_allowQuoting);
+        if (bits.size() >= 2) {
+            SVDEBUG << "Successfully split the line into:" << endl;
+            for (auto b: bits) {
+                SVDEBUG << b << endl;
+            }
             m_separator = candidates[i];
+            SVDEBUG << "Estimated column separator: '" << m_separator
+                    << "'" << endl;
             return;
         }
     }
-    m_separator = " ";
 }
 
 void
 CSVFormat::guessQualities(QString line, int lineno)
 {
-    if (m_separator == "") guessSeparator(line);
+    if (m_separator == "") {
+        guessSeparator(line);
+    }
 
-    QStringList list = StringBits::split(line, m_separator[0], m_allowQuoting);
+    QStringList list = StringBits::split(line, getSeparator(), m_allowQuoting);
 
     int cols = list.size();
     if (lineno == 0 || (cols > m_columnCount)) m_columnCount = cols;
@@ -110,10 +137,11 @@
     // something that indicates otherwise:
 
     ColumnQualities defaultQualities =
-        ColumnNumeric | ColumnIntegral | ColumnIncreasing | ColumnNearEmpty;
+        ColumnNumeric | ColumnIntegral | ColumnSmall |
+        ColumnIncreasing | ColumnNearEmpty;
     
     for (int i = 0; i < cols; ++i) {
-	    
+            
         while (m_columnQualities.size() <= i) {
             m_columnQualities.push_back(defaultQualities);
             m_prevValues.push_back(0.f);
@@ -124,10 +152,15 @@
 
         ColumnQualities qualities = m_columnQualities[i];
 
+// Looks like this is defined on Windows
+#undef small
+        
         bool numeric    = (qualities & ColumnNumeric);
         bool integral   = (qualities & ColumnIntegral);
         bool increasing = (qualities & ColumnIncreasing);
+        bool small      = (qualities & ColumnSmall);
         bool large      = (qualities & ColumnLarge); // this one defaults to off
+        bool signd      = (qualities & ColumnSigned); // also defaults to off
         bool emptyish   = (qualities & ColumnNearEmpty);
 
         if (lineno > 1 && s.trimmed() != "") {
@@ -144,9 +177,25 @@
                 value = (float)StringBits::stringToDoubleLocaleFree(s, &ok);
             }
             if (ok) {
-                if (lineno < 2 && value > 1000.f) large = true;
+                if (lineno < 2 && value > 1000.f) {
+                    large = true;
+                }
+                if (value < 0.f) {
+                    signd = true;
+                }
+                if (value < -1.f || value > 1.f) {
+                    small = false;
+                }
             } else {
                 numeric = false;
+
+                // If the column is not numeric, it can't be any of
+                // these things either
+                integral = false;
+                increasing = false;
+                small = false;
+                large = false;
+                signd = false;
             }
         }
 
@@ -166,12 +215,14 @@
 
             m_prevValues[i] = value;
         }
-
+        
         m_columnQualities[i] =
             (numeric    ? ColumnNumeric : 0) |
             (integral   ? ColumnIntegral : 0) |
             (increasing ? ColumnIncreasing : 0) |
+            (small      ? ColumnSmall : 0) |
             (large      ? ColumnLarge : 0) |
+            (signd      ? ColumnSigned : 0) |
             (emptyish   ? ColumnNearEmpty : 0);
     }
 
@@ -182,11 +233,13 @@
         }
     }
 
-//    cerr << "Estimated column qualities: ";
-//    for (int i = 0; i < m_columnCount; ++i) {
-//        cerr << int(m_columnQualities[i]) << " ";
-//    }
-//    cerr << endl;
+    if (lineno < 10) {
+        SVDEBUG << "Estimated column qualities for line " << lineno << " (reporting up to first 10): ";
+        for (int i = 0; i < m_columnCount; ++i) {
+            SVDEBUG << int(m_columnQualities[i]) << " ";
+        }
+        SVDEBUG << endl;
+    }
 }
 
 void
@@ -194,8 +247,15 @@
 {
     m_timingType = CSVFormat::ImplicitTiming;
     m_timeUnits = CSVFormat::TimeWindows;
-	
+        
     int timingColumnCount = 0;
+    bool haveDurationOrEndTime = false;
+
+    SVDEBUG << "Estimated column qualities overall: ";
+    for (int i = 0; i < m_columnCount; ++i) {
+        SVDEBUG << int(m_columnQualities[i]) << " ";
+    }
+    SVDEBUG << endl;
 
     // if our first column has zero or one entries in it and the rest
     // have more, then we'll default to ignoring the first column and
@@ -251,6 +311,7 @@
 
                 if (timingColumnCount == 2 && m_timingType == ExplicitTiming) {
                     purpose = ColumnEndTime;
+                    haveDurationOrEndTime = true;
                 }
             }
         }
@@ -294,15 +355,17 @@
                 if (m_columnQualities[timecol] & ColumnIncreasing) {
                     // This shouldn't happen; should have been settled above
                     m_columnPurposes[timecol] = ColumnEndTime;
+                    haveDurationOrEndTime = true;
                 } else {
                     m_columnPurposes[timecol] = ColumnDuration;
+                    haveDurationOrEndTime = true;
                 }
                 --valueCount;
             }
         }
     }
 
-    if (timingColumnCount > 1) {
+    if (timingColumnCount > 1 || haveDurationOrEndTime) {
         m_modelType = TwoDimensionalModelWithDuration;
     } else {
         if (valueCount == 0) {
@@ -314,15 +377,83 @@
         }
     }
 
-//    cerr << "Estimated column purposes: ";
-//    for (int i = 0; i < m_columnCount; ++i) {
-//        cerr << int(m_columnPurposes[i]) << " ";
-//    }
-//    cerr << endl;
+    SVDEBUG << "Estimated column purposes: ";
+    for (int i = 0; i < m_columnCount; ++i) {
+        SVDEBUG << int(m_columnPurposes[i]) << " ";
+    }
+    SVDEBUG << endl;
 
-//    cerr << "Estimated model type: " << m_modelType << endl;
-//    cerr << "Estimated timing type: " << m_timingType << endl;
-//    cerr << "Estimated units: " << m_timeUnits << endl;
+    SVDEBUG << "Estimated model type: " << m_modelType << endl;
+    SVDEBUG << "Estimated timing type: " << m_timingType << endl;
+    SVDEBUG << "Estimated units: " << m_timeUnits << endl;
+}
+
+void
+CSVFormat::guessAudioSampleRange()
+{
+    AudioSampleRange range = SampleRangeSigned1;
+    
+    range = SampleRangeSigned1;
+    bool knownSigned = false;
+    bool knownNonIntegral = false;
+
+    SVDEBUG << "CSVFormat::guessAudioSampleRange: starting with assumption of "
+            << range << endl;
+    
+    for (int i = 0; i < m_columnCount; ++i) {
+        if (m_columnPurposes[i] != ColumnValue) {
+            SVDEBUG << "... column " << i
+                    << " is not apparently a value, ignoring" << endl;
+            continue;
+        }
+        if (!(m_columnQualities[i] & ColumnIntegral)) {
+            knownNonIntegral = true;
+            if (range == SampleRangeUnsigned255 ||
+                range == SampleRangeSigned32767) {
+                range = SampleRangeOther;
+            }
+            SVDEBUG << "... column " << i
+                    << " is non-integral, updating range to " << range << endl;
+        }
+        if (m_columnQualities[i] & ColumnLarge) {
+            if (range == SampleRangeSigned1 ||
+                range == SampleRangeUnsigned255) {
+                if (knownNonIntegral) {
+                    range = SampleRangeOther;
+                } else {
+                    range = SampleRangeSigned32767;
+                }
+            }
+            SVDEBUG << "... column " << i << " is large, updating range to "
+                    << range << endl;
+        }
+        if (m_columnQualities[i] & ColumnSigned) {
+            knownSigned = true;
+            if (range == SampleRangeUnsigned255) {
+                range = SampleRangeSigned32767;
+            }
+            SVDEBUG << "... column " << i << " is signed, updating range to "
+                    << range << endl;
+        }
+        if (!(m_columnQualities[i] & ColumnSmall)) {
+            if (range == SampleRangeSigned1) {
+                if (knownNonIntegral) {
+                    range = SampleRangeOther;
+                } else if (knownSigned) {
+                    range = SampleRangeSigned32767;
+                } else {
+                    range = SampleRangeUnsigned255;
+                }
+            }
+            SVDEBUG << "... column " << i << " is not small, updating range to "
+                    << range << endl;
+        }
+    }
+
+    SVDEBUG << "CSVFormat::guessAudioSampleRange: ended up with range "
+            << range << endl;
+    
+    m_audioSampleRange = range;
 }
 
 CSVFormat::ColumnPurpose