Mercurial > hg > svcore
diff data/fileio/CSVFormat.cpp @ 1527:710e6250a401 zoom
Merge from default branch
author | Chris Cannam |
---|---|
date | Mon, 17 Sep 2018 13:51:14 +0100 |
parents | a92e94215863 |
children | 9570ef94eaa3 |
line wrap: on
line diff
--- a/data/fileio/CSVFormat.cpp Mon Dec 12 15:18:52 2016 +0000 +++ b/data/fileio/CSVFormat.cpp Mon Sep 17 13:51:14 2018 +0100 @@ -25,18 +25,22 @@ #include <iostream> +#include "base/Debug.h" + CSVFormat::CSVFormat(QString path) : m_separator(""), m_sampleRate(44100), m_windowSize(1024), m_allowQuoting(true) { - guessFormatFor(path); + (void)guessFormatFor(path); } -void +bool CSVFormat::guessFormatFor(QString path) { + m_separator = ""; // to prompt guessing for it + m_modelType = TwoDimensionalModel; m_timingType = ExplicitTiming; m_timeUnits = TimeSeconds; @@ -51,8 +55,17 @@ m_prevValues.clear(); QFile file(path); - if (!file.exists()) return; - if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return; + if (!file.exists()) { + SVCERR << "CSVFormat::guessFormatFor(" << path + << "): File does not exist" << endl; + return false; + } + if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) { + SVCERR << "CSVFormat::guessFormatFor(" << path + << "): File could not be opened for reading" << endl; + return false; + } + SVDEBUG << "CSVFormat::guessFormatFor(" << path << ")" << endl; QTextStream in(&file); in.seek(0); @@ -69,38 +82,52 @@ for (int li = 0; li < lines.size(); ++li) { QString line = lines[li]; - if (line.startsWith("#") || line == "") continue; + if (line.startsWith("#") || line == "") { + continue; + } guessQualities(line, lineno); ++lineno; } - if (lineno >= 50) break; + if (lineno >= 150) break; } guessPurposes(); + guessAudioSampleRange(); + + return true; } void CSVFormat::guessSeparator(QString line) { - char candidates[] = { ',', '\t', ' ', '|', '/', ':' }; - for (int i = 0; i < int(sizeof(candidates)/sizeof(candidates[0])); ++i) { - if (StringBits::split(line, candidates[i], m_allowQuoting).size() >= 2) { + QString candidates = "\t|,/: "; + + for (int i = 0; i < candidates.length(); ++i) { + auto bits = StringBits::split(line, candidates[i], m_allowQuoting); + if (bits.size() >= 2) { + SVDEBUG << "Successfully split the line into:" << endl; + for (auto b: bits) { + SVDEBUG << b << endl; + } m_separator = candidates[i]; + SVDEBUG << "Estimated column separator: '" << m_separator + << "'" << endl; return; } } - m_separator = " "; } void CSVFormat::guessQualities(QString line, int lineno) { - if (m_separator == "") guessSeparator(line); + if (m_separator == "") { + guessSeparator(line); + } - QStringList list = StringBits::split(line, m_separator[0], m_allowQuoting); + QStringList list = StringBits::split(line, getSeparator(), m_allowQuoting); int cols = list.size(); if (lineno == 0 || (cols > m_columnCount)) m_columnCount = cols; @@ -110,10 +137,11 @@ // something that indicates otherwise: ColumnQualities defaultQualities = - ColumnNumeric | ColumnIntegral | ColumnIncreasing | ColumnNearEmpty; + ColumnNumeric | ColumnIntegral | ColumnSmall | + ColumnIncreasing | ColumnNearEmpty; for (int i = 0; i < cols; ++i) { - + while (m_columnQualities.size() <= i) { m_columnQualities.push_back(defaultQualities); m_prevValues.push_back(0.f); @@ -124,10 +152,15 @@ ColumnQualities qualities = m_columnQualities[i]; +// Looks like this is defined on Windows +#undef small + bool numeric = (qualities & ColumnNumeric); bool integral = (qualities & ColumnIntegral); bool increasing = (qualities & ColumnIncreasing); + bool small = (qualities & ColumnSmall); bool large = (qualities & ColumnLarge); // this one defaults to off + bool signd = (qualities & ColumnSigned); // also defaults to off bool emptyish = (qualities & ColumnNearEmpty); if (lineno > 1 && s.trimmed() != "") { @@ -144,9 +177,25 @@ value = (float)StringBits::stringToDoubleLocaleFree(s, &ok); } if (ok) { - if (lineno < 2 && value > 1000.f) large = true; + if (lineno < 2 && value > 1000.f) { + large = true; + } + if (value < 0.f) { + signd = true; + } + if (value < -1.f || value > 1.f) { + small = false; + } } else { numeric = false; + + // If the column is not numeric, it can't be any of + // these things either + integral = false; + increasing = false; + small = false; + large = false; + signd = false; } } @@ -166,12 +215,14 @@ m_prevValues[i] = value; } - + m_columnQualities[i] = (numeric ? ColumnNumeric : 0) | (integral ? ColumnIntegral : 0) | (increasing ? ColumnIncreasing : 0) | + (small ? ColumnSmall : 0) | (large ? ColumnLarge : 0) | + (signd ? ColumnSigned : 0) | (emptyish ? ColumnNearEmpty : 0); } @@ -182,11 +233,13 @@ } } -// cerr << "Estimated column qualities: "; -// for (int i = 0; i < m_columnCount; ++i) { -// cerr << int(m_columnQualities[i]) << " "; -// } -// cerr << endl; + if (lineno < 10) { + SVDEBUG << "Estimated column qualities for line " << lineno << " (reporting up to first 10): "; + for (int i = 0; i < m_columnCount; ++i) { + SVDEBUG << int(m_columnQualities[i]) << " "; + } + SVDEBUG << endl; + } } void @@ -194,8 +247,15 @@ { m_timingType = CSVFormat::ImplicitTiming; m_timeUnits = CSVFormat::TimeWindows; - + int timingColumnCount = 0; + bool haveDurationOrEndTime = false; + + SVDEBUG << "Estimated column qualities overall: "; + for (int i = 0; i < m_columnCount; ++i) { + SVDEBUG << int(m_columnQualities[i]) << " "; + } + SVDEBUG << endl; // if our first column has zero or one entries in it and the rest // have more, then we'll default to ignoring the first column and @@ -251,6 +311,7 @@ if (timingColumnCount == 2 && m_timingType == ExplicitTiming) { purpose = ColumnEndTime; + haveDurationOrEndTime = true; } } } @@ -294,15 +355,17 @@ if (m_columnQualities[timecol] & ColumnIncreasing) { // This shouldn't happen; should have been settled above m_columnPurposes[timecol] = ColumnEndTime; + haveDurationOrEndTime = true; } else { m_columnPurposes[timecol] = ColumnDuration; + haveDurationOrEndTime = true; } --valueCount; } } } - if (timingColumnCount > 1) { + if (timingColumnCount > 1 || haveDurationOrEndTime) { m_modelType = TwoDimensionalModelWithDuration; } else { if (valueCount == 0) { @@ -314,15 +377,83 @@ } } -// cerr << "Estimated column purposes: "; -// for (int i = 0; i < m_columnCount; ++i) { -// cerr << int(m_columnPurposes[i]) << " "; -// } -// cerr << endl; + SVDEBUG << "Estimated column purposes: "; + for (int i = 0; i < m_columnCount; ++i) { + SVDEBUG << int(m_columnPurposes[i]) << " "; + } + SVDEBUG << endl; -// cerr << "Estimated model type: " << m_modelType << endl; -// cerr << "Estimated timing type: " << m_timingType << endl; -// cerr << "Estimated units: " << m_timeUnits << endl; + SVDEBUG << "Estimated model type: " << m_modelType << endl; + SVDEBUG << "Estimated timing type: " << m_timingType << endl; + SVDEBUG << "Estimated units: " << m_timeUnits << endl; +} + +void +CSVFormat::guessAudioSampleRange() +{ + AudioSampleRange range = SampleRangeSigned1; + + range = SampleRangeSigned1; + bool knownSigned = false; + bool knownNonIntegral = false; + + SVDEBUG << "CSVFormat::guessAudioSampleRange: starting with assumption of " + << range << endl; + + for (int i = 0; i < m_columnCount; ++i) { + if (m_columnPurposes[i] != ColumnValue) { + SVDEBUG << "... column " << i + << " is not apparently a value, ignoring" << endl; + continue; + } + if (!(m_columnQualities[i] & ColumnIntegral)) { + knownNonIntegral = true; + if (range == SampleRangeUnsigned255 || + range == SampleRangeSigned32767) { + range = SampleRangeOther; + } + SVDEBUG << "... column " << i + << " is non-integral, updating range to " << range << endl; + } + if (m_columnQualities[i] & ColumnLarge) { + if (range == SampleRangeSigned1 || + range == SampleRangeUnsigned255) { + if (knownNonIntegral) { + range = SampleRangeOther; + } else { + range = SampleRangeSigned32767; + } + } + SVDEBUG << "... column " << i << " is large, updating range to " + << range << endl; + } + if (m_columnQualities[i] & ColumnSigned) { + knownSigned = true; + if (range == SampleRangeUnsigned255) { + range = SampleRangeSigned32767; + } + SVDEBUG << "... column " << i << " is signed, updating range to " + << range << endl; + } + if (!(m_columnQualities[i] & ColumnSmall)) { + if (range == SampleRangeSigned1) { + if (knownNonIntegral) { + range = SampleRangeOther; + } else if (knownSigned) { + range = SampleRangeSigned32767; + } else { + range = SampleRangeUnsigned255; + } + } + SVDEBUG << "... column " << i << " is not small, updating range to " + << range << endl; + } + } + + SVDEBUG << "CSVFormat::guessAudioSampleRange: ended up with range " + << range << endl; + + m_audioSampleRange = range; } CSVFormat::ColumnPurpose