Chris@392: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@392: Chris@392: /* Chris@392: Sonic Visualiser Chris@392: An audio file viewer and annotation editor. Chris@392: Centre for Digital Music, Queen Mary, University of London. Chris@392: This file copyright 2006 Chris Cannam. Chris@392: Chris@392: This program is free software; you can redistribute it and/or Chris@392: modify it under the terms of the GNU General Public License as Chris@392: published by the Free Software Foundation; either version 2 of the Chris@392: License, or (at your option) any later version. See the file Chris@392: COPYING included with this distribution for more information. Chris@392: */ Chris@392: Chris@392: #include "CSVFormat.h" Chris@392: Chris@629: #include "base/StringBits.h" Chris@629: Chris@392: #include Chris@392: #include Chris@392: #include Chris@392: #include Chris@392: #include Chris@392: Chris@392: #include Chris@392: Chris@629: CSVFormat::CSVFormat(QString path) : Chris@629: m_separator(""), Chris@392: m_sampleRate(44100), Chris@392: m_windowSize(1024), Chris@629: m_allowQuoting(true) Chris@392: { Chris@629: guessFormatFor(path); Chris@629: } Chris@629: Chris@629: void Chris@629: CSVFormat::guessFormatFor(QString path) Chris@629: { Chris@629: m_modelType = TwoDimensionalModel; Chris@629: m_timingType = ExplicitTiming; Chris@629: m_durationType = Durations; Chris@629: m_timeUnits = TimeSeconds; Chris@629: m_behaviour = QString::KeepEmptyParts; Chris@629: Chris@629: m_maxExampleCols = 0; Chris@629: m_columnCount = 0; Chris@629: m_variableColumnCount = false; Chris@629: Chris@629: m_example.clear(); Chris@629: m_columnQualities.clear(); Chris@629: m_columnPurposes.clear(); Chris@629: m_prevValues.clear(); Chris@629: Chris@629: QFile file(path); Chris@392: if (!file.exists()) return; Chris@392: if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return; Chris@392: Chris@392: QTextStream in(&file); Chris@392: in.seek(0); Chris@392: Chris@629: int lineno = 0; Chris@392: Chris@392: while (!in.atEnd()) { Chris@392: Chris@392: // See comment about line endings in CSVFileReader::load() Chris@392: Chris@392: QString chunk = in.readLine(); Chris@392: QStringList lines = chunk.split('\r', QString::SkipEmptyParts); Chris@392: Chris@392: for (size_t li = 0; li < lines.size(); ++li) { Chris@392: Chris@392: QString line = lines[li]; Chris@629: if (line.startsWith("#") || line == "") continue; Chris@392: Chris@629: guessQualities(line, lineno); Chris@392: Chris@629: if (++lineno == 50) break; Chris@629: } Chris@629: } Chris@392: Chris@629: guessPurposes(); Chris@629: } Chris@629: Chris@629: void Chris@629: CSVFormat::guessSeparator(QString line) Chris@629: { Chris@629: char candidates[] = { ',', '\t', ' ', '|', '/', ':' }; Chris@629: for (int i = 0; i < sizeof(candidates)/sizeof(candidates[0]); ++i) { Chris@629: if (StringBits::split(line, candidates[i], m_allowQuoting).size() >= 2) { Chris@629: m_separator = candidates[i]; Chris@629: return; Chris@629: } Chris@629: } Chris@629: m_separator = " "; Chris@629: } Chris@629: Chris@629: void Chris@629: CSVFormat::guessQualities(QString line, int lineno) Chris@629: { Chris@629: if (m_separator == "") guessSeparator(line); Chris@629: Chris@629: QStringList list = StringBits::split(line, m_separator[0], m_allowQuoting); Chris@629: Chris@629: int cols = list.size(); Chris@629: if (lineno == 0 || (cols < m_columnCount)) m_columnCount = cols; Chris@629: if (cols != m_columnCount) m_variableColumnCount = true; Chris@629: Chris@629: // All columns are regarded as having these qualities until we see Chris@629: // something that indicates otherwise: Chris@629: Chris@629: ColumnQualities defaultQualities = Chris@629: ColumnNumeric | ColumnIntegral | ColumnIncreasing; Chris@629: Chris@629: for (int i = 0; i < cols; ++i) { Chris@629: Chris@629: while (m_columnQualities.size() <= i) { Chris@629: m_columnQualities.push_back(defaultQualities); Chris@629: m_prevValues.push_back(0.f); Chris@629: } Chris@629: Chris@629: QString s(list[i]); Chris@629: bool ok = false; Chris@629: Chris@629: ColumnQualities qualities = m_columnQualities[i]; Chris@629: Chris@629: bool numeric = (qualities & ColumnNumeric); Chris@629: bool integral = (qualities & ColumnIntegral); Chris@629: bool increasing = (qualities & ColumnIncreasing); Chris@629: bool large = (qualities & ColumnLarge); // this one defaults to off Chris@629: Chris@629: float value = 0.f; Chris@629: Chris@629: //!!! how to take into account headers? Chris@629: Chris@629: if (numeric) { Chris@629: value = s.toFloat(&ok); Chris@629: if (!ok) { Chris@629: value = (float)StringBits::stringToDoubleLocaleFree(s, &ok); Chris@629: } Chris@629: if (ok) { Chris@629: if (lineno < 2 && value > 1000.f) large = true; Chris@629: } else { Chris@629: numeric = false; Chris@629: } Chris@629: } Chris@629: Chris@629: if (numeric) { Chris@629: Chris@629: if (integral) { Chris@629: if (s.contains('.') || s.contains(',')) { Chris@629: integral = false; Chris@392: } Chris@392: } Chris@392: Chris@629: if (increasing) { Chris@629: if (lineno > 0 && value <= m_prevValues[i]) { Chris@629: increasing = false; Chris@392: } Chris@392: } Chris@392: Chris@629: m_prevValues[i] = value; Chris@629: } Chris@392: Chris@629: m_columnQualities[i] = Chris@629: (numeric ? ColumnNumeric : 0) | Chris@629: (integral ? ColumnIntegral : 0) | Chris@629: (increasing ? ColumnIncreasing : 0) | Chris@629: (large ? ColumnLarge : 0); Chris@629: } Chris@392: Chris@629: if (lineno < 10) { Chris@629: m_example.push_back(list); Chris@629: if (lineno == 0 || cols > m_maxExampleCols) { Chris@629: m_maxExampleCols = cols; Chris@392: } Chris@392: } Chris@392: Chris@629: std::cerr << "Estimated column qualities: "; Chris@629: for (int i = 0; i < m_columnCount; ++i) { Chris@629: std::cerr << int(m_columnQualities[i]) << " "; Chris@629: } Chris@629: std::cerr << std::endl; Chris@629: } Chris@629: Chris@629: void Chris@629: CSVFormat::guessPurposes() Chris@629: { Chris@629: while (m_columnPurposes.size() <= m_columnCount) { Chris@629: m_columnPurposes.push_back(ColumnUnknown); Chris@629: } Chris@629: Chris@629: m_timingType = CSVFormat::ImplicitTiming; Chris@629: m_timeUnits = CSVFormat::TimeWindows; Chris@392: Chris@629: int timingColumnCount = 0; Chris@629: Chris@629: for (int i = 0; i < m_columnCount; ++i) { Chris@629: Chris@629: ColumnPurpose purpose = ColumnUnknown; Chris@629: bool primary = (i == 0); Chris@392: Chris@629: ColumnQualities qualities = m_columnQualities[i]; Chris@392: Chris@629: bool numeric = (qualities & ColumnNumeric); Chris@629: bool integral = (qualities & ColumnIntegral); Chris@629: bool increasing = (qualities & ColumnIncreasing); Chris@629: bool large = (qualities & ColumnLarge); Chris@629: Chris@629: bool timingColumn = (numeric && increasing); Chris@629: Chris@629: if (timingColumn) { Chris@629: Chris@629: ++timingColumnCount; Chris@629: Chris@629: if (primary) { Chris@629: Chris@629: purpose = ColumnStartTime; Chris@629: Chris@629: m_timingType = ExplicitTiming; Chris@629: Chris@629: if (integral && large) { Chris@629: m_timeUnits = TimeAudioFrames; Chris@629: } else { Chris@629: m_timeUnits = TimeSeconds; Chris@629: } Chris@629: Chris@629: } else { Chris@629: Chris@629: if (timingColumnCount == 2 && m_timingType == ExplicitTiming) { Chris@629: purpose = ColumnEndTime; Chris@629: m_durationType = EndTimes; Chris@629: } Chris@629: } Chris@629: } Chris@629: Chris@629: if (purpose == ColumnUnknown) { Chris@629: if (numeric) { Chris@629: purpose = ColumnValue; Chris@629: } else { Chris@629: purpose = ColumnLabel; Chris@629: } Chris@629: } Chris@629: Chris@629: m_columnPurposes[i] = purpose; Chris@629: } Chris@629: Chris@629: int valueCount = 0; Chris@629: for (int i = 0; i < m_columnCount; ++i) { Chris@629: if (m_columnPurposes[i] == ColumnValue) ++valueCount; Chris@629: } Chris@629: Chris@629: if (valueCount == 0) { Chris@629: m_modelType = OneDimensionalModel; Chris@629: } else if (valueCount == 1) { Chris@629: m_modelType = TwoDimensionalModel; Chris@392: } else { Chris@629: m_modelType = ThreeDimensionalModel; Chris@629: } Chris@392: Chris@629: std::cerr << "Estimated column purposes: "; Chris@629: for (int i = 0; i < m_columnCount; ++i) { Chris@629: std::cerr << int(m_columnPurposes[i]) << " "; Chris@392: } Chris@629: std::cerr << std::endl; Chris@392: Chris@392: std::cerr << "Estimated model type: " << m_modelType << std::endl; Chris@392: std::cerr << "Estimated timing type: " << m_timingType << std::endl; Chris@628: std::cerr << "Estimated duration type: " << m_durationType << std::endl; Chris@392: std::cerr << "Estimated units: " << m_timeUnits << std::endl; Chris@392: } Chris@392: Chris@629: