Chris@392: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@392: Chris@392: /* Chris@392: Sonic Visualiser Chris@392: An audio file viewer and annotation editor. Chris@392: Centre for Digital Music, Queen Mary, University of London. Chris@392: This file copyright 2006 Chris Cannam. Chris@392: Chris@392: This program is free software; you can redistribute it and/or Chris@392: modify it under the terms of the GNU General Public License as Chris@392: published by the Free Software Foundation; either version 2 of the Chris@392: License, or (at your option) any later version. See the file Chris@392: COPYING included with this distribution for more information. Chris@392: */ Chris@392: Chris@392: #include "CSVFormat.h" Chris@392: Chris@392: #include Chris@392: #include Chris@392: #include Chris@392: #include Chris@392: #include Chris@392: Chris@392: #include Chris@392: Chris@392: CSVFormat::CSVFormat(QString filename) : Chris@392: m_modelType(TwoDimensionalModel), Chris@392: m_timingType(ExplicitTiming), Chris@392: m_timeUnits(TimeSeconds), Chris@392: m_separator(","), Chris@392: m_sampleRate(44100), Chris@392: m_windowSize(1024), Chris@392: m_behaviour(QString::KeepEmptyParts), Chris@392: m_maxExampleCols(0) Chris@392: { Chris@392: QFile file(filename); Chris@392: if (!file.exists()) return; Chris@392: if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return; Chris@392: Chris@392: QTextStream in(&file); Chris@392: in.seek(0); Chris@392: Chris@392: unsigned int lineno = 0; Chris@392: Chris@392: bool nonIncreasingPrimaries = false; Chris@392: bool nonNumericPrimaries = false; Chris@392: bool floatPrimaries = false; Chris@392: bool variableItemCount = false; Chris@392: int itemCount = 1; Chris@392: int earliestNonNumericItem = -1; Chris@392: Chris@392: float prevPrimary = 0.0; Chris@392: Chris@392: m_maxExampleCols = 0; Chris@392: m_separator = ""; Chris@392: Chris@392: while (!in.atEnd()) { Chris@392: Chris@392: // See comment about line endings in CSVFileReader::load() Chris@392: Chris@392: QString chunk = in.readLine(); Chris@392: QStringList lines = chunk.split('\r', QString::SkipEmptyParts); Chris@392: Chris@392: for (size_t li = 0; li < lines.size(); ++li) { Chris@392: Chris@392: QString line = lines[li]; Chris@392: Chris@392: if (line.startsWith("#")) continue; Chris@392: Chris@392: m_behaviour = QString::KeepEmptyParts; Chris@392: Chris@392: if (m_separator == "") { Chris@392: //!!! to do: ask the user Chris@392: if (line.split(",").size() >= 2) m_separator = ","; Chris@392: else if (line.split("\t").size() >= 2) m_separator = "\t"; Chris@392: else if (line.split("|").size() >= 2) m_separator = "|"; Chris@392: else if (line.split("/").size() >= 2) m_separator = "/"; Chris@392: else if (line.split(":").size() >= 2) m_separator = ":"; Chris@392: else { Chris@392: m_separator = " "; Chris@392: m_behaviour = QString::SkipEmptyParts; Chris@392: } Chris@392: } Chris@392: Chris@392: std::cerr << "separator = \"" << m_separator.toStdString() << "\"" << std::endl; Chris@392: Chris@392: QStringList list = line.split(m_separator, m_behaviour); Chris@392: QStringList tidyList; Chris@392: Chris@392: for (int i = 0; i < list.size(); ++i) { Chris@392: Chris@392: QString s(list[i]); Chris@392: bool numeric = false; Chris@392: Chris@392: if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { Chris@392: s = s.mid(1, s.length() - 2); Chris@392: } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { Chris@392: s = s.mid(1, s.length() - 2); Chris@392: } else { Chris@392: float f = s.toFloat(&numeric); Chris@392: std::cerr << "converted \"" << s.toStdString() << "\" to float, got " << f << " and success = " << numeric << std::endl; Chris@392: } Chris@392: Chris@392: tidyList.push_back(s); Chris@392: Chris@392: if (lineno == 0 || (list.size() < itemCount)) { Chris@392: itemCount = list.size(); Chris@392: } else { Chris@392: if (itemCount != list.size()) { Chris@392: variableItemCount = true; Chris@392: } Chris@392: } Chris@392: Chris@392: if (i == 0) { // primary Chris@392: Chris@392: if (numeric) { Chris@392: Chris@392: float primary = s.toFloat(); Chris@392: Chris@392: if (lineno > 0 && primary <= prevPrimary) { Chris@392: nonIncreasingPrimaries = true; Chris@392: } Chris@392: Chris@392: if (s.contains(".") || s.contains(",")) { Chris@392: floatPrimaries = true; Chris@392: } Chris@392: Chris@392: prevPrimary = primary; Chris@392: Chris@392: } else { Chris@392: nonNumericPrimaries = true; Chris@392: } Chris@392: } else { // secondary Chris@392: Chris@392: if (!numeric) { Chris@392: if (earliestNonNumericItem < 0 || Chris@392: i < earliestNonNumericItem) { Chris@392: earliestNonNumericItem = i; Chris@392: } Chris@392: } Chris@392: } Chris@392: } Chris@392: Chris@392: if (lineno < 10) { Chris@392: m_example.push_back(tidyList); Chris@392: if (lineno == 0 || tidyList.size() > m_maxExampleCols) { Chris@392: m_maxExampleCols = tidyList.size(); Chris@392: } Chris@392: } Chris@392: Chris@392: ++lineno; Chris@392: Chris@392: if (lineno == 50) break; Chris@392: } Chris@392: } Chris@392: Chris@392: if (nonNumericPrimaries || nonIncreasingPrimaries) { Chris@392: Chris@392: // Primaries are probably not a series of times Chris@392: Chris@392: m_timingType = CSVFormat::ImplicitTiming; Chris@392: m_timeUnits = CSVFormat::TimeWindows; Chris@392: Chris@392: if (nonNumericPrimaries) { Chris@392: m_modelType = CSVFormat::OneDimensionalModel; Chris@392: } else if (itemCount == 1 || variableItemCount || Chris@392: (earliestNonNumericItem != -1)) { Chris@392: m_modelType = CSVFormat::TwoDimensionalModel; Chris@392: } else { Chris@392: m_modelType = CSVFormat::ThreeDimensionalModel; Chris@392: } Chris@392: Chris@392: } else { Chris@392: Chris@392: // Increasing numeric primaries -- likely to be time Chris@392: Chris@392: m_timingType = CSVFormat::ExplicitTiming; Chris@392: Chris@392: if (floatPrimaries) { Chris@392: m_timeUnits = CSVFormat::TimeSeconds; Chris@392: } else { Chris@392: m_timeUnits = CSVFormat::TimeAudioFrames; Chris@392: } Chris@392: Chris@392: if (itemCount == 1) { Chris@392: m_modelType = CSVFormat::OneDimensionalModel; Chris@392: } else if (variableItemCount || (earliestNonNumericItem != -1)) { Chris@392: if (earliestNonNumericItem != -1 && earliestNonNumericItem < 2) { Chris@392: m_modelType = CSVFormat::OneDimensionalModel; Chris@392: } else { Chris@392: m_modelType = CSVFormat::TwoDimensionalModel; Chris@392: } Chris@392: } else { Chris@392: m_modelType = CSVFormat::ThreeDimensionalModel; Chris@392: } Chris@392: } Chris@392: Chris@392: std::cerr << "Estimated model type: " << m_modelType << std::endl; Chris@392: std::cerr << "Estimated timing type: " << m_timingType << std::endl; Chris@392: std::cerr << "Estimated units: " << m_timeUnits << std::endl; Chris@392: } Chris@392: