Mercurial > hg > svcore
diff data/fileio/CSVFormat.cpp @ 392:183ee2a55fc7
* More work to abstract out interactive components used in the data library,
so that it does not need to depend on QtGui.
author | Chris Cannam |
---|---|
date | Fri, 14 Mar 2008 17:14:21 +0000 |
parents | |
children | d095214ffbaf |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data/fileio/CSVFormat.cpp Fri Mar 14 17:14:21 2008 +0000 @@ -0,0 +1,202 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Sonic Visualiser + An audio file viewer and annotation editor. + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2006 Chris Cannam. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "CSVFormat.h" + +#include <QFile> +#include <QString> +#include <QRegExp> +#include <QStringList> +#include <QTextStream> + +#include <iostream> + +CSVFormat::CSVFormat(QString filename) : + m_modelType(TwoDimensionalModel), + m_timingType(ExplicitTiming), + m_timeUnits(TimeSeconds), + m_separator(","), + m_sampleRate(44100), + m_windowSize(1024), + m_behaviour(QString::KeepEmptyParts), + m_maxExampleCols(0) +{ + QFile file(filename); + if (!file.exists()) return; + if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return; + + QTextStream in(&file); + in.seek(0); + + unsigned int lineno = 0; + + bool nonIncreasingPrimaries = false; + bool nonNumericPrimaries = false; + bool floatPrimaries = false; + bool variableItemCount = false; + int itemCount = 1; + int earliestNonNumericItem = -1; + + float prevPrimary = 0.0; + + m_maxExampleCols = 0; + m_separator = ""; + + while (!in.atEnd()) { + + // See comment about line endings in CSVFileReader::load() + + QString chunk = in.readLine(); + QStringList lines = chunk.split('\r', QString::SkipEmptyParts); + + for (size_t li = 0; li < lines.size(); ++li) { + + QString line = lines[li]; + + if (line.startsWith("#")) continue; + + m_behaviour = QString::KeepEmptyParts; + + if (m_separator == "") { + //!!! to do: ask the user + if (line.split(",").size() >= 2) m_separator = ","; + else if (line.split("\t").size() >= 2) m_separator = "\t"; + else if (line.split("|").size() >= 2) m_separator = "|"; + else if (line.split("/").size() >= 2) m_separator = "/"; + else if (line.split(":").size() >= 2) m_separator = ":"; + else { + m_separator = " "; + m_behaviour = QString::SkipEmptyParts; + } + } + + std::cerr << "separator = \"" << m_separator.toStdString() << "\"" << std::endl; + + QStringList list = line.split(m_separator, m_behaviour); + QStringList tidyList; + + for (int i = 0; i < list.size(); ++i) { + + QString s(list[i]); + bool numeric = false; + + if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { + s = s.mid(1, s.length() - 2); + } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { + s = s.mid(1, s.length() - 2); + } else { + float f = s.toFloat(&numeric); + std::cerr << "converted \"" << s.toStdString() << "\" to float, got " << f << " and success = " << numeric << std::endl; + } + + tidyList.push_back(s); + + if (lineno == 0 || (list.size() < itemCount)) { + itemCount = list.size(); + } else { + if (itemCount != list.size()) { + variableItemCount = true; + } + } + + if (i == 0) { // primary + + if (numeric) { + + float primary = s.toFloat(); + + if (lineno > 0 && primary <= prevPrimary) { + nonIncreasingPrimaries = true; + } + + if (s.contains(".") || s.contains(",")) { + floatPrimaries = true; + } + + prevPrimary = primary; + + } else { + nonNumericPrimaries = true; + } + } else { // secondary + + if (!numeric) { + if (earliestNonNumericItem < 0 || + i < earliestNonNumericItem) { + earliestNonNumericItem = i; + } + } + } + } + + if (lineno < 10) { + m_example.push_back(tidyList); + if (lineno == 0 || tidyList.size() > m_maxExampleCols) { + m_maxExampleCols = tidyList.size(); + } + } + + ++lineno; + + if (lineno == 50) break; + } + } + + if (nonNumericPrimaries || nonIncreasingPrimaries) { + + // Primaries are probably not a series of times + + m_timingType = CSVFormat::ImplicitTiming; + m_timeUnits = CSVFormat::TimeWindows; + + if (nonNumericPrimaries) { + m_modelType = CSVFormat::OneDimensionalModel; + } else if (itemCount == 1 || variableItemCount || + (earliestNonNumericItem != -1)) { + m_modelType = CSVFormat::TwoDimensionalModel; + } else { + m_modelType = CSVFormat::ThreeDimensionalModel; + } + + } else { + + // Increasing numeric primaries -- likely to be time + + m_timingType = CSVFormat::ExplicitTiming; + + if (floatPrimaries) { + m_timeUnits = CSVFormat::TimeSeconds; + } else { + m_timeUnits = CSVFormat::TimeAudioFrames; + } + + if (itemCount == 1) { + m_modelType = CSVFormat::OneDimensionalModel; + } else if (variableItemCount || (earliestNonNumericItem != -1)) { + if (earliestNonNumericItem != -1 && earliestNonNumericItem < 2) { + m_modelType = CSVFormat::OneDimensionalModel; + } else { + m_modelType = CSVFormat::TwoDimensionalModel; + } + } else { + m_modelType = CSVFormat::ThreeDimensionalModel; + } + } + + std::cerr << "Estimated model type: " << m_modelType << std::endl; + std::cerr << "Estimated timing type: " << m_timingType << std::endl; + std::cerr << "Estimated units: " << m_timeUnits << std::endl; +} +