annotate data/fileio/CSVFormat.cpp @ 558:1d7ebc05157e

* Some fairly simplistic code to set up layer type properties based on RDF data about feature types (both when running transforms and when importing features from RDF files).
author Chris Cannam
date Thu, 12 Feb 2009 15:26:43 +0000
parents d095214ffbaf
children 001db550bd48
rev   line source
Chris@392 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@392 2
Chris@392 3 /*
Chris@392 4 Sonic Visualiser
Chris@392 5 An audio file viewer and annotation editor.
Chris@392 6 Centre for Digital Music, Queen Mary, University of London.
Chris@392 7 This file copyright 2006 Chris Cannam.
Chris@392 8
Chris@392 9 This program is free software; you can redistribute it and/or
Chris@392 10 modify it under the terms of the GNU General Public License as
Chris@392 11 published by the Free Software Foundation; either version 2 of the
Chris@392 12 License, or (at your option) any later version. See the file
Chris@392 13 COPYING included with this distribution for more information.
Chris@392 14 */
Chris@392 15
Chris@392 16 #include "CSVFormat.h"
Chris@392 17
Chris@392 18 #include <QFile>
Chris@392 19 #include <QString>
Chris@392 20 #include <QRegExp>
Chris@392 21 #include <QStringList>
Chris@392 22 #include <QTextStream>
Chris@392 23
Chris@392 24 #include <iostream>
Chris@392 25
Chris@392 26 CSVFormat::CSVFormat(QString filename) :
Chris@392 27 m_modelType(TwoDimensionalModel),
Chris@392 28 m_timingType(ExplicitTiming),
Chris@392 29 m_timeUnits(TimeSeconds),
Chris@392 30 m_separator(","),
Chris@392 31 m_sampleRate(44100),
Chris@392 32 m_windowSize(1024),
Chris@392 33 m_behaviour(QString::KeepEmptyParts),
Chris@392 34 m_maxExampleCols(0)
Chris@392 35 {
Chris@392 36 QFile file(filename);
Chris@392 37 if (!file.exists()) return;
Chris@392 38 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return;
Chris@392 39
Chris@392 40 QTextStream in(&file);
Chris@392 41 in.seek(0);
Chris@392 42
Chris@392 43 unsigned int lineno = 0;
Chris@392 44
Chris@392 45 bool nonIncreasingPrimaries = false;
Chris@392 46 bool nonNumericPrimaries = false;
Chris@392 47 bool floatPrimaries = false;
Chris@392 48 bool variableItemCount = false;
Chris@392 49 int itemCount = 1;
Chris@392 50 int earliestNonNumericItem = -1;
Chris@392 51
Chris@392 52 float prevPrimary = 0.0;
Chris@392 53
Chris@392 54 m_maxExampleCols = 0;
Chris@392 55 m_separator = "";
Chris@392 56
Chris@392 57 while (!in.atEnd()) {
Chris@392 58
Chris@392 59 // See comment about line endings in CSVFileReader::load()
Chris@392 60
Chris@392 61 QString chunk = in.readLine();
Chris@392 62 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@392 63
Chris@392 64 for (size_t li = 0; li < lines.size(); ++li) {
Chris@392 65
Chris@392 66 QString line = lines[li];
Chris@392 67
Chris@392 68 if (line.startsWith("#")) continue;
Chris@392 69
Chris@392 70 m_behaviour = QString::KeepEmptyParts;
Chris@392 71
Chris@392 72 if (m_separator == "") {
Chris@392 73 //!!! to do: ask the user
Chris@392 74 if (line.split(",").size() >= 2) m_separator = ",";
Chris@392 75 else if (line.split("\t").size() >= 2) m_separator = "\t";
Chris@392 76 else if (line.split("|").size() >= 2) m_separator = "|";
Chris@392 77 else if (line.split("/").size() >= 2) m_separator = "/";
Chris@392 78 else if (line.split(":").size() >= 2) m_separator = ":";
Chris@392 79 else {
Chris@392 80 m_separator = " ";
Chris@392 81 m_behaviour = QString::SkipEmptyParts;
Chris@392 82 }
Chris@392 83 }
Chris@392 84
Chris@406 85 // std::cerr << "separator = \"" << m_separator.toStdString() << "\"" << std::endl;
Chris@392 86
Chris@392 87 QStringList list = line.split(m_separator, m_behaviour);
Chris@392 88 QStringList tidyList;
Chris@392 89
Chris@392 90 for (int i = 0; i < list.size(); ++i) {
Chris@392 91
Chris@392 92 QString s(list[i]);
Chris@392 93 bool numeric = false;
Chris@392 94
Chris@392 95 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
Chris@392 96 s = s.mid(1, s.length() - 2);
Chris@392 97 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
Chris@392 98 s = s.mid(1, s.length() - 2);
Chris@392 99 } else {
Chris@392 100 float f = s.toFloat(&numeric);
Chris@406 101 // std::cerr << "converted \"" << s.toStdString() << "\" to float, got " << f << " and success = " << numeric << std::endl;
Chris@392 102 }
Chris@392 103
Chris@392 104 tidyList.push_back(s);
Chris@392 105
Chris@392 106 if (lineno == 0 || (list.size() < itemCount)) {
Chris@392 107 itemCount = list.size();
Chris@392 108 } else {
Chris@392 109 if (itemCount != list.size()) {
Chris@392 110 variableItemCount = true;
Chris@392 111 }
Chris@392 112 }
Chris@392 113
Chris@392 114 if (i == 0) { // primary
Chris@392 115
Chris@392 116 if (numeric) {
Chris@392 117
Chris@392 118 float primary = s.toFloat();
Chris@392 119
Chris@392 120 if (lineno > 0 && primary <= prevPrimary) {
Chris@392 121 nonIncreasingPrimaries = true;
Chris@392 122 }
Chris@392 123
Chris@392 124 if (s.contains(".") || s.contains(",")) {
Chris@392 125 floatPrimaries = true;
Chris@392 126 }
Chris@392 127
Chris@392 128 prevPrimary = primary;
Chris@392 129
Chris@392 130 } else {
Chris@392 131 nonNumericPrimaries = true;
Chris@392 132 }
Chris@392 133 } else { // secondary
Chris@392 134
Chris@392 135 if (!numeric) {
Chris@392 136 if (earliestNonNumericItem < 0 ||
Chris@392 137 i < earliestNonNumericItem) {
Chris@392 138 earliestNonNumericItem = i;
Chris@392 139 }
Chris@392 140 }
Chris@392 141 }
Chris@392 142 }
Chris@392 143
Chris@392 144 if (lineno < 10) {
Chris@392 145 m_example.push_back(tidyList);
Chris@392 146 if (lineno == 0 || tidyList.size() > m_maxExampleCols) {
Chris@392 147 m_maxExampleCols = tidyList.size();
Chris@392 148 }
Chris@392 149 }
Chris@392 150
Chris@392 151 ++lineno;
Chris@392 152
Chris@392 153 if (lineno == 50) break;
Chris@392 154 }
Chris@392 155 }
Chris@392 156
Chris@392 157 if (nonNumericPrimaries || nonIncreasingPrimaries) {
Chris@392 158
Chris@392 159 // Primaries are probably not a series of times
Chris@392 160
Chris@392 161 m_timingType = CSVFormat::ImplicitTiming;
Chris@392 162 m_timeUnits = CSVFormat::TimeWindows;
Chris@392 163
Chris@392 164 if (nonNumericPrimaries) {
Chris@392 165 m_modelType = CSVFormat::OneDimensionalModel;
Chris@392 166 } else if (itemCount == 1 || variableItemCount ||
Chris@392 167 (earliestNonNumericItem != -1)) {
Chris@392 168 m_modelType = CSVFormat::TwoDimensionalModel;
Chris@392 169 } else {
Chris@392 170 m_modelType = CSVFormat::ThreeDimensionalModel;
Chris@392 171 }
Chris@392 172
Chris@392 173 } else {
Chris@392 174
Chris@392 175 // Increasing numeric primaries -- likely to be time
Chris@392 176
Chris@392 177 m_timingType = CSVFormat::ExplicitTiming;
Chris@392 178
Chris@392 179 if (floatPrimaries) {
Chris@392 180 m_timeUnits = CSVFormat::TimeSeconds;
Chris@392 181 } else {
Chris@392 182 m_timeUnits = CSVFormat::TimeAudioFrames;
Chris@392 183 }
Chris@392 184
Chris@392 185 if (itemCount == 1) {
Chris@392 186 m_modelType = CSVFormat::OneDimensionalModel;
Chris@392 187 } else if (variableItemCount || (earliestNonNumericItem != -1)) {
Chris@392 188 if (earliestNonNumericItem != -1 && earliestNonNumericItem < 2) {
Chris@392 189 m_modelType = CSVFormat::OneDimensionalModel;
Chris@392 190 } else {
Chris@392 191 m_modelType = CSVFormat::TwoDimensionalModel;
Chris@392 192 }
Chris@392 193 } else {
Chris@392 194 m_modelType = CSVFormat::ThreeDimensionalModel;
Chris@392 195 }
Chris@392 196 }
Chris@392 197
Chris@392 198 std::cerr << "Estimated model type: " << m_modelType << std::endl;
Chris@392 199 std::cerr << "Estimated timing type: " << m_timingType << std::endl;
Chris@392 200 std::cerr << "Estimated units: " << m_timeUnits << std::endl;
Chris@392 201 }
Chris@392 202