annotate data/fileio/CSVFormat.cpp @ 510:af7b6e55895b

* Ensure text models are exported with text properties in RDF, and imported back into text models again (instead of time/value models)
author Chris Cannam
date Mon, 08 Dec 2008 11:53:10 +0000
parents d095214ffbaf
children 001db550bd48
rev   line source
Chris@392 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@392 2
Chris@392 3 /*
Chris@392 4 Sonic Visualiser
Chris@392 5 An audio file viewer and annotation editor.
Chris@392 6 Centre for Digital Music, Queen Mary, University of London.
Chris@392 7 This file copyright 2006 Chris Cannam.
Chris@392 8
Chris@392 9 This program is free software; you can redistribute it and/or
Chris@392 10 modify it under the terms of the GNU General Public License as
Chris@392 11 published by the Free Software Foundation; either version 2 of the
Chris@392 12 License, or (at your option) any later version. See the file
Chris@392 13 COPYING included with this distribution for more information.
Chris@392 14 */
Chris@392 15
Chris@392 16 #include "CSVFormat.h"
Chris@392 17
Chris@392 18 #include <QFile>
Chris@392 19 #include <QString>
Chris@392 20 #include <QRegExp>
Chris@392 21 #include <QStringList>
Chris@392 22 #include <QTextStream>
Chris@392 23
Chris@392 24 #include <iostream>
Chris@392 25
Chris@392 26 CSVFormat::CSVFormat(QString filename) :
Chris@392 27 m_modelType(TwoDimensionalModel),
Chris@392 28 m_timingType(ExplicitTiming),
Chris@392 29 m_timeUnits(TimeSeconds),
Chris@392 30 m_separator(","),
Chris@392 31 m_sampleRate(44100),
Chris@392 32 m_windowSize(1024),
Chris@392 33 m_behaviour(QString::KeepEmptyParts),
Chris@392 34 m_maxExampleCols(0)
Chris@392 35 {
Chris@392 36 QFile file(filename);
Chris@392 37 if (!file.exists()) return;
Chris@392 38 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return;
Chris@392 39
Chris@392 40 QTextStream in(&file);
Chris@392 41 in.seek(0);
Chris@392 42
Chris@392 43 unsigned int lineno = 0;
Chris@392 44
Chris@392 45 bool nonIncreasingPrimaries = false;
Chris@392 46 bool nonNumericPrimaries = false;
Chris@392 47 bool floatPrimaries = false;
Chris@392 48 bool variableItemCount = false;
Chris@392 49 int itemCount = 1;
Chris@392 50 int earliestNonNumericItem = -1;
Chris@392 51
Chris@392 52 float prevPrimary = 0.0;
Chris@392 53
Chris@392 54 m_maxExampleCols = 0;
Chris@392 55 m_separator = "";
Chris@392 56
Chris@392 57 while (!in.atEnd()) {
Chris@392 58
Chris@392 59 // See comment about line endings in CSVFileReader::load()
Chris@392 60
Chris@392 61 QString chunk = in.readLine();
Chris@392 62 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@392 63
Chris@392 64 for (size_t li = 0; li < lines.size(); ++li) {
Chris@392 65
Chris@392 66 QString line = lines[li];
Chris@392 67
Chris@392 68 if (line.startsWith("#")) continue;
Chris@392 69
Chris@392 70 m_behaviour = QString::KeepEmptyParts;
Chris@392 71
Chris@392 72 if (m_separator == "") {
Chris@392 73 //!!! to do: ask the user
Chris@392 74 if (line.split(",").size() >= 2) m_separator = ",";
Chris@392 75 else if (line.split("\t").size() >= 2) m_separator = "\t";
Chris@392 76 else if (line.split("|").size() >= 2) m_separator = "|";
Chris@392 77 else if (line.split("/").size() >= 2) m_separator = "/";
Chris@392 78 else if (line.split(":").size() >= 2) m_separator = ":";
Chris@392 79 else {
Chris@392 80 m_separator = " ";
Chris@392 81 m_behaviour = QString::SkipEmptyParts;
Chris@392 82 }
Chris@392 83 }
Chris@392 84
Chris@406 85 // std::cerr << "separator = \"" << m_separator.toStdString() << "\"" << std::endl;
Chris@392 86
Chris@392 87 QStringList list = line.split(m_separator, m_behaviour);
Chris@392 88 QStringList tidyList;
Chris@392 89
Chris@392 90 for (int i = 0; i < list.size(); ++i) {
Chris@392 91
Chris@392 92 QString s(list[i]);
Chris@392 93 bool numeric = false;
Chris@392 94
Chris@392 95 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
Chris@392 96 s = s.mid(1, s.length() - 2);
Chris@392 97 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
Chris@392 98 s = s.mid(1, s.length() - 2);
Chris@392 99 } else {
Chris@392 100 float f = s.toFloat(&numeric);
Chris@406 101 // std::cerr << "converted \"" << s.toStdString() << "\" to float, got " << f << " and success = " << numeric << std::endl;
Chris@392 102 }
Chris@392 103
Chris@392 104 tidyList.push_back(s);
Chris@392 105
Chris@392 106 if (lineno == 0 || (list.size() < itemCount)) {
Chris@392 107 itemCount = list.size();
Chris@392 108 } else {
Chris@392 109 if (itemCount != list.size()) {
Chris@392 110 variableItemCount = true;
Chris@392 111 }
Chris@392 112 }
Chris@392 113
Chris@392 114 if (i == 0) { // primary
Chris@392 115
Chris@392 116 if (numeric) {
Chris@392 117
Chris@392 118 float primary = s.toFloat();
Chris@392 119
Chris@392 120 if (lineno > 0 && primary <= prevPrimary) {
Chris@392 121 nonIncreasingPrimaries = true;
Chris@392 122 }
Chris@392 123
Chris@392 124 if (s.contains(".") || s.contains(",")) {
Chris@392 125 floatPrimaries = true;
Chris@392 126 }
Chris@392 127
Chris@392 128 prevPrimary = primary;
Chris@392 129
Chris@392 130 } else {
Chris@392 131 nonNumericPrimaries = true;
Chris@392 132 }
Chris@392 133 } else { // secondary
Chris@392 134
Chris@392 135 if (!numeric) {
Chris@392 136 if (earliestNonNumericItem < 0 ||
Chris@392 137 i < earliestNonNumericItem) {
Chris@392 138 earliestNonNumericItem = i;
Chris@392 139 }
Chris@392 140 }
Chris@392 141 }
Chris@392 142 }
Chris@392 143
Chris@392 144 if (lineno < 10) {
Chris@392 145 m_example.push_back(tidyList);
Chris@392 146 if (lineno == 0 || tidyList.size() > m_maxExampleCols) {
Chris@392 147 m_maxExampleCols = tidyList.size();
Chris@392 148 }
Chris@392 149 }
Chris@392 150
Chris@392 151 ++lineno;
Chris@392 152
Chris@392 153 if (lineno == 50) break;
Chris@392 154 }
Chris@392 155 }
Chris@392 156
Chris@392 157 if (nonNumericPrimaries || nonIncreasingPrimaries) {
Chris@392 158
Chris@392 159 // Primaries are probably not a series of times
Chris@392 160
Chris@392 161 m_timingType = CSVFormat::ImplicitTiming;
Chris@392 162 m_timeUnits = CSVFormat::TimeWindows;
Chris@392 163
Chris@392 164 if (nonNumericPrimaries) {
Chris@392 165 m_modelType = CSVFormat::OneDimensionalModel;
Chris@392 166 } else if (itemCount == 1 || variableItemCount ||
Chris@392 167 (earliestNonNumericItem != -1)) {
Chris@392 168 m_modelType = CSVFormat::TwoDimensionalModel;
Chris@392 169 } else {
Chris@392 170 m_modelType = CSVFormat::ThreeDimensionalModel;
Chris@392 171 }
Chris@392 172
Chris@392 173 } else {
Chris@392 174
Chris@392 175 // Increasing numeric primaries -- likely to be time
Chris@392 176
Chris@392 177 m_timingType = CSVFormat::ExplicitTiming;
Chris@392 178
Chris@392 179 if (floatPrimaries) {
Chris@392 180 m_timeUnits = CSVFormat::TimeSeconds;
Chris@392 181 } else {
Chris@392 182 m_timeUnits = CSVFormat::TimeAudioFrames;
Chris@392 183 }
Chris@392 184
Chris@392 185 if (itemCount == 1) {
Chris@392 186 m_modelType = CSVFormat::OneDimensionalModel;
Chris@392 187 } else if (variableItemCount || (earliestNonNumericItem != -1)) {
Chris@392 188 if (earliestNonNumericItem != -1 && earliestNonNumericItem < 2) {
Chris@392 189 m_modelType = CSVFormat::OneDimensionalModel;
Chris@392 190 } else {
Chris@392 191 m_modelType = CSVFormat::TwoDimensionalModel;
Chris@392 192 }
Chris@392 193 } else {
Chris@392 194 m_modelType = CSVFormat::ThreeDimensionalModel;
Chris@392 195 }
Chris@392 196 }
Chris@392 197
Chris@392 198 std::cerr << "Estimated model type: " << m_modelType << std::endl;
Chris@392 199 std::cerr << "Estimated timing type: " << m_timingType << std::endl;
Chris@392 200 std::cerr << "Estimated units: " << m_timeUnits << std::endl;
Chris@392 201 }
Chris@392 202