comparison data/fileio/CSVFormat.cpp @ 392:183ee2a55fc7

* More work to abstract out interactive components used in the data library, so that it does not need to depend on QtGui.
author Chris Cannam
date Fri, 14 Mar 2008 17:14:21 +0000
parents
children d095214ffbaf
comparison
equal deleted inserted replaced
391:5858cc462d0a 392:183ee2a55fc7
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 Sonic Visualiser
5 An audio file viewer and annotation editor.
6 Centre for Digital Music, Queen Mary, University of London.
7 This file copyright 2006 Chris Cannam.
8
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License as
11 published by the Free Software Foundation; either version 2 of the
12 License, or (at your option) any later version. See the file
13 COPYING included with this distribution for more information.
14 */
15
16 #include "CSVFormat.h"
17
18 #include <QFile>
19 #include <QString>
20 #include <QRegExp>
21 #include <QStringList>
22 #include <QTextStream>
23
24 #include <iostream>
25
26 CSVFormat::CSVFormat(QString filename) :
27 m_modelType(TwoDimensionalModel),
28 m_timingType(ExplicitTiming),
29 m_timeUnits(TimeSeconds),
30 m_separator(","),
31 m_sampleRate(44100),
32 m_windowSize(1024),
33 m_behaviour(QString::KeepEmptyParts),
34 m_maxExampleCols(0)
35 {
36 QFile file(filename);
37 if (!file.exists()) return;
38 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return;
39
40 QTextStream in(&file);
41 in.seek(0);
42
43 unsigned int lineno = 0;
44
45 bool nonIncreasingPrimaries = false;
46 bool nonNumericPrimaries = false;
47 bool floatPrimaries = false;
48 bool variableItemCount = false;
49 int itemCount = 1;
50 int earliestNonNumericItem = -1;
51
52 float prevPrimary = 0.0;
53
54 m_maxExampleCols = 0;
55 m_separator = "";
56
57 while (!in.atEnd()) {
58
59 // See comment about line endings in CSVFileReader::load()
60
61 QString chunk = in.readLine();
62 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
63
64 for (size_t li = 0; li < lines.size(); ++li) {
65
66 QString line = lines[li];
67
68 if (line.startsWith("#")) continue;
69
70 m_behaviour = QString::KeepEmptyParts;
71
72 if (m_separator == "") {
73 //!!! to do: ask the user
74 if (line.split(",").size() >= 2) m_separator = ",";
75 else if (line.split("\t").size() >= 2) m_separator = "\t";
76 else if (line.split("|").size() >= 2) m_separator = "|";
77 else if (line.split("/").size() >= 2) m_separator = "/";
78 else if (line.split(":").size() >= 2) m_separator = ":";
79 else {
80 m_separator = " ";
81 m_behaviour = QString::SkipEmptyParts;
82 }
83 }
84
85 std::cerr << "separator = \"" << m_separator.toStdString() << "\"" << std::endl;
86
87 QStringList list = line.split(m_separator, m_behaviour);
88 QStringList tidyList;
89
90 for (int i = 0; i < list.size(); ++i) {
91
92 QString s(list[i]);
93 bool numeric = false;
94
95 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
96 s = s.mid(1, s.length() - 2);
97 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
98 s = s.mid(1, s.length() - 2);
99 } else {
100 float f = s.toFloat(&numeric);
101 std::cerr << "converted \"" << s.toStdString() << "\" to float, got " << f << " and success = " << numeric << std::endl;
102 }
103
104 tidyList.push_back(s);
105
106 if (lineno == 0 || (list.size() < itemCount)) {
107 itemCount = list.size();
108 } else {
109 if (itemCount != list.size()) {
110 variableItemCount = true;
111 }
112 }
113
114 if (i == 0) { // primary
115
116 if (numeric) {
117
118 float primary = s.toFloat();
119
120 if (lineno > 0 && primary <= prevPrimary) {
121 nonIncreasingPrimaries = true;
122 }
123
124 if (s.contains(".") || s.contains(",")) {
125 floatPrimaries = true;
126 }
127
128 prevPrimary = primary;
129
130 } else {
131 nonNumericPrimaries = true;
132 }
133 } else { // secondary
134
135 if (!numeric) {
136 if (earliestNonNumericItem < 0 ||
137 i < earliestNonNumericItem) {
138 earliestNonNumericItem = i;
139 }
140 }
141 }
142 }
143
144 if (lineno < 10) {
145 m_example.push_back(tidyList);
146 if (lineno == 0 || tidyList.size() > m_maxExampleCols) {
147 m_maxExampleCols = tidyList.size();
148 }
149 }
150
151 ++lineno;
152
153 if (lineno == 50) break;
154 }
155 }
156
157 if (nonNumericPrimaries || nonIncreasingPrimaries) {
158
159 // Primaries are probably not a series of times
160
161 m_timingType = CSVFormat::ImplicitTiming;
162 m_timeUnits = CSVFormat::TimeWindows;
163
164 if (nonNumericPrimaries) {
165 m_modelType = CSVFormat::OneDimensionalModel;
166 } else if (itemCount == 1 || variableItemCount ||
167 (earliestNonNumericItem != -1)) {
168 m_modelType = CSVFormat::TwoDimensionalModel;
169 } else {
170 m_modelType = CSVFormat::ThreeDimensionalModel;
171 }
172
173 } else {
174
175 // Increasing numeric primaries -- likely to be time
176
177 m_timingType = CSVFormat::ExplicitTiming;
178
179 if (floatPrimaries) {
180 m_timeUnits = CSVFormat::TimeSeconds;
181 } else {
182 m_timeUnits = CSVFormat::TimeAudioFrames;
183 }
184
185 if (itemCount == 1) {
186 m_modelType = CSVFormat::OneDimensionalModel;
187 } else if (variableItemCount || (earliestNonNumericItem != -1)) {
188 if (earliestNonNumericItem != -1 && earliestNonNumericItem < 2) {
189 m_modelType = CSVFormat::OneDimensionalModel;
190 } else {
191 m_modelType = CSVFormat::TwoDimensionalModel;
192 }
193 } else {
194 m_modelType = CSVFormat::ThreeDimensionalModel;
195 }
196 }
197
198 std::cerr << "Estimated model type: " << m_modelType << std::endl;
199 std::cerr << "Estimated timing type: " << m_timingType << std::endl;
200 std::cerr << "Estimated units: " << m_timeUnits << std::endl;
201 }
202