Mercurial > hg > svcore
comparison data/fileio/CSVFormat.cpp @ 392:183ee2a55fc7
* More work to abstract out interactive components used in the data library,
so that it does not need to depend on QtGui.
author | Chris Cannam |
---|---|
date | Fri, 14 Mar 2008 17:14:21 +0000 |
parents | |
children | d095214ffbaf |
comparison
equal
deleted
inserted
replaced
391:5858cc462d0a | 392:183ee2a55fc7 |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 Sonic Visualiser | |
5 An audio file viewer and annotation editor. | |
6 Centre for Digital Music, Queen Mary, University of London. | |
7 This file copyright 2006 Chris Cannam. | |
8 | |
9 This program is free software; you can redistribute it and/or | |
10 modify it under the terms of the GNU General Public License as | |
11 published by the Free Software Foundation; either version 2 of the | |
12 License, or (at your option) any later version. See the file | |
13 COPYING included with this distribution for more information. | |
14 */ | |
15 | |
16 #include "CSVFormat.h" | |
17 | |
18 #include <QFile> | |
19 #include <QString> | |
20 #include <QRegExp> | |
21 #include <QStringList> | |
22 #include <QTextStream> | |
23 | |
24 #include <iostream> | |
25 | |
26 CSVFormat::CSVFormat(QString filename) : | |
27 m_modelType(TwoDimensionalModel), | |
28 m_timingType(ExplicitTiming), | |
29 m_timeUnits(TimeSeconds), | |
30 m_separator(","), | |
31 m_sampleRate(44100), | |
32 m_windowSize(1024), | |
33 m_behaviour(QString::KeepEmptyParts), | |
34 m_maxExampleCols(0) | |
35 { | |
36 QFile file(filename); | |
37 if (!file.exists()) return; | |
38 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return; | |
39 | |
40 QTextStream in(&file); | |
41 in.seek(0); | |
42 | |
43 unsigned int lineno = 0; | |
44 | |
45 bool nonIncreasingPrimaries = false; | |
46 bool nonNumericPrimaries = false; | |
47 bool floatPrimaries = false; | |
48 bool variableItemCount = false; | |
49 int itemCount = 1; | |
50 int earliestNonNumericItem = -1; | |
51 | |
52 float prevPrimary = 0.0; | |
53 | |
54 m_maxExampleCols = 0; | |
55 m_separator = ""; | |
56 | |
57 while (!in.atEnd()) { | |
58 | |
59 // See comment about line endings in CSVFileReader::load() | |
60 | |
61 QString chunk = in.readLine(); | |
62 QStringList lines = chunk.split('\r', QString::SkipEmptyParts); | |
63 | |
64 for (size_t li = 0; li < lines.size(); ++li) { | |
65 | |
66 QString line = lines[li]; | |
67 | |
68 if (line.startsWith("#")) continue; | |
69 | |
70 m_behaviour = QString::KeepEmptyParts; | |
71 | |
72 if (m_separator == "") { | |
73 //!!! to do: ask the user | |
74 if (line.split(",").size() >= 2) m_separator = ","; | |
75 else if (line.split("\t").size() >= 2) m_separator = "\t"; | |
76 else if (line.split("|").size() >= 2) m_separator = "|"; | |
77 else if (line.split("/").size() >= 2) m_separator = "/"; | |
78 else if (line.split(":").size() >= 2) m_separator = ":"; | |
79 else { | |
80 m_separator = " "; | |
81 m_behaviour = QString::SkipEmptyParts; | |
82 } | |
83 } | |
84 | |
85 std::cerr << "separator = \"" << m_separator.toStdString() << "\"" << std::endl; | |
86 | |
87 QStringList list = line.split(m_separator, m_behaviour); | |
88 QStringList tidyList; | |
89 | |
90 for (int i = 0; i < list.size(); ++i) { | |
91 | |
92 QString s(list[i]); | |
93 bool numeric = false; | |
94 | |
95 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { | |
96 s = s.mid(1, s.length() - 2); | |
97 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { | |
98 s = s.mid(1, s.length() - 2); | |
99 } else { | |
100 float f = s.toFloat(&numeric); | |
101 std::cerr << "converted \"" << s.toStdString() << "\" to float, got " << f << " and success = " << numeric << std::endl; | |
102 } | |
103 | |
104 tidyList.push_back(s); | |
105 | |
106 if (lineno == 0 || (list.size() < itemCount)) { | |
107 itemCount = list.size(); | |
108 } else { | |
109 if (itemCount != list.size()) { | |
110 variableItemCount = true; | |
111 } | |
112 } | |
113 | |
114 if (i == 0) { // primary | |
115 | |
116 if (numeric) { | |
117 | |
118 float primary = s.toFloat(); | |
119 | |
120 if (lineno > 0 && primary <= prevPrimary) { | |
121 nonIncreasingPrimaries = true; | |
122 } | |
123 | |
124 if (s.contains(".") || s.contains(",")) { | |
125 floatPrimaries = true; | |
126 } | |
127 | |
128 prevPrimary = primary; | |
129 | |
130 } else { | |
131 nonNumericPrimaries = true; | |
132 } | |
133 } else { // secondary | |
134 | |
135 if (!numeric) { | |
136 if (earliestNonNumericItem < 0 || | |
137 i < earliestNonNumericItem) { | |
138 earliestNonNumericItem = i; | |
139 } | |
140 } | |
141 } | |
142 } | |
143 | |
144 if (lineno < 10) { | |
145 m_example.push_back(tidyList); | |
146 if (lineno == 0 || tidyList.size() > m_maxExampleCols) { | |
147 m_maxExampleCols = tidyList.size(); | |
148 } | |
149 } | |
150 | |
151 ++lineno; | |
152 | |
153 if (lineno == 50) break; | |
154 } | |
155 } | |
156 | |
157 if (nonNumericPrimaries || nonIncreasingPrimaries) { | |
158 | |
159 // Primaries are probably not a series of times | |
160 | |
161 m_timingType = CSVFormat::ImplicitTiming; | |
162 m_timeUnits = CSVFormat::TimeWindows; | |
163 | |
164 if (nonNumericPrimaries) { | |
165 m_modelType = CSVFormat::OneDimensionalModel; | |
166 } else if (itemCount == 1 || variableItemCount || | |
167 (earliestNonNumericItem != -1)) { | |
168 m_modelType = CSVFormat::TwoDimensionalModel; | |
169 } else { | |
170 m_modelType = CSVFormat::ThreeDimensionalModel; | |
171 } | |
172 | |
173 } else { | |
174 | |
175 // Increasing numeric primaries -- likely to be time | |
176 | |
177 m_timingType = CSVFormat::ExplicitTiming; | |
178 | |
179 if (floatPrimaries) { | |
180 m_timeUnits = CSVFormat::TimeSeconds; | |
181 } else { | |
182 m_timeUnits = CSVFormat::TimeAudioFrames; | |
183 } | |
184 | |
185 if (itemCount == 1) { | |
186 m_modelType = CSVFormat::OneDimensionalModel; | |
187 } else if (variableItemCount || (earliestNonNumericItem != -1)) { | |
188 if (earliestNonNumericItem != -1 && earliestNonNumericItem < 2) { | |
189 m_modelType = CSVFormat::OneDimensionalModel; | |
190 } else { | |
191 m_modelType = CSVFormat::TwoDimensionalModel; | |
192 } | |
193 } else { | |
194 m_modelType = CSVFormat::ThreeDimensionalModel; | |
195 } | |
196 } | |
197 | |
198 std::cerr << "Estimated model type: " << m_modelType << std::endl; | |
199 std::cerr << "Estimated timing type: " << m_timingType << std::endl; | |
200 std::cerr << "Estimated units: " << m_timeUnits << std::endl; | |
201 } | |
202 |