Chris@392
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@392
|
2
|
Chris@392
|
3 /*
|
Chris@392
|
4 Sonic Visualiser
|
Chris@392
|
5 An audio file viewer and annotation editor.
|
Chris@392
|
6 Centre for Digital Music, Queen Mary, University of London.
|
Chris@392
|
7 This file copyright 2006 Chris Cannam.
|
Chris@392
|
8
|
Chris@392
|
9 This program is free software; you can redistribute it and/or
|
Chris@392
|
10 modify it under the terms of the GNU General Public License as
|
Chris@392
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@392
|
12 License, or (at your option) any later version. See the file
|
Chris@392
|
13 COPYING included with this distribution for more information.
|
Chris@392
|
14 */
|
Chris@392
|
15
|
Chris@392
|
16 #include "CSVFormat.h"
|
Chris@392
|
17
|
Chris@392
|
18 #include <QFile>
|
Chris@392
|
19 #include <QString>
|
Chris@392
|
20 #include <QRegExp>
|
Chris@392
|
21 #include <QStringList>
|
Chris@392
|
22 #include <QTextStream>
|
Chris@392
|
23
|
Chris@392
|
24 #include <iostream>
|
Chris@392
|
25
|
Chris@392
|
26 CSVFormat::CSVFormat(QString filename) :
|
Chris@392
|
27 m_modelType(TwoDimensionalModel),
|
Chris@392
|
28 m_timingType(ExplicitTiming),
|
Chris@628
|
29 m_durationType(Durations),
|
Chris@392
|
30 m_timeUnits(TimeSeconds),
|
Chris@392
|
31 m_separator(","),
|
Chris@392
|
32 m_sampleRate(44100),
|
Chris@392
|
33 m_windowSize(1024),
|
Chris@392
|
34 m_behaviour(QString::KeepEmptyParts),
|
Chris@392
|
35 m_maxExampleCols(0)
|
Chris@392
|
36 {
|
Chris@392
|
37 QFile file(filename);
|
Chris@392
|
38 if (!file.exists()) return;
|
Chris@392
|
39 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return;
|
Chris@392
|
40
|
Chris@392
|
41 QTextStream in(&file);
|
Chris@392
|
42 in.seek(0);
|
Chris@392
|
43
|
Chris@392
|
44 unsigned int lineno = 0;
|
Chris@392
|
45
|
Chris@392
|
46 bool nonIncreasingPrimaries = false;
|
Chris@628
|
47 bool nonIncreasingSecondaries = false;
|
Chris@392
|
48 bool nonNumericPrimaries = false;
|
Chris@392
|
49 bool floatPrimaries = false;
|
Chris@392
|
50 bool variableItemCount = false;
|
Chris@392
|
51 int itemCount = 1;
|
Chris@392
|
52 int earliestNonNumericItem = -1;
|
Chris@392
|
53
|
Chris@392
|
54 float prevPrimary = 0.0;
|
Chris@628
|
55 float prevSecondary = 0.0;
|
Chris@392
|
56
|
Chris@392
|
57 m_maxExampleCols = 0;
|
Chris@392
|
58 m_separator = "";
|
Chris@392
|
59
|
Chris@392
|
60 while (!in.atEnd()) {
|
Chris@392
|
61
|
Chris@392
|
62 // See comment about line endings in CSVFileReader::load()
|
Chris@392
|
63
|
Chris@392
|
64 QString chunk = in.readLine();
|
Chris@392
|
65 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
|
Chris@392
|
66
|
Chris@392
|
67 for (size_t li = 0; li < lines.size(); ++li) {
|
Chris@392
|
68
|
Chris@392
|
69 QString line = lines[li];
|
Chris@392
|
70
|
Chris@392
|
71 if (line.startsWith("#")) continue;
|
Chris@392
|
72
|
Chris@392
|
73 m_behaviour = QString::KeepEmptyParts;
|
Chris@392
|
74
|
Chris@392
|
75 if (m_separator == "") {
|
Chris@392
|
76 //!!! to do: ask the user
|
Chris@392
|
77 if (line.split(",").size() >= 2) m_separator = ",";
|
Chris@392
|
78 else if (line.split("\t").size() >= 2) m_separator = "\t";
|
Chris@392
|
79 else if (line.split("|").size() >= 2) m_separator = "|";
|
Chris@392
|
80 else if (line.split("/").size() >= 2) m_separator = "/";
|
Chris@392
|
81 else if (line.split(":").size() >= 2) m_separator = ":";
|
Chris@392
|
82 else {
|
Chris@392
|
83 m_separator = " ";
|
Chris@392
|
84 m_behaviour = QString::SkipEmptyParts;
|
Chris@392
|
85 }
|
Chris@392
|
86 }
|
Chris@392
|
87
|
Chris@406
|
88 // std::cerr << "separator = \"" << m_separator.toStdString() << "\"" << std::endl;
|
Chris@392
|
89
|
Chris@392
|
90 QStringList list = line.split(m_separator, m_behaviour);
|
Chris@392
|
91 QStringList tidyList;
|
Chris@392
|
92
|
Chris@392
|
93 for (int i = 0; i < list.size(); ++i) {
|
Chris@392
|
94
|
Chris@392
|
95 QString s(list[i]);
|
Chris@392
|
96 bool numeric = false;
|
Chris@392
|
97
|
Chris@392
|
98 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
|
Chris@392
|
99 s = s.mid(1, s.length() - 2);
|
Chris@392
|
100 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
|
Chris@392
|
101 s = s.mid(1, s.length() - 2);
|
Chris@392
|
102 } else {
|
Chris@392
|
103 float f = s.toFloat(&numeric);
|
Chris@406
|
104 // std::cerr << "converted \"" << s.toStdString() << "\" to float, got " << f << " and success = " << numeric << std::endl;
|
Chris@392
|
105 }
|
Chris@392
|
106
|
Chris@392
|
107 tidyList.push_back(s);
|
Chris@392
|
108
|
Chris@392
|
109 if (lineno == 0 || (list.size() < itemCount)) {
|
Chris@392
|
110 itemCount = list.size();
|
Chris@392
|
111 } else {
|
Chris@392
|
112 if (itemCount != list.size()) {
|
Chris@392
|
113 variableItemCount = true;
|
Chris@392
|
114 }
|
Chris@392
|
115 }
|
Chris@392
|
116
|
Chris@392
|
117 if (i == 0) { // primary
|
Chris@392
|
118
|
Chris@392
|
119 if (numeric) {
|
Chris@392
|
120
|
Chris@392
|
121 float primary = s.toFloat();
|
Chris@392
|
122
|
Chris@392
|
123 if (lineno > 0 && primary <= prevPrimary) {
|
Chris@392
|
124 nonIncreasingPrimaries = true;
|
Chris@392
|
125 }
|
Chris@392
|
126
|
Chris@392
|
127 if (s.contains(".") || s.contains(",")) {
|
Chris@392
|
128 floatPrimaries = true;
|
Chris@392
|
129 }
|
Chris@392
|
130
|
Chris@392
|
131 prevPrimary = primary;
|
Chris@392
|
132
|
Chris@392
|
133 } else {
|
Chris@392
|
134 nonNumericPrimaries = true;
|
Chris@392
|
135 }
|
Chris@392
|
136 } else { // secondary
|
Chris@392
|
137
|
Chris@392
|
138 if (!numeric) {
|
Chris@392
|
139 if (earliestNonNumericItem < 0 ||
|
Chris@392
|
140 i < earliestNonNumericItem) {
|
Chris@392
|
141 earliestNonNumericItem = i;
|
Chris@392
|
142 }
|
Chris@628
|
143 } else if (i == 1) {
|
Chris@628
|
144 float secondary = s.toFloat();
|
Chris@628
|
145 if (lineno > 0 && secondary <= prevSecondary) {
|
Chris@628
|
146 nonIncreasingSecondaries = true;
|
Chris@628
|
147 }
|
Chris@628
|
148 prevSecondary = secondary;
|
Chris@392
|
149 }
|
Chris@392
|
150 }
|
Chris@392
|
151 }
|
Chris@392
|
152
|
Chris@392
|
153 if (lineno < 10) {
|
Chris@392
|
154 m_example.push_back(tidyList);
|
Chris@392
|
155 if (lineno == 0 || tidyList.size() > m_maxExampleCols) {
|
Chris@392
|
156 m_maxExampleCols = tidyList.size();
|
Chris@392
|
157 }
|
Chris@392
|
158 }
|
Chris@392
|
159
|
Chris@392
|
160 ++lineno;
|
Chris@392
|
161
|
Chris@392
|
162 if (lineno == 50) break;
|
Chris@392
|
163 }
|
Chris@392
|
164 }
|
Chris@392
|
165
|
Chris@392
|
166 if (nonNumericPrimaries || nonIncreasingPrimaries) {
|
Chris@392
|
167
|
Chris@392
|
168 // Primaries are probably not a series of times
|
Chris@392
|
169
|
Chris@392
|
170 m_timingType = CSVFormat::ImplicitTiming;
|
Chris@392
|
171 m_timeUnits = CSVFormat::TimeWindows;
|
Chris@392
|
172
|
Chris@392
|
173 if (nonNumericPrimaries) {
|
Chris@392
|
174 m_modelType = CSVFormat::OneDimensionalModel;
|
Chris@392
|
175 } else if (itemCount == 1 || variableItemCount ||
|
Chris@392
|
176 (earliestNonNumericItem != -1)) {
|
Chris@392
|
177 m_modelType = CSVFormat::TwoDimensionalModel;
|
Chris@392
|
178 } else {
|
Chris@392
|
179 m_modelType = CSVFormat::ThreeDimensionalModel;
|
Chris@392
|
180 }
|
Chris@392
|
181
|
Chris@392
|
182 } else {
|
Chris@392
|
183
|
Chris@392
|
184 // Increasing numeric primaries -- likely to be time
|
Chris@392
|
185
|
Chris@392
|
186 m_timingType = CSVFormat::ExplicitTiming;
|
Chris@392
|
187
|
Chris@392
|
188 if (floatPrimaries) {
|
Chris@392
|
189 m_timeUnits = CSVFormat::TimeSeconds;
|
Chris@392
|
190 } else {
|
Chris@392
|
191 m_timeUnits = CSVFormat::TimeAudioFrames;
|
Chris@392
|
192 }
|
Chris@392
|
193
|
Chris@392
|
194 if (itemCount == 1) {
|
Chris@392
|
195 m_modelType = CSVFormat::OneDimensionalModel;
|
Chris@392
|
196 } else if (variableItemCount || (earliestNonNumericItem != -1)) {
|
Chris@392
|
197 if (earliestNonNumericItem != -1 && earliestNonNumericItem < 2) {
|
Chris@392
|
198 m_modelType = CSVFormat::OneDimensionalModel;
|
Chris@392
|
199 } else {
|
Chris@392
|
200 m_modelType = CSVFormat::TwoDimensionalModel;
|
Chris@392
|
201 }
|
Chris@392
|
202 } else {
|
Chris@392
|
203 m_modelType = CSVFormat::ThreeDimensionalModel;
|
Chris@392
|
204 }
|
Chris@628
|
205
|
Chris@628
|
206 if (nonIncreasingSecondaries) {
|
Chris@628
|
207 m_durationType = Durations;
|
Chris@628
|
208 } else {
|
Chris@628
|
209 m_durationType = EndTimes;
|
Chris@628
|
210 }
|
Chris@392
|
211 }
|
Chris@392
|
212
|
Chris@392
|
213 std::cerr << "Estimated model type: " << m_modelType << std::endl;
|
Chris@392
|
214 std::cerr << "Estimated timing type: " << m_timingType << std::endl;
|
Chris@628
|
215 std::cerr << "Estimated duration type: " << m_durationType << std::endl;
|
Chris@392
|
216 std::cerr << "Estimated units: " << m_timeUnits << std::endl;
|
Chris@392
|
217 }
|
Chris@392
|
218
|