Mercurial > hg > svcore
comparison data/fileio/CSVFileReader.cpp @ 628:001db550bd48
* Add option to import time+duration (or time+endtime) from CSV files (importing to Region layers)
* Fix ffwd/rwd in Region layers so as to behave like time-value layers
author | Chris Cannam |
---|---|
date | Thu, 08 Jul 2010 14:22:28 +0000 |
parents | dd97f7b3d120 |
children | 3a5ee4b6c9ad |
comparison (legend: equal, deleted, inserted, replaced)
627:080d8bdd8762 | 628:001db550bd48 |
---|---|
18 #include "model/Model.h" | 18 #include "model/Model.h" |
19 #include "base/RealTime.h" | 19 #include "base/RealTime.h" |
20 #include "model/SparseOneDimensionalModel.h" | 20 #include "model/SparseOneDimensionalModel.h" |
21 #include "model/SparseTimeValueModel.h" | 21 #include "model/SparseTimeValueModel.h" |
22 #include "model/EditableDenseThreeDimensionalModel.h" | 22 #include "model/EditableDenseThreeDimensionalModel.h" |
23 #include "model/RegionModel.h" | |
23 #include "DataFileReaderFactory.h" | 24 #include "DataFileReaderFactory.h" |
24 | 25 |
25 #include <QFile> | 26 #include <QFile> |
26 #include <QString> | 27 #include <QString> |
27 #include <QRegExp> | 28 #include <QRegExp> |
28 #include <QStringList> | 29 #include <QStringList> |
29 #include <QTextStream> | 30 #include <QTextStream> |
30 | 31 |
31 #include <iostream> | 32 #include <iostream> |
33 #include <map> | |
32 | 34 |
33 CSVFileReader::CSVFileReader(QString path, CSVFormat format, | 35 CSVFileReader::CSVFileReader(QString path, CSVFormat format, |
34 size_t mainModelSampleRate) : | 36 size_t mainModelSampleRate) : |
35 m_format(format), | 37 m_format(format), |
36 m_file(0), | 38 m_file(0), |
88 delete dialog; | 90 delete dialog; |
89 throw DataFileReaderFactory::ImportCancelled; | 91 throw DataFileReaderFactory::ImportCancelled; |
90 } | 92 } |
91 */ | 93 */ |
92 | 94 |
93 CSVFormat::ModelType modelType = m_format.getModelType(); | 95 CSVFormat::ModelType modelType = m_format.getModelType(); |
94 CSVFormat::TimingType timingType = m_format.getTimingType(); | 96 CSVFormat::TimingType timingType = m_format.getTimingType(); |
95 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); | 97 CSVFormat::DurationType durationType = m_format.getDurationType(); |
98 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); | |
96 QString separator = m_format.getSeparator(); | 99 QString separator = m_format.getSeparator(); |
97 QString::SplitBehavior behaviour = m_format.getSplitBehaviour(); | 100 QString::SplitBehavior behaviour = m_format.getSplitBehaviour(); |
98 size_t sampleRate = m_format.getSampleRate(); | 101 size_t sampleRate = m_format.getSampleRate(); |
99 size_t windowSize = m_format.getWindowSize(); | 102 size_t windowSize = m_format.getWindowSize(); |
100 | 103 |
112 } | 115 } |
113 } | 116 } |
114 | 117 |
115 SparseOneDimensionalModel *model1 = 0; | 118 SparseOneDimensionalModel *model1 = 0; |
116 SparseTimeValueModel *model2 = 0; | 119 SparseTimeValueModel *model2 = 0; |
120 RegionModel *model2a = 0; | |
117 EditableDenseThreeDimensionalModel *model3 = 0; | 121 EditableDenseThreeDimensionalModel *model3 = 0; |
118 Model *model = 0; | 122 Model *model = 0; |
119 | 123 |
120 QTextStream in(m_file); | 124 QTextStream in(m_file); |
121 in.seek(0); | 125 in.seek(0); |
124 unsigned int lineno = 0; | 128 unsigned int lineno = 0; |
125 | 129 |
126 float min = 0.0, max = 0.0; | 130 float min = 0.0, max = 0.0; |
127 | 131 |
128 size_t frameNo = 0; | 132 size_t frameNo = 0; |
133 size_t duration = 0; | |
129 size_t startFrame = 0; // for calculation of dense model resolution | 134 size_t startFrame = 0; // for calculation of dense model resolution |
135 | |
136 std::map<QString, float> labelValueMap; | |
137 float syntheticMax = 0.f; | |
130 | 138 |
131 while (!in.atEnd()) { | 139 while (!in.atEnd()) { |
132 | 140 |
133 // QTextStream's readLine doesn't cope with old-style Mac | 141 // QTextStream's readLine doesn't cope with old-style Mac |
134 // CR-only line endings. Why did they bother making the class | 142 // CR-only line endings. Why did they bother making the class |
164 case CSVFormat::TwoDimensionalModel: | 172 case CSVFormat::TwoDimensionalModel: |
165 model2 = new SparseTimeValueModel(sampleRate, windowSize, false); | 173 model2 = new SparseTimeValueModel(sampleRate, windowSize, false); |
166 model = model2; | 174 model = model2; |
167 break; | 175 break; |
168 | 176 |
177 case CSVFormat::TwoDimensionalModelWithDuration: | |
178 model2a = new RegionModel(sampleRate, windowSize, false); | |
179 model = model2a; | |
180 break; | |
181 | |
169 case CSVFormat::ThreeDimensionalModel: | 182 case CSVFormat::ThreeDimensionalModel: |
170 model3 = new EditableDenseThreeDimensionalModel | 183 model3 = new EditableDenseThreeDimensionalModel |
171 (sampleRate, | 184 (sampleRate, |
172 windowSize, | 185 windowSize, |
173 list.size(), | 186 list.size(), |
178 } | 191 } |
179 | 192 |
180 QStringList tidyList; | 193 QStringList tidyList; |
181 QRegExp nonNumericRx("[^0-9eE.,+-]"); | 194 QRegExp nonNumericRx("[^0-9eE.,+-]"); |
182 | 195 |
196 float value = 0.f; | |
197 | |
183 for (int i = 0; i < list.size(); ++i) { | 198 for (int i = 0; i < list.size(); ++i) { |
184 | 199 |
185 QString s(list[i].trimmed()); | 200 QString s(list[i].trimmed()); |
186 | 201 |
187 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { | 202 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { |
188 s = s.mid(1, s.length() - 2); | 203 s = s.mid(1, s.length() - 2); |
189 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { | 204 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { |
190 s = s.mid(1, s.length() - 2); | 205 s = s.mid(1, s.length() - 2); |
191 } | 206 } |
192 | 207 |
193 if (i == 0 && timingType == CSVFormat::ExplicitTiming) { | 208 if (timingType == CSVFormat::ExplicitTiming) { |
194 | 209 |
210 size_t calculatedFrame = 0; | |
211 | |
212 if (i == 0 || | |
213 (i == 1 && | |
214 modelType == CSVFormat::TwoDimensionalModelWithDuration)) { | |
215 | |
216 bool ok = false; | |
217 QString numeric = s; | |
218 numeric.remove(nonNumericRx); | |
219 | |
220 if (timeUnits == CSVFormat::TimeSeconds) { | |
221 | |
222 double time = numeric.toDouble(&ok); | |
223 calculatedFrame = int(time * sampleRate + 0.5); | |
224 | |
225 } else { | |
226 | |
227 calculatedFrame = numeric.toInt(&ok); | |
228 | |
229 if (timeUnits == CSVFormat::TimeWindows) { | |
230 calculatedFrame *= windowSize; | |
231 } | |
232 } | |
233 | |
234 if (!ok) { | |
235 if (warnings < warnLimit) { | |
236 std::cerr << "WARNING: CSVFileReader::load: " | |
237 << "Bad time format (\"" << s.toStdString() | |
238 << "\") in data line " | |
239 << lineno+1 << ":" << std::endl; | |
240 std::cerr << line.toStdString() << std::endl; | |
241 } else if (warnings == warnLimit) { | |
242 std::cerr << "WARNING: Too many warnings" << std::endl; | |
243 } | |
244 ++warnings; | |
245 } | |
246 | |
247 if (i == 0) frameNo = calculatedFrame; | |
248 else { | |
249 if (durationType == CSVFormat::EndTimes) { | |
250 duration = calculatedFrame - frameNo; | |
251 } else { | |
252 duration = calculatedFrame; | |
253 } | |
254 } | |
255 | |
256 continue; | |
257 } | |
258 } | |
259 | |
260 if ((i == 1 && | |
261 modelType == CSVFormat::TwoDimensionalModel) || | |
262 (i == 2 && | |
263 modelType == CSVFormat::TwoDimensionalModelWithDuration)) { | |
195 bool ok = false; | 264 bool ok = false; |
196 QString numeric = s; | 265 value = s.toFloat(&ok); |
197 numeric.remove(nonNumericRx); | 266 if (!ok) { |
198 | 267 // cf. RDFImporter::fillModel |
199 if (timeUnits == CSVFormat::TimeSeconds) { | 268 if (labelValueMap.find(s) == labelValueMap.end()) { |
200 | 269 syntheticMax = syntheticMax + 1.f; |
201 double time = numeric.toDouble(&ok); | 270 labelValueMap[s] = syntheticMax; |
202 frameNo = int(time * sampleRate + 0.5); | 271 } |
203 | 272 value = labelValueMap[s]; |
204 } else { | 273 } else { |
205 | 274 if (value > syntheticMax) syntheticMax = value; |
206 frameNo = numeric.toInt(&ok); | 275 } |
207 | 276 if (i + 1 == list.size()) { |
208 if (timeUnits == CSVFormat::TimeWindows) { | 277 // keep text around for use as label (none other given) |
209 frameNo *= windowSize; | 278 tidyList.push_back(s); |
210 } | 279 } |
211 } | 280 continue; |
212 | 281 } |
213 if (!ok) { | 282 |
214 if (warnings < warnLimit) { | 283 tidyList.push_back(s); |
215 std::cerr << "WARNING: CSVFileReader::load: " | |
216 << "Bad time format (\"" << s.toStdString() | |
217 << "\") in data line " | |
218 << lineno+1 << ":" << std::endl; | |
219 std::cerr << line.toStdString() << std::endl; | |
220 } else if (warnings == warnLimit) { | |
221 std::cerr << "WARNING: Too many warnings" << std::endl; | |
222 } | |
223 ++warnings; | |
224 } | |
225 } else { | |
226 tidyList.push_back(s); | |
227 } | |
228 } | 284 } |
229 | 285 |
230 if (modelType == CSVFormat::OneDimensionalModel) { | 286 if (modelType == CSVFormat::OneDimensionalModel) { |
231 | 287 |
232 SparseOneDimensionalModel::Point point | 288 SparseOneDimensionalModel::Point point |
238 | 294 |
239 } else if (modelType == CSVFormat::TwoDimensionalModel) { | 295 } else if (modelType == CSVFormat::TwoDimensionalModel) { |
240 | 296 |
241 SparseTimeValueModel::Point point | 297 SparseTimeValueModel::Point point |
242 (frameNo, | 298 (frameNo, |
243 tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0, | 299 value, |
244 tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno+1)); | 300 tidyList.size() > 0 ? tidyList[0] : QString("%1").arg(lineno+1)); |
245 | 301 |
246 model2->addPoint(point); | 302 model2->addPoint(point); |
303 | |
304 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) { | |
305 | |
306 RegionModel::Point point | |
307 (frameNo, | |
308 value, | |
309 duration, | |
310 tidyList.size() > 0 ? tidyList[0] : QString("%1").arg(lineno+1)); | |
311 | |
312 model2a->addPoint(point); | |
247 | 313 |
248 } else if (modelType == CSVFormat::ThreeDimensionalModel) { | 314 } else if (modelType == CSVFormat::ThreeDimensionalModel) { |
249 | 315 |
250 DenseThreeDimensionalModel::Column values; | 316 DenseThreeDimensionalModel::Column values; |
251 | 317 |