comparison data/fileio/CSVFileReader.cpp @ 628:001db550bd48

* Add option to import time+duration (or time+endtime) from CSV files (importing to Region layers)
* Fix ffwd/rwd in Region layers so as to behave like time-value layers
author Chris Cannam
date Thu, 08 Jul 2010 14:22:28 +0000
parents dd97f7b3d120
children 3a5ee4b6c9ad
comparison
equal deleted inserted replaced
627:080d8bdd8762 628:001db550bd48
18 #include "model/Model.h" 18 #include "model/Model.h"
19 #include "base/RealTime.h" 19 #include "base/RealTime.h"
20 #include "model/SparseOneDimensionalModel.h" 20 #include "model/SparseOneDimensionalModel.h"
21 #include "model/SparseTimeValueModel.h" 21 #include "model/SparseTimeValueModel.h"
22 #include "model/EditableDenseThreeDimensionalModel.h" 22 #include "model/EditableDenseThreeDimensionalModel.h"
23 #include "model/RegionModel.h"
23 #include "DataFileReaderFactory.h" 24 #include "DataFileReaderFactory.h"
24 25
25 #include <QFile> 26 #include <QFile>
26 #include <QString> 27 #include <QString>
27 #include <QRegExp> 28 #include <QRegExp>
28 #include <QStringList> 29 #include <QStringList>
29 #include <QTextStream> 30 #include <QTextStream>
30 31
31 #include <iostream> 32 #include <iostream>
33 #include <map>
32 34
33 CSVFileReader::CSVFileReader(QString path, CSVFormat format, 35 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
34 size_t mainModelSampleRate) : 36 size_t mainModelSampleRate) :
35 m_format(format), 37 m_format(format),
36 m_file(0), 38 m_file(0),
88 delete dialog; 90 delete dialog;
89 throw DataFileReaderFactory::ImportCancelled; 91 throw DataFileReaderFactory::ImportCancelled;
90 } 92 }
91 */ 93 */
92 94
93 CSVFormat::ModelType modelType = m_format.getModelType(); 95 CSVFormat::ModelType modelType = m_format.getModelType();
94 CSVFormat::TimingType timingType = m_format.getTimingType(); 96 CSVFormat::TimingType timingType = m_format.getTimingType();
95 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); 97 CSVFormat::DurationType durationType = m_format.getDurationType();
98 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
96 QString separator = m_format.getSeparator(); 99 QString separator = m_format.getSeparator();
97 QString::SplitBehavior behaviour = m_format.getSplitBehaviour(); 100 QString::SplitBehavior behaviour = m_format.getSplitBehaviour();
98 size_t sampleRate = m_format.getSampleRate(); 101 size_t sampleRate = m_format.getSampleRate();
99 size_t windowSize = m_format.getWindowSize(); 102 size_t windowSize = m_format.getWindowSize();
100 103
112 } 115 }
113 } 116 }
114 117
115 SparseOneDimensionalModel *model1 = 0; 118 SparseOneDimensionalModel *model1 = 0;
116 SparseTimeValueModel *model2 = 0; 119 SparseTimeValueModel *model2 = 0;
120 RegionModel *model2a = 0;
117 EditableDenseThreeDimensionalModel *model3 = 0; 121 EditableDenseThreeDimensionalModel *model3 = 0;
118 Model *model = 0; 122 Model *model = 0;
119 123
120 QTextStream in(m_file); 124 QTextStream in(m_file);
121 in.seek(0); 125 in.seek(0);
124 unsigned int lineno = 0; 128 unsigned int lineno = 0;
125 129
126 float min = 0.0, max = 0.0; 130 float min = 0.0, max = 0.0;
127 131
128 size_t frameNo = 0; 132 size_t frameNo = 0;
133 size_t duration = 0;
129 size_t startFrame = 0; // for calculation of dense model resolution 134 size_t startFrame = 0; // for calculation of dense model resolution
135
136 std::map<QString, float> labelValueMap;
137 float syntheticMax = 0.f;
130 138
131 while (!in.atEnd()) { 139 while (!in.atEnd()) {
132 140
133 // QTextStream's readLine doesn't cope with old-style Mac 141 // QTextStream's readLine doesn't cope with old-style Mac
134 // CR-only line endings. Why did they bother making the class 142 // CR-only line endings. Why did they bother making the class
164 case CSVFormat::TwoDimensionalModel: 172 case CSVFormat::TwoDimensionalModel:
165 model2 = new SparseTimeValueModel(sampleRate, windowSize, false); 173 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
166 model = model2; 174 model = model2;
167 break; 175 break;
168 176
177 case CSVFormat::TwoDimensionalModelWithDuration:
178 model2a = new RegionModel(sampleRate, windowSize, false);
179 model = model2a;
180 break;
181
169 case CSVFormat::ThreeDimensionalModel: 182 case CSVFormat::ThreeDimensionalModel:
170 model3 = new EditableDenseThreeDimensionalModel 183 model3 = new EditableDenseThreeDimensionalModel
171 (sampleRate, 184 (sampleRate,
172 windowSize, 185 windowSize,
173 list.size(), 186 list.size(),
178 } 191 }
179 192
180 QStringList tidyList; 193 QStringList tidyList;
181 QRegExp nonNumericRx("[^0-9eE.,+-]"); 194 QRegExp nonNumericRx("[^0-9eE.,+-]");
182 195
196 float value = 0.f;
197
183 for (int i = 0; i < list.size(); ++i) { 198 for (int i = 0; i < list.size(); ++i) {
184 199
185 QString s(list[i].trimmed()); 200 QString s(list[i].trimmed());
186 201
187 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { 202 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
188 s = s.mid(1, s.length() - 2); 203 s = s.mid(1, s.length() - 2);
189 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { 204 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
190 s = s.mid(1, s.length() - 2); 205 s = s.mid(1, s.length() - 2);
191 } 206 }
192 207
193 if (i == 0 && timingType == CSVFormat::ExplicitTiming) { 208 if (timingType == CSVFormat::ExplicitTiming) {
194 209
210 size_t calculatedFrame = 0;
211
212 if (i == 0 ||
213 (i == 1 &&
214 modelType == CSVFormat::TwoDimensionalModelWithDuration)) {
215
216 bool ok = false;
217 QString numeric = s;
218 numeric.remove(nonNumericRx);
219
220 if (timeUnits == CSVFormat::TimeSeconds) {
221
222 double time = numeric.toDouble(&ok);
223 calculatedFrame = int(time * sampleRate + 0.5);
224
225 } else {
226
227 calculatedFrame = numeric.toInt(&ok);
228
229 if (timeUnits == CSVFormat::TimeWindows) {
230 calculatedFrame *= windowSize;
231 }
232 }
233
234 if (!ok) {
235 if (warnings < warnLimit) {
236 std::cerr << "WARNING: CSVFileReader::load: "
237 << "Bad time format (\"" << s.toStdString()
238 << "\") in data line "
239 << lineno+1 << ":" << std::endl;
240 std::cerr << line.toStdString() << std::endl;
241 } else if (warnings == warnLimit) {
242 std::cerr << "WARNING: Too many warnings" << std::endl;
243 }
244 ++warnings;
245 }
246
247 if (i == 0) frameNo = calculatedFrame;
248 else {
249 if (durationType == CSVFormat::EndTimes) {
250 duration = calculatedFrame - frameNo;
251 } else {
252 duration = calculatedFrame;
253 }
254 }
255
256 continue;
257 }
258 }
259
260 if ((i == 1 &&
261 modelType == CSVFormat::TwoDimensionalModel) ||
262 (i == 2 &&
263 modelType == CSVFormat::TwoDimensionalModelWithDuration)) {
195 bool ok = false; 264 bool ok = false;
196 QString numeric = s; 265 value = s.toFloat(&ok);
197 numeric.remove(nonNumericRx); 266 if (!ok) {
198 267 // cf. RDFImporter::fillModel
199 if (timeUnits == CSVFormat::TimeSeconds) { 268 if (labelValueMap.find(s) == labelValueMap.end()) {
200 269 syntheticMax = syntheticMax + 1.f;
201 double time = numeric.toDouble(&ok); 270 labelValueMap[s] = syntheticMax;
202 frameNo = int(time * sampleRate + 0.5); 271 }
203 272 value = labelValueMap[s];
204 } else { 273 } else {
205 274 if (value > syntheticMax) syntheticMax = value;
206 frameNo = numeric.toInt(&ok); 275 }
207 276 if (i + 1 == list.size()) {
208 if (timeUnits == CSVFormat::TimeWindows) { 277 // keep text around for use as label (none other given)
209 frameNo *= windowSize; 278 tidyList.push_back(s);
210 } 279 }
211 } 280 continue;
212 281 }
213 if (!ok) { 282
214 if (warnings < warnLimit) { 283 tidyList.push_back(s);
215 std::cerr << "WARNING: CSVFileReader::load: "
216 << "Bad time format (\"" << s.toStdString()
217 << "\") in data line "
218 << lineno+1 << ":" << std::endl;
219 std::cerr << line.toStdString() << std::endl;
220 } else if (warnings == warnLimit) {
221 std::cerr << "WARNING: Too many warnings" << std::endl;
222 }
223 ++warnings;
224 }
225 } else {
226 tidyList.push_back(s);
227 }
228 } 284 }
229 285
230 if (modelType == CSVFormat::OneDimensionalModel) { 286 if (modelType == CSVFormat::OneDimensionalModel) {
231 287
232 SparseOneDimensionalModel::Point point 288 SparseOneDimensionalModel::Point point
238 294
239 } else if (modelType == CSVFormat::TwoDimensionalModel) { 295 } else if (modelType == CSVFormat::TwoDimensionalModel) {
240 296
241 SparseTimeValueModel::Point point 297 SparseTimeValueModel::Point point
242 (frameNo, 298 (frameNo,
243 tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0, 299 value,
244 tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno+1)); 300 tidyList.size() > 0 ? tidyList[0] : QString("%1").arg(lineno+1));
245 301
246 model2->addPoint(point); 302 model2->addPoint(point);
303
304 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
305
306 RegionModel::Point point
307 (frameNo,
308 value,
309 duration,
310 tidyList.size() > 0 ? tidyList[0] : QString("%1").arg(lineno+1));
311
312 model2a->addPoint(point);
247 313
248 } else if (modelType == CSVFormat::ThreeDimensionalModel) { 314 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
249 315
250 DenseThreeDimensionalModel::Column values; 316 DenseThreeDimensionalModel::Column values;
251 317