comparison data/fileio/CSVFileReader.cpp @ 392:183ee2a55fc7

* More work to abstract out interactive components used in the data library, so that it does not need to depend on QtGui.
author Chris Cannam
date Fri, 14 Mar 2008 17:14:21 +0000
parents 21e79997e80f
children 6f8ee19984ad
comparison
equal deleted inserted replaced
391:5858cc462d0a 392:183ee2a55fc7
25 #include <QFile> 25 #include <QFile>
26 #include <QString> 26 #include <QString>
27 #include <QRegExp> 27 #include <QRegExp>
28 #include <QStringList> 28 #include <QStringList>
29 #include <QTextStream> 29 #include <QTextStream>
30 #include <QFrame>
31 #include <QGridLayout>
32 #include <QPushButton>
33 #include <QHBoxLayout>
34 #include <QVBoxLayout>
35 #include <QTableWidget>
36 #include <QComboBox>
37 #include <QLabel>
38 30
39 #include <iostream> 31 #include <iostream>
40 32
41 CSVFileReader::CSVFileReader(QString path, size_t mainModelSampleRate) : 33 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
34 size_t mainModelSampleRate) :
35 m_format(format),
42 m_file(0), 36 m_file(0),
43 m_mainModelSampleRate(mainModelSampleRate) 37 m_mainModelSampleRate(mainModelSampleRate)
44 { 38 {
45 m_file = new QFile(path); 39 m_file = new QFile(path);
46 bool good = false; 40 bool good = false;
84 78
85 Model * 79 Model *
86 CSVFileReader::load() const 80 CSVFileReader::load() const
87 { 81 {
88 if (!m_file) return 0; 82 if (!m_file) return 0;
89 83 /*!!!
90 CSVFormatDialog *dialog = new CSVFormatDialog 84 CSVFormatDialog *dialog = new CSVFormatDialog
91 (0, m_file, m_mainModelSampleRate); 85 (0, m_file, m_mainModelSampleRate);
92 86
93 if (dialog->exec() == QDialog::Rejected) { 87 if (dialog->exec() == QDialog::Rejected) {
94 delete dialog; 88 delete dialog;
95 throw DataFileReaderFactory::ImportCancelled; 89 throw DataFileReaderFactory::ImportCancelled;
96 } 90 }
97 91 */
98 CSVFormatDialog::ModelType modelType = dialog->getModelType(); 92
99 CSVFormatDialog::TimingType timingType = dialog->getTimingType(); 93 CSVFormat::ModelType modelType = m_format.getModelType();
100 CSVFormatDialog::TimeUnits timeUnits = dialog->getTimeUnits(); 94 CSVFormat::TimingType timingType = m_format.getTimingType();
101 QString separator = dialog->getSeparator(); 95 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
102 QString::SplitBehavior behaviour = dialog->getSplitBehaviour(); 96 QString separator = m_format.getSeparator();
103 size_t sampleRate = dialog->getSampleRate(); 97 QString::SplitBehavior behaviour = m_format.getSplitBehaviour();
104 size_t windowSize = dialog->getWindowSize(); 98 size_t sampleRate = m_format.getSampleRate();
105 99 size_t windowSize = m_format.getWindowSize();
106 delete dialog; 100
107 101 if (timingType == CSVFormat::ExplicitTiming) {
108 if (timingType == CSVFormatDialog::ExplicitTiming) {
109 windowSize = 1; 102 windowSize = 1;
110 if (timeUnits == CSVFormatDialog::TimeSeconds) { 103 if (timeUnits == CSVFormat::TimeSeconds) {
111 sampleRate = m_mainModelSampleRate; 104 sampleRate = m_mainModelSampleRate;
112 } 105 }
113 } 106 }
114 107
115 SparseOneDimensionalModel *model1 = 0; 108 SparseOneDimensionalModel *model1 = 0;
153 146
154 if (!model) { 147 if (!model) {
155 148
156 switch (modelType) { 149 switch (modelType) {
157 150
158 case CSVFormatDialog::OneDimensionalModel: 151 case CSVFormat::OneDimensionalModel:
159 model1 = new SparseOneDimensionalModel(sampleRate, windowSize); 152 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
160 model = model1; 153 model = model1;
161 break; 154 break;
162 155
163 case CSVFormatDialog::TwoDimensionalModel: 156 case CSVFormat::TwoDimensionalModel:
164 model2 = new SparseTimeValueModel(sampleRate, windowSize, false); 157 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
165 model = model2; 158 model = model2;
166 break; 159 break;
167 160
168 case CSVFormatDialog::ThreeDimensionalModel: 161 case CSVFormat::ThreeDimensionalModel:
169 model3 = new EditableDenseThreeDimensionalModel(sampleRate, 162 model3 = new EditableDenseThreeDimensionalModel(sampleRate,
170 windowSize, 163 windowSize,
171 list.size()); 164 list.size());
172 model = model3; 165 model = model3;
173 break; 166 break;
185 s = s.mid(1, s.length() - 2); 178 s = s.mid(1, s.length() - 2);
186 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) { 179 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
187 s = s.mid(1, s.length() - 2); 180 s = s.mid(1, s.length() - 2);
188 } 181 }
189 182
190 if (i == 0 && timingType == CSVFormatDialog::ExplicitTiming) { 183 if (i == 0 && timingType == CSVFormat::ExplicitTiming) {
191 184
192 bool ok = false; 185 bool ok = false;
193 QString numeric = s; 186 QString numeric = s;
194 numeric.remove(nonNumericRx); 187 numeric.remove(nonNumericRx);
195 188
196 if (timeUnits == CSVFormatDialog::TimeSeconds) { 189 if (timeUnits == CSVFormat::TimeSeconds) {
197 190
198 double time = numeric.toDouble(&ok); 191 double time = numeric.toDouble(&ok);
199 frameNo = int(time * sampleRate + 0.00001); 192 frameNo = int(time * sampleRate + 0.00001);
200 193
201 } else { 194 } else {
202 195
203 frameNo = numeric.toInt(&ok); 196 frameNo = numeric.toInt(&ok);
204 197
205 if (timeUnits == CSVFormatDialog::TimeWindows) { 198 if (timeUnits == CSVFormat::TimeWindows) {
206 frameNo *= windowSize; 199 frameNo *= windowSize;
207 } 200 }
208 } 201 }
209 202
210 if (!ok) { 203 if (!ok) {
222 } else { 215 } else {
223 tidyList.push_back(s); 216 tidyList.push_back(s);
224 } 217 }
225 } 218 }
226 219
227 if (modelType == CSVFormatDialog::OneDimensionalModel) { 220 if (modelType == CSVFormat::OneDimensionalModel) {
228 221
229 SparseOneDimensionalModel::Point point 222 SparseOneDimensionalModel::Point point
230 (frameNo, 223 (frameNo,
231 tidyList.size() > 0 ? tidyList[tidyList.size()-1] : 224 tidyList.size() > 0 ? tidyList[tidyList.size()-1] :
232 QString("%1").arg(lineno)); 225 QString("%1").arg(lineno));
233 226
234 model1->addPoint(point); 227 model1->addPoint(point);
235 228
236 } else if (modelType == CSVFormatDialog::TwoDimensionalModel) { 229 } else if (modelType == CSVFormat::TwoDimensionalModel) {
237 230
238 SparseTimeValueModel::Point point 231 SparseTimeValueModel::Point point
239 (frameNo, 232 (frameNo,
240 tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0, 233 tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0,
241 tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno)); 234 tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno));
242 235
243 model2->addPoint(point); 236 model2->addPoint(point);
244 237
245 } else if (modelType == CSVFormatDialog::ThreeDimensionalModel) { 238 } else if (modelType == CSVFormat::ThreeDimensionalModel) {
246 239
247 DenseThreeDimensionalModel::Column values; 240 DenseThreeDimensionalModel::Column values;
248 241
249 for (int i = 0; i < tidyList.size(); ++i) { 242 for (int i = 0; i < tidyList.size(); ++i) {
250 243
275 268
276 model3->setColumn(frameNo / model3->getResolution(), values); 269 model3->setColumn(frameNo / model3->getResolution(), values);
277 } 270 }
278 271
279 ++lineno; 272 ++lineno;
280 if (timingType == CSVFormatDialog::ImplicitTiming || 273 if (timingType == CSVFormat::ImplicitTiming ||
281 list.size() == 0) { 274 list.size() == 0) {
282 frameNo += windowSize; 275 frameNo += windowSize;
283 } 276 }
284 } 277 }
285 } 278 }
286 279
287 if (modelType == CSVFormatDialog::ThreeDimensionalModel) { 280 if (modelType == CSVFormat::ThreeDimensionalModel) {
288 model3->setMinimumLevel(min); 281 model3->setMinimumLevel(min);
289 model3->setMaximumLevel(max); 282 model3->setMaximumLevel(max);
290 } 283 }
291 284
292 return model; 285 return model;
293 } 286 }
294 287
295
296 CSVFormatDialog::CSVFormatDialog(QWidget *parent, QFile *file,
297 size_t defaultSampleRate) :
298 QDialog(parent),
299 m_modelType(OneDimensionalModel),
300 m_timingType(ExplicitTiming),
301 m_timeUnits(TimeAudioFrames),
302 m_separator(""),
303 m_behaviour(QString::KeepEmptyParts)
304 {
305 setModal(true);
306 setWindowTitle(tr("Select Data Format"));
307
308 (void)guessFormat(file);
309
310 QGridLayout *layout = new QGridLayout;
311
312 layout->addWidget(new QLabel(tr("<b>Select Data Format</b><p>Please select the correct data format for this file.")),
313 0, 0, 1, 4);
314
315 layout->addWidget(new QLabel(tr("Each row specifies:")), 1, 0);
316
317 m_modelTypeCombo = new QComboBox;
318 m_modelTypeCombo->addItem(tr("A point in time"));
319 m_modelTypeCombo->addItem(tr("A value at a time"));
320 m_modelTypeCombo->addItem(tr("A set of values"));
321 layout->addWidget(m_modelTypeCombo, 1, 1, 1, 2);
322 connect(m_modelTypeCombo, SIGNAL(activated(int)),
323 this, SLOT(modelTypeChanged(int)));
324 m_modelTypeCombo->setCurrentIndex(int(m_modelType));
325
326 layout->addWidget(new QLabel(tr("The first column contains:")), 2, 0);
327
328 m_timingTypeCombo = new QComboBox;
329 m_timingTypeCombo->addItem(tr("Time, in seconds"));
330 m_timingTypeCombo->addItem(tr("Time, in audio sample frames"));
331 m_timingTypeCombo->addItem(tr("Data (rows are consecutive in time)"));
332 layout->addWidget(m_timingTypeCombo, 2, 1, 1, 2);
333 connect(m_timingTypeCombo, SIGNAL(activated(int)),
334 this, SLOT(timingTypeChanged(int)));
335 m_timingTypeCombo->setCurrentIndex(m_timingType == ExplicitTiming ?
336 m_timeUnits == TimeSeconds ? 0 : 1 : 2);
337
338 m_sampleRateLabel = new QLabel(tr("Audio sample rate (Hz):"));
339 layout->addWidget(m_sampleRateLabel, 3, 0);
340
341 size_t sampleRates[] = {
342 8000, 11025, 12000, 22050, 24000, 32000,
343 44100, 48000, 88200, 96000, 176400, 192000
344 };
345
346 m_sampleRateCombo = new QComboBox;
347 m_sampleRate = defaultSampleRate;
348 for (size_t i = 0; i < sizeof(sampleRates) / sizeof(sampleRates[0]); ++i) {
349 m_sampleRateCombo->addItem(QString("%1").arg(sampleRates[i]));
350 if (sampleRates[i] == m_sampleRate) m_sampleRateCombo->setCurrentIndex(i);
351 }
352 m_sampleRateCombo->setEditable(true);
353
354 layout->addWidget(m_sampleRateCombo, 3, 1);
355 connect(m_sampleRateCombo, SIGNAL(activated(QString)),
356 this, SLOT(sampleRateChanged(QString)));
357 connect(m_sampleRateCombo, SIGNAL(editTextChanged(QString)),
358 this, SLOT(sampleRateChanged(QString)));
359
360 m_windowSizeLabel = new QLabel(tr("Frame increment between rows:"));
361 layout->addWidget(m_windowSizeLabel, 4, 0);
362
363 m_windowSizeCombo = new QComboBox;
364 m_windowSize = 1024;
365 for (int i = 0; i <= 16; ++i) {
366 int value = 1 << i;
367 m_windowSizeCombo->addItem(QString("%1").arg(value));
368 if (value == int(m_windowSize)) m_windowSizeCombo->setCurrentIndex(i);
369 }
370 m_windowSizeCombo->setEditable(true);
371
372 layout->addWidget(m_windowSizeCombo, 4, 1);
373 connect(m_windowSizeCombo, SIGNAL(activated(QString)),
374 this, SLOT(windowSizeChanged(QString)));
375 connect(m_windowSizeCombo, SIGNAL(editTextChanged(QString)),
376 this, SLOT(windowSizeChanged(QString)));
377
378 layout->addWidget(new QLabel(tr("\nExample data from file:")), 5, 0, 1, 4);
379
380 m_exampleWidget = new QTableWidget
381 (std::min(10, m_example.size()), m_maxExampleCols);
382
383 layout->addWidget(m_exampleWidget, 6, 0, 1, 4);
384 layout->setColumnStretch(3, 10);
385 layout->setRowStretch(4, 10);
386
387 QPushButton *ok = new QPushButton(tr("OK"));
388 connect(ok, SIGNAL(clicked()), this, SLOT(accept()));
389 ok->setDefault(true);
390
391 QPushButton *cancel = new QPushButton(tr("Cancel"));
392 connect(cancel, SIGNAL(clicked()), this, SLOT(reject()));
393
394 QHBoxLayout *buttonLayout = new QHBoxLayout;
395 buttonLayout->addStretch(1);
396 buttonLayout->addWidget(ok);
397 buttonLayout->addWidget(cancel);
398
399 QVBoxLayout *mainLayout = new QVBoxLayout;
400 mainLayout->addLayout(layout);
401 mainLayout->addLayout(buttonLayout);
402
403 setLayout(mainLayout);
404
405 timingTypeChanged(m_timingTypeCombo->currentIndex());
406 }
407
408 CSVFormatDialog::~CSVFormatDialog()
409 {
410 }
411
412 void
413 CSVFormatDialog::populateExample()
414 {
415 m_exampleWidget->setColumnCount
416 (m_timingType == ExplicitTiming ?
417 m_maxExampleCols - 1 : m_maxExampleCols);
418
419 m_exampleWidget->setHorizontalHeaderLabels(QStringList());
420
421 for (int i = 0; i < m_example.size(); ++i) {
422 for (int j = 0; j < m_example[i].size(); ++j) {
423
424 QTableWidgetItem *item = new QTableWidgetItem(m_example[i][j]);
425
426 if (j == 0) {
427 if (m_timingType == ExplicitTiming) {
428 m_exampleWidget->setVerticalHeaderItem(i, item);
429 continue;
430 } else {
431 QTableWidgetItem *header =
432 new QTableWidgetItem(QString("%1").arg(i));
433 header->setFlags(Qt::ItemIsEnabled);
434 m_exampleWidget->setVerticalHeaderItem(i, header);
435 }
436 }
437 int index = j;
438 if (m_timingType == ExplicitTiming) --index;
439 item->setFlags(Qt::ItemIsEnabled);
440 m_exampleWidget->setItem(i, index, item);
441 }
442 }
443 }
444
445 void
446 CSVFormatDialog::modelTypeChanged(int type)
447 {
448 m_modelType = (ModelType)type;
449
450 if (m_modelType == ThreeDimensionalModel) {
451 // We can't load 3d models with explicit timing, because the 3d
452 // model is dense so we need a fixed sample increment
453 m_timingTypeCombo->setCurrentIndex(2);
454 timingTypeChanged(2);
455 }
456 }
457
458 void
459 CSVFormatDialog::timingTypeChanged(int type)
460 {
461 switch (type) {
462
463 case 0:
464 m_timingType = ExplicitTiming;
465 m_timeUnits = TimeSeconds;
466 m_sampleRateCombo->setEnabled(false);
467 m_sampleRateLabel->setEnabled(false);
468 m_windowSizeCombo->setEnabled(false);
469 m_windowSizeLabel->setEnabled(false);
470 if (m_modelType == ThreeDimensionalModel) {
471 m_modelTypeCombo->setCurrentIndex(1);
472 modelTypeChanged(1);
473 }
474 break;
475
476 case 1:
477 m_timingType = ExplicitTiming;
478 m_timeUnits = TimeAudioFrames;
479 m_sampleRateCombo->setEnabled(true);
480 m_sampleRateLabel->setEnabled(true);
481 m_windowSizeCombo->setEnabled(false);
482 m_windowSizeLabel->setEnabled(false);
483 if (m_modelType == ThreeDimensionalModel) {
484 m_modelTypeCombo->setCurrentIndex(1);
485 modelTypeChanged(1);
486 }
487 break;
488
489 case 2:
490 m_timingType = ImplicitTiming;
491 m_timeUnits = TimeWindows;
492 m_sampleRateCombo->setEnabled(true);
493 m_sampleRateLabel->setEnabled(true);
494 m_windowSizeCombo->setEnabled(true);
495 m_windowSizeLabel->setEnabled(true);
496 break;
497 }
498
499 populateExample();
500 }
501
502 void
503 CSVFormatDialog::sampleRateChanged(QString rateString)
504 {
505 bool ok = false;
506 int sampleRate = rateString.toInt(&ok);
507 if (ok) m_sampleRate = sampleRate;
508 }
509
510 void
511 CSVFormatDialog::windowSizeChanged(QString sizeString)
512 {
513 bool ok = false;
514 int size = sizeString.toInt(&ok);
515 if (ok) m_windowSize = size;
516 }
517
518 bool
519 CSVFormatDialog::guessFormat(QFile *file)
520 {
521 QTextStream in(file);
522 in.seek(0);
523
524 unsigned int lineno = 0;
525
526 bool nonIncreasingPrimaries = false;
527 bool nonNumericPrimaries = false;
528 bool floatPrimaries = false;
529 bool variableItemCount = false;
530 int itemCount = 1;
531 int earliestNonNumericItem = -1;
532
533 float prevPrimary = 0.0;
534
535 m_maxExampleCols = 0;
536
537 while (!in.atEnd()) {
538
539 // See comment about line endings in load() above
540
541 QString chunk = in.readLine();
542 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
543
544 for (size_t li = 0; li < lines.size(); ++li) {
545
546 QString line = lines[li];
547
548 if (line.startsWith("#")) continue;
549
550 m_behaviour = QString::KeepEmptyParts;
551
552 if (m_separator == "") {
553 //!!! to do: ask the user
554 if (line.split(",").size() >= 2) m_separator = ",";
555 else if (line.split("\t").size() >= 2) m_separator = "\t";
556 else if (line.split("|").size() >= 2) m_separator = "|";
557 else if (line.split("/").size() >= 2) m_separator = "/";
558 else if (line.split(":").size() >= 2) m_separator = ":";
559 else {
560 m_separator = " ";
561 m_behaviour = QString::SkipEmptyParts;
562 }
563 }
564
565 QStringList list = line.split(m_separator, m_behaviour);
566 QStringList tidyList;
567
568 for (int i = 0; i < list.size(); ++i) {
569
570 QString s(list[i]);
571 bool numeric = false;
572
573 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
574 s = s.mid(1, s.length() - 2);
575 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
576 s = s.mid(1, s.length() - 2);
577 } else {
578 (void)s.toFloat(&numeric);
579 }
580
581 tidyList.push_back(s);
582
583 if (lineno == 0 || (list.size() < itemCount)) {
584 itemCount = list.size();
585 } else {
586 if (itemCount != list.size()) {
587 variableItemCount = true;
588 }
589 }
590
591 if (i == 0) { // primary
592
593 if (numeric) {
594
595 float primary = s.toFloat();
596
597 if (lineno > 0 && primary <= prevPrimary) {
598 nonIncreasingPrimaries = true;
599 }
600
601 if (s.contains(".") || s.contains(",")) {
602 floatPrimaries = true;
603 }
604
605 prevPrimary = primary;
606
607 } else {
608 nonNumericPrimaries = true;
609 }
610 } else { // secondary
611
612 if (!numeric) {
613 if (earliestNonNumericItem < 0 ||
614 i < earliestNonNumericItem) {
615 earliestNonNumericItem = i;
616 }
617 }
618 }
619 }
620
621 if (lineno < 10) {
622 m_example.push_back(tidyList);
623 if (lineno == 0 || tidyList.size() > m_maxExampleCols) {
624 m_maxExampleCols = tidyList.size();
625 }
626 }
627
628 ++lineno;
629
630 if (lineno == 50) break;
631 }
632 }
633
634 if (nonNumericPrimaries || nonIncreasingPrimaries) {
635
636 // Primaries are probably not a series of times
637
638 m_timingType = ImplicitTiming;
639 m_timeUnits = TimeWindows;
640
641 if (nonNumericPrimaries) {
642 m_modelType = OneDimensionalModel;
643 } else if (itemCount == 1 || variableItemCount ||
644 (earliestNonNumericItem != -1)) {
645 m_modelType = TwoDimensionalModel;
646 } else {
647 m_modelType = ThreeDimensionalModel;
648 }
649
650 } else {
651
652 // Increasing numeric primaries -- likely to be time
653
654 m_timingType = ExplicitTiming;
655
656 if (floatPrimaries) {
657 m_timeUnits = TimeSeconds;
658 } else {
659 m_timeUnits = TimeAudioFrames;
660 }
661
662 if (itemCount == 1) {
663 m_modelType = OneDimensionalModel;
664 } else if (variableItemCount || (earliestNonNumericItem != -1)) {
665 if (earliestNonNumericItem != -1 && earliestNonNumericItem < 2) {
666 m_modelType = OneDimensionalModel;
667 } else {
668 m_modelType = TwoDimensionalModel;
669 }
670 } else {
671 m_modelType = ThreeDimensionalModel;
672 }
673 }
674
675 std::cerr << "Estimated model type: " << m_modelType << std::endl;
676 std::cerr << "Estimated timing type: " << m_timingType << std::endl;
677 std::cerr << "Estimated units: " << m_timeUnits << std::endl;
678
679 in.seek(0);
680 return true;
681 }