comparison data/fileio/CSVFileReader.cpp @ 0:fc9323a41f5a

start base : Sonic Visualiser sv1-1.0rc1
author lbajardsilogic
date Fri, 11 May 2007 09:08:14 +0000
parents
children 6a31322cd9ed
comparison
equal deleted inserted replaced
-1:000000000000 0:fc9323a41f5a
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 Sonic Visualiser
5 An audio file viewer and annotation editor.
6 Centre for Digital Music, Queen Mary, University of London.
7 This file copyright 2006 Chris Cannam.
8
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License as
11 published by the Free Software Foundation; either version 2 of the
12 License, or (at your option) any later version. See the file
13 COPYING included with this distribution for more information.
14 */
15
16 #include "CSVFileReader.h"
17
18 #include "model/Model.h"
19 #include "base/RealTime.h"
20 #include "model/SparseOneDimensionalModel.h"
21 #include "model/SparseTimeValueModel.h"
22 #include "model/EditableDenseThreeDimensionalModel.h"
23
24 #include <QFile>
25 #include <QString>
26 #include <QRegExp>
27 #include <QStringList>
28 #include <QTextStream>
29 #include <QFrame>
30 #include <QGridLayout>
31 #include <QPushButton>
32 #include <QHBoxLayout>
33 #include <QVBoxLayout>
34 #include <QTableWidget>
35 #include <QComboBox>
36 #include <QLabel>
37
38 #include <iostream>
39
40 CSVFileReader::CSVFileReader(QString path, size_t mainModelSampleRate) :
41 m_file(0),
42 m_mainModelSampleRate(mainModelSampleRate)
43 {
44 m_file = new QFile(path);
45 bool good = false;
46
47 if (!m_file->exists()) {
48 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
49 } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
50 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
51 } else {
52 good = true;
53 }
54
55 if (!good) {
56 delete m_file;
57 m_file = 0;
58 }
59 }
60
61 CSVFileReader::~CSVFileReader()
62 {
63 std::cerr << "CSVFileReader::~CSVFileReader: file is " << m_file << std::endl;
64
65 if (m_file) {
66 std::cerr << "CSVFileReader::CSVFileReader: Closing file" << std::endl;
67 m_file->close();
68 }
69 delete m_file;
70 }
71
72 bool
73 CSVFileReader::isOK() const
74 {
75 return (m_file != 0);
76 }
77
78 QString
79 CSVFileReader::getError() const
80 {
81 return m_error;
82 }
83
84 Model *
85 CSVFileReader::load() const
86 {
87 if (!m_file) return 0;
88
89 CSVFormatDialog *dialog = new CSVFormatDialog
90 (0, m_file, m_mainModelSampleRate);
91
92 if (dialog->exec() == QDialog::Rejected) {
93 delete dialog;
94 return 0;
95 }
96
97 CSVFormatDialog::ModelType modelType = dialog->getModelType();
98 CSVFormatDialog::TimingType timingType = dialog->getTimingType();
99 CSVFormatDialog::TimeUnits timeUnits = dialog->getTimeUnits();
100 QString separator = dialog->getSeparator();
101 size_t sampleRate = dialog->getSampleRate();
102 size_t windowSize = dialog->getWindowSize();
103
104 delete dialog;
105
106 if (timingType == CSVFormatDialog::ExplicitTiming) {
107 windowSize = 1;
108 if (timeUnits == CSVFormatDialog::TimeSeconds) {
109 sampleRate = m_mainModelSampleRate;
110 }
111 }
112
113 SparseOneDimensionalModel *model1 = 0;
114 SparseTimeValueModel *model2 = 0;
115 EditableDenseThreeDimensionalModel *model3 = 0;
116 Model *model = 0;
117
118 QTextStream in(m_file);
119 in.seek(0);
120
121 unsigned int warnings = 0, warnLimit = 10;
122 unsigned int lineno = 0;
123
124 float min = 0.0, max = 0.0;
125
126 size_t frameNo = 0;
127
128 while (!in.atEnd()) {
129
130 QString line = in.readLine().trimmed();
131 if (line.startsWith("#") || line.trimmed() == "") continue;
132
133 QStringList list = line.split(separator);
134
135 if (!model) {
136
137 switch (modelType) {
138
139 case CSVFormatDialog::OneDimensionalModel:
140 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
141 model = model1;
142 break;
143
144 case CSVFormatDialog::TwoDimensionalModel:
145 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
146 model = model2;
147 break;
148
149 case CSVFormatDialog::ThreeDimensionalModel:
150 model3 = new EditableDenseThreeDimensionalModel(sampleRate,
151 windowSize,
152 list.size());
153 model = model3;
154 break;
155 }
156 }
157
158 QStringList tidyList;
159 QRegExp nonNumericRx("[^0-9.,+-]");
160
161 for (int i = 0; i < list.size(); ++i) {
162
163 QString s(list[i].trimmed());
164
165 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
166 s = s.mid(1, s.length() - 2);
167 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
168 s = s.mid(1, s.length() - 2);
169 }
170
171 if (i == 0 && timingType == CSVFormatDialog::ExplicitTiming) {
172
173 bool ok = false;
174 QString numeric = s;
175 numeric.remove(nonNumericRx);
176
177 if (timeUnits == CSVFormatDialog::TimeSeconds) {
178
179 double time = numeric.toDouble(&ok);
180 frameNo = int(time * sampleRate + 0.00001);
181
182 } else {
183
184 frameNo = numeric.toInt(&ok);
185
186 if (timeUnits == CSVFormatDialog::TimeWindows) {
187 frameNo *= windowSize;
188 }
189 }
190
191 if (!ok) {
192 if (warnings < warnLimit) {
193 std::cerr << "WARNING: CSVFileReader::load: "
194 << "Bad time format (\"" << s.toStdString()
195 << "\") in data line "
196 << lineno << ":" << std::endl;
197 std::cerr << line.toStdString() << std::endl;
198 } else if (warnings == warnLimit) {
199 std::cerr << "WARNING: Too many warnings" << std::endl;
200 }
201 ++warnings;
202 }
203 } else {
204 tidyList.push_back(s);
205 }
206 }
207
208 if (modelType == CSVFormatDialog::OneDimensionalModel) {
209
210 SparseOneDimensionalModel::Point point
211 (frameNo,
212 tidyList.size() > 0 ? tidyList[tidyList.size()-1] :
213 QString("%1").arg(lineno));
214
215 model1->addPoint(point);
216
217 } else if (modelType == CSVFormatDialog::TwoDimensionalModel) {
218
219 SparseTimeValueModel::Point point
220 (frameNo,
221 tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0,
222 tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno));
223
224 model2->addPoint(point);
225
226 } else if (modelType == CSVFormatDialog::ThreeDimensionalModel) {
227
228 DenseThreeDimensionalModel::Column values;
229
230 for (int i = 0; i < tidyList.size(); ++i) {
231
232 bool ok = false;
233 float value = list[i].toFloat(&ok);
234 values.push_back(value);
235
236 if ((lineno == 0 && i == 0) || value < min) min = value;
237 if ((lineno == 0 && i == 0) || value > max) max = value;
238
239 if (!ok) {
240 if (warnings < warnLimit) {
241 std::cerr << "WARNING: CSVFileReader::load: "
242 << "Non-numeric value in data line " << lineno
243 << ":" << std::endl;
244 std::cerr << line.toStdString() << std::endl;
245 ++warnings;
246 } else if (warnings == warnLimit) {
247 std::cerr << "WARNING: Too many warnings" << std::endl;
248 }
249 }
250 }
251
252 std::cerr << "Setting bin values for count " << lineno << ", frame "
253 << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl;
254
255 model3->setColumn(frameNo / model3->getResolution(), values);
256 }
257
258 ++lineno;
259 if (timingType == CSVFormatDialog::ImplicitTiming ||
260 list.size() == 0) {
261 frameNo += windowSize;
262 }
263 }
264
265 if (modelType == CSVFormatDialog::ThreeDimensionalModel) {
266 model3->setMinimumLevel(min);
267 model3->setMaximumLevel(max);
268 }
269
270 return model;
271 }
272
273
274 CSVFormatDialog::CSVFormatDialog(QWidget *parent, QFile *file,
275 size_t defaultSampleRate) :
276 QDialog(parent),
277 m_modelType(OneDimensionalModel),
278 m_timingType(ExplicitTiming),
279 m_timeUnits(TimeAudioFrames),
280 m_separator("")
281 {
282 setModal(true);
283 setWindowTitle(tr("Select Data Format"));
284
285 (void)guessFormat(file);
286
287 QGridLayout *layout = new QGridLayout;
288
289 layout->addWidget(new QLabel(tr("\nPlease select the correct data format for this file.\n")),
290 0, 0, 1, 4);
291
292 layout->addWidget(new QLabel(tr("Each row specifies:")), 1, 0);
293
294 m_modelTypeCombo = new QComboBox;
295 m_modelTypeCombo->addItem(tr("A point in time"));
296 m_modelTypeCombo->addItem(tr("A value at a time"));
297 m_modelTypeCombo->addItem(tr("A set of values"));
298 layout->addWidget(m_modelTypeCombo, 1, 1, 1, 2);
299 connect(m_modelTypeCombo, SIGNAL(activated(int)),
300 this, SLOT(modelTypeChanged(int)));
301 m_modelTypeCombo->setCurrentIndex(int(m_modelType));
302
303 layout->addWidget(new QLabel(tr("The first column contains:")), 2, 0);
304
305 m_timingTypeCombo = new QComboBox;
306 m_timingTypeCombo->addItem(tr("Time, in seconds"));
307 m_timingTypeCombo->addItem(tr("Time, in audio sample frames"));
308 m_timingTypeCombo->addItem(tr("Data (rows are consecutive in time)"));
309 layout->addWidget(m_timingTypeCombo, 2, 1, 1, 2);
310 connect(m_timingTypeCombo, SIGNAL(activated(int)),
311 this, SLOT(timingTypeChanged(int)));
312 m_timingTypeCombo->setCurrentIndex(m_timingType == ExplicitTiming ?
313 m_timeUnits == TimeSeconds ? 0 : 1 : 2);
314
315 m_sampleRateLabel = new QLabel(tr("Audio sample rate (Hz):"));
316 layout->addWidget(m_sampleRateLabel, 3, 0);
317
318 size_t sampleRates[] = {
319 8000, 11025, 12000, 22050, 24000, 32000,
320 44100, 48000, 88200, 96000, 176400, 192000
321 };
322
323 m_sampleRateCombo = new QComboBox;
324 m_sampleRate = defaultSampleRate;
325 for (size_t i = 0; i < sizeof(sampleRates) / sizeof(sampleRates[0]); ++i) {
326 m_sampleRateCombo->addItem(QString("%1").arg(sampleRates[i]));
327 if (sampleRates[i] == m_sampleRate) m_sampleRateCombo->setCurrentIndex(i);
328 }
329 m_sampleRateCombo->setEditable(true);
330
331 layout->addWidget(m_sampleRateCombo, 3, 1);
332 connect(m_sampleRateCombo, SIGNAL(activated(QString)),
333 this, SLOT(sampleRateChanged(QString)));
334 connect(m_sampleRateCombo, SIGNAL(editTextChanged(QString)),
335 this, SLOT(sampleRateChanged(QString)));
336
337 m_windowSizeLabel = new QLabel(tr("Frame increment between rows:"));
338 layout->addWidget(m_windowSizeLabel, 4, 0);
339
340 m_windowSizeCombo = new QComboBox;
341 m_windowSize = 1024;
342 for (int i = 0; i <= 16; ++i) {
343 int value = 1 << i;
344 m_windowSizeCombo->addItem(QString("%1").arg(value));
345 if (value == int(m_windowSize)) m_windowSizeCombo->setCurrentIndex(i);
346 }
347 m_windowSizeCombo->setEditable(true);
348
349 layout->addWidget(m_windowSizeCombo, 4, 1);
350 connect(m_windowSizeCombo, SIGNAL(activated(QString)),
351 this, SLOT(windowSizeChanged(QString)));
352 connect(m_windowSizeCombo, SIGNAL(editTextChanged(QString)),
353 this, SLOT(windowSizeChanged(QString)));
354
355 layout->addWidget(new QLabel(tr("\nExample data from file:")), 5, 0, 1, 4);
356
357 m_exampleWidget = new QTableWidget
358 (min(10, m_example.size()), m_maxExampleCols);
359
360 layout->addWidget(m_exampleWidget, 6, 0, 1, 4);
361 layout->setColumnStretch(3, 10);
362 layout->setRowStretch(4, 10);
363
364 QPushButton *ok = new QPushButton(tr("OK"));
365 connect(ok, SIGNAL(clicked()), this, SLOT(accept()));
366 ok->setDefault(true);
367
368 QPushButton *cancel = new QPushButton(tr("Cancel"));
369 connect(cancel, SIGNAL(clicked()), this, SLOT(reject()));
370
371 QHBoxLayout *buttonLayout = new QHBoxLayout;
372 buttonLayout->addStretch(1);
373 buttonLayout->addWidget(ok);
374 buttonLayout->addWidget(cancel);
375
376 QVBoxLayout *mainLayout = new QVBoxLayout;
377 mainLayout->addLayout(layout);
378 mainLayout->addLayout(buttonLayout);
379
380 setLayout(mainLayout);
381
382 timingTypeChanged(m_timingTypeCombo->currentIndex());
383 }
384
385 CSVFormatDialog::~CSVFormatDialog()
386 {
387 }
388
389 void
390 CSVFormatDialog::populateExample()
391 {
392 m_exampleWidget->setColumnCount
393 (m_timingType == ExplicitTiming ?
394 m_maxExampleCols - 1 : m_maxExampleCols);
395
396 m_exampleWidget->setHorizontalHeaderLabels(QStringList());
397
398 for (int i = 0; i < m_example.size(); ++i) {
399 for (int j = 0; j < m_example[i].size(); ++j) {
400
401 QTableWidgetItem *item = new QTableWidgetItem(m_example[i][j]);
402
403 if (j == 0) {
404 if (m_timingType == ExplicitTiming) {
405 m_exampleWidget->setVerticalHeaderItem(i, item);
406 continue;
407 } else {
408 QTableWidgetItem *header =
409 new QTableWidgetItem(QString("%1").arg(i));
410 header->setFlags(Qt::ItemIsEnabled);
411 m_exampleWidget->setVerticalHeaderItem(i, header);
412 }
413 }
414 int index = j;
415 if (m_timingType == ExplicitTiming) --index;
416 item->setFlags(Qt::ItemIsEnabled);
417 m_exampleWidget->setItem(i, index, item);
418 }
419 }
420 }
421
422 void
423 CSVFormatDialog::modelTypeChanged(int type)
424 {
425 m_modelType = (ModelType)type;
426
427 if (m_modelType == ThreeDimensionalModel) {
428 // We can't load 3d models with explicit timing, because the 3d
429 // model is dense so we need a fixed sample increment
430 m_timingTypeCombo->setCurrentIndex(2);
431 timingTypeChanged(2);
432 }
433 }
434
435 void
436 CSVFormatDialog::timingTypeChanged(int type)
437 {
438 switch (type) {
439
440 case 0:
441 m_timingType = ExplicitTiming;
442 m_timeUnits = TimeSeconds;
443 m_sampleRateCombo->setEnabled(false);
444 m_sampleRateLabel->setEnabled(false);
445 m_windowSizeCombo->setEnabled(false);
446 m_windowSizeLabel->setEnabled(false);
447 if (m_modelType == ThreeDimensionalModel) {
448 m_modelTypeCombo->setCurrentIndex(1);
449 modelTypeChanged(1);
450 }
451 break;
452
453 case 1:
454 m_timingType = ExplicitTiming;
455 m_timeUnits = TimeAudioFrames;
456 m_sampleRateCombo->setEnabled(true);
457 m_sampleRateLabel->setEnabled(true);
458 m_windowSizeCombo->setEnabled(false);
459 m_windowSizeLabel->setEnabled(false);
460 if (m_modelType == ThreeDimensionalModel) {
461 m_modelTypeCombo->setCurrentIndex(1);
462 modelTypeChanged(1);
463 }
464 break;
465
466 case 2:
467 m_timingType = ImplicitTiming;
468 m_timeUnits = TimeWindows;
469 m_sampleRateCombo->setEnabled(true);
470 m_sampleRateLabel->setEnabled(true);
471 m_windowSizeCombo->setEnabled(true);
472 m_windowSizeLabel->setEnabled(true);
473 break;
474 }
475
476 populateExample();
477 }
478
479 void
480 CSVFormatDialog::sampleRateChanged(QString rateString)
481 {
482 bool ok = false;
483 int sampleRate = rateString.toInt(&ok);
484 if (ok) m_sampleRate = sampleRate;
485 }
486
487 void
488 CSVFormatDialog::windowSizeChanged(QString sizeString)
489 {
490 bool ok = false;
491 int size = sizeString.toInt(&ok);
492 if (ok) m_windowSize = size;
493 }
494
495 bool
496 CSVFormatDialog::guessFormat(QFile *file)
497 {
498 QTextStream in(file);
499 in.seek(0);
500
501 unsigned int lineno = 0;
502
503 bool nonIncreasingPrimaries = false;
504 bool nonNumericPrimaries = false;
505 bool floatPrimaries = false;
506 bool variableItemCount = false;
507 int itemCount = 1;
508 int earliestNonNumericItem = -1;
509
510 float prevPrimary = 0.0;
511
512 m_maxExampleCols = 0;
513
514 while (!in.atEnd()) {
515
516 QString line = in.readLine().trimmed();
517 if (line.startsWith("#")) continue;
518
519 if (m_separator == "") {
520 //!!! to do: ask the user
521 if (line.split(",").size() >= 2) m_separator = ",";
522 else if (line.split("\t").size() >= 2) m_separator = "\t";
523 else if (line.split("|").size() >= 2) m_separator = "|";
524 else if (line.split("/").size() >= 2) m_separator = "/";
525 else if (line.split(":").size() >= 2) m_separator = ":";
526 else m_separator = " ";
527 }
528
529 QStringList list = line.split(m_separator);
530 QStringList tidyList;
531
532 for (int i = 0; i < list.size(); ++i) {
533
534 QString s(list[i]);
535 bool numeric = false;
536
537 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
538 s = s.mid(1, s.length() - 2);
539 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
540 s = s.mid(1, s.length() - 2);
541 } else {
542 (void)s.toFloat(&numeric);
543 }
544
545 tidyList.push_back(s);
546
547 if (lineno == 0 || (list.size() < itemCount)) {
548 itemCount = list.size();
549 } else {
550 if (itemCount != list.size()) {
551 variableItemCount = true;
552 }
553 }
554
555 if (i == 0) { // primary
556
557 if (numeric) {
558
559 float primary = s.toFloat();
560
561 if (lineno > 0 && primary <= prevPrimary) {
562 nonIncreasingPrimaries = true;
563 }
564
565 if (s.contains(".") || s.contains(",")) {
566 floatPrimaries = true;
567 }
568
569 prevPrimary = primary;
570
571 } else {
572 nonNumericPrimaries = true;
573 }
574 } else { // secondary
575
576 if (!numeric) {
577 if (earliestNonNumericItem < 0 ||
578 i < earliestNonNumericItem) {
579 earliestNonNumericItem = i;
580 }
581 }
582 }
583 }
584
585 if (lineno < 10) {
586 m_example.push_back(tidyList);
587 if (lineno == 0 || tidyList.size() > m_maxExampleCols) {
588 m_maxExampleCols = tidyList.size();
589 }
590 }
591
592 ++lineno;
593
594 if (lineno == 50) break;
595 }
596
597 if (nonNumericPrimaries || nonIncreasingPrimaries) {
598
599 // Primaries are probably not a series of times
600
601 m_timingType = ImplicitTiming;
602 m_timeUnits = TimeWindows;
603
604 if (nonNumericPrimaries) {
605 m_modelType = OneDimensionalModel;
606 } else if (itemCount == 1 || variableItemCount ||
607 (earliestNonNumericItem != -1)) {
608 m_modelType = TwoDimensionalModel;
609 } else {
610 m_modelType = ThreeDimensionalModel;
611 }
612
613 } else {
614
615 // Increasing numeric primaries -- likely to be time
616
617 m_timingType = ExplicitTiming;
618
619 if (floatPrimaries) {
620 m_timeUnits = TimeSeconds;
621 } else {
622 m_timeUnits = TimeAudioFrames;
623 }
624
625 if (itemCount == 1) {
626 m_modelType = OneDimensionalModel;
627 } else if (variableItemCount || (earliestNonNumericItem != -1)) {
628 if (earliestNonNumericItem != -1 && earliestNonNumericItem < 2) {
629 m_modelType = OneDimensionalModel;
630 } else {
631 m_modelType = TwoDimensionalModel;
632 }
633 } else {
634 m_modelType = ThreeDimensionalModel;
635 }
636 }
637
638 std::cerr << "Estimated model type: " << m_modelType << std::endl;
639 std::cerr << "Estimated timing type: " << m_timingType << std::endl;
640 std::cerr << "Estimated units: " << m_timeUnits << std::endl;
641
642 in.seek(0);
643 return true;
644 }