diff data/fileio/CSVFileReader.cpp @ 148:1a42221a1522

* Reorganising code base. This revision will not compile.
author Chris Cannam
date Mon, 31 Jul 2006 11:49:58 +0000
parents
children 4b2ea82fd0ed
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/fileio/CSVFileReader.cpp	Mon Jul 31 11:49:58 2006 +0000
@@ -0,0 +1,645 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Sonic Visualiser
+    An audio file viewer and annotation editor.
+    Centre for Digital Music, Queen Mary, University of London.
+    This file copyright 2006 Chris Cannam.
+    
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+*/
+
+#include "CSVFileReader.h"
+
+#include "base/Model.h"
+#include "base/RealTime.h"
+#include "model/SparseOneDimensionalModel.h"
+#include "model/SparseTimeValueModel.h"
+#include "model/DenseThreeDimensionalModel.h"
+
+#include <QFile>
+#include <QString>
+#include <QRegExp>
+#include <QStringList>
+#include <QTextStream>
+#include <QFrame>
+#include <QGridLayout>
+#include <QPushButton>
+#include <QHBoxLayout>
+#include <QVBoxLayout>
+#include <QTableWidget>
+#include <QComboBox>
+#include <QLabel>
+
+#include <iostream>
+
+// Create a reader for the file at the given path and try to open it
+// for reading as text.  On failure the QFile is discarded, m_file is
+// left null and m_error holds a translated description of the problem.
+CSVFileReader::CSVFileReader(QString path, size_t mainModelSampleRate) :
+    m_file(new QFile(path)),
+    m_mainModelSampleRate(mainModelSampleRate)
+{
+    if (!m_file->exists()) {
+        m_error = QFile::tr("File \"%1\" does not exist").arg(path);
+    } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
+        m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
+    } else {
+        // Opened successfully: keep the file for load()
+        return;
+    }
+
+    // Either of the failure branches above ends up here
+    delete m_file;
+    m_file = 0;
+}
+
+// Close the file if one is open and release the QFile object.  Both
+// steps are reported on stderr.
+CSVFileReader::~CSVFileReader()
+{
+    std::cerr << "CSVFileReader::~CSVFileReader: file is " << m_file << std::endl;
+
+    if (m_file) {
+        // The message names the destructor, which is where it is emitted
+        std::cerr << "CSVFileReader::~CSVFileReader: Closing file" << std::endl;
+        m_file->close();
+    }
+    delete m_file;
+}
+
+// True when the constructor managed to open the file, i.e. when a
+// QFile object is still held.
+bool
+CSVFileReader::isOK() const
+{
+    if (m_file) return true;
+    return false;
+}
+
+// Return the message recorded by the constructor when the file could
+// not be found or opened; the string is empty if no error was recorded.
+QString
+CSVFileReader::getError() const
+{
+    QString message(m_error);
+    return message;
+}
+
+// Parse the file into a newly allocated model.
+//
+// A modal CSVFormatDialog is shown first so that the user can confirm
+// or correct the guessed format.  Every line that does not start with
+// '#' is then split on the chosen separator and added to a
+// SparseOneDimensionalModel, SparseTimeValueModel or
+// DenseThreeDimensionalModel, depending on the selected model type.
+//
+// Returns 0 if no file is open, if the dialog is rejected, or if the
+// file contains no data lines (the model is only created when the
+// first data line is seen).  Otherwise returns the new model; it is
+// not deleted here.
+//
+// Malformed time or bin values are reported on stderr, up to a limit,
+// and parsing continues.
+Model *
+CSVFileReader::load() const
+{
+    if (!m_file) return 0;
+
+    CSVFormatDialog *dialog = new CSVFormatDialog
+        (0, m_file, m_mainModelSampleRate);
+
+    if (dialog->exec() == QDialog::Rejected) {
+        delete dialog;
+        return 0;
+    }
+
+    CSVFormatDialog::ModelType   modelType = dialog->getModelType();
+    CSVFormatDialog::TimingType timingType = dialog->getTimingType();
+    CSVFormatDialog::TimeUnits   timeUnits = dialog->getTimeUnits();
+    QString separator = dialog->getSeparator();
+    size_t sampleRate = dialog->getSampleRate();
+    size_t windowSize = dialog->getWindowSize();
+
+    delete dialog;
+
+    if (timingType == CSVFormatDialog::ExplicitTiming) {
+        // Each row carries its own time, so there is no fixed increment
+        windowSize = 1;
+        if (timeUnits == CSVFormatDialog::TimeSeconds) {
+            // Times in seconds are converted to frames at the rate
+            // passed to the constructor
+            sampleRate = m_mainModelSampleRate;
+        }
+    }
+
+    SparseOneDimensionalModel *model1 = 0;
+    SparseTimeValueModel *model2 = 0;
+    DenseThreeDimensionalModel *model3 = 0;
+    Model *model = 0;
+
+    QTextStream in(m_file);
+    in.seek(0);
+
+    unsigned int warnings = 0, warnLimit = 10;
+    unsigned int lineno = 0;
+
+    float min = 0.0, max = 0.0;
+
+    size_t frameNo = 0;
+
+    // Removes every character that cannot be part of a number from the
+    // time column.  Built once rather than once per line.
+    QRegExp nonNumericRx("[^0-9.,+-]");
+
+    while (!in.atEnd()) {
+
+        QString line = in.readLine().trimmed();
+        if (line.startsWith("#")) continue;
+
+        QStringList list = line.split(separator);
+
+        if (!model) {
+
+            // The model is created lazily because the 3d model needs
+            // the column count of the first data line
+            switch (modelType) {
+
+            case CSVFormatDialog::OneDimensionalModel:
+                model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
+                model = model1;
+                break;
+
+            case CSVFormatDialog::TwoDimensionalModel:
+                model2 = new SparseTimeValueModel(sampleRate, windowSize,
+                                                  0.0, 0.0,
+                                                  false);
+                model = model2;
+                break;
+
+            case CSVFormatDialog::ThreeDimensionalModel:
+                model3 = new DenseThreeDimensionalModel(sampleRate, windowSize,
+                                                        list.size());
+                model = model3;
+                break;
+            }
+        }
+
+        // tidyList receives the trimmed, unquoted data columns; with
+        // explicit timing the first column is consumed as the time and
+        // is not included
+        QStringList tidyList;
+
+        for (int i = 0; i < list.size(); ++i) {
+
+            QString s(list[i].trimmed());
+
+            if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
+                s = s.mid(1, s.length() - 2);
+            } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
+                s = s.mid(1, s.length() - 2);
+            }
+
+            if (i == 0 && timingType == CSVFormatDialog::ExplicitTiming) {
+
+                bool ok = false;
+                QString numeric = s;
+                numeric.remove(nonNumericRx);
+
+                if (timeUnits == CSVFormatDialog::TimeSeconds) {
+
+                    double time = numeric.toDouble(&ok);
+                    // The small offset guards against a frame being
+                    // lost to floating-point rounding in the product
+                    frameNo = int(time * sampleRate + 0.00001);
+
+                } else {
+
+                    frameNo = numeric.toInt(&ok);
+
+                    if (timeUnits == CSVFormatDialog::TimeWindows) {
+                        frameNo *= windowSize;
+                    }
+                }
+
+                if (!ok) {
+                    if (warnings < warnLimit) {
+                        std::cerr << "WARNING: CSVFileReader::load: "
+                                  << "Bad time format (\"" << s.toStdString()
+                                  << "\") in data line "
+                                  << lineno << ":" << std::endl;
+                        std::cerr << line.toStdString() << std::endl;
+                    } else if (warnings == warnLimit) {
+                        std::cerr << "WARNING: Too many warnings" << std::endl;
+                    }
+                    ++warnings;
+                }
+            } else {
+                tidyList.push_back(s);
+            }
+        }
+
+        if (modelType == CSVFormatDialog::OneDimensionalModel) {
+
+            // Label the point with the last data column, or with the
+            // line number if there is no data column
+            SparseOneDimensionalModel::Point point
+                (frameNo,
+                 tidyList.size() > 0 ? tidyList[tidyList.size()-1] :
+                 QString("%1").arg(lineno));
+
+            model1->addPoint(point);
+
+        } else if (modelType == CSVFormatDialog::TwoDimensionalModel) {
+
+            // First data column is the value, second (if any) the label
+            SparseTimeValueModel::Point point
+                (frameNo,
+                 tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0,
+                 tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno));
+
+            model2->addPoint(point);
+
+        } else if (modelType == CSVFormatDialog::ThreeDimensionalModel) {
+
+            DenseThreeDimensionalModel::BinValueSet values;
+
+            for (int i = 0; i < tidyList.size(); ++i) {
+
+                bool ok = false;
+                // Parse the tidied column: it is the one that has been
+                // trimmed and unquoted, and it stays aligned with the
+                // data even if the first column was taken as the time
+                float value = tidyList[i].toFloat(&ok);
+                values.push_back(value);
+
+                if ((lineno == 0 && i == 0) || value < min) min = value;
+                if ((lineno == 0 && i == 0) || value > max) max = value;
+
+                if (!ok) {
+                    if (warnings < warnLimit) {
+                        std::cerr << "WARNING: CSVFileReader::load: "
+                                  << "Non-numeric value in data line " << lineno
+                                  << ":" << std::endl;
+                        std::cerr << line.toStdString() << std::endl;
+                    } else if (warnings == warnLimit) {
+                        std::cerr << "WARNING: Too many warnings" << std::endl;
+                    }
+                    // Always count, so that the "too many" notice is
+                    // printed once rather than for every later value
+                    ++warnings;
+                }
+            }
+
+            std::cerr << "Setting bin values for count " << lineno << ", frame "
+                      << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl;
+
+            model3->setBinValues(frameNo, values);
+        }
+
+        ++lineno;
+        // NOTE(review): split() appears to return at least one
+        // (possibly empty) item, so the size test below is probably
+        // never true and only implicit timing advances the frame here
+        if (timingType == CSVFormatDialog::ImplicitTiming ||
+            list.size() == 0) {
+            frameNo += windowSize;
+        }
+    }
+
+    // model3 is still null if the file had no data lines at all
+    if (model3) {
+        model3->setMinimumLevel(min);
+        model3->setMaximumLevel(max);
+    }
+
+    return model;
+}
+
+
+// Build the modal "Select Data Format" dialog for the given file.
+//
+// guessFormat() is run first so that the model type, timing type and
+// time units controls start out showing the guessed values and the
+// example rows are available.  defaultSampleRate is the initial value
+// of the sample rate control; the frame increment starts at 1024.
+CSVFormatDialog::CSVFormatDialog(QWidget *parent, QFile *file,
+                                 size_t defaultSampleRate) :
+    QDialog(parent),
+    m_modelType(OneDimensionalModel),
+    m_timingType(ExplicitTiming),
+    m_timeUnits(TimeAudioFrames),
+    m_separator("")
+{
+    setModal(true);
+    setWindowTitle(tr("Select Data Format"));
+
+    (void)guessFormat(file);
+
+    QGridLayout *layout = new QGridLayout;
+
+    layout->addWidget(new QLabel(tr("\nPlease select the correct data format for this file.\n")),
+                      0, 0, 1, 4);
+
+    layout->addWidget(new QLabel(tr("Each row specifies:")), 1, 0);
+
+    // Item order matches the ModelType values: the combo index is
+    // converted to and from the enum directly
+    m_modelTypeCombo = new QComboBox;
+    m_modelTypeCombo->addItem(tr("A point in time"));
+    m_modelTypeCombo->addItem(tr("A value at a time"));
+    m_modelTypeCombo->addItem(tr("A set of values"));
+    layout->addWidget(m_modelTypeCombo, 1, 1, 1, 2);
+    connect(m_modelTypeCombo, SIGNAL(activated(int)),
+            this, SLOT(modelTypeChanged(int)));
+    m_modelTypeCombo->setCurrentIndex(int(m_modelType));
+
+    layout->addWidget(new QLabel(tr("The first column contains:")), 2, 0);
+
+    // Indices 0, 1 and 2 here are the values timingTypeChanged() expects
+    m_timingTypeCombo = new QComboBox;
+    m_timingTypeCombo->addItem(tr("Time, in seconds"));
+    m_timingTypeCombo->addItem(tr("Time, in audio sample frames"));
+    m_timingTypeCombo->addItem(tr("Data (rows are consecutive in time)"));
+    layout->addWidget(m_timingTypeCombo, 2, 1, 1, 2);
+    connect(m_timingTypeCombo, SIGNAL(activated(int)),
+            this, SLOT(timingTypeChanged(int)));
+    m_timingTypeCombo->setCurrentIndex(m_timingType == ExplicitTiming ?
+                                       m_timeUnits == TimeSeconds ? 0 : 1 : 2);
+
+    m_sampleRateLabel = new QLabel(tr("Audio sample rate (Hz):"));
+    layout->addWidget(m_sampleRateLabel, 3, 0);
+
+    size_t sampleRates[] = {
+        8000, 11025, 12000, 22050, 24000, 32000,
+        44100, 48000, 88200, 96000, 176400, 192000
+    };
+
+    m_sampleRateCombo = new QComboBox;
+    m_sampleRate = defaultSampleRate;
+    bool presetRate = false;
+    for (size_t i = 0; i < sizeof(sampleRates) / sizeof(sampleRates[0]); ++i) {
+        m_sampleRateCombo->addItem(QString("%1").arg(sampleRates[i]));
+        if (sampleRates[i] == m_sampleRate) {
+            m_sampleRateCombo->setCurrentIndex(i);
+            presetRate = true;
+        }
+    }
+    m_sampleRateCombo->setEditable(true);
+    if (!presetRate) {
+        // The default is not one of the presets: show it anyway, so
+        // that the control agrees with the value that will be returned
+        m_sampleRateCombo->setEditText(QString("%1").arg(m_sampleRate));
+    }
+
+    layout->addWidget(m_sampleRateCombo, 3, 1);
+    connect(m_sampleRateCombo, SIGNAL(activated(QString)),
+            this, SLOT(sampleRateChanged(QString)));
+    connect(m_sampleRateCombo, SIGNAL(editTextChanged(QString)),
+            this, SLOT(sampleRateChanged(QString)));
+
+    m_windowSizeLabel = new QLabel(tr("Frame increment between rows:"));
+    layout->addWidget(m_windowSizeLabel, 4, 0);
+
+    // Offer the powers of two from 1 to 65536
+    m_windowSizeCombo = new QComboBox;
+    m_windowSize = 1024;
+    for (int i = 0; i <= 16; ++i) {
+        int value = 1 << i;
+        m_windowSizeCombo->addItem(QString("%1").arg(value));
+        if (value == m_windowSize) m_windowSizeCombo->setCurrentIndex(i);
+    }
+    m_windowSizeCombo->setEditable(true);
+
+    layout->addWidget(m_windowSizeCombo, 4, 1);
+    connect(m_windowSizeCombo, SIGNAL(activated(QString)),
+            this, SLOT(windowSizeChanged(QString)));
+    connect(m_windowSizeCombo, SIGNAL(editTextChanged(QString)),
+            this, SLOT(windowSizeChanged(QString)));
+
+    layout->addWidget(new QLabel(tr("\nExample data from file:")), 5, 0, 1, 4);
+
+    m_exampleWidget = new QTableWidget
+        (std::min(10, m_example.size()), m_maxExampleCols);
+
+    layout->addWidget(m_exampleWidget, 6, 0, 1, 4);
+    layout->setColumnStretch(3, 10);
+    layout->setRowStretch(4, 10);
+
+    QPushButton *ok = new QPushButton(tr("OK"));
+    connect(ok, SIGNAL(clicked()), this, SLOT(accept()));
+    ok->setDefault(true);
+
+    QPushButton *cancel = new QPushButton(tr("Cancel"));
+    connect(cancel, SIGNAL(clicked()), this, SLOT(reject()));
+
+    QHBoxLayout *buttonLayout = new QHBoxLayout;
+    buttonLayout->addStretch(1);
+    buttonLayout->addWidget(ok);
+    buttonLayout->addWidget(cancel);
+
+    QVBoxLayout *mainLayout = new QVBoxLayout;
+    mainLayout->addLayout(layout);
+    mainLayout->addLayout(buttonLayout);
+
+    setLayout(mainLayout);
+
+    // Apply the enabled states that go with the initial timing type
+    // and fill in the example table
+    timingTypeChanged(m_timingTypeCombo->currentIndex());
+}
+
+// Nothing is released explicitly here.
+CSVFormatDialog::~CSVFormatDialog() {}
+
+// Refill the example table from m_example for the current timing type.
+//
+// With explicit timing the first item of each row is the time and is
+// shown as the row header, so the table has one column fewer and the
+// remaining items shift left by one.  With implicit timing every item
+// is shown as data and the row header is the row index.
+void
+CSVFormatDialog::populateExample()
+{
+    const bool explicitTiming = (m_timingType == ExplicitTiming);
+
+    // Never ask for a negative column count (no example columns at all)
+    int columns = explicitTiming ? m_maxExampleCols - 1 : m_maxExampleCols;
+    if (columns < 0) columns = 0;
+
+    // Drop the items from the previous layout first; otherwise rows
+    // shorter than the widest one keep stale cells when the timing
+    // type is switched.  The table dimensions are unaffected.
+    m_exampleWidget->clear();
+
+    m_exampleWidget->setColumnCount(columns);
+
+    m_exampleWidget->setHorizontalHeaderLabels(QStringList());
+
+    for (int i = 0; i < m_example.size(); ++i) {
+        for (int j = 0; j < m_example[i].size(); ++j) {
+
+            QTableWidgetItem *item = new QTableWidgetItem(m_example[i][j]);
+
+            if (j == 0) {
+                if (explicitTiming) {
+                    // The time value itself becomes the row header
+                    m_exampleWidget->setVerticalHeaderItem(i, item);
+                    continue;
+                } else {
+                    QTableWidgetItem *header =
+                        new QTableWidgetItem(QString("%1").arg(i));
+                    header->setFlags(Qt::ItemIsEnabled);
+                    m_exampleWidget->setVerticalHeaderItem(i, header);
+                }
+            }
+            int index = j;
+            if (explicitTiming) --index;
+            // Enabled only: the cells are for display, not for editing
+            item->setFlags(Qt::ItemIsEnabled);
+            m_exampleWidget->setItem(i, index, item);
+        }
+    }
+}
+
+// Slot for the model type combo: record the selection, given as the
+// combo index.
+void
+CSVFormatDialog::modelTypeChanged(int type)
+{
+    m_modelType = static_cast<ModelType>(type);
+
+    if (m_modelType != ThreeDimensionalModel) return;
+
+    // A 3d model is dense and therefore needs a fixed sample increment,
+    // so it cannot be loaded with explicit timing: switch the timing
+    // selection to "rows are consecutive in time"
+    m_timingTypeCombo->setCurrentIndex(2);
+    timingTypeChanged(2);
+}
+
+// Slot for the timing type combo.  The argument is the combo index:
+// 0 = time in seconds, 1 = time in audio sample frames, 2 = no time
+// column (rows are consecutive).  Updates the timing members and the
+// enabled state of the sample rate and window size controls, then
+// refreshes the example table.  Any other index only refreshes the
+// table.
+void
+CSVFormatDialog::timingTypeChanged(int type)
+{
+    if (type == 0 || type == 1 || type == 2) {
+
+        const bool implicitTiming = (type == 2);
+
+        if (implicitTiming) {
+            m_timingType = ImplicitTiming;
+            m_timeUnits = TimeWindows;
+        } else {
+            m_timingType = ExplicitTiming;
+            m_timeUnits = (type == 0 ? TimeSeconds : TimeAudioFrames);
+        }
+
+        // The sample rate is fixed for times in seconds; the window
+        // size only applies when there is no time column
+        const bool rateEditable = (type != 0);
+        m_sampleRateCombo->setEnabled(rateEditable);
+        m_sampleRateLabel->setEnabled(rateEditable);
+        m_windowSizeCombo->setEnabled(implicitTiming);
+        m_windowSizeLabel->setEnabled(implicitTiming);
+
+        // Explicit timing is not available for 3d models, so fall
+        // back to the time/value model type
+        if (!implicitTiming && m_modelType == ThreeDimensionalModel) {
+            m_modelTypeCombo->setCurrentIndex(1);
+            modelTypeChanged(1);
+        }
+    }
+
+    populateExample();
+}
+
+// Slot for the sample rate combo: store the new rate if the text is a
+// positive integer.  Anything else (including partially typed text,
+// zero or a negative number) leaves the previous rate unchanged.
+void
+CSVFormatDialog::sampleRateChanged(QString rateString)
+{
+    bool ok = false;
+    int sampleRate = rateString.toInt(&ok);
+    if (ok && sampleRate > 0) m_sampleRate = sampleRate;
+}
+
+// Slot for the window size combo: store the new frame increment if the
+// text is a positive integer.  Anything else (including partially
+// typed text, zero or a negative number) leaves the previous value
+// unchanged.
+void
+CSVFormatDialog::windowSizeChanged(QString sizeString)
+{
+    bool ok = false;
+    int size = sizeString.toInt(&ok);
+    if (ok && size > 0) m_windowSize = size;
+}
+
+// Inspect up to the first 50 non-comment lines of the file and guess
+// its format.
+//
+// Sets m_separator (if not already set), m_modelType, m_timingType and
+// m_timeUnits, and stores up to 10 tidied rows in m_example with the
+// widest row length in m_maxExampleCols.  The stream is rewound to the
+// start before returning.  Always returns true.
+//
+// The first item of each row is the "primary".  If every primary is
+// numeric and they strictly increase, the column is taken to be a
+// time; otherwise the rows are assumed to be consecutive in time.
+bool
+CSVFormatDialog::guessFormat(QFile *file)
+{
+    QTextStream in(file);
+    in.seek(0);
+
+    unsigned int lineno = 0;
+
+    bool nonIncreasingPrimaries = false;
+    bool nonNumericPrimaries = false;
+    bool floatPrimaries = false;
+    bool variableItemCount = false;
+    int itemCount = 1;                // smallest item count seen in any row
+    int earliestNonNumericItem = -1;  // lowest non-primary column holding text
+
+    float prevPrimary = 0.0;
+
+    m_maxExampleCols = 0;
+    m_example.clear();
+
+    while (!in.atEnd()) {
+
+        QString line = in.readLine().trimmed();
+        if (line.startsWith("#")) continue;
+
+        if (m_separator == "") {
+            //!!! to do: ask the user
+            // Use the first candidate that splits the first data line
+            // into at least two items; fall back to a space
+            if (line.split(",").size() >= 2) m_separator = ",";
+            else if (line.split("\t").size() >= 2) m_separator = "\t";
+            else if (line.split("|").size() >= 2) m_separator = "|";
+            else if (line.split("/").size() >= 2) m_separator = "/";
+            else if (line.split(":").size() >= 2) m_separator = ":";
+            else m_separator = " ";
+        }
+
+        QStringList list = line.split(m_separator);
+        QStringList tidyList;
+
+        // Track the smallest item count and whether the count varies.
+        // A row shorter than the earlier ones counts as a variation
+        // too, not only a row longer than the current minimum.
+        const int count = list.size();
+        if (lineno == 0) {
+            itemCount = count;
+        } else if (count != itemCount) {
+            variableItemCount = true;
+            if (count < itemCount) itemCount = count;
+        }
+
+        for (int i = 0; i < list.size(); ++i) {
+
+            // Trim before looking for quotes, as load() does
+            QString s(list[i].trimmed());
+            bool numeric = false;
+
+            // A quoted item is treated as text even if it looks numeric
+            if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
+                s = s.mid(1, s.length() - 2);
+            } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
+                s = s.mid(1, s.length() - 2);
+            } else {
+                (void)s.toFloat(&numeric);
+            }
+
+            tidyList.push_back(s);
+
+            if (i == 0) { // primary
+
+                if (numeric) {
+
+                    float primary = s.toFloat();
+
+                    if (lineno > 0 && primary <= prevPrimary) {
+                        nonIncreasingPrimaries = true;
+                    }
+
+                    if (s.contains(".") || s.contains(",")) {
+                        floatPrimaries = true;
+                    }
+
+                    prevPrimary = primary;
+
+                } else {
+                    nonNumericPrimaries = true;
+                }
+            } else { // secondary
+
+                if (!numeric) {
+                    if (earliestNonNumericItem < 0 ||
+                        i < earliestNonNumericItem) {
+                        earliestNonNumericItem = i;
+                    }
+                }
+            }
+        }
+
+        if (lineno < 10) {
+            m_example.push_back(tidyList);
+            if (lineno == 0 || tidyList.size() > m_maxExampleCols) {
+                m_maxExampleCols = tidyList.size();
+            }
+        }
+
+        ++lineno;
+
+        if (lineno == 50) break;
+    }
+
+    if (nonNumericPrimaries || nonIncreasingPrimaries) {
+
+        // Primaries are probably not a series of times
+
+        m_timingType = ImplicitTiming;
+        m_timeUnits = TimeWindows;
+
+        if (nonNumericPrimaries) {
+            m_modelType = OneDimensionalModel;
+        } else if (itemCount == 1 || variableItemCount ||
+                   (earliestNonNumericItem != -1)) {
+            m_modelType = TwoDimensionalModel;
+        } else {
+            m_modelType = ThreeDimensionalModel;
+        }
+
+    } else {
+
+        // Increasing numeric primaries -- likely to be time
+
+        m_timingType = ExplicitTiming;
+
+        // A decimal point or comma in any primary suggests seconds
+        // rather than integer frame numbers
+        if (floatPrimaries) {
+            m_timeUnits = TimeSeconds;
+        } else {
+            m_timeUnits = TimeAudioFrames;
+        }
+
+        if (itemCount == 1) {
+            m_modelType = OneDimensionalModel;
+        } else if (variableItemCount || (earliestNonNumericItem != -1)) {
+            // Text directly after the time column means there is no
+            // value column, only a label
+            if (earliestNonNumericItem != -1 && earliestNonNumericItem < 2) {
+                m_modelType = OneDimensionalModel;
+            } else {
+                m_modelType = TwoDimensionalModel;
+            }
+        } else {
+            m_modelType = ThreeDimensionalModel;
+        }
+    }
+
+    std::cerr << "Estimated model type: " << m_modelType << std::endl;
+    std::cerr << "Estimated timing type: " << m_timingType << std::endl;
+    std::cerr << "Estimated units: " << m_timeUnits << std::endl;
+
+    in.seek(0);
+    return true;
+}