Chris@148: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
Chris@148: 
Chris@148: /*
Chris@148:     Sonic Visualiser
Chris@148:     An audio file viewer and annotation editor.
Chris@148:     Centre for Digital Music, Queen Mary, University of London.
Chris@148:     This file copyright 2006 Chris Cannam.
Chris@148:     
Chris@148:     This program is free software; you can redistribute it and/or
Chris@148:     modify it under the terms of the GNU General Public License as
Chris@148:     published by the Free Software Foundation; either version 2 of the
Chris@148:     License, or (at your option) any later version.  See the file
Chris@148:     COPYING included with this distribution for more information.
Chris@148: */
Chris@148: 
Chris@148: #include "CSVFileReader.h"
Chris@148: 
Chris@150: #include "model/Model.h"
Chris@148: #include "base/RealTime.h"
Chris@631: #include "base/StringBits.h"
Chris@148: #include "model/SparseOneDimensionalModel.h"
Chris@148: #include "model/SparseTimeValueModel.h"
Chris@152: #include "model/EditableDenseThreeDimensionalModel.h"
Chris@628: #include "model/RegionModel.h"
Chris@308: #include "DataFileReaderFactory.h"
Chris@148: 
Chris@148: #include <QFile>
Chris@148: #include <QString>
Chris@148: #include <QRegExp>
Chris@148: #include <QStringList>
Chris@148: #include <QTextStream>
Chris@148: 
Chris@148: #include <iostream>
Chris@628: #include <map>
Chris@148: 
Chris@392: CSVFileReader::CSVFileReader(QString path, CSVFormat format,
Chris@392:                              size_t mainModelSampleRate) :
Chris@392:     m_format(format),
Chris@148:     m_file(0),
Chris@631:     m_warnings(0),
Chris@148:     m_mainModelSampleRate(mainModelSampleRate)
Chris@148: {
Chris@148:     m_file = new QFile(path);
Chris@148:     bool good = false;
Chris@148:     
Chris@148:     if (!m_file->exists()) {
Chris@148: 	m_error = QFile::tr("File \"%1\" does not exist").arg(path);
Chris@148:     } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@148: 	m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
Chris@148:     } else {
Chris@148: 	good = true;
Chris@148:     }
Chris@148: 
Chris@148:     if (!good) {
Chris@148: 	delete m_file;
Chris@148: 	m_file = 0;
Chris@148:     }
Chris@148: }
Chris@148: 
Chris@148: CSVFileReader::~CSVFileReader()
Chris@148: {
Chris@690:     SVDEBUG << "CSVFileReader::~CSVFileReader: file is " << m_file << endl;
Chris@148: 
Chris@148:     if (m_file) {
Chris@690:         SVDEBUG << "CSVFileReader::CSVFileReader: Closing file" << endl;
Chris@148:         m_file->close();
Chris@148:     }
Chris@148:     delete m_file;
Chris@148: }
Chris@148: 
Chris@148: bool
Chris@148: CSVFileReader::isOK() const
Chris@148: {
Chris@148:     return (m_file != 0);
Chris@148: }
Chris@148: 
Chris@148: QString
Chris@148: CSVFileReader::getError() const
Chris@148: {
Chris@148:     return m_error;
Chris@148: }
Chris@148: 
Chris@631: size_t
Chris@631: CSVFileReader::convertTimeValue(QString s, int lineno, size_t sampleRate,
Chris@631:                                 size_t windowSize) const
Chris@631: {
Chris@631:     QRegExp nonNumericRx("[^0-9eE.,+-]");
Chris@631:     unsigned int warnLimit = 10;
Chris@631: 
Chris@631:     CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@631: 
Chris@631:     size_t calculatedFrame = 0;
Chris@631: 
Chris@631:     bool ok = false;
Chris@631:     QString numeric = s;
Chris@631:     numeric.remove(nonNumericRx);
Chris@631:     
Chris@631:     if (timeUnits == CSVFormat::TimeSeconds) {
Chris@631: 
Chris@631:         double time = numeric.toDouble(&ok);
Chris@631:         if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
Chris@631:         calculatedFrame = int(time * sampleRate + 0.5);
Chris@631:         
Chris@631:     } else {
Chris@631:         
Chris@631:         long n = numeric.toLong(&ok);
Chris@631:         if (n >= 0) calculatedFrame = n;
Chris@631:         
Chris@631:         if (timeUnits == CSVFormat::TimeWindows) {
Chris@631:             calculatedFrame *= windowSize;
Chris@631:         }
Chris@631:     }
Chris@631:     
Chris@631:     if (!ok) {
Chris@631:         if (m_warnings < warnLimit) {
Chris@631:             std::cerr << "WARNING: CSVFileReader::load: "
Chris@631:                       << "Bad time format (\"" << s.toStdString()
Chris@631:                       << "\") in data line "
Chris@631:                       << lineno+1 << std::endl;
Chris@631:         } else if (m_warnings == warnLimit) {
Chris@631:             std::cerr << "WARNING: Too many warnings" << std::endl;
Chris@631:         }
Chris@631:         ++m_warnings;
Chris@631:     }
Chris@631: 
Chris@631:     return calculatedFrame;
Chris@631: }
Chris@631: 
Chris@148: Model *
Chris@148: CSVFileReader::load() const
Chris@148: {
Chris@148:     if (!m_file) return 0;
Chris@148: 
Chris@628:     CSVFormat::ModelType modelType = m_format.getModelType();
Chris@392:     CSVFormat::TimingType timingType = m_format.getTimingType();
Chris@628:     CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
Chris@392:     size_t sampleRate = m_format.getSampleRate();
Chris@392:     size_t windowSize = m_format.getWindowSize();
Chris@631:     QChar separator = m_format.getSeparator();
Chris@631:     bool allowQuoting = m_format.getAllowQuoting();
Chris@148: 
Chris@392:     if (timingType == CSVFormat::ExplicitTiming) {
Chris@611:         if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@611:             // This will be overridden later if more than one line
Chris@611:             // appears in our file, but we want to choose a default
Chris@611:             // that's likely to be visible
Chris@611:             windowSize = 1024;
Chris@611:         } else {
Chris@611:             windowSize = 1;
Chris@611:         }
Chris@392: 	if (timeUnits == CSVFormat::TimeSeconds) {
Chris@148: 	    sampleRate = m_mainModelSampleRate;
Chris@148: 	}
Chris@148:     }
Chris@148: 
Chris@148:     SparseOneDimensionalModel *model1 = 0;
Chris@148:     SparseTimeValueModel *model2 = 0;
Chris@628:     RegionModel *model2a = 0;
Chris@152:     EditableDenseThreeDimensionalModel *model3 = 0;
Chris@148:     Model *model = 0;
Chris@148: 
Chris@148:     QTextStream in(m_file);
Chris@148:     in.seek(0);
Chris@148: 
Chris@148:     unsigned int warnings = 0, warnLimit = 10;
Chris@148:     unsigned int lineno = 0;
Chris@148: 
Chris@148:     float min = 0.0, max = 0.0;
Chris@148: 
Chris@148:     size_t frameNo = 0;
Chris@628:     size_t duration = 0;
Chris@631:     size_t endFrame = 0;
Chris@631: 
Chris@631:     bool haveAnyValue = false;
Chris@631:     bool haveEndTime = false;
Chris@631: 
Chris@611:     size_t startFrame = 0; // for calculation of dense model resolution
Chris@631:     bool firstEverValue = true;
Chris@148: 
Chris@631:     std::map<QString, int> labelCountMap;
Chris@631:     
Chris@676:     int valueColumns = 0;
Chris@676:     for (int i = 0; i < m_format.getColumnCount(); ++i) {
Chris@676:         if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
Chris@676:             ++valueColumns;
Chris@676:         }
Chris@676:     }
Chris@676: 
Chris@148:     while (!in.atEnd()) {
Chris@148: 
Chris@283:         // QTextStream's readLine doesn't cope with old-style Mac
Chris@283:         // CR-only line endings.  Why did they bother making the class
Chris@283:         // cope with more than one sort of line ending, if it still
Chris@283:         // can't be configured to cope with all the common sorts?
Chris@148: 
Chris@283:         // For the time being we'll deal with this case (which is
Chris@283:         // relatively uncommon for us, but still necessary to handle)
Chris@283:         // by reading the entire file using a single readLine, and
Chris@283:         // splitting it.  For CR and CR/LF line endings this will just
Chris@283:         // read a line at a time, and that's obviously OK.
Chris@148: 
Chris@283:         QString chunk = in.readLine();
Chris@283:         QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@283:         
Chris@283:         for (size_t li = 0; li < lines.size(); ++li) {
Chris@148: 
Chris@283:             QString line = lines[li];
Chris@148: 
Chris@283:             if (line.startsWith("#")) continue;
Chris@283: 
Chris@631:             QStringList list = StringBits::split(line, separator, allowQuoting);
Chris@283:             if (!model) {
Chris@283: 
Chris@283:                 switch (modelType) {
Chris@283: 
Chris@392:                 case CSVFormat::OneDimensionalModel:
Chris@283:                     model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
Chris@283:                     model = model1;
Chris@283:                     break;
Chris@148: 		
Chris@392:                 case CSVFormat::TwoDimensionalModel:
Chris@283:                     model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
Chris@283:                     model = model2;
Chris@283:                     break;
Chris@148: 		
Chris@628:                 case CSVFormat::TwoDimensionalModelWithDuration:
Chris@628:                     model2a = new RegionModel(sampleRate, windowSize, false);
Chris@628:                     model = model2a;
Chris@628:                     break;
Chris@628: 		
Chris@392:                 case CSVFormat::ThreeDimensionalModel:
Chris@535:                     model3 = new EditableDenseThreeDimensionalModel
Chris@535:                         (sampleRate,
Chris@535:                          windowSize,
Chris@676:                          valueColumns,
Chris@535:                          EditableDenseThreeDimensionalModel::NoCompression);
Chris@283:                     model = model3;
Chris@283:                     break;
Chris@283:                 }
Chris@283:             }
Chris@148: 
Chris@631:             float value = 0.f;
Chris@631:             QString label = "";
Chris@148: 
Chris@631:             duration = 0.f;
Chris@631:             haveEndTime = false;
Chris@628: 
Chris@283:             for (int i = 0; i < list.size(); ++i) {
Chris@148: 
Chris@631:                 QString s = list[i];
Chris@631: 
Chris@631:                 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
Chris@631: 
Chris@631:                 switch (purpose) {
Chris@631: 
Chris@631:                 case CSVFormat::ColumnUnknown:
Chris@631:                     break;
Chris@631: 
Chris@631:                 case CSVFormat::ColumnStartTime:
Chris@631:                     frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631:                     break;
Chris@631:                 
Chris@631:                 case CSVFormat::ColumnEndTime:
Chris@631:                     endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631:                     haveEndTime = true;
Chris@631:                     break;
Chris@631: 
Chris@631:                 case CSVFormat::ColumnDuration:
Chris@631:                     duration = convertTimeValue(s, lineno, sampleRate, windowSize);
Chris@631:                     break;
Chris@631: 
Chris@631:                 case CSVFormat::ColumnValue:
Chris@631:                     value = s.toFloat();
Chris@631:                     haveAnyValue = true;
Chris@631:                     break;
Chris@631: 
Chris@631:                 case CSVFormat::ColumnLabel:
Chris@631:                     label = s;
Chris@631:                     ++labelCountMap[label];
Chris@631:                     break;
Chris@283:                 }
Chris@631:             }
Chris@148: 
Chris@631:             if (haveEndTime) { // ... calculate duration now all cols read
Chris@631:                 if (endFrame > frameNo) {
Chris@631:                     duration = endFrame - frameNo;
Chris@628:                 }
Chris@283:             }
Chris@148: 
Chris@392:             if (modelType == CSVFormat::OneDimensionalModel) {
Chris@148: 	    
Chris@631:                 SparseOneDimensionalModel::Point point(frameNo, label);
Chris@283:                 model1->addPoint(point);
Chris@148: 
Chris@392:             } else if (modelType == CSVFormat::TwoDimensionalModel) {
Chris@148: 
Chris@631:                 SparseTimeValueModel::Point point(frameNo, value, label);
Chris@283:                 model2->addPoint(point);
Chris@148: 
Chris@628:             } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
Chris@628: 
Chris@631:                 RegionModel::Point point(frameNo, value, duration, label);
Chris@628:                 model2a->addPoint(point);
Chris@628: 
Chris@392:             } else if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148: 
Chris@283:                 DenseThreeDimensionalModel::Column values;
Chris@148: 
Chris@631:                 for (int i = 0; i < list.size(); ++i) {
Chris@148: 
Chris@676:                     if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) {
Chris@676:                         continue;
Chris@676:                     }
Chris@676: 
Chris@283:                     bool ok = false;
Chris@283:                     float value = list[i].toFloat(&ok);
Chris@611: 
Chris@676:                     values.push_back(value);
Chris@148: 	    
Chris@631:                     if (firstEverValue || value < min) min = value;
Chris@631:                     if (firstEverValue || value > max) max = value;
Chris@676:                     
Chris@631:                     if (firstEverValue) {
Chris@611:                         startFrame = frameNo;
Chris@611:                         model3->setStartFrame(startFrame);
Chris@611:                     } else if (lineno == 1 &&
Chris@611:                                timingType == CSVFormat::ExplicitTiming) {
Chris@611:                         model3->setResolution(frameNo - startFrame);
Chris@611:                     }
Chris@631:                     
Chris@631:                     firstEverValue = false;
Chris@148: 
Chris@283:                     if (!ok) {
Chris@283:                         if (warnings < warnLimit) {
Chris@283:                             std::cerr << "WARNING: CSVFileReader::load: "
Chris@390:                                       << "Non-numeric value \""
Chris@390:                                       << list[i].toStdString()
Chris@491:                                       << "\" in data line " << lineno+1
Chris@283:                                       << ":" << std::endl;
Chris@686:                             std::cerr << line << std::endl;
Chris@283:                             ++warnings;
Chris@283:                         } else if (warnings == warnLimit) {
Chris@390: //                            std::cerr << "WARNING: Too many warnings" << std::endl;
Chris@283:                         }
Chris@283:                     }
Chris@283:                 }
Chris@148: 	
Chris@690: //                SVDEBUG << "Setting bin values for count " << lineno << ", frame "
Chris@687: //                          << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
Chris@148: 
Chris@611:                 model3->setColumn(lineno, values);
Chris@283:             }
Chris@148: 
Chris@283:             ++lineno;
Chris@392:             if (timingType == CSVFormat::ImplicitTiming ||
Chris@283:                 list.size() == 0) {
Chris@283:                 frameNo += windowSize;
Chris@283:             }
Chris@283:         }
Chris@148:     }
Chris@148: 
Chris@631:     if (!haveAnyValue) {
Chris@631:         if (model2a) {
Chris@631:             // assign values for regions based on label frequency; we
Chris@631:             // have this in our labelCountMap, sort of
Chris@631: 
Chris@631:             std::map<int, std::map<QString, float> > countLabelValueMap;
Chris@631:             for (std::map<QString, int>::iterator i = labelCountMap.begin();
Chris@631:                  i != labelCountMap.end(); ++i) {
Chris@631:                 countLabelValueMap[i->second][i->first] = 0.f;
Chris@631:             }
Chris@631: 
Chris@631:             float v = 0.f;
Chris@631:             for (std::map<int, std::map<QString, float> >::iterator i =
Chris@631:                      countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
Chris@631:                 --i;
Chris@631:                 for (std::map<QString, float>::iterator j = i->second.begin();
Chris@631:                      j != i->second.end(); ++j) {
Chris@631:                     j->second = v;
Chris@631:                     v = v + 1.f;
Chris@631:                 }
Chris@631:             }
Chris@631: 
Chris@631:             std::map<RegionModel::Point, RegionModel::Point,
Chris@631:                 RegionModel::Point::Comparator> pointMap;
Chris@631:             for (RegionModel::PointList::const_iterator i =
Chris@631:                      model2a->getPoints().begin();
Chris@631:                  i != model2a->getPoints().end(); ++i) {
Chris@631:                 RegionModel::Point p(*i);
Chris@631:                 v = countLabelValueMap[labelCountMap[p.label]][p.label];
Chris@631:                 RegionModel::Point pp(p.frame, v, p.duration, p.label);
Chris@631:                 pointMap[p] = pp;
Chris@631:             }
Chris@631: 
Chris@631:             for (std::map<RegionModel::Point, RegionModel::Point>::iterator i = 
Chris@631:                      pointMap.begin(); i != pointMap.end(); ++i) {
Chris@631:                 model2a->deletePoint(i->first);
Chris@631:                 model2a->addPoint(i->second);
Chris@631:             }
Chris@631:         }
Chris@631:     }
Chris@631:                 
Chris@392:     if (modelType == CSVFormat::ThreeDimensionalModel) {
Chris@148: 	model3->setMinimumLevel(min);
Chris@148: 	model3->setMaximumLevel(max);
Chris@148:     }
Chris@148: 
Chris@148:     return model;
Chris@148: }
Chris@148: