diff data/fileio/CSVFileReader.cpp @ 742:c10cb8782576 coreaudio_tests

Merge from branch "default"
author Chris Cannam
date Sun, 01 Jul 2012 11:53:00 +0100
parents 1424aa29ae95
children e802e550a1f2
line wrap: on
line diff
--- a/data/fileio/CSVFileReader.cpp	Mon Nov 29 12:45:39 2010 +0000
+++ b/data/fileio/CSVFileReader.cpp	Sun Jul 01 11:53:00 2012 +0100
@@ -17,9 +17,11 @@
 
 #include "model/Model.h"
 #include "base/RealTime.h"
+#include "base/StringBits.h"
 #include "model/SparseOneDimensionalModel.h"
 #include "model/SparseTimeValueModel.h"
 #include "model/EditableDenseThreeDimensionalModel.h"
+#include "model/RegionModel.h"
 #include "DataFileReaderFactory.h"
 
 #include <QFile>
@@ -29,11 +31,13 @@
 #include <QTextStream>
 
 #include <iostream>
+#include <map>
 
 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
                              size_t mainModelSampleRate) :
     m_format(format),
     m_file(0),
+    m_warnings(0),
     m_mainModelSampleRate(mainModelSampleRate)
 {
     m_file = new QFile(path);
@@ -55,10 +59,10 @@
 
 CSVFileReader::~CSVFileReader()
 {
-    std::cerr << "CSVFileReader::~CSVFileReader: file is " << m_file << std::endl;
+    SVDEBUG << "CSVFileReader::~CSVFileReader: file is " << m_file << endl;
 
     if (m_file) {
-        std::cerr << "CSVFileReader::CSVFileReader: Closing file" << std::endl;
+        SVDEBUG << "CSVFileReader::CSVFileReader: Closing file" << endl;
         m_file->close();
     }
     delete m_file;
@@ -76,27 +80,64 @@
     return m_error;
 }
 
+size_t
+CSVFileReader::convertTimeValue(QString s, int lineno, size_t sampleRate,
+                                size_t windowSize) const
+{
+    QRegExp nonNumericRx("[^0-9eE.,+-]");
+    unsigned int warnLimit = 10;
+
+    CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
+
+    size_t calculatedFrame = 0;
+
+    bool ok = false;
+    QString numeric = s;
+    numeric.remove(nonNumericRx);
+    
+    if (timeUnits == CSVFormat::TimeSeconds) {
+
+        double time = numeric.toDouble(&ok);
+        if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
+        calculatedFrame = int(time * sampleRate + 0.5);
+        
+    } else {
+        
+        long n = numeric.toLong(&ok);
+        if (n >= 0) calculatedFrame = n;
+        
+        if (timeUnits == CSVFormat::TimeWindows) {
+            calculatedFrame *= windowSize;
+        }
+    }
+    
+    if (!ok) {
+        if (m_warnings < warnLimit) {
+            std::cerr << "WARNING: CSVFileReader::load: "
+                      << "Bad time format (\"" << s.toStdString()
+                      << "\") in data line "
+                      << lineno+1 << std::endl;
+        } else if (m_warnings == warnLimit) {
+            std::cerr << "WARNING: Too many warnings" << std::endl;
+        }
+        ++m_warnings;
+    }
+
+    return calculatedFrame;
+}
+
 Model *
 CSVFileReader::load() const
 {
     if (!m_file) return 0;
-/*!!!
-    CSVFormatDialog *dialog = new CSVFormatDialog
-	(0, m_file, m_mainModelSampleRate);
 
-    if (dialog->exec() == QDialog::Rejected) {
-	delete dialog;
-        throw DataFileReaderFactory::ImportCancelled;
-    }
-*/
-
-    CSVFormat::ModelType   modelType = m_format.getModelType();
+    CSVFormat::ModelType modelType = m_format.getModelType();
     CSVFormat::TimingType timingType = m_format.getTimingType();
-    CSVFormat::TimeUnits   timeUnits = m_format.getTimeUnits();
-    QString separator = m_format.getSeparator();
-    QString::SplitBehavior behaviour = m_format.getSplitBehaviour();
+    CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
     size_t sampleRate = m_format.getSampleRate();
     size_t windowSize = m_format.getWindowSize();
+    QChar separator = m_format.getSeparator();
+    bool allowQuoting = m_format.getAllowQuoting();
 
     if (timingType == CSVFormat::ExplicitTiming) {
         if (modelType == CSVFormat::ThreeDimensionalModel) {
@@ -114,6 +155,7 @@
 
     SparseOneDimensionalModel *model1 = 0;
     SparseTimeValueModel *model2 = 0;
+    RegionModel *model2a = 0;
     EditableDenseThreeDimensionalModel *model3 = 0;
     Model *model = 0;
 
@@ -126,7 +168,23 @@
     float min = 0.0, max = 0.0;
 
     size_t frameNo = 0;
+    size_t duration = 0;
+    size_t endFrame = 0;
+
+    bool haveAnyValue = false;
+    bool haveEndTime = false;
+
     size_t startFrame = 0; // for calculation of dense model resolution
+    bool firstEverValue = true;
+
+    std::map<QString, int> labelCountMap;
+    
+    int valueColumns = 0;
+    for (int i = 0; i < m_format.getColumnCount(); ++i) {
+        if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
+            ++valueColumns;
+        }
+    }
 
     while (!in.atEnd()) {
 
@@ -150,8 +208,7 @@
 
             if (line.startsWith("#")) continue;
 
-            QStringList list = line.split(separator, behaviour);
-
+            QStringList list = StringBits::split(line, separator, allowQuoting);
             if (!model) {
 
                 switch (modelType) {
@@ -166,110 +223,112 @@
                     model = model2;
                     break;
 		
+                case CSVFormat::TwoDimensionalModelWithDuration:
+                    model2a = new RegionModel(sampleRate, windowSize, false);
+                    model = model2a;
+                    break;
+		
                 case CSVFormat::ThreeDimensionalModel:
                     model3 = new EditableDenseThreeDimensionalModel
                         (sampleRate,
                          windowSize,
-                         list.size(),
+                         valueColumns,
                          EditableDenseThreeDimensionalModel::NoCompression);
                     model = model3;
                     break;
                 }
             }
 
-            QStringList tidyList;
-            QRegExp nonNumericRx("[^0-9eE.,+-]");
+            float value = 0.f;
+            QString label = "";
+
+            duration = 0.f;
+            haveEndTime = false;
 
             for (int i = 0; i < list.size(); ++i) {
-	    
-                QString s(list[i].trimmed());
 
-                if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
-                    s = s.mid(1, s.length() - 2);
-                } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
-                    s = s.mid(1, s.length() - 2);
+                QString s = list[i];
+
+                CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
+
+                switch (purpose) {
+
+                case CSVFormat::ColumnUnknown:
+                    break;
+
+                case CSVFormat::ColumnStartTime:
+                    frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
+                    break;
+                
+                case CSVFormat::ColumnEndTime:
+                    endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
+                    haveEndTime = true;
+                    break;
+
+                case CSVFormat::ColumnDuration:
+                    duration = convertTimeValue(s, lineno, sampleRate, windowSize);
+                    break;
+
+                case CSVFormat::ColumnValue:
+                    value = s.toFloat();
+                    haveAnyValue = true;
+                    break;
+
+                case CSVFormat::ColumnLabel:
+                    label = s;
+                    ++labelCountMap[label];
+                    break;
                 }
+            }
 
-                if (i == 0 && timingType == CSVFormat::ExplicitTiming) {
-
-                    bool ok = false;
-                    QString numeric = s;
-                    numeric.remove(nonNumericRx);
-
-                    if (timeUnits == CSVFormat::TimeSeconds) {
-
-                        double time = numeric.toDouble(&ok);
-                        frameNo = int(time * sampleRate + 0.5);
-
-                    } else {
-
-                        frameNo = numeric.toInt(&ok);
-
-                        if (timeUnits == CSVFormat::TimeWindows) {
-                            frameNo *= windowSize;
-                        }
-                    }
-			       
-                    if (!ok) {
-                        if (warnings < warnLimit) {
-                            std::cerr << "WARNING: CSVFileReader::load: "
-                                      << "Bad time format (\"" << s.toStdString()
-                                      << "\") in data line "
-                                      << lineno+1 << ":" << std::endl;
-                            std::cerr << line.toStdString() << std::endl;
-                        } else if (warnings == warnLimit) {
-                            std::cerr << "WARNING: Too many warnings" << std::endl;
-                        }
-                        ++warnings;
-                    }
-                } else {
-                    tidyList.push_back(s);
+            if (haveEndTime) { // ... calculate duration now all cols read
+                if (endFrame > frameNo) {
+                    duration = endFrame - frameNo;
                 }
             }
 
             if (modelType == CSVFormat::OneDimensionalModel) {
 	    
-                SparseOneDimensionalModel::Point point
-                    (frameNo,
-                     tidyList.size() > 0 ? tidyList[tidyList.size()-1] :
-                     QString("%1").arg(lineno+1));
-
+                SparseOneDimensionalModel::Point point(frameNo, label);
                 model1->addPoint(point);
 
             } else if (modelType == CSVFormat::TwoDimensionalModel) {
 
-                SparseTimeValueModel::Point point
-                    (frameNo,
-                     tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0,
-                     tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno+1));
+                SparseTimeValueModel::Point point(frameNo, value, label);
+                model2->addPoint(point);
 
-                model2->addPoint(point);
+            } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
+
+                RegionModel::Point point(frameNo, value, duration, label);
+                model2a->addPoint(point);
 
             } else if (modelType == CSVFormat::ThreeDimensionalModel) {
 
                 DenseThreeDimensionalModel::Column values;
 
-                for (int i = 0; i < tidyList.size(); ++i) {
+                for (int i = 0; i < list.size(); ++i) {
+
+                    if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) {
+                        continue;
+                    }
 
                     bool ok = false;
                     float value = list[i].toFloat(&ok);
 
-                    if (i > 0 || timingType != CSVFormat::ExplicitTiming) {
-                        values.push_back(value);
-                    }
+                    values.push_back(value);
 	    
-                    bool firstEver = (lineno == 0 && i == 0);
-
-                    if (firstEver || value < min) min = value;
-                    if (firstEver || value > max) max = value;
-
-                    if (firstEver) {
+                    if (firstEverValue || value < min) min = value;
+                    if (firstEverValue || value > max) max = value;
+                    
+                    if (firstEverValue) {
                         startFrame = frameNo;
                         model3->setStartFrame(startFrame);
                     } else if (lineno == 1 &&
                                timingType == CSVFormat::ExplicitTiming) {
                         model3->setResolution(frameNo - startFrame);
                     }
+                    
+                    firstEverValue = false;
 
                     if (!ok) {
                         if (warnings < warnLimit) {
@@ -278,7 +337,7 @@
                                       << list[i].toStdString()
                                       << "\" in data line " << lineno+1
                                       << ":" << std::endl;
-                            std::cerr << line.toStdString() << std::endl;
+                            std::cerr << line << std::endl;
                             ++warnings;
                         } else if (warnings == warnLimit) {
 //                            std::cerr << "WARNING: Too many warnings" << std::endl;
@@ -286,8 +345,8 @@
                     }
                 }
 	
-//                std::cerr << "Setting bin values for count " << lineno << ", frame "
-//                          << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl;
+//                SVDEBUG << "Setting bin values for count " << lineno << ", frame "
+//                          << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
 
                 model3->setColumn(lineno, values);
             }
@@ -300,6 +359,47 @@
         }
     }
 
+    if (!haveAnyValue) {
+        if (model2a) {
+            // assign values for regions based on label frequency; we
+            // have this in our labelCountMap, sort of
+
+            std::map<int, std::map<QString, float> > countLabelValueMap;
+            for (std::map<QString, int>::iterator i = labelCountMap.begin();
+                 i != labelCountMap.end(); ++i) {
+                countLabelValueMap[i->second][i->first] = 0.f;
+            }
+
+            float v = 0.f;
+            for (std::map<int, std::map<QString, float> >::iterator i =
+                     countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
+                --i;
+                for (std::map<QString, float>::iterator j = i->second.begin();
+                     j != i->second.end(); ++j) {
+                    j->second = v;
+                    v = v + 1.f;
+                }
+            }
+
+            std::map<RegionModel::Point, RegionModel::Point,
+                RegionModel::Point::Comparator> pointMap;
+            for (RegionModel::PointList::const_iterator i =
+                     model2a->getPoints().begin();
+                 i != model2a->getPoints().end(); ++i) {
+                RegionModel::Point p(*i);
+                v = countLabelValueMap[labelCountMap[p.label]][p.label];
+                RegionModel::Point pp(p.frame, v, p.duration, p.label);
+                pointMap[p] = pp;
+            }
+
+            for (std::map<RegionModel::Point, RegionModel::Point>::iterator i = 
+                     pointMap.begin(); i != pointMap.end(); ++i) {
+                model2a->deletePoint(i->first);
+                model2a->addPoint(i->second);
+            }
+        }
+    }
+                
     if (modelType == CSVFormat::ThreeDimensionalModel) {
 	model3->setMinimumLevel(min);
 	model3->setMaximumLevel(max);