Chris@392: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
Chris@392: 
Chris@392: /*
Chris@392:     Sonic Visualiser
Chris@392:     An audio file viewer and annotation editor.
Chris@392:     Centre for Digital Music, Queen Mary, University of London.
Chris@392:     This file copyright 2006 Chris Cannam.
Chris@392:     
Chris@392:     This program is free software; you can redistribute it and/or
Chris@392:     modify it under the terms of the GNU General Public License as
Chris@392:     published by the Free Software Foundation; either version 2 of the
Chris@392:     License, or (at your option) any later version.  See the file
Chris@392:     COPYING included with this distribution for more information.
Chris@392: */
Chris@392: 
Chris@392: #include "CSVFormat.h"
Chris@392: 
Chris@629: #include "base/StringBits.h"
Chris@629: 
Chris@392: #include <QFile>
Chris@392: #include <QString>
Chris@392: #include <QRegExp>
Chris@392: #include <QStringList>
Chris@392: #include <QTextStream>
Chris@392: 
Chris@392: #include <iostream>
Chris@392: 
Chris@629: CSVFormat::CSVFormat(QString path) :
Chris@629:     m_separator(""),
Chris@392:     m_sampleRate(44100),
Chris@392:     m_windowSize(1024),
Chris@629:     m_allowQuoting(true)
Chris@392: {
Chris@629:     guessFormatFor(path);
Chris@629: }
Chris@629: 
Chris@629: void
Chris@629: CSVFormat::guessFormatFor(QString path)
Chris@629: {
Chris@629:     m_modelType = TwoDimensionalModel;
Chris@629:     m_timingType = ExplicitTiming;
Chris@629:     m_timeUnits = TimeSeconds;
Chris@629: 
Chris@629:     m_maxExampleCols = 0;
Chris@629:     m_columnCount = 0;
Chris@629:     m_variableColumnCount = false;
Chris@629: 
Chris@629:     m_example.clear();
Chris@629:     m_columnQualities.clear();
Chris@629:     m_columnPurposes.clear();
Chris@629:     m_prevValues.clear();
Chris@629: 
Chris@629:     QFile file(path);
Chris@392:     if (!file.exists()) return;
Chris@392:     if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return;
Chris@392: 
Chris@392:     QTextStream in(&file);
Chris@392:     in.seek(0);
Chris@392: 
Chris@629:     int lineno = 0;
Chris@392: 
Chris@392:     while (!in.atEnd()) {
Chris@392: 
Chris@392:         // See comment about line endings in CSVFileReader::load() 
Chris@392: 
Chris@392:         QString chunk = in.readLine();
Chris@392:         QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@392: 
Chris@897:         for (int li = 0; li < lines.size(); ++li) {
Chris@392: 
Chris@392:             QString line = lines[li];
Chris@629:             if (line.startsWith("#") || line == "") continue;
Chris@392: 
Chris@629:             guessQualities(line, lineno);
Chris@392: 
Chris@840:             ++lineno;
Chris@629:         }
Chris@840: 
Chris@840:         if (lineno >= 50) break;
Chris@629:     }
Chris@392: 
Chris@629:     guessPurposes();
Chris@629: }
Chris@629: 
Chris@629: void
Chris@629: CSVFormat::guessSeparator(QString line)
Chris@629: {
Chris@629:     char candidates[] = { ',', '\t', ' ', '|', '/', ':' };
Chris@897:     for (int i = 0; i < int(sizeof(candidates)/sizeof(candidates[0])); ++i) {
Chris@629:         if (StringBits::split(line, candidates[i], m_allowQuoting).size() >= 2) {
Chris@629:             m_separator = candidates[i];
Chris@629:             return;
Chris@629:         }
Chris@629:     }
Chris@629:     m_separator = " ";
Chris@629: }
Chris@629: 
Chris@629: void
Chris@629: CSVFormat::guessQualities(QString line, int lineno)
Chris@629: {
Chris@629:     if (m_separator == "") guessSeparator(line);
Chris@629: 
Chris@629:     QStringList list = StringBits::split(line, m_separator[0], m_allowQuoting);
Chris@629: 
Chris@629:     int cols = list.size();
Chris@629:     if (lineno == 0 || (cols < m_columnCount)) m_columnCount = cols;
Chris@629:     if (cols != m_columnCount) m_variableColumnCount = true;
Chris@629: 
Chris@629:     // All columns are regarded as having these qualities until we see
Chris@629:     // something that indicates otherwise:
Chris@629: 
Chris@629:     ColumnQualities defaultQualities =
Chris@629:         ColumnNumeric | ColumnIntegral | ColumnIncreasing;
Chris@629:     
Chris@629:     for (int i = 0; i < cols; ++i) {
Chris@629: 	    
Chris@629:         while (m_columnQualities.size() <= i) {
Chris@629:             m_columnQualities.push_back(defaultQualities);
Chris@629:             m_prevValues.push_back(0.f);
Chris@629:         }
Chris@629: 
Chris@629:         QString s(list[i]);
Chris@629:         bool ok = false;
Chris@629: 
Chris@629:         ColumnQualities qualities = m_columnQualities[i];
Chris@629: 
Chris@629:         bool numeric    = (qualities & ColumnNumeric);
Chris@629:         bool integral   = (qualities & ColumnIntegral);
Chris@629:         bool increasing = (qualities & ColumnIncreasing);
Chris@629:         bool large      = (qualities & ColumnLarge); // this one defaults to off
Chris@629: 
Chris@629:         float value = 0.f;
Chris@629: 
Chris@629:         //!!! how to take into account headers?
Chris@629: 
Chris@629:         if (numeric) {
Chris@629:             value = s.toFloat(&ok);
Chris@629:             if (!ok) {
Chris@629:                 value = (float)StringBits::stringToDoubleLocaleFree(s, &ok);
Chris@629:             }
Chris@629:             if (ok) {
Chris@629:                 if (lineno < 2 && value > 1000.f) large = true;
Chris@629:             } else {
Chris@629:                 numeric = false;
Chris@629:             }
Chris@629:         }
Chris@629: 
Chris@629:         if (numeric) {
Chris@629: 
Chris@629:             if (integral) {
Chris@629:                 if (s.contains('.') || s.contains(',')) {
Chris@629:                     integral = false;
Chris@392:                 }
Chris@392:             }
Chris@392: 
Chris@629:             if (increasing) {
Chris@629:                 if (lineno > 0 && value <= m_prevValues[i]) {
Chris@629:                     increasing = false;
Chris@392:                 }
Chris@392:             }
Chris@392: 
Chris@629:             m_prevValues[i] = value;
Chris@629:         }
Chris@392: 
Chris@629:         m_columnQualities[i] =
Chris@629:             (numeric    ? ColumnNumeric : 0) |
Chris@629:             (integral   ? ColumnIntegral : 0) |
Chris@629:             (increasing ? ColumnIncreasing : 0) |
Chris@629:             (large      ? ColumnLarge : 0);
Chris@629:     }
Chris@392: 
Chris@629:     if (lineno < 10) {
Chris@629:         m_example.push_back(list);
Chris@629:         if (lineno == 0 || cols > m_maxExampleCols) {
Chris@629:             m_maxExampleCols = cols;
Chris@392:         }
Chris@392:     }
Chris@392: 
Chris@843: //    cerr << "Estimated column qualities: ";
Chris@676: //    for (int i = 0; i < m_columnCount; ++i) {
Chris@843: //        cerr << int(m_columnQualities[i]) << " ";
Chris@676: //    }
Chris@843: //    cerr << endl;
Chris@629: }
Chris@629: 
Chris@629: void
Chris@629: CSVFormat::guessPurposes()
Chris@629: {
Chris@629:     m_timingType = CSVFormat::ImplicitTiming;
Chris@629:     m_timeUnits = CSVFormat::TimeWindows;
Chris@392: 	
Chris@629:     int timingColumnCount = 0;
Chris@629:     
Chris@629:     for (int i = 0; i < m_columnCount; ++i) {
Chris@629:         
Chris@629:         ColumnPurpose purpose = ColumnUnknown;
Chris@629:         bool primary = (i == 0);
Chris@392: 
Chris@629:         ColumnQualities qualities = m_columnQualities[i];
Chris@392: 
Chris@629:         bool numeric    = (qualities & ColumnNumeric);
Chris@629:         bool integral   = (qualities & ColumnIntegral);
Chris@629:         bool increasing = (qualities & ColumnIncreasing);
Chris@629:         bool large      = (qualities & ColumnLarge);
Chris@629: 
Chris@629:         bool timingColumn = (numeric && increasing);
Chris@629: 
Chris@629:         if (timingColumn) {
Chris@629: 
Chris@629:             ++timingColumnCount;
Chris@629:                               
Chris@629:             if (primary) {
Chris@629: 
Chris@629:                 purpose = ColumnStartTime;
Chris@629: 
Chris@629:                 m_timingType = ExplicitTiming;
Chris@629: 
Chris@629:                 if (integral && large) {
Chris@629:                     m_timeUnits = TimeAudioFrames;
Chris@629:                 } else {
Chris@629:                     m_timeUnits = TimeSeconds;
Chris@629:                 }
Chris@629: 
Chris@629:             } else {
Chris@629: 
Chris@629:                 if (timingColumnCount == 2 && m_timingType == ExplicitTiming) {
Chris@629:                     purpose = ColumnEndTime;
Chris@629:                 }
Chris@629:             }
Chris@629:         }
Chris@629: 
Chris@629:         if (purpose == ColumnUnknown) {
Chris@629:             if (numeric) {
Chris@629:                 purpose = ColumnValue;
Chris@629:             } else {
Chris@629:                 purpose = ColumnLabel;
Chris@629:             }
Chris@629:         }
Chris@629: 
Chris@631:         setColumnPurpose(i, purpose);
Chris@629:     }            
Chris@629: 
Chris@629:     int valueCount = 0;
Chris@629:     for (int i = 0; i < m_columnCount; ++i) {
Chris@629:         if (m_columnPurposes[i] == ColumnValue) ++valueCount;
Chris@629:     }
Chris@629: 
Chris@630:     if (valueCount == 2 && timingColumnCount == 1) {
Chris@630:         // If we have exactly two apparent value columns and only one
Chris@630:         // timing column, but one value column is integral and the
Chris@630:         // other is not, guess that whichever one matches the integral
Chris@630:         // status of the time column is either duration or end time
Chris@630:         if (m_timingType == ExplicitTiming) {
Chris@630:             int a = -1, b = -1;
Chris@630:             for (int i = 0; i < m_columnCount; ++i) {
Chris@630:                 if (m_columnPurposes[i] == ColumnValue) {
Chris@630:                     if (a == -1) a = i;
Chris@630:                     else b = i;
Chris@630:                 }
Chris@630:             }
Chris@630:             if ((m_columnQualities[a] & ColumnIntegral) !=
Chris@630:                 (m_columnQualities[b] & ColumnIntegral)) {
Chris@630:                 int timecol = a;
Chris@630:                 if ((m_columnQualities[a] & ColumnIntegral) !=
Chris@630:                     (m_columnQualities[0] & ColumnIntegral)) {
Chris@630:                     timecol = b;
Chris@630:                 }
Chris@630:                 if (m_columnQualities[timecol] & ColumnIncreasing) {
Chris@630:                     // This shouldn't happen; should have been settled above
Chris@630:                     m_columnPurposes[timecol] = ColumnEndTime;
Chris@630:                 } else {
Chris@630:                     m_columnPurposes[timecol] = ColumnDuration;
Chris@630:                 }
Chris@630:                 --valueCount;
Chris@630:             }
Chris@630:         }
Chris@630:     }
Chris@630: 
Chris@631:     if (timingColumnCount > 1) {
Chris@631:         m_modelType = TwoDimensionalModelWithDuration;
Chris@392:     } else {
Chris@631:         if (valueCount == 0) {
Chris@631:             m_modelType = OneDimensionalModel;
Chris@631:         } else if (valueCount == 1) {
Chris@631:             m_modelType = TwoDimensionalModel;
Chris@631:         } else {
Chris@631:             m_modelType = ThreeDimensionalModel;
Chris@631:         }
Chris@629:     }
Chris@392: 
Chris@843: //    cerr << "Estimated column purposes: ";
Chris@676: //    for (int i = 0; i < m_columnCount; ++i) {
Chris@843: //        cerr << int(m_columnPurposes[i]) << " ";
Chris@676: //    }
Chris@843: //    cerr << endl;
Chris@392: 
Chris@843: //    cerr << "Estimated model type: " << m_modelType << endl;
Chris@843: //    cerr << "Estimated timing type: " << m_timingType << endl;
Chris@843: //    cerr << "Estimated units: " << m_timeUnits << endl;
Chris@392: }
Chris@392: 
Chris@631: CSVFormat::ColumnPurpose
Chris@631: CSVFormat::getColumnPurpose(int i)
Chris@631: {
Chris@631:     while (m_columnPurposes.size() <= i) {
Chris@631:         m_columnPurposes.push_back(ColumnUnknown);
Chris@631:     }
Chris@631:     return m_columnPurposes[i];
Chris@631: }
Chris@629: 
Chris@631: CSVFormat::ColumnPurpose
Chris@631: CSVFormat::getColumnPurpose(int i) const
Chris@631: {
Chris@668:     if (m_columnPurposes.size() <= i) {
Chris@668:         return ColumnUnknown;
Chris@668:     }
Chris@631:     return m_columnPurposes[i];
Chris@631: }
Chris@631: 
Chris@631: void
Chris@631: CSVFormat::setColumnPurpose(int i, ColumnPurpose p)
Chris@631: {
Chris@631:     while (m_columnPurposes.size() <= i) {
Chris@631:         m_columnPurposes.push_back(ColumnUnknown);
Chris@631:     }
Chris@631:     m_columnPurposes[i] = p;
Chris@631: }
Chris@631: 
Chris@631: 
Chris@631: 
Chris@631: