annotate data/fileio/CSVFormat.h @ 629:35499d48a5d1

* Start overhauling CSV parser to associate purposes with columns en route to its guesses; add some string manipulation code
author Chris Cannam
date Thu, 15 Jul 2010 15:27:21 +0000
parents 001db550bd48
children 11a664058dd8
rev   line source
Chris@392 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@392 2
Chris@392 3 /*
Chris@392 4 Sonic Visualiser
Chris@392 5 An audio file viewer and annotation editor.
Chris@392 6 Centre for Digital Music, Queen Mary, University of London.
Chris@392 7 This file copyright 2006 Chris Cannam.
Chris@392 8
Chris@392 9 This program is free software; you can redistribute it and/or
Chris@392 10 modify it under the terms of the GNU General Public License as
Chris@392 11 published by the Free Software Foundation; either version 2 of the
Chris@392 12 License, or (at your option) any later version. See the file
Chris@392 13 COPYING included with this distribution for more information.
Chris@392 14 */
Chris@392 15
Chris@392 16 #ifndef _CSV_FORMAT_H_
Chris@392 17 #define _CSV_FORMAT_H_
Chris@392 18
Chris@392 19 #include <QString>
Chris@392 20 #include <QStringList>
Chris@392 21
Chris@392 22 class CSVFormat
Chris@392 23 {
Chris@392 24 public:
Chris@392 25 enum ModelType {
Chris@392 26 OneDimensionalModel,
Chris@392 27 TwoDimensionalModel,
Chris@628 28 TwoDimensionalModelWithDuration,
Chris@392 29 ThreeDimensionalModel
Chris@392 30 };
Chris@392 31
Chris@392 32 enum TimingType {
Chris@392 33 ExplicitTiming,
Chris@392 34 ImplicitTiming
Chris@392 35 };
Chris@628 36
Chris@628 37 enum DurationType {
Chris@628 38 Durations,
Chris@628 39 EndTimes
Chris@628 40 };
Chris@392 41
Chris@392 42 enum TimeUnits {
Chris@392 43 TimeSeconds,
Chris@392 44 TimeAudioFrames,
Chris@392 45 TimeWindows
Chris@392 46 };
Chris@392 47
Chris@629 48 enum ColumnPurpose {
Chris@629 49 ColumnUnknown,
Chris@629 50 ColumnStartTime,
Chris@629 51 ColumnEndTime,
Chris@629 52 ColumnDuration,
Chris@629 53 ColumnValue,
Chris@629 54 ColumnLabel
Chris@629 55 };
Chris@629 56
Chris@629 57 enum ColumnQuality {
Chris@629 58 ColumnNumeric = 0x1,
Chris@629 59 ColumnIntegral = 0x2,
Chris@629 60 ColumnIncreasing = 0x4,
Chris@629 61 ColumnLarge = 0x8
Chris@629 62 };
Chris@629 63 typedef unsigned int ColumnQualities;
Chris@392 64
Chris@392 65 CSVFormat() : // arbitrary defaults
Chris@392 66 m_modelType(TwoDimensionalModel),
Chris@392 67 m_timingType(ExplicitTiming),
Chris@628 68 m_durationType(Durations),
Chris@392 69 m_timeUnits(TimeSeconds),
Chris@392 70 m_separator(","),
Chris@392 71 m_sampleRate(44100),
Chris@392 72 m_windowSize(1024),
Chris@629 73 m_columnCount(0),
Chris@629 74 m_variableColumnCount(false),
Chris@629 75 m_behaviour(QString::KeepEmptyParts),
Chris@629 76 m_allowQuoting(true),
Chris@629 77 m_maxExampleCols(0)
Chris@392 78 { }
Chris@629 79
Chris@629 80 CSVFormat(QString path); // guess format
Chris@629 81
Chris@629 82 /**
Chris@629 83 * Guess the format of the given CSV file, setting the fields in
Chris@629 84 * this object accordingly. If the current separator is the empty
Chris@629 85 * string, the separator character will also be guessed; otherwise
Chris@629 86 * the current separator will be used. The other properties of
Chris@629 87 * this object will be set according to guesses from the file.
Chris@629 88 */
Chris@629 89 void guessFormatFor(QString path);
Chris@628 90
Chris@628 91 ModelType getModelType() const { return m_modelType; }
Chris@628 92 TimingType getTimingType() const { return m_timingType; }
Chris@628 93 DurationType getDurationType() const { return m_durationType; }
Chris@628 94 TimeUnits getTimeUnits() const { return m_timeUnits; }
Chris@628 95 QString getSeparator() const { return m_separator; }
Chris@628 96 size_t getSampleRate() const { return m_sampleRate; }
Chris@628 97 size_t getWindowSize() const { return m_windowSize; }
Chris@629 98
Chris@392 99 QString::SplitBehavior getSplitBehaviour() const { return m_behaviour; }
Chris@629 100 QList<ColumnPurpose> getColumnPurposes() const { return m_columnPurposes; }
Chris@392 101
Chris@628 102 void setModelType(ModelType t) { m_modelType = t; }
Chris@628 103 void setTimingType(TimingType t) { m_timingType = t; }
Chris@628 104 void setDurationType(DurationType t) { m_durationType = t; }
Chris@628 105 void setTimeUnits(TimeUnits t) { m_timeUnits = t; }
Chris@628 106 void setSeparator(QString s) { m_separator = s; }
Chris@628 107 void setSampleRate(size_t r) { m_sampleRate = r; }
Chris@628 108 void setWindowSize(size_t s) { m_windowSize = s; }
Chris@392 109
Chris@392 110 void setSplitBehaviour(QString::SplitBehavior b) { m_behaviour = b; }
Chris@629 111 void setColumnPurposes(QList<ColumnPurpose> cl) { m_columnPurposes = cl; }
Chris@392 112
Chris@629 113 // read-only; only valid if format has been guessed:
Chris@629 114 QList<ColumnQualities> getColumnQualities() const { return m_columnQualities; }
Chris@629 115
Chris@629 116 // read-only; only valid if format has been guessed:
Chris@392 117 QList<QStringList> getExample() const { return m_example; }
Chris@392 118 int getMaxExampleCols() const { return m_maxExampleCols; }
Chris@392 119
Chris@392 120 protected:
Chris@628 121 ModelType m_modelType;
Chris@628 122 TimingType m_timingType;
Chris@628 123 DurationType m_durationType;
Chris@628 124 TimeUnits m_timeUnits;
Chris@628 125 QString m_separator;
Chris@628 126 size_t m_sampleRate;
Chris@628 127 size_t m_windowSize;
Chris@392 128
Chris@629 129 int m_columnCount;
Chris@629 130 bool m_variableColumnCount;
Chris@629 131
Chris@629 132 QList<ColumnQualities> m_columnQualities;
Chris@629 133 QList<ColumnPurpose> m_columnPurposes;
Chris@629 134
Chris@629 135 QList<float> m_prevValues;
Chris@629 136
Chris@392 137 QString::SplitBehavior m_behaviour;
Chris@629 138 bool m_allowQuoting;
Chris@392 139
Chris@392 140 QList<QStringList> m_example;
Chris@392 141 int m_maxExampleCols;
Chris@629 142
Chris@629 143 void guessSeparator(QString line);
Chris@629 144 void guessQualities(QString line, int lineno);
Chris@629 145 void guessPurposes();
Chris@629 146
Chris@629 147 void guessFormatFor_Old(QString path);
Chris@629 148
Chris@392 149 };
Chris@392 150
Chris@392 151 #endif