comparison data/fileio/CSVFormat.h @ 1870:1b8c4ee06f6d csv-import-headers

Detect presence of header row in CSV format guesser; use headings to inform our guesses about column purposes; test this
author Chris Cannam
date Wed, 17 Jun 2020 18:01:00 +0100
parents f0ffc88a36b3
children
comparison
equal deleted inserted replaced
1868:44dba7cd9ec3 1870:1b8c4ee06f6d
18 18
19 #include <QString> 19 #include <QString>
20 #include <QStringList> 20 #include <QStringList>
21 21
22 #include <set> 22 #include <set>
23 #include <map>
23 24
24 #include "base/BaseTypes.h" 25 #include "base/BaseTypes.h"
25 26
26 class CSVFormat 27 class CSVFormat
27 { 28 {
56 ColumnValue, 57 ColumnValue,
57 ColumnPitch, 58 ColumnPitch,
58 ColumnLabel 59 ColumnLabel
59 }; 60 };
60 61
62 enum HeaderStatus {
63 HeaderUnknown = 0,
64 HeaderAbsent = 1,
65 HeaderPresent = 2
66 };
67
61 enum ColumnQuality { 68 enum ColumnQuality {
62 ColumnNumeric = 1, // No non-numeric values were seen in sample 69 ColumnNumeric = 1, // No non-numeric values were seen in sample
63 ColumnIntegral = 2, // All sampled values were integers 70 ColumnIntegral = 2, // All sampled values were integers
64 ColumnIncreasing = 4, // Sampled values were monotonically increasing 71 ColumnIncreasing = 4, // Sampled values were monotonically increasing
65 ColumnSmall = 8, // All sampled values had magnitude < 1 72 ColumnSmall = 8, // All sampled values had magnitude < 1
81 m_timingType(ExplicitTiming), 88 m_timingType(ExplicitTiming),
82 m_timeUnits(TimeSeconds), 89 m_timeUnits(TimeSeconds),
83 m_separator(""), 90 m_separator(""),
84 m_sampleRate(44100), 91 m_sampleRate(44100),
85 m_windowSize(1024), 92 m_windowSize(1024),
93 m_headerStatus(HeaderUnknown),
86 m_columnCount(0), 94 m_columnCount(0),
87 m_variableColumnCount(false), 95 m_variableColumnCount(false),
88 m_audioSampleRange(SampleRangeOther), 96 m_audioSampleRange(SampleRangeOther),
89 m_allowQuoting(true), 97 m_allowQuoting(true),
90 m_maxExampleCols(0) 98 m_maxExampleCols(0)
120 sv_samplerate_t getSampleRate() const { return m_sampleRate; } 128 sv_samplerate_t getSampleRate() const { return m_sampleRate; }
121 int getWindowSize() const { return m_windowSize; } 129 int getWindowSize() const { return m_windowSize; }
122 int getColumnCount() const { return m_columnCount; } 130 int getColumnCount() const { return m_columnCount; }
123 AudioSampleRange getAudioSampleRange() const { return m_audioSampleRange; } 131 AudioSampleRange getAudioSampleRange() const { return m_audioSampleRange; }
124 bool getAllowQuoting() const { return m_allowQuoting; } 132 bool getAllowQuoting() const { return m_allowQuoting; }
133 HeaderStatus getHeaderStatus() const { return m_headerStatus; }
125 QChar getSeparator() const { 134 QChar getSeparator() const {
126 if (m_separator == "") return ','; 135 if (m_separator == "") return ',';
127 else return m_separator[0]; 136 else return m_separator[0];
128 } 137 }
129 // set rather than QSet to ensure a fixed order 138 // set rather than QSet to ensure a fixed order
138 void setSampleRate(sv_samplerate_t r) { m_sampleRate = r; } 147 void setSampleRate(sv_samplerate_t r) { m_sampleRate = r; }
139 void setWindowSize(int s) { m_windowSize = s; } 148 void setWindowSize(int s) { m_windowSize = s; }
140 void setColumnCount(int c) { m_columnCount = c; } 149 void setColumnCount(int c) { m_columnCount = c; }
141 void setAudioSampleRange(AudioSampleRange r) { m_audioSampleRange = r; } 150 void setAudioSampleRange(AudioSampleRange r) { m_audioSampleRange = r; }
142 void setAllowQuoting(bool q) { m_allowQuoting = q; } 151 void setAllowQuoting(bool q) { m_allowQuoting = q; }
152 void setHeaderStatus(HeaderStatus s) { m_headerStatus = s; }
143 153
144 QList<ColumnPurpose> getColumnPurposes() const { return m_columnPurposes; } 154 QList<ColumnPurpose> getColumnPurposes() const;
145 void setColumnPurposes(QList<ColumnPurpose> cl) { m_columnPurposes = cl; } 155 void setColumnPurposes(QList<ColumnPurpose> cl);
146 156
147 ColumnPurpose getColumnPurpose(int i);
148 ColumnPurpose getColumnPurpose(int i) const; 157 ColumnPurpose getColumnPurpose(int i) const;
149 void setColumnPurpose(int i, ColumnPurpose p); 158 void setColumnPurpose(int i, ColumnPurpose p);
150 159
151 // read-only; only valid if format has been guessed: 160 // only valid if format has been guessed:
152 const QList<ColumnQualities> &getColumnQualities() const { 161 QList<ColumnQualities> getColumnQualities() const;
153 return m_columnQualities;
154 }
155 162
156 // read-only; only valid if format has been guessed: 163 // only valid if format has been guessed:
157 const QList<QStringList> &getExample() const { 164 QList<QStringList> getExample() const { return m_example; }
158 return m_example;
159 }
160
161 int getMaxExampleCols() const { return m_maxExampleCols; } 165 int getMaxExampleCols() const { return m_maxExampleCols; }
162 166
163 protected: 167 protected:
164 ModelType m_modelType; 168 ModelType m_modelType;
165 TimingType m_timingType; 169 TimingType m_timingType;
166 TimeUnits m_timeUnits; 170 TimeUnits m_timeUnits;
167 QString m_separator; // "" or a single char - basically QChar option 171 QString m_separator; // "" or a single char - basically QChar option
168 std::set<QChar> m_plausibleSeparators; 172 std::set<QChar> m_plausibleSeparators;
169 sv_samplerate_t m_sampleRate; 173 sv_samplerate_t m_sampleRate;
170 int m_windowSize; 174 int m_windowSize;
175 HeaderStatus m_headerStatus;
171 176
172 int m_columnCount; 177 int m_columnCount;
173 bool m_variableColumnCount; 178 bool m_variableColumnCount;
174 179
175 QList<ColumnQualities> m_columnQualities; 180 std::map<int, ColumnQualities> m_columnQualities;
176 QList<ColumnPurpose> m_columnPurposes; 181 std::map<int, ColumnPurpose> m_columnPurposes;
182 std::map<int, QString> m_columnHeadings;
177 183
184 std::map<int, float> m_prevValues;
185
178 AudioSampleRange m_audioSampleRange; 186 AudioSampleRange m_audioSampleRange;
179
180 QList<float> m_prevValues;
181 187
182 bool m_allowQuoting; 188 bool m_allowQuoting;
183 189
184 QList<QStringList> m_example; 190 QList<QStringList> m_example;
185 int m_maxExampleCols; 191 int m_maxExampleCols;