annotate data/fileio/CSVFormat.h @ 1524:64ef24ebb19c

Some CSV format tests and minor fixes
author Chris Cannam
date Fri, 14 Sep 2018 09:25:17 +0100
parents 2d291eac9f21
children 9570ef94eaa3
rev   line source
Chris@392 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@392 2
Chris@392 3 /*
Chris@392 4 Sonic Visualiser
Chris@392 5 An audio file viewer and annotation editor.
Chris@392 6 Centre for Digital Music, Queen Mary, University of London.
Chris@392 7 This file copyright 2006 Chris Cannam.
Chris@392 8
Chris@392 9 This program is free software; you can redistribute it and/or
Chris@392 10 modify it under the terms of the GNU General Public License as
Chris@392 11 published by the Free Software Foundation; either version 2 of the
Chris@392 12 License, or (at your option) any later version. See the file
Chris@392 13 COPYING included with this distribution for more information.
Chris@392 14 */
Chris@392 15
Chris@1362 16 #ifndef SV_CSV_FORMAT_H
Chris@1362 17 #define SV_CSV_FORMAT_H
Chris@392 18
Chris@392 19 #include <QString>
Chris@392 20 #include <QStringList>
Chris@392 21
Chris@1047 22 #include "base/BaseTypes.h"
Chris@1047 23
Chris@392 24 class CSVFormat
Chris@392 25 {
Chris@392 26 public:
Chris@392 27 enum ModelType {
Chris@1429 28 OneDimensionalModel,
Chris@1429 29 TwoDimensionalModel,
Chris@628 30 TwoDimensionalModelWithDuration,
Chris@897 31 TwoDimensionalModelWithDurationAndPitch,
Chris@1488 32 ThreeDimensionalModel,
Chris@1488 33 WaveFileModel
Chris@392 34 };
Chris@392 35
Chris@392 36 enum TimingType {
Chris@1429 37 ExplicitTiming,
Chris@1429 38 ImplicitTiming
Chris@392 39 };
Chris@628 40
Chris@392 41 enum TimeUnits {
Chris@1429 42 TimeSeconds,
Chris@990 43 TimeMilliseconds,
Chris@1429 44 TimeAudioFrames,
Chris@1429 45 TimeWindows,
Chris@392 46 };
Chris@392 47
Chris@629 48 enum ColumnPurpose {
Chris@629 49 ColumnUnknown,
Chris@629 50 ColumnStartTime,
Chris@629 51 ColumnEndTime,
Chris@629 52 ColumnDuration,
Chris@629 53 ColumnValue,
Chris@897 54 ColumnPitch,
Chris@629 55 ColumnLabel
Chris@629 56 };
Chris@629 57
Chris@629 58 enum ColumnQuality {
Chris@1512 59 ColumnNumeric = 1, // No non-numeric values were seen in sample
Chris@1512 60 ColumnIntegral = 2, // All sampled values were integers
Chris@1512 61 ColumnIncreasing = 4, // Sampled values were monotonically increasing
Chris@1512 62 ColumnSmall = 8, // All sampled values had magnitude < 1
Chris@1512 63 ColumnLarge = 16, // Values "quickly" grew to over 1000
Chris@1512 64 ColumnSigned = 32, // Some negative values were seen
Chris@1512 65 ColumnNearEmpty = 64, // Nothing in this column beyond first row
Chris@629 66 };
Chris@629 67 typedef unsigned int ColumnQualities;
Chris@392 68
Chris@1515 69 enum AudioSampleRange {
Chris@1515 70 SampleRangeSigned1 = 0, // -1 .. 1
Chris@1515 71 SampleRangeUnsigned255, // 0 .. 255
Chris@1515 72 SampleRangeSigned32767, // -32768 .. 32767
Chris@1515 73 SampleRangeOther // Other/unknown: Normalise on load
Chris@1515 74 };
Chris@1515 75
Chris@392 76 CSVFormat() : // arbitrary defaults
Chris@392 77 m_modelType(TwoDimensionalModel),
Chris@392 78 m_timingType(ExplicitTiming),
Chris@392 79 m_timeUnits(TimeSeconds),
Chris@392 80 m_separator(","),
Chris@392 81 m_sampleRate(44100),
Chris@392 82 m_windowSize(1024),
Chris@629 83 m_columnCount(0),
Chris@629 84 m_variableColumnCount(false),
Chris@1516 85 m_audioSampleRange(SampleRangeOther),
Chris@629 86 m_allowQuoting(true),
Chris@629 87 m_maxExampleCols(0)
Chris@392 88 { }
Chris@629 89
Chris@629 90 CSVFormat(QString path); // guess format
Chris@629 91
Chris@629 92 /**
Chris@629 93 * Guess the format of the given CSV file, setting the fields in
Chris@629 94 * this object accordingly. If the current separator is the empty
Chris@629 95 * string, the separator character will also be guessed; otherwise
Chris@629 96 * the current separator will be used. The other properties of
Chris@629 97 * this object will be set according to guesses from the file.
Chris@1524 98 *
Chris@1524 99 * The properties that are guessed from the file contents are:
Chris@1524 100 * separator, column count, variable-column-count flag, audio
Chris@1524 101 * sample range, timing type, time units, column qualities, column
Chris@1524 102 * purposes, and model type. The sample rate and window size
Chris@1524 103 * cannot be guessed and will not be changed by this function.
Chris@1524 104 * Note also that this function will never guess WaveFileModel for
Chris@1524 105 * the model type.
Chris@1524 106 *
Chris@1524 107 * Return false if there is some fundamental error, e.g. the file
Chris@1524 108 * could not be opened at all. Return true otherwise. Note that
Chris@1524 109 * this function returns true even if the file doesn't appear to
Chris@1524 110 * make much sense as a data format.
Chris@629 111 */
Chris@1524 112 bool guessFormatFor(QString path);
Chris@628 113
Chris@628 114 ModelType getModelType() const { return m_modelType; }
Chris@628 115 TimingType getTimingType() const { return m_timingType; }
Chris@628 116 TimeUnits getTimeUnits() const { return m_timeUnits; }
Chris@1047 117 sv_samplerate_t getSampleRate() const { return m_sampleRate; }
Chris@929 118 int getWindowSize() const { return m_windowSize; }
Chris@630 119 int getColumnCount() const { return m_columnCount; }
Chris@1516 120 AudioSampleRange getAudioSampleRange() const { return m_audioSampleRange; }
Chris@631 121 bool getAllowQuoting() const { return m_allowQuoting; }
Chris@631 122 QChar getSeparator() const {
Chris@631 123 if (m_separator == "") return ' ';
Chris@631 124 else return m_separator[0];
Chris@631 125 }
Chris@630 126
Chris@628 127 void setModelType(ModelType t) { m_modelType = t; }
Chris@628 128 void setTimingType(TimingType t) { m_timingType = t; }
Chris@628 129 void setTimeUnits(TimeUnits t) { m_timeUnits = t; }
Chris@631 130 void setSeparator(QChar s) { m_separator = s; }
Chris@1047 131 void setSampleRate(sv_samplerate_t r) { m_sampleRate = r; }
Chris@1009 132 void setWindowSize(int s) { m_windowSize = s; }
Chris@630 133 void setColumnCount(int c) { m_columnCount = c; }
Chris@1516 134 void setAudioSampleRange(AudioSampleRange r) { m_audioSampleRange = r; }
Chris@631 135 void setAllowQuoting(bool q) { m_allowQuoting = q; }
Chris@392 136
Chris@631 137 QList<ColumnPurpose> getColumnPurposes() const { return m_columnPurposes; }
Chris@629 138 void setColumnPurposes(QList<ColumnPurpose> cl) { m_columnPurposes = cl; }
Chris@631 139
Chris@631 140 ColumnPurpose getColumnPurpose(int i);
Chris@631 141 ColumnPurpose getColumnPurpose(int i) const;
Chris@631 142 void setColumnPurpose(int i, ColumnPurpose p);
Chris@392 143
Chris@629 144 // read-only; only valid if format has been guessed:
Chris@1510 145 const QList<ColumnQualities> &getColumnQualities() const {
Chris@1510 146 return m_columnQualities;
Chris@1510 147 }
Chris@629 148
Chris@629 149 // read-only; only valid if format has been guessed:
Chris@1510 150 const QList<QStringList> &getExample() const {
Chris@1510 151 return m_example;
Chris@1510 152 }
Chris@1510 153
Chris@392 154 int getMaxExampleCols() const { return m_maxExampleCols; }
Chris@1429 155
Chris@392 156 protected:
Chris@628 157 ModelType m_modelType;
Chris@628 158 TimingType m_timingType;
Chris@628 159 TimeUnits m_timeUnits;
Chris@628 160 QString m_separator;
Chris@1047 161 sv_samplerate_t m_sampleRate;
Chris@929 162 int m_windowSize;
Chris@392 163
Chris@629 164 int m_columnCount;
Chris@629 165 bool m_variableColumnCount;
Chris@629 166
Chris@629 167 QList<ColumnQualities> m_columnQualities;
Chris@629 168 QList<ColumnPurpose> m_columnPurposes;
Chris@629 169
Chris@1515 170 AudioSampleRange m_audioSampleRange;
Chris@1515 171
Chris@629 172 QList<float> m_prevValues;
Chris@629 173
Chris@629 174 bool m_allowQuoting;
Chris@392 175
Chris@392 176 QList<QStringList> m_example;
Chris@392 177 int m_maxExampleCols;
Chris@629 178
Chris@629 179 void guessSeparator(QString line);
Chris@629 180 void guessQualities(QString line, int lineno);
Chris@629 181 void guessPurposes();
Chris@1515 182 void guessAudioSampleRange();
Chris@392 183 };
Chris@392 184
Chris@392 185 #endif