annotate data/fileio/CSVFormat.h @ 1773:fadd9f8aaa27

This output is too annoying, in the perfectly innocuous case of reading from an aggregate model whose components are different lengths
author Chris Cannam
date Wed, 14 Aug 2019 13:54:23 +0100
parents 9570ef94eaa3
children f0ffc88a36b3
rev   line source
Chris@392 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@392 2
Chris@392 3 /*
Chris@392 4 Sonic Visualiser
Chris@392 5 An audio file viewer and annotation editor.
Chris@392 6 Centre for Digital Music, Queen Mary, University of London.
Chris@392 7 This file copyright 2006 Chris Cannam.
Chris@392 8
Chris@392 9 This program is free software; you can redistribute it and/or
Chris@392 10 modify it under the terms of the GNU General Public License as
Chris@392 11 published by the Free Software Foundation; either version 2 of the
Chris@392 12 License, or (at your option) any later version. See the file
Chris@392 13 COPYING included with this distribution for more information.
Chris@392 14 */
Chris@392 15
Chris@1362 16 #ifndef SV_CSV_FORMAT_H
Chris@1362 17 #define SV_CSV_FORMAT_H
Chris@392 18
Chris@392 19 #include <QString>
Chris@392 20 #include <QStringList>
Chris@392 21
Chris@1585 22 #include <set>
Chris@1585 23
Chris@1047 24 #include "base/BaseTypes.h"
Chris@1047 25
Chris@392 26 class CSVFormat
Chris@392 27 {
Chris@392 28 public:
Chris@392 29 enum ModelType {
Chris@1429 30 OneDimensionalModel,
Chris@1429 31 TwoDimensionalModel,
Chris@628 32 TwoDimensionalModelWithDuration,
Chris@897 33 TwoDimensionalModelWithDurationAndPitch,
Chris@1488 34 ThreeDimensionalModel,
Chris@1488 35 WaveFileModel
Chris@392 36 };
Chris@392 37
Chris@392 38 enum TimingType {
Chris@1429 39 ExplicitTiming,
Chris@1429 40 ImplicitTiming
Chris@392 41 };
Chris@628 42
Chris@392 43 enum TimeUnits {
Chris@1429 44 TimeSeconds,
Chris@990 45 TimeMilliseconds,
Chris@1429 46 TimeAudioFrames,
Chris@1429 47 TimeWindows,
Chris@392 48 };
Chris@392 49
Chris@629 50 enum ColumnPurpose {
Chris@629 51 ColumnUnknown,
Chris@629 52 ColumnStartTime,
Chris@629 53 ColumnEndTime,
Chris@629 54 ColumnDuration,
Chris@629 55 ColumnValue,
Chris@897 56 ColumnPitch,
Chris@629 57 ColumnLabel
Chris@629 58 };
Chris@629 59
Chris@629 60 enum ColumnQuality {
Chris@1512 61 ColumnNumeric = 1, // No non-numeric values were seen in sample
Chris@1512 62 ColumnIntegral = 2, // All sampled values were integers
Chris@1512 63 ColumnIncreasing = 4, // Sampled values were monotonically increasing
Chris@1512 64 ColumnSmall = 8, // All sampled values had magnitude < 1
Chris@1512 65 ColumnLarge = 16, // Values "quickly" grew to over 1000
Chris@1512 66 ColumnSigned = 32, // Some negative values were seen
Chris@1512 67 ColumnNearEmpty = 64, // Nothing in this column beyond first row
Chris@629 68 };
Chris@629 69 typedef unsigned int ColumnQualities;
Chris@392 70
Chris@1515 71 enum AudioSampleRange {
Chris@1515 72 SampleRangeSigned1 = 0, // -1 .. 1
Chris@1515 73 SampleRangeUnsigned255, // 0 .. 255
Chris@1515 74 SampleRangeSigned32767, // -32768 .. 32767
Chris@1515 75 SampleRangeOther // Other/unknown: Normalise on load
Chris@1515 76 };
Chris@1515 77
Chris@392 78 CSVFormat() : // arbitrary defaults
Chris@392 79 m_modelType(TwoDimensionalModel),
Chris@392 80 m_timingType(ExplicitTiming),
Chris@392 81 m_timeUnits(TimeSeconds),
Chris@1585 82 m_separator(""),
Chris@392 83 m_sampleRate(44100),
Chris@392 84 m_windowSize(1024),
Chris@629 85 m_columnCount(0),
Chris@629 86 m_variableColumnCount(false),
Chris@1516 87 m_audioSampleRange(SampleRangeOther),
Chris@629 88 m_allowQuoting(true),
Chris@629 89 m_maxExampleCols(0)
Chris@392 90 { }
Chris@629 91
Chris@629 92 CSVFormat(QString path); // guess format
Chris@629 93
Chris@629 94 /**
Chris@629 95 * Guess the format of the given CSV file, setting the fields in
Chris@629 96 * this object accordingly. If the current separator is the empty
Chris@629 97 * string, the separator character will also be guessed; otherwise
Chris@629 98 * the current separator will be used. The other properties of
Chris@629 99 * this object will be set according to guesses from the file.
Chris@1524 100 *
Chris@1524 101 * The properties that are guessed from the file contents are:
Chris@1524 102 * separator, column count, variable-column-count flag, audio
Chris@1524 103 * sample range, timing type, time units, column qualities, column
Chris@1524 104 * purposes, and model type. The sample rate and window size
Chris@1524 105 * cannot be guessed and will not be changed by this function.
Chris@1524 106 * Note also that this function will never guess WaveFileModel for
Chris@1524 107 * the model type.
Chris@1524 108 *
Chris@1524 109 * Return false if there is some fundamental error, e.g. the file
Chris@1524 110 * could not be opened at all. Return true otherwise. Note that
Chris@1524 111 * this function returns true even if the file doesn't appear to
Chris@1524 112 * make much sense as a data format.
Chris@629 113 */
Chris@1524 114 bool guessFormatFor(QString path);
Chris@628 115
Chris@628 116 ModelType getModelType() const { return m_modelType; }
Chris@628 117 TimingType getTimingType() const { return m_timingType; }
Chris@628 118 TimeUnits getTimeUnits() const { return m_timeUnits; }
Chris@1047 119 sv_samplerate_t getSampleRate() const { return m_sampleRate; }
Chris@929 120 int getWindowSize() const { return m_windowSize; }
Chris@630 121 int getColumnCount() const { return m_columnCount; }
Chris@1516 122 AudioSampleRange getAudioSampleRange() const { return m_audioSampleRange; }
Chris@631 123 bool getAllowQuoting() const { return m_allowQuoting; }
Chris@631 124 QChar getSeparator() const {
Chris@1585 125 if (m_separator == "") return ',';
Chris@631 126 else return m_separator[0];
Chris@631 127 }
Chris@1585 128 // set rather than QSet to ensure a fixed order
Chris@1585 129 std::set<QChar> getPlausibleSeparators() const {
Chris@1585 130 return m_plausibleSeparators;
Chris@1585 131 }
Chris@630 132
Chris@628 133 void setModelType(ModelType t) { m_modelType = t; }
Chris@628 134 void setTimingType(TimingType t) { m_timingType = t; }
Chris@628 135 void setTimeUnits(TimeUnits t) { m_timeUnits = t; }
Chris@631 136 void setSeparator(QChar s) { m_separator = s; }
Chris@1047 137 void setSampleRate(sv_samplerate_t r) { m_sampleRate = r; }
Chris@1009 138 void setWindowSize(int s) { m_windowSize = s; }
Chris@630 139 void setColumnCount(int c) { m_columnCount = c; }
Chris@1516 140 void setAudioSampleRange(AudioSampleRange r) { m_audioSampleRange = r; }
Chris@631 141 void setAllowQuoting(bool q) { m_allowQuoting = q; }
Chris@392 142
Chris@631 143 QList<ColumnPurpose> getColumnPurposes() const { return m_columnPurposes; }
Chris@629 144 void setColumnPurposes(QList<ColumnPurpose> cl) { m_columnPurposes = cl; }
Chris@631 145
Chris@631 146 ColumnPurpose getColumnPurpose(int i);
Chris@631 147 ColumnPurpose getColumnPurpose(int i) const;
Chris@631 148 void setColumnPurpose(int i, ColumnPurpose p);
Chris@392 149
Chris@629 150 // read-only; only valid if format has been guessed:
Chris@1510 151 const QList<ColumnQualities> &getColumnQualities() const {
Chris@1510 152 return m_columnQualities;
Chris@1510 153 }
Chris@629 154
Chris@629 155 // read-only; only valid if format has been guessed:
Chris@1510 156 const QList<QStringList> &getExample() const {
Chris@1510 157 return m_example;
Chris@1510 158 }
Chris@1510 159
Chris@392 160 int getMaxExampleCols() const { return m_maxExampleCols; }
Chris@1429 161
Chris@392 162 protected:
Chris@628 163 ModelType m_modelType;
Chris@628 164 TimingType m_timingType;
Chris@628 165 TimeUnits m_timeUnits;
Chris@1585 166 QString m_separator; // "" or a single char - basically QChar option
Chris@1585 167 std::set<QChar> m_plausibleSeparators;
Chris@1047 168 sv_samplerate_t m_sampleRate;
Chris@929 169 int m_windowSize;
Chris@392 170
Chris@629 171 int m_columnCount;
Chris@629 172 bool m_variableColumnCount;
Chris@629 173
Chris@629 174 QList<ColumnQualities> m_columnQualities;
Chris@629 175 QList<ColumnPurpose> m_columnPurposes;
Chris@629 176
Chris@1515 177 AudioSampleRange m_audioSampleRange;
Chris@1515 178
Chris@629 179 QList<float> m_prevValues;
Chris@629 180
Chris@629 181 bool m_allowQuoting;
Chris@392 182
Chris@392 183 QList<QStringList> m_example;
Chris@392 184 int m_maxExampleCols;
Chris@629 185
Chris@629 186 void guessSeparator(QString line);
Chris@629 187 void guessQualities(QString line, int lineno);
Chris@629 188 void guessPurposes();
Chris@1515 189 void guessAudioSampleRange();
Chris@392 190 };
Chris@392 191
Chris@392 192 #endif