annotate data/fileio/CSVFormat.cpp @ 1496:fde8c497373f

Avoid crashing if an effects plugin can't be instantiated and so the output vector is empty in the transformer's run() method
author Chris Cannam
date Mon, 13 Aug 2018 15:25:32 +0100
parents 48e9f538e6e9
children 5f1b2a117a4f
rev   line source
Chris@392 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@392 2
Chris@392 3 /*
Chris@392 4 Sonic Visualiser
Chris@392 5 An audio file viewer and annotation editor.
Chris@392 6 Centre for Digital Music, Queen Mary, University of London.
Chris@392 7 This file copyright 2006 Chris Cannam.
Chris@392 8
Chris@392 9 This program is free software; you can redistribute it and/or
Chris@392 10 modify it under the terms of the GNU General Public License as
Chris@392 11 published by the Free Software Foundation; either version 2 of the
Chris@392 12 License, or (at your option) any later version. See the file
Chris@392 13 COPYING included with this distribution for more information.
Chris@392 14 */
Chris@392 15
Chris@392 16 #include "CSVFormat.h"
Chris@392 17
Chris@629 18 #include "base/StringBits.h"
Chris@629 19
Chris@392 20 #include <QFile>
Chris@392 21 #include <QString>
Chris@392 22 #include <QRegExp>
Chris@392 23 #include <QStringList>
Chris@392 24 #include <QTextStream>
Chris@392 25
Chris@392 26 #include <iostream>
Chris@392 27
Chris@1362 28 #include "base/Debug.h"
Chris@1362 29
Chris@629 30 CSVFormat::CSVFormat(QString path) :
Chris@629 31 m_separator(""),
Chris@392 32 m_sampleRate(44100),
Chris@392 33 m_windowSize(1024),
Chris@629 34 m_allowQuoting(true)
Chris@392 35 {
Chris@629 36 guessFormatFor(path);
Chris@629 37 }
Chris@629 38
Chris@629 39 void
Chris@629 40 CSVFormat::guessFormatFor(QString path)
Chris@629 41 {
Chris@629 42 m_modelType = TwoDimensionalModel;
Chris@629 43 m_timingType = ExplicitTiming;
Chris@629 44 m_timeUnits = TimeSeconds;
Chris@629 45
Chris@629 46 m_maxExampleCols = 0;
Chris@629 47 m_columnCount = 0;
Chris@629 48 m_variableColumnCount = false;
Chris@629 49
Chris@629 50 m_example.clear();
Chris@629 51 m_columnQualities.clear();
Chris@629 52 m_columnPurposes.clear();
Chris@629 53 m_prevValues.clear();
Chris@629 54
Chris@629 55 QFile file(path);
Chris@392 56 if (!file.exists()) return;
Chris@392 57 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return;
Chris@392 58
Chris@392 59 QTextStream in(&file);
Chris@392 60 in.seek(0);
Chris@392 61
Chris@629 62 int lineno = 0;
Chris@392 63
Chris@392 64 while (!in.atEnd()) {
Chris@392 65
Chris@392 66 // See comment about line endings in CSVFileReader::load()
Chris@392 67
Chris@392 68 QString chunk = in.readLine();
Chris@392 69 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@392 70
Chris@897 71 for (int li = 0; li < lines.size(); ++li) {
Chris@392 72
Chris@392 73 QString line = lines[li];
Chris@629 74 if (line.startsWith("#") || line == "") continue;
Chris@392 75
Chris@629 76 guessQualities(line, lineno);
Chris@392 77
Chris@840 78 ++lineno;
Chris@629 79 }
Chris@840 80
Chris@840 81 if (lineno >= 50) break;
Chris@629 82 }
Chris@392 83
Chris@629 84 guessPurposes();
Chris@629 85 }
Chris@629 86
Chris@629 87 void
Chris@629 88 CSVFormat::guessSeparator(QString line)
Chris@629 89 {
Chris@629 90 char candidates[] = { ',', '\t', ' ', '|', '/', ':' };
Chris@897 91 for (int i = 0; i < int(sizeof(candidates)/sizeof(candidates[0])); ++i) {
Chris@629 92 if (StringBits::split(line, candidates[i], m_allowQuoting).size() >= 2) {
Chris@629 93 m_separator = candidates[i];
Chris@629 94 return;
Chris@629 95 }
Chris@629 96 }
Chris@629 97 }
Chris@629 98
Chris@629 99 void
Chris@629 100 CSVFormat::guessQualities(QString line, int lineno)
Chris@629 101 {
Chris@629 102 if (m_separator == "") guessSeparator(line);
Chris@629 103
Chris@1362 104 QStringList list = StringBits::split(line, getSeparator(), m_allowQuoting);
Chris@629 105
Chris@629 106 int cols = list.size();
Chris@991 107 if (lineno == 0 || (cols > m_columnCount)) m_columnCount = cols;
Chris@629 108 if (cols != m_columnCount) m_variableColumnCount = true;
Chris@629 109
Chris@629 110 // All columns are regarded as having these qualities until we see
Chris@629 111 // something that indicates otherwise:
Chris@629 112
Chris@629 113 ColumnQualities defaultQualities =
Chris@1021 114 ColumnNumeric | ColumnIntegral | ColumnIncreasing | ColumnNearEmpty;
Chris@629 115
Chris@629 116 for (int i = 0; i < cols; ++i) {
Chris@1429 117
Chris@629 118 while (m_columnQualities.size() <= i) {
Chris@629 119 m_columnQualities.push_back(defaultQualities);
Chris@629 120 m_prevValues.push_back(0.f);
Chris@629 121 }
Chris@629 122
Chris@629 123 QString s(list[i]);
Chris@629 124 bool ok = false;
Chris@629 125
Chris@629 126 ColumnQualities qualities = m_columnQualities[i];
Chris@629 127
Chris@629 128 bool numeric = (qualities & ColumnNumeric);
Chris@629 129 bool integral = (qualities & ColumnIntegral);
Chris@629 130 bool increasing = (qualities & ColumnIncreasing);
Chris@629 131 bool large = (qualities & ColumnLarge); // this one defaults to off
Chris@1021 132 bool emptyish = (qualities & ColumnNearEmpty);
Chris@629 133
Chris@1021 134 if (lineno > 1 && s.trimmed() != "") {
Chris@1021 135 emptyish = false;
Chris@1021 136 }
Chris@1021 137
Chris@629 138 float value = 0.f;
Chris@629 139
Chris@629 140 //!!! how to take into account headers?
Chris@629 141
Chris@629 142 if (numeric) {
Chris@629 143 value = s.toFloat(&ok);
Chris@629 144 if (!ok) {
Chris@629 145 value = (float)StringBits::stringToDoubleLocaleFree(s, &ok);
Chris@629 146 }
Chris@629 147 if (ok) {
Chris@629 148 if (lineno < 2 && value > 1000.f) large = true;
Chris@629 149 } else {
Chris@629 150 numeric = false;
Chris@629 151 }
Chris@629 152 }
Chris@629 153
Chris@629 154 if (numeric) {
Chris@629 155
Chris@629 156 if (integral) {
Chris@629 157 if (s.contains('.') || s.contains(',')) {
Chris@629 158 integral = false;
Chris@392 159 }
Chris@392 160 }
Chris@392 161
Chris@629 162 if (increasing) {
Chris@629 163 if (lineno > 0 && value <= m_prevValues[i]) {
Chris@629 164 increasing = false;
Chris@392 165 }
Chris@392 166 }
Chris@392 167
Chris@629 168 m_prevValues[i] = value;
Chris@629 169 }
Chris@392 170
Chris@629 171 m_columnQualities[i] =
Chris@629 172 (numeric ? ColumnNumeric : 0) |
Chris@629 173 (integral ? ColumnIntegral : 0) |
Chris@629 174 (increasing ? ColumnIncreasing : 0) |
Chris@1021 175 (large ? ColumnLarge : 0) |
Chris@1021 176 (emptyish ? ColumnNearEmpty : 0);
Chris@629 177 }
Chris@392 178
Chris@629 179 if (lineno < 10) {
Chris@629 180 m_example.push_back(list);
Chris@629 181 if (lineno == 0 || cols > m_maxExampleCols) {
Chris@629 182 m_maxExampleCols = cols;
Chris@392 183 }
Chris@392 184 }
Chris@392 185
Chris@1362 186 if (lineno < 10) {
Chris@1362 187 SVDEBUG << "Estimated column qualities for line " << lineno << " (reporting up to first 10): ";
Chris@1362 188 for (int i = 0; i < m_columnCount; ++i) {
Chris@1362 189 SVDEBUG << int(m_columnQualities[i]) << " ";
Chris@1362 190 }
Chris@1362 191 SVDEBUG << endl;
Chris@1362 192 }
Chris@629 193 }
Chris@629 194
Chris@629 195 void
Chris@629 196 CSVFormat::guessPurposes()
Chris@629 197 {
Chris@629 198 m_timingType = CSVFormat::ImplicitTiming;
Chris@629 199 m_timeUnits = CSVFormat::TimeWindows;
Chris@1429 200
Chris@629 201 int timingColumnCount = 0;
Chris@1021 202
Chris@1021 203 // if our first column has zero or one entries in it and the rest
Chris@1021 204 // have more, then we'll default to ignoring the first column and
Chris@1021 205 // counting the next one as primary. (e.g. Sonic Annotator output
Chris@1021 206 // with filename at start of first column.)
Chris@1021 207
Chris@1021 208 int primaryColumnNo = 0;
Chris@1021 209
Chris@1021 210 if (m_columnCount >= 2) {
Chris@1021 211 if ( (m_columnQualities[0] & ColumnNearEmpty) &&
Chris@1021 212 !(m_columnQualities[1] & ColumnNearEmpty)) {
Chris@1021 213 primaryColumnNo = 1;
Chris@1021 214 }
Chris@1021 215 }
Chris@629 216
Chris@629 217 for (int i = 0; i < m_columnCount; ++i) {
Chris@629 218
Chris@629 219 ColumnPurpose purpose = ColumnUnknown;
Chris@1021 220
Chris@1021 221 if (i < primaryColumnNo) {
Chris@1021 222 setColumnPurpose(i, purpose);
Chris@1021 223 continue;
Chris@1021 224 }
Chris@1021 225
Chris@1021 226 bool primary = (i == primaryColumnNo);
Chris@392 227
Chris@629 228 ColumnQualities qualities = m_columnQualities[i];
Chris@392 229
Chris@629 230 bool numeric = (qualities & ColumnNumeric);
Chris@629 231 bool integral = (qualities & ColumnIntegral);
Chris@629 232 bool increasing = (qualities & ColumnIncreasing);
Chris@629 233 bool large = (qualities & ColumnLarge);
Chris@629 234
Chris@629 235 bool timingColumn = (numeric && increasing);
Chris@629 236
Chris@629 237 if (timingColumn) {
Chris@629 238
Chris@629 239 ++timingColumnCount;
Chris@629 240
Chris@629 241 if (primary) {
Chris@629 242
Chris@629 243 purpose = ColumnStartTime;
Chris@629 244
Chris@629 245 m_timingType = ExplicitTiming;
Chris@629 246
Chris@629 247 if (integral && large) {
Chris@629 248 m_timeUnits = TimeAudioFrames;
Chris@629 249 } else {
Chris@629 250 m_timeUnits = TimeSeconds;
Chris@629 251 }
Chris@629 252
Chris@629 253 } else {
Chris@629 254
Chris@629 255 if (timingColumnCount == 2 && m_timingType == ExplicitTiming) {
Chris@629 256 purpose = ColumnEndTime;
Chris@629 257 }
Chris@629 258 }
Chris@629 259 }
Chris@629 260
Chris@629 261 if (purpose == ColumnUnknown) {
Chris@629 262 if (numeric) {
Chris@629 263 purpose = ColumnValue;
Chris@629 264 } else {
Chris@629 265 purpose = ColumnLabel;
Chris@629 266 }
Chris@629 267 }
Chris@629 268
Chris@631 269 setColumnPurpose(i, purpose);
Chris@629 270 }
Chris@629 271
Chris@629 272 int valueCount = 0;
Chris@629 273 for (int i = 0; i < m_columnCount; ++i) {
Chris@629 274 if (m_columnPurposes[i] == ColumnValue) ++valueCount;
Chris@629 275 }
Chris@629 276
Chris@630 277 if (valueCount == 2 && timingColumnCount == 1) {
Chris@630 278 // If we have exactly two apparent value columns and only one
Chris@630 279 // timing column, but one value column is integral and the
Chris@630 280 // other is not, guess that whichever one matches the integral
Chris@630 281 // status of the time column is either duration or end time
Chris@630 282 if (m_timingType == ExplicitTiming) {
Chris@630 283 int a = -1, b = -1;
Chris@630 284 for (int i = 0; i < m_columnCount; ++i) {
Chris@630 285 if (m_columnPurposes[i] == ColumnValue) {
Chris@630 286 if (a == -1) a = i;
Chris@630 287 else b = i;
Chris@630 288 }
Chris@630 289 }
Chris@630 290 if ((m_columnQualities[a] & ColumnIntegral) !=
Chris@630 291 (m_columnQualities[b] & ColumnIntegral)) {
Chris@630 292 int timecol = a;
Chris@630 293 if ((m_columnQualities[a] & ColumnIntegral) !=
Chris@630 294 (m_columnQualities[0] & ColumnIntegral)) {
Chris@630 295 timecol = b;
Chris@630 296 }
Chris@630 297 if (m_columnQualities[timecol] & ColumnIncreasing) {
Chris@630 298 // This shouldn't happen; should have been settled above
Chris@630 299 m_columnPurposes[timecol] = ColumnEndTime;
Chris@630 300 } else {
Chris@630 301 m_columnPurposes[timecol] = ColumnDuration;
Chris@630 302 }
Chris@630 303 --valueCount;
Chris@630 304 }
Chris@630 305 }
Chris@630 306 }
Chris@630 307
Chris@631 308 if (timingColumnCount > 1) {
Chris@631 309 m_modelType = TwoDimensionalModelWithDuration;
Chris@392 310 } else {
Chris@631 311 if (valueCount == 0) {
Chris@631 312 m_modelType = OneDimensionalModel;
Chris@631 313 } else if (valueCount == 1) {
Chris@631 314 m_modelType = TwoDimensionalModel;
Chris@631 315 } else {
Chris@631 316 m_modelType = ThreeDimensionalModel;
Chris@631 317 }
Chris@629 318 }
Chris@392 319
Chris@1362 320 SVDEBUG << "Estimated column purposes: ";
Chris@1362 321 for (int i = 0; i < m_columnCount; ++i) {
Chris@1362 322 SVDEBUG << int(m_columnPurposes[i]) << " ";
Chris@1362 323 }
Chris@1362 324 SVDEBUG << endl;
Chris@392 325
Chris@1362 326 SVDEBUG << "Estimated model type: " << m_modelType << endl;
Chris@1362 327 SVDEBUG << "Estimated timing type: " << m_timingType << endl;
Chris@1362 328 SVDEBUG << "Estimated units: " << m_timeUnits << endl;
Chris@392 329 }
Chris@392 330
Chris@631 331 CSVFormat::ColumnPurpose
Chris@631 332 CSVFormat::getColumnPurpose(int i)
Chris@631 333 {
Chris@631 334 while (m_columnPurposes.size() <= i) {
Chris@631 335 m_columnPurposes.push_back(ColumnUnknown);
Chris@631 336 }
Chris@631 337 return m_columnPurposes[i];
Chris@631 338 }
Chris@629 339
Chris@631 340 CSVFormat::ColumnPurpose
Chris@631 341 CSVFormat::getColumnPurpose(int i) const
Chris@631 342 {
Chris@668 343 if (m_columnPurposes.size() <= i) {
Chris@668 344 return ColumnUnknown;
Chris@668 345 }
Chris@631 346 return m_columnPurposes[i];
Chris@631 347 }
Chris@631 348
Chris@631 349 void
Chris@631 350 CSVFormat::setColumnPurpose(int i, ColumnPurpose p)
Chris@631 351 {
Chris@631 352 while (m_columnPurposes.size() <= i) {
Chris@631 353 m_columnPurposes.push_back(ColumnUnknown);
Chris@631 354 }
Chris@631 355 m_columnPurposes[i] = p;
Chris@631 356 }
Chris@631 357
Chris@631 358
Chris@631 359
Chris@631 360