annotate data/fileio/CSVFormat.cpp @ 1552:05c3fbaec8ea

Introduce RelativelyFineZoomConstraint, which encodes more-or-less the scheme that was already used for the horizontal thumbwheel in the pane (which overrode the layers' own zoom constraints unless they said they couldn't support any other)
author Chris Cannam
date Wed, 10 Oct 2018 14:32:34 +0100
parents a92e94215863
children 9570ef94eaa3
rev   line source
Chris@392 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@392 2
Chris@392 3 /*
Chris@392 4 Sonic Visualiser
Chris@392 5 An audio file viewer and annotation editor.
Chris@392 6 Centre for Digital Music, Queen Mary, University of London.
Chris@392 7 This file copyright 2006 Chris Cannam.
Chris@392 8
Chris@392 9 This program is free software; you can redistribute it and/or
Chris@392 10 modify it under the terms of the GNU General Public License as
Chris@392 11 published by the Free Software Foundation; either version 2 of the
Chris@392 12 License, or (at your option) any later version. See the file
Chris@392 13 COPYING included with this distribution for more information.
Chris@392 14 */
Chris@392 15
Chris@392 16 #include "CSVFormat.h"
Chris@392 17
Chris@629 18 #include "base/StringBits.h"
Chris@629 19
Chris@392 20 #include <QFile>
Chris@392 21 #include <QString>
Chris@392 22 #include <QRegExp>
Chris@392 23 #include <QStringList>
Chris@392 24 #include <QTextStream>
Chris@392 25
Chris@392 26 #include <iostream>
Chris@392 27
Chris@1362 28 #include "base/Debug.h"
Chris@1362 29
Chris@629 30 CSVFormat::CSVFormat(QString path) :
Chris@629 31 m_separator(""),
Chris@392 32 m_sampleRate(44100),
Chris@392 33 m_windowSize(1024),
Chris@629 34 m_allowQuoting(true)
Chris@392 35 {
Chris@1524 36 (void)guessFormatFor(path);
Chris@629 37 }
Chris@629 38
Chris@1524 39 bool
Chris@629 40 CSVFormat::guessFormatFor(QString path)
Chris@629 41 {
Chris@1524 42 m_separator = ""; // to prompt guessing for it
Chris@1524 43
Chris@629 44 m_modelType = TwoDimensionalModel;
Chris@629 45 m_timingType = ExplicitTiming;
Chris@629 46 m_timeUnits = TimeSeconds;
Chris@629 47
Chris@629 48 m_maxExampleCols = 0;
Chris@629 49 m_columnCount = 0;
Chris@629 50 m_variableColumnCount = false;
Chris@629 51
Chris@629 52 m_example.clear();
Chris@629 53 m_columnQualities.clear();
Chris@629 54 m_columnPurposes.clear();
Chris@629 55 m_prevValues.clear();
Chris@629 56
Chris@629 57 QFile file(path);
Chris@1524 58 if (!file.exists()) {
Chris@1524 59 SVCERR << "CSVFormat::guessFormatFor(" << path
Chris@1524 60 << "): File does not exist" << endl;
Chris@1524 61 return false;
Chris@1524 62 }
Chris@1524 63 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
Chris@1524 64 SVCERR << "CSVFormat::guessFormatFor(" << path
Chris@1524 65 << "): File could not be opened for reading" << endl;
Chris@1524 66 return false;
Chris@1524 67 }
Chris@1524 68 SVDEBUG << "CSVFormat::guessFormatFor(" << path << ")" << endl;
Chris@392 69
Chris@392 70 QTextStream in(&file);
Chris@392 71 in.seek(0);
Chris@392 72
Chris@629 73 int lineno = 0;
Chris@392 74
Chris@392 75 while (!in.atEnd()) {
Chris@392 76
Chris@392 77 // See comment about line endings in CSVFileReader::load()
Chris@392 78
Chris@392 79 QString chunk = in.readLine();
Chris@392 80 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@392 81
Chris@897 82 for (int li = 0; li < lines.size(); ++li) {
Chris@392 83
Chris@392 84 QString line = lines[li];
Chris@1512 85 if (line.startsWith("#") || line == "") {
Chris@1512 86 continue;
Chris@1512 87 }
Chris@392 88
Chris@629 89 guessQualities(line, lineno);
Chris@392 90
Chris@840 91 ++lineno;
Chris@629 92 }
Chris@840 93
Chris@1512 94 if (lineno >= 150) break;
Chris@629 95 }
Chris@392 96
Chris@629 97 guessPurposes();
Chris@1515 98 guessAudioSampleRange();
Chris@1524 99
Chris@1524 100 return true;
Chris@629 101 }
Chris@629 102
Chris@629 103 void
Chris@629 104 CSVFormat::guessSeparator(QString line)
Chris@629 105 {
Chris@1524 106 QString candidates = "\t|,/: ";
Chris@1524 107
Chris@1524 108 for (int i = 0; i < candidates.length(); ++i) {
Chris@1524 109 auto bits = StringBits::split(line, candidates[i], m_allowQuoting);
Chris@1524 110 if (bits.size() >= 2) {
Chris@1524 111 SVDEBUG << "Successfully split the line into:" << endl;
Chris@1524 112 for (auto b: bits) {
Chris@1524 113 SVDEBUG << b << endl;
Chris@1524 114 }
Chris@629 115 m_separator = candidates[i];
Chris@1510 116 SVDEBUG << "Estimated column separator: '" << m_separator
Chris@1510 117 << "'" << endl;
Chris@629 118 return;
Chris@629 119 }
Chris@629 120 }
Chris@629 121 }
Chris@629 122
Chris@629 123 void
Chris@629 124 CSVFormat::guessQualities(QString line, int lineno)
Chris@629 125 {
Chris@1524 126 if (m_separator == "") {
Chris@1524 127 guessSeparator(line);
Chris@1524 128 }
Chris@629 129
Chris@1362 130 QStringList list = StringBits::split(line, getSeparator(), m_allowQuoting);
Chris@629 131
Chris@629 132 int cols = list.size();
Chris@991 133 if (lineno == 0 || (cols > m_columnCount)) m_columnCount = cols;
Chris@629 134 if (cols != m_columnCount) m_variableColumnCount = true;
Chris@629 135
Chris@629 136 // All columns are regarded as having these qualities until we see
Chris@629 137 // something that indicates otherwise:
Chris@629 138
Chris@629 139 ColumnQualities defaultQualities =
Chris@1512 140 ColumnNumeric | ColumnIntegral | ColumnSmall |
Chris@1512 141 ColumnIncreasing | ColumnNearEmpty;
Chris@629 142
Chris@629 143 for (int i = 0; i < cols; ++i) {
Chris@1429 144
Chris@629 145 while (m_columnQualities.size() <= i) {
Chris@629 146 m_columnQualities.push_back(defaultQualities);
Chris@629 147 m_prevValues.push_back(0.f);
Chris@629 148 }
Chris@629 149
Chris@629 150 QString s(list[i]);
Chris@629 151 bool ok = false;
Chris@629 152
Chris@629 153 ColumnQualities qualities = m_columnQualities[i];
Chris@629 154
Chris@1523 155 // Looks like this is defined on Windows
Chris@1523 156 #undef small
Chris@1523 157
Chris@629 158 bool numeric = (qualities & ColumnNumeric);
Chris@629 159 bool integral = (qualities & ColumnIntegral);
Chris@629 160 bool increasing = (qualities & ColumnIncreasing);
Chris@1512 161 bool small = (qualities & ColumnSmall);
Chris@629 162 bool large = (qualities & ColumnLarge); // this one defaults to off
Chris@1512 163 bool signd = (qualities & ColumnSigned); // also defaults to off
Chris@1021 164 bool emptyish = (qualities & ColumnNearEmpty);
Chris@629 165
Chris@1021 166 if (lineno > 1 && s.trimmed() != "") {
Chris@1021 167 emptyish = false;
Chris@1021 168 }
Chris@1021 169
Chris@629 170 float value = 0.f;
Chris@629 171
Chris@629 172 //!!! how to take into account headers?
Chris@629 173
Chris@629 174 if (numeric) {
Chris@629 175 value = s.toFloat(&ok);
Chris@629 176 if (!ok) {
Chris@629 177 value = (float)StringBits::stringToDoubleLocaleFree(s, &ok);
Chris@629 178 }
Chris@629 179 if (ok) {
Chris@1512 180 if (lineno < 2 && value > 1000.f) {
Chris@1512 181 large = true;
Chris@1512 182 }
Chris@1512 183 if (value < 0.f) {
Chris@1512 184 signd = true;
Chris@1512 185 }
Chris@1512 186 if (value < -1.f || value > 1.f) {
Chris@1512 187 small = false;
Chris@1512 188 }
Chris@629 189 } else {
Chris@629 190 numeric = false;
Chris@1524 191
Chris@1524 192 // If the column is not numeric, it can't be any of
Chris@1524 193 // these things either
Chris@1524 194 integral = false;
Chris@1524 195 increasing = false;
Chris@1524 196 small = false;
Chris@1524 197 large = false;
Chris@1524 198 signd = false;
Chris@629 199 }
Chris@629 200 }
Chris@629 201
Chris@629 202 if (numeric) {
Chris@629 203
Chris@629 204 if (integral) {
Chris@629 205 if (s.contains('.') || s.contains(',')) {
Chris@629 206 integral = false;
Chris@392 207 }
Chris@392 208 }
Chris@392 209
Chris@629 210 if (increasing) {
Chris@629 211 if (lineno > 0 && value <= m_prevValues[i]) {
Chris@629 212 increasing = false;
Chris@392 213 }
Chris@392 214 }
Chris@392 215
Chris@629 216 m_prevValues[i] = value;
Chris@629 217 }
Chris@1524 218
Chris@629 219 m_columnQualities[i] =
Chris@629 220 (numeric ? ColumnNumeric : 0) |
Chris@629 221 (integral ? ColumnIntegral : 0) |
Chris@629 222 (increasing ? ColumnIncreasing : 0) |
Chris@1512 223 (small ? ColumnSmall : 0) |
Chris@1021 224 (large ? ColumnLarge : 0) |
Chris@1512 225 (signd ? ColumnSigned : 0) |
Chris@1021 226 (emptyish ? ColumnNearEmpty : 0);
Chris@629 227 }
Chris@392 228
Chris@629 229 if (lineno < 10) {
Chris@629 230 m_example.push_back(list);
Chris@629 231 if (lineno == 0 || cols > m_maxExampleCols) {
Chris@629 232 m_maxExampleCols = cols;
Chris@392 233 }
Chris@392 234 }
Chris@392 235
Chris@1362 236 if (lineno < 10) {
Chris@1362 237 SVDEBUG << "Estimated column qualities for line " << lineno << " (reporting up to first 10): ";
Chris@1362 238 for (int i = 0; i < m_columnCount; ++i) {
Chris@1362 239 SVDEBUG << int(m_columnQualities[i]) << " ";
Chris@1362 240 }
Chris@1362 241 SVDEBUG << endl;
Chris@1362 242 }
Chris@629 243 }
Chris@629 244
Chris@629 245 void
Chris@629 246 CSVFormat::guessPurposes()
Chris@629 247 {
Chris@629 248 m_timingType = CSVFormat::ImplicitTiming;
Chris@629 249 m_timeUnits = CSVFormat::TimeWindows;
Chris@1429 250
Chris@629 251 int timingColumnCount = 0;
Chris@1525 252 bool haveDurationOrEndTime = false;
Chris@1021 253
Chris@1510 254 SVDEBUG << "Estimated column qualities overall: ";
Chris@1510 255 for (int i = 0; i < m_columnCount; ++i) {
Chris@1510 256 SVDEBUG << int(m_columnQualities[i]) << " ";
Chris@1510 257 }
Chris@1510 258 SVDEBUG << endl;
Chris@1510 259
Chris@1021 260 // if our first column has zero or one entries in it and the rest
Chris@1021 261 // have more, then we'll default to ignoring the first column and
Chris@1021 262 // counting the next one as primary. (e.g. Sonic Annotator output
Chris@1021 263 // with filename at start of first column.)
Chris@1021 264
Chris@1021 265 int primaryColumnNo = 0;
Chris@1021 266
Chris@1021 267 if (m_columnCount >= 2) {
Chris@1021 268 if ( (m_columnQualities[0] & ColumnNearEmpty) &&
Chris@1021 269 !(m_columnQualities[1] & ColumnNearEmpty)) {
Chris@1021 270 primaryColumnNo = 1;
Chris@1021 271 }
Chris@1021 272 }
Chris@629 273
Chris@629 274 for (int i = 0; i < m_columnCount; ++i) {
Chris@629 275
Chris@629 276 ColumnPurpose purpose = ColumnUnknown;
Chris@1021 277
Chris@1021 278 if (i < primaryColumnNo) {
Chris@1021 279 setColumnPurpose(i, purpose);
Chris@1021 280 continue;
Chris@1021 281 }
Chris@1021 282
Chris@1021 283 bool primary = (i == primaryColumnNo);
Chris@392 284
Chris@629 285 ColumnQualities qualities = m_columnQualities[i];
Chris@392 286
Chris@629 287 bool numeric = (qualities & ColumnNumeric);
Chris@629 288 bool integral = (qualities & ColumnIntegral);
Chris@629 289 bool increasing = (qualities & ColumnIncreasing);
Chris@629 290 bool large = (qualities & ColumnLarge);
Chris@629 291
Chris@629 292 bool timingColumn = (numeric && increasing);
Chris@629 293
Chris@629 294 if (timingColumn) {
Chris@629 295
Chris@629 296 ++timingColumnCount;
Chris@629 297
Chris@629 298 if (primary) {
Chris@629 299
Chris@629 300 purpose = ColumnStartTime;
Chris@629 301
Chris@629 302 m_timingType = ExplicitTiming;
Chris@629 303
Chris@629 304 if (integral && large) {
Chris@629 305 m_timeUnits = TimeAudioFrames;
Chris@629 306 } else {
Chris@629 307 m_timeUnits = TimeSeconds;
Chris@629 308 }
Chris@629 309
Chris@629 310 } else {
Chris@629 311
Chris@629 312 if (timingColumnCount == 2 && m_timingType == ExplicitTiming) {
Chris@629 313 purpose = ColumnEndTime;
Chris@1525 314 haveDurationOrEndTime = true;
Chris@629 315 }
Chris@629 316 }
Chris@629 317 }
Chris@629 318
Chris@629 319 if (purpose == ColumnUnknown) {
Chris@629 320 if (numeric) {
Chris@629 321 purpose = ColumnValue;
Chris@629 322 } else {
Chris@629 323 purpose = ColumnLabel;
Chris@629 324 }
Chris@629 325 }
Chris@629 326
Chris@631 327 setColumnPurpose(i, purpose);
Chris@629 328 }
Chris@629 329
Chris@629 330 int valueCount = 0;
Chris@629 331 for (int i = 0; i < m_columnCount; ++i) {
Chris@629 332 if (m_columnPurposes[i] == ColumnValue) ++valueCount;
Chris@629 333 }
Chris@629 334
Chris@630 335 if (valueCount == 2 && timingColumnCount == 1) {
Chris@630 336 // If we have exactly two apparent value columns and only one
Chris@630 337 // timing column, but one value column is integral and the
Chris@630 338 // other is not, guess that whichever one matches the integral
Chris@630 339 // status of the time column is either duration or end time
Chris@630 340 if (m_timingType == ExplicitTiming) {
Chris@630 341 int a = -1, b = -1;
Chris@630 342 for (int i = 0; i < m_columnCount; ++i) {
Chris@630 343 if (m_columnPurposes[i] == ColumnValue) {
Chris@630 344 if (a == -1) a = i;
Chris@630 345 else b = i;
Chris@630 346 }
Chris@630 347 }
Chris@630 348 if ((m_columnQualities[a] & ColumnIntegral) !=
Chris@630 349 (m_columnQualities[b] & ColumnIntegral)) {
Chris@630 350 int timecol = a;
Chris@630 351 if ((m_columnQualities[a] & ColumnIntegral) !=
Chris@630 352 (m_columnQualities[0] & ColumnIntegral)) {
Chris@630 353 timecol = b;
Chris@630 354 }
Chris@630 355 if (m_columnQualities[timecol] & ColumnIncreasing) {
Chris@630 356 // This shouldn't happen; should have been settled above
Chris@630 357 m_columnPurposes[timecol] = ColumnEndTime;
Chris@1525 358 haveDurationOrEndTime = true;
Chris@630 359 } else {
Chris@630 360 m_columnPurposes[timecol] = ColumnDuration;
Chris@1525 361 haveDurationOrEndTime = true;
Chris@630 362 }
Chris@630 363 --valueCount;
Chris@630 364 }
Chris@630 365 }
Chris@630 366 }
Chris@630 367
Chris@1525 368 if (timingColumnCount > 1 || haveDurationOrEndTime) {
Chris@631 369 m_modelType = TwoDimensionalModelWithDuration;
Chris@392 370 } else {
Chris@631 371 if (valueCount == 0) {
Chris@631 372 m_modelType = OneDimensionalModel;
Chris@631 373 } else if (valueCount == 1) {
Chris@631 374 m_modelType = TwoDimensionalModel;
Chris@631 375 } else {
Chris@631 376 m_modelType = ThreeDimensionalModel;
Chris@631 377 }
Chris@629 378 }
Chris@392 379
Chris@1362 380 SVDEBUG << "Estimated column purposes: ";
Chris@1362 381 for (int i = 0; i < m_columnCount; ++i) {
Chris@1362 382 SVDEBUG << int(m_columnPurposes[i]) << " ";
Chris@1362 383 }
Chris@1362 384 SVDEBUG << endl;
Chris@392 385
Chris@1362 386 SVDEBUG << "Estimated model type: " << m_modelType << endl;
Chris@1362 387 SVDEBUG << "Estimated timing type: " << m_timingType << endl;
Chris@1362 388 SVDEBUG << "Estimated units: " << m_timeUnits << endl;
Chris@392 389 }
Chris@392 390
Chris@1515 391 void
Chris@1515 392 CSVFormat::guessAudioSampleRange()
Chris@1515 393 {
Chris@1515 394 AudioSampleRange range = SampleRangeSigned1;
Chris@1515 395
Chris@1515 396 range = SampleRangeSigned1;
Chris@1515 397 bool knownSigned = false;
Chris@1515 398 bool knownNonIntegral = false;
Chris@1521 399
Chris@1521 400 SVDEBUG << "CSVFormat::guessAudioSampleRange: starting with assumption of "
Chris@1521 401 << range << endl;
Chris@1515 402
Chris@1515 403 for (int i = 0; i < m_columnCount; ++i) {
Chris@1521 404 if (m_columnPurposes[i] != ColumnValue) {
Chris@1521 405 SVDEBUG << "... column " << i
Chris@1521 406 << " is not apparently a value, ignoring" << endl;
Chris@1521 407 continue;
Chris@1521 408 }
Chris@1515 409 if (!(m_columnQualities[i] & ColumnIntegral)) {
Chris@1515 410 knownNonIntegral = true;
Chris@1515 411 if (range == SampleRangeUnsigned255 ||
Chris@1515 412 range == SampleRangeSigned32767) {
Chris@1515 413 range = SampleRangeOther;
Chris@1515 414 }
Chris@1521 415 SVDEBUG << "... column " << i
Chris@1521 416 << " is non-integral, updating range to " << range << endl;
Chris@1515 417 }
Chris@1515 418 if (m_columnQualities[i] & ColumnLarge) {
Chris@1515 419 if (range == SampleRangeSigned1 ||
Chris@1515 420 range == SampleRangeUnsigned255) {
Chris@1515 421 if (knownNonIntegral) {
Chris@1515 422 range = SampleRangeOther;
Chris@1515 423 } else {
Chris@1515 424 range = SampleRangeSigned32767;
Chris@1515 425 }
Chris@1515 426 }
Chris@1521 427 SVDEBUG << "... column " << i << " is large, updating range to "
Chris@1521 428 << range << endl;
Chris@1515 429 }
Chris@1515 430 if (m_columnQualities[i] & ColumnSigned) {
Chris@1515 431 knownSigned = true;
Chris@1515 432 if (range == SampleRangeUnsigned255) {
Chris@1515 433 range = SampleRangeSigned32767;
Chris@1515 434 }
Chris@1521 435 SVDEBUG << "... column " << i << " is signed, updating range to "
Chris@1521 436 << range << endl;
Chris@1515 437 }
Chris@1515 438 if (!(m_columnQualities[i] & ColumnSmall)) {
Chris@1515 439 if (range == SampleRangeSigned1) {
Chris@1515 440 if (knownNonIntegral) {
Chris@1515 441 range = SampleRangeOther;
Chris@1515 442 } else if (knownSigned) {
Chris@1515 443 range = SampleRangeSigned32767;
Chris@1515 444 } else {
Chris@1515 445 range = SampleRangeUnsigned255;
Chris@1515 446 }
Chris@1515 447 }
Chris@1521 448 SVDEBUG << "... column " << i << " is not small, updating range to "
Chris@1521 449 << range << endl;
Chris@1515 450 }
Chris@1515 451 }
Chris@1515 452
Chris@1521 453 SVDEBUG << "CSVFormat::guessAudioSampleRange: ended up with range "
Chris@1521 454 << range << endl;
Chris@1521 455
Chris@1515 456 m_audioSampleRange = range;
Chris@1515 457 }
Chris@1515 458
Chris@631 459 CSVFormat::ColumnPurpose
Chris@631 460 CSVFormat::getColumnPurpose(int i)
Chris@631 461 {
Chris@631 462 while (m_columnPurposes.size() <= i) {
Chris@631 463 m_columnPurposes.push_back(ColumnUnknown);
Chris@631 464 }
Chris@631 465 return m_columnPurposes[i];
Chris@631 466 }
Chris@629 467
Chris@631 468 CSVFormat::ColumnPurpose
Chris@631 469 CSVFormat::getColumnPurpose(int i) const
Chris@631 470 {
Chris@668 471 if (m_columnPurposes.size() <= i) {
Chris@668 472 return ColumnUnknown;
Chris@668 473 }
Chris@631 474 return m_columnPurposes[i];
Chris@631 475 }
Chris@631 476
Chris@631 477 void
Chris@631 478 CSVFormat::setColumnPurpose(int i, ColumnPurpose p)
Chris@631 479 {
Chris@631 480 while (m_columnPurposes.size() <= i) {
Chris@631 481 m_columnPurposes.push_back(ColumnUnknown);
Chris@631 482 }
Chris@631 483 m_columnPurposes[i] = p;
Chris@631 484 }
Chris@631 485
Chris@631 486
Chris@631 487
Chris@631 488