annotate data/fileio/CSVFormat.cpp @ 628:001db550bd48

* Add option to import time+duration (or time+endtime) from CSV files (importing to Region layers) * Fix ffwd/rwd in Region layers so as to behave like time-value layers
author Chris Cannam
date Thu, 08 Jul 2010 14:22:28 +0000
parents d095214ffbaf
children 35499d48a5d1
rev   line source
Chris@392 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@392 2
Chris@392 3 /*
Chris@392 4 Sonic Visualiser
Chris@392 5 An audio file viewer and annotation editor.
Chris@392 6 Centre for Digital Music, Queen Mary, University of London.
Chris@392 7 This file copyright 2006 Chris Cannam.
Chris@392 8
Chris@392 9 This program is free software; you can redistribute it and/or
Chris@392 10 modify it under the terms of the GNU General Public License as
Chris@392 11 published by the Free Software Foundation; either version 2 of the
Chris@392 12 License, or (at your option) any later version. See the file
Chris@392 13 COPYING included with this distribution for more information.
Chris@392 14 */
Chris@392 15
Chris@392 16 #include "CSVFormat.h"
Chris@392 17
Chris@392 18 #include <QFile>
Chris@392 19 #include <QString>
Chris@392 20 #include <QRegExp>
Chris@392 21 #include <QStringList>
Chris@392 22 #include <QTextStream>
Chris@392 23
Chris@392 24 #include <iostream>
Chris@392 25
Chris@392 26 CSVFormat::CSVFormat(QString filename) :
Chris@392 27 m_modelType(TwoDimensionalModel),
Chris@392 28 m_timingType(ExplicitTiming),
Chris@628 29 m_durationType(Durations),
Chris@392 30 m_timeUnits(TimeSeconds),
Chris@392 31 m_separator(","),
Chris@392 32 m_sampleRate(44100),
Chris@392 33 m_windowSize(1024),
Chris@392 34 m_behaviour(QString::KeepEmptyParts),
Chris@392 35 m_maxExampleCols(0)
Chris@392 36 {
Chris@392 37 QFile file(filename);
Chris@392 38 if (!file.exists()) return;
Chris@392 39 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return;
Chris@392 40
Chris@392 41 QTextStream in(&file);
Chris@392 42 in.seek(0);
Chris@392 43
Chris@392 44 unsigned int lineno = 0;
Chris@392 45
Chris@392 46 bool nonIncreasingPrimaries = false;
Chris@628 47 bool nonIncreasingSecondaries = false;
Chris@392 48 bool nonNumericPrimaries = false;
Chris@392 49 bool floatPrimaries = false;
Chris@392 50 bool variableItemCount = false;
Chris@392 51 int itemCount = 1;
Chris@392 52 int earliestNonNumericItem = -1;
Chris@392 53
Chris@392 54 float prevPrimary = 0.0;
Chris@628 55 float prevSecondary = 0.0;
Chris@392 56
Chris@392 57 m_maxExampleCols = 0;
Chris@392 58 m_separator = "";
Chris@392 59
Chris@392 60 while (!in.atEnd()) {
Chris@392 61
Chris@392 62 // See comment about line endings in CSVFileReader::load()
Chris@392 63
Chris@392 64 QString chunk = in.readLine();
Chris@392 65 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
Chris@392 66
Chris@392 67 for (size_t li = 0; li < lines.size(); ++li) {
Chris@392 68
Chris@392 69 QString line = lines[li];
Chris@392 70
Chris@392 71 if (line.startsWith("#")) continue;
Chris@392 72
Chris@392 73 m_behaviour = QString::KeepEmptyParts;
Chris@392 74
Chris@392 75 if (m_separator == "") {
Chris@392 76 //!!! to do: ask the user
Chris@392 77 if (line.split(",").size() >= 2) m_separator = ",";
Chris@392 78 else if (line.split("\t").size() >= 2) m_separator = "\t";
Chris@392 79 else if (line.split("|").size() >= 2) m_separator = "|";
Chris@392 80 else if (line.split("/").size() >= 2) m_separator = "/";
Chris@392 81 else if (line.split(":").size() >= 2) m_separator = ":";
Chris@392 82 else {
Chris@392 83 m_separator = " ";
Chris@392 84 m_behaviour = QString::SkipEmptyParts;
Chris@392 85 }
Chris@392 86 }
Chris@392 87
Chris@406 88 // std::cerr << "separator = \"" << m_separator.toStdString() << "\"" << std::endl;
Chris@392 89
Chris@392 90 QStringList list = line.split(m_separator, m_behaviour);
Chris@392 91 QStringList tidyList;
Chris@392 92
Chris@392 93 for (int i = 0; i < list.size(); ++i) {
Chris@392 94
Chris@392 95 QString s(list[i]);
Chris@392 96 bool numeric = false;
Chris@392 97
Chris@392 98 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
Chris@392 99 s = s.mid(1, s.length() - 2);
Chris@392 100 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
Chris@392 101 s = s.mid(1, s.length() - 2);
Chris@392 102 } else {
Chris@392 103 float f = s.toFloat(&numeric);
Chris@406 104 // std::cerr << "converted \"" << s.toStdString() << "\" to float, got " << f << " and success = " << numeric << std::endl;
Chris@392 105 }
Chris@392 106
Chris@392 107 tidyList.push_back(s);
Chris@392 108
Chris@392 109 if (lineno == 0 || (list.size() < itemCount)) {
Chris@392 110 itemCount = list.size();
Chris@392 111 } else {
Chris@392 112 if (itemCount != list.size()) {
Chris@392 113 variableItemCount = true;
Chris@392 114 }
Chris@392 115 }
Chris@392 116
Chris@392 117 if (i == 0) { // primary
Chris@392 118
Chris@392 119 if (numeric) {
Chris@392 120
Chris@392 121 float primary = s.toFloat();
Chris@392 122
Chris@392 123 if (lineno > 0 && primary <= prevPrimary) {
Chris@392 124 nonIncreasingPrimaries = true;
Chris@392 125 }
Chris@392 126
Chris@392 127 if (s.contains(".") || s.contains(",")) {
Chris@392 128 floatPrimaries = true;
Chris@392 129 }
Chris@392 130
Chris@392 131 prevPrimary = primary;
Chris@392 132
Chris@392 133 } else {
Chris@392 134 nonNumericPrimaries = true;
Chris@392 135 }
Chris@392 136 } else { // secondary
Chris@392 137
Chris@392 138 if (!numeric) {
Chris@392 139 if (earliestNonNumericItem < 0 ||
Chris@392 140 i < earliestNonNumericItem) {
Chris@392 141 earliestNonNumericItem = i;
Chris@392 142 }
Chris@628 143 } else if (i == 1) {
Chris@628 144 float secondary = s.toFloat();
Chris@628 145 if (lineno > 0 && secondary <= prevSecondary) {
Chris@628 146 nonIncreasingSecondaries = true;
Chris@628 147 }
Chris@628 148 prevSecondary = secondary;
Chris@392 149 }
Chris@392 150 }
Chris@392 151 }
Chris@392 152
Chris@392 153 if (lineno < 10) {
Chris@392 154 m_example.push_back(tidyList);
Chris@392 155 if (lineno == 0 || tidyList.size() > m_maxExampleCols) {
Chris@392 156 m_maxExampleCols = tidyList.size();
Chris@392 157 }
Chris@392 158 }
Chris@392 159
Chris@392 160 ++lineno;
Chris@392 161
Chris@392 162 if (lineno == 50) break;
Chris@392 163 }
Chris@392 164 }
Chris@392 165
Chris@392 166 if (nonNumericPrimaries || nonIncreasingPrimaries) {
Chris@392 167
Chris@392 168 // Primaries are probably not a series of times
Chris@392 169
Chris@392 170 m_timingType = CSVFormat::ImplicitTiming;
Chris@392 171 m_timeUnits = CSVFormat::TimeWindows;
Chris@392 172
Chris@392 173 if (nonNumericPrimaries) {
Chris@392 174 m_modelType = CSVFormat::OneDimensionalModel;
Chris@392 175 } else if (itemCount == 1 || variableItemCount ||
Chris@392 176 (earliestNonNumericItem != -1)) {
Chris@392 177 m_modelType = CSVFormat::TwoDimensionalModel;
Chris@392 178 } else {
Chris@392 179 m_modelType = CSVFormat::ThreeDimensionalModel;
Chris@392 180 }
Chris@392 181
Chris@392 182 } else {
Chris@392 183
Chris@392 184 // Increasing numeric primaries -- likely to be time
Chris@392 185
Chris@392 186 m_timingType = CSVFormat::ExplicitTiming;
Chris@392 187
Chris@392 188 if (floatPrimaries) {
Chris@392 189 m_timeUnits = CSVFormat::TimeSeconds;
Chris@392 190 } else {
Chris@392 191 m_timeUnits = CSVFormat::TimeAudioFrames;
Chris@392 192 }
Chris@392 193
Chris@392 194 if (itemCount == 1) {
Chris@392 195 m_modelType = CSVFormat::OneDimensionalModel;
Chris@392 196 } else if (variableItemCount || (earliestNonNumericItem != -1)) {
Chris@392 197 if (earliestNonNumericItem != -1 && earliestNonNumericItem < 2) {
Chris@392 198 m_modelType = CSVFormat::OneDimensionalModel;
Chris@392 199 } else {
Chris@392 200 m_modelType = CSVFormat::TwoDimensionalModel;
Chris@392 201 }
Chris@392 202 } else {
Chris@392 203 m_modelType = CSVFormat::ThreeDimensionalModel;
Chris@392 204 }
Chris@628 205
Chris@628 206 if (nonIncreasingSecondaries) {
Chris@628 207 m_durationType = Durations;
Chris@628 208 } else {
Chris@628 209 m_durationType = EndTimes;
Chris@628 210 }
Chris@392 211 }
Chris@392 212
Chris@392 213 std::cerr << "Estimated model type: " << m_modelType << std::endl;
Chris@392 214 std::cerr << "Estimated timing type: " << m_timingType << std::endl;
Chris@628 215 std::cerr << "Estimated duration type: " << m_durationType << std::endl;
Chris@392 216 std::cerr << "Estimated units: " << m_timeUnits << std::endl;
Chris@392 217 }
Chris@392 218