CSVFileReader.cpp
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2 
3 /*
4  Sonic Visualiser
5  An audio file viewer and annotation editor.
6  Centre for Digital Music, Queen Mary, University of London.
7  This file copyright 2006 Chris Cannam.
8 
9  This program is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 2 of the
12  License, or (at your option) any later version. See the file
13  COPYING included with this distribution for more information.
14 */
15 
16 #include "CSVFileReader.h"
17 
18 #include "model/Model.h"
19 #include "base/RealTime.h"
20 #include "base/StringBits.h"
21 #include "base/ProgressReporter.h"
22 #include "base/RecordDirectory.h"
26 #include "model/RegionModel.h"
27 #include "model/NoteModel.h"
28 #include "model/BoxModel.h"
30 #include "DataFileReaderFactory.h"
31 
32 #include <QFile>
33 #include <QDir>
34 #include <QFileInfo>
35 #include <QString>
36 #include <QRegExp>
37 #include <QStringList>
38 #include <QTextStream>
39 #include <QDateTime>
40 
41 #include <iostream>
42 #include <map>
43 #include <string>
44 
45 using namespace std;
46 
48  sv_samplerate_t mainModelSampleRate,
49  ProgressReporter *reporter) :
50  m_format(format),
51  m_device(nullptr),
52  m_ownDevice(true),
53  m_warnings(0),
54  m_mainModelSampleRate(mainModelSampleRate),
55  m_fileSize(0),
56  m_readCount(0),
57  m_progress(-1),
58  m_reporter(reporter)
59 {
    // Path-based construction: try to open the file at `path` for
    // reading in text mode. On success the QFile becomes m_device
    // (m_ownDevice is true, so the destructor closes and deletes it).
    // On failure m_device stays null and m_error holds a translated
    // message describing what went wrong.
60  QFile *file = new QFile(path);
61  bool good = false;
62 
    // Distinguish "no such file" from "exists but cannot be opened"
    // so that the stored error message is specific.
63  if (!file->exists()) {
64  m_error = QFile::tr("File \"%1\" does not exist").arg(path);
65  } else if (!file->open(QIODevice::ReadOnly | QIODevice::Text)) {
66  m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
67  } else {
68  good = true;
69  }
70 
71  if (good) {
72  m_device = file;
    // Keep only the file-name component of the path, and record the
    // file size so that load() can report progress as a percentage.
73  m_filename = QFileInfo(path).fileName();
74  m_fileSize = file->size();
75  if (m_reporter) m_reporter->setDefinite(true);
76  } else {
    // Nothing took ownership of the QFile, so release it here.
77  delete file;
78  }
79 }
80 
81 CSVFileReader::CSVFileReader(QIODevice *device, CSVFormat format,
82  sv_samplerate_t mainModelSampleRate,
83  ProgressReporter *reporter) :
84  m_format(format),
85  m_device(device),
86  m_ownDevice(false),
87  m_warnings(0),
88  m_mainModelSampleRate(mainModelSampleRate),
89  m_fileSize(0),
90  m_readCount(0),
91  m_progress(-1),
92  m_reporter(reporter)
93 {
94  if (m_reporter) m_reporter->setDefinite(false);
95 }
96 
98 {
    // Tear-down: log the device pointer, then close and delete the
    // device only if this reader owns it (m_ownDevice). A device that
    // was handed in by the caller is left untouched.
99  SVDEBUG << "CSVFileReader::~CSVFileReader: device is " << m_device << endl;
100 
101  if (m_device && m_ownDevice) {
    // NOTE(review): this message is tagged "CSVFileReader::CSVFileReader"
    // while the message above is tagged "~CSVFileReader" -- confirm
    // whether the tag here is intentional.
102  SVDEBUG << "CSVFileReader::CSVFileReader: Closing device" << endl;
103  m_device->close();
104  delete m_device;
105  }
106 }
107 
// Report whether this reader has a device to read from: true exactly
// when m_device is non-null. The path-based constructor leaves
// m_device null when the file is missing or cannot be opened.
108 bool
110 {
111  return (m_device != nullptr);
112 }
113 
// Return the message stored in m_error. The path-based constructor
// sets it when the file does not exist or cannot be opened.
114 QString
116 {
117  return m_error;
118 }
119 
120 bool
121 CSVFileReader::convertTimeValue(QString s, int lineno,
122  sv_samplerate_t sampleRate,
123  int windowSize,
124  sv_frame_t &calculatedFrame) const
125 {
126  QRegExp nonNumericRx("[^0-9eE.,+-]");
127  int warnLimit = 10;
128 
130 
131  calculatedFrame = 0;
132 
133  bool ok = false;
134  QString numeric = s;
135  numeric.remove(nonNumericRx);
136 
137  if (timeUnits == CSVFormat::TimeSeconds) {
138 
139  double time = numeric.toDouble(&ok);
140  if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
141  calculatedFrame = sv_frame_t(time * sampleRate + 0.5);
142 
143  } else if (timeUnits == CSVFormat::TimeMilliseconds) {
144 
145  double time = numeric.toDouble(&ok);
146  if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
147  calculatedFrame = sv_frame_t((time / 1000.0) * sampleRate + 0.5);
148 
149  } else {
150 
151  long n = numeric.toLong(&ok);
152  if (n >= 0) calculatedFrame = n;
153 
154  if (timeUnits == CSVFormat::TimeWindows) {
155  calculatedFrame *= windowSize;
156  }
157  }
158 
159  if (!ok) {
160  if (m_warnings < warnLimit) {
161  SVCERR << "WARNING: CSVFileReader::load: "
162  << "Bad time format (\"" << s
163  << "\") in data line "
164  << lineno+1 << endl;
165  } else if (m_warnings == warnLimit) {
166  SVCERR << "WARNING: Too many warnings" << endl;
167  }
168  ++m_warnings;
169  }
170 
171  return calculatedFrame;
172 }
173 
174 Model *
176 {
177  if (!m_device) return nullptr;
178 
182  sv_samplerate_t sampleRate = m_format.getSampleRate();
183  int windowSize = m_format.getWindowSize();
184  QChar separator = m_format.getSeparator();
185  bool allowQuoting = m_format.getAllowQuoting();
186 
187  if (timingType == CSVFormat::ExplicitTiming) {
188  if (modelType == CSVFormat::ThreeDimensionalModel) {
189  // This will be overridden later if more than one line
190  // appears in our file, but we want to choose a default
191  // that's likely to be visible
192  windowSize = 1024;
193  } else {
194  windowSize = 1;
195  }
196  if (timeUnits == CSVFormat::TimeSeconds ||
197  timeUnits == CSVFormat::TimeMilliseconds) {
198  sampleRate = m_mainModelSampleRate;
199  }
200  }
201 
202  SparseOneDimensionalModel *model1 = nullptr;
203  SparseTimeValueModel *model2 = nullptr;
204  RegionModel *model2a = nullptr;
205  NoteModel *model2b = nullptr;
206  BoxModel *model2c = nullptr;
207  EditableDenseThreeDimensionalModel *model3 = nullptr;
208  WritableWaveFileModel *modelW = nullptr;
209  Model *model = nullptr;
210 
211  QTextStream in(m_device);
212 
213  unsigned int warnings = 0, warnLimit = 10;
214  unsigned int lineno = 0;
215 
216  float min = 0.0, max = 0.0;
217 
218  sv_frame_t frameNo = 0;
219  sv_frame_t duration = 0;
220  sv_frame_t endFrame = 0;
221 
222  bool haveAnyValue = false;
223  bool haveEndTime = false;
224  bool pitchLooksLikeMIDI = true;
225 
226  sv_frame_t startFrame = 0; // for calculation of dense model resolution
227  bool firstEverValue = true;
228 
229  int valueColumns = 0;
230  for (int i = 0; i < m_format.getColumnCount(); ++i) {
232  ++valueColumns;
233  }
234  }
235 
236  int audioChannels = 0;
237  float **audioSamples = nullptr;
238  float sampleShift = 0.f;
239  float sampleScale = 1.f;
240 
241  if (modelType == CSVFormat::WaveFileModel) {
242 
243  audioChannels = valueColumns;
244 
245  audioSamples =
246  breakfastquay::allocate_and_zero_channels<float>
247  (audioChannels, 1);
248 
249  switch (m_format.getAudioSampleRange()) {
252  sampleShift = 0.f;
253  sampleScale = 1.f;
254  break;
256  sampleShift = -128.f;
257  sampleScale = 1.f / 128.f;
258  break;
260  sampleShift = 0.f;
261  sampleScale = 1.f / 32768.f;
262  break;
263  }
264  }
265 
266  map<QString, int> labelCountMap;
267 
268  bool atStart = true;
269  bool abandoned = false;
270 
271  while (!in.atEnd() && !abandoned) {
272 
273  // QTextStream's readLine doesn't cope with old-style Mac
274  // CR-only line endings. Why did they bother making the class
275  // cope with more than one sort of line ending, if it still
276  // can't be configured to cope with all the common sorts?
277 
278  // For the time being we'll deal with this case (which is
279  // relatively uncommon for us, but still necessary to handle)
280  // by reading the entire file using a single readLine, and
281  // splitting it. For LF and CR/LF line endings this will just
282  // read a line at a time, and that's obviously OK.
283 
284  QString chunk = in.readLine();
285  QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
286 
287  m_readCount += chunk.size() + 1;
288 
289  if (m_reporter) {
290  if (m_reporter->wasCancelled()) {
291  abandoned = true;
292  break;
293  }
294  int progress;
295  if (m_fileSize > 0) {
296  progress = int((double(m_readCount) / double(m_fileSize))
297  * 100.0);
298  } else {
299  progress = int(m_readCount / 10000);
300  }
301  if (progress != m_progress) {
302  m_reporter->setProgress(progress);
303  m_progress = progress;
304  }
305  }
306 
307  for (int li = 0; li < lines.size(); ++li) {
308 
309  QString line = lines[li];
310  if (line.startsWith("#")) continue;
311 
312  if (atStart) {
313  atStart = false;
315  continue;
316  }
317  }
318 
319  QStringList list = StringBits::split(line, separator, allowQuoting);
320  if (!model) {
321 
322  QString modelName = m_filename;
323 
324  switch (modelType) {
325 
327  SVDEBUG << "CSVFileReader: Creating sparse one-dimensional model" << endl;
328  model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
329  model = model1;
330  break;
331 
333  SVDEBUG << "CSVFileReader: Creating sparse time-value model" << endl;
334  model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
335  model = model2;
336  break;
337 
339  SVDEBUG << "CSVFileReader: Creating region model" << endl;
340  model2a = new RegionModel(sampleRate, windowSize, false);
341  model = model2a;
342  break;
343 
345  SVDEBUG << "CSVFileReader: Creating note model" << endl;
346  model2b = new NoteModel(sampleRate, windowSize, false);
347  model = model2b;
348  break;
349 
351  SVDEBUG << "CSVFileReader: Creating box model" << endl;
352  model2c = new BoxModel(sampleRate, windowSize, false);
353  model = model2c;
354  break;
355 
357  SVDEBUG << "CSVFileReader: Creating editable dense three-dimensional model" << endl;
359  (sampleRate, windowSize, valueColumns);
360  model = model3;
361  break;
362 
364  {
365  SVDEBUG << "CSVFileReader: Creating writable wave-file model" << endl;
366  bool normalise = (m_format.getAudioSampleRange()
368  QString path = getConvertedAudioFilePath();
369  modelW = new WritableWaveFileModel
370  (path, sampleRate, valueColumns,
371  normalise ?
374  modelName = QFileInfo(path).fileName();
375  model = modelW;
376  break;
377  }
378  }
379 
380  if (model && model->isOK()) {
381  if (modelName != "") {
382  model->setObjectName(modelName);
383  }
384  }
385  }
386 
387  if (!model || !model->isOK()) {
388  SVCERR << "Failed to create model to load CSV file into"
389  << endl;
390  if (model) {
391  delete model;
392  model = nullptr;
393  model1 = nullptr; model2 = nullptr;
394  model2a = nullptr; model2b = nullptr; model2c = nullptr;
395  model3 = nullptr; modelW = nullptr;
396  }
397  abandoned = true;
398  break;
399  }
400 
401  float value = 0.f;
402  float otherValue = 0.f;
403  float pitch = 0.f;
404  QString label = "";
405  bool ok = true;
406 
407  duration = 0.f;
408  haveEndTime = false;
409 
410  for (int i = 0; i < list.size(); ++i) {
411 
412  QString s = list[i];
413 
415 
416  switch (purpose) {
417 
419  break;
420 
422  if (!convertTimeValue(s, lineno, sampleRate, windowSize, frameNo)) {
423  ok = false;
424  }
425  break;
426 
428  if (convertTimeValue(s, lineno, sampleRate, windowSize, endFrame)) {
429  haveEndTime = true;
430  }
431  break;
432 
434  if (!convertTimeValue(s, lineno, sampleRate, windowSize, duration)) {
435  ok = false;
436  }
437  break;
438 
440  if (haveAnyValue) {
441  otherValue = value;
442  }
443  value = s.toFloat();
444  haveAnyValue = true;
445  break;
446 
448  pitch = s.toFloat();
449  if (pitch < 0.f || pitch > 127.f) {
450  pitchLooksLikeMIDI = false;
451  }
452  break;
453 
455  label = s;
456  break;
457  }
458  }
459 
460  if (!ok) {
461  continue;
462  }
463 
464  ++labelCountMap[label];
465 
466  if (haveEndTime) { // ... calculate duration now all cols read
467  if (endFrame > frameNo) {
468  duration = endFrame - frameNo;
469  }
470  }
471 
472  if (modelType == CSVFormat::OneDimensionalModel) {
473 
474  Event point(frameNo, label);
475  model1->add(point);
476 
477  } else if (modelType == CSVFormat::TwoDimensionalModel) {
478 
479  Event point(frameNo, value, label);
480  model2->add(point);
481 
482  } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
483 
484  Event region(frameNo, value, duration, label);
485  model2a->add(region);
486 
487  } else if (modelType == CSVFormat::TwoDimensionalModelWithDurationAndPitch) {
488 
489  float level = ((value >= 0.f && value <= 1.f) ? value : 1.f);
490  Event note(frameNo, pitch, duration, level, label);
491  model2b->add(note);
492 
493  } else if (modelType == CSVFormat::TwoDimensionalModelWithDurationAndExtent) {
494 
495  float level = 0.f;
496  if (value > otherValue) {
497  level = value - otherValue;
498  value = otherValue;
499  } else {
500  level = otherValue - value;
501  }
502  Event box(frameNo, value, duration, level, label);
503  model2c->add(box);
504 
505  } else if (modelType == CSVFormat::ThreeDimensionalModel) {
506 
508 
509  for (int i = 0; i < list.size(); ++i) {
510 
512  continue;
513  }
514 
515  bool ok = false;
516  float value = list[i].toFloat(&ok);
517 
518  values.push_back(value);
519 
520  if (firstEverValue || value < min) min = value;
521  if (firstEverValue || value > max) max = value;
522 
523  if (firstEverValue) {
524  startFrame = frameNo;
525  model3->setStartFrame(startFrame);
526  } else if (lineno == 1 &&
527  timingType == CSVFormat::ExplicitTiming) {
528  model3->setResolution(int(frameNo - startFrame));
529  }
530 
531  firstEverValue = false;
532 
533  if (!ok) {
534  if (warnings < warnLimit) {
535  SVCERR << "WARNING: CSVFileReader::load: "
536  << "Non-numeric value \""
537  << list[i]
538  << "\" in data line " << lineno+1
539  << ":" << endl;
540  SVCERR << line << endl;
541  ++warnings;
542  } else if (warnings == warnLimit) {
543 // SVCERR << "WARNING: Too many warnings" << endl;
544  }
545  }
546  }
547 
548 // SVDEBUG << "Setting bin values for count " << lineno << ", frame "
549 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
550 
551  model3->setColumn(lineno, values);
552 
553  } else if (modelType == CSVFormat::WaveFileModel) {
554 
555  int channel = 0;
556 
557  for (int i = 0;
558  i < list.size() && channel < audioChannels;
559  ++i) {
560 
561  if (m_format.getColumnPurpose(i) !=
563  continue;
564  }
565 
566  bool ok = false;
567  float value = list[i].toFloat(&ok);
568  if (!ok) {
569  value = 0.f;
570  }
571 
572  value += sampleShift;
573  value *= sampleScale;
574 
575  audioSamples[channel][0] = value;
576 
577  ++channel;
578  }
579 
580  while (channel < audioChannels) {
581  audioSamples[channel][0] = 0.f;
582  ++channel;
583  }
584 
585  bool ok = modelW->addSamples(audioSamples, 1);
586 
587  if (!ok) {
588  if (warnings < warnLimit) {
589  SVCERR << "WARNING: CSVFileReader::load: "
590  << "Unable to add sample to wave-file model"
591  << endl;
592  SVCERR << line << endl;
593  ++warnings;
594  }
595  }
596  }
597 
598  ++lineno;
599  if (timingType == CSVFormat::ImplicitTiming ||
600  list.size() == 0) {
601  frameNo += windowSize;
602  }
603  }
604  }
605 
606  if (!haveAnyValue) {
607  if (model2a) {
608  // assign values for regions based on label frequency; we
609  // have this in our labelCountMap, sort of
610 
611  map<int, map<QString, float> > countLabelValueMap;
612  for (map<QString, int>::iterator i = labelCountMap.begin();
613  i != labelCountMap.end(); ++i) {
614  countLabelValueMap[i->second][i->first] = -1.f;
615  }
616 
617  float v = 0.f;
618  for (map<int, map<QString, float> >::iterator i =
619  countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
620  --i;
621  SVCERR << "count -> " << i->first << endl;
622  for (map<QString, float>::iterator j = i->second.begin();
623  j != i->second.end(); ++j) {
624  j->second = v;
625  SVCERR << "label -> " << j->first << ", value " << v << endl;
626  v = v + 1.f;
627  }
628  }
629 
630  map<Event, Event> eventMap;
631 
632  EventVector allEvents = model2a->getAllEvents();
633  for (const Event &e: allEvents) {
634  int count = labelCountMap[e.getLabel()];
635  v = countLabelValueMap[count][e.getLabel()];
636  // SVCERR << "mapping from label \"" << p.label
637  // << "\" (count " << count
638  // << ") to value " << v << endl;
639  eventMap[e] = Event(e.getFrame(), v,
640  e.getDuration(), e.getLabel());
641  }
642 
643  for (const auto &i: eventMap) {
644  // There could be duplicate regions; if so replace
645  // them all -- but we need to check we're not
646  // replacing a region by itself (or else this will
647  // never terminate)
648  if (i.first.getValue() == i.second.getValue()) {
649  continue;
650  }
651  while (model2a->containsEvent(i.first)) {
652  model2a->remove(i.first);
653  model2a->add(i.second);
654  }
655  }
656  }
657  }
658 
659  if (model2b) {
660  if (pitchLooksLikeMIDI) {
661  model2b->setScaleUnits("MIDI Pitch");
662  } else {
663  model2b->setScaleUnits("Hz");
664  }
665  }
666 
667  if (model3) {
668  model3->setMinimumLevel(min);
669  model3->setMaximumLevel(max);
670  }
671 
672  if (modelW) {
673  breakfastquay::deallocate_channels(audioSamples, audioChannels);
674  modelW->updateModel();
675  modelW->writeComplete();
676  }
677 
678  return model;
679 }
680 
// Build the path of the .wav file that converted audio is written to.
//
// The file name is "<base>-<seconds>.wav", where <base> is m_filename
// with each run of characters matched by the pattern below replaced
// by a single underscore, and <seconds> is the current time in whole
// seconds since the epoch. The file is placed in the directory
// returned by RecordDirectory::getConvertedAudioDirectory().
//
// Returns an empty string, after logging a warning, if that directory
// comes back empty.
681 QString
683 {
684  QString base = m_filename;
685  base.replace(QRegExp("[/\\,.:;~<>\"'|?%*]+"), "_");
686 
687  QString convertedFileDir = RecordDirectory::getConvertedAudioDirectory();
688  if (convertedFileDir == "") {
689  SVCERR << "WARNING: CSVFileReader::getConvertedAudioFilePath: Failed to retrieve converted audio directory" << endl;
690  return "";
691  }
692 
    // Derive seconds from milliseconds by integer division.
693  auto ms = QDateTime::currentDateTime().toMSecsSinceEpoch();
694  auto s = ms / 1000; // there is a toSecsSinceEpoch in Qt 5.8 but
695  // we currently want to support older versions
696 
697  return QDir(convertedFileDir).filePath
698  (QString("%1-%2.wav").arg(base).arg(s));
699 }
700 
double sv_samplerate_t
Sample rate.
Definition: BaseTypes.h:51
virtual void setMinimumLevel(float sz)
Set the minimum value of the value in a bin.
AudioSampleRange getAudioSampleRange() const
Definition: CSVFormat.h:131
HeaderStatus getHeaderStatus() const
Definition: CSVFormat.h:133
int64_t sv_frame_t
Frame index, the unit of our time axis.
Definition: BaseTypes.h:31
void writeComplete()
Indicate that writing is complete.
TimeUnits getTimeUnits() const
Definition: CSVFormat.h:127
virtual void setDefinite(bool definite)=0
static QStringList split(QString s, QChar separator, bool quoted)
Split a string at the given separator character.
Definition: StringBits.cpp:160
QString getConvertedAudioFilePath() const
QString m_error
Definition: CSVFileReader.h:65
CSVFormat m_format
Definition: CSVFileReader.h:61
void add(Event e) override
Editing methods.
qint64 m_readCount
Definition: CSVFileReader.h:69
EventVector getAllEvents() const
Definition: RegionModel.h:143
void add(Event e) override
Editing methods.
Definition: RegionModel.h:172
QString m_filename
Definition: CSVFileReader.h:64
int getWindowSize() const
Definition: CSVFormat.h:129
CSVFileReader(QString path, CSVFormat format, sv_samplerate_t mainModelSampleRate, ProgressReporter *reporter=0)
Construct a CSVFileReader to read the CSV file at the given path, with the given format.
qint64 m_fileSize
Definition: CSVFileReader.h:68
ProgressReporter * m_reporter
Definition: CSVFileReader.h:71
bool isOK() const override
Return true if the file appears to be of the correct type.
virtual bool isOK() const =0
Return true if the model was constructed successfully.
ColumnPurpose getColumnPurpose(int i) const
Definition: CSVFormat.cpp:554
bool containsEvent(const Event &e) const
Definition: RegionModel.h:140
void add(Event e) override
Editing methods.
Definition: BoxModel.h:171
sv_samplerate_t m_mainModelSampleRate
Definition: CSVFileReader.h:67
Model is the base class for all data models that represent any sort of data on a time scale based on ...
Definition: Model.h:51
bool convertTimeValue(QString, int lineno, sv_samplerate_t sampleRate, int windowSize, sv_frame_t &calculatedFrame) const
virtual void setMaximumLevel(float sz)
Set the maximum value of the value in a bin.
BoxModel – a model for annotations having start time, duration, and a value range.
Definition: BoxModel.h:40
Model * load() const override
Read the file and return the corresponding data model.
QIODevice * m_device
Definition: CSVFileReader.h:62
QString getError() const override
sv_samplerate_t getSampleRate() const
Definition: CSVFormat.h:128
static QString getConvertedAudioDirectory()
Return the directory in which an audio file converted from a data file should be saved.
virtual void setProgress(int percentage)=0
A model representing a wiggly-line plot with points at arbitrary intervals of the model resolution...
RegionModel – a model for intervals associated with a value, which we call regions for no very compe...
Definition: RegionModel.h:33
#define SVDEBUG
Definition: Debug.h:106
void setScaleUnits(QString units)
Definition: NoteModel.h:128
ModelType getModelType() const
Definition: CSVFormat.h:125
An immutable(-ish) type used for point and event representation in sparse models, as well as for inte...
Definition: Event.h:55
virtual void setStartFrame(sv_frame_t)
Set the frame offset of the first column.
bool getAllowQuoting() const
Definition: CSVFormat.h:132
TimingType getTimingType() const
Definition: CSVFormat.h:126
QChar getSeparator() const
Definition: CSVFormat.h:134
void updateModel()
Tell the model to update its own (read) view of the (written) file.
#define SVCERR
Definition: Debug.h:109
void add(Event e) override
Editing methods.
virtual void setResolution(int sz)
Set the number of sample frames covered by each set of bins.
A model representing a series of time instants with optional labels but without values.
virtual ~CSVFileReader()
std::vector< Event > EventVector
Definition: Event.h:494
void remove(Event e) override
Definition: RegionModel.h:200
void add(Event e) override
Editing methods.
Definition: NoteModel.h:207
int getColumnCount() const
Definition: CSVFormat.h:130
virtual bool addSamples(const float *const *samples, sv_frame_t count)
Call addSamples to append a block of samples to the end of the file.
virtual void setColumn(int x, const Column &values)
Set the entire set of bin values at the given column.
virtual bool wasCancelled() const =0
static double stringToDoubleLocaleFree(QString s, bool *ok=0)
Convert a string to a double using basic "C"-locale syntax, i.e.
Definition: StringBits.cpp:28