annotate transform/CSVFeatureWriter.cpp @ 1521:2d291eac9f21 import-audio-data

Ignore non-value columns when guessing sample range
author Chris Cannam
date Wed, 12 Sep 2018 15:27:30 +0100
parents 87ae75da6527
children 70e172e6cc59
rev   line source
Chris@498 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@498 2
Chris@498 3 /*
Chris@498 4 Sonic Visualiser
Chris@498 5 An audio file viewer and annotation editor.
Chris@498 6
Chris@498 7 Sonic Annotator
Chris@498 8 A utility for batch feature extraction from audio files.
Chris@498 9
Chris@498 10 Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London.
Chris@498 11 Copyright 2007-2008 QMUL.
Chris@498 12
Chris@498 13 This program is free software; you can redistribute it and/or
Chris@498 14 modify it under the terms of the GNU General Public License as
Chris@498 15 published by the Free Software Foundation; either version 2 of the
Chris@498 16 License, or (at your option) any later version. See the file
Chris@498 17 COPYING included with this distribution for more information.
Chris@498 18 */
Chris@498 19
Chris@498 20 #include "CSVFeatureWriter.h"
Chris@498 21
Chris@498 22 #include <iostream>
Chris@498 23
Chris@498 24 #include <QRegExp>
Chris@498 25 #include <QTextStream>
Chris@1035 26 #include <QTextCodec>
Chris@498 27
Chris@498 28 using namespace std;
Chris@498 29 using namespace Vamp;
Chris@498 30
Chris@498 31 CSVFeatureWriter::CSVFeatureWriter() :
Chris@498 32 FileFeatureWriter(SupportOneFilePerTrackTransform |
Chris@997 33 SupportOneFileTotal |
Chris@997 34 SupportStdOut,
Chris@498 35 "csv"),
Chris@669 36 m_separator(","),
Chris@1000 37 m_sampleTiming(false),
Chris@1001 38 m_endTimes(false),
Chris@1002 39 m_forceEnd(false),
Chris@1142 40 m_omitFilename(false),
Chris@1142 41 m_digits(6)
Chris@498 42 {
Chris@498 43 }
Chris@498 44
Chris@498 45 CSVFeatureWriter::~CSVFeatureWriter()
Chris@498 46 {
Chris@498 47 }
Chris@498 48
Chris@998 49 string
Chris@998 50 CSVFeatureWriter::getDescription() const
Chris@998 51 {
Chris@998 52 return "Write features in comma-separated (CSV) format. If transforms are being written to a single file or to stdout, the first column in the output will contain the input audio filename, or an empty string if the feature hails from the same audio file as its predecessor. If transforms are being written to multiple files, the audio filename column will be omitted. Subsequent columns will contain the feature timestamp, then any or all of duration, values, and label.";
Chris@998 53 }
Chris@998 54
Chris@498 55 CSVFeatureWriter::ParameterList
Chris@498 56 CSVFeatureWriter::getSupportedParameters() const
Chris@498 57 {
Chris@498 58 ParameterList pl = FileFeatureWriter::getSupportedParameters();
Chris@498 59 Parameter p;
Chris@498 60
Chris@498 61 p.name = "separator";
Chris@498 62 p.description = "Column separator for output. Default is \",\" (comma).";
Chris@498 63 p.hasArg = true;
Chris@498 64 pl.push_back(p);
Chris@669 65
Chris@1002 66 p.name = "omit-filename";
Chris@1002 67 p.description = "Omit the filename column. May result in confusion if sending more than one audio file's features to the same CSV output.";
Chris@1002 68 p.hasArg = false;
Chris@1002 69 pl.push_back(p);
Chris@1002 70
Chris@669 71 p.name = "sample-timing";
Chris@669 72 p.description = "Show timings as sample frame counts instead of in seconds.";
Chris@669 73 p.hasArg = false;
Chris@669 74 pl.push_back(p);
Chris@1000 75
Chris@1000 76 p.name = "end-times";
Chris@1000 77 p.description = "Show start and end time instead of start and duration, for features with duration.";
Chris@1000 78 p.hasArg = false;
Chris@1000 79 pl.push_back(p);
Chris@498 80
Chris@1001 81 p.name = "fill-ends";
Chris@1001 82 p.description = "Include durations (or end times) even for features without duration, by using the gap to the next feature instead.";
Chris@1001 83 p.hasArg = false;
Chris@1001 84 pl.push_back(p);
Chris@1001 85
Chris@1142 86 p.name = "digits";
Chris@1142 87 p.description = "Specify the number of significant digits to use when printing transform outputs. Outputs are represented internally using single-precision floating-point, so digits beyond the 8th or 9th place are usually meaningless. The default is 6.";
Chris@1142 88 p.hasArg = true;
Chris@1142 89 pl.push_back(p);
Chris@1142 90
Chris@498 91 return pl;
Chris@498 92 }
Chris@498 93
Chris@498 94 void
Chris@498 95 CSVFeatureWriter::setParameters(map<string, string> &params)
Chris@498 96 {
Chris@498 97 FileFeatureWriter::setParameters(params);
Chris@498 98
Chris@690 99 SVDEBUG << "CSVFeatureWriter::setParameters" << endl;
Chris@498 100 for (map<string, string>::iterator i = params.begin();
Chris@498 101 i != params.end(); ++i) {
Chris@1163 102 SVDEBUG << i->first << " -> " << i->second << endl;
Chris@498 103 if (i->first == "separator") {
Chris@498 104 m_separator = i->second.c_str();
Chris@1163 105 SVDEBUG << "m_separator = " << m_separator << endl;
Chris@1002 106 if (m_separator == "\\t") {
Chris@1002 107 m_separator = QChar::Tabulation;
Chris@1002 108 }
Chris@669 109 } else if (i->first == "sample-timing") {
Chris@669 110 m_sampleTiming = true;
Chris@1000 111 } else if (i->first == "end-times") {
Chris@1000 112 m_endTimes = true;
Chris@1001 113 } else if (i->first == "fill-ends") {
Chris@1001 114 m_forceEnd = true;
Chris@1002 115 } else if (i->first == "omit-filename") {
Chris@1002 116 m_omitFilename = true;
Chris@1142 117 } else if (i->first == "digits") {
Chris@1142 118 int digits = atoi(i->second.c_str());
Chris@1142 119 if (digits <= 0 || digits > 100) {
Chris@1428 120 SVCERR << "CSVFeatureWriter: ERROR: Invalid or out-of-range value for number of significant digits: " << i->second << endl;
Chris@1428 121 SVCERR << "CSVFeatureWriter: NOTE: Continuing with default settings" << endl;
Chris@1142 122 } else {
Chris@1142 123 m_digits = digits;
Chris@1142 124 }
Chris@498 125 }
Chris@498 126 }
Chris@498 127 }
Chris@498 128
Chris@498 129 void
Chris@498 130 CSVFeatureWriter::write(QString trackId,
Chris@498 131 const Transform &transform,
Chris@930 132 const Plugin::OutputDescriptor& ,
Chris@498 133 const Plugin::FeatureList& features,
Chris@498 134 std::string summaryType)
Chris@498 135 {
Chris@1001 136 TransformId transformId = transform.getIdentifier();
Chris@1001 137
Chris@498 138 // Select appropriate output file for our track/transform
Chris@498 139 // combination
Chris@498 140
Chris@1035 141 QTextStream *sptr = getOutputStream(trackId,
Chris@1035 142 transformId,
Chris@1035 143 QTextCodec::codecForName("UTF-8"));
Chris@604 144 if (!sptr) {
Chris@1001 145 throw FailedToOpenOutputStream(trackId, transformId);
Chris@604 146 }
Chris@498 147
Chris@498 148 QTextStream &stream = *sptr;
Chris@498 149
Chris@1039 150 int n = (int)features.size();
Chris@498 151
Chris@1001 152 if (n == 0) return;
Chris@1001 153
Chris@1006 154 DataId tt(trackId, transform);
Chris@1001 155
Chris@1001 156 if (m_pending.find(tt) != m_pending.end()) {
Chris@1001 157 writeFeature(tt,
Chris@1001 158 stream,
Chris@1001 159 m_pending[tt],
Chris@1001 160 &features[0],
Chris@1001 161 m_pendingSummaryTypes[tt]);
Chris@1001 162 m_pending.erase(tt);
Chris@1001 163 m_pendingSummaryTypes.erase(tt);
Chris@1001 164 }
Chris@1001 165
Chris@1001 166 if (m_forceEnd) {
Chris@1001 167 // can't write final feature until we know its end time
Chris@1001 168 --n;
Chris@1001 169 m_pending[tt] = features[n];
Chris@1001 170 m_pendingSummaryTypes[tt] = summaryType;
Chris@1001 171 }
Chris@1001 172
Chris@1001 173 for (int i = 0; i < n; ++i) {
Chris@1001 174 writeFeature(tt,
Chris@1001 175 stream,
Chris@1001 176 features[i],
Chris@1001 177 m_forceEnd ? &features[i+1] : 0,
Chris@1001 178 summaryType);
Chris@1001 179 }
Chris@1001 180 }
Chris@1001 181
Chris@1001 182 void
Chris@1001 183 CSVFeatureWriter::finish()
Chris@1001 184 {
Chris@1001 185 for (PendingFeatures::const_iterator i = m_pending.begin();
Chris@1001 186 i != m_pending.end(); ++i) {
Chris@1006 187 DataId tt = i->first;
Chris@1001 188 Plugin::Feature f = i->second;
Chris@1035 189 QTextStream *sptr = getOutputStream(tt.first,
Chris@1035 190 tt.second.getIdentifier(),
Chris@1035 191 QTextCodec::codecForName("UTF-8"));
Chris@1001 192 if (!sptr) {
Chris@1006 193 throw FailedToOpenOutputStream(tt.first, tt.second.getIdentifier());
Chris@1001 194 }
Chris@1001 195 QTextStream &stream = *sptr;
Chris@1001 196 // final feature has its own time as end time (we can't
Chris@1001 197 // reliably determine the end of audio file, and because of
Chris@1001 198 // the nature of block processing, the feature could even
Chris@1001 199 // start beyond that anyway)
Chris@1001 200 writeFeature(tt, stream, f, &f, m_pendingSummaryTypes[tt]);
Chris@1001 201 }
Chris@1001 202
Chris@1001 203 m_pending.clear();
Chris@1001 204 }
Chris@1001 205
Chris@1001 206 void
Chris@1006 207 CSVFeatureWriter::writeFeature(DataId tt,
Chris@1001 208 QTextStream &stream,
Chris@1001 209 const Plugin::Feature &f,
Chris@1001 210 const Plugin::Feature *optionalNextFeature,
Chris@1001 211 std::string summaryType)
Chris@1001 212 {
Chris@1001 213 QString trackId = tt.first;
Chris@1006 214 Transform transform = tt.second;
Chris@1001 215
Chris@1002 216 if (!m_omitFilename) {
Chris@1002 217 if (m_stdout || m_singleFileName != "") {
Chris@1002 218 if (trackId != m_prevPrintedTrackId) {
Chris@1002 219 stream << "\"" << trackId << "\"" << m_separator;
Chris@1002 220 m_prevPrintedTrackId = trackId;
Chris@1002 221 } else {
Chris@1002 222 stream << m_separator;
Chris@1002 223 }
Chris@1001 224 }
Chris@1001 225 }
Chris@1001 226
Chris@1047 227 ::RealTime duration;
Chris@1001 228 bool haveDuration = true;
Chris@1001 229
Chris@1001 230 if (f.hasDuration) {
Chris@1001 231 duration = f.duration;
Chris@1001 232 } else if (optionalNextFeature) {
Chris@1001 233 duration = optionalNextFeature->timestamp - f.timestamp;
Chris@1001 234 } else {
Chris@1001 235 haveDuration = false;
Chris@1001 236 }
Chris@1001 237
Chris@1001 238 if (m_sampleTiming) {
Chris@1001 239
Chris@1047 240 sv_samplerate_t rate = transform.getSampleRate();
Chris@1001 241
Chris@1047 242 stream << ::RealTime::realTime2Frame(f.timestamp, rate);
Chris@1001 243
Chris@1001 244 if (haveDuration) {
Chris@1001 245 stream << m_separator;
Chris@1001 246 if (m_endTimes) {
Chris@1047 247 stream << ::RealTime::realTime2Frame
Chris@1047 248 (::RealTime(f.timestamp) + duration, rate);
Chris@514 249 } else {
Chris@1047 250 stream << ::RealTime::realTime2Frame(duration, rate);
Chris@514 251 }
Chris@514 252 }
Chris@514 253
Chris@1001 254 } else {
Chris@498 255
Chris@1001 256 QString timestamp = f.timestamp.toString().c_str();
Chris@1001 257 timestamp.replace(QRegExp("^ +"), "");
Chris@1001 258 stream << timestamp;
Chris@669 259
Chris@1001 260 if (haveDuration) {
Chris@1001 261 if (m_endTimes) {
Chris@1001 262 QString endtime =
Chris@1047 263 (::RealTime(f.timestamp) + duration).toString().c_str();
Chris@1001 264 endtime.replace(QRegExp("^ +"), "");
Chris@1001 265 stream << m_separator << endtime;
Chris@1001 266 } else {
Chris@1047 267 QString d = ::RealTime(duration).toString().c_str();
Chris@1001 268 d.replace(QRegExp("^ +"), "");
Chris@1001 269 stream << m_separator << d;
Chris@669 270 }
Chris@1001 271 }
Chris@1001 272 }
Chris@669 273
Chris@1001 274 if (summaryType != "") {
Chris@1001 275 stream << m_separator << summaryType.c_str();
Chris@498 276 }
Chris@1001 277
Chris@1001 278 for (unsigned int j = 0; j < f.values.size(); ++j) {
Chris@1300 279
Chris@1300 280 QString number = QString("%1").arg(f.values[j], 0, 'g', m_digits);
Chris@1300 281
Chris@1300 282 // Qt pre-5.6 zero pads single-digit exponents to two digits;
Chris@1300 283 // Qt 5.7+ doesn't by default. But we want both to produce the
Chris@1300 284 // same output. Getting the new behaviour from standard APIs
Chris@1300 285 // in Qt 5.6 isn't possible I think; getting the old behaviour
Chris@1300 286 // from Qt 5.7 is possible but fiddly, involving setting up an
Chris@1300 287 // appropriate locale and using the %L specifier. We could
Chris@1300 288 // doubtless do it with sprintf but Qt is a known quantity at
Chris@1300 289 // this point. Let's just convert the old format to the new.
Chris@1300 290 number.replace("e-0", "e-");
Chris@1300 291
Chris@1300 292 stream << m_separator << number;
Chris@1001 293 }
Chris@1001 294
Chris@1001 295 if (f.label != "") {
Chris@1001 296 stream << m_separator << "\"" << f.label.c_str() << "\"";
Chris@1001 297 }
Chris@1001 298
Chris@1001 299 stream << "\n";
Chris@498 300 }
Chris@498 301
Chris@498 302