annotate data/fileio/test/AudioFileReaderTest.h @ 1517:925d205c39b4 import-audio-data

Handle sample range specification for CSV import
author Chris Cannam
date Sat, 08 Sep 2018 20:43:14 +0100
parents 48e9f538e6e9
children d2555df635ec
rev   line source
Chris@756 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@756 2
Chris@756 3 /*
Chris@756 4 Sonic Visualiser
Chris@756 5 An audio file viewer and annotation editor.
Chris@756 6 Centre for Digital Music, Queen Mary, University of London.
Chris@756 7 This file copyright 2013 Chris Cannam.
Chris@756 8
Chris@756 9 This program is free software; you can redistribute it and/or
Chris@756 10 modify it under the terms of the GNU General Public License as
Chris@756 11 published by the Free Software Foundation; either version 2 of the
Chris@756 12 License, or (at your option) any later version. See the file
Chris@756 13 COPYING included with this distribution for more information.
Chris@756 14 */
Chris@756 15
Chris@756 16 #ifndef TEST_AUDIO_FILE_READER_H
Chris@756 17 #define TEST_AUDIO_FILE_READER_H
Chris@756 18
Chris@756 19 #include "../AudioFileReaderFactory.h"
Chris@756 20 #include "../AudioFileReader.h"
Chris@1313 21 #include "../WavFileWriter.h"
Chris@756 22
Chris@756 23 #include "AudioTestData.h"
Chris@756 24
Chris@756 25 #include <cmath>
Chris@756 26
Chris@756 27 #include <QObject>
Chris@756 28 #include <QtTest>
Chris@756 29 #include <QDir>
Chris@756 30
Chris@756 31 #include <iostream>
Chris@756 32
Chris@756 33 using namespace std;
Chris@756 34
Chris@756 35 class AudioFileReaderTest : public QObject
Chris@756 36 {
Chris@756 37 Q_OBJECT
Chris@756 38
Chris@1346 39 private:
Chris@1346 40 QString testDirBase;
Chris@1346 41 QString audioDir;
Chris@1346 42 QString diffDir;
Chris@1346 43
Chris@1346 44 public:
Chris@1346 45 AudioFileReaderTest(QString base) {
Chris@1346 46 if (base == "") {
Chris@1346 47 base = "svcore/data/fileio/test";
Chris@1346 48 }
Chris@1346 49 testDirBase = base;
Chris@1359 50 audioDir = base + "/audio";
Chris@1346 51 diffDir = base + "/diffs";
Chris@1346 52 }
Chris@1346 53
Chris@1346 54 private:
Chris@756 55 const char *strOf(QString s) {
Chris@756 56 return strdup(s.toLocal8Bit().data());
Chris@756 57 }
Chris@756 58
Chris@1313 59 void getFileMetadata(QString filename,
Chris@1313 60 QString &extension,
Chris@1313 61 sv_samplerate_t &rate,
Chris@1313 62 int &channels,
Chris@1313 63 int &bitdepth) {
Chris@1313 64
Chris@1313 65 QStringList fileAndExt = filename.split(".");
Chris@1313 66 QStringList bits = fileAndExt[0].split("-");
Chris@1313 67
Chris@1313 68 extension = fileAndExt[1];
Chris@1313 69 rate = bits[0].toInt();
Chris@1313 70 channels = bits[1].toInt();
Chris@1313 71 bitdepth = 16;
Chris@1313 72 if (bits.length() > 2) {
Chris@1313 73 bitdepth = bits[2].toInt();
Chris@1313 74 }
Chris@1313 75 }
Chris@1313 76
cannam@1315 77 void getExpectedThresholds(QString format,
cannam@1315 78 QString filename,
Chris@1313 79 bool resampled,
Chris@1313 80 bool gapless,
Chris@1313 81 bool normalised,
Chris@1313 82 double &maxLimit,
Chris@1313 83 double &rmsLimit) {
Chris@1313 84
Chris@1313 85 QString extension;
Chris@1313 86 sv_samplerate_t fileRate;
Chris@1313 87 int channels;
Chris@1313 88 int bitdepth;
Chris@1313 89 getFileMetadata(filename, extension, fileRate, channels, bitdepth);
Chris@1313 90
Chris@1313 91 if (normalised) {
Chris@1313 92
cannam@1315 93 if (format == "ogg") {
Chris@1313 94
Chris@1313 95 // Our ogg is not especially high quality and is
Chris@1313 96 // actually further from the original if normalised
Chris@1313 97
Chris@1313 98 maxLimit = 0.1;
Chris@1313 99 rmsLimit = 0.03;
Chris@1313 100
cannam@1315 101 } else if (format == "aac") {
Chris@1313 102
cannam@1315 103 // Terrible performance for this test, load of spill
cannam@1315 104 // from one channel to the other. I guess they know
cannam@1315 105 // what they're doing, it's perceptual after all, but
cannam@1315 106 // it does make this check a bit superfluous, you
cannam@1315 107 // could probably pass it with a signal that sounds
cannam@1315 108 // nothing like the original
cannam@1315 109 maxLimit = 0.2;
cannam@1314 110 rmsLimit = 0.1;
Chris@1313 111
cannam@1315 112 } else if (format == "mp3") {
Chris@1313 113
Chris@1313 114 if (resampled && !gapless) {
Chris@1313 115
Chris@1313 116 // We expect worse figures here, because the
Chris@1313 117 // combination of uncompensated encoder delay +
Chris@1313 118 // resampling results in a fractional delay which
Chris@1313 119 // means the decoded signal is slightly out of
Chris@1313 120 // phase compared to the test signal
Chris@1313 121
Chris@1313 122 maxLimit = 0.1;
Chris@1313 123 rmsLimit = 0.05;
Chris@1313 124
Chris@1313 125 } else {
Chris@1313 126
Chris@1313 127 maxLimit = 0.05;
Chris@1313 128 rmsLimit = 0.01;
Chris@1313 129 }
Chris@1313 130
Chris@1313 131 } else {
Chris@1313 132
cannam@1315 133 // lossless formats (wav, aiff, flac, apple_lossless)
Chris@1313 134
Chris@1313 135 if (bitdepth >= 16 && !resampled) {
Chris@1313 136 maxLimit = 1e-3;
Chris@1313 137 rmsLimit = 3e-4;
Chris@1313 138 } else {
Chris@1313 139 maxLimit = 0.01;
Chris@1313 140 rmsLimit = 5e-3;
Chris@1313 141 }
Chris@1313 142 }
Chris@1313 143
Chris@1313 144 } else { // !normalised
Chris@1313 145
cannam@1315 146 if (format == "ogg") {
Chris@1313 147
Chris@1313 148 maxLimit = 0.06;
Chris@1313 149 rmsLimit = 0.03;
Chris@1313 150
cannam@1315 151 } else if (format == "aac") {
Chris@1313 152
cannam@1315 153 maxLimit = 0.1;
cannam@1315 154 rmsLimit = 0.1;
Chris@1313 155
cannam@1315 156 } else if (format == "mp3") {
Chris@1313 157
Chris@1313 158 // all mp3 figures are worse when not normalising
Chris@1313 159 maxLimit = 0.1;
Chris@1313 160 rmsLimit = 0.05;
Chris@1313 161
Chris@1313 162 } else {
Chris@1313 163
cannam@1315 164 // lossless formats (wav, aiff, flac, apple_lossless)
Chris@1313 165
Chris@1313 166 if (bitdepth >= 16 && !resampled) {
Chris@1313 167 maxLimit = 1e-3;
Chris@1313 168 rmsLimit = 3e-4;
Chris@1313 169 } else {
Chris@1313 170 maxLimit = 0.02;
Chris@1313 171 rmsLimit = 0.01;
Chris@1313 172 }
Chris@1313 173 }
Chris@1313 174 }
Chris@1313 175 }
Chris@1313 176
cannam@1315 177 QString testName(QString format, QString filename, int rate, bool norm, bool gapless) {
cannam@1315 178 return QString("%1/%2 at %3%4%5")
cannam@1315 179 .arg(format)
Chris@1313 180 .arg(filename)
Chris@1313 181 .arg(rate)
Chris@1313 182 .arg(norm ? " normalised": "")
Chris@1313 183 .arg(gapless ? "" : " non-gapless");
Chris@1313 184 }
Chris@1313 185
Chris@756 186 private slots:
Chris@756 187 void init()
Chris@756 188 {
Chris@756 189 if (!QDir(audioDir).exists()) {
Chris@1346 190 QString cwd = QDir::currentPath();
Chris@1428 191 SVCERR << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist (cwd = " << cwd << ")" << endl;
Chris@756 192 QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found");
Chris@756 193 }
Chris@1313 194 if (!QDir(diffDir).exists() && !QDir().mkpath(diffDir)) {
Chris@1428 195 SVCERR << "ERROR: Audio diff directory \"" << diffDir << "\" does not exist and could not be created" << endl;
Chris@1313 196 QVERIFY2(QDir(diffDir).exists(), "Audio diff directory not found and could not be created");
Chris@1313 197 }
Chris@756 198 }
Chris@756 199
Chris@756 200 void read_data()
Chris@756 201 {
cannam@1315 202 QTest::addColumn<QString>("format");
Chris@756 203 QTest::addColumn<QString>("audiofile");
Chris@1313 204 QTest::addColumn<int>("rate");
Chris@1313 205 QTest::addColumn<bool>("normalised");
Chris@1313 206 QTest::addColumn<bool>("gapless");
cannam@1315 207 QStringList dirs = QDir(audioDir).entryList(QDir::Dirs |
cannam@1315 208 QDir::NoDotAndDotDot);
cannam@1315 209 for (QString format: dirs) {
cannam@1315 210 QStringList files = QDir(QDir(audioDir).filePath(format))
cannam@1315 211 .entryList(QDir::Files);
cannam@1315 212 int readRates[] = { 44100, 48000 };
cannam@1315 213 bool norms[] = { false, true };
cannam@1315 214 bool gaplesses[] = { true, false };
cannam@1315 215 foreach (QString filename, files) {
cannam@1315 216 for (int rate: readRates) {
cannam@1315 217 for (bool norm: norms) {
cannam@1315 218 for (bool gapless: gaplesses) {
Chris@1313 219
cannam@1315 220 if (format != "mp3" && !gapless) {
cannam@1315 221 continue;
cannam@1315 222 }
cannam@1315 223
cannam@1315 224 QString desc = testName
cannam@1315 225 (format, filename, rate, norm, gapless);
cannam@1315 226
cannam@1315 227 QTest::newRow(strOf(desc))
cannam@1315 228 << format << filename << rate << norm << gapless;
Chris@1313 229 }
Chris@1313 230 }
Chris@1313 231 }
Chris@1313 232 }
Chris@756 233 }
Chris@756 234 }
Chris@756 235
Chris@756 236 void read()
Chris@756 237 {
cannam@1315 238 QFETCH(QString, format);
Chris@756 239 QFETCH(QString, audiofile);
Chris@1313 240 QFETCH(int, rate);
Chris@1313 241 QFETCH(bool, normalised);
Chris@1313 242 QFETCH(bool, gapless);
Chris@756 243
Chris@1313 244 sv_samplerate_t readRate(rate);
Chris@1313 245
cannam@1315 246 // cerr << "\naudiofile = " << audiofile << endl;
Chris@1313 247
Chris@1313 248 AudioFileReaderFactory::Parameters params;
Chris@1313 249 params.targetRate = readRate;
Chris@1313 250 params.normalisation = (normalised ?
Chris@1313 251 AudioFileReaderFactory::Normalisation::Peak :
Chris@1313 252 AudioFileReaderFactory::Normalisation::None);
Chris@1313 253 params.gaplessMode = (gapless ?
Chris@1313 254 AudioFileReaderFactory::GaplessMode::Gapless :
Chris@1313 255 AudioFileReaderFactory::GaplessMode::Gappy);
Chris@757 256
Chris@1429 257 AudioFileReader *reader =
Chris@1429 258 AudioFileReaderFactory::createReader
Chris@1429 259 (audioDir + "/" + format + "/" + audiofile, params);
Chris@1313 260
Chris@1429 261 if (!reader) {
Chris@820 262 #if ( QT_VERSION >= 0x050000 )
Chris@1429 263 QSKIP("Unsupported file, skipping");
Chris@820 264 #else
Chris@1429 265 QSKIP("Unsupported file, skipping", SkipSingle);
Chris@820 266 #endif
Chris@1429 267 }
Chris@756 268
Chris@1313 269 QString extension;
Chris@1313 270 sv_samplerate_t fileRate;
Chris@1313 271 int channels;
Chris@1313 272 int fileBitdepth;
Chris@1313 273 getFileMetadata(audiofile, extension, fileRate, channels, fileBitdepth);
Chris@1313 274
Chris@1313 275 QCOMPARE((int)reader->getChannelCount(), channels);
Chris@1313 276 QCOMPARE(reader->getNativeRate(), fileRate);
Chris@1040 277 QCOMPARE(reader->getSampleRate(), readRate);
Chris@757 278
Chris@1429 279 AudioTestData tdata(readRate, channels);
Chris@1429 280
Chris@1429 281 float *reference = tdata.getInterleavedData();
Chris@1040 282 sv_frame_t refFrames = tdata.getFrameCount();
Chris@1429 283
Chris@1429 284 // The reader should give us exactly the expected number of
Chris@1429 285 // frames, except for mp3/aac files. We ask for quite a lot
Chris@1429 286 // more, though, so we can (a) check that we only get the
Chris@1429 287 // expected number back (if this is not mp3/aac) or (b) take
Chris@1429 288 // into account silence at beginning and end (if it is).
Chris@1429 289 floatvec_t test = reader->getInterleavedFrames(0, refFrames + 5000);
Chris@1402 290
Chris@1402 291 delete reader;
Chris@1402 292 reader = 0;
Chris@1402 293
Chris@1429 294 sv_frame_t read = test.size() / channels;
Chris@756 295
Chris@1313 296 bool perceptual = (extension == "mp3" ||
Chris@1313 297 extension == "aac" ||
Chris@1313 298 extension == "m4a");
Chris@1313 299
Chris@1313 300 if (perceptual && !gapless) {
Chris@1313 301 // allow silence at start and end
Chris@759 302 QVERIFY(read >= refFrames);
Chris@757 303 } else {
Chris@759 304 QCOMPARE(read, refFrames);
Chris@757 305 }
Chris@757 306
Chris@1313 307 bool resampled = readRate != fileRate;
Chris@1313 308 double maxLimit, rmsLimit;
cannam@1315 309 getExpectedThresholds(format,
cannam@1315 310 audiofile,
Chris@1313 311 resampled,
Chris@1313 312 gapless,
Chris@1313 313 normalised,
Chris@1313 314 maxLimit, rmsLimit);
Chris@1313 315
Chris@1313 316 double edgeLimit = maxLimit * 3; // in first or final edgeSize frames
Chris@1313 317 if (resampled && edgeLimit < 0.1) edgeLimit = 0.1;
Chris@759 318 int edgeSize = 100;
Chris@759 319
Chris@759 320 // And we ignore completely the last few frames when upsampling
Chris@1313 321 int discard = 1 + int(round(readRate / fileRate));
Chris@759 322
Chris@759 323 int offset = 0;
Chris@759 324
Chris@1313 325 if (perceptual) {
Chris@759 326
cannam@1314 327 // Look for an initial offset.
cannam@1314 328 //
cannam@1314 329 // We know the first channel has a sinusoid in it. It
cannam@1314 330 // should have a peak at 0.4ms (see AudioTestData.h) but
cannam@1314 331 // that might have been clipped, which would make it
cannam@1314 332 // imprecise. We can tell if it's clipped, though, as
cannam@1314 333 // there will be samples having exactly identical
cannam@1314 334 // values. So what we look for is the peak if it's not
cannam@1314 335 // clipped and, if it is, the first zero crossing after
cannam@1314 336 // the peak, which should be at 0.8ms.
cannam@1314 337
Chris@1296 338 int expectedPeak = int(0.0004 * readRate);
cannam@1314 339 int expectedZC = int(0.0008 * readRate);
cannam@1314 340 bool foundPeak = false;
cannam@1314 341 for (int i = 1; i+1 < read; ++i) {
cannam@1314 342 float prevSample = test[(i-1) * channels];
cannam@1314 343 float thisSample = test[i * channels];
cannam@1314 344 float nextSample = test[(i+1) * channels];
cannam@1314 345 if (thisSample > 0.8 && nextSample < thisSample) {
cannam@1314 346 foundPeak = true;
cannam@1314 347 if (thisSample > prevSample) {
cannam@1314 348 // not clipped
cannam@1314 349 offset = i - expectedPeak - 1;
cannam@1314 350 break;
cannam@1314 351 }
cannam@1314 352 }
cannam@1314 353 if (foundPeak && (thisSample >= 0.0 && nextSample < 0.0)) {
cannam@1315 354 // cerr << "thisSample = " << thisSample << ", nextSample = "
cannam@1315 355 // << nextSample << endl;
cannam@1314 356 offset = i - expectedZC - 1;
Chris@759 357 break;
Chris@759 358 }
Chris@759 359 }
Chris@1313 360
cannam@1315 361 // int fileRateEquivalent = int((offset / readRate) * fileRate);
cannam@1315 362 // std::cerr << "offset = " << offset << std::endl;
cannam@1315 363 // std::cerr << "at file rate would be " << fileRateEquivalent << std::endl;
Chris@1313 364
Chris@1313 365 // Previously our m4a test file had a fixed offset of 1024
Chris@1313 366 // at the file sample rate -- this may be because it was
Chris@1313 367 // produced by FAAC which did not write in the delay as
Chris@1313 368 // metadata? We now have an m4a produced by Core Audio
Chris@1313 369 // which gives a 0 offset. What to do...
Chris@1313 370
Chris@1313 371 // Anyway, mp3s should have 0 offset in gapless mode and
Chris@1313 372 // "something else" otherwise.
Chris@1313 373
Chris@1313 374 if (gapless) {
cannam@1315 375 if (format == "aac") {
cannam@1315 376 // ouch!
cannam@1315 377 if (offset == -1) offset = 0;
cannam@1315 378 }
Chris@1313 379 QCOMPARE(offset, 0);
Chris@1313 380 }
Chris@759 381 }
Chris@756 382
cannam@1315 383 {
cannam@1315 384 // Write the diff file now, so that it's already been written
cannam@1315 385 // even if the comparison fails. We aren't checking anything
cannam@1315 386 // here except as necessary to avoid buffer overruns etc
cannam@1315 387
cannam@1315 388 QString diffFile =
cannam@1315 389 testName(format, audiofile, rate, normalised, gapless);
cannam@1315 390 diffFile.replace("/", "_");
cannam@1315 391 diffFile.replace(".", "_");
cannam@1315 392 diffFile.replace(" ", "_");
cannam@1315 393 diffFile += ".wav";
cannam@1315 394 diffFile = QDir(diffDir).filePath(diffFile);
cannam@1315 395 WavFileWriter diffWriter(diffFile, readRate, channels,
Chris@1359 396 WavFileWriter::WriteToTemporary);
cannam@1315 397 QVERIFY(diffWriter.isOK());
cannam@1315 398
cannam@1315 399 vector<vector<float>> diffs(channels);
cannam@1315 400 for (int c = 0; c < channels; ++c) {
cannam@1315 401 for (int i = 0; i < refFrames; ++i) {
cannam@1315 402 int ix = i + offset;
cannam@1315 403 if (ix < read) {
cannam@1315 404 float signeddiff =
cannam@1315 405 test[ix * channels + c] -
cannam@1315 406 reference[i * channels + c];
cannam@1315 407 diffs[c].push_back(signeddiff);
cannam@1315 408 }
cannam@1315 409 }
cannam@1315 410 }
cannam@1315 411 float **ptrs = new float*[channels];
cannam@1315 412 for (int c = 0; c < channels; ++c) {
cannam@1315 413 ptrs[c] = diffs[c].data();
cannam@1315 414 }
cannam@1315 415 diffWriter.writeSamples(ptrs, refFrames);
cannam@1315 416 delete[] ptrs;
cannam@1315 417 }
Chris@1313 418
Chris@1346 419 for (int c = 0; c < channels; ++c) {
Chris@1313 420
Chris@1313 421 double maxDiff = 0.0;
Chris@1313 422 double totalDiff = 0.0;
Chris@1313 423 double totalSqrDiff = 0.0;
Chris@1346 424 int maxIndex = 0;
Chris@1313 425
Chris@1346 426 for (int i = 0; i < refFrames; ++i) {
Chris@1296 427 int ix = i + offset;
Chris@1296 428 if (ix >= read) {
Chris@1428 429 SVCERR << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl;
Chris@1296 430 QVERIFY(ix < read);
Chris@1296 431 }
Chris@1313 432
Chris@1296 433 if (ix + discard >= read) {
Chris@1296 434 // we forgive the very edge samples when
Chris@1296 435 // resampling (discard > 0)
Chris@1296 436 continue;
Chris@1296 437 }
Chris@1313 438
Chris@1346 439 double diff = fabs(test[ix * channels + c] -
cannam@1315 440 reference[i * channels + c]);
Chris@1313 441
Chris@1346 442 totalDiff += diff;
Chris@1313 443 totalSqrDiff += diff * diff;
Chris@1313 444
Chris@757 445 // in edge areas, record this only if it exceeds edgeLimit
Chris@1313 446 if (i < edgeSize || i + edgeSize >= refFrames) {
Chris@1313 447 if (diff > edgeLimit && diff > maxDiff) {
Chris@1313 448 maxDiff = diff;
Chris@1313 449 maxIndex = i;
Chris@757 450 }
Chris@757 451 } else {
Chris@1313 452 if (diff > maxDiff) {
Chris@1313 453 maxDiff = diff;
Chris@1313 454 maxIndex = i;
Chris@757 455 }
Chris@1346 456 }
Chris@1346 457 }
Chris@1313 458
Chris@1346 459 double meanDiff = totalDiff / double(refFrames);
Chris@1313 460 double rmsDiff = sqrt(totalSqrDiff / double(refFrames));
cannam@1308 461
cannam@1314 462 /*
Chris@1346 463 cerr << "channel " << c << ": mean diff " << meanDiff << endl;
Chris@1429 464 cerr << "channel " << c << ": rms diff " << rmsDiff << endl;
Chris@1429 465 cerr << "channel " << c << ": max diff " << maxDiff << " at " << maxIndex << endl;
cannam@1314 466 */
Chris@1313 467 if (rmsDiff >= rmsLimit) {
Chris@1428 468 SVCERR << "ERROR: for audiofile " << audiofile << ": RMS diff = " << rmsDiff << " for channel " << c << " (limit = " << rmsLimit << ")" << endl;
Chris@1313 469 QVERIFY(rmsDiff < rmsLimit);
Chris@1313 470 }
Chris@1346 471 if (maxDiff >= maxLimit) {
Chris@1428 472 SVCERR << "ERROR: for audiofile " << audiofile << ": max diff = " << maxDiff << " at frame " << maxIndex << " of " << read << " on channel " << c << " (limit = " << maxLimit << ", edge limit = " << edgeLimit << ", mean diff = " << meanDiff << ", rms = " << rmsDiff << ")" << endl;
Chris@1346 473 QVERIFY(maxDiff < maxLimit);
Chris@1346 474 }
Chris@1313 475
Chris@1313 476 // and check for spurious material at end
Chris@1313 477
Chris@1309 478 for (sv_frame_t i = refFrames; i + offset < read; ++i) {
Chris@1309 479 sv_frame_t ix = i + offset;
Chris@1323 480 float quiet = 0.1f; //!!! allow some ringing - but let's come back to this, it should tail off
cannam@1308 481 float mag = fabsf(test[ix * channels + c]);
cannam@1308 482 if (mag > quiet) {
Chris@1428 483 SVCERR << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << " after reference+offset ended at " << refFrames+offset << ")" << endl;
cannam@1308 484 QVERIFY(mag < quiet);
cannam@1308 485 }
cannam@1308 486 }
Chris@1429 487 }
Chris@756 488 }
Chris@756 489 };
Chris@756 490
Chris@756 491 #endif