annotate data/fileio/test/AudioFileReaderTest.h @ 1496:fde8c497373f

Avoid crashing if an effects plugin can't be instantiated and so the output vector is empty in the transformer's run() method
author Chris Cannam
date Mon, 13 Aug 2018 15:25:32 +0100
parents 48e9f538e6e9
children d2555df635ec
rev   line source
Chris@756 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@756 2
Chris@756 3 /*
Chris@756 4 Sonic Visualiser
Chris@756 5 An audio file viewer and annotation editor.
Chris@756 6 Centre for Digital Music, Queen Mary, University of London.
Chris@756 7 This file copyright 2013 Chris Cannam.
Chris@756 8
Chris@756 9 This program is free software; you can redistribute it and/or
Chris@756 10 modify it under the terms of the GNU General Public License as
Chris@756 11 published by the Free Software Foundation; either version 2 of the
Chris@756 12 License, or (at your option) any later version. See the file
Chris@756 13 COPYING included with this distribution for more information.
Chris@756 14 */
Chris@756 15
Chris@756 16 #ifndef TEST_AUDIO_FILE_READER_H
Chris@756 17 #define TEST_AUDIO_FILE_READER_H
Chris@756 18
#include "../AudioFileReaderFactory.h"
#include "../AudioFileReader.h"
#include "../WavFileWriter.h"

#include "AudioTestData.h"

#include <cmath>
#include <iostream>
#include <memory>

#include <QObject>
#include <QtTest>
#include <QDir>
Chris@756 32
Chris@756 33 using namespace std;
Chris@756 34
Chris@756 35 class AudioFileReaderTest : public QObject
Chris@756 36 {
Chris@756 37 Q_OBJECT
Chris@756 38
Chris@1346 39 private:
Chris@1346 40 QString testDirBase;
Chris@1346 41 QString audioDir;
Chris@1346 42 QString diffDir;
Chris@1346 43
Chris@1346 44 public:
Chris@1346 45 AudioFileReaderTest(QString base) {
Chris@1346 46 if (base == "") {
Chris@1346 47 base = "svcore/data/fileio/test";
Chris@1346 48 }
Chris@1346 49 testDirBase = base;
Chris@1359 50 audioDir = base + "/audio";
Chris@1346 51 diffDir = base + "/diffs";
Chris@1346 52 }
Chris@1346 53
Chris@1346 54 private:
Chris@756 55 const char *strOf(QString s) {
Chris@756 56 return strdup(s.toLocal8Bit().data());
Chris@756 57 }
Chris@756 58
Chris@1313 59 void getFileMetadata(QString filename,
Chris@1313 60 QString &extension,
Chris@1313 61 sv_samplerate_t &rate,
Chris@1313 62 int &channels,
Chris@1313 63 int &bitdepth) {
Chris@1313 64
Chris@1313 65 QStringList fileAndExt = filename.split(".");
Chris@1313 66 QStringList bits = fileAndExt[0].split("-");
Chris@1313 67
Chris@1313 68 extension = fileAndExt[1];
Chris@1313 69 rate = bits[0].toInt();
Chris@1313 70 channels = bits[1].toInt();
Chris@1313 71 bitdepth = 16;
Chris@1313 72 if (bits.length() > 2) {
Chris@1313 73 bitdepth = bits[2].toInt();
Chris@1313 74 }
Chris@1313 75 }
Chris@1313 76
cannam@1315 77 void getExpectedThresholds(QString format,
cannam@1315 78 QString filename,
Chris@1313 79 bool resampled,
Chris@1313 80 bool gapless,
Chris@1313 81 bool normalised,
Chris@1313 82 double &maxLimit,
Chris@1313 83 double &rmsLimit) {
Chris@1313 84
Chris@1313 85 QString extension;
Chris@1313 86 sv_samplerate_t fileRate;
Chris@1313 87 int channels;
Chris@1313 88 int bitdepth;
Chris@1313 89 getFileMetadata(filename, extension, fileRate, channels, bitdepth);
Chris@1313 90
Chris@1313 91 if (normalised) {
Chris@1313 92
cannam@1315 93 if (format == "ogg") {
Chris@1313 94
Chris@1313 95 // Our ogg is not especially high quality and is
Chris@1313 96 // actually further from the original if normalised
Chris@1313 97
Chris@1313 98 maxLimit = 0.1;
Chris@1313 99 rmsLimit = 0.03;
Chris@1313 100
cannam@1315 101 } else if (format == "aac") {
Chris@1313 102
cannam@1315 103 // Terrible performance for this test, load of spill
cannam@1315 104 // from one channel to the other. I guess they know
cannam@1315 105 // what they're doing, it's perceptual after all, but
cannam@1315 106 // it does make this check a bit superfluous, you
cannam@1315 107 // could probably pass it with a signal that sounds
cannam@1315 108 // nothing like the original
cannam@1315 109 maxLimit = 0.2;
cannam@1314 110 rmsLimit = 0.1;
Chris@1313 111
cannam@1315 112 } else if (format == "mp3") {
Chris@1313 113
Chris@1313 114 if (resampled && !gapless) {
Chris@1313 115
Chris@1313 116 // We expect worse figures here, because the
Chris@1313 117 // combination of uncompensated encoder delay +
Chris@1313 118 // resampling results in a fractional delay which
Chris@1313 119 // means the decoded signal is slightly out of
Chris@1313 120 // phase compared to the test signal
Chris@1313 121
Chris@1313 122 maxLimit = 0.1;
Chris@1313 123 rmsLimit = 0.05;
Chris@1313 124
Chris@1313 125 } else {
Chris@1313 126
Chris@1313 127 maxLimit = 0.05;
Chris@1313 128 rmsLimit = 0.01;
Chris@1313 129 }
Chris@1313 130
Chris@1313 131 } else {
Chris@1313 132
cannam@1315 133 // lossless formats (wav, aiff, flac, apple_lossless)
Chris@1313 134
Chris@1313 135 if (bitdepth >= 16 && !resampled) {
Chris@1313 136 maxLimit = 1e-3;
Chris@1313 137 rmsLimit = 3e-4;
Chris@1313 138 } else {
Chris@1313 139 maxLimit = 0.01;
Chris@1313 140 rmsLimit = 5e-3;
Chris@1313 141 }
Chris@1313 142 }
Chris@1313 143
Chris@1313 144 } else { // !normalised
Chris@1313 145
cannam@1315 146 if (format == "ogg") {
Chris@1313 147
Chris@1313 148 maxLimit = 0.06;
Chris@1313 149 rmsLimit = 0.03;
Chris@1313 150
cannam@1315 151 } else if (format == "aac") {
Chris@1313 152
cannam@1315 153 maxLimit = 0.1;
cannam@1315 154 rmsLimit = 0.1;
Chris@1313 155
cannam@1315 156 } else if (format == "mp3") {
Chris@1313 157
Chris@1313 158 // all mp3 figures are worse when not normalising
Chris@1313 159 maxLimit = 0.1;
Chris@1313 160 rmsLimit = 0.05;
Chris@1313 161
Chris@1313 162 } else {
Chris@1313 163
cannam@1315 164 // lossless formats (wav, aiff, flac, apple_lossless)
Chris@1313 165
Chris@1313 166 if (bitdepth >= 16 && !resampled) {
Chris@1313 167 maxLimit = 1e-3;
Chris@1313 168 rmsLimit = 3e-4;
Chris@1313 169 } else {
Chris@1313 170 maxLimit = 0.02;
Chris@1313 171 rmsLimit = 0.01;
Chris@1313 172 }
Chris@1313 173 }
Chris@1313 174 }
Chris@1313 175 }
Chris@1313 176
cannam@1315 177 QString testName(QString format, QString filename, int rate, bool norm, bool gapless) {
cannam@1315 178 return QString("%1/%2 at %3%4%5")
cannam@1315 179 .arg(format)
Chris@1313 180 .arg(filename)
Chris@1313 181 .arg(rate)
Chris@1313 182 .arg(norm ? " normalised": "")
Chris@1313 183 .arg(gapless ? "" : " non-gapless");
Chris@1313 184 }
Chris@1313 185
Chris@756 186 private slots:
Chris@756 187 void init()
Chris@756 188 {
Chris@756 189 if (!QDir(audioDir).exists()) {
Chris@1346 190 QString cwd = QDir::currentPath();
Chris@1428 191 SVCERR << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist (cwd = " << cwd << ")" << endl;
Chris@756 192 QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found");
Chris@756 193 }
Chris@1313 194 if (!QDir(diffDir).exists() && !QDir().mkpath(diffDir)) {
Chris@1428 195 SVCERR << "ERROR: Audio diff directory \"" << diffDir << "\" does not exist and could not be created" << endl;
Chris@1313 196 QVERIFY2(QDir(diffDir).exists(), "Audio diff directory not found and could not be created");
Chris@1313 197 }
Chris@756 198 }
Chris@756 199
Chris@756 200 void read_data()
Chris@756 201 {
cannam@1315 202 QTest::addColumn<QString>("format");
Chris@756 203 QTest::addColumn<QString>("audiofile");
Chris@1313 204 QTest::addColumn<int>("rate");
Chris@1313 205 QTest::addColumn<bool>("normalised");
Chris@1313 206 QTest::addColumn<bool>("gapless");
cannam@1315 207 QStringList dirs = QDir(audioDir).entryList(QDir::Dirs |
cannam@1315 208 QDir::NoDotAndDotDot);
cannam@1315 209 for (QString format: dirs) {
cannam@1315 210 QStringList files = QDir(QDir(audioDir).filePath(format))
cannam@1315 211 .entryList(QDir::Files);
cannam@1315 212 int readRates[] = { 44100, 48000 };
cannam@1315 213 bool norms[] = { false, true };
cannam@1315 214 bool gaplesses[] = { true, false };
cannam@1315 215 foreach (QString filename, files) {
cannam@1315 216 for (int rate: readRates) {
cannam@1315 217 for (bool norm: norms) {
cannam@1315 218 for (bool gapless: gaplesses) {
Chris@1313 219
cannam@1315 220 if (format != "mp3" && !gapless) {
cannam@1315 221 continue;
cannam@1315 222 }
cannam@1315 223
cannam@1315 224 QString desc = testName
cannam@1315 225 (format, filename, rate, norm, gapless);
cannam@1315 226
cannam@1315 227 QTest::newRow(strOf(desc))
cannam@1315 228 << format << filename << rate << norm << gapless;
Chris@1313 229 }
Chris@1313 230 }
Chris@1313 231 }
Chris@1313 232 }
Chris@756 233 }
Chris@756 234 }
Chris@756 235
Chris@756 236 void read()
Chris@756 237 {
cannam@1315 238 QFETCH(QString, format);
Chris@756 239 QFETCH(QString, audiofile);
Chris@1313 240 QFETCH(int, rate);
Chris@1313 241 QFETCH(bool, normalised);
Chris@1313 242 QFETCH(bool, gapless);
Chris@756 243
Chris@1313 244 sv_samplerate_t readRate(rate);
Chris@1313 245
cannam@1315 246 // cerr << "\naudiofile = " << audiofile << endl;
Chris@1313 247
Chris@1313 248 AudioFileReaderFactory::Parameters params;
Chris@1313 249 params.targetRate = readRate;
Chris@1313 250 params.normalisation = (normalised ?
Chris@1313 251 AudioFileReaderFactory::Normalisation::Peak :
Chris@1313 252 AudioFileReaderFactory::Normalisation::None);
Chris@1313 253 params.gaplessMode = (gapless ?
Chris@1313 254 AudioFileReaderFactory::GaplessMode::Gapless :
Chris@1313 255 AudioFileReaderFactory::GaplessMode::Gappy);
Chris@757 256
Chris@1429 257 AudioFileReader *reader =
Chris@1429 258 AudioFileReaderFactory::createReader
Chris@1429 259 (audioDir + "/" + format + "/" + audiofile, params);
Chris@1313 260
Chris@1429 261 if (!reader) {
Chris@820 262 #if ( QT_VERSION >= 0x050000 )
Chris@1429 263 QSKIP("Unsupported file, skipping");
Chris@820 264 #else
Chris@1429 265 QSKIP("Unsupported file, skipping", SkipSingle);
Chris@820 266 #endif
Chris@1429 267 }
Chris@756 268
Chris@1313 269 QString extension;
Chris@1313 270 sv_samplerate_t fileRate;
Chris@1313 271 int channels;
Chris@1313 272 int fileBitdepth;
Chris@1313 273 getFileMetadata(audiofile, extension, fileRate, channels, fileBitdepth);
Chris@1313 274
Chris@1313 275 QCOMPARE((int)reader->getChannelCount(), channels);
Chris@1313 276 QCOMPARE(reader->getNativeRate(), fileRate);
Chris@1040 277 QCOMPARE(reader->getSampleRate(), readRate);
Chris@757 278
Chris@1429 279 AudioTestData tdata(readRate, channels);
Chris@1429 280
Chris@1429 281 float *reference = tdata.getInterleavedData();
Chris@1040 282 sv_frame_t refFrames = tdata.getFrameCount();
Chris@1429 283
Chris@1429 284 // The reader should give us exactly the expected number of
Chris@1429 285 // frames, except for mp3/aac files. We ask for quite a lot
Chris@1429 286 // more, though, so we can (a) check that we only get the
Chris@1429 287 // expected number back (if this is not mp3/aac) or (b) take
Chris@1429 288 // into account silence at beginning and end (if it is).
Chris@1429 289 floatvec_t test = reader->getInterleavedFrames(0, refFrames + 5000);
Chris@1402 290
Chris@1402 291 delete reader;
Chris@1402 292 reader = 0;
Chris@1402 293
Chris@1429 294 sv_frame_t read = test.size() / channels;
Chris@756 295
Chris@1313 296 bool perceptual = (extension == "mp3" ||
Chris@1313 297 extension == "aac" ||
Chris@1313 298 extension == "m4a");
Chris@1313 299
Chris@1313 300 if (perceptual && !gapless) {
Chris@1313 301 // allow silence at start and end
Chris@759 302 QVERIFY(read >= refFrames);
Chris@757 303 } else {
Chris@759 304 QCOMPARE(read, refFrames);
Chris@757 305 }
Chris@757 306
Chris@1313 307 bool resampled = readRate != fileRate;
Chris@1313 308 double maxLimit, rmsLimit;
cannam@1315 309 getExpectedThresholds(format,
cannam@1315 310 audiofile,
Chris@1313 311 resampled,
Chris@1313 312 gapless,
Chris@1313 313 normalised,
Chris@1313 314 maxLimit, rmsLimit);
Chris@1313 315
Chris@1313 316 double edgeLimit = maxLimit * 3; // in first or final edgeSize frames
Chris@1313 317 if (resampled && edgeLimit < 0.1) edgeLimit = 0.1;
Chris@759 318 int edgeSize = 100;
Chris@759 319
Chris@759 320 // And we ignore completely the last few frames when upsampling
Chris@1313 321 int discard = 1 + int(round(readRate / fileRate));
Chris@759 322
Chris@759 323 int offset = 0;
Chris@759 324
Chris@1313 325 if (perceptual) {
Chris@759 326
cannam@1314 327 // Look for an initial offset.
cannam@1314 328 //
cannam@1314 329 // We know the first channel has a sinusoid in it. It
cannam@1314 330 // should have a peak at 0.4ms (see AudioTestData.h) but
cannam@1314 331 // that might have been clipped, which would make it
cannam@1314 332 // imprecise. We can tell if it's clipped, though, as
cannam@1314 333 // there will be samples having exactly identical
cannam@1314 334 // values. So what we look for is the peak if it's not
cannam@1314 335 // clipped and, if it is, the first zero crossing after
cannam@1314 336 // the peak, which should be at 0.8ms.
cannam@1314 337
Chris@1296 338 int expectedPeak = int(0.0004 * readRate);
cannam@1314 339 int expectedZC = int(0.0008 * readRate);
cannam@1314 340 bool foundPeak = false;
cannam@1314 341 for (int i = 1; i+1 < read; ++i) {
cannam@1314 342 float prevSample = test[(i-1) * channels];
cannam@1314 343 float thisSample = test[i * channels];
cannam@1314 344 float nextSample = test[(i+1) * channels];
cannam@1314 345 if (thisSample > 0.8 && nextSample < thisSample) {
cannam@1314 346 foundPeak = true;
cannam@1314 347 if (thisSample > prevSample) {
cannam@1314 348 // not clipped
cannam@1314 349 offset = i - expectedPeak - 1;
cannam@1314 350 break;
cannam@1314 351 }
cannam@1314 352 }
cannam@1314 353 if (foundPeak && (thisSample >= 0.0 && nextSample < 0.0)) {
cannam@1315 354 // cerr << "thisSample = " << thisSample << ", nextSample = "
cannam@1315 355 // << nextSample << endl;
cannam@1314 356 offset = i - expectedZC - 1;
Chris@759 357 break;
Chris@759 358 }
Chris@759 359 }
Chris@1313 360
cannam@1315 361 // int fileRateEquivalent = int((offset / readRate) * fileRate);
cannam@1315 362 // std::cerr << "offset = " << offset << std::endl;
cannam@1315 363 // std::cerr << "at file rate would be " << fileRateEquivalent << std::endl;
Chris@1313 364
Chris@1313 365 // Previously our m4a test file had a fixed offset of 1024
Chris@1313 366 // at the file sample rate -- this may be because it was
Chris@1313 367 // produced by FAAC which did not write in the delay as
Chris@1313 368 // metadata? We now have an m4a produced by Core Audio
Chris@1313 369 // which gives a 0 offset. What to do...
Chris@1313 370
Chris@1313 371 // Anyway, mp3s should have 0 offset in gapless mode and
Chris@1313 372 // "something else" otherwise.
Chris@1313 373
Chris@1313 374 if (gapless) {
cannam@1315 375 if (format == "aac") {
cannam@1315 376 // ouch!
cannam@1315 377 if (offset == -1) offset = 0;
cannam@1315 378 }
Chris@1313 379 QCOMPARE(offset, 0);
Chris@1313 380 }
Chris@759 381 }
Chris@756 382
cannam@1315 383 {
cannam@1315 384 // Write the diff file now, so that it's already been written
cannam@1315 385 // even if the comparison fails. We aren't checking anything
cannam@1315 386 // here except as necessary to avoid buffer overruns etc
cannam@1315 387
cannam@1315 388 QString diffFile =
cannam@1315 389 testName(format, audiofile, rate, normalised, gapless);
cannam@1315 390 diffFile.replace("/", "_");
cannam@1315 391 diffFile.replace(".", "_");
cannam@1315 392 diffFile.replace(" ", "_");
cannam@1315 393 diffFile += ".wav";
cannam@1315 394 diffFile = QDir(diffDir).filePath(diffFile);
cannam@1315 395 WavFileWriter diffWriter(diffFile, readRate, channels,
Chris@1359 396 WavFileWriter::WriteToTemporary);
cannam@1315 397 QVERIFY(diffWriter.isOK());
cannam@1315 398
cannam@1315 399 vector<vector<float>> diffs(channels);
cannam@1315 400 for (int c = 0; c < channels; ++c) {
cannam@1315 401 for (int i = 0; i < refFrames; ++i) {
cannam@1315 402 int ix = i + offset;
cannam@1315 403 if (ix < read) {
cannam@1315 404 float signeddiff =
cannam@1315 405 test[ix * channels + c] -
cannam@1315 406 reference[i * channels + c];
cannam@1315 407 diffs[c].push_back(signeddiff);
cannam@1315 408 }
cannam@1315 409 }
cannam@1315 410 }
cannam@1315 411 float **ptrs = new float*[channels];
cannam@1315 412 for (int c = 0; c < channels; ++c) {
cannam@1315 413 ptrs[c] = diffs[c].data();
cannam@1315 414 }
cannam@1315 415 diffWriter.writeSamples(ptrs, refFrames);
cannam@1315 416 delete[] ptrs;
cannam@1315 417 }
Chris@1313 418
Chris@1346 419 for (int c = 0; c < channels; ++c) {
Chris@1313 420
Chris@1313 421 double maxDiff = 0.0;
Chris@1313 422 double totalDiff = 0.0;
Chris@1313 423 double totalSqrDiff = 0.0;
Chris@1346 424 int maxIndex = 0;
Chris@1313 425
Chris@1346 426 for (int i = 0; i < refFrames; ++i) {
Chris@1296 427 int ix = i + offset;
Chris@1296 428 if (ix >= read) {
Chris@1428 429 SVCERR << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl;
Chris@1296 430 QVERIFY(ix < read);
Chris@1296 431 }
Chris@1313 432
Chris@1296 433 if (ix + discard >= read) {
Chris@1296 434 // we forgive the very edge samples when
Chris@1296 435 // resampling (discard > 0)
Chris@1296 436 continue;
Chris@1296 437 }
Chris@1313 438
Chris@1346 439 double diff = fabs(test[ix * channels + c] -
cannam@1315 440 reference[i * channels + c]);
Chris@1313 441
Chris@1346 442 totalDiff += diff;
Chris@1313 443 totalSqrDiff += diff * diff;
Chris@1313 444
Chris@757 445 // in edge areas, record this only if it exceeds edgeLimit
Chris@1313 446 if (i < edgeSize || i + edgeSize >= refFrames) {
Chris@1313 447 if (diff > edgeLimit && diff > maxDiff) {
Chris@1313 448 maxDiff = diff;
Chris@1313 449 maxIndex = i;
Chris@757 450 }
Chris@757 451 } else {
Chris@1313 452 if (diff > maxDiff) {
Chris@1313 453 maxDiff = diff;
Chris@1313 454 maxIndex = i;
Chris@757 455 }
Chris@1346 456 }
Chris@1346 457 }
Chris@1313 458
Chris@1346 459 double meanDiff = totalDiff / double(refFrames);
Chris@1313 460 double rmsDiff = sqrt(totalSqrDiff / double(refFrames));
cannam@1308 461
cannam@1314 462 /*
Chris@1346 463 cerr << "channel " << c << ": mean diff " << meanDiff << endl;
Chris@1429 464 cerr << "channel " << c << ": rms diff " << rmsDiff << endl;
Chris@1429 465 cerr << "channel " << c << ": max diff " << maxDiff << " at " << maxIndex << endl;
cannam@1314 466 */
Chris@1313 467 if (rmsDiff >= rmsLimit) {
Chris@1428 468 SVCERR << "ERROR: for audiofile " << audiofile << ": RMS diff = " << rmsDiff << " for channel " << c << " (limit = " << rmsLimit << ")" << endl;
Chris@1313 469 QVERIFY(rmsDiff < rmsLimit);
Chris@1313 470 }
Chris@1346 471 if (maxDiff >= maxLimit) {
Chris@1428 472 SVCERR << "ERROR: for audiofile " << audiofile << ": max diff = " << maxDiff << " at frame " << maxIndex << " of " << read << " on channel " << c << " (limit = " << maxLimit << ", edge limit = " << edgeLimit << ", mean diff = " << meanDiff << ", rms = " << rmsDiff << ")" << endl;
Chris@1346 473 QVERIFY(maxDiff < maxLimit);
Chris@1346 474 }
Chris@1313 475
Chris@1313 476 // and check for spurious material at end
Chris@1313 477
Chris@1309 478 for (sv_frame_t i = refFrames; i + offset < read; ++i) {
Chris@1309 479 sv_frame_t ix = i + offset;
Chris@1323 480 float quiet = 0.1f; //!!! allow some ringing - but let's come back to this, it should tail off
cannam@1308 481 float mag = fabsf(test[ix * channels + c]);
cannam@1308 482 if (mag > quiet) {
Chris@1428 483 SVCERR << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << " after reference+offset ended at " << refFrames+offset << ")" << endl;
cannam@1308 484 QVERIFY(mag < quiet);
cannam@1308 485 }
cannam@1308 486 }
Chris@1429 487 }
Chris@756 488 }
Chris@756 489 };
Chris@756 490
Chris@756 491 #endif