annotate data/fileio/test/AudioFileReaderTest.h @ 1383:f204f2fcb15e

Header required
author Chris Cannam
date Wed, 22 Feb 2017 09:53:41 +0000
parents 1c9bbbb6116a
children aadfb395e933
rev   line source
Chris@756 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@756 2
Chris@756 3 /*
Chris@756 4 Sonic Visualiser
Chris@756 5 An audio file viewer and annotation editor.
Chris@756 6 Centre for Digital Music, Queen Mary, University of London.
Chris@756 7 This file copyright 2013 Chris Cannam.
Chris@756 8
Chris@756 9 This program is free software; you can redistribute it and/or
Chris@756 10 modify it under the terms of the GNU General Public License as
Chris@756 11 published by the Free Software Foundation; either version 2 of the
Chris@756 12 License, or (at your option) any later version. See the file
Chris@756 13 COPYING included with this distribution for more information.
Chris@756 14 */
Chris@756 15
Chris@756 16 #ifndef TEST_AUDIO_FILE_READER_H
Chris@756 17 #define TEST_AUDIO_FILE_READER_H
Chris@756 18
Chris@756 19 #include "../AudioFileReaderFactory.h"
Chris@756 20 #include "../AudioFileReader.h"
Chris@1313 21 #include "../WavFileWriter.h"
Chris@756 22
Chris@756 23 #include "AudioTestData.h"
Chris@756 24
Chris@756 25 #include <cmath>
Chris@756 26
Chris@756 27 #include <QObject>
Chris@756 28 #include <QtTest>
Chris@756 29 #include <QDir>
Chris@756 30
Chris@756 31 #include <iostream>
Chris@756 32
Chris@756 33 using namespace std;
Chris@756 34
Chris@756 35 class AudioFileReaderTest : public QObject
Chris@756 36 {
Chris@756 37 Q_OBJECT
Chris@756 38
Chris@1346 39 private:
Chris@1346 40 QString testDirBase;
Chris@1346 41 QString audioDir;
Chris@1346 42 QString diffDir;
Chris@1346 43
Chris@1346 44 public:
Chris@1346 45 AudioFileReaderTest(QString base) {
Chris@1346 46 if (base == "") {
Chris@1346 47 base = "svcore/data/fileio/test";
Chris@1346 48 }
Chris@1346 49 testDirBase = base;
Chris@1359 50 audioDir = base + "/audio";
Chris@1346 51 diffDir = base + "/diffs";
Chris@1346 52 }
Chris@1346 53
Chris@1346 54 private:
Chris@756 55 const char *strOf(QString s) {
Chris@756 56 return strdup(s.toLocal8Bit().data());
Chris@756 57 }
Chris@756 58
Chris@1313 59 void getFileMetadata(QString filename,
Chris@1313 60 QString &extension,
Chris@1313 61 sv_samplerate_t &rate,
Chris@1313 62 int &channels,
Chris@1313 63 int &bitdepth) {
Chris@1313 64
Chris@1313 65 QStringList fileAndExt = filename.split(".");
Chris@1313 66 QStringList bits = fileAndExt[0].split("-");
Chris@1313 67
Chris@1313 68 extension = fileAndExt[1];
Chris@1313 69 rate = bits[0].toInt();
Chris@1313 70 channels = bits[1].toInt();
Chris@1313 71 bitdepth = 16;
Chris@1313 72 if (bits.length() > 2) {
Chris@1313 73 bitdepth = bits[2].toInt();
Chris@1313 74 }
Chris@1313 75 }
Chris@1313 76
cannam@1315 77 void getExpectedThresholds(QString format,
cannam@1315 78 QString filename,
Chris@1313 79 bool resampled,
Chris@1313 80 bool gapless,
Chris@1313 81 bool normalised,
Chris@1313 82 double &maxLimit,
Chris@1313 83 double &rmsLimit) {
Chris@1313 84
Chris@1313 85 QString extension;
Chris@1313 86 sv_samplerate_t fileRate;
Chris@1313 87 int channels;
Chris@1313 88 int bitdepth;
Chris@1313 89 getFileMetadata(filename, extension, fileRate, channels, bitdepth);
Chris@1313 90
Chris@1313 91 if (normalised) {
Chris@1313 92
cannam@1315 93 if (format == "ogg") {
Chris@1313 94
Chris@1313 95 // Our ogg is not especially high quality and is
Chris@1313 96 // actually further from the original if normalised
Chris@1313 97
Chris@1313 98 maxLimit = 0.1;
Chris@1313 99 rmsLimit = 0.03;
Chris@1313 100
cannam@1315 101 } else if (format == "aac") {
Chris@1313 102
cannam@1315 103 // Terrible performance for this test, load of spill
cannam@1315 104 // from one channel to the other. I guess they know
cannam@1315 105 // what they're doing, it's perceptual after all, but
cannam@1315 106 // it does make this check a bit superfluous, you
cannam@1315 107 // could probably pass it with a signal that sounds
cannam@1315 108 // nothing like the original
cannam@1315 109 maxLimit = 0.2;
cannam@1314 110 rmsLimit = 0.1;
Chris@1313 111
cannam@1315 112 } else if (format == "mp3") {
Chris@1313 113
Chris@1313 114 if (resampled && !gapless) {
Chris@1313 115
Chris@1313 116 // We expect worse figures here, because the
Chris@1313 117 // combination of uncompensated encoder delay +
Chris@1313 118 // resampling results in a fractional delay which
Chris@1313 119 // means the decoded signal is slightly out of
Chris@1313 120 // phase compared to the test signal
Chris@1313 121
Chris@1313 122 maxLimit = 0.1;
Chris@1313 123 rmsLimit = 0.05;
Chris@1313 124
Chris@1313 125 } else {
Chris@1313 126
Chris@1313 127 maxLimit = 0.05;
Chris@1313 128 rmsLimit = 0.01;
Chris@1313 129 }
Chris@1313 130
Chris@1313 131 } else {
Chris@1313 132
cannam@1315 133 // lossless formats (wav, aiff, flac, apple_lossless)
Chris@1313 134
Chris@1313 135 if (bitdepth >= 16 && !resampled) {
Chris@1313 136 maxLimit = 1e-3;
Chris@1313 137 rmsLimit = 3e-4;
Chris@1313 138 } else {
Chris@1313 139 maxLimit = 0.01;
Chris@1313 140 rmsLimit = 5e-3;
Chris@1313 141 }
Chris@1313 142 }
Chris@1313 143
Chris@1313 144 } else { // !normalised
Chris@1313 145
cannam@1315 146 if (format == "ogg") {
Chris@1313 147
Chris@1313 148 maxLimit = 0.06;
Chris@1313 149 rmsLimit = 0.03;
Chris@1313 150
cannam@1315 151 } else if (format == "aac") {
Chris@1313 152
cannam@1315 153 maxLimit = 0.1;
cannam@1315 154 rmsLimit = 0.1;
Chris@1313 155
cannam@1315 156 } else if (format == "mp3") {
Chris@1313 157
Chris@1313 158 // all mp3 figures are worse when not normalising
Chris@1313 159 maxLimit = 0.1;
Chris@1313 160 rmsLimit = 0.05;
Chris@1313 161
Chris@1313 162 } else {
Chris@1313 163
cannam@1315 164 // lossless formats (wav, aiff, flac, apple_lossless)
Chris@1313 165
Chris@1313 166 if (bitdepth >= 16 && !resampled) {
Chris@1313 167 maxLimit = 1e-3;
Chris@1313 168 rmsLimit = 3e-4;
Chris@1313 169 } else {
Chris@1313 170 maxLimit = 0.02;
Chris@1313 171 rmsLimit = 0.01;
Chris@1313 172 }
Chris@1313 173 }
Chris@1313 174 }
Chris@1313 175 }
Chris@1313 176
cannam@1315 177 QString testName(QString format, QString filename, int rate, bool norm, bool gapless) {
cannam@1315 178 return QString("%1/%2 at %3%4%5")
cannam@1315 179 .arg(format)
Chris@1313 180 .arg(filename)
Chris@1313 181 .arg(rate)
Chris@1313 182 .arg(norm ? " normalised": "")
Chris@1313 183 .arg(gapless ? "" : " non-gapless");
Chris@1313 184 }
Chris@1313 185
Chris@756 186 private slots:
Chris@756 187 void init()
Chris@756 188 {
Chris@756 189 if (!QDir(audioDir).exists()) {
Chris@1346 190 QString cwd = QDir::currentPath();
Chris@1346 191 cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist (cwd = " << cwd << ")" << endl;
Chris@756 192 QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found");
Chris@756 193 }
Chris@1313 194 if (!QDir(diffDir).exists() && !QDir().mkpath(diffDir)) {
Chris@1313 195 cerr << "ERROR: Audio diff directory \"" << diffDir << "\" does not exist and could not be created" << endl;
Chris@1313 196 QVERIFY2(QDir(diffDir).exists(), "Audio diff directory not found and could not be created");
Chris@1313 197 }
Chris@756 198 }
Chris@756 199
Chris@756 200 void read_data()
Chris@756 201 {
cannam@1315 202 QTest::addColumn<QString>("format");
Chris@756 203 QTest::addColumn<QString>("audiofile");
Chris@1313 204 QTest::addColumn<int>("rate");
Chris@1313 205 QTest::addColumn<bool>("normalised");
Chris@1313 206 QTest::addColumn<bool>("gapless");
cannam@1315 207 QStringList dirs = QDir(audioDir).entryList(QDir::Dirs |
cannam@1315 208 QDir::NoDotAndDotDot);
cannam@1315 209 for (QString format: dirs) {
cannam@1315 210 QStringList files = QDir(QDir(audioDir).filePath(format))
cannam@1315 211 .entryList(QDir::Files);
cannam@1315 212 int readRates[] = { 44100, 48000 };
cannam@1315 213 bool norms[] = { false, true };
cannam@1315 214 bool gaplesses[] = { true, false };
cannam@1315 215 foreach (QString filename, files) {
cannam@1315 216 for (int rate: readRates) {
cannam@1315 217 for (bool norm: norms) {
cannam@1315 218 for (bool gapless: gaplesses) {
Chris@1313 219
cannam@1315 220 if (format != "mp3" && !gapless) {
cannam@1315 221 continue;
cannam@1315 222 }
cannam@1315 223
cannam@1315 224 QString desc = testName
cannam@1315 225 (format, filename, rate, norm, gapless);
cannam@1315 226
cannam@1315 227 QTest::newRow(strOf(desc))
cannam@1315 228 << format << filename << rate << norm << gapless;
Chris@1313 229 }
Chris@1313 230 }
Chris@1313 231 }
Chris@1313 232 }
Chris@756 233 }
Chris@756 234 }
Chris@756 235
Chris@756 236 void read()
Chris@756 237 {
cannam@1315 238 QFETCH(QString, format);
Chris@756 239 QFETCH(QString, audiofile);
Chris@1313 240 QFETCH(int, rate);
Chris@1313 241 QFETCH(bool, normalised);
Chris@1313 242 QFETCH(bool, gapless);
Chris@756 243
Chris@1313 244 sv_samplerate_t readRate(rate);
Chris@1313 245
cannam@1315 246 // cerr << "\naudiofile = " << audiofile << endl;
Chris@1313 247
Chris@1313 248 AudioFileReaderFactory::Parameters params;
Chris@1313 249 params.targetRate = readRate;
Chris@1313 250 params.normalisation = (normalised ?
Chris@1313 251 AudioFileReaderFactory::Normalisation::Peak :
Chris@1313 252 AudioFileReaderFactory::Normalisation::None);
Chris@1313 253 params.gaplessMode = (gapless ?
Chris@1313 254 AudioFileReaderFactory::GaplessMode::Gapless :
Chris@1313 255 AudioFileReaderFactory::GaplessMode::Gappy);
Chris@757 256
Chris@756 257 AudioFileReader *reader =
Chris@756 258 AudioFileReaderFactory::createReader
cannam@1315 259 (audioDir + "/" + format + "/" + audiofile, params);
Chris@1313 260
Chris@756 261 if (!reader) {
Chris@820 262 #if ( QT_VERSION >= 0x050000 )
Chris@763 263 QSKIP("Unsupported file, skipping");
Chris@820 264 #else
Chris@820 265 QSKIP("Unsupported file, skipping", SkipSingle);
Chris@820 266 #endif
Chris@756 267 }
Chris@756 268
Chris@1313 269 QString extension;
Chris@1313 270 sv_samplerate_t fileRate;
Chris@1313 271 int channels;
Chris@1313 272 int fileBitdepth;
Chris@1313 273 getFileMetadata(audiofile, extension, fileRate, channels, fileBitdepth);
Chris@1313 274
Chris@1313 275 QCOMPARE((int)reader->getChannelCount(), channels);
Chris@1313 276 QCOMPARE(reader->getNativeRate(), fileRate);
Chris@1040 277 QCOMPARE(reader->getSampleRate(), readRate);
Chris@757 278
Chris@757 279 AudioTestData tdata(readRate, channels);
Chris@756 280
Chris@756 281 float *reference = tdata.getInterleavedData();
Chris@1040 282 sv_frame_t refFrames = tdata.getFrameCount();
Chris@756 283
Chris@756 284 // The reader should give us exactly the expected number of
Chris@759 285 // frames, except for mp3/aac files. We ask for quite a lot
Chris@759 286 // more, though, so we can (a) check that we only get the
Chris@759 287 // expected number back (if this is not mp3/aac) or (b) take
Chris@759 288 // into account silence at beginning and end (if it is).
Chris@1326 289 floatvec_t test = reader->getInterleavedFrames(0, refFrames + 5000);
Chris@1040 290 sv_frame_t read = test.size() / channels;
Chris@756 291
Chris@1313 292 bool perceptual = (extension == "mp3" ||
Chris@1313 293 extension == "aac" ||
Chris@1313 294 extension == "m4a");
Chris@1313 295
Chris@1313 296 if (perceptual && !gapless) {
Chris@1313 297 // allow silence at start and end
Chris@759 298 QVERIFY(read >= refFrames);
Chris@757 299 } else {
Chris@759 300 QCOMPARE(read, refFrames);
Chris@757 301 }
Chris@757 302
Chris@1313 303 bool resampled = readRate != fileRate;
Chris@1313 304 double maxLimit, rmsLimit;
cannam@1315 305 getExpectedThresholds(format,
cannam@1315 306 audiofile,
Chris@1313 307 resampled,
Chris@1313 308 gapless,
Chris@1313 309 normalised,
Chris@1313 310 maxLimit, rmsLimit);
Chris@1313 311
Chris@1313 312 double edgeLimit = maxLimit * 3; // in first or final edgeSize frames
Chris@1313 313 if (resampled && edgeLimit < 0.1) edgeLimit = 0.1;
Chris@759 314 int edgeSize = 100;
Chris@759 315
Chris@759 316 // And we ignore completely the last few frames when upsampling
Chris@1313 317 int discard = 1 + int(round(readRate / fileRate));
Chris@759 318
Chris@759 319 int offset = 0;
Chris@759 320
Chris@1313 321 if (perceptual) {
Chris@759 322
cannam@1314 323 // Look for an initial offset.
cannam@1314 324 //
cannam@1314 325 // We know the first channel has a sinusoid in it. It
cannam@1314 326 // should have a peak at 0.4ms (see AudioTestData.h) but
cannam@1314 327 // that might have been clipped, which would make it
cannam@1314 328 // imprecise. We can tell if it's clipped, though, as
cannam@1314 329 // there will be samples having exactly identical
cannam@1314 330 // values. So what we look for is the peak if it's not
cannam@1314 331 // clipped and, if it is, the first zero crossing after
cannam@1314 332 // the peak, which should be at 0.8ms.
cannam@1314 333
Chris@1296 334 int expectedPeak = int(0.0004 * readRate);
cannam@1314 335 int expectedZC = int(0.0008 * readRate);
cannam@1314 336 bool foundPeak = false;
cannam@1314 337 for (int i = 1; i+1 < read; ++i) {
cannam@1314 338 float prevSample = test[(i-1) * channels];
cannam@1314 339 float thisSample = test[i * channels];
cannam@1314 340 float nextSample = test[(i+1) * channels];
cannam@1314 341 if (thisSample > 0.8 && nextSample < thisSample) {
cannam@1314 342 foundPeak = true;
cannam@1314 343 if (thisSample > prevSample) {
cannam@1314 344 // not clipped
cannam@1314 345 offset = i - expectedPeak - 1;
cannam@1314 346 break;
cannam@1314 347 }
cannam@1314 348 }
cannam@1314 349 if (foundPeak && (thisSample >= 0.0 && nextSample < 0.0)) {
cannam@1315 350 // cerr << "thisSample = " << thisSample << ", nextSample = "
cannam@1315 351 // << nextSample << endl;
cannam@1314 352 offset = i - expectedZC - 1;
Chris@759 353 break;
Chris@759 354 }
Chris@759 355 }
Chris@1313 356
cannam@1315 357 // int fileRateEquivalent = int((offset / readRate) * fileRate);
cannam@1315 358 // std::cerr << "offset = " << offset << std::endl;
cannam@1315 359 // std::cerr << "at file rate would be " << fileRateEquivalent << std::endl;
Chris@1313 360
Chris@1313 361 // Previously our m4a test file had a fixed offset of 1024
Chris@1313 362 // at the file sample rate -- this may be because it was
Chris@1313 363 // produced by FAAC which did not write in the delay as
Chris@1313 364 // metadata? We now have an m4a produced by Core Audio
Chris@1313 365 // which gives a 0 offset. What to do...
Chris@1313 366
Chris@1313 367 // Anyway, mp3s should have 0 offset in gapless mode and
Chris@1313 368 // "something else" otherwise.
Chris@1313 369
Chris@1313 370 if (gapless) {
cannam@1315 371 if (format == "aac") {
cannam@1315 372 // ouch!
cannam@1315 373 if (offset == -1) offset = 0;
cannam@1315 374 }
Chris@1313 375 QCOMPARE(offset, 0);
Chris@1313 376 }
Chris@759 377 }
Chris@756 378
cannam@1315 379 {
cannam@1315 380 // Write the diff file now, so that it's already been written
cannam@1315 381 // even if the comparison fails. We aren't checking anything
cannam@1315 382 // here except as necessary to avoid buffer overruns etc
cannam@1315 383
cannam@1315 384 QString diffFile =
cannam@1315 385 testName(format, audiofile, rate, normalised, gapless);
cannam@1315 386 diffFile.replace("/", "_");
cannam@1315 387 diffFile.replace(".", "_");
cannam@1315 388 diffFile.replace(" ", "_");
cannam@1315 389 diffFile += ".wav";
cannam@1315 390 diffFile = QDir(diffDir).filePath(diffFile);
cannam@1315 391 WavFileWriter diffWriter(diffFile, readRate, channels,
Chris@1359 392 WavFileWriter::WriteToTemporary);
cannam@1315 393 QVERIFY(diffWriter.isOK());
cannam@1315 394
cannam@1315 395 vector<vector<float>> diffs(channels);
cannam@1315 396 for (int c = 0; c < channels; ++c) {
cannam@1315 397 for (int i = 0; i < refFrames; ++i) {
cannam@1315 398 int ix = i + offset;
cannam@1315 399 if (ix < read) {
cannam@1315 400 float signeddiff =
cannam@1315 401 test[ix * channels + c] -
cannam@1315 402 reference[i * channels + c];
cannam@1315 403 diffs[c].push_back(signeddiff);
cannam@1315 404 }
cannam@1315 405 }
cannam@1315 406 }
cannam@1315 407 float **ptrs = new float*[channels];
cannam@1315 408 for (int c = 0; c < channels; ++c) {
cannam@1315 409 ptrs[c] = diffs[c].data();
cannam@1315 410 }
cannam@1315 411 diffWriter.writeSamples(ptrs, refFrames);
cannam@1315 412 delete[] ptrs;
cannam@1315 413 }
Chris@1313 414
Chris@1346 415 for (int c = 0; c < channels; ++c) {
Chris@1313 416
Chris@1313 417 double maxDiff = 0.0;
Chris@1313 418 double totalDiff = 0.0;
Chris@1313 419 double totalSqrDiff = 0.0;
Chris@1346 420 int maxIndex = 0;
Chris@1313 421
Chris@1346 422 for (int i = 0; i < refFrames; ++i) {
Chris@1296 423 int ix = i + offset;
Chris@1296 424 if (ix >= read) {
cannam@1308 425 cerr << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl;
Chris@1296 426 QVERIFY(ix < read);
Chris@1296 427 }
Chris@1313 428
Chris@1296 429 if (ix + discard >= read) {
Chris@1296 430 // we forgive the very edge samples when
Chris@1296 431 // resampling (discard > 0)
Chris@1296 432 continue;
Chris@1296 433 }
Chris@1313 434
Chris@1346 435 double diff = fabs(test[ix * channels + c] -
cannam@1315 436 reference[i * channels + c]);
Chris@1313 437
Chris@1346 438 totalDiff += diff;
Chris@1313 439 totalSqrDiff += diff * diff;
Chris@1313 440
Chris@757 441 // in edge areas, record this only if it exceeds edgeLimit
Chris@1313 442 if (i < edgeSize || i + edgeSize >= refFrames) {
Chris@1313 443 if (diff > edgeLimit && diff > maxDiff) {
Chris@1313 444 maxDiff = diff;
Chris@1313 445 maxIndex = i;
Chris@757 446 }
Chris@757 447 } else {
Chris@1313 448 if (diff > maxDiff) {
Chris@1313 449 maxDiff = diff;
Chris@1313 450 maxIndex = i;
Chris@757 451 }
Chris@1346 452 }
Chris@1346 453 }
Chris@1313 454
Chris@1346 455 double meanDiff = totalDiff / double(refFrames);
Chris@1313 456 double rmsDiff = sqrt(totalSqrDiff / double(refFrames));
cannam@1308 457
cannam@1314 458 /*
Chris@1346 459 cerr << "channel " << c << ": mean diff " << meanDiff << endl;
Chris@1313 460 cerr << "channel " << c << ": rms diff " << rmsDiff << endl;
Chris@1313 461 cerr << "channel " << c << ": max diff " << maxDiff << " at " << maxIndex << endl;
cannam@1314 462 */
Chris@1313 463 if (rmsDiff >= rmsLimit) {
Chris@1346 464 cerr << "ERROR: for audiofile " << audiofile << ": RMS diff = " << rmsDiff << " for channel " << c << " (limit = " << rmsLimit << ")" << endl;
Chris@1313 465 QVERIFY(rmsDiff < rmsLimit);
Chris@1313 466 }
Chris@1346 467 if (maxDiff >= maxLimit) {
Chris@1346 468 cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxDiff << " at frame " << maxIndex << " of " << read << " on channel " << c << " (limit = " << maxLimit << ", edge limit = " << edgeLimit << ", mean diff = " << meanDiff << ", rms = " << rmsDiff << ")" << endl;
Chris@1346 469 QVERIFY(maxDiff < maxLimit);
Chris@1346 470 }
Chris@1313 471
Chris@1313 472 // and check for spurious material at end
Chris@1313 473
Chris@1309 474 for (sv_frame_t i = refFrames; i + offset < read; ++i) {
Chris@1309 475 sv_frame_t ix = i + offset;
Chris@1323 476 float quiet = 0.1f; //!!! allow some ringing - but let's come back to this, it should tail off
cannam@1308 477 float mag = fabsf(test[ix * channels + c]);
cannam@1308 478 if (mag > quiet) {
Chris@1313 479 cerr << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << " after reference+offset ended at " << refFrames+offset << ")" << endl;
cannam@1308 480 QVERIFY(mag < quiet);
cannam@1308 481 }
cannam@1308 482 }
Chris@756 483 }
Chris@756 484 }
Chris@756 485 };
Chris@756 486
Chris@756 487 #endif