annotate data/fileio/test/AudioFileReaderTest.h @ 1333:46670bb4a290 3.0-integration

Merge from branch svg, and thus (in some subrepos) from levelpanwidget
author Chris Cannam
date Mon, 19 Dec 2016 16:34:38 +0000
parents 54af1e21705c
children 75ad55315db4
rev   line source
Chris@756 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@756 2
Chris@756 3 /*
Chris@756 4 Sonic Visualiser
Chris@756 5 An audio file viewer and annotation editor.
Chris@756 6 Centre for Digital Music, Queen Mary, University of London.
Chris@756 7 This file copyright 2013 Chris Cannam.
Chris@756 8
Chris@756 9 This program is free software; you can redistribute it and/or
Chris@756 10 modify it under the terms of the GNU General Public License as
Chris@756 11 published by the Free Software Foundation; either version 2 of the
Chris@756 12 License, or (at your option) any later version. See the file
Chris@756 13 COPYING included with this distribution for more information.
Chris@756 14 */
Chris@756 15
Chris@756 16 #ifndef TEST_AUDIO_FILE_READER_H
Chris@756 17 #define TEST_AUDIO_FILE_READER_H
Chris@756 18
Chris@756 19 #include "../AudioFileReaderFactory.h"
Chris@756 20 #include "../AudioFileReader.h"
Chris@1313 21 #include "../WavFileWriter.h"
Chris@756 22
Chris@756 23 #include "AudioTestData.h"
Chris@756 24
Chris@756 25 #include <cmath>
Chris@756 26
Chris@756 27 #include <QObject>
Chris@756 28 #include <QtTest>
Chris@756 29 #include <QDir>
Chris@756 30
Chris@756 31 #include <iostream>
Chris@756 32
Chris@756 33 using namespace std;
Chris@756 34
// Directory holding the reference audio files. read_data() treats each
// immediate subdirectory as a format name and each file inside it as a test
// input. The path is relative, so it is resolved against the working
// directory the test is run from.
Chris@1263 35 static QString audioDir = "svcore/data/fileio/test/testfiles";
// Directory that receives one difference WAV file per test case, written by
// read(). init() creates it if it does not already exist.
Chris@1313 36 static QString diffDir = "svcore/data/fileio/test/diffs";
Chris@756 37
Chris@756 38 class AudioFileReaderTest : public QObject
Chris@756 39 {
Chris@756 40 Q_OBJECT
Chris@756 41
Chris@756 42 const char *strOf(QString s) {
Chris@756 43 return strdup(s.toLocal8Bit().data());
Chris@756 44 }
Chris@756 45
Chris@1313 46 void getFileMetadata(QString filename,
Chris@1313 47 QString &extension,
Chris@1313 48 sv_samplerate_t &rate,
Chris@1313 49 int &channels,
Chris@1313 50 int &bitdepth) {
Chris@1313 51
Chris@1313 52 QStringList fileAndExt = filename.split(".");
Chris@1313 53 QStringList bits = fileAndExt[0].split("-");
Chris@1313 54
Chris@1313 55 extension = fileAndExt[1];
Chris@1313 56 rate = bits[0].toInt();
Chris@1313 57 channels = bits[1].toInt();
Chris@1313 58 bitdepth = 16;
Chris@1313 59 if (bits.length() > 2) {
Chris@1313 60 bitdepth = bits[2].toInt();
Chris@1313 61 }
Chris@1313 62 }
Chris@1313 63
cannam@1315 64 void getExpectedThresholds(QString format,
cannam@1315 65 QString filename,
Chris@1313 66 bool resampled,
Chris@1313 67 bool gapless,
Chris@1313 68 bool normalised,
Chris@1313 69 double &maxLimit,
Chris@1313 70 double &rmsLimit) {
Chris@1313 71
Chris@1313 72 QString extension;
Chris@1313 73 sv_samplerate_t fileRate;
Chris@1313 74 int channels;
Chris@1313 75 int bitdepth;
Chris@1313 76 getFileMetadata(filename, extension, fileRate, channels, bitdepth);
Chris@1313 77
Chris@1313 78 if (normalised) {
Chris@1313 79
cannam@1315 80 if (format == "ogg") {
Chris@1313 81
Chris@1313 82 // Our ogg is not especially high quality and is
Chris@1313 83 // actually further from the original if normalised
Chris@1313 84
Chris@1313 85 maxLimit = 0.1;
Chris@1313 86 rmsLimit = 0.03;
Chris@1313 87
cannam@1315 88 } else if (format == "aac") {
Chris@1313 89
cannam@1315 90 // Terrible performance for this test, load of spill
cannam@1315 91 // from one channel to the other. I guess they know
cannam@1315 92 // what they're doing, it's perceptual after all, but
cannam@1315 93 // it does make this check a bit superfluous, you
cannam@1315 94 // could probably pass it with a signal that sounds
cannam@1315 95 // nothing like the original
cannam@1315 96 maxLimit = 0.2;
cannam@1314 97 rmsLimit = 0.1;
Chris@1313 98
cannam@1315 99 } else if (format == "mp3") {
Chris@1313 100
Chris@1313 101 if (resampled && !gapless) {
Chris@1313 102
Chris@1313 103 // We expect worse figures here, because the
Chris@1313 104 // combination of uncompensated encoder delay +
Chris@1313 105 // resampling results in a fractional delay which
Chris@1313 106 // means the decoded signal is slightly out of
Chris@1313 107 // phase compared to the test signal
Chris@1313 108
Chris@1313 109 maxLimit = 0.1;
Chris@1313 110 rmsLimit = 0.05;
Chris@1313 111
Chris@1313 112 } else {
Chris@1313 113
Chris@1313 114 maxLimit = 0.05;
Chris@1313 115 rmsLimit = 0.01;
Chris@1313 116 }
Chris@1313 117
Chris@1313 118 } else {
Chris@1313 119
cannam@1315 120 // lossless formats (wav, aiff, flac, apple_lossless)
Chris@1313 121
Chris@1313 122 if (bitdepth >= 16 && !resampled) {
Chris@1313 123 maxLimit = 1e-3;
Chris@1313 124 rmsLimit = 3e-4;
Chris@1313 125 } else {
Chris@1313 126 maxLimit = 0.01;
Chris@1313 127 rmsLimit = 5e-3;
Chris@1313 128 }
Chris@1313 129 }
Chris@1313 130
Chris@1313 131 } else { // !normalised
Chris@1313 132
cannam@1315 133 if (format == "ogg") {
Chris@1313 134
Chris@1313 135 maxLimit = 0.06;
Chris@1313 136 rmsLimit = 0.03;
Chris@1313 137
cannam@1315 138 } else if (format == "aac") {
Chris@1313 139
cannam@1315 140 maxLimit = 0.1;
cannam@1315 141 rmsLimit = 0.1;
Chris@1313 142
cannam@1315 143 } else if (format == "mp3") {
Chris@1313 144
Chris@1313 145 // all mp3 figures are worse when not normalising
Chris@1313 146 maxLimit = 0.1;
Chris@1313 147 rmsLimit = 0.05;
Chris@1313 148
Chris@1313 149 } else {
Chris@1313 150
cannam@1315 151 // lossless formats (wav, aiff, flac, apple_lossless)
Chris@1313 152
Chris@1313 153 if (bitdepth >= 16 && !resampled) {
Chris@1313 154 maxLimit = 1e-3;
Chris@1313 155 rmsLimit = 3e-4;
Chris@1313 156 } else {
Chris@1313 157 maxLimit = 0.02;
Chris@1313 158 rmsLimit = 0.01;
Chris@1313 159 }
Chris@1313 160 }
Chris@1313 161 }
Chris@1313 162 }
Chris@1313 163
cannam@1315 164 QString testName(QString format, QString filename, int rate, bool norm, bool gapless) {
cannam@1315 165 return QString("%1/%2 at %3%4%5")
cannam@1315 166 .arg(format)
Chris@1313 167 .arg(filename)
Chris@1313 168 .arg(rate)
Chris@1313 169 .arg(norm ? " normalised": "")
Chris@1313 170 .arg(gapless ? "" : " non-gapless");
Chris@1313 171 }
Chris@1313 172
Chris@756 173 private slots:
Chris@756 174 void init()
Chris@756 175 {
Chris@756 176 if (!QDir(audioDir).exists()) {
Chris@756 177 cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist" << endl;
Chris@756 178 QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found");
Chris@756 179 }
Chris@1313 180 if (!QDir(diffDir).exists() && !QDir().mkpath(diffDir)) {
Chris@1313 181 cerr << "ERROR: Audio diff directory \"" << diffDir << "\" does not exist and could not be created" << endl;
Chris@1313 182 QVERIFY2(QDir(diffDir).exists(), "Audio diff directory not found and could not be created");
Chris@1313 183 }
Chris@756 184 }
Chris@756 185
Chris@756 186 void read_data()
Chris@756 187 {
cannam@1315 188 QTest::addColumn<QString>("format");
Chris@756 189 QTest::addColumn<QString>("audiofile");
Chris@1313 190 QTest::addColumn<int>("rate");
Chris@1313 191 QTest::addColumn<bool>("normalised");
Chris@1313 192 QTest::addColumn<bool>("gapless");
cannam@1315 193 QStringList dirs = QDir(audioDir).entryList(QDir::Dirs |
cannam@1315 194 QDir::NoDotAndDotDot);
cannam@1315 195 for (QString format: dirs) {
cannam@1315 196 QStringList files = QDir(QDir(audioDir).filePath(format))
cannam@1315 197 .entryList(QDir::Files);
cannam@1315 198 int readRates[] = { 44100, 48000 };
cannam@1315 199 bool norms[] = { false, true };
cannam@1315 200 bool gaplesses[] = { true, false };
cannam@1315 201 foreach (QString filename, files) {
cannam@1315 202 for (int rate: readRates) {
cannam@1315 203 for (bool norm: norms) {
cannam@1315 204 for (bool gapless: gaplesses) {
Chris@1313 205
cannam@1315 206 if (format != "mp3" && !gapless) {
cannam@1315 207 continue;
cannam@1315 208 }
cannam@1315 209
cannam@1315 210 QString desc = testName
cannam@1315 211 (format, filename, rate, norm, gapless);
cannam@1315 212
cannam@1315 213 QTest::newRow(strOf(desc))
cannam@1315 214 << format << filename << rate << norm << gapless;
Chris@1313 215 }
Chris@1313 216 }
Chris@1313 217 }
Chris@1313 218 }
Chris@756 219 }
Chris@756 220 }
Chris@756 221
Chris@756 222 void read()
Chris@756 223 {
cannam@1315 224 QFETCH(QString, format);
Chris@756 225 QFETCH(QString, audiofile);
Chris@1313 226 QFETCH(int, rate);
Chris@1313 227 QFETCH(bool, normalised);
Chris@1313 228 QFETCH(bool, gapless);
Chris@756 229
Chris@1313 230 sv_samplerate_t readRate(rate);
Chris@1313 231
cannam@1315 232 // cerr << "\naudiofile = " << audiofile << endl;
Chris@1313 233
Chris@1313 234 AudioFileReaderFactory::Parameters params;
Chris@1313 235 params.targetRate = readRate;
Chris@1313 236 params.normalisation = (normalised ?
Chris@1313 237 AudioFileReaderFactory::Normalisation::Peak :
Chris@1313 238 AudioFileReaderFactory::Normalisation::None);
Chris@1313 239 params.gaplessMode = (gapless ?
Chris@1313 240 AudioFileReaderFactory::GaplessMode::Gapless :
Chris@1313 241 AudioFileReaderFactory::GaplessMode::Gappy);
Chris@757 242
Chris@756 243 AudioFileReader *reader =
Chris@756 244 AudioFileReaderFactory::createReader
cannam@1315 245 (audioDir + "/" + format + "/" + audiofile, params);
Chris@1313 246
Chris@756 247 if (!reader) {
Chris@820 248 #if ( QT_VERSION >= 0x050000 )
Chris@763 249 QSKIP("Unsupported file, skipping");
Chris@820 250 #else
Chris@820 251 QSKIP("Unsupported file, skipping", SkipSingle);
Chris@820 252 #endif
Chris@756 253 }
Chris@756 254
Chris@1313 255 QString extension;
Chris@1313 256 sv_samplerate_t fileRate;
Chris@1313 257 int channels;
Chris@1313 258 int fileBitdepth;
Chris@1313 259 getFileMetadata(audiofile, extension, fileRate, channels, fileBitdepth);
Chris@1313 260
Chris@1313 261 QCOMPARE((int)reader->getChannelCount(), channels);
Chris@1313 262 QCOMPARE(reader->getNativeRate(), fileRate);
Chris@1040 263 QCOMPARE(reader->getSampleRate(), readRate);
Chris@757 264
Chris@757 265 AudioTestData tdata(readRate, channels);
Chris@756 266
Chris@756 267 float *reference = tdata.getInterleavedData();
Chris@1040 268 sv_frame_t refFrames = tdata.getFrameCount();
Chris@756 269
Chris@756 270 // The reader should give us exactly the expected number of
Chris@759 271 // frames, except for mp3/aac files. We ask for quite a lot
Chris@759 272 // more, though, so we can (a) check that we only get the
Chris@759 273 // expected number back (if this is not mp3/aac) or (b) take
Chris@759 274 // into account silence at beginning and end (if it is).
Chris@1326 275 floatvec_t test = reader->getInterleavedFrames(0, refFrames + 5000);
Chris@1040 276 sv_frame_t read = test.size() / channels;
Chris@756 277
Chris@1313 278 bool perceptual = (extension == "mp3" ||
Chris@1313 279 extension == "aac" ||
Chris@1313 280 extension == "m4a");
Chris@1313 281
Chris@1313 282 if (perceptual && !gapless) {
Chris@1313 283 // allow silence at start and end
Chris@759 284 QVERIFY(read >= refFrames);
Chris@757 285 } else {
Chris@759 286 QCOMPARE(read, refFrames);
Chris@757 287 }
Chris@757 288
Chris@1313 289 bool resampled = readRate != fileRate;
Chris@1313 290 double maxLimit, rmsLimit;
cannam@1315 291 getExpectedThresholds(format,
cannam@1315 292 audiofile,
Chris@1313 293 resampled,
Chris@1313 294 gapless,
Chris@1313 295 normalised,
Chris@1313 296 maxLimit, rmsLimit);
Chris@1313 297
Chris@1313 298 double edgeLimit = maxLimit * 3; // in first or final edgeSize frames
Chris@1313 299 if (resampled && edgeLimit < 0.1) edgeLimit = 0.1;
Chris@759 300 int edgeSize = 100;
Chris@759 301
Chris@759 302 // And we ignore completely the last few frames when upsampling
Chris@1313 303 int discard = 1 + int(round(readRate / fileRate));
Chris@759 304
Chris@759 305 int offset = 0;
Chris@759 306
Chris@1313 307 if (perceptual) {
Chris@759 308
cannam@1314 309 // Look for an initial offset.
cannam@1314 310 //
cannam@1314 311 // We know the first channel has a sinusoid in it. It
cannam@1314 312 // should have a peak at 0.4ms (see AudioTestData.h) but
cannam@1314 313 // that might have been clipped, which would make it
cannam@1314 314 // imprecise. We can tell if it's clipped, though, as
cannam@1314 315 // there will be samples having exactly identical
cannam@1314 316 // values. So what we look for is the peak if it's not
cannam@1314 317 // clipped and, if it is, the first zero crossing after
cannam@1314 318 // the peak, which should be at 0.8ms.
cannam@1314 319
Chris@1296 320 int expectedPeak = int(0.0004 * readRate);
cannam@1314 321 int expectedZC = int(0.0008 * readRate);
cannam@1314 322 bool foundPeak = false;
cannam@1314 323 for (int i = 1; i+1 < read; ++i) {
cannam@1314 324 float prevSample = test[(i-1) * channels];
cannam@1314 325 float thisSample = test[i * channels];
cannam@1314 326 float nextSample = test[(i+1) * channels];
cannam@1314 327 if (thisSample > 0.8 && nextSample < thisSample) {
cannam@1314 328 foundPeak = true;
cannam@1314 329 if (thisSample > prevSample) {
cannam@1314 330 // not clipped
cannam@1314 331 offset = i - expectedPeak - 1;
cannam@1314 332 break;
cannam@1314 333 }
cannam@1314 334 }
cannam@1314 335 if (foundPeak && (thisSample >= 0.0 && nextSample < 0.0)) {
cannam@1315 336 // cerr << "thisSample = " << thisSample << ", nextSample = "
cannam@1315 337 // << nextSample << endl;
cannam@1314 338 offset = i - expectedZC - 1;
Chris@759 339 break;
Chris@759 340 }
Chris@759 341 }
Chris@1313 342
cannam@1315 343 // int fileRateEquivalent = int((offset / readRate) * fileRate);
cannam@1315 344 // std::cerr << "offset = " << offset << std::endl;
cannam@1315 345 // std::cerr << "at file rate would be " << fileRateEquivalent << std::endl;
Chris@1313 346
Chris@1313 347 // Previously our m4a test file had a fixed offset of 1024
Chris@1313 348 // at the file sample rate -- this may be because it was
Chris@1313 349 // produced by FAAC which did not write in the delay as
Chris@1313 350 // metadata? We now have an m4a produced by Core Audio
Chris@1313 351 // which gives a 0 offset. What to do...
Chris@1313 352
Chris@1313 353 // Anyway, mp3s should have 0 offset in gapless mode and
Chris@1313 354 // "something else" otherwise.
Chris@1313 355
Chris@1313 356 if (gapless) {
cannam@1315 357 if (format == "aac") {
cannam@1315 358 // ouch!
cannam@1315 359 if (offset == -1) offset = 0;
cannam@1315 360 }
Chris@1313 361 QCOMPARE(offset, 0);
Chris@1313 362 }
Chris@759 363 }
Chris@756 364
cannam@1315 365 {
cannam@1315 366 // Write the diff file now, so that it's already been written
cannam@1315 367 // even if the comparison fails. We aren't checking anything
cannam@1315 368 // here except as necessary to avoid buffer overruns etc
cannam@1315 369
cannam@1315 370 QString diffFile =
cannam@1315 371 testName(format, audiofile, rate, normalised, gapless);
cannam@1315 372 diffFile.replace("/", "_");
cannam@1315 373 diffFile.replace(".", "_");
cannam@1315 374 diffFile.replace(" ", "_");
cannam@1315 375 diffFile += ".wav";
cannam@1315 376 diffFile = QDir(diffDir).filePath(diffFile);
cannam@1315 377 WavFileWriter diffWriter(diffFile, readRate, channels,
cannam@1315 378 WavFileWriter::WriteToTarget); //!!! NB WriteToTemporary not working, why?
cannam@1315 379 QVERIFY(diffWriter.isOK());
cannam@1315 380
cannam@1315 381 vector<vector<float>> diffs(channels);
cannam@1315 382 for (int c = 0; c < channels; ++c) {
cannam@1315 383 for (int i = 0; i < refFrames; ++i) {
cannam@1315 384 int ix = i + offset;
cannam@1315 385 if (ix < read) {
cannam@1315 386 float signeddiff =
cannam@1315 387 test[ix * channels + c] -
cannam@1315 388 reference[i * channels + c];
cannam@1315 389 diffs[c].push_back(signeddiff);
cannam@1315 390 }
cannam@1315 391 }
cannam@1315 392 }
cannam@1315 393 float **ptrs = new float*[channels];
cannam@1315 394 for (int c = 0; c < channels; ++c) {
cannam@1315 395 ptrs[c] = diffs[c].data();
cannam@1315 396 }
cannam@1315 397 diffWriter.writeSamples(ptrs, refFrames);
cannam@1315 398 delete[] ptrs;
cannam@1315 399 }
Chris@1313 400
Chris@756 401 for (int c = 0; c < channels; ++c) {
Chris@1313 402
Chris@1313 403 double maxDiff = 0.0;
Chris@1313 404 double totalDiff = 0.0;
Chris@1313 405 double totalSqrDiff = 0.0;
Chris@1313 406 int maxIndex = 0;
Chris@1313 407
Chris@1296 408 for (int i = 0; i < refFrames; ++i) {
Chris@1296 409 int ix = i + offset;
Chris@1296 410 if (ix >= read) {
cannam@1308 411 cerr << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl;
Chris@1296 412 QVERIFY(ix < read);
Chris@1296 413 }
Chris@1313 414
Chris@1296 415 if (ix + discard >= read) {
Chris@1296 416 // we forgive the very edge samples when
Chris@1296 417 // resampling (discard > 0)
Chris@1296 418 continue;
Chris@1296 419 }
Chris@1313 420
cannam@1315 421 double diff = fabs(test[ix * channels + c] -
cannam@1315 422 reference[i * channels + c]);
Chris@1313 423
Chris@1313 424 totalDiff += diff;
Chris@1313 425 totalSqrDiff += diff * diff;
Chris@1313 426
Chris@757 427 // in edge areas, record this only if it exceeds edgeLimit
Chris@1313 428 if (i < edgeSize || i + edgeSize >= refFrames) {
Chris@1313 429 if (diff > edgeLimit && diff > maxDiff) {
Chris@1313 430 maxDiff = diff;
Chris@1313 431 maxIndex = i;
Chris@757 432 }
Chris@757 433 } else {
Chris@1313 434 if (diff > maxDiff) {
Chris@1313 435 maxDiff = diff;
Chris@1313 436 maxIndex = i;
Chris@757 437 }
Chris@756 438 }
Chris@756 439 }
Chris@1313 440
Chris@1313 441 double meanDiff = totalDiff / double(refFrames);
Chris@1313 442 double rmsDiff = sqrt(totalSqrDiff / double(refFrames));
cannam@1308 443
cannam@1314 444 /*
Chris@1313 445 cerr << "channel " << c << ": mean diff " << meanDiff << endl;
Chris@1313 446 cerr << "channel " << c << ": rms diff " << rmsDiff << endl;
Chris@1313 447 cerr << "channel " << c << ": max diff " << maxDiff << " at " << maxIndex << endl;
cannam@1314 448 */
Chris@1313 449 if (rmsDiff >= rmsLimit) {
Chris@1313 450 cerr << "ERROR: for audiofile " << audiofile << ": RMS diff = " << rmsDiff << " for channel " << c << " (limit = " << rmsLimit << ")" << endl;
Chris@1313 451 QVERIFY(rmsDiff < rmsLimit);
Chris@1313 452 }
Chris@1313 453 if (maxDiff >= maxLimit) {
Chris@1313 454 cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxDiff << " at frame " << maxIndex << " of " << read << " on channel " << c << " (limit = " << maxLimit << ", edge limit = " << edgeLimit << ", mean diff = " << meanDiff << ", rms = " << rmsDiff << ")" << endl;
Chris@1313 455 QVERIFY(maxDiff < maxLimit);
Chris@1313 456 }
Chris@1313 457
Chris@1313 458 // and check for spurious material at end
Chris@1313 459
Chris@1309 460 for (sv_frame_t i = refFrames; i + offset < read; ++i) {
Chris@1309 461 sv_frame_t ix = i + offset;
Chris@1323 462 float quiet = 0.1f; //!!! allow some ringing - but let's come back to this, it should tail off
cannam@1308 463 float mag = fabsf(test[ix * channels + c]);
cannam@1308 464 if (mag > quiet) {
Chris@1313 465 cerr << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << " after reference+offset ended at " << refFrames+offset << ")" << endl;
cannam@1308 466 QVERIFY(mag < quiet);
cannam@1308 467 }
cannam@1308 468 }
Chris@756 469 }
Chris@756 470 }
Chris@756 471 };
Chris@756 472
Chris@756 473 #endif