annotate data/fileio/test/AudioFileReaderTest.h @ 1394:9ef1cc26024c

Add Range01 normalisation method to ColumnOp. This is the normalisation that is actually used in the Colour 3D Plot layer historically when column normalisation is enabled (not Max1 after all).
author Chris Cannam
date Tue, 28 Feb 2017 14:04:16 +0000
parents 1c9bbbb6116a
children aadfb395e933
rev   line source
Chris@756 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@756 2
Chris@756 3 /*
Chris@756 4 Sonic Visualiser
Chris@756 5 An audio file viewer and annotation editor.
Chris@756 6 Centre for Digital Music, Queen Mary, University of London.
Chris@756 7 This file copyright 2013 Chris Cannam.
Chris@756 8
Chris@756 9 This program is free software; you can redistribute it and/or
Chris@756 10 modify it under the terms of the GNU General Public License as
Chris@756 11 published by the Free Software Foundation; either version 2 of the
Chris@756 12 License, or (at your option) any later version. See the file
Chris@756 13 COPYING included with this distribution for more information.
Chris@756 14 */
Chris@756 15
Chris@756 16 #ifndef TEST_AUDIO_FILE_READER_H
Chris@756 17 #define TEST_AUDIO_FILE_READER_H
Chris@756 18
Chris@756 19 #include "../AudioFileReaderFactory.h"
Chris@756 20 #include "../AudioFileReader.h"
Chris@1313 21 #include "../WavFileWriter.h"
Chris@756 22
Chris@756 23 #include "AudioTestData.h"
Chris@756 24
Chris@756 25 #include <cmath>
Chris@756 26
Chris@756 27 #include <QObject>
Chris@756 28 #include <QtTest>
Chris@756 29 #include <QDir>
Chris@756 30
Chris@756 31 #include <iostream>
Chris@756 32
Chris@756 33 using namespace std;
Chris@756 34
Chris@756 35 class AudioFileReaderTest : public QObject
Chris@756 36 {
Chris@756 37 Q_OBJECT
Chris@756 38
Chris@1346 39 private:
Chris@1346 40 QString testDirBase;
Chris@1346 41 QString audioDir;
Chris@1346 42 QString diffDir;
Chris@1346 43
Chris@1346 44 public:
Chris@1346 45 AudioFileReaderTest(QString base) {
Chris@1346 46 if (base == "") {
Chris@1346 47 base = "svcore/data/fileio/test";
Chris@1346 48 }
Chris@1346 49 testDirBase = base;
Chris@1359 50 audioDir = base + "/audio";
Chris@1346 51 diffDir = base + "/diffs";
Chris@1346 52 }
Chris@1346 53
Chris@1346 54 private:
Chris@756 55 const char *strOf(QString s) {
Chris@756 56 return strdup(s.toLocal8Bit().data());
Chris@756 57 }
Chris@756 58
Chris@1313 59 void getFileMetadata(QString filename,
Chris@1313 60 QString &extension,
Chris@1313 61 sv_samplerate_t &rate,
Chris@1313 62 int &channels,
Chris@1313 63 int &bitdepth) {
Chris@1313 64
Chris@1313 65 QStringList fileAndExt = filename.split(".");
Chris@1313 66 QStringList bits = fileAndExt[0].split("-");
Chris@1313 67
Chris@1313 68 extension = fileAndExt[1];
Chris@1313 69 rate = bits[0].toInt();
Chris@1313 70 channels = bits[1].toInt();
Chris@1313 71 bitdepth = 16;
Chris@1313 72 if (bits.length() > 2) {
Chris@1313 73 bitdepth = bits[2].toInt();
Chris@1313 74 }
Chris@1313 75 }
Chris@1313 76
cannam@1315 77 void getExpectedThresholds(QString format,
cannam@1315 78 QString filename,
Chris@1313 79 bool resampled,
Chris@1313 80 bool gapless,
Chris@1313 81 bool normalised,
Chris@1313 82 double &maxLimit,
Chris@1313 83 double &rmsLimit) {
Chris@1313 84
Chris@1313 85 QString extension;
Chris@1313 86 sv_samplerate_t fileRate;
Chris@1313 87 int channels;
Chris@1313 88 int bitdepth;
Chris@1313 89 getFileMetadata(filename, extension, fileRate, channels, bitdepth);
Chris@1313 90
Chris@1313 91 if (normalised) {
Chris@1313 92
cannam@1315 93 if (format == "ogg") {
Chris@1313 94
Chris@1313 95 // Our ogg is not especially high quality and is
Chris@1313 96 // actually further from the original if normalised
Chris@1313 97
Chris@1313 98 maxLimit = 0.1;
Chris@1313 99 rmsLimit = 0.03;
Chris@1313 100
cannam@1315 101 } else if (format == "aac") {
Chris@1313 102
cannam@1315 103 // Terrible performance for this test, load of spill
cannam@1315 104 // from one channel to the other. I guess they know
cannam@1315 105 // what they're doing, it's perceptual after all, but
cannam@1315 106 // it does make this check a bit superfluous, you
cannam@1315 107 // could probably pass it with a signal that sounds
cannam@1315 108 // nothing like the original
cannam@1315 109 maxLimit = 0.2;
cannam@1314 110 rmsLimit = 0.1;
Chris@1313 111
cannam@1315 112 } else if (format == "mp3") {
Chris@1313 113
Chris@1313 114 if (resampled && !gapless) {
Chris@1313 115
Chris@1313 116 // We expect worse figures here, because the
Chris@1313 117 // combination of uncompensated encoder delay +
Chris@1313 118 // resampling results in a fractional delay which
Chris@1313 119 // means the decoded signal is slightly out of
Chris@1313 120 // phase compared to the test signal
Chris@1313 121
Chris@1313 122 maxLimit = 0.1;
Chris@1313 123 rmsLimit = 0.05;
Chris@1313 124
Chris@1313 125 } else {
Chris@1313 126
Chris@1313 127 maxLimit = 0.05;
Chris@1313 128 rmsLimit = 0.01;
Chris@1313 129 }
Chris@1313 130
Chris@1313 131 } else {
Chris@1313 132
cannam@1315 133 // lossless formats (wav, aiff, flac, apple_lossless)
Chris@1313 134
Chris@1313 135 if (bitdepth >= 16 && !resampled) {
Chris@1313 136 maxLimit = 1e-3;
Chris@1313 137 rmsLimit = 3e-4;
Chris@1313 138 } else {
Chris@1313 139 maxLimit = 0.01;
Chris@1313 140 rmsLimit = 5e-3;
Chris@1313 141 }
Chris@1313 142 }
Chris@1313 143
Chris@1313 144 } else { // !normalised
Chris@1313 145
cannam@1315 146 if (format == "ogg") {
Chris@1313 147
Chris@1313 148 maxLimit = 0.06;
Chris@1313 149 rmsLimit = 0.03;
Chris@1313 150
cannam@1315 151 } else if (format == "aac") {
Chris@1313 152
cannam@1315 153 maxLimit = 0.1;
cannam@1315 154 rmsLimit = 0.1;
Chris@1313 155
cannam@1315 156 } else if (format == "mp3") {
Chris@1313 157
Chris@1313 158 // all mp3 figures are worse when not normalising
Chris@1313 159 maxLimit = 0.1;
Chris@1313 160 rmsLimit = 0.05;
Chris@1313 161
Chris@1313 162 } else {
Chris@1313 163
cannam@1315 164 // lossless formats (wav, aiff, flac, apple_lossless)
Chris@1313 165
Chris@1313 166 if (bitdepth >= 16 && !resampled) {
Chris@1313 167 maxLimit = 1e-3;
Chris@1313 168 rmsLimit = 3e-4;
Chris@1313 169 } else {
Chris@1313 170 maxLimit = 0.02;
Chris@1313 171 rmsLimit = 0.01;
Chris@1313 172 }
Chris@1313 173 }
Chris@1313 174 }
Chris@1313 175 }
Chris@1313 176
cannam@1315 177 QString testName(QString format, QString filename, int rate, bool norm, bool gapless) {
cannam@1315 178 return QString("%1/%2 at %3%4%5")
cannam@1315 179 .arg(format)
Chris@1313 180 .arg(filename)
Chris@1313 181 .arg(rate)
Chris@1313 182 .arg(norm ? " normalised": "")
Chris@1313 183 .arg(gapless ? "" : " non-gapless");
Chris@1313 184 }
Chris@1313 185
Chris@756 186 private slots:
Chris@756 187 void init()
Chris@756 188 {
Chris@756 189 if (!QDir(audioDir).exists()) {
Chris@1346 190 QString cwd = QDir::currentPath();
Chris@1346 191 cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist (cwd = " << cwd << ")" << endl;
Chris@756 192 QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found");
Chris@756 193 }
Chris@1313 194 if (!QDir(diffDir).exists() && !QDir().mkpath(diffDir)) {
Chris@1313 195 cerr << "ERROR: Audio diff directory \"" << diffDir << "\" does not exist and could not be created" << endl;
Chris@1313 196 QVERIFY2(QDir(diffDir).exists(), "Audio diff directory not found and could not be created");
Chris@1313 197 }
Chris@756 198 }
Chris@756 199
Chris@756 200 void read_data()
Chris@756 201 {
cannam@1315 202 QTest::addColumn<QString>("format");
Chris@756 203 QTest::addColumn<QString>("audiofile");
Chris@1313 204 QTest::addColumn<int>("rate");
Chris@1313 205 QTest::addColumn<bool>("normalised");
Chris@1313 206 QTest::addColumn<bool>("gapless");
cannam@1315 207 QStringList dirs = QDir(audioDir).entryList(QDir::Dirs |
cannam@1315 208 QDir::NoDotAndDotDot);
cannam@1315 209 for (QString format: dirs) {
cannam@1315 210 QStringList files = QDir(QDir(audioDir).filePath(format))
cannam@1315 211 .entryList(QDir::Files);
cannam@1315 212 int readRates[] = { 44100, 48000 };
cannam@1315 213 bool norms[] = { false, true };
cannam@1315 214 bool gaplesses[] = { true, false };
cannam@1315 215 foreach (QString filename, files) {
cannam@1315 216 for (int rate: readRates) {
cannam@1315 217 for (bool norm: norms) {
cannam@1315 218 for (bool gapless: gaplesses) {
Chris@1313 219
cannam@1315 220 if (format != "mp3" && !gapless) {
cannam@1315 221 continue;
cannam@1315 222 }
cannam@1315 223
cannam@1315 224 QString desc = testName
cannam@1315 225 (format, filename, rate, norm, gapless);
cannam@1315 226
cannam@1315 227 QTest::newRow(strOf(desc))
cannam@1315 228 << format << filename << rate << norm << gapless;
Chris@1313 229 }
Chris@1313 230 }
Chris@1313 231 }
Chris@1313 232 }
Chris@756 233 }
Chris@756 234 }
Chris@756 235
Chris@756 236 void read()
Chris@756 237 {
cannam@1315 238 QFETCH(QString, format);
Chris@756 239 QFETCH(QString, audiofile);
Chris@1313 240 QFETCH(int, rate);
Chris@1313 241 QFETCH(bool, normalised);
Chris@1313 242 QFETCH(bool, gapless);
Chris@756 243
Chris@1313 244 sv_samplerate_t readRate(rate);
Chris@1313 245
cannam@1315 246 // cerr << "\naudiofile = " << audiofile << endl;
Chris@1313 247
Chris@1313 248 AudioFileReaderFactory::Parameters params;
Chris@1313 249 params.targetRate = readRate;
Chris@1313 250 params.normalisation = (normalised ?
Chris@1313 251 AudioFileReaderFactory::Normalisation::Peak :
Chris@1313 252 AudioFileReaderFactory::Normalisation::None);
Chris@1313 253 params.gaplessMode = (gapless ?
Chris@1313 254 AudioFileReaderFactory::GaplessMode::Gapless :
Chris@1313 255 AudioFileReaderFactory::GaplessMode::Gappy);
Chris@757 256
Chris@756 257 AudioFileReader *reader =
Chris@756 258 AudioFileReaderFactory::createReader
cannam@1315 259 (audioDir + "/" + format + "/" + audiofile, params);
Chris@1313 260
Chris@756 261 if (!reader) {
Chris@820 262 #if ( QT_VERSION >= 0x050000 )
Chris@763 263 QSKIP("Unsupported file, skipping");
Chris@820 264 #else
Chris@820 265 QSKIP("Unsupported file, skipping", SkipSingle);
Chris@820 266 #endif
Chris@756 267 }
Chris@756 268
Chris@1313 269 QString extension;
Chris@1313 270 sv_samplerate_t fileRate;
Chris@1313 271 int channels;
Chris@1313 272 int fileBitdepth;
Chris@1313 273 getFileMetadata(audiofile, extension, fileRate, channels, fileBitdepth);
Chris@1313 274
Chris@1313 275 QCOMPARE((int)reader->getChannelCount(), channels);
Chris@1313 276 QCOMPARE(reader->getNativeRate(), fileRate);
Chris@1040 277 QCOMPARE(reader->getSampleRate(), readRate);
Chris@757 278
Chris@757 279 AudioTestData tdata(readRate, channels);
Chris@756 280
Chris@756 281 float *reference = tdata.getInterleavedData();
Chris@1040 282 sv_frame_t refFrames = tdata.getFrameCount();
Chris@756 283
Chris@756 284 // The reader should give us exactly the expected number of
Chris@759 285 // frames, except for mp3/aac files. We ask for quite a lot
Chris@759 286 // more, though, so we can (a) check that we only get the
Chris@759 287 // expected number back (if this is not mp3/aac) or (b) take
Chris@759 288 // into account silence at beginning and end (if it is).
Chris@1326 289 floatvec_t test = reader->getInterleavedFrames(0, refFrames + 5000);
Chris@1040 290 sv_frame_t read = test.size() / channels;
Chris@756 291
Chris@1313 292 bool perceptual = (extension == "mp3" ||
Chris@1313 293 extension == "aac" ||
Chris@1313 294 extension == "m4a");
Chris@1313 295
Chris@1313 296 if (perceptual && !gapless) {
Chris@1313 297 // allow silence at start and end
Chris@759 298 QVERIFY(read >= refFrames);
Chris@757 299 } else {
Chris@759 300 QCOMPARE(read, refFrames);
Chris@757 301 }
Chris@757 302
Chris@1313 303 bool resampled = readRate != fileRate;
Chris@1313 304 double maxLimit, rmsLimit;
cannam@1315 305 getExpectedThresholds(format,
cannam@1315 306 audiofile,
Chris@1313 307 resampled,
Chris@1313 308 gapless,
Chris@1313 309 normalised,
Chris@1313 310 maxLimit, rmsLimit);
Chris@1313 311
Chris@1313 312 double edgeLimit = maxLimit * 3; // in first or final edgeSize frames
Chris@1313 313 if (resampled && edgeLimit < 0.1) edgeLimit = 0.1;
Chris@759 314 int edgeSize = 100;
Chris@759 315
Chris@759 316 // And we ignore completely the last few frames when upsampling
Chris@1313 317 int discard = 1 + int(round(readRate / fileRate));
Chris@759 318
Chris@759 319 int offset = 0;
Chris@759 320
Chris@1313 321 if (perceptual) {
Chris@759 322
cannam@1314 323 // Look for an initial offset.
cannam@1314 324 //
cannam@1314 325 // We know the first channel has a sinusoid in it. It
cannam@1314 326 // should have a peak at 0.4ms (see AudioTestData.h) but
cannam@1314 327 // that might have been clipped, which would make it
cannam@1314 328 // imprecise. We can tell if it's clipped, though, as
cannam@1314 329 // there will be samples having exactly identical
cannam@1314 330 // values. So what we look for is the peak if it's not
cannam@1314 331 // clipped and, if it is, the first zero crossing after
cannam@1314 332 // the peak, which should be at 0.8ms.
cannam@1314 333
Chris@1296 334 int expectedPeak = int(0.0004 * readRate);
cannam@1314 335 int expectedZC = int(0.0008 * readRate);
cannam@1314 336 bool foundPeak = false;
cannam@1314 337 for (int i = 1; i+1 < read; ++i) {
cannam@1314 338 float prevSample = test[(i-1) * channels];
cannam@1314 339 float thisSample = test[i * channels];
cannam@1314 340 float nextSample = test[(i+1) * channels];
cannam@1314 341 if (thisSample > 0.8 && nextSample < thisSample) {
cannam@1314 342 foundPeak = true;
cannam@1314 343 if (thisSample > prevSample) {
cannam@1314 344 // not clipped
cannam@1314 345 offset = i - expectedPeak - 1;
cannam@1314 346 break;
cannam@1314 347 }
cannam@1314 348 }
cannam@1314 349 if (foundPeak && (thisSample >= 0.0 && nextSample < 0.0)) {
cannam@1315 350 // cerr << "thisSample = " << thisSample << ", nextSample = "
cannam@1315 351 // << nextSample << endl;
cannam@1314 352 offset = i - expectedZC - 1;
Chris@759 353 break;
Chris@759 354 }
Chris@759 355 }
Chris@1313 356
cannam@1315 357 // int fileRateEquivalent = int((offset / readRate) * fileRate);
cannam@1315 358 // std::cerr << "offset = " << offset << std::endl;
cannam@1315 359 // std::cerr << "at file rate would be " << fileRateEquivalent << std::endl;
Chris@1313 360
Chris@1313 361 // Previously our m4a test file had a fixed offset of 1024
Chris@1313 362 // at the file sample rate -- this may be because it was
Chris@1313 363 // produced by FAAC which did not write in the delay as
Chris@1313 364 // metadata? We now have an m4a produced by Core Audio
Chris@1313 365 // which gives a 0 offset. What to do...
Chris@1313 366
Chris@1313 367 // Anyway, mp3s should have 0 offset in gapless mode and
Chris@1313 368 // "something else" otherwise.
Chris@1313 369
Chris@1313 370 if (gapless) {
cannam@1315 371 if (format == "aac") {
cannam@1315 372 // ouch!
cannam@1315 373 if (offset == -1) offset = 0;
cannam@1315 374 }
Chris@1313 375 QCOMPARE(offset, 0);
Chris@1313 376 }
Chris@759 377 }
Chris@756 378
cannam@1315 379 {
cannam@1315 380 // Write the diff file now, so that it's already been written
cannam@1315 381 // even if the comparison fails. We aren't checking anything
cannam@1315 382 // here except as necessary to avoid buffer overruns etc
cannam@1315 383
cannam@1315 384 QString diffFile =
cannam@1315 385 testName(format, audiofile, rate, normalised, gapless);
cannam@1315 386 diffFile.replace("/", "_");
cannam@1315 387 diffFile.replace(".", "_");
cannam@1315 388 diffFile.replace(" ", "_");
cannam@1315 389 diffFile += ".wav";
cannam@1315 390 diffFile = QDir(diffDir).filePath(diffFile);
cannam@1315 391 WavFileWriter diffWriter(diffFile, readRate, channels,
Chris@1359 392 WavFileWriter::WriteToTemporary);
cannam@1315 393 QVERIFY(diffWriter.isOK());
cannam@1315 394
cannam@1315 395 vector<vector<float>> diffs(channels);
cannam@1315 396 for (int c = 0; c < channels; ++c) {
cannam@1315 397 for (int i = 0; i < refFrames; ++i) {
cannam@1315 398 int ix = i + offset;
cannam@1315 399 if (ix < read) {
cannam@1315 400 float signeddiff =
cannam@1315 401 test[ix * channels + c] -
cannam@1315 402 reference[i * channels + c];
cannam@1315 403 diffs[c].push_back(signeddiff);
cannam@1315 404 }
cannam@1315 405 }
cannam@1315 406 }
cannam@1315 407 float **ptrs = new float*[channels];
cannam@1315 408 for (int c = 0; c < channels; ++c) {
cannam@1315 409 ptrs[c] = diffs[c].data();
cannam@1315 410 }
cannam@1315 411 diffWriter.writeSamples(ptrs, refFrames);
cannam@1315 412 delete[] ptrs;
cannam@1315 413 }
Chris@1313 414
Chris@1346 415 for (int c = 0; c < channels; ++c) {
Chris@1313 416
Chris@1313 417 double maxDiff = 0.0;
Chris@1313 418 double totalDiff = 0.0;
Chris@1313 419 double totalSqrDiff = 0.0;
Chris@1346 420 int maxIndex = 0;
Chris@1313 421
Chris@1346 422 for (int i = 0; i < refFrames; ++i) {
Chris@1296 423 int ix = i + offset;
Chris@1296 424 if (ix >= read) {
cannam@1308 425 cerr << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl;
Chris@1296 426 QVERIFY(ix < read);
Chris@1296 427 }
Chris@1313 428
Chris@1296 429 if (ix + discard >= read) {
Chris@1296 430 // we forgive the very edge samples when
Chris@1296 431 // resampling (discard > 0)
Chris@1296 432 continue;
Chris@1296 433 }
Chris@1313 434
Chris@1346 435 double diff = fabs(test[ix * channels + c] -
cannam@1315 436 reference[i * channels + c]);
Chris@1313 437
Chris@1346 438 totalDiff += diff;
Chris@1313 439 totalSqrDiff += diff * diff;
Chris@1313 440
Chris@757 441 // in edge areas, record this only if it exceeds edgeLimit
Chris@1313 442 if (i < edgeSize || i + edgeSize >= refFrames) {
Chris@1313 443 if (diff > edgeLimit && diff > maxDiff) {
Chris@1313 444 maxDiff = diff;
Chris@1313 445 maxIndex = i;
Chris@757 446 }
Chris@757 447 } else {
Chris@1313 448 if (diff > maxDiff) {
Chris@1313 449 maxDiff = diff;
Chris@1313 450 maxIndex = i;
Chris@757 451 }
Chris@1346 452 }
Chris@1346 453 }
Chris@1313 454
Chris@1346 455 double meanDiff = totalDiff / double(refFrames);
Chris@1313 456 double rmsDiff = sqrt(totalSqrDiff / double(refFrames));
cannam@1308 457
cannam@1314 458 /*
Chris@1346 459 cerr << "channel " << c << ": mean diff " << meanDiff << endl;
Chris@1313 460 cerr << "channel " << c << ": rms diff " << rmsDiff << endl;
Chris@1313 461 cerr << "channel " << c << ": max diff " << maxDiff << " at " << maxIndex << endl;
cannam@1314 462 */
Chris@1313 463 if (rmsDiff >= rmsLimit) {
Chris@1346 464 cerr << "ERROR: for audiofile " << audiofile << ": RMS diff = " << rmsDiff << " for channel " << c << " (limit = " << rmsLimit << ")" << endl;
Chris@1313 465 QVERIFY(rmsDiff < rmsLimit);
Chris@1313 466 }
Chris@1346 467 if (maxDiff >= maxLimit) {
Chris@1346 468 cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxDiff << " at frame " << maxIndex << " of " << read << " on channel " << c << " (limit = " << maxLimit << ", edge limit = " << edgeLimit << ", mean diff = " << meanDiff << ", rms = " << rmsDiff << ")" << endl;
Chris@1346 469 QVERIFY(maxDiff < maxLimit);
Chris@1346 470 }
Chris@1313 471
Chris@1313 472 // and check for spurious material at end
Chris@1313 473
Chris@1309 474 for (sv_frame_t i = refFrames; i + offset < read; ++i) {
Chris@1309 475 sv_frame_t ix = i + offset;
Chris@1323 476 float quiet = 0.1f; //!!! allow some ringing - but let's come back to this, it should tail off
cannam@1308 477 float mag = fabsf(test[ix * channels + c]);
cannam@1308 478 if (mag > quiet) {
Chris@1313 479 cerr << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << " after reference+offset ended at " << refFrames+offset << ")" << endl;
cannam@1308 480 QVERIFY(mag < quiet);
cannam@1308 481 }
cannam@1308 482 }
Chris@756 483 }
Chris@756 484 }
Chris@756 485 };
Chris@756 486
Chris@756 487 #endif