Mercurial > hg > svcore
diff data/fileio/test/AudioFileReaderTest.h @ 1365:3382d914e110
Merge from branch 3.0-integration
author | Chris Cannam |
---|---|
date | Fri, 13 Jan 2017 10:29:44 +0000 |
parents | 1c9bbbb6116a |
children | aadfb395e933 |
line wrap: on
line diff
--- a/data/fileio/test/AudioFileReaderTest.h Mon Nov 21 16:32:58 2016 +0000 +++ b/data/fileio/test/AudioFileReaderTest.h Fri Jan 13 10:29:44 2017 +0000 @@ -18,6 +18,7 @@ #include "../AudioFileReaderFactory.h" #include "../AudioFileReader.h" +#include "../WavFileWriter.h" #include "AudioTestData.h" @@ -31,52 +32,232 @@ using namespace std; -static QString audioDir = "testfiles"; - class AudioFileReaderTest : public QObject { Q_OBJECT +private: + QString testDirBase; + QString audioDir; + QString diffDir; + +public: + AudioFileReaderTest(QString base) { + if (base == "") { + base = "svcore/data/fileio/test"; + } + testDirBase = base; + audioDir = base + "/audio"; + diffDir = base + "/diffs"; + } + +private: const char *strOf(QString s) { return strdup(s.toLocal8Bit().data()); } + void getFileMetadata(QString filename, + QString &extension, + sv_samplerate_t &rate, + int &channels, + int &bitdepth) { + + QStringList fileAndExt = filename.split("."); + QStringList bits = fileAndExt[0].split("-"); + + extension = fileAndExt[1]; + rate = bits[0].toInt(); + channels = bits[1].toInt(); + bitdepth = 16; + if (bits.length() > 2) { + bitdepth = bits[2].toInt(); + } + } + + void getExpectedThresholds(QString format, + QString filename, + bool resampled, + bool gapless, + bool normalised, + double &maxLimit, + double &rmsLimit) { + + QString extension; + sv_samplerate_t fileRate; + int channels; + int bitdepth; + getFileMetadata(filename, extension, fileRate, channels, bitdepth); + + if (normalised) { + + if (format == "ogg") { + + // Our ogg is not especially high quality and is + // actually further from the original if normalised + + maxLimit = 0.1; + rmsLimit = 0.03; + + } else if (format == "aac") { + + // Terrible performance for this test, load of spill + // from one channel to the other. I guess they know + // what they're doing, it's perceptual after all, but + // it does make this check a bit superfluous, you + // could probably pass it with a signal that sounds + // nothing like the original + maxLimit = 0.2; + rmsLimit = 0.1; + + } else if (format == "mp3") { + + if (resampled && !gapless) { + + // We expect worse figures here, because the + // combination of uncompensated encoder delay + + // resampling results in a fractional delay which + // means the decoded signal is slightly out of + // phase compared to the test signal + + maxLimit = 0.1; + rmsLimit = 0.05; + + } else { + + maxLimit = 0.05; + rmsLimit = 0.01; + } + + } else { + + // lossless formats (wav, aiff, flac, apple_lossless) + + if (bitdepth >= 16 && !resampled) { + maxLimit = 1e-3; + rmsLimit = 3e-4; + } else { + maxLimit = 0.01; + rmsLimit = 5e-3; + } + } + + } else { // !normalised + + if (format == "ogg") { + + maxLimit = 0.06; + rmsLimit = 0.03; + + } else if (format == "aac") { + + maxLimit = 0.1; + rmsLimit = 0.1; + + } else if (format == "mp3") { + + // all mp3 figures are worse when not normalising + maxLimit = 0.1; + rmsLimit = 0.05; + + } else { + + // lossless formats (wav, aiff, flac, apple_lossless) + + if (bitdepth >= 16 && !resampled) { + maxLimit = 1e-3; + rmsLimit = 3e-4; + } else { + maxLimit = 0.02; + rmsLimit = 0.01; + } + } + } + } + + QString testName(QString format, QString filename, int rate, bool norm, bool gapless) { + return QString("%1/%2 at %3%4%5") + .arg(format) + .arg(filename) + .arg(rate) + .arg(norm ? " normalised": "") + .arg(gapless ? "" : " non-gapless"); + } + private slots: void init() { if (!QDir(audioDir).exists()) { - cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist" << endl; + QString cwd = QDir::currentPath(); + cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist (cwd = " << cwd << ")" << endl; QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found"); } + if (!QDir(diffDir).exists() && !QDir().mkpath(diffDir)) { + cerr << "ERROR: Audio diff directory \"" << diffDir << "\" does not exist and could not be created" << endl; + QVERIFY2(QDir(diffDir).exists(), "Audio diff directory not found and could not be created"); + } } void read_data() { + QTest::addColumn<QString>("format"); QTest::addColumn<QString>("audiofile"); - QStringList files = QDir(audioDir).entryList(QDir::Files); - foreach (QString filename, files) { - QTest::newRow(strOf(filename)) << filename; + QTest::addColumn<int>("rate"); + QTest::addColumn<bool>("normalised"); + QTest::addColumn<bool>("gapless"); + QStringList dirs = QDir(audioDir).entryList(QDir::Dirs | + QDir::NoDotAndDotDot); + for (QString format: dirs) { + QStringList files = QDir(QDir(audioDir).filePath(format)) + .entryList(QDir::Files); + int readRates[] = { 44100, 48000 }; + bool norms[] = { false, true }; + bool gaplesses[] = { true, false }; + foreach (QString filename, files) { + for (int rate: readRates) { + for (bool norm: norms) { + for (bool gapless: gaplesses) { + + if (format != "mp3" && !gapless) { + continue; + } + + QString desc = testName + (format, filename, rate, norm, gapless); + + QTest::newRow(strOf(desc)) + << format << filename << rate << norm << gapless; + } + } + } + } } } void read() { + QFETCH(QString, format); QFETCH(QString, audiofile); + QFETCH(int, rate); + QFETCH(bool, normalised); + QFETCH(bool, gapless); - sv_samplerate_t readRate = 48000; + sv_samplerate_t readRate(rate); + +// cerr << "\naudiofile = " << audiofile << endl; + + AudioFileReaderFactory::Parameters params; + params.targetRate = readRate; + params.normalisation = (normalised ? + AudioFileReaderFactory::Normalisation::Peak : + AudioFileReaderFactory::Normalisation::None); + params.gaplessMode = (gapless ? + AudioFileReaderFactory::GaplessMode::Gapless : + AudioFileReaderFactory::GaplessMode::Gappy); AudioFileReader *reader = AudioFileReaderFactory::createReader - (audioDir + "/" + audiofile, readRate); - - QStringList fileAndExt = audiofile.split("."); - QStringList bits = fileAndExt[0].split("-"); - QString extension = fileAndExt[1]; - sv_samplerate_t nominalRate = bits[0].toInt(); - int nominalChannels = bits[1].toInt(); - int nominalDepth = 16; - if (bits.length() > 2) nominalDepth = bits[2].toInt(); - + (audioDir + "/" + format + "/" + audiofile, params); + if (!reader) { #if ( QT_VERSION >= 0x050000 ) QSKIP("Unsupported file, skipping"); @@ -85,11 +266,16 @@ #endif } - QCOMPARE((int)reader->getChannelCount(), nominalChannels); - QCOMPARE(reader->getNativeRate(), nominalRate); + QString extension; + sv_samplerate_t fileRate; + int channels; + int fileBitdepth; + getFileMetadata(audiofile, extension, fileRate, channels, fileBitdepth); + + QCOMPARE((int)reader->getChannelCount(), channels); + QCOMPARE(reader->getNativeRate(), fileRate); QCOMPARE(reader->getSampleRate(), readRate); - int channels = reader->getChannelCount(); AudioTestData tdata(readRate, channels); float *reference = tdata.getInterleavedData(); @@ -100,95 +286,200 @@ // more, though, so we can (a) check that we only get the // expected number back (if this is not mp3/aac) or (b) take // into account silence at beginning and end (if it is). - vector<float> test = reader->getInterleavedFrames(0, refFrames + 5000); + floatvec_t test = reader->getInterleavedFrames(0, refFrames + 5000); sv_frame_t read = test.size() / channels; - if (extension == "mp3" || extension == "aac" || extension == "m4a") { - // mp3s and aacs can have silence at start and end + bool perceptual = (extension == "mp3" || + extension == "aac" || + extension == "m4a"); + + if (perceptual && !gapless) { + // allow silence at start and end QVERIFY(read >= refFrames); } else { QCOMPARE(read, refFrames); } - // Our limits are pretty relaxed -- we're not testing decoder - // or resampler quality here, just whether the results are - // plainly wrong (e.g. at wrong samplerate or with an offset) - - double limit = 0.01; - double edgeLimit = limit * 10; // in first or final edgeSize frames + bool resampled = readRate != fileRate; + double maxLimit, rmsLimit; + getExpectedThresholds(format, + audiofile, + resampled, + gapless, + normalised, + maxLimit, rmsLimit); + + double edgeLimit = maxLimit * 3; // in first or final edgeSize frames + if (resampled && edgeLimit < 0.1) edgeLimit = 0.1; int edgeSize = 100; - if (nominalDepth < 16) { - limit = 0.02; - } - if (extension == "ogg" || extension == "mp3" || - extension == "aac" || extension == "m4a") { - limit = 0.2; - edgeLimit = limit * 3; - } - // And we ignore completely the last few frames when upsampling - int discard = 1 + int(round(readRate / nominalRate)); + int discard = 1 + int(round(readRate / fileRate)); int offset = 0; - if (extension == "aac" || extension == "m4a") { - // our m4a file appears to have a fixed offset of 1024 (at - // file sample rate) - offset = int(round((1024 / nominalRate) * readRate)); - } + if (perceptual) { - if (extension == "mp3") { - // while mp3s appear to vary - for (int i = 0; i < read; ++i) { - bool any = false; - double thresh = 0.01; - for (int c = 0; c < channels; ++c) { - if (fabs(test[i * channels + c]) > thresh) { - any = true; + // Look for an initial offset. + // + // We know the first channel has a sinusoid in it. It + // should have a peak at 0.4ms (see AudioTestData.h) but + // that might have been clipped, which would make it + // imprecise. We can tell if it's clipped, though, as + // there will be samples having exactly identical + // values. So what we look for is the peak if it's not + // clipped and, if it is, the first zero crossing after + // the peak, which should be at 0.8ms. + + int expectedPeak = int(0.0004 * readRate); + int expectedZC = int(0.0008 * readRate); + bool foundPeak = false; + for (int i = 1; i+1 < read; ++i) { + float prevSample = test[(i-1) * channels]; + float thisSample = test[i * channels]; + float nextSample = test[(i+1) * channels]; + if (thisSample > 0.8 && nextSample < thisSample) { + foundPeak = true; + if (thisSample > prevSample) { + // not clipped + offset = i - expectedPeak - 1; break; } } - if (any) { - offset = i; + if (foundPeak && (thisSample >= 0.0 && nextSample < 0.0)) { +// cerr << "thisSample = " << thisSample << ", nextSample = " +// << nextSample << endl; + offset = i - expectedZC - 1; break; } } + +// int fileRateEquivalent = int((offset / readRate) * fileRate); // std::cerr << "offset = " << offset << std::endl; +// std::cerr << "at file rate would be " << fileRateEquivalent << std::endl; + + // Previously our m4a test file had a fixed offset of 1024 + // at the file sample rate -- this may be because it was + // produced by FAAC which did not write in the delay as + // metadata? We now have an m4a produced by Core Audio + // which gives a 0 offset. What to do... + + // Anyway, mp3s should have 0 offset in gapless mode and + // "something else" otherwise. + + if (gapless) { + if (format == "aac") { + // ouch! + if (offset == -1) offset = 0; + } + QCOMPARE(offset, 0); + } } - for (int c = 0; c < channels; ++c) { - float maxdiff = 0.f; - int maxAt = 0; - float totdiff = 0.f; - for (int i = 0; i < read - offset - discard && i < refFrames; ++i) { - float diff = fabsf(test[(i + offset) * channels + c] - - reference[i * channels + c]); - totdiff += diff; + { + // Write the diff file now, so that it's already been written + // even if the comparison fails. We aren't checking anything + // here except as necessary to avoid buffer overruns etc + + QString diffFile = + testName(format, audiofile, rate, normalised, gapless); + diffFile.replace("/", "_"); + diffFile.replace(".", "_"); + diffFile.replace(" ", "_"); + diffFile += ".wav"; + diffFile = QDir(diffDir).filePath(diffFile); + WavFileWriter diffWriter(diffFile, readRate, channels, + WavFileWriter::WriteToTemporary); + QVERIFY(diffWriter.isOK()); + + vector<vector<float>> diffs(channels); + for (int c = 0; c < channels; ++c) { + for (int i = 0; i < refFrames; ++i) { + int ix = i + offset; + if (ix < read) { + float signeddiff = + test[ix * channels + c] - + reference[i * channels + c]; + diffs[c].push_back(signeddiff); + } + } + } + float **ptrs = new float*[channels]; + for (int c = 0; c < channels; ++c) { + ptrs[c] = diffs[c].data(); + } + diffWriter.writeSamples(ptrs, refFrames); + delete[] ptrs; + } + + for (int c = 0; c < channels; ++c) { + + double maxDiff = 0.0; + double totalDiff = 0.0; + double totalSqrDiff = 0.0; + int maxIndex = 0; + + for (int i = 0; i < refFrames; ++i) { + int ix = i + offset; + if (ix >= read) { + cerr << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl; + QVERIFY(ix < read); + } + + if (ix + discard >= read) { + // we forgive the very edge samples when + // resampling (discard > 0) + continue; + } + + double diff = fabs(test[ix * channels + c] - + reference[i * channels + c]); + + totalDiff += diff; + totalSqrDiff += diff * diff; + // in edge areas, record this only if it exceeds edgeLimit - if (i < edgeSize || i + edgeSize >= read - offset) { - if (diff > edgeLimit && diff > maxdiff) { - maxdiff = diff; - maxAt = i; + if (i < edgeSize || i + edgeSize >= refFrames) { + if (diff > edgeLimit && diff > maxDiff) { + maxDiff = diff; + maxIndex = i; } } else { - if (diff > maxdiff) { - maxdiff = diff; - maxAt = i; + if (diff > maxDiff) { + maxDiff = diff; + maxIndex = i; } - } - } - float meandiff = totdiff / float(read); -// cerr << "meandiff on channel " << c << ": " << meandiff << endl; -// cerr << "maxdiff on channel " << c << ": " << maxdiff << " at " << maxAt << endl; - if (meandiff >= limit) { - cerr << "ERROR: for audiofile " << audiofile << ": mean diff = " << meandiff << " for channel " << c << endl; - QVERIFY(meandiff < limit); + } } - if (maxdiff >= limit) { - cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxdiff << " at frame " << maxAt << " of " << read << " on channel " << c << " (mean diff = " << meandiff << ")" << endl; - QVERIFY(maxdiff < limit); - } + + double meanDiff = totalDiff / double(refFrames); + double rmsDiff = sqrt(totalSqrDiff / double(refFrames)); + + /* + cerr << "channel " << c << ": mean diff " << meanDiff << endl; + cerr << "channel " << c << ": rms diff " << rmsDiff << endl; + cerr << "channel " << c << ": max diff " << maxDiff << " at " << maxIndex << endl; + */ + if (rmsDiff >= rmsLimit) { + cerr << "ERROR: for audiofile " << audiofile << ": RMS diff = " << rmsDiff << " for channel " << c << " (limit = " << rmsLimit << ")" << endl; + QVERIFY(rmsDiff < rmsLimit); + } + if (maxDiff >= maxLimit) { + cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxDiff << " at frame " << maxIndex << " of " << read << " on channel " << c << " (limit = " << maxLimit << ", edge limit = " << edgeLimit << ", mean diff = " << meanDiff << ", rms = " << rmsDiff << ")" << endl; + QVERIFY(maxDiff < maxLimit); + } + + // and check for spurious material at end + + for (sv_frame_t i = refFrames; i + offset < read; ++i) { + sv_frame_t ix = i + offset; + float quiet = 0.1f; //!!! allow some ringing - but let's come back to this, it should tail off + float mag = fabsf(test[ix * channels + c]); + if (mag > quiet) { + cerr << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << " after reference+offset ended at " << refFrames+offset << ")" << endl; + QVERIFY(mag < quiet); + } + } } } };