Mercurial > hg > svcore
diff data/fileio/test/AudioFileReaderTest.h @ 1313:ff9697592bef 3.0-integration
Add gapless preference to prefs dialog; much work on audio read tests
author | Chris Cannam |
---|---|
date | Thu, 01 Dec 2016 17:45:40 +0000 |
parents | 2e7fcdd5f627 |
children | 00cae2d5ee7e |
line wrap: on
line diff
--- a/data/fileio/test/AudioFileReaderTest.h Tue Nov 29 17:09:07 2016 +0000 +++ b/data/fileio/test/AudioFileReaderTest.h Thu Dec 01 17:45:40 2016 +0000 @@ -18,6 +18,7 @@ #include "../AudioFileReaderFactory.h" #include "../AudioFileReader.h" +#include "../WavFileWriter.h" #include "AudioTestData.h" @@ -32,6 +33,7 @@ using namespace std; static QString audioDir = "svcore/data/fileio/test/testfiles"; +static QString diffDir = "svcore/data/fileio/test/diffs"; class AudioFileReaderTest : public QObject { @@ -41,6 +43,127 @@ return strdup(s.toLocal8Bit().data()); } + void getFileMetadata(QString filename, + QString &extension, + sv_samplerate_t &rate, + int &channels, + int &bitdepth) { + + QStringList fileAndExt = filename.split("."); + QStringList bits = fileAndExt[0].split("-"); + + extension = fileAndExt[1]; + rate = bits[0].toInt(); + channels = bits[1].toInt(); + bitdepth = 16; + if (bits.length() > 2) { + bitdepth = bits[2].toInt(); + } + } + + void getExpectedThresholds(QString filename, + bool resampled, + bool gapless, + bool normalised, + double &maxLimit, + double &rmsLimit) { + + QString extension; + sv_samplerate_t fileRate; + int channels; + int bitdepth; + getFileMetadata(filename, extension, fileRate, channels, bitdepth); + + if (normalised) { + + if (extension == "ogg") { + + // Our ogg is not especially high quality and is + // actually further from the original if normalised + + maxLimit = 0.1; + rmsLimit = 0.03; + + } else if (extension == "m4a" || extension == "aac") { + + //!!! to be worked out + maxLimit = 1e-10; + rmsLimit = 1e-10; + + } else if (extension == "mp3") { + + if (resampled && !gapless) { + + // We expect worse figures here, because the + // combination of uncompensated encoder delay + + // resampling results in a fractional delay which + // means the decoded signal is slightly out of + // phase compared to the test signal + + maxLimit = 0.1; + rmsLimit = 0.05; + + } else { + + maxLimit = 0.05; + rmsLimit = 0.01; + } + + } else { + + // supposed to be lossless then (wav, aiff, flac) + + if (bitdepth >= 16 && !resampled) { + maxLimit = 1e-3; + rmsLimit = 3e-4; + } else { + maxLimit = 0.01; + rmsLimit = 5e-3; + } + } + + } else { // !normalised + + if (extension == "ogg") { + + maxLimit = 0.06; + rmsLimit = 0.03; + + } else if (extension == "m4a" || extension == "aac") { + + //!!! to be worked out + maxLimit = 1e-10; + rmsLimit = 1e-10; + + } else if (extension == "mp3") { + + // all mp3 figures are worse when not normalising + maxLimit = 0.1; + rmsLimit = 0.05; + + } else { + + // supposed to be lossless then (wav, aiff, flac) + + if (bitdepth >= 16 && !resampled) { + maxLimit = 1e-3; + rmsLimit = 3e-4; + } else { + maxLimit = 0.02; + rmsLimit = 0.01; + } + } + } + } + + QString testName(QString filename, int rate, bool norm, bool gapless) { + return QString("%1 at %2%3%4") + .arg(filename) + .arg(rate) + .arg(norm ? " normalised": "") + .arg(gapless ? "" : " non-gapless"); + } + private slots: void init() { @@ -48,35 +171,66 @@ cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist" << endl; QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found"); } + if (!QDir(diffDir).exists() && !QDir().mkpath(diffDir)) { + cerr << "ERROR: Audio diff directory \"" << diffDir << "\" does not exist and could not be created" << endl; + QVERIFY2(QDir(diffDir).exists(), "Audio diff directory not found and could not be created"); + } } void read_data() { QTest::addColumn<QString>("audiofile"); + QTest::addColumn<int>("rate"); + QTest::addColumn<bool>("normalised"); + QTest::addColumn<bool>("gapless"); QStringList files = QDir(audioDir).entryList(QDir::Files); + int readRates[] = { 44100, 48000 }; + bool norms[] = { false, true }; + bool gaplesses[] = { true, false }; foreach (QString filename, files) { - QTest::newRow(strOf(filename)) << filename; + for (int rate: readRates) { + for (bool norm: norms) { + for (bool gapless: gaplesses) { + + if (QFileInfo(filename).suffix() != "mp3" && + !gapless) { + continue; + } + + QString desc = testName(filename, rate, norm, gapless); + + QTest::newRow(strOf(desc)) + << filename << rate << norm << gapless; + } + } + } } } void read() { QFETCH(QString, audiofile); + QFETCH(int, rate); + QFETCH(bool, normalised); + QFETCH(bool, gapless); - sv_samplerate_t readRate = 48000; + sv_samplerate_t readRate(rate); + + cerr << "\naudiofile = " << audiofile << endl; + + AudioFileReaderFactory::Parameters params; + params.targetRate = readRate; + params.normalisation = (normalised ? + AudioFileReaderFactory::Normalisation::Peak : + AudioFileReaderFactory::Normalisation::None); + params.gaplessMode = (gapless ? + AudioFileReaderFactory::GaplessMode::Gapless : + AudioFileReaderFactory::GaplessMode::Gappy); AudioFileReader *reader = AudioFileReaderFactory::createReader - (audioDir + "/" + audiofile, readRate); - - QStringList fileAndExt = audiofile.split("."); - QStringList bits = fileAndExt[0].split("-"); - QString extension = fileAndExt[1]; - sv_samplerate_t nominalRate = bits[0].toInt(); - int nominalChannels = bits[1].toInt(); - int nominalDepth = 16; - if (bits.length() > 2) nominalDepth = bits[2].toInt(); - + (audioDir + "/" + audiofile, params); + if (!reader) { #if ( QT_VERSION >= 0x050000 ) QSKIP("Unsupported file, skipping"); @@ -85,11 +239,25 @@ #endif } - QCOMPARE((int)reader->getChannelCount(), nominalChannels); - QCOMPARE(reader->getNativeRate(), nominalRate); + QString extension; + sv_samplerate_t fileRate; + int channels; + int fileBitdepth; + getFileMetadata(audiofile, extension, fileRate, channels, fileBitdepth); + + QString diffFile = testName(audiofile, rate, normalised, gapless); + diffFile.replace(".", "_"); + diffFile.replace(" ", "_"); + diffFile += ".wav"; + diffFile = QDir(diffDir).filePath(diffFile); + WavFileWriter diffWriter(diffFile, readRate, channels, + WavFileWriter::WriteToTarget); //!!! NB WriteToTemporary not working, why? + QVERIFY(diffWriter.isOK()); + + QCOMPARE((int)reader->getChannelCount(), channels); + QCOMPARE(reader->getNativeRate(), fileRate); QCOMPARE(reader->getSampleRate(), readRate); - int channels = reader->getChannelCount(); AudioTestData tdata(readRate, channels); float *reference = tdata.getInterleavedData(); @@ -103,123 +271,151 @@ vector<float> test = reader->getInterleavedFrames(0, refFrames + 5000); sv_frame_t read = test.size() / channels; - if (extension == "mp3" || extension == "aac" || extension == "m4a") { - // mp3s and aacs can have silence at start and end + bool perceptual = (extension == "mp3" || + extension == "aac" || + extension == "m4a"); + + if (perceptual && !gapless) { + // allow silence at start and end QVERIFY(read >= refFrames); } else { QCOMPARE(read, refFrames); } - // Our limits are pretty relaxed -- we're not testing decoder - // or resampler quality here, just whether the results are - // plainly wrong (e.g. at wrong samplerate or with an offset). - - double maxLimit = 0.01; - double meanLimit = 0.001; - double edgeLimit = maxLimit * 10; // in first or final edgeSize frames + bool resampled = readRate != fileRate; + double maxLimit, rmsLimit; + getExpectedThresholds(audiofile, + resampled, + gapless, + normalised, + maxLimit, rmsLimit); + + double edgeLimit = maxLimit * 3; // in first or final edgeSize frames + if (resampled && edgeLimit < 0.1) edgeLimit = 0.1; int edgeSize = 100; - if (nominalDepth < 16) { - maxLimit = 0.02; - meanLimit = 0.02; - } else if (extension == "ogg" || extension == "mp3") { - maxLimit = 0.1; - meanLimit = 0.035; - edgeLimit = maxLimit * 3; - } else if (extension == "aac" || extension == "m4a") { - maxLimit = 0.3; // seems max diff can be quite large here - // even when mean is fairly small - meanLimit = 0.01; - edgeLimit = maxLimit * 3; - } - // And we ignore completely the last few frames when upsampling - int discard = 1 + int(round(readRate / nominalRate)); + int discard = 1 + int(round(readRate / fileRate)); int offset = 0; - if (extension == "aac" || extension == "m4a") { - // our m4a file appears to have a fixed offset of 1024 (at - // file sample rate) - // offset = int(round((1024 / nominalRate) * readRate)); - offset = 0; - } + if (perceptual) { - if (extension == "mp3") { - // ...while mp3s appear to vary. What we're looking for is + // Look for an initial offset. What we're looking for is // the first peak of the sinusoid in the first channel // (since we may have only the one channel). This should - // appear at 0.4ms (see AudioTestData.h) + // appear at 0.4ms (see AudioTestData.h). + int expectedPeak = int(0.0004 * readRate); -// std::cerr << "expectedPeak = " << expectedPeak << std::endl; for (int i = 1; i < read; ++i) { if (test[i * channels] > 0.8 && test[(i+1) * channels] < test[i * channels]) { offset = i - expectedPeak - 1; -// std::cerr << "actual peak = " << i-1 << std::endl; break; } } -// std::cerr << "offset = " << offset << std::endl; + + std::cerr << "offset = " << offset << std::endl; + std::cerr << "at file rate would be " << (offset / readRate) * fileRate << std::endl; + + // Previously our m4a test file had a fixed offset of 1024 + // at the file sample rate -- this may be because it was + // produced by FAAC which did not write in the delay as + // metadata? We now have an m4a produced by Core Audio + // which gives a 0 offset. What to do... + + // Anyway, mp3s should have 0 offset in gapless mode and + // "something else" otherwise. + + if (gapless) { + QCOMPARE(offset, 0); + } } + vector<vector<float>> diffs(channels); + for (int c = 0; c < channels; ++c) { + + double maxDiff = 0.0; + double totalDiff = 0.0; + double totalSqrDiff = 0.0; + int maxIndex = 0; + +// cerr << "\nchannel " << c << ": "; - float maxdiff = 0.f; - int maxAt = 0; - float totdiff = 0.f; - for (int i = 0; i < refFrames; ++i) { int ix = i + offset; if (ix >= read) { cerr << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl; QVERIFY(ix < read); } + + float signeddiff = + test[ix * channels + c] - + reference[i * channels + c]; + + diffs[c].push_back(signeddiff); + if (ix + discard >= read) { // we forgive the very edge samples when // resampling (discard > 0) continue; } - float diff = fabsf(test[ix * channels + c] - - reference[i * channels + c]); - totdiff += diff; + + double diff = fabs(signeddiff); + + totalDiff += diff; + totalSqrDiff += diff * diff; + // in edge areas, record this only if it exceeds edgeLimit - if (i < edgeSize || i + edgeSize >= read - offset) { - if (diff > edgeLimit && diff > maxdiff) { - maxdiff = diff; - maxAt = i; + if (i < edgeSize || i + edgeSize >= refFrames) { + if (diff > edgeLimit && diff > maxDiff) { + maxDiff = diff; + maxIndex = i; } } else { - if (diff > maxdiff) { - maxdiff = diff; - maxAt = i; + if (diff > maxDiff) { + maxDiff = diff; + maxIndex = i; } } } + + double meanDiff = totalDiff / double(refFrames); + double rmsDiff = sqrt(totalSqrDiff / double(refFrames)); - // check for spurious material at end + cerr << "channel " << c << ": mean diff " << meanDiff << endl; + cerr << "channel " << c << ": rms diff " << rmsDiff << endl; + cerr << "channel " << c << ": max diff " << maxDiff << " at " << maxIndex << endl; + + if (rmsDiff >= rmsLimit) { + cerr << "ERROR: for audiofile " << audiofile << ": RMS diff = " << rmsDiff << " for channel " << c << " (limit = " << rmsLimit << ")" << endl; + QVERIFY(rmsDiff < rmsLimit); + } + if (maxDiff >= maxLimit) { + cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxDiff << " at frame " << maxIndex << " of " << read << " on channel " << c << " (limit = " << maxLimit << ", edge limit = " << edgeLimit << ", mean diff = " << meanDiff << ", rms = " << rmsDiff << ")" << endl; + QVERIFY(maxDiff < maxLimit); + } + + // and check for spurious material at end + for (sv_frame_t i = refFrames; i + offset < read; ++i) { sv_frame_t ix = i + offset; - float quiet = 1e-6f; + float quiet = 0.1; //!!! allow some ringing - but let's come back to this, it should tail off float mag = fabsf(test[ix * channels + c]); if (mag > quiet) { - cerr << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << ")" << endl; + cerr << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << " after reference+offset ended at " << refFrames+offset << ")" << endl; QVERIFY(mag < quiet); } } - - float meandiff = totdiff / float(read); -// cerr << "meandiff on channel " << c << ": " << meandiff << endl; -// cerr << "maxdiff on channel " << c << ": " << maxdiff << " at " << maxAt << endl; - if (meandiff >= meanLimit) { - cerr << "ERROR: for audiofile " << audiofile << ": mean diff = " << meandiff << " for channel " << c << endl; - QVERIFY(meandiff < meanLimit); - } - if (maxdiff >= maxLimit) { - cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxdiff << " at frame " << maxAt << " of " << read << " on channel " << c << " (mean diff = " << meandiff << ")" << endl; - QVERIFY(maxdiff < maxLimit); - } } + + float **ptrs = new float*[channels]; + for (int c = 0; c < channels; ++c) { + ptrs[c] = diffs[c].data(); + } + diffWriter.writeSamples(ptrs, refFrames); + delete[] ptrs; } };