Mercurial > hg > svcore
view data/fileio/test/AudioFileReaderTest.h @ 1793:f0ffc88a36b3 time-frequency-boxes
Add duration-and-extent type, which corresponds to a box model
author | Chris Cannam |
---|---|
date | Wed, 25 Sep 2019 11:06:59 +0100 (2019-09-25) |
parents | 83cb6e9d769b |
children |
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ /* Sonic Visualiser An audio file viewer and annotation editor. Centre for Digital Music, Queen Mary, University of London. This file copyright 2013 Chris Cannam. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. See the file COPYING included with this distribution for more information. */ #ifndef TEST_AUDIO_FILE_READER_H #define TEST_AUDIO_FILE_READER_H #include "../AudioFileReaderFactory.h" #include "../AudioFileReader.h" #include "../WavFileWriter.h" #include "AudioTestData.h" #include "UnsupportedFormat.h" #include <cmath> #include <QObject> #include <QtTest> #include <QDir> #include <iostream> using namespace std; class AudioFileReaderTest : public QObject { Q_OBJECT private: QString testDirBase; QString audioDir; QString diffDir; public: AudioFileReaderTest(QString base) { if (base == "") { base = "svcore/data/fileio/test"; } testDirBase = base; audioDir = base + "/audio"; diffDir = base + "/diffs"; } private: const char *strOf(QString s) { return strdup(s.toLocal8Bit().data()); } void getFileMetadata(QString filename, QString &extension, sv_samplerate_t &rate, int &channels, int &bitdepth) { QStringList fileAndExt = filename.split("."); QStringList bits = fileAndExt[0].split("-"); extension = fileAndExt[1]; rate = bits[0].toInt(); channels = bits[1].toInt(); bitdepth = 16; if (bits.length() > 2) { bitdepth = bits[2].toInt(); } } void getExpectedThresholds(QString format, QString filename, bool resampled, bool gapless, bool normalised, double &maxLimit, double &rmsLimit) { QString extension; sv_samplerate_t fileRate; int channels; int bitdepth; getFileMetadata(filename, extension, fileRate, channels, bitdepth); if (normalised) { if (format == "ogg") { // Our ogg is not especially high quality and is // actually further from the original if normalised maxLimit = 0.1; rmsLimit = 0.03; } else if (format == "opus") { maxLimit = 0.06; rmsLimit = 0.015; } else if (format == "aac") { // Terrible performance for this test, load of spill // from one channel to the other. I guess they know // what they're doing, it's perceptual after all, but // it does make this check a bit superfluous, you // could probably pass it with a signal that sounds // nothing like the original maxLimit = 0.2; rmsLimit = 0.1; } else if (format == "wma") { maxLimit = 0.05; rmsLimit = 0.01; } else if (format == "mp3") { if (resampled && !gapless) { // We expect worse figures here, because the // combination of uncompensated encoder delay + // resampling results in a fractional delay which // means the decoded signal is slightly out of // phase compared to the test signal maxLimit = 0.1; rmsLimit = 0.05; } else { maxLimit = 0.05; rmsLimit = 0.01; } } else { // lossless formats (wav, aiff, flac, apple_lossless) if (bitdepth >= 16 && !resampled) { maxLimit = 1e-3; rmsLimit = 3e-4; } else { maxLimit = 0.01; rmsLimit = 5e-3; } } } else { // !normalised if (format == "ogg") { maxLimit = 0.06; rmsLimit = 0.03; } else if (format == "opus") { maxLimit = 0.06; rmsLimit = 0.015; } else if (format == "aac") { maxLimit = 0.2; rmsLimit = 0.1; } else if (format == "wma") { maxLimit = 0.05; rmsLimit = 0.01; } else if (format == "mp3") { // all mp3 figures are worse when not normalising maxLimit = 0.1; rmsLimit = 0.05; } else { // lossless formats (wav, aiff, flac, apple_lossless) if (bitdepth >= 16 && !resampled) { maxLimit = 1e-3; rmsLimit = 3e-4; } else { maxLimit = 0.02; rmsLimit = 0.01; } } } } QString testName(QString format, QString filename, int rate, bool norm, bool gapless) { return QString("%1/%2 at %3%4%5") .arg(format) .arg(filename) .arg(rate) .arg(norm ? " normalised": "") .arg(gapless ? "" : " non-gapless"); } private slots: void init() { if (!QDir(audioDir).exists()) { QString cwd = QDir::currentPath(); SVCERR << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist (cwd = " << cwd << ")" << endl; QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found"); } if (!QDir(diffDir).exists() && !QDir().mkpath(diffDir)) { SVCERR << "ERROR: Audio diff directory \"" << diffDir << "\" does not exist and could not be created" << endl; QVERIFY2(QDir(diffDir).exists(), "Audio diff directory not found and could not be created"); } } void read_data() { QTest::addColumn<QString>("format"); QTest::addColumn<QString>("audiofile"); QTest::addColumn<int>("rate"); QTest::addColumn<bool>("normalised"); QTest::addColumn<bool>("gapless"); QStringList dirs = QDir(audioDir).entryList(QDir::Dirs | QDir::NoDotAndDotDot); for (QString format: dirs) { QStringList files = QDir(QDir(audioDir).filePath(format)) .entryList(QDir::Files); int readRates[] = { 44100, 48000 }; bool norms[] = { false, true }; bool gaplesses[] = { true, false }; foreach (QString filename, files) { for (int rate: readRates) { for (bool norm: norms) { for (bool gapless: gaplesses) { #ifdef Q_OS_WIN if (format == "aac") { if (gapless) { // Apparently no support for AAC // encoder delay compensation in // MediaFoundation, so these tests // are only available non-gapless continue; } } else if (format != "mp3") { if (!gapless) { // All other formats but mp3 are // intrinsically gapless, so we // can skip the non-gapless option continue; } } #else if (format != "mp3") { if (!gapless) { // All other formats but mp3 are // intrinsically gapless // everywhere except for Windows // (see above), so we can skip the // non-gapless option continue; } } #endif QString desc = testName (format, filename, rate, norm, gapless); QTest::newRow(strOf(desc)) << format << filename << rate << norm << gapless; } } } } } } void read() { QFETCH(QString, format); QFETCH(QString, audiofile); QFETCH(int, rate); QFETCH(bool, normalised); QFETCH(bool, gapless); sv_samplerate_t readRate(rate); // cerr << "\naudiofile = " << audiofile << endl; AudioFileReaderFactory::Parameters params; params.targetRate = readRate; params.normalisation = (normalised ? AudioFileReaderFactory::Normalisation::Peak : AudioFileReaderFactory::Normalisation::None); params.gaplessMode = (gapless ? AudioFileReaderFactory::GaplessMode::Gapless : AudioFileReaderFactory::GaplessMode::Gappy); AudioFileReader *reader = AudioFileReaderFactory::createReader (audioDir + "/" + format + "/" + audiofile, params); if (!reader) { if (UnsupportedFormat::isLegitimatelyUnsupported(format)) { #if ( QT_VERSION >= 0x050000 ) QSKIP("Unsupported file, skipping"); #else QSKIP("Unsupported file, skipping", SkipSingle); #endif } } QVERIFY(reader != nullptr); QString extension; sv_samplerate_t fileRate; int channels; int fileBitdepth; getFileMetadata(audiofile, extension, fileRate, channels, fileBitdepth); QCOMPARE((int)reader->getChannelCount(), channels); QCOMPARE(reader->getNativeRate(), fileRate); QCOMPARE(reader->getSampleRate(), readRate); AudioTestData tdata(readRate, channels); float *reference = tdata.getInterleavedData(); sv_frame_t refFrames = tdata.getFrameCount(); // The reader should give us exactly the expected number of // frames, except for mp3/aac files. We ask for quite a lot // more, though, so we can (a) check that we only get the // expected number back (if this is not mp3/aac) or (b) take // into account silence at beginning and end (if it is). floatvec_t test = reader->getInterleavedFrames(0, refFrames + 5000); delete reader; reader = 0; sv_frame_t read = test.size() / channels; bool perceptual = (extension == "mp3" || extension == "aac" || extension == "m4a" || extension == "wma" || extension == "opus"); if (perceptual && !gapless) { // allow silence at start and end QVERIFY(read >= refFrames); } else { QCOMPARE(read, refFrames); } bool resampled = readRate != fileRate; double maxLimit, rmsLimit; getExpectedThresholds(format, audiofile, resampled, gapless, normalised, maxLimit, rmsLimit); double edgeLimit = maxLimit * 3; // in first or final edgeSize frames if (resampled && edgeLimit < 0.1) edgeLimit = 0.1; int edgeSize = 100; // And we ignore completely the last few frames when upsampling int discard = 1 + int(round(readRate / fileRate)); int offset = 0; if (perceptual) { // Look for an initial offset. // // We know the first channel has a sinusoid in it. It // should have a peak at 0.4ms (see AudioTestData.h) but // that might have been clipped, which would make it // imprecise. We can tell if it's clipped, though, as // there will be samples having exactly identical // values. So what we look for is the peak if it's not // clipped and, if it is, the first zero crossing after // the peak, which should be at 0.8ms. int expectedPeak = int(0.0004 * readRate); int expectedZC = int(0.0008 * readRate); bool foundPeak = false; for (int i = 1; i+1 < read; ++i) { float prevSample = test[(i-1) * channels]; float thisSample = test[i * channels]; float nextSample = test[(i+1) * channels]; if (thisSample > 0.8 && nextSample < thisSample) { foundPeak = true; if (thisSample > prevSample) { // not clipped offset = i - expectedPeak - 1; break; } } if (foundPeak && (thisSample >= 0.0 && nextSample < 0.0)) { // cerr << "thisSample = " << thisSample << ", nextSample = " // << nextSample << endl; offset = i - expectedZC - 1; break; } } // int fileRateEquivalent = int((offset / readRate) * fileRate); // std::cerr << "offset = " << offset << std::endl; // std::cerr << "at file rate would be " << fileRateEquivalent << std::endl; // Previously our m4a test file had a fixed offset of 1024 // at the file sample rate -- this may be because it was // produced by FAAC which did not write in the delay as // metadata? We now have an m4a produced by Core Audio // which gives a 0 offset. What to do... // Anyway, mp3s should have 0 offset in gapless mode and // "something else" otherwise. if (gapless) { if (format == "aac" #ifdef Q_OS_WIN || (format == "mp3" && (readRate != fileRate)) #endif ) { // ouch! if (offset == -1) offset = 0; } QCOMPARE(offset, 0); } } { // Write the diff file now, so that it's already been written // even if the comparison fails. We aren't checking anything // here except as necessary to avoid buffer overruns etc QString diffFile = testName(format, audiofile, rate, normalised, gapless); diffFile.replace("/", "_"); diffFile.replace(".", "_"); diffFile.replace(" ", "_"); diffFile += ".wav"; diffFile = QDir(diffDir).filePath(diffFile); WavFileWriter diffWriter(diffFile, readRate, channels, WavFileWriter::WriteToTemporary); QVERIFY(diffWriter.isOK()); vector<vector<float>> diffs(channels); for (int c = 0; c < channels; ++c) { for (int i = 0; i < refFrames; ++i) { int ix = i + offset; if (ix < read) { float signeddiff = test[ix * channels + c] - reference[i * channels + c]; diffs[c].push_back(signeddiff); } } } float **ptrs = new float*[channels]; for (int c = 0; c < channels; ++c) { ptrs[c] = diffs[c].data(); } diffWriter.writeSamples(ptrs, refFrames); delete[] ptrs; } for (int c = 0; c < channels; ++c) { double maxDiff = 0.0; double totalDiff = 0.0; double totalSqrDiff = 0.0; int maxIndex = 0; for (int i = 0; i < refFrames; ++i) { int ix = i + offset; if (ix >= read) { SVCERR << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl; QVERIFY(ix < read); } if (ix + discard >= read) { // we forgive the very edge samples when // resampling (discard > 0) continue; } double diff = fabs(test[ix * channels + c] - reference[i * channels + c]); totalDiff += diff; totalSqrDiff += diff * diff; // in edge areas, record this only if it exceeds edgeLimit if (i < edgeSize || i + edgeSize >= refFrames) { if (diff > edgeLimit && diff > maxDiff) { maxDiff = diff; maxIndex = i; } } else { if (diff > maxDiff) { maxDiff = diff; maxIndex = i; } } } double meanDiff = totalDiff / double(refFrames); double rmsDiff = sqrt(totalSqrDiff / double(refFrames)); /* cerr << "channel " << c << ": mean diff " << meanDiff << endl; cerr << "channel " << c << ": rms diff " << rmsDiff << endl; cerr << "channel " << c << ": max diff " << maxDiff << " at " << maxIndex << endl; */ if (rmsDiff >= rmsLimit) { SVCERR << "ERROR: for audiofile " << audiofile << ": RMS diff = " << rmsDiff << " for channel " << c << " (limit = " << rmsLimit << ")" << endl; QVERIFY(rmsDiff < rmsLimit); } if (maxDiff >= maxLimit) { SVCERR << "ERROR: for audiofile " << audiofile << ": max diff = " << maxDiff << " at frame " << maxIndex << " of " << read << " on channel " << c << " (limit = " << maxLimit << ", edge limit = " << edgeLimit << ", mean diff = " << meanDiff << ", rms = " << rmsDiff << ")" << endl; QVERIFY(maxDiff < maxLimit); } // and check for spurious material at end for (sv_frame_t i = refFrames; i + offset < read; ++i) { sv_frame_t ix = i + offset; float quiet = 0.1f; //!!! allow some ringing - but let's come back to this, it should tail off float mag = fabsf(test[ix * channels + c]); if (mag > quiet) { SVCERR << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << " after reference+offset ended at " << refFrames+offset << ")" << endl; QVERIFY(mag < quiet); } } } } }; #endif