svcore: data/fileio/test/AudioFileReaderTest.h comparison

comparison data/fileio/test/AudioFileReaderTest.h @ 1365:3382d914e110

Merge from branch 3.0-integration

author	Chris Cannam
date	Fri, 13 Jan 2017 10:29:44 +0000
parents	1c9bbbb6116a
children	aadfb395e933

comparison

equal deleted inserted replaced

-:6a7ea3bd0e10
+:3382d914e110
 #ifndef TEST_AUDIO_FILE_READER_H
 #define TEST_AUDIO_FILE_READER_H
 #include "../AudioFileReaderFactory.h"
 #include "../AudioFileReader.h"
+#include "../WavFileWriter.h"
 #include "AudioTestData.h"
 #include <cmath>
 #include <iostream>
 using namespace std;
-static QString audioDir = "testfiles";
 class AudioFileReaderTest : public QObject
 {
 Q_OBJECT
+private:
+QString testDirBase;
+QString audioDir;
+QString diffDir;
+public:
+AudioFileReaderTest(QString base) {
 // Record the directories used by the tests. An empty base falls
 // back to the relative path svcore/data/fileio/test. Audio fixtures
 // are looked up under <base>/audio and difference files are placed
 // under <base>/diffs.
+if (base == "") {
+base = "svcore/data/fileio/test";
+}
+testDirBase = base;
+audioDir = base + "/audio";
+diffDir = base + "/diffs";
+}
+private:
 const char *strOf(QString s) {
 // Convert s to its local 8-bit encoding and return a strdup copy,
 // so the pointer stays valid after the temporary byte array is gone.
 // NOTE(review): the copy is not freed anywhere in the code shown
 // here, so each call leaks one small string.
 return strdup(s.toLocal8Bit().data());
+}
+void getFileMetadata(QString filename,
+QString &extension,
+sv_samplerate_t &rate,
+int &channels,
+int &bitdepth) {
 // Decode the properties encoded in a test file name of the form
 // <rate>-<channels>[-<bitdepth>].<extension> and return them through
 // the reference parameters. bitdepth is 16 when the name has no
 // third dash-separated field.
 // NOTE(review): index [1] is used below without a size check, so
 // this assumes the name contains at least one dot and one dash.
 // Anything after a second dot is ignored. TODO confirm that every
 // fixture name matches this pattern.
+QStringList fileAndExt = filename.split(".");
+QStringList bits = fileAndExt[0].split("-");
+extension = fileAndExt[1];
+rate = bits[0].toInt();
+channels = bits[1].toInt();
+bitdepth = 16;
+if (bits.length() > 2) {
+bitdepth = bits[2].toInt();
+}
+}
+void getExpectedThresholds(QString format,
+QString filename,
+bool resampled,
+bool gapless,
+bool normalised,
+double &maxLimit,
+double &rmsLimit) {
 // Choose the limits that read() applies to the difference between
 // decoded and reference audio: maxLimit for the largest per-sample
 // difference and rmsLimit for the RMS difference. Both are returned
 // through the reference parameters.
 // format is matched against ogg, aac and mp3; any other value takes
 // the lossless branch. Of the metadata parsed from filename only
 // bitdepth is consulted, and gapless only matters for normalised mp3.
+QString extension;
+sv_samplerate_t fileRate;
+int channels;
+int bitdepth;
+getFileMetadata(filename, extension, fileRate, channels, bitdepth);
+if (normalised) {
+if (format == "ogg") {
+// Our ogg is not especially high quality and is
+// actually further from the original if normalised
+maxLimit = 0.1;
+rmsLimit = 0.03;
+} else if (format == "aac") {
+// Terrible performance for this test, load of spill
+// from one channel to the other. I guess they know
+// what they're doing, it's perceptual after all, but
+// it does make this check a bit superfluous, you
+// could probably pass it with a signal that sounds
+// nothing like the original
+maxLimit = 0.2;
+rmsLimit = 0.1;
+} else if (format == "mp3") {
+if (resampled && !gapless) {
+// We expect worse figures here, because the
+// combination of uncompensated encoder delay +
+// resampling results in a fractional delay which
+// means the decoded signal is slightly out of
+// phase compared to the test signal
+maxLimit = 0.1;
+rmsLimit = 0.05;
+} else {
+maxLimit = 0.05;
+rmsLimit = 0.01;
+}
+} else {
+// lossless formats (wav, aiff, flac, apple_lossless)
+if (bitdepth >= 16 && !resampled) {
+maxLimit = 1e-3;
+rmsLimit = 3e-4;
+} else {
+maxLimit = 0.01;
+rmsLimit = 5e-3;
+}
+}
+} else { // !normalised
+if (format == "ogg") {
+maxLimit = 0.06;
+rmsLimit = 0.03;
+} else if (format == "aac") {
+maxLimit = 0.1;
+rmsLimit = 0.1;
+} else if (format == "mp3") {
+// all mp3 figures are worse when not normalising
+maxLimit = 0.1;
+rmsLimit = 0.05;
+} else {
+// lossless formats (wav, aiff, flac, apple_lossless)
+if (bitdepth >= 16 && !resampled) {
+maxLimit = 1e-3;
+rmsLimit = 3e-4;
+} else {
+maxLimit = 0.02;
+rmsLimit = 0.01;
+}
+}
+}
+}
+QString testName(QString format, QString filename, int rate, bool norm, bool gapless) {
 // Build a human-readable label for one test case, for example
 // mp3/44100-2.mp3 at 48000 normalised non-gapless
 // The normalised suffix appears only when norm is true and the
 // non-gapless suffix only when gapless is false. The label is used
 // both as the data row tag and as the stem of the diff file name.
+return QString("%1/%2 at %3%4%5")
+.arg(format)
+.arg(filename)
+.arg(rate)
+.arg(norm ? " normalised": "")
+.arg(gapless ? "" : " non-gapless");
 }
 private slots:
 void init()
 {
 // Check that the audio fixture directory exists and that the diff
 // output directory exists or can be created; fail via QVERIFY2
 // otherwise. The current working directory is printed because the
 // default base path set by the constructor is relative.
 // NOTE(review): Qt Test is expected to call init() before each
 // test function - confirm against the Qt Test documentation.
 if (!QDir(audioDir).exists()) {
-cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist" << endl;
+QString cwd = QDir::currentPath();
+cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist (cwd = " << cwd << ")" << endl;
 QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found");
+}
+if (!QDir(diffDir).exists() && !QDir().mkpath(diffDir)) {
+cerr << "ERROR: Audio diff directory \"" << diffDir << "\" does not exist and could not be created" << endl;
+QVERIFY2(QDir(diffDir).exists(), "Audio diff directory not found and could not be created");
 }
 }
 void read_data()
 {
 // Data provider for read(). In the new revision it adds one row for
 // every combination of format subdirectory of audioDir, file inside
 // that subdirectory, read rate (44100 or 48000) and normalisation
 // flag. Non-gapless rows are added only for the mp3 format. Each
 // row is tagged with the label produced by testName().
+QTest::addColumn<QString>("format");
 QTest::addColumn<QString>("audiofile");
-QStringList files = QDir(audioDir).entryList(QDir::Files);
+QTest::addColumn<int>("rate");
-foreach (QString filename, files) {
+QTest::addColumn<bool>("normalised");
-QTest::newRow(strOf(filename)) << filename;
+QTest::addColumn<bool>("gapless");
+QStringList dirs = QDir(audioDir).entryList(QDir::Dirs |
+QDir::NoDotAndDotDot);
+for (QString format: dirs) {
+QStringList files = QDir(QDir(audioDir).filePath(format))
+.entryList(QDir::Files);
+int readRates[] = { 44100, 48000 };
+bool norms[] = { false, true };
+bool gaplesses[] = { true, false };
+foreach (QString filename, files) {
+for (int rate: readRates) {
+for (bool norm: norms) {
+for (bool gapless: gaplesses) {
+if (format != "mp3" && !gapless) {
+continue;
+}
+QString desc = testName
+(format, filename, rate, norm, gapless);
+QTest::newRow(strOf(desc))
+<< format << filename << rate << norm << gapless;
+}
+}
+}
+}
 }
 }
 void read()
 {
 // Data-driven test body, run once per row added by read_data().
 // In the new revision it opens the file through
 // AudioFileReaderFactory with the requested rate, normalisation and
 // gapless settings, checks channel count and sample rates against
 // the values encoded in the file name, writes a difference file
 // under diffDir, and then compares the decoded samples with the
 // AudioTestData reference using the limits chosen by
 // getExpectedThresholds().
+QFETCH(QString, format);
 QFETCH(QString, audiofile);
+QFETCH(int, rate);
-sv_samplerate_t readRate = 48000;
+QFETCH(bool, normalised);
+QFETCH(bool, gapless);
+sv_samplerate_t readRate(rate);
+//        cerr << "\naudiofile = " << audiofile << endl;
+AudioFileReaderFactory::Parameters params;
+params.targetRate = readRate;
+params.normalisation = (normalised ?
+AudioFileReaderFactory::Normalisation::Peak :
+AudioFileReaderFactory::Normalisation::None);
+params.gaplessMode = (gapless ?
+AudioFileReaderFactory::GaplessMode::Gapless :
+AudioFileReaderFactory::GaplessMode::Gappy);
 // NOTE(review): reader is a raw pointer and no delete of it appears
 // in this function - confirm who owns the returned object.
 	AudioFileReader *reader =
 	    AudioFileReaderFactory::createReader
-	    (audioDir + "/" + audiofile, readRate);
+	    (audioDir + "/" + format + "/" + audiofile, params);
-QStringList fileAndExt = audiofile.split(".");
-QStringList bits = fileAndExt[0].split("-");
-QString extension = fileAndExt[1];
-sv_samplerate_t nominalRate = bits[0].toInt();
-int nominalChannels = bits[1].toInt();
-int nominalDepth = 16;
-if (bits.length() > 2) nominalDepth = bits[2].toInt();
 	if (!reader) {
 #if ( QT_VERSION >= 0x050000 )
 	    QSKIP("Unsupported file, skipping");
 #else
 	    QSKIP("Unsupported file, skipping", SkipSingle);
 #endif
 	}
-QCOMPARE((int)reader->getChannelCount(), nominalChannels);
+QString extension;
-QCOMPARE(reader->getNativeRate(), nominalRate);
+sv_samplerate_t fileRate;
+int channels;
+int fileBitdepth;
+getFileMetadata(audiofile, extension, fileRate, channels, fileBitdepth);
+QCOMPARE((int)reader->getChannelCount(), channels);
+QCOMPARE(reader->getNativeRate(), fileRate);
 QCOMPARE(reader->getSampleRate(), readRate);
-	int channels = reader->getChannelCount();
 	AudioTestData tdata(readRate, channels);
 	float *reference = tdata.getInterleavedData();
 sv_frame_t refFrames = tdata.getFrameCount();
 	// The reader should give us exactly the expected number of
 	// frames, except for mp3/aac files. We ask for quite a lot
 	// more, though, so we can (a) check that we only get the
 	// expected number back (if this is not mp3/aac) or (b) take
 	// into account silence at beginning and end (if it is).
-	vector<float> test = reader->getInterleavedFrames(0, refFrames + 5000);
+	floatvec_t test = reader->getInterleavedFrames(0, refFrames + 5000);
 	sv_frame_t read = test.size() / channels;
-if (extension == "mp3" || extension == "aac" || extension == "m4a") {
+bool perceptual = (extension == "mp3" ||
-// mp3s and aacs can have silence at start and end
+extension == "aac" ||
+extension == "m4a");
+if (perceptual && !gapless) {
+// allow silence at start and end
 QVERIFY(read >= refFrames);
 } else {
 QCOMPARE(read, refFrames);
 }
-// Our limits are pretty relaxed -- we're not testing decoder
+bool resampled = readRate != fileRate;
-// or resampler quality here, just whether the results are
+double maxLimit, rmsLimit;
-// plainly wrong (e.g. at wrong samplerate or with an offset)
+getExpectedThresholds(format,
+audiofile,
-	double limit = 0.01;
+resampled,
-double edgeLimit = limit * 10; // in first or final edgeSize frames
+gapless,
+normalised,
+maxLimit, rmsLimit);
+double edgeLimit = maxLimit * 3; // in first or final edgeSize frames
+if (resampled && edgeLimit < 0.1) edgeLimit = 0.1;
 int edgeSize = 100;
-if (nominalDepth < 16) {
-limit = 0.02;
-}
-if (extension == "ogg" || extension == "mp3" ||
-extension == "aac" || extension == "m4a") {
-limit = 0.2;
-edgeLimit = limit * 3;
-}
 // And we ignore completely the last few frames when upsampling
-int discard = 1 + int(round(readRate / nominalRate));
+int discard = 1 + int(round(readRate / fileRate));
 int offset = 0;
-if (extension == "aac" || extension == "m4a") {
+if (perceptual) {
-// our m4a file appears to have a fixed offset of 1024 (at
-// file sample rate)
+// Look for an initial offset.
-offset = int(round((1024 / nominalRate) * readRate));
+//
-}
+// We know the first channel has a sinusoid in it. It
+// should have a peak at 0.4ms (see AudioTestData.h) but
-if (extension == "mp3") {
+// that might have been clipped, which would make it
-// while mp3s appear to vary
+// imprecise. We can tell if it's clipped, though, as
-for (int i = 0; i < read; ++i) {
+// there will be samples having exactly identical
-bool any = false;
+// values. So what we look for is the peak if it's not
-double thresh = 0.01;
+// clipped and, if it is, the first zero crossing after
-for (int c = 0; c < channels; ++c) {
+// the peak, which should be at 0.8ms.
-if (fabs(test[i * channels + c]) > thresh) {
-any = true;
+int expectedPeak = int(0.0004 * readRate);
+int expectedZC = int(0.0008 * readRate);
+bool foundPeak = false;
+for (int i = 1; i+1 < read; ++i) {
+float prevSample = test[(i-1) * channels];
+float thisSample = test[i * channels];
+float nextSample = test[(i+1) * channels];
+if (thisSample > 0.8 && nextSample < thisSample) {
+foundPeak = true;
+if (thisSample > prevSample) {
+// not clipped
+offset = i - expectedPeak - 1;
 break;
 }
 }
-if (any) {
+if (foundPeak && (thisSample >= 0.0 && nextSample < 0.0)) {
-offset = i;
+//                    cerr << "thisSample = " << thisSample << ", nextSample = "
+//                         << nextSample << endl;
+offset = i - expectedZC - 1;
 break;
 }
 }
+//            int fileRateEquivalent = int((offset / readRate) * fileRate);
 //            std::cerr << "offset = " << offset << std::endl;
-}
+//            std::cerr << "at file rate would be " << fileRateEquivalent << std::endl;
-	for (int c = 0; c < channels; ++c) {
+// Previously our m4a test file had a fixed offset of 1024
-	    float maxdiff = 0.f;
+// at the file sample rate -- this may be because it was
-	    int maxAt = 0;
+// produced by FAAC which did not write in the delay as
-	    float totdiff = 0.f;
+// metadata? We now have an m4a produced by Core Audio
-	    for (int i = 0; i < read - offset - discard && i < refFrames; ++i) {
+// which gives a 0 offset. What to do...
-		float diff = fabsf(test[(i + offset) * channels + c] -
-				   reference[i * channels + c]);
+// Anyway, mp3s should have 0 offset in gapless mode and
-		totdiff += diff;
+// "something else" otherwise.
+if (gapless) {
+if (format == "aac") {
+// ouch!
+if (offset == -1) offset = 0;
+}
+QCOMPARE(offset, 0);
+}
+}
+{
+// Write the diff file now, so that it's already been written
+// even if the comparison fails. We aren't checking anything
+// here except as necessary to avoid buffer overruns etc
+QString diffFile =
+testName(format, audiofile, rate, normalised, gapless);
+diffFile.replace("/", "_");
+diffFile.replace(".", "_");
+diffFile.replace(" ", "_");
+diffFile += ".wav";
+diffFile = QDir(diffDir).filePath(diffFile);
+WavFileWriter diffWriter(diffFile, readRate, channels,
+WavFileWriter::WriteToTemporary);
+QVERIFY(diffWriter.isOK());
+vector<vector<float>> diffs(channels);
+for (int c = 0; c < channels; ++c) {
+for (int i = 0; i < refFrames; ++i) {
 // NOTE(review): offset is only forced to 0 in gapless mode, so in
 // non-gapless mode it can be negative. ix would then be negative
 // for small i and the ix < read test below does not guard against
 // that - confirm.
+int ix = i + offset;
+if (ix < read) {
+float signeddiff =
+test[ix * channels + c] -
+reference[i * channels + c];
+diffs[c].push_back(signeddiff);
+}
+}
+}
+float **ptrs = new float*[channels];
+for (int c = 0; c < channels; ++c) {
+ptrs[c] = diffs[c].data();
+}
 // NOTE(review): each diffs[c] holds one entry per i with
 // i + offset < read, which can be fewer than the refFrames count
 // passed here - confirm that writeSamples cannot read past the end
 // of the shorter vectors.
+diffWriter.writeSamples(ptrs, refFrames);
+delete[] ptrs;
+}
+for (int c = 0; c < channels; ++c) {
+double maxDiff = 0.0;
+double totalDiff = 0.0;
+double totalSqrDiff = 0.0;
+int maxIndex = 0;
+for (int i = 0; i < refFrames; ++i) {
+int ix = i + offset;
+if (ix >= read) {
+cerr << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl;
+QVERIFY(ix < read);
+}
+if (ix + discard >= read) {
+// we forgive the very edge samples when
+// resampling (discard > 0)
+continue;
+}
+double diff = fabs(test[ix * channels + c] -
+reference[i * channels + c]);
+totalDiff += diff;
+totalSqrDiff += diff * diff;
 // in edge areas, record this only if it exceeds edgeLimit
-if (i < edgeSize || i + edgeSize >= read - offset) {
+if (i < edgeSize || i + edgeSize >= refFrames) {
-if (diff > edgeLimit && diff > maxdiff) {
+if (diff > edgeLimit && diff > maxDiff) {
-maxdiff = diff;
+maxDiff = diff;
-maxAt = i;
+maxIndex = i;
 }
 } else {
-if (diff > maxdiff) {
+if (diff > maxDiff) {
-maxdiff = diff;
+maxDiff = diff;
-maxAt = i;
+maxIndex = i;
 }
-		}
+}
-	    }
+}
-	    float meandiff = totdiff / float(read);
-//	    cerr << "meandiff on channel " << c << ": " << meandiff << endl;
+double meanDiff = totalDiff / double(refFrames);
-//	    cerr << "maxdiff on channel " << c << ": " << maxdiff << " at " << maxAt << endl;
+double rmsDiff = sqrt(totalSqrDiff / double(refFrames));
-if (meandiff >= limit) {
-		cerr << "ERROR: for audiofile " << audiofile << ": mean diff = " << meandiff << " for channel " << c << endl;
+/*
-QVERIFY(meandiff < limit);
+cerr << "channel " << c << ": mean diff " << meanDiff << endl;
-}
+	    cerr << "channel " << c << ":  rms diff " << rmsDiff << endl;
-	    if (maxdiff >= limit) {
+	    cerr << "channel " << c << ":  max diff " << maxDiff << " at " << maxIndex << endl;
-		cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxdiff << " at frame " << maxAt << " of " << read << " on channel " << c << " (mean diff = " << meandiff << ")" << endl;
+*/
-		QVERIFY(maxdiff < limit);
+if (rmsDiff >= rmsLimit) {
-	    }
+cerr << "ERROR: for audiofile " << audiofile << ": RMS diff = " << rmsDiff << " for channel " << c << " (limit = " << rmsLimit << ")" << endl;
+QVERIFY(rmsDiff < rmsLimit);
+}
+if (maxDiff >= maxLimit) {
+cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxDiff << " at frame " << maxIndex << " of " << read << " on channel " << c << " (limit = " << maxLimit << ", edge limit = " << edgeLimit << ", mean diff = " << meanDiff << ", rms = " << rmsDiff << ")" << endl;
+QVERIFY(maxDiff < maxLimit);
+}
+// and check for spurious material at end
+for (sv_frame_t i = refFrames; i + offset < read; ++i) {
+sv_frame_t ix = i + offset;
+float quiet = 0.1f; //!!! allow some ringing - but let's come back to this, it should tail off
+float mag = fabsf(test[ix * channels + c]);
+if (mag > quiet) {
+cerr << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << " after reference+offset ended at " << refFrames+offset << ")" << endl;
+QVERIFY(mag < quiet);
+}
+}
 	}
 }
 };
 #endif

Mercurial > hg > svcore

comparison data/fileio/test/AudioFileReaderTest.h @ 1365:3382d914e110