diff data/fileio/test/AudioFileReaderTest.h @ 1365:3382d914e110

Merge from branch 3.0-integration
author Chris Cannam
date Fri, 13 Jan 2017 10:29:44 +0000
parents 1c9bbbb6116a
children aadfb395e933
line wrap: on
line diff
--- a/data/fileio/test/AudioFileReaderTest.h	Mon Nov 21 16:32:58 2016 +0000
+++ b/data/fileio/test/AudioFileReaderTest.h	Fri Jan 13 10:29:44 2017 +0000
@@ -18,6 +18,7 @@
 
 #include "../AudioFileReaderFactory.h"
 #include "../AudioFileReader.h"
+#include "../WavFileWriter.h"
 
 #include "AudioTestData.h"
 
@@ -31,52 +32,232 @@
 
 using namespace std;
 
-static QString audioDir = "testfiles";
-
 class AudioFileReaderTest : public QObject
 {
     Q_OBJECT
 
+private:
+    QString testDirBase;
+    QString audioDir;
+    QString diffDir;
+
+public:
+    // Record the directories the tests work in. An empty base selects
+    // the default "svcore/data/fileio/test". Audio test files are
+    // looked for under <base>/audio, and diff files are written
+    // under <base>/diffs.
+    AudioFileReaderTest(QString base) {
+        testDirBase =
+            (base == "" ? QString("svcore/data/fileio/test") : base);
+        audioDir = testDirBase + "/audio";
+        diffDir = testDirBase + "/diffs";
+    }
+
+private:
     // Return a freshly allocated (strdup) C string holding the local
     // 8-bit encoding of s. Nothing in this function releases the
     // copy, so ownership passes to the caller.
     // NOTE(review): the callers visible in this file hand the result
     // straight to QTest::newRow and never free it -- confirm that
     // this is intended.
     const char *strOf(QString s) {
         return strdup(s.toLocal8Bit().data());
     }
 
+    // Derive the nominal properties of a test file from its name,
+    // which is expected to look like
+    // <rate>-<channels>[-<bitdepth>].<extension>
+    //
+    //   filename   name of the file, without any directory part
+    //   extension  set to the text between the first and second "."
+    //              (empty if the name contains no ".")
+    //   rate       set to the first "-"-separated field, as an integer
+    //   channels   set to the second "-"-separated field (0 if absent)
+    //   bitdepth   set to the third field if present, otherwise 16
+    //
+    // A name that does not follow the pattern produces empty/zero
+    // values instead of indexing past the end of the split lists, so
+    // that the caller's comparisons fail cleanly rather than crashing.
+    void getFileMetadata(QString filename,
+                         QString &extension,
+                         sv_samplerate_t &rate,
+                         int &channels,
+                         int &bitdepth) {
+
+        const QStringList fileAndExt = filename.split(".");
+        const QStringList bits = fileAndExt.value(0).split("-");
+
+        // value() returns an empty string for an index that is out
+        // of range, and toInt() of an empty string is 0
+        extension = fileAndExt.value(1);
+        rate = bits.value(0).toInt();
+        channels = bits.value(1).toInt();
+        bitdepth = (bits.length() > 2 ? bits.value(2).toInt() : 16);
+    }
+    
+    // Decide how far a decoded file may differ from the reference
+    // signal before the comparison counts as a failure.
+    //
+    //   format      format name the file is being tested under
+    //   filename    file name; only its nominal bit depth is used
+    //   resampled   true if the file is read at a rate other than its own
+    //   gapless     true if the file is read in gapless mode
+    //   normalised  true if the file is read with normalisation
+    //   maxLimit    set to the limit for the largest single difference
+    //   rmsLimit    set to the limit for the RMS difference
+    void getExpectedThresholds(QString format,
+                               QString filename,
+                               bool resampled,
+                               bool gapless,
+                               bool normalised,
+                               double &maxLimit,
+                               double &rmsLimit) {
+
+        QString ext;
+        sv_samplerate_t nominalRate;
+        int nominalChannels;
+        int depth;
+        getFileMetadata(filename, ext, nominalRate, nominalChannels, depth);
+
+        if (format == "ogg") {
+
+            // Our ogg is not especially high quality, and ends up
+            // further from the original when normalised than when not
+            maxLimit = (normalised ? 0.1 : 0.06);
+            rmsLimit = 0.03;
+            return;
+        }
+
+        if (format == "aac") {
+
+            // Poor results here: a lot of one channel spills into
+            // the other. Presumably that is acceptable for a
+            // perceptual codec, but it forces limits so loose that a
+            // signal sounding nothing like the original could
+            // probably pass, which makes this check rather weak
+            maxLimit = (normalised ? 0.2 : 0.1);
+            rmsLimit = 0.1;
+            return;
+        }
+
+        if (format == "mp3") {
+
+            // All mp3 figures are worse when not normalising. When
+            // normalising, the same worse figures are expected only
+            // for resampled non-gapless reads: uncompensated encoder
+            // delay plus resampling gives a fractional delay, leaving
+            // the decoded signal slightly out of phase with the test
+            // signal
+            const bool relaxed = (!normalised || (resampled && !gapless));
+            maxLimit = (relaxed ? 0.1 : 0.05);
+            rmsLimit = (relaxed ? 0.05 : 0.01);
+            return;
+        }
+
+        // Anything else is taken to be a lossless format (wav, aiff,
+        // flac, apple_lossless)
+
+        if (depth >= 16 && !resampled) {
+            maxLimit = 1e-3;
+            rmsLimit = 3e-4;
+        } else if (normalised) {
+            maxLimit = 0.01;
+            rmsLimit = 5e-3;
+        } else {
+            maxLimit = 0.02;
+            rmsLimit = 0.01;
+        }
+    }
+
+    // Build the label for one test combination. It has the form
+    // "<format>/<filename> at <rate>", followed by " normalised" when
+    // norm is set and by " non-gapless" when gapless is not set.
+    QString testName(QString format, QString filename, int rate, bool norm, bool gapless) {
+        const QString normSuffix(norm ? " normalised" : "");
+        const QString gaplessSuffix(gapless ? "" : " non-gapless");
+
+        // Fill the placeholders one at a time, lowest number first
+        QString label("%1/%2 at %3%4%5");
+        label = label.arg(format);
+        label = label.arg(filename);
+        label = label.arg(rate);
+        label = label.arg(normSuffix);
+        label = label.arg(gaplessSuffix);
+        return label;
+    }
+
 private slots:
     // Setup slot (Qt Test runs a private slot named init() before
     // each test function). Checks that the audio test file directory
     // exists, and that the diff output directory exists or can be
     // created. Either problem is reported on cerr and recorded as a
     // test failure.
     void init()
     {
         // The QVERIFY2 inside repeats the check so that the failure is
         // recorded by the test framework, with a message
         if (!QDir(audioDir).exists()) {
-            cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist" << endl;
+            QString cwd = QDir::currentPath();
+            cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist (cwd = " << cwd << ")" << endl;
             QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found");
         }
         // mkpath is only attempted if the diff directory is missing;
         // this branch is entered only if that attempt fails as well
+        if (!QDir(diffDir).exists() && !QDir().mkpath(diffDir)) {
+            cerr << "ERROR: Audio diff directory \"" << diffDir << "\" does not exist and could not be created" << endl;
+            QVERIFY2(QDir(diffDir).exists(), "Audio diff directory not found and could not be created");
+        }
     }
 
     // Data function for the read() test: declares the columns that
     // read() fetches and adds one row per combination of format
     // directory, file, read rate, normalisation setting and gapless
     // setting. Each row is labelled using testName().
     void read_data()
     {
+        QTest::addColumn<QString>("format");
         QTest::addColumn<QString>("audiofile");
-        QStringList files = QDir(audioDir).entryList(QDir::Files);
-        foreach (QString filename, files) {
-            QTest::newRow(strOf(filename)) << filename;
+        QTest::addColumn<int>("rate");
+        QTest::addColumn<bool>("normalised");
+        QTest::addColumn<bool>("gapless");
         // Each subdirectory of audioDir is used as a format name, and
         // the regular files inside it are the test files for that format
+        QStringList dirs = QDir(audioDir).entryList(QDir::Dirs |
+                                                    QDir::NoDotAndDotDot);
+        for (QString format: dirs) {
+            QStringList files = QDir(QDir(audioDir).filePath(format))
+                .entryList(QDir::Files);
+            int readRates[] = { 44100, 48000 };
+            bool norms[] = { false, true };
+            bool gaplesses[] = { true, false };
+            foreach (QString filename, files) {
+                for (int rate: readRates) {
+                    for (bool norm: norms) {
+                        for (bool gapless: gaplesses) {
+
                             // The non-gapless variant is generated
                             // only for the "mp3" directory
+                            if (format != "mp3" && !gapless) {
+                                continue;
+                            }
+                        
+                            QString desc = testName
+                                (format, filename, rate, norm, gapless);
+
+                            QTest::newRow(strOf(desc))
+                                << format << filename << rate << norm << gapless;
+                        }
+                    }
+                }
+            }
         }
     }
 
     void read()
     {
+        QFETCH(QString, format);
         QFETCH(QString, audiofile);
+        QFETCH(int, rate);
+        QFETCH(bool, normalised);
+        QFETCH(bool, gapless);
 
-        sv_samplerate_t readRate = 48000;
+        sv_samplerate_t readRate(rate);
+        
+//        cerr << "\naudiofile = " << audiofile << endl;
+
+        AudioFileReaderFactory::Parameters params;
+        params.targetRate = readRate;
+        params.normalisation = (normalised ?
+                                AudioFileReaderFactory::Normalisation::Peak :
+                                AudioFileReaderFactory::Normalisation::None);
+        params.gaplessMode = (gapless ?
+                              AudioFileReaderFactory::GaplessMode::Gapless :
+                              AudioFileReaderFactory::GaplessMode::Gappy);
 
 	AudioFileReader *reader =
 	    AudioFileReaderFactory::createReader
-	    (audioDir + "/" + audiofile, readRate);
-
-        QStringList fileAndExt = audiofile.split(".");
-        QStringList bits = fileAndExt[0].split("-");
-        QString extension = fileAndExt[1];
-        sv_samplerate_t nominalRate = bits[0].toInt();
-        int nominalChannels = bits[1].toInt();
-        int nominalDepth = 16;
-        if (bits.length() > 2) nominalDepth = bits[2].toInt();
-
+	    (audioDir + "/" + format + "/" + audiofile, params);
+        
 	if (!reader) {
 #if ( QT_VERSION >= 0x050000 )
 	    QSKIP("Unsupported file, skipping");
@@ -85,11 +266,16 @@
 #endif
 	}
 
-        QCOMPARE((int)reader->getChannelCount(), nominalChannels);
-        QCOMPARE(reader->getNativeRate(), nominalRate);
+        QString extension;
+        sv_samplerate_t fileRate;
+        int channels;
+        int fileBitdepth;
+        getFileMetadata(audiofile, extension, fileRate, channels, fileBitdepth);
+        
+        QCOMPARE((int)reader->getChannelCount(), channels);
+        QCOMPARE(reader->getNativeRate(), fileRate);
         QCOMPARE(reader->getSampleRate(), readRate);
 
-	int channels = reader->getChannelCount();
 	AudioTestData tdata(readRate, channels);
 	
 	float *reference = tdata.getInterleavedData();
@@ -100,95 +286,200 @@
 	// more, though, so we can (a) check that we only get the
 	// expected number back (if this is not mp3/aac) or (b) take
 	// into account silence at beginning and end (if it is).
-	vector<float> test = reader->getInterleavedFrames(0, refFrames + 5000);
+	floatvec_t test = reader->getInterleavedFrames(0, refFrames + 5000);
 	sv_frame_t read = test.size() / channels;
 
-        if (extension == "mp3" || extension == "aac" || extension == "m4a") {
-            // mp3s and aacs can have silence at start and end
+        bool perceptual = (extension == "mp3" ||
+                           extension == "aac" ||
+                           extension == "m4a");
+        
+        if (perceptual && !gapless) {
+            // allow silence at start and end
             QVERIFY(read >= refFrames);
         } else {
             QCOMPARE(read, refFrames);
         }
 
-        // Our limits are pretty relaxed -- we're not testing decoder
-        // or resampler quality here, just whether the results are
-        // plainly wrong (e.g. at wrong samplerate or with an offset)
-
-	double limit = 0.01;
-        double edgeLimit = limit * 10; // in first or final edgeSize frames
+        bool resampled = readRate != fileRate;
+        double maxLimit, rmsLimit;
+        getExpectedThresholds(format,
+                              audiofile,
+                              resampled,
+                              gapless,
+                              normalised,
+                              maxLimit, rmsLimit);
+        
+        double edgeLimit = maxLimit * 3; // in first or final edgeSize frames
+        if (resampled && edgeLimit < 0.1) edgeLimit = 0.1;
         int edgeSize = 100; 
 
-        if (nominalDepth < 16) {
-            limit = 0.02;
-        }
-        if (extension == "ogg" || extension == "mp3" ||
-            extension == "aac" || extension == "m4a") {
-            limit = 0.2;
-            edgeLimit = limit * 3;
-        }
-
         // And we ignore completely the last few frames when upsampling
-        int discard = 1 + int(round(readRate / nominalRate));
+        int discard = 1 + int(round(readRate / fileRate));
 
         int offset = 0;
 
-        if (extension == "aac" || extension == "m4a") {
-            // our m4a file appears to have a fixed offset of 1024 (at
-            // file sample rate)
-            offset = int(round((1024 / nominalRate) * readRate));
-        }
+        if (perceptual) {
 
-        if (extension == "mp3") {
-            // while mp3s appear to vary
-            for (int i = 0; i < read; ++i) {
-                bool any = false;
-                double thresh = 0.01;
-                for (int c = 0; c < channels; ++c) {
-                    if (fabs(test[i * channels + c]) > thresh) {
-                        any = true;
+            // Look for an initial offset.
+            //
+            // We know the first channel has a sinusoid in it. It
+            // should have a peak at 0.4ms (see AudioTestData.h) but
+            // that might have been clipped, which would make it
+            // imprecise. We can tell if it's clipped, though, as
+            // there will be samples having exactly identical
+            // values. So what we look for is the peak if it's not
+            // clipped and, if it is, the first zero crossing after
+            // the peak, which should be at 0.8ms.
+
+            int expectedPeak = int(0.0004 * readRate);
+            int expectedZC = int(0.0008 * readRate);
+            bool foundPeak = false;
+            for (int i = 1; i+1 < read; ++i) {
+                float prevSample = test[(i-1) * channels];
+                float thisSample = test[i * channels];
+                float nextSample = test[(i+1) * channels];
+                if (thisSample > 0.8 && nextSample < thisSample) {
+                    foundPeak = true;
+                    if (thisSample > prevSample) {
+                        // not clipped
+                        offset = i - expectedPeak - 1;
                         break;
                     }
                 }
-                if (any) {
-                    offset = i;
+                if (foundPeak && (thisSample >= 0.0 && nextSample < 0.0)) {
+//                    cerr << "thisSample = " << thisSample << ", nextSample = "
+//                         << nextSample << endl;
+                    offset = i - expectedZC - 1;
                     break;
                 }
             }
+
+//            int fileRateEquivalent = int((offset / readRate) * fileRate);
 //            std::cerr << "offset = " << offset << std::endl;
+//            std::cerr << "at file rate would be " << fileRateEquivalent << std::endl;
+
+            // Previously our m4a test file had a fixed offset of 1024
+            // at the file sample rate -- this may be because it was
+            // produced by FAAC which did not write in the delay as
+            // metadata? We now have an m4a produced by Core Audio
+            // which gives a 0 offset. What to do...
+
+            // Anyway, mp3s should have 0 offset in gapless mode and
+            // "something else" otherwise.
+            
+            if (gapless) {
+                if (format == "aac") {
+                    // ouch!
+                    if (offset == -1) offset = 0;
+                }
+                QCOMPARE(offset, 0);
+            }
         }
 
-	for (int c = 0; c < channels; ++c) {
-	    float maxdiff = 0.f;
-	    int maxAt = 0;
-	    float totdiff = 0.f;
-	    for (int i = 0; i < read - offset - discard && i < refFrames; ++i) {
-		float diff = fabsf(test[(i + offset) * channels + c] -
-				   reference[i * channels + c]);
-		totdiff += diff;
+        {
+            // Write the diff file now, so that it's already been written
+            // even if the comparison fails. We aren't checking anything
+            // here except as necessary to avoid buffer overruns etc
+
+            QString diffFile =
+                testName(format, audiofile, rate, normalised, gapless);
+            diffFile.replace("/", "_");
+            diffFile.replace(".", "_");
+            diffFile.replace(" ", "_");
+            diffFile += ".wav";
+            diffFile = QDir(diffDir).filePath(diffFile);
+            WavFileWriter diffWriter(diffFile, readRate, channels,
+                                     WavFileWriter::WriteToTemporary);
+            QVERIFY(diffWriter.isOK());
+
+            // Every channel must end up holding exactly refFrames
+            // samples, because that is how many writeSamples is asked
+            // to consume below. A reference frame whose counterpart
+            // (i + offset) lies before the start or beyond the end of
+            // the decoded data gets a zero, rather than being left
+            // out or read from outside the buffer
+            vector<vector<float>> diffs(channels);
+            for (int c = 0; c < channels; ++c) {
+                for (int i = 0; i < refFrames; ++i) {
+                    int ix = i + offset;
+                    float signeddiff = 0.f;
+                    if (ix >= 0 && ix < read) {
+                        signeddiff =
+                            test[ix * channels + c] -
+                            reference[i * channels + c];
+                    }
+                    diffs[c].push_back(signeddiff);
+                }
+            }
+
+            // Per-channel pointers for the writer, held in a vector so
+            // that nothing needs freeing by hand
+            vector<float *> ptrs;
+            for (int c = 0; c < channels; ++c) {
+                ptrs.push_back(diffs[c].data());
+            }
+            diffWriter.writeSamples(ptrs.data(), refFrames);
+        }
+            
+        for (int c = 0; c < channels; ++c) {
+
+            double maxDiff = 0.0;
+            double totalDiff = 0.0;
+            double totalSqrDiff = 0.0;
+            int maxIndex = 0;
+
+            for (int i = 0; i < refFrames; ++i) {
+                int ix = i + offset;
+                if (ix >= read) {
+                    cerr << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl;
+                    QVERIFY(ix < read);
+                }
+
+                if (ix + discard >= read) {
+                    // we forgive the very edge samples when
+                    // resampling (discard > 0)
+                    continue;
+                }
+                
+                double diff = fabs(test[ix * channels + c] -
+                                   reference[i * channels + c]);
+
+                totalDiff += diff;
+                totalSqrDiff += diff * diff;
+                
                 // in edge areas, record this only if it exceeds edgeLimit
-                if (i < edgeSize || i + edgeSize >= read - offset) {
-                    if (diff > edgeLimit && diff > maxdiff) {
-                        maxdiff = diff;
-                        maxAt = i;
+                if (i < edgeSize || i + edgeSize >= refFrames) {
+                    if (diff > edgeLimit && diff > maxDiff) {
+                        maxDiff = diff;
+                        maxIndex = i;
                     }
                 } else {
-                    if (diff > maxdiff) {
-                        maxdiff = diff;
-                        maxAt = i;
+                    if (diff > maxDiff) {
+                        maxDiff = diff;
+                        maxIndex = i;
                     }
-		}
-	    }
-	    float meandiff = totdiff / float(read);
-//	    cerr << "meandiff on channel " << c << ": " << meandiff << endl;
-//	    cerr << "maxdiff on channel " << c << ": " << maxdiff << " at " << maxAt << endl;
-            if (meandiff >= limit) {
-		cerr << "ERROR: for audiofile " << audiofile << ": mean diff = " << meandiff << " for channel " << c << endl;
-                QVERIFY(meandiff < limit);
+                }
             }
-	    if (maxdiff >= limit) {
-		cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxdiff << " at frame " << maxAt << " of " << read << " on channel " << c << " (mean diff = " << meandiff << ")" << endl;
-		QVERIFY(maxdiff < limit);
-	    }
+                
+            double meanDiff = totalDiff / double(refFrames);
+            double rmsDiff = sqrt(totalSqrDiff / double(refFrames));
+
+            /*
+        cerr << "channel " << c << ": mean diff " << meanDiff << endl;
+	    cerr << "channel " << c << ":  rms diff " << rmsDiff << endl;
+	    cerr << "channel " << c << ":  max diff " << maxDiff << " at " << maxIndex << endl;
+            */            
+            if (rmsDiff >= rmsLimit) {
+                cerr << "ERROR: for audiofile " << audiofile << ": RMS diff = " << rmsDiff << " for channel " << c << " (limit = " << rmsLimit << ")" << endl;
+                QVERIFY(rmsDiff < rmsLimit);
+            }
+            if (maxDiff >= maxLimit) {
+                cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxDiff << " at frame " << maxIndex << " of " << read << " on channel " << c << " (limit = " << maxLimit << ", edge limit = " << edgeLimit << ", mean diff = " << meanDiff << ", rms = " << rmsDiff << ")" << endl;
+                QVERIFY(maxDiff < maxLimit);
+            }
+
+            // and check for spurious material at end
+            
+            for (sv_frame_t i = refFrames; i + offset < read; ++i) {
+                sv_frame_t ix = i + offset;
+                float quiet = 0.1f; //!!! allow some ringing - but let's come back to this, it should tail off
+                float mag = fabsf(test[ix * channels + c]);
+                if (mag > quiet) {
+                    cerr << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << " after reference+offset ended at " << refFrames+offset << ")" << endl;
+                    QVERIFY(mag < quiet);
+                }
+            }
 	}
     }
 };