Chris@756: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@756: Chris@756: /* Chris@756: Sonic Visualiser Chris@756: An audio file viewer and annotation editor. Chris@756: Centre for Digital Music, Queen Mary, University of London. Chris@756: This file copyright 2013 Chris Cannam. Chris@756: Chris@756: This program is free software; you can redistribute it and/or Chris@756: modify it under the terms of the GNU General Public License as Chris@756: published by the Free Software Foundation; either version 2 of the Chris@756: License, or (at your option) any later version. See the file Chris@756: COPYING included with this distribution for more information. Chris@756: */ Chris@756: Chris@756: #ifndef TEST_AUDIO_FILE_READER_H Chris@756: #define TEST_AUDIO_FILE_READER_H Chris@756: Chris@756: #include "../AudioFileReaderFactory.h" Chris@756: #include "../AudioFileReader.h" Chris@1313: #include "../WavFileWriter.h" Chris@756: Chris@756: #include "AudioTestData.h" Chris@756: Chris@756: #include Chris@756: Chris@756: #include Chris@756: #include Chris@756: #include Chris@756: Chris@756: #include Chris@756: Chris@756: using namespace std; Chris@756: Chris@756: class AudioFileReaderTest : public QObject Chris@756: { Chris@756: Q_OBJECT Chris@756: Chris@1346: private: Chris@1346: QString testDirBase; Chris@1346: QString audioDir; Chris@1346: QString diffDir; Chris@1346: Chris@1346: public: Chris@1346: AudioFileReaderTest(QString base) { Chris@1346: if (base == "") { Chris@1346: base = "svcore/data/fileio/test"; Chris@1346: } Chris@1346: testDirBase = base; Chris@1359: audioDir = base + "/audio"; Chris@1346: diffDir = base + "/diffs"; Chris@1346: } Chris@1346: Chris@1346: private: Chris@756: const char *strOf(QString s) { Chris@756: return strdup(s.toLocal8Bit().data()); Chris@756: } Chris@756: Chris@1313: void getFileMetadata(QString filename, Chris@1313: QString &extension, Chris@1313: sv_samplerate_t &rate, Chris@1313: int &channels, Chris@1313: int &bitdepth) { Chris@1313: Chris@1313: QStringList fileAndExt = filename.split("."); Chris@1313: QStringList bits = fileAndExt[0].split("-"); Chris@1313: Chris@1313: extension = fileAndExt[1]; Chris@1313: rate = bits[0].toInt(); Chris@1313: channels = bits[1].toInt(); Chris@1313: bitdepth = 16; Chris@1313: if (bits.length() > 2) { Chris@1313: bitdepth = bits[2].toInt(); Chris@1313: } Chris@1313: } Chris@1313: cannam@1315: void getExpectedThresholds(QString format, cannam@1315: QString filename, Chris@1313: bool resampled, Chris@1313: bool gapless, Chris@1313: bool normalised, Chris@1313: double &maxLimit, Chris@1313: double &rmsLimit) { Chris@1313: Chris@1313: QString extension; Chris@1313: sv_samplerate_t fileRate; Chris@1313: int channels; Chris@1313: int bitdepth; Chris@1313: getFileMetadata(filename, extension, fileRate, channels, bitdepth); Chris@1313: Chris@1313: if (normalised) { Chris@1313: cannam@1315: if (format == "ogg") { Chris@1313: Chris@1313: // Our ogg is not especially high quality and is Chris@1313: // actually further from the original if normalised Chris@1313: Chris@1313: maxLimit = 0.1; Chris@1313: rmsLimit = 0.03; Chris@1313: Chris@1598: } else if (format == "opus") { Chris@1598: Chris@1598: maxLimit = 0.06; Chris@1598: rmsLimit = 0.015; Chris@1598: cannam@1315: } else if (format == "aac") { Chris@1313: cannam@1315: // Terrible performance for this test, load of spill cannam@1315: // from one channel to the other. I guess they know cannam@1315: // what they're doing, it's perceptual after all, but cannam@1315: // it does make this check a bit superfluous, you cannam@1315: // could probably pass it with a signal that sounds cannam@1315: // nothing like the original cannam@1315: maxLimit = 0.2; cannam@1314: rmsLimit = 0.1; Chris@1313: Chris@1603: } else if (format == "wma") { Chris@1603: Chris@1603: maxLimit = 0.05; Chris@1603: rmsLimit = 0.01; Chris@1603: cannam@1315: } else if (format == "mp3") { Chris@1313: Chris@1313: if (resampled && !gapless) { Chris@1313: Chris@1313: // We expect worse figures here, because the Chris@1313: // combination of uncompensated encoder delay + Chris@1313: // resampling results in a fractional delay which Chris@1313: // means the decoded signal is slightly out of Chris@1313: // phase compared to the test signal Chris@1313: Chris@1313: maxLimit = 0.1; Chris@1313: rmsLimit = 0.05; Chris@1313: Chris@1313: } else { Chris@1313: Chris@1313: maxLimit = 0.05; Chris@1313: rmsLimit = 0.01; Chris@1313: } Chris@1313: Chris@1313: } else { Chris@1313: cannam@1315: // lossless formats (wav, aiff, flac, apple_lossless) Chris@1313: Chris@1313: if (bitdepth >= 16 && !resampled) { Chris@1313: maxLimit = 1e-3; Chris@1313: rmsLimit = 3e-4; Chris@1313: } else { Chris@1313: maxLimit = 0.01; Chris@1313: rmsLimit = 5e-3; Chris@1313: } Chris@1313: } Chris@1313: Chris@1313: } else { // !normalised Chris@1313: cannam@1315: if (format == "ogg") { Chris@1313: Chris@1313: maxLimit = 0.06; Chris@1313: rmsLimit = 0.03; Chris@1313: Chris@1598: } else if (format == "opus") { Chris@1598: Chris@1598: maxLimit = 0.06; Chris@1598: rmsLimit = 0.015; Chris@1598: cannam@1315: } else if (format == "aac") { Chris@1313: Chris@1603: maxLimit = 0.2; cannam@1315: rmsLimit = 0.1; Chris@1313: Chris@1603: } else if (format == "wma") { Chris@1603: Chris@1603: maxLimit = 0.05; Chris@1603: rmsLimit = 0.01; Chris@1603: cannam@1315: } else if (format == "mp3") { Chris@1313: Chris@1313: // all mp3 figures are worse when not normalising Chris@1313: maxLimit = 0.1; Chris@1313: rmsLimit = 0.05; Chris@1313: Chris@1313: } else { Chris@1313: cannam@1315: // lossless formats (wav, aiff, flac, apple_lossless) Chris@1313: Chris@1313: if (bitdepth >= 16 && !resampled) { Chris@1313: maxLimit = 1e-3; Chris@1313: rmsLimit = 3e-4; Chris@1313: } else { Chris@1313: maxLimit = 0.02; Chris@1313: rmsLimit = 0.01; Chris@1313: } Chris@1313: } Chris@1313: } Chris@1313: } Chris@1313: cannam@1315: QString testName(QString format, QString filename, int rate, bool norm, bool gapless) { cannam@1315: return QString("%1/%2 at %3%4%5") cannam@1315: .arg(format) Chris@1313: .arg(filename) Chris@1313: .arg(rate) Chris@1313: .arg(norm ? " normalised": "") Chris@1313: .arg(gapless ? "" : " non-gapless"); Chris@1313: } Chris@1313: Chris@756: private slots: Chris@756: void init() Chris@756: { Chris@756: if (!QDir(audioDir).exists()) { Chris@1346: QString cwd = QDir::currentPath(); Chris@1428: SVCERR << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist (cwd = " << cwd << ")" << endl; Chris@756: QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found"); Chris@756: } Chris@1313: if (!QDir(diffDir).exists() && !QDir().mkpath(diffDir)) { Chris@1428: SVCERR << "ERROR: Audio diff directory \"" << diffDir << "\" does not exist and could not be created" << endl; Chris@1313: QVERIFY2(QDir(diffDir).exists(), "Audio diff directory not found and could not be created"); Chris@1313: } Chris@756: } Chris@756: Chris@756: void read_data() Chris@756: { cannam@1315: QTest::addColumn("format"); Chris@756: QTest::addColumn("audiofile"); Chris@1313: QTest::addColumn("rate"); Chris@1313: QTest::addColumn("normalised"); Chris@1313: QTest::addColumn("gapless"); cannam@1315: QStringList dirs = QDir(audioDir).entryList(QDir::Dirs | cannam@1315: QDir::NoDotAndDotDot); cannam@1315: for (QString format: dirs) { cannam@1315: QStringList files = QDir(QDir(audioDir).filePath(format)) cannam@1315: .entryList(QDir::Files); cannam@1315: int readRates[] = { 44100, 48000 }; cannam@1315: bool norms[] = { false, true }; cannam@1315: bool gaplesses[] = { true, false }; cannam@1315: foreach (QString filename, files) { cannam@1315: for (int rate: readRates) { cannam@1315: for (bool norm: norms) { cannam@1315: for (bool gapless: gaplesses) { Chris@1313: Chris@1603: #ifdef Q_OS_WIN Chris@1603: if (format == "aac") { Chris@1603: if (gapless) { Chris@1603: // Apparently no support for AAC Chris@1603: // encoder delay compensation in Chris@1603: // MediaFoundation, so these tests Chris@1603: // are only available non-gapless Chris@1603: continue; Chris@1603: } Chris@1603: } else if (format != "mp3") { Chris@1603: if (!gapless) { Chris@1603: // All other formats but mp3 are Chris@1603: // intrinsically gapless, so we Chris@1603: // can skip the non-gapless option Chris@1603: continue; Chris@1603: } cannam@1315: } Chris@1603: #else Chris@1603: if (format != "mp3") { Chris@1603: if (!gapless) { Chris@1603: // All other formats but mp3 are Chris@1603: // intrinsically gapless Chris@1603: // everywhere except for Windows Chris@1603: // (see above), so we can skip the Chris@1603: // non-gapless option Chris@1603: continue; Chris@1603: } Chris@1603: } Chris@1603: #endif cannam@1315: cannam@1315: QString desc = testName cannam@1315: (format, filename, rate, norm, gapless); cannam@1315: cannam@1315: QTest::newRow(strOf(desc)) cannam@1315: << format << filename << rate << norm << gapless; Chris@1313: } Chris@1313: } Chris@1313: } Chris@1313: } Chris@756: } Chris@756: } Chris@756: Chris@756: void read() Chris@756: { cannam@1315: QFETCH(QString, format); Chris@756: QFETCH(QString, audiofile); Chris@1313: QFETCH(int, rate); Chris@1313: QFETCH(bool, normalised); Chris@1313: QFETCH(bool, gapless); Chris@756: Chris@1313: sv_samplerate_t readRate(rate); Chris@1313: cannam@1315: // cerr << "\naudiofile = " << audiofile << endl; Chris@1313: Chris@1313: AudioFileReaderFactory::Parameters params; Chris@1313: params.targetRate = readRate; Chris@1313: params.normalisation = (normalised ? Chris@1313: AudioFileReaderFactory::Normalisation::Peak : Chris@1313: AudioFileReaderFactory::Normalisation::None); Chris@1313: params.gaplessMode = (gapless ? Chris@1313: AudioFileReaderFactory::GaplessMode::Gapless : Chris@1313: AudioFileReaderFactory::GaplessMode::Gappy); Chris@757: Chris@1429: AudioFileReader *reader = Chris@1429: AudioFileReaderFactory::createReader Chris@1429: (audioDir + "/" + format + "/" + audiofile, params); Chris@1313: Chris@1429: if (!reader) { Chris@820: #if ( QT_VERSION >= 0x050000 ) Chris@1429: QSKIP("Unsupported file, skipping"); Chris@820: #else Chris@1429: QSKIP("Unsupported file, skipping", SkipSingle); Chris@820: #endif Chris@1429: } Chris@756: Chris@1313: QString extension; Chris@1313: sv_samplerate_t fileRate; Chris@1313: int channels; Chris@1313: int fileBitdepth; Chris@1313: getFileMetadata(audiofile, extension, fileRate, channels, fileBitdepth); Chris@1313: Chris@1313: QCOMPARE((int)reader->getChannelCount(), channels); Chris@1313: QCOMPARE(reader->getNativeRate(), fileRate); Chris@1040: QCOMPARE(reader->getSampleRate(), readRate); Chris@757: Chris@1429: AudioTestData tdata(readRate, channels); Chris@1429: Chris@1429: float *reference = tdata.getInterleavedData(); Chris@1040: sv_frame_t refFrames = tdata.getFrameCount(); Chris@1429: Chris@1429: // The reader should give us exactly the expected number of Chris@1429: // frames, except for mp3/aac files. We ask for quite a lot Chris@1429: // more, though, so we can (a) check that we only get the Chris@1429: // expected number back (if this is not mp3/aac) or (b) take Chris@1429: // into account silence at beginning and end (if it is). Chris@1429: floatvec_t test = reader->getInterleavedFrames(0, refFrames + 5000); Chris@1402: Chris@1402: delete reader; Chris@1402: reader = 0; Chris@1402: Chris@1429: sv_frame_t read = test.size() / channels; Chris@756: Chris@1313: bool perceptual = (extension == "mp3" || Chris@1313: extension == "aac" || Chris@1598: extension == "m4a" || Chris@1603: extension == "wma" || Chris@1598: extension == "opus"); Chris@1313: Chris@1313: if (perceptual && !gapless) { Chris@1313: // allow silence at start and end Chris@759: QVERIFY(read >= refFrames); Chris@757: } else { Chris@759: QCOMPARE(read, refFrames); Chris@757: } Chris@757: Chris@1313: bool resampled = readRate != fileRate; Chris@1313: double maxLimit, rmsLimit; cannam@1315: getExpectedThresholds(format, cannam@1315: audiofile, Chris@1313: resampled, Chris@1313: gapless, Chris@1313: normalised, Chris@1313: maxLimit, rmsLimit); Chris@1313: Chris@1313: double edgeLimit = maxLimit * 3; // in first or final edgeSize frames Chris@1313: if (resampled && edgeLimit < 0.1) edgeLimit = 0.1; Chris@759: int edgeSize = 100; Chris@759: Chris@759: // And we ignore completely the last few frames when upsampling Chris@1313: int discard = 1 + int(round(readRate / fileRate)); Chris@759: Chris@759: int offset = 0; Chris@759: Chris@1313: if (perceptual) { Chris@759: cannam@1314: // Look for an initial offset. cannam@1314: // cannam@1314: // We know the first channel has a sinusoid in it. It cannam@1314: // should have a peak at 0.4ms (see AudioTestData.h) but cannam@1314: // that might have been clipped, which would make it cannam@1314: // imprecise. We can tell if it's clipped, though, as cannam@1314: // there will be samples having exactly identical cannam@1314: // values. So what we look for is the peak if it's not cannam@1314: // clipped and, if it is, the first zero crossing after cannam@1314: // the peak, which should be at 0.8ms. cannam@1314: Chris@1296: int expectedPeak = int(0.0004 * readRate); cannam@1314: int expectedZC = int(0.0008 * readRate); cannam@1314: bool foundPeak = false; cannam@1314: for (int i = 1; i+1 < read; ++i) { cannam@1314: float prevSample = test[(i-1) * channels]; cannam@1314: float thisSample = test[i * channels]; cannam@1314: float nextSample = test[(i+1) * channels]; cannam@1314: if (thisSample > 0.8 && nextSample < thisSample) { cannam@1314: foundPeak = true; cannam@1314: if (thisSample > prevSample) { cannam@1314: // not clipped cannam@1314: offset = i - expectedPeak - 1; cannam@1314: break; cannam@1314: } cannam@1314: } cannam@1314: if (foundPeak && (thisSample >= 0.0 && nextSample < 0.0)) { cannam@1315: // cerr << "thisSample = " << thisSample << ", nextSample = " cannam@1315: // << nextSample << endl; cannam@1314: offset = i - expectedZC - 1; Chris@759: break; Chris@759: } Chris@759: } Chris@1313: cannam@1315: // int fileRateEquivalent = int((offset / readRate) * fileRate); cannam@1315: // std::cerr << "offset = " << offset << std::endl; cannam@1315: // std::cerr << "at file rate would be " << fileRateEquivalent << std::endl; Chris@1313: Chris@1313: // Previously our m4a test file had a fixed offset of 1024 Chris@1313: // at the file sample rate -- this may be because it was Chris@1313: // produced by FAAC which did not write in the delay as Chris@1313: // metadata? We now have an m4a produced by Core Audio Chris@1313: // which gives a 0 offset. What to do... Chris@1313: Chris@1313: // Anyway, mp3s should have 0 offset in gapless mode and Chris@1313: // "something else" otherwise. Chris@1313: Chris@1313: if (gapless) { Chris@1603: if (format == "aac" Chris@1603: #ifdef Q_OS_WIN Chris@1603: || (format == "mp3" && (readRate != fileRate)) Chris@1603: #endif Chris@1603: ) { cannam@1315: // ouch! cannam@1315: if (offset == -1) offset = 0; cannam@1315: } Chris@1313: QCOMPARE(offset, 0); Chris@1313: } Chris@759: } Chris@756: cannam@1315: { cannam@1315: // Write the diff file now, so that it's already been written cannam@1315: // even if the comparison fails. We aren't checking anything cannam@1315: // here except as necessary to avoid buffer overruns etc cannam@1315: cannam@1315: QString diffFile = cannam@1315: testName(format, audiofile, rate, normalised, gapless); cannam@1315: diffFile.replace("/", "_"); cannam@1315: diffFile.replace(".", "_"); cannam@1315: diffFile.replace(" ", "_"); cannam@1315: diffFile += ".wav"; cannam@1315: diffFile = QDir(diffDir).filePath(diffFile); cannam@1315: WavFileWriter diffWriter(diffFile, readRate, channels, Chris@1359: WavFileWriter::WriteToTemporary); cannam@1315: QVERIFY(diffWriter.isOK()); cannam@1315: cannam@1315: vector> diffs(channels); cannam@1315: for (int c = 0; c < channels; ++c) { cannam@1315: for (int i = 0; i < refFrames; ++i) { cannam@1315: int ix = i + offset; cannam@1315: if (ix < read) { cannam@1315: float signeddiff = cannam@1315: test[ix * channels + c] - cannam@1315: reference[i * channels + c]; cannam@1315: diffs[c].push_back(signeddiff); cannam@1315: } cannam@1315: } cannam@1315: } cannam@1315: float **ptrs = new float*[channels]; cannam@1315: for (int c = 0; c < channels; ++c) { cannam@1315: ptrs[c] = diffs[c].data(); cannam@1315: } cannam@1315: diffWriter.writeSamples(ptrs, refFrames); cannam@1315: delete[] ptrs; cannam@1315: } Chris@1313: Chris@1346: for (int c = 0; c < channels; ++c) { Chris@1313: Chris@1313: double maxDiff = 0.0; Chris@1313: double totalDiff = 0.0; Chris@1313: double totalSqrDiff = 0.0; Chris@1346: int maxIndex = 0; Chris@1313: Chris@1346: for (int i = 0; i < refFrames; ++i) { Chris@1296: int ix = i + offset; Chris@1296: if (ix >= read) { Chris@1428: SVCERR << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl; Chris@1296: QVERIFY(ix < read); Chris@1296: } Chris@1313: Chris@1296: if (ix + discard >= read) { Chris@1296: // we forgive the very edge samples when Chris@1296: // resampling (discard > 0) Chris@1296: continue; Chris@1296: } Chris@1313: Chris@1346: double diff = fabs(test[ix * channels + c] - cannam@1315: reference[i * channels + c]); Chris@1313: Chris@1346: totalDiff += diff; Chris@1313: totalSqrDiff += diff * diff; Chris@1313: Chris@757: // in edge areas, record this only if it exceeds edgeLimit Chris@1313: if (i < edgeSize || i + edgeSize >= refFrames) { Chris@1313: if (diff > edgeLimit && diff > maxDiff) { Chris@1313: maxDiff = diff; Chris@1313: maxIndex = i; Chris@757: } Chris@757: } else { Chris@1313: if (diff > maxDiff) { Chris@1313: maxDiff = diff; Chris@1313: maxIndex = i; Chris@757: } Chris@1346: } Chris@1346: } Chris@1313: Chris@1346: double meanDiff = totalDiff / double(refFrames); Chris@1313: double rmsDiff = sqrt(totalSqrDiff / double(refFrames)); cannam@1308: cannam@1314: /* Chris@1346: cerr << "channel " << c << ": mean diff " << meanDiff << endl; Chris@1429: cerr << "channel " << c << ": rms diff " << rmsDiff << endl; Chris@1429: cerr << "channel " << c << ": max diff " << maxDiff << " at " << maxIndex << endl; cannam@1314: */ Chris@1313: if (rmsDiff >= rmsLimit) { Chris@1428: SVCERR << "ERROR: for audiofile " << audiofile << ": RMS diff = " << rmsDiff << " for channel " << c << " (limit = " << rmsLimit << ")" << endl; Chris@1313: QVERIFY(rmsDiff < rmsLimit); Chris@1313: } Chris@1346: if (maxDiff >= maxLimit) { Chris@1428: SVCERR << "ERROR: for audiofile " << audiofile << ": max diff = " << maxDiff << " at frame " << maxIndex << " of " << read << " on channel " << c << " (limit = " << maxLimit << ", edge limit = " << edgeLimit << ", mean diff = " << meanDiff << ", rms = " << rmsDiff << ")" << endl; Chris@1346: QVERIFY(maxDiff < maxLimit); Chris@1346: } Chris@1313: Chris@1313: // and check for spurious material at end Chris@1313: Chris@1309: for (sv_frame_t i = refFrames; i + offset < read; ++i) { Chris@1309: sv_frame_t ix = i + offset; Chris@1323: float quiet = 0.1f; //!!! allow some ringing - but let's come back to this, it should tail off cannam@1308: float mag = fabsf(test[ix * channels + c]); cannam@1308: if (mag > quiet) { Chris@1428: SVCERR << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << " after reference+offset ended at " << refFrames+offset << ")" << endl; cannam@1308: QVERIFY(mag < quiet); cannam@1308: } cannam@1308: } Chris@1429: } Chris@756: } Chris@756: }; Chris@756: Chris@756: #endif