comparison data/fileio/test/AudioFileReaderTest.h @ 1365:3382d914e110

Merge from branch 3.0-integration
author Chris Cannam
date Fri, 13 Jan 2017 10:29:44 +0000
parents 1c9bbbb6116a
children aadfb395e933
comparison
equal deleted inserted replaced
1272:6a7ea3bd0e10 1365:3382d914e110
16 #ifndef TEST_AUDIO_FILE_READER_H 16 #ifndef TEST_AUDIO_FILE_READER_H
17 #define TEST_AUDIO_FILE_READER_H 17 #define TEST_AUDIO_FILE_READER_H
18 18
19 #include "../AudioFileReaderFactory.h" 19 #include "../AudioFileReaderFactory.h"
20 #include "../AudioFileReader.h" 20 #include "../AudioFileReader.h"
21 #include "../WavFileWriter.h"
21 22
22 #include "AudioTestData.h" 23 #include "AudioTestData.h"
23 24
24 #include <cmath> 25 #include <cmath>
25 26
29 30
30 #include <iostream> 31 #include <iostream>
31 32
32 using namespace std; 33 using namespace std;
33 34
34 static QString audioDir = "testfiles";
35
36 class AudioFileReaderTest : public QObject 35 class AudioFileReaderTest : public QObject
37 { 36 {
38 Q_OBJECT 37 Q_OBJECT
39 38
39 private:
40 QString testDirBase;
41 QString audioDir;
42 QString diffDir;
43
44 public:
45 AudioFileReaderTest(QString base) {
46 if (base == "") {
47 base = "svcore/data/fileio/test";
48 }
49 testDirBase = base;
50 audioDir = base + "/audio";
51 diffDir = base + "/diffs";
52 }
53
54 private:
40 const char *strOf(QString s) { 55 const char *strOf(QString s) {
41 return strdup(s.toLocal8Bit().data()); 56 return strdup(s.toLocal8Bit().data());
57 }
58
59 void getFileMetadata(QString filename,
60 QString &extension,
61 sv_samplerate_t &rate,
62 int &channels,
63 int &bitdepth) {
64
65 QStringList fileAndExt = filename.split(".");
66 QStringList bits = fileAndExt[0].split("-");
67
68 extension = fileAndExt[1];
69 rate = bits[0].toInt();
70 channels = bits[1].toInt();
71 bitdepth = 16;
72 if (bits.length() > 2) {
73 bitdepth = bits[2].toInt();
74 }
75 }
76
77 void getExpectedThresholds(QString format,
78 QString filename,
79 bool resampled,
80 bool gapless,
81 bool normalised,
82 double &maxLimit,
83 double &rmsLimit) {
84
85 QString extension;
86 sv_samplerate_t fileRate;
87 int channels;
88 int bitdepth;
89 getFileMetadata(filename, extension, fileRate, channels, bitdepth);
90
91 if (normalised) {
92
93 if (format == "ogg") {
94
95 // Our ogg is not especially high quality and is
96 // actually further from the original if normalised
97
98 maxLimit = 0.1;
99 rmsLimit = 0.03;
100
101 } else if (format == "aac") {
102
103 // Terrible performance for this test, load of spill
104 // from one channel to the other. I guess they know
105 // what they're doing, it's perceptual after all, but
106 // it does make this check a bit superfluous, you
107 // could probably pass it with a signal that sounds
108 // nothing like the original
109 maxLimit = 0.2;
110 rmsLimit = 0.1;
111
112 } else if (format == "mp3") {
113
114 if (resampled && !gapless) {
115
116 // We expect worse figures here, because the
117 // combination of uncompensated encoder delay +
118 // resampling results in a fractional delay which
119 // means the decoded signal is slightly out of
120 // phase compared to the test signal
121
122 maxLimit = 0.1;
123 rmsLimit = 0.05;
124
125 } else {
126
127 maxLimit = 0.05;
128 rmsLimit = 0.01;
129 }
130
131 } else {
132
133 // lossless formats (wav, aiff, flac, apple_lossless)
134
135 if (bitdepth >= 16 && !resampled) {
136 maxLimit = 1e-3;
137 rmsLimit = 3e-4;
138 } else {
139 maxLimit = 0.01;
140 rmsLimit = 5e-3;
141 }
142 }
143
144 } else { // !normalised
145
146 if (format == "ogg") {
147
148 maxLimit = 0.06;
149 rmsLimit = 0.03;
150
151 } else if (format == "aac") {
152
153 maxLimit = 0.1;
154 rmsLimit = 0.1;
155
156 } else if (format == "mp3") {
157
158 // all mp3 figures are worse when not normalising
159 maxLimit = 0.1;
160 rmsLimit = 0.05;
161
162 } else {
163
164 // lossless formats (wav, aiff, flac, apple_lossless)
165
166 if (bitdepth >= 16 && !resampled) {
167 maxLimit = 1e-3;
168 rmsLimit = 3e-4;
169 } else {
170 maxLimit = 0.02;
171 rmsLimit = 0.01;
172 }
173 }
174 }
175 }
176
177 QString testName(QString format, QString filename, int rate, bool norm, bool gapless) {
178 return QString("%1/%2 at %3%4%5")
179 .arg(format)
180 .arg(filename)
181 .arg(rate)
182 .arg(norm ? " normalised": "")
183 .arg(gapless ? "" : " non-gapless");
42 } 184 }
43 185
44 private slots: 186 private slots:
45 void init() 187 void init()
46 { 188 {
47 if (!QDir(audioDir).exists()) { 189 if (!QDir(audioDir).exists()) {
48 cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist" << endl; 190 QString cwd = QDir::currentPath();
191 cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist (cwd = " << cwd << ")" << endl;
49 QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found"); 192 QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found");
193 }
194 if (!QDir(diffDir).exists() && !QDir().mkpath(diffDir)) {
195 cerr << "ERROR: Audio diff directory \"" << diffDir << "\" does not exist and could not be created" << endl;
196 QVERIFY2(QDir(diffDir).exists(), "Audio diff directory not found and could not be created");
50 } 197 }
51 } 198 }
52 199
53 void read_data() 200 void read_data()
54 { 201 {
202 QTest::addColumn<QString>("format");
55 QTest::addColumn<QString>("audiofile"); 203 QTest::addColumn<QString>("audiofile");
56 QStringList files = QDir(audioDir).entryList(QDir::Files); 204 QTest::addColumn<int>("rate");
57 foreach (QString filename, files) { 205 QTest::addColumn<bool>("normalised");
58 QTest::newRow(strOf(filename)) << filename; 206 QTest::addColumn<bool>("gapless");
207 QStringList dirs = QDir(audioDir).entryList(QDir::Dirs |
208 QDir::NoDotAndDotDot);
209 for (QString format: dirs) {
210 QStringList files = QDir(QDir(audioDir).filePath(format))
211 .entryList(QDir::Files);
212 int readRates[] = { 44100, 48000 };
213 bool norms[] = { false, true };
214 bool gaplesses[] = { true, false };
215 foreach (QString filename, files) {
216 for (int rate: readRates) {
217 for (bool norm: norms) {
218 for (bool gapless: gaplesses) {
219
220 if (format != "mp3" && !gapless) {
221 continue;
222 }
223
224 QString desc = testName
225 (format, filename, rate, norm, gapless);
226
227 QTest::newRow(strOf(desc))
228 << format << filename << rate << norm << gapless;
229 }
230 }
231 }
232 }
59 } 233 }
60 } 234 }
61 235
62 void read() 236 void read()
63 { 237 {
238 QFETCH(QString, format);
64 QFETCH(QString, audiofile); 239 QFETCH(QString, audiofile);
65 240 QFETCH(int, rate);
66 sv_samplerate_t readRate = 48000; 241 QFETCH(bool, normalised);
242 QFETCH(bool, gapless);
243
244 sv_samplerate_t readRate(rate);
245
246 // cerr << "\naudiofile = " << audiofile << endl;
247
248 AudioFileReaderFactory::Parameters params;
249 params.targetRate = readRate;
250 params.normalisation = (normalised ?
251 AudioFileReaderFactory::Normalisation::Peak :
252 AudioFileReaderFactory::Normalisation::None);
253 params.gaplessMode = (gapless ?
254 AudioFileReaderFactory::GaplessMode::Gapless :
255 AudioFileReaderFactory::GaplessMode::Gappy);
67 256
68 AudioFileReader *reader = 257 AudioFileReader *reader =
69 AudioFileReaderFactory::createReader 258 AudioFileReaderFactory::createReader
70 (audioDir + "/" + audiofile, readRate); 259 (audioDir + "/" + format + "/" + audiofile, params);
71 260
72 QStringList fileAndExt = audiofile.split(".");
73 QStringList bits = fileAndExt[0].split("-");
74 QString extension = fileAndExt[1];
75 sv_samplerate_t nominalRate = bits[0].toInt();
76 int nominalChannels = bits[1].toInt();
77 int nominalDepth = 16;
78 if (bits.length() > 2) nominalDepth = bits[2].toInt();
79
80 if (!reader) { 261 if (!reader) {
81 #if ( QT_VERSION >= 0x050000 ) 262 #if ( QT_VERSION >= 0x050000 )
82 QSKIP("Unsupported file, skipping"); 263 QSKIP("Unsupported file, skipping");
83 #else 264 #else
84 QSKIP("Unsupported file, skipping", SkipSingle); 265 QSKIP("Unsupported file, skipping", SkipSingle);
85 #endif 266 #endif
86 } 267 }
87 268
88 QCOMPARE((int)reader->getChannelCount(), nominalChannels); 269 QString extension;
89 QCOMPARE(reader->getNativeRate(), nominalRate); 270 sv_samplerate_t fileRate;
271 int channels;
272 int fileBitdepth;
273 getFileMetadata(audiofile, extension, fileRate, channels, fileBitdepth);
274
275 QCOMPARE((int)reader->getChannelCount(), channels);
276 QCOMPARE(reader->getNativeRate(), fileRate);
90 QCOMPARE(reader->getSampleRate(), readRate); 277 QCOMPARE(reader->getSampleRate(), readRate);
91 278
92 int channels = reader->getChannelCount();
93 AudioTestData tdata(readRate, channels); 279 AudioTestData tdata(readRate, channels);
94 280
95 float *reference = tdata.getInterleavedData(); 281 float *reference = tdata.getInterleavedData();
96 sv_frame_t refFrames = tdata.getFrameCount(); 282 sv_frame_t refFrames = tdata.getFrameCount();
97 283
98 // The reader should give us exactly the expected number of 284 // The reader should give us exactly the expected number of
99 // frames, except for mp3/aac files. We ask for quite a lot 285 // frames, except for mp3/aac files. We ask for quite a lot
100 // more, though, so we can (a) check that we only get the 286 // more, though, so we can (a) check that we only get the
101 // expected number back (if this is not mp3/aac) or (b) take 287 // expected number back (if this is not mp3/aac) or (b) take
102 // into account silence at beginning and end (if it is). 288 // into account silence at beginning and end (if it is).
103 vector<float> test = reader->getInterleavedFrames(0, refFrames + 5000); 289 floatvec_t test = reader->getInterleavedFrames(0, refFrames + 5000);
104 sv_frame_t read = test.size() / channels; 290 sv_frame_t read = test.size() / channels;
105 291
106 if (extension == "mp3" || extension == "aac" || extension == "m4a") { 292 bool perceptual = (extension == "mp3" ||
107 // mp3s and aacs can have silence at start and end 293 extension == "aac" ||
294 extension == "m4a");
295
296 if (perceptual && !gapless) {
297 // allow silence at start and end
108 QVERIFY(read >= refFrames); 298 QVERIFY(read >= refFrames);
109 } else { 299 } else {
110 QCOMPARE(read, refFrames); 300 QCOMPARE(read, refFrames);
111 } 301 }
112 302
113 // Our limits are pretty relaxed -- we're not testing decoder 303 bool resampled = readRate != fileRate;
114 // or resampler quality here, just whether the results are 304 double maxLimit, rmsLimit;
115 // plainly wrong (e.g. at wrong samplerate or with an offset) 305 getExpectedThresholds(format,
116 306 audiofile,
117 double limit = 0.01; 307 resampled,
118 double edgeLimit = limit * 10; // in first or final edgeSize frames 308 gapless,
309 normalised,
310 maxLimit, rmsLimit);
311
312 double edgeLimit = maxLimit * 3; // in first or final edgeSize frames
313 if (resampled && edgeLimit < 0.1) edgeLimit = 0.1;
119 int edgeSize = 100; 314 int edgeSize = 100;
120 315
121 if (nominalDepth < 16) {
122 limit = 0.02;
123 }
124 if (extension == "ogg" || extension == "mp3" ||
125 extension == "aac" || extension == "m4a") {
126 limit = 0.2;
127 edgeLimit = limit * 3;
128 }
129
130 // And we ignore completely the last few frames when upsampling 316 // And we ignore completely the last few frames when upsampling
131 int discard = 1 + int(round(readRate / nominalRate)); 317 int discard = 1 + int(round(readRate / fileRate));
132 318
133 int offset = 0; 319 int offset = 0;
134 320
135 if (extension == "aac" || extension == "m4a") { 321 if (perceptual) {
136 // our m4a file appears to have a fixed offset of 1024 (at 322
137 // file sample rate) 323 // Look for an initial offset.
138 offset = int(round((1024 / nominalRate) * readRate)); 324 //
139 } 325 // We know the first channel has a sinusoid in it. It
140 326 // should have a peak at 0.4ms (see AudioTestData.h) but
141 if (extension == "mp3") { 327 // that might have been clipped, which would make it
142 // while mp3s appear to vary 328 // imprecise. We can tell if it's clipped, though, as
143 for (int i = 0; i < read; ++i) { 329 // there will be samples having exactly identical
144 bool any = false; 330 // values. So what we look for is the peak if it's not
145 double thresh = 0.01; 331 // clipped and, if it is, the first zero crossing after
146 for (int c = 0; c < channels; ++c) { 332 // the peak, which should be at 0.8ms.
147 if (fabs(test[i * channels + c]) > thresh) { 333
148 any = true; 334 int expectedPeak = int(0.0004 * readRate);
335 int expectedZC = int(0.0008 * readRate);
336 bool foundPeak = false;
337 for (int i = 1; i+1 < read; ++i) {
338 float prevSample = test[(i-1) * channels];
339 float thisSample = test[i * channels];
340 float nextSample = test[(i+1) * channels];
341 if (thisSample > 0.8 && nextSample < thisSample) {
342 foundPeak = true;
343 if (thisSample > prevSample) {
344 // not clipped
345 offset = i - expectedPeak - 1;
149 break; 346 break;
150 } 347 }
151 } 348 }
152 if (any) { 349 if (foundPeak && (thisSample >= 0.0 && nextSample < 0.0)) {
153 offset = i; 350 // cerr << "thisSample = " << thisSample << ", nextSample = "
351 // << nextSample << endl;
352 offset = i - expectedZC - 1;
154 break; 353 break;
155 } 354 }
156 } 355 }
356
357 // int fileRateEquivalent = int((offset / readRate) * fileRate);
157 // std::cerr << "offset = " << offset << std::endl; 358 // std::cerr << "offset = " << offset << std::endl;
158 } 359 // std::cerr << "at file rate would be " << fileRateEquivalent << std::endl;
159 360
160 for (int c = 0; c < channels; ++c) { 361 // Previously our m4a test file had a fixed offset of 1024
161 float maxdiff = 0.f; 362 // at the file sample rate -- this may be because it was
162 int maxAt = 0; 363 // produced by FAAC which did not write in the delay as
163 float totdiff = 0.f; 364 // metadata? We now have an m4a produced by Core Audio
164 for (int i = 0; i < read - offset - discard && i < refFrames; ++i) { 365 // which gives a 0 offset. What to do...
165 float diff = fabsf(test[(i + offset) * channels + c] - 366
166 reference[i * channels + c]); 367 // Anyway, mp3s should have 0 offset in gapless mode and
167 totdiff += diff; 368 // "something else" otherwise.
369
370 if (gapless) {
371 if (format == "aac") {
372 // ouch!
373 if (offset == -1) offset = 0;
374 }
375 QCOMPARE(offset, 0);
376 }
377 }
378
379 {
380 // Write the diff file now, so that it's already been written
381 // even if the comparison fails. We aren't checking anything
382 // here except as necessary to avoid buffer overruns etc
383
384 QString diffFile =
385 testName(format, audiofile, rate, normalised, gapless);
386 diffFile.replace("/", "_");
387 diffFile.replace(".", "_");
388 diffFile.replace(" ", "_");
389 diffFile += ".wav";
390 diffFile = QDir(diffDir).filePath(diffFile);
391 WavFileWriter diffWriter(diffFile, readRate, channels,
392 WavFileWriter::WriteToTemporary);
393 QVERIFY(diffWriter.isOK());
394
395 vector<vector<float>> diffs(channels);
396 for (int c = 0; c < channels; ++c) {
397 for (int i = 0; i < refFrames; ++i) {
398 int ix = i + offset;
399 if (ix < read) {
400 float signeddiff =
401 test[ix * channels + c] -
402 reference[i * channels + c];
403 diffs[c].push_back(signeddiff);
404 }
405 }
406 }
407 float **ptrs = new float*[channels];
408 for (int c = 0; c < channels; ++c) {
409 ptrs[c] = diffs[c].data();
410 }
411 diffWriter.writeSamples(ptrs, refFrames);
412 delete[] ptrs;
413 }
414
415 for (int c = 0; c < channels; ++c) {
416
417 double maxDiff = 0.0;
418 double totalDiff = 0.0;
419 double totalSqrDiff = 0.0;
420 int maxIndex = 0;
421
422 for (int i = 0; i < refFrames; ++i) {
423 int ix = i + offset;
424 if (ix >= read) {
425 cerr << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl;
426 QVERIFY(ix < read);
427 }
428
429 if (ix + discard >= read) {
430 // we forgive the very edge samples when
431 // resampling (discard > 0)
432 continue;
433 }
434
435 double diff = fabs(test[ix * channels + c] -
436 reference[i * channels + c]);
437
438 totalDiff += diff;
439 totalSqrDiff += diff * diff;
440
168 // in edge areas, record this only if it exceeds edgeLimit 441 // in edge areas, record this only if it exceeds edgeLimit
169 if (i < edgeSize || i + edgeSize >= read - offset) { 442 if (i < edgeSize || i + edgeSize >= refFrames) {
170 if (diff > edgeLimit && diff > maxdiff) { 443 if (diff > edgeLimit && diff > maxDiff) {
171 maxdiff = diff; 444 maxDiff = diff;
172 maxAt = i; 445 maxIndex = i;
173 } 446 }
174 } else { 447 } else {
175 if (diff > maxdiff) { 448 if (diff > maxDiff) {
176 maxdiff = diff; 449 maxDiff = diff;
177 maxAt = i; 450 maxIndex = i;
178 } 451 }
179 } 452 }
180 } 453 }
181 float meandiff = totdiff / float(read); 454
182 // cerr << "meandiff on channel " << c << ": " << meandiff << endl; 455 double meanDiff = totalDiff / double(refFrames);
183 // cerr << "maxdiff on channel " << c << ": " << maxdiff << " at " << maxAt << endl; 456 double rmsDiff = sqrt(totalSqrDiff / double(refFrames));
184 if (meandiff >= limit) { 457
185 cerr << "ERROR: for audiofile " << audiofile << ": mean diff = " << meandiff << " for channel " << c << endl; 458 /*
186 QVERIFY(meandiff < limit); 459 cerr << "channel " << c << ": mean diff " << meanDiff << endl;
187 } 460 cerr << "channel " << c << ": rms diff " << rmsDiff << endl;
188 if (maxdiff >= limit) { 461 cerr << "channel " << c << ": max diff " << maxDiff << " at " << maxIndex << endl;
189 cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxdiff << " at frame " << maxAt << " of " << read << " on channel " << c << " (mean diff = " << meandiff << ")" << endl; 462 */
190 QVERIFY(maxdiff < limit); 463 if (rmsDiff >= rmsLimit) {
191 } 464 cerr << "ERROR: for audiofile " << audiofile << ": RMS diff = " << rmsDiff << " for channel " << c << " (limit = " << rmsLimit << ")" << endl;
465 QVERIFY(rmsDiff < rmsLimit);
466 }
467 if (maxDiff >= maxLimit) {
468 cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxDiff << " at frame " << maxIndex << " of " << read << " on channel " << c << " (limit = " << maxLimit << ", edge limit = " << edgeLimit << ", mean diff = " << meanDiff << ", rms = " << rmsDiff << ")" << endl;
469 QVERIFY(maxDiff < maxLimit);
470 }
471
472 // and check for spurious material at end
473
474 for (sv_frame_t i = refFrames; i + offset < read; ++i) {
475 sv_frame_t ix = i + offset;
476 float quiet = 0.1f; //!!! allow some ringing - but let's come back to this, it should tail off
477 float mag = fabsf(test[ix * channels + c]);
478 if (mag > quiet) {
479 cerr << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << " after reference+offset ended at " << refFrames+offset << ")" << endl;
480 QVERIFY(mag < quiet);
481 }
482 }
192 } 483 }
193 } 484 }
194 }; 485 };
195 486
196 #endif 487 #endif