Mercurial > hg > svcore
comparison data/fileio/test/AudioFileReaderTest.h @ 1365:3382d914e110
Merge from branch 3.0-integration
| field | value |
|---|---|
| author | Chris Cannam |
| date | Fri, 13 Jan 2017 10:29:44 +0000 |
| parents | 1c9bbbb6116a |
| children | aadfb395e933 |
comparison
Legend: equal, deleted, inserted, replaced
1272:6a7ea3bd0e10 | 1365:3382d914e110 |
---|---|
16 #ifndef TEST_AUDIO_FILE_READER_H | 16 #ifndef TEST_AUDIO_FILE_READER_H |
17 #define TEST_AUDIO_FILE_READER_H | 17 #define TEST_AUDIO_FILE_READER_H |
18 | 18 |
19 #include "../AudioFileReaderFactory.h" | 19 #include "../AudioFileReaderFactory.h" |
20 #include "../AudioFileReader.h" | 20 #include "../AudioFileReader.h" |
21 #include "../WavFileWriter.h" | |
21 | 22 |
22 #include "AudioTestData.h" | 23 #include "AudioTestData.h" |
23 | 24 |
24 #include <cmath> | 25 #include <cmath> |
25 | 26 |
29 | 30 |
30 #include <iostream> | 31 #include <iostream> |
31 | 32 |
32 using namespace std; | 33 using namespace std; |
33 | 34 |
34 static QString audioDir = "testfiles"; | |
35 | |
36 class AudioFileReaderTest : public QObject | 35 class AudioFileReaderTest : public QObject |
37 { | 36 { |
38 Q_OBJECT | 37 Q_OBJECT |
39 | 38 |
39 private: | |
40 QString testDirBase; | |
41 QString audioDir; | |
42 QString diffDir; | |
43 | |
44 public: | |
45 AudioFileReaderTest(QString base) { | |
46 if (base == "") { | |
47 base = "svcore/data/fileio/test"; | |
48 } | |
49 testDirBase = base; | |
50 audioDir = base + "/audio"; | |
51 diffDir = base + "/diffs"; | |
52 } | |
53 | |
54 private: | |
40 const char *strOf(QString s) { | 55 const char *strOf(QString s) { |
41 return strdup(s.toLocal8Bit().data()); | 56 return strdup(s.toLocal8Bit().data()); |
57 } | |
58 | |
59 void getFileMetadata(QString filename, | |
60 QString &extension, | |
61 sv_samplerate_t &rate, | |
62 int &channels, | |
63 int &bitdepth) { | |
64 | |
65 QStringList fileAndExt = filename.split("."); | |
66 QStringList bits = fileAndExt[0].split("-"); | |
67 | |
68 extension = fileAndExt[1]; | |
69 rate = bits[0].toInt(); | |
70 channels = bits[1].toInt(); | |
71 bitdepth = 16; | |
72 if (bits.length() > 2) { | |
73 bitdepth = bits[2].toInt(); | |
74 } | |
75 } | |
76 | |
77 void getExpectedThresholds(QString format, | |
78 QString filename, | |
79 bool resampled, | |
80 bool gapless, | |
81 bool normalised, | |
82 double &maxLimit, | |
83 double &rmsLimit) { | |
84 | |
85 QString extension; | |
86 sv_samplerate_t fileRate; | |
87 int channels; | |
88 int bitdepth; | |
89 getFileMetadata(filename, extension, fileRate, channels, bitdepth); | |
90 | |
91 if (normalised) { | |
92 | |
93 if (format == "ogg") { | |
94 | |
95 // Our ogg is not especially high quality and is | |
96 // actually further from the original if normalised | |
97 | |
98 maxLimit = 0.1; | |
99 rmsLimit = 0.03; | |
100 | |
101 } else if (format == "aac") { | |
102 | |
103 // Terrible performance for this test, load of spill | |
104 // from one channel to the other. I guess they know | |
105 // what they're doing, it's perceptual after all, but | |
106 // it does make this check a bit superfluous, you | |
107 // could probably pass it with a signal that sounds | |
108 // nothing like the original | |
109 maxLimit = 0.2; | |
110 rmsLimit = 0.1; | |
111 | |
112 } else if (format == "mp3") { | |
113 | |
114 if (resampled && !gapless) { | |
115 | |
116 // We expect worse figures here, because the | |
117 // combination of uncompensated encoder delay + | |
118 // resampling results in a fractional delay which | |
119 // means the decoded signal is slightly out of | |
120 // phase compared to the test signal | |
121 | |
122 maxLimit = 0.1; | |
123 rmsLimit = 0.05; | |
124 | |
125 } else { | |
126 | |
127 maxLimit = 0.05; | |
128 rmsLimit = 0.01; | |
129 } | |
130 | |
131 } else { | |
132 | |
133 // lossless formats (wav, aiff, flac, apple_lossless) | |
134 | |
135 if (bitdepth >= 16 && !resampled) { | |
136 maxLimit = 1e-3; | |
137 rmsLimit = 3e-4; | |
138 } else { | |
139 maxLimit = 0.01; | |
140 rmsLimit = 5e-3; | |
141 } | |
142 } | |
143 | |
144 } else { // !normalised | |
145 | |
146 if (format == "ogg") { | |
147 | |
148 maxLimit = 0.06; | |
149 rmsLimit = 0.03; | |
150 | |
151 } else if (format == "aac") { | |
152 | |
153 maxLimit = 0.1; | |
154 rmsLimit = 0.1; | |
155 | |
156 } else if (format == "mp3") { | |
157 | |
158 // all mp3 figures are worse when not normalising | |
159 maxLimit = 0.1; | |
160 rmsLimit = 0.05; | |
161 | |
162 } else { | |
163 | |
164 // lossless formats (wav, aiff, flac, apple_lossless) | |
165 | |
166 if (bitdepth >= 16 && !resampled) { | |
167 maxLimit = 1e-3; | |
168 rmsLimit = 3e-4; | |
169 } else { | |
170 maxLimit = 0.02; | |
171 rmsLimit = 0.01; | |
172 } | |
173 } | |
174 } | |
175 } | |
176 | |
177 QString testName(QString format, QString filename, int rate, bool norm, bool gapless) { | |
178 return QString("%1/%2 at %3%4%5") | |
179 .arg(format) | |
180 .arg(filename) | |
181 .arg(rate) | |
182 .arg(norm ? " normalised": "") | |
183 .arg(gapless ? "" : " non-gapless"); | |
42 } | 184 } |
43 | 185 |
44 private slots: | 186 private slots: |
45 void init() | 187 void init() |
46 { | 188 { |
47 if (!QDir(audioDir).exists()) { | 189 if (!QDir(audioDir).exists()) { |
48 cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist" << endl; | 190 QString cwd = QDir::currentPath(); |
191 cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist (cwd = " << cwd << ")" << endl; | |
49 QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found"); | 192 QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found"); |
193 } | |
194 if (!QDir(diffDir).exists() && !QDir().mkpath(diffDir)) { | |
195 cerr << "ERROR: Audio diff directory \"" << diffDir << "\" does not exist and could not be created" << endl; | |
196 QVERIFY2(QDir(diffDir).exists(), "Audio diff directory not found and could not be created"); | |
50 } | 197 } |
51 } | 198 } |
52 | 199 |
53 void read_data() | 200 void read_data() |
54 { | 201 { |
202 QTest::addColumn<QString>("format"); | |
55 QTest::addColumn<QString>("audiofile"); | 203 QTest::addColumn<QString>("audiofile"); |
56 QStringList files = QDir(audioDir).entryList(QDir::Files); | 204 QTest::addColumn<int>("rate"); |
57 foreach (QString filename, files) { | 205 QTest::addColumn<bool>("normalised"); |
58 QTest::newRow(strOf(filename)) << filename; | 206 QTest::addColumn<bool>("gapless"); |
207 QStringList dirs = QDir(audioDir).entryList(QDir::Dirs | | |
208 QDir::NoDotAndDotDot); | |
209 for (QString format: dirs) { | |
210 QStringList files = QDir(QDir(audioDir).filePath(format)) | |
211 .entryList(QDir::Files); | |
212 int readRates[] = { 44100, 48000 }; | |
213 bool norms[] = { false, true }; | |
214 bool gaplesses[] = { true, false }; | |
215 foreach (QString filename, files) { | |
216 for (int rate: readRates) { | |
217 for (bool norm: norms) { | |
218 for (bool gapless: gaplesses) { | |
219 | |
220 if (format != "mp3" && !gapless) { | |
221 continue; | |
222 } | |
223 | |
224 QString desc = testName | |
225 (format, filename, rate, norm, gapless); | |
226 | |
227 QTest::newRow(strOf(desc)) | |
228 << format << filename << rate << norm << gapless; | |
229 } | |
230 } | |
231 } | |
232 } | |
59 } | 233 } |
60 } | 234 } |
61 | 235 |
62 void read() | 236 void read() |
63 { | 237 { |
238 QFETCH(QString, format); | |
64 QFETCH(QString, audiofile); | 239 QFETCH(QString, audiofile); |
65 | 240 QFETCH(int, rate); |
66 sv_samplerate_t readRate = 48000; | 241 QFETCH(bool, normalised); |
242 QFETCH(bool, gapless); | |
243 | |
244 sv_samplerate_t readRate(rate); | |
245 | |
246 // cerr << "\naudiofile = " << audiofile << endl; | |
247 | |
248 AudioFileReaderFactory::Parameters params; | |
249 params.targetRate = readRate; | |
250 params.normalisation = (normalised ? | |
251 AudioFileReaderFactory::Normalisation::Peak : | |
252 AudioFileReaderFactory::Normalisation::None); | |
253 params.gaplessMode = (gapless ? | |
254 AudioFileReaderFactory::GaplessMode::Gapless : | |
255 AudioFileReaderFactory::GaplessMode::Gappy); | |
67 | 256 |
68 AudioFileReader *reader = | 257 AudioFileReader *reader = |
69 AudioFileReaderFactory::createReader | 258 AudioFileReaderFactory::createReader |
70 (audioDir + "/" + audiofile, readRate); | 259 (audioDir + "/" + format + "/" + audiofile, params); |
71 | 260 |
72 QStringList fileAndExt = audiofile.split("."); | |
73 QStringList bits = fileAndExt[0].split("-"); | |
74 QString extension = fileAndExt[1]; | |
75 sv_samplerate_t nominalRate = bits[0].toInt(); | |
76 int nominalChannels = bits[1].toInt(); | |
77 int nominalDepth = 16; | |
78 if (bits.length() > 2) nominalDepth = bits[2].toInt(); | |
79 | |
80 if (!reader) { | 261 if (!reader) { |
81 #if ( QT_VERSION >= 0x050000 ) | 262 #if ( QT_VERSION >= 0x050000 ) |
82 QSKIP("Unsupported file, skipping"); | 263 QSKIP("Unsupported file, skipping"); |
83 #else | 264 #else |
84 QSKIP("Unsupported file, skipping", SkipSingle); | 265 QSKIP("Unsupported file, skipping", SkipSingle); |
85 #endif | 266 #endif |
86 } | 267 } |
87 | 268 |
88 QCOMPARE((int)reader->getChannelCount(), nominalChannels); | 269 QString extension; |
89 QCOMPARE(reader->getNativeRate(), nominalRate); | 270 sv_samplerate_t fileRate; |
271 int channels; | |
272 int fileBitdepth; | |
273 getFileMetadata(audiofile, extension, fileRate, channels, fileBitdepth); | |
274 | |
275 QCOMPARE((int)reader->getChannelCount(), channels); | |
276 QCOMPARE(reader->getNativeRate(), fileRate); | |
90 QCOMPARE(reader->getSampleRate(), readRate); | 277 QCOMPARE(reader->getSampleRate(), readRate); |
91 | 278 |
92 int channels = reader->getChannelCount(); | |
93 AudioTestData tdata(readRate, channels); | 279 AudioTestData tdata(readRate, channels); |
94 | 280 |
95 float *reference = tdata.getInterleavedData(); | 281 float *reference = tdata.getInterleavedData(); |
96 sv_frame_t refFrames = tdata.getFrameCount(); | 282 sv_frame_t refFrames = tdata.getFrameCount(); |
97 | 283 |
98 // The reader should give us exactly the expected number of | 284 // The reader should give us exactly the expected number of |
99 // frames, except for mp3/aac files. We ask for quite a lot | 285 // frames, except for mp3/aac files. We ask for quite a lot |
100 // more, though, so we can (a) check that we only get the | 286 // more, though, so we can (a) check that we only get the |
101 // expected number back (if this is not mp3/aac) or (b) take | 287 // expected number back (if this is not mp3/aac) or (b) take |
102 // into account silence at beginning and end (if it is). | 288 // into account silence at beginning and end (if it is). |
103 vector<float> test = reader->getInterleavedFrames(0, refFrames + 5000); | 289 floatvec_t test = reader->getInterleavedFrames(0, refFrames + 5000); |
104 sv_frame_t read = test.size() / channels; | 290 sv_frame_t read = test.size() / channels; |
105 | 291 |
106 if (extension == "mp3" || extension == "aac" || extension == "m4a") { | 292 bool perceptual = (extension == "mp3" || |
107 // mp3s and aacs can have silence at start and end | 293 extension == "aac" || |
294 extension == "m4a"); | |
295 | |
296 if (perceptual && !gapless) { | |
297 // allow silence at start and end | |
108 QVERIFY(read >= refFrames); | 298 QVERIFY(read >= refFrames); |
109 } else { | 299 } else { |
110 QCOMPARE(read, refFrames); | 300 QCOMPARE(read, refFrames); |
111 } | 301 } |
112 | 302 |
113 // Our limits are pretty relaxed -- we're not testing decoder | 303 bool resampled = readRate != fileRate; |
114 // or resampler quality here, just whether the results are | 304 double maxLimit, rmsLimit; |
115 // plainly wrong (e.g. at wrong samplerate or with an offset) | 305 getExpectedThresholds(format, |
116 | 306 audiofile, |
117 double limit = 0.01; | 307 resampled, |
118 double edgeLimit = limit * 10; // in first or final edgeSize frames | 308 gapless, |
309 normalised, | |
310 maxLimit, rmsLimit); | |
311 | |
312 double edgeLimit = maxLimit * 3; // in first or final edgeSize frames | |
313 if (resampled && edgeLimit < 0.1) edgeLimit = 0.1; | |
119 int edgeSize = 100; | 314 int edgeSize = 100; |
120 | 315 |
121 if (nominalDepth < 16) { | |
122 limit = 0.02; | |
123 } | |
124 if (extension == "ogg" || extension == "mp3" || | |
125 extension == "aac" || extension == "m4a") { | |
126 limit = 0.2; | |
127 edgeLimit = limit * 3; | |
128 } | |
129 | |
130 // And we ignore completely the last few frames when upsampling | 316 // And we ignore completely the last few frames when upsampling |
131 int discard = 1 + int(round(readRate / nominalRate)); | 317 int discard = 1 + int(round(readRate / fileRate)); |
132 | 318 |
133 int offset = 0; | 319 int offset = 0; |
134 | 320 |
135 if (extension == "aac" || extension == "m4a") { | 321 if (perceptual) { |
136 // our m4a file appears to have a fixed offset of 1024 (at | 322 |
137 // file sample rate) | 323 // Look for an initial offset. |
138 offset = int(round((1024 / nominalRate) * readRate)); | 324 // |
139 } | 325 // We know the first channel has a sinusoid in it. It |
140 | 326 // should have a peak at 0.4ms (see AudioTestData.h) but |
141 if (extension == "mp3") { | 327 // that might have been clipped, which would make it |
142 // while mp3s appear to vary | 328 // imprecise. We can tell if it's clipped, though, as |
143 for (int i = 0; i < read; ++i) { | 329 // there will be samples having exactly identical |
144 bool any = false; | 330 // values. So what we look for is the peak if it's not |
145 double thresh = 0.01; | 331 // clipped and, if it is, the first zero crossing after |
146 for (int c = 0; c < channels; ++c) { | 332 // the peak, which should be at 0.8ms. |
147 if (fabs(test[i * channels + c]) > thresh) { | 333 |
148 any = true; | 334 int expectedPeak = int(0.0004 * readRate); |
335 int expectedZC = int(0.0008 * readRate); | |
336 bool foundPeak = false; | |
337 for (int i = 1; i+1 < read; ++i) { | |
338 float prevSample = test[(i-1) * channels]; | |
339 float thisSample = test[i * channels]; | |
340 float nextSample = test[(i+1) * channels]; | |
341 if (thisSample > 0.8 && nextSample < thisSample) { | |
342 foundPeak = true; | |
343 if (thisSample > prevSample) { | |
344 // not clipped | |
345 offset = i - expectedPeak - 1; | |
149 break; | 346 break; |
150 } | 347 } |
151 } | 348 } |
152 if (any) { | 349 if (foundPeak && (thisSample >= 0.0 && nextSample < 0.0)) { |
153 offset = i; | 350 // cerr << "thisSample = " << thisSample << ", nextSample = " |
351 // << nextSample << endl; | |
352 offset = i - expectedZC - 1; | |
154 break; | 353 break; |
155 } | 354 } |
156 } | 355 } |
356 | |
357 // int fileRateEquivalent = int((offset / readRate) * fileRate); | |
157 // std::cerr << "offset = " << offset << std::endl; | 358 // std::cerr << "offset = " << offset << std::endl; |
158 } | 359 // std::cerr << "at file rate would be " << fileRateEquivalent << std::endl; |
159 | 360 |
160 for (int c = 0; c < channels; ++c) { | 361 // Previously our m4a test file had a fixed offset of 1024 |
161 float maxdiff = 0.f; | 362 // at the file sample rate -- this may be because it was |
162 int maxAt = 0; | 363 // produced by FAAC which did not write in the delay as |
163 float totdiff = 0.f; | 364 // metadata? We now have an m4a produced by Core Audio |
164 for (int i = 0; i < read - offset - discard && i < refFrames; ++i) { | 365 // which gives a 0 offset. What to do... |
165 float diff = fabsf(test[(i + offset) * channels + c] - | 366 |
166 reference[i * channels + c]); | 367 // Anyway, mp3s should have 0 offset in gapless mode and |
167 totdiff += diff; | 368 // "something else" otherwise. |
369 | |
370 if (gapless) { | |
371 if (format == "aac") { | |
372 // ouch! | |
373 if (offset == -1) offset = 0; | |
374 } | |
375 QCOMPARE(offset, 0); | |
376 } | |
377 } | |
378 | |
379 { | |
380 // Write the diff file now, so that it's already been written | |
381 // even if the comparison fails. We aren't checking anything | |
382 // here except as necessary to avoid buffer overruns etc | |
383 | |
384 QString diffFile = | |
385 testName(format, audiofile, rate, normalised, gapless); | |
386 diffFile.replace("/", "_"); | |
387 diffFile.replace(".", "_"); | |
388 diffFile.replace(" ", "_"); | |
389 diffFile += ".wav"; | |
390 diffFile = QDir(diffDir).filePath(diffFile); | |
391 WavFileWriter diffWriter(diffFile, readRate, channels, | |
392 WavFileWriter::WriteToTemporary); | |
393 QVERIFY(diffWriter.isOK()); | |
394 | |
395 vector<vector<float>> diffs(channels); | |
396 for (int c = 0; c < channels; ++c) { | |
397 for (int i = 0; i < refFrames; ++i) { | |
398 int ix = i + offset; | |
399 if (ix < read) { | |
400 float signeddiff = | |
401 test[ix * channels + c] - | |
402 reference[i * channels + c]; | |
403 diffs[c].push_back(signeddiff); | |
404 } | |
405 } | |
406 } | |
407 float **ptrs = new float*[channels]; | |
408 for (int c = 0; c < channels; ++c) { | |
409 ptrs[c] = diffs[c].data(); | |
410 } | |
411 diffWriter.writeSamples(ptrs, refFrames); | |
412 delete[] ptrs; | |
413 } | |
414 | |
415 for (int c = 0; c < channels; ++c) { | |
416 | |
417 double maxDiff = 0.0; | |
418 double totalDiff = 0.0; | |
419 double totalSqrDiff = 0.0; | |
420 int maxIndex = 0; | |
421 | |
422 for (int i = 0; i < refFrames; ++i) { | |
423 int ix = i + offset; | |
424 if (ix >= read) { | |
425 cerr << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl; | |
426 QVERIFY(ix < read); | |
427 } | |
428 | |
429 if (ix + discard >= read) { | |
430 // we forgive the very edge samples when | |
431 // resampling (discard > 0) | |
432 continue; | |
433 } | |
434 | |
435 double diff = fabs(test[ix * channels + c] - | |
436 reference[i * channels + c]); | |
437 | |
438 totalDiff += diff; | |
439 totalSqrDiff += diff * diff; | |
440 | |
168 // in edge areas, record this only if it exceeds edgeLimit | 441 // in edge areas, record this only if it exceeds edgeLimit |
169 if (i < edgeSize || i + edgeSize >= read - offset) { | 442 if (i < edgeSize || i + edgeSize >= refFrames) { |
170 if (diff > edgeLimit && diff > maxdiff) { | 443 if (diff > edgeLimit && diff > maxDiff) { |
171 maxdiff = diff; | 444 maxDiff = diff; |
172 maxAt = i; | 445 maxIndex = i; |
173 } | 446 } |
174 } else { | 447 } else { |
175 if (diff > maxdiff) { | 448 if (diff > maxDiff) { |
176 maxdiff = diff; | 449 maxDiff = diff; |
177 maxAt = i; | 450 maxIndex = i; |
178 } | 451 } |
179 } | 452 } |
180 } | 453 } |
181 float meandiff = totdiff / float(read); | 454 |
182 // cerr << "meandiff on channel " << c << ": " << meandiff << endl; | 455 double meanDiff = totalDiff / double(refFrames); |
183 // cerr << "maxdiff on channel " << c << ": " << maxdiff << " at " << maxAt << endl; | 456 double rmsDiff = sqrt(totalSqrDiff / double(refFrames)); |
184 if (meandiff >= limit) { | 457 |
185 cerr << "ERROR: for audiofile " << audiofile << ": mean diff = " << meandiff << " for channel " << c << endl; | 458 /* |
186 QVERIFY(meandiff < limit); | 459 cerr << "channel " << c << ": mean diff " << meanDiff << endl; |
187 } | 460 cerr << "channel " << c << ": rms diff " << rmsDiff << endl; |
188 if (maxdiff >= limit) { | 461 cerr << "channel " << c << ": max diff " << maxDiff << " at " << maxIndex << endl; |
189 cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxdiff << " at frame " << maxAt << " of " << read << " on channel " << c << " (mean diff = " << meandiff << ")" << endl; | 462 */ |
190 QVERIFY(maxdiff < limit); | 463 if (rmsDiff >= rmsLimit) { |
191 } | 464 cerr << "ERROR: for audiofile " << audiofile << ": RMS diff = " << rmsDiff << " for channel " << c << " (limit = " << rmsLimit << ")" << endl; |
465 QVERIFY(rmsDiff < rmsLimit); | |
466 } | |
467 if (maxDiff >= maxLimit) { | |
468 cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxDiff << " at frame " << maxIndex << " of " << read << " on channel " << c << " (limit = " << maxLimit << ", edge limit = " << edgeLimit << ", mean diff = " << meanDiff << ", rms = " << rmsDiff << ")" << endl; | |
469 QVERIFY(maxDiff < maxLimit); | |
470 } | |
471 | |
472 // and check for spurious material at end | |
473 | |
474 for (sv_frame_t i = refFrames; i + offset < read; ++i) { | |
475 sv_frame_t ix = i + offset; | |
476 float quiet = 0.1f; //!!! allow some ringing - but let's come back to this, it should tail off | |
477 float mag = fabsf(test[ix * channels + c]); | |
478 if (mag > quiet) { | |
479 cerr << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << " after reference+offset ended at " << refFrames+offset << ")" << endl; | |
480 QVERIFY(mag < quiet); | |
481 } | |
482 } | |
192 } | 483 } |
193 } | 484 } |
194 }; | 485 }; |
195 | 486 |
196 #endif | 487 #endif |