Mercurial > hg > svcore
comparison data/fileio/test/AudioFileReaderTest.h @ 1313:ff9697592bef 3.0-integration
Add gapless preference to prefs dialog; much work on audio read tests
author | Chris Cannam |
---|---|
date | Thu, 01 Dec 2016 17:45:40 +0000 |
parents | 2e7fcdd5f627 |
children | 00cae2d5ee7e |
Comparison legend:
- equal
- deleted
- inserted
- replaced
1312:079e553dc16e | 1313:ff9697592bef |
---|---|
16 #ifndef TEST_AUDIO_FILE_READER_H | 16 #ifndef TEST_AUDIO_FILE_READER_H |
17 #define TEST_AUDIO_FILE_READER_H | 17 #define TEST_AUDIO_FILE_READER_H |
18 | 18 |
19 #include "../AudioFileReaderFactory.h" | 19 #include "../AudioFileReaderFactory.h" |
20 #include "../AudioFileReader.h" | 20 #include "../AudioFileReader.h" |
21 #include "../WavFileWriter.h" | |
21 | 22 |
22 #include "AudioTestData.h" | 23 #include "AudioTestData.h" |
23 | 24 |
24 #include <cmath> | 25 #include <cmath> |
25 | 26 |
30 #include <iostream> | 31 #include <iostream> |
31 | 32 |
32 using namespace std; | 33 using namespace std; |
33 | 34 |
34 static QString audioDir = "svcore/data/fileio/test/testfiles"; | 35 static QString audioDir = "svcore/data/fileio/test/testfiles"; |
36 static QString diffDir = "svcore/data/fileio/test/diffs"; | |
35 | 37 |
36 class AudioFileReaderTest : public QObject | 38 class AudioFileReaderTest : public QObject |
37 { | 39 { |
38 Q_OBJECT | 40 Q_OBJECT |
39 | 41 |
40 const char *strOf(QString s) { | 42 const char *strOf(QString s) { |
41 return strdup(s.toLocal8Bit().data()); | 43 return strdup(s.toLocal8Bit().data()); |
44 } | |
45 | |
46 void getFileMetadata(QString filename, | |
47 QString &extension, | |
48 sv_samplerate_t &rate, | |
49 int &channels, | |
50 int &bitdepth) { | |
51 | |
52 QStringList fileAndExt = filename.split("."); | |
53 QStringList bits = fileAndExt[0].split("-"); | |
54 | |
55 extension = fileAndExt[1]; | |
56 rate = bits[0].toInt(); | |
57 channels = bits[1].toInt(); | |
58 bitdepth = 16; | |
59 if (bits.length() > 2) { | |
60 bitdepth = bits[2].toInt(); | |
61 } | |
62 } | |
63 | |
64 void getExpectedThresholds(QString filename, | |
65 bool resampled, | |
66 bool gapless, | |
67 bool normalised, | |
68 double &maxLimit, | |
69 double &rmsLimit) { | |
70 | |
71 QString extension; | |
72 sv_samplerate_t fileRate; | |
73 int channels; | |
74 int bitdepth; | |
75 getFileMetadata(filename, extension, fileRate, channels, bitdepth); | |
76 | |
77 if (normalised) { | |
78 | |
79 if (extension == "ogg") { | |
80 | |
81 // Our ogg is not especially high quality and is | |
82 // actually further from the original if normalised | |
83 | |
84 maxLimit = 0.1; | |
85 rmsLimit = 0.03; | |
86 | |
87 } else if (extension == "m4a" || extension == "aac") { | |
88 | |
89 //!!! to be worked out | |
90 maxLimit = 1e-10; | |
91 rmsLimit = 1e-10; | |
92 | |
93 } else if (extension == "mp3") { | |
94 | |
95 if (resampled && !gapless) { | |
96 | |
97 // We expect worse figures here, because the | |
98 // combination of uncompensated encoder delay + | |
99 // resampling results in a fractional delay which | |
100 // means the decoded signal is slightly out of | |
101 // phase compared to the test signal | |
102 | |
103 maxLimit = 0.1; | |
104 rmsLimit = 0.05; | |
105 | |
106 } else { | |
107 | |
108 maxLimit = 0.05; | |
109 rmsLimit = 0.01; | |
110 } | |
111 | |
112 } else { | |
113 | |
114 // supposed to be lossless then (wav, aiff, flac) | |
115 | |
116 if (bitdepth >= 16 && !resampled) { | |
117 maxLimit = 1e-3; | |
118 rmsLimit = 3e-4; | |
119 } else { | |
120 maxLimit = 0.01; | |
121 rmsLimit = 5e-3; | |
122 } | |
123 } | |
124 | |
125 } else { // !normalised | |
126 | |
127 if (extension == "ogg") { | |
128 | |
129 maxLimit = 0.06; | |
130 rmsLimit = 0.03; | |
131 | |
132 } else if (extension == "m4a" || extension == "aac") { | |
133 | |
134 //!!! to be worked out | |
135 maxLimit = 1e-10; | |
136 rmsLimit = 1e-10; | |
137 | |
138 } else if (extension == "mp3") { | |
139 | |
140 // all mp3 figures are worse when not normalising | |
141 maxLimit = 0.1; | |
142 rmsLimit = 0.05; | |
143 | |
144 } else { | |
145 | |
146 // supposed to be lossless then (wav, aiff, flac) | |
147 | |
148 if (bitdepth >= 16 && !resampled) { | |
149 maxLimit = 1e-3; | |
150 rmsLimit = 3e-4; | |
151 } else { | |
152 maxLimit = 0.02; | |
153 rmsLimit = 0.01; | |
154 } | |
155 } | |
156 } | |
157 } | |
158 | |
159 QString testName(QString filename, int rate, bool norm, bool gapless) { | |
160 return QString("%1 at %2%3%4") | |
161 .arg(filename) | |
162 .arg(rate) | |
163 .arg(norm ? " normalised": "") | |
164 .arg(gapless ? "" : " non-gapless"); | |
42 } | 165 } |
43 | 166 |
44 private slots: | 167 private slots: |
45 void init() | 168 void init() |
46 { | 169 { |
47 if (!QDir(audioDir).exists()) { | 170 if (!QDir(audioDir).exists()) { |
48 cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist" << endl; | 171 cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist" << endl; |
49 QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found"); | 172 QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found"); |
50 } | 173 } |
174 if (!QDir(diffDir).exists() && !QDir().mkpath(diffDir)) { | |
175 cerr << "ERROR: Audio diff directory \"" << diffDir << "\" does not exist and could not be created" << endl; | |
176 QVERIFY2(QDir(diffDir).exists(), "Audio diff directory not found and could not be created"); | |
177 } | |
51 } | 178 } |
52 | 179 |
53 void read_data() | 180 void read_data() |
54 { | 181 { |
55 QTest::addColumn<QString>("audiofile"); | 182 QTest::addColumn<QString>("audiofile"); |
183 QTest::addColumn<int>("rate"); | |
184 QTest::addColumn<bool>("normalised"); | |
185 QTest::addColumn<bool>("gapless"); | |
56 QStringList files = QDir(audioDir).entryList(QDir::Files); | 186 QStringList files = QDir(audioDir).entryList(QDir::Files); |
187 int readRates[] = { 44100, 48000 }; | |
188 bool norms[] = { false, true }; | |
189 bool gaplesses[] = { true, false }; | |
57 foreach (QString filename, files) { | 190 foreach (QString filename, files) { |
58 QTest::newRow(strOf(filename)) << filename; | 191 for (int rate: readRates) { |
192 for (bool norm: norms) { | |
193 for (bool gapless: gaplesses) { | |
194 | |
195 if (QFileInfo(filename).suffix() != "mp3" && | |
196 !gapless) { | |
197 continue; | |
198 } | |
199 | |
200 QString desc = testName(filename, rate, norm, gapless); | |
201 | |
202 QTest::newRow(strOf(desc)) | |
203 << filename << rate << norm << gapless; | |
204 } | |
205 } | |
206 } | |
59 } | 207 } |
60 } | 208 } |
61 | 209 |
62 void read() | 210 void read() |
63 { | 211 { |
64 QFETCH(QString, audiofile); | 212 QFETCH(QString, audiofile); |
65 | 213 QFETCH(int, rate); |
66 sv_samplerate_t readRate = 48000; | 214 QFETCH(bool, normalised); |
215 QFETCH(bool, gapless); | |
216 | |
217 sv_samplerate_t readRate(rate); | |
218 | |
219 cerr << "\naudiofile = " << audiofile << endl; | |
220 | |
221 AudioFileReaderFactory::Parameters params; | |
222 params.targetRate = readRate; | |
223 params.normalisation = (normalised ? | |
224 AudioFileReaderFactory::Normalisation::Peak : | |
225 AudioFileReaderFactory::Normalisation::None); | |
226 params.gaplessMode = (gapless ? | |
227 AudioFileReaderFactory::GaplessMode::Gapless : | |
228 AudioFileReaderFactory::GaplessMode::Gappy); | |
67 | 229 |
68 AudioFileReader *reader = | 230 AudioFileReader *reader = |
69 AudioFileReaderFactory::createReader | 231 AudioFileReaderFactory::createReader |
70 (audioDir + "/" + audiofile, readRate); | 232 (audioDir + "/" + audiofile, params); |
71 | 233 |
72 QStringList fileAndExt = audiofile.split("."); | |
73 QStringList bits = fileAndExt[0].split("-"); | |
74 QString extension = fileAndExt[1]; | |
75 sv_samplerate_t nominalRate = bits[0].toInt(); | |
76 int nominalChannels = bits[1].toInt(); | |
77 int nominalDepth = 16; | |
78 if (bits.length() > 2) nominalDepth = bits[2].toInt(); | |
79 | |
80 if (!reader) { | 234 if (!reader) { |
81 #if ( QT_VERSION >= 0x050000 ) | 235 #if ( QT_VERSION >= 0x050000 ) |
82 QSKIP("Unsupported file, skipping"); | 236 QSKIP("Unsupported file, skipping"); |
83 #else | 237 #else |
84 QSKIP("Unsupported file, skipping", SkipSingle); | 238 QSKIP("Unsupported file, skipping", SkipSingle); |
85 #endif | 239 #endif |
86 } | 240 } |
87 | 241 |
88 QCOMPARE((int)reader->getChannelCount(), nominalChannels); | 242 QString extension; |
89 QCOMPARE(reader->getNativeRate(), nominalRate); | 243 sv_samplerate_t fileRate; |
244 int channels; | |
245 int fileBitdepth; | |
246 getFileMetadata(audiofile, extension, fileRate, channels, fileBitdepth); | |
247 | |
248 QString diffFile = testName(audiofile, rate, normalised, gapless); | |
249 diffFile.replace(".", "_"); | |
250 diffFile.replace(" ", "_"); | |
251 diffFile += ".wav"; | |
252 diffFile = QDir(diffDir).filePath(diffFile); | |
253 WavFileWriter diffWriter(diffFile, readRate, channels, | |
254 WavFileWriter::WriteToTarget); //!!! NB WriteToTemporary not working, why? | |
255 QVERIFY(diffWriter.isOK()); | |
256 | |
257 QCOMPARE((int)reader->getChannelCount(), channels); | |
258 QCOMPARE(reader->getNativeRate(), fileRate); | |
90 QCOMPARE(reader->getSampleRate(), readRate); | 259 QCOMPARE(reader->getSampleRate(), readRate); |
91 | 260 |
92 int channels = reader->getChannelCount(); | |
93 AudioTestData tdata(readRate, channels); | 261 AudioTestData tdata(readRate, channels); |
94 | 262 |
95 float *reference = tdata.getInterleavedData(); | 263 float *reference = tdata.getInterleavedData(); |
96 sv_frame_t refFrames = tdata.getFrameCount(); | 264 sv_frame_t refFrames = tdata.getFrameCount(); |
97 | 265 |
101 // expected number back (if this is not mp3/aac) or (b) take | 269 // expected number back (if this is not mp3/aac) or (b) take |
102 // into account silence at beginning and end (if it is). | 270 // into account silence at beginning and end (if it is). |
103 vector<float> test = reader->getInterleavedFrames(0, refFrames + 5000); | 271 vector<float> test = reader->getInterleavedFrames(0, refFrames + 5000); |
104 sv_frame_t read = test.size() / channels; | 272 sv_frame_t read = test.size() / channels; |
105 | 273 |
106 if (extension == "mp3" || extension == "aac" || extension == "m4a") { | 274 bool perceptual = (extension == "mp3" || |
107 // mp3s and aacs can have silence at start and end | 275 extension == "aac" || |
276 extension == "m4a"); | |
277 | |
278 if (perceptual && !gapless) { | |
279 // allow silence at start and end | |
108 QVERIFY(read >= refFrames); | 280 QVERIFY(read >= refFrames); |
109 } else { | 281 } else { |
110 QCOMPARE(read, refFrames); | 282 QCOMPARE(read, refFrames); |
111 } | 283 } |
112 | 284 |
113 // Our limits are pretty relaxed -- we're not testing decoder | 285 bool resampled = readRate != fileRate; |
114 // or resampler quality here, just whether the results are | 286 double maxLimit, rmsLimit; |
115 // plainly wrong (e.g. at wrong samplerate or with an offset). | 287 getExpectedThresholds(audiofile, |
116 | 288 resampled, |
117 double maxLimit = 0.01; | 289 gapless, |
118 double meanLimit = 0.001; | 290 normalised, |
119 double edgeLimit = maxLimit * 10; // in first or final edgeSize frames | 291 maxLimit, rmsLimit); |
292 | |
293 double edgeLimit = maxLimit * 3; // in first or final edgeSize frames | |
294 if (resampled && edgeLimit < 0.1) edgeLimit = 0.1; | |
120 int edgeSize = 100; | 295 int edgeSize = 100; |
121 | 296 |
122 if (nominalDepth < 16) { | |
123 maxLimit = 0.02; | |
124 meanLimit = 0.02; | |
125 } else if (extension == "ogg" || extension == "mp3") { | |
126 maxLimit = 0.1; | |
127 meanLimit = 0.035; | |
128 edgeLimit = maxLimit * 3; | |
129 } else if (extension == "aac" || extension == "m4a") { | |
130 maxLimit = 0.3; // seems max diff can be quite large here | |
131 // even when mean is fairly small | |
132 meanLimit = 0.01; | |
133 edgeLimit = maxLimit * 3; | |
134 } | |
135 | |
136 // And we ignore completely the last few frames when upsampling | 297 // And we ignore completely the last few frames when upsampling |
137 int discard = 1 + int(round(readRate / nominalRate)); | 298 int discard = 1 + int(round(readRate / fileRate)); |
138 | 299 |
139 int offset = 0; | 300 int offset = 0; |
140 | 301 |
141 if (extension == "aac" || extension == "m4a") { | 302 if (perceptual) { |
142 // our m4a file appears to have a fixed offset of 1024 (at | 303 |
143 // file sample rate) | 304 // Look for an initial offset. What we're looking for is |
144 // offset = int(round((1024 / nominalRate) * readRate)); | |
145 offset = 0; | |
146 } | |
147 | |
148 if (extension == "mp3") { | |
149 // ...while mp3s appear to vary. What we're looking for is | |
150 // the first peak of the sinusoid in the first channel | 305 // the first peak of the sinusoid in the first channel |
151 // (since we may have only the one channel). This should | 306 // (since we may have only the one channel). This should |
152 // appear at 0.4ms (see AudioTestData.h) | 307 // appear at 0.4ms (see AudioTestData.h). |
308 | |
153 int expectedPeak = int(0.0004 * readRate); | 309 int expectedPeak = int(0.0004 * readRate); |
154 // std::cerr << "expectedPeak = " << expectedPeak << std::endl; | |
155 for (int i = 1; i < read; ++i) { | 310 for (int i = 1; i < read; ++i) { |
156 if (test[i * channels] > 0.8 && | 311 if (test[i * channels] > 0.8 && |
157 test[(i+1) * channels] < test[i * channels]) { | 312 test[(i+1) * channels] < test[i * channels]) { |
158 offset = i - expectedPeak - 1; | 313 offset = i - expectedPeak - 1; |
159 // std::cerr << "actual peak = " << i-1 << std::endl; | |
160 break; | 314 break; |
161 } | 315 } |
162 } | 316 } |
163 // std::cerr << "offset = " << offset << std::endl; | 317 |
164 } | 318 std::cerr << "offset = " << offset << std::endl; |
165 | 319 std::cerr << "at file rate would be " << (offset / readRate) * fileRate << std::endl; |
320 | |
321 // Previously our m4a test file had a fixed offset of 1024 | |
322 // at the file sample rate -- this may be because it was | |
323 // produced by FAAC which did not write in the delay as | |
324 // metadata? We now have an m4a produced by Core Audio | |
325 // which gives a 0 offset. What to do... | |
326 | |
327 // Anyway, mp3s should have 0 offset in gapless mode and | |
328 // "something else" otherwise. | |
329 | |
330 if (gapless) { | |
331 QCOMPARE(offset, 0); | |
332 } | |
333 } | |
334 | |
335 vector<vector<float>> diffs(channels); | |
336 | |
166 for (int c = 0; c < channels; ++c) { | 337 for (int c = 0; c < channels; ++c) { |
167 | 338 |
168 float maxdiff = 0.f; | 339 double maxDiff = 0.0; |
169 int maxAt = 0; | 340 double totalDiff = 0.0; |
170 float totdiff = 0.f; | 341 double totalSqrDiff = 0.0; |
171 | 342 int maxIndex = 0; |
343 | |
344 // cerr << "\nchannel " << c << ": "; | |
345 | |
172 for (int i = 0; i < refFrames; ++i) { | 346 for (int i = 0; i < refFrames; ++i) { |
173 int ix = i + offset; | 347 int ix = i + offset; |
174 if (ix >= read) { | 348 if (ix >= read) { |
175 cerr << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl; | 349 cerr << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl; |
176 QVERIFY(ix < read); | 350 QVERIFY(ix < read); |
177 } | 351 } |
352 | |
353 float signeddiff = | |
354 test[ix * channels + c] - | |
355 reference[i * channels + c]; | |
356 | |
357 diffs[c].push_back(signeddiff); | |
358 | |
178 if (ix + discard >= read) { | 359 if (ix + discard >= read) { |
179 // we forgive the very edge samples when | 360 // we forgive the very edge samples when |
180 // resampling (discard > 0) | 361 // resampling (discard > 0) |
181 continue; | 362 continue; |
182 } | 363 } |
183 float diff = fabsf(test[ix * channels + c] - | 364 |
184 reference[i * channels + c]); | 365 double diff = fabs(signeddiff); |
185 totdiff += diff; | 366 |
367 totalDiff += diff; | |
368 totalSqrDiff += diff * diff; | |
369 | |
186 // in edge areas, record this only if it exceeds edgeLimit | 370 // in edge areas, record this only if it exceeds edgeLimit |
187 if (i < edgeSize || i + edgeSize >= read - offset) { | 371 if (i < edgeSize || i + edgeSize >= refFrames) { |
188 if (diff > edgeLimit && diff > maxdiff) { | 372 if (diff > edgeLimit && diff > maxDiff) { |
189 maxdiff = diff; | 373 maxDiff = diff; |
190 maxAt = i; | 374 maxIndex = i; |
191 } | 375 } |
192 } else { | 376 } else { |
193 if (diff > maxdiff) { | 377 if (diff > maxDiff) { |
194 maxdiff = diff; | 378 maxDiff = diff; |
195 maxAt = i; | 379 maxIndex = i; |
196 } | 380 } |
197 } | 381 } |
198 } | 382 } |
199 | 383 |
200 // check for spurious material at end | 384 double meanDiff = totalDiff / double(refFrames); |
385 double rmsDiff = sqrt(totalSqrDiff / double(refFrames)); | |
386 | |
387 cerr << "channel " << c << ": mean diff " << meanDiff << endl; | |
388 cerr << "channel " << c << ": rms diff " << rmsDiff << endl; | |
389 cerr << "channel " << c << ": max diff " << maxDiff << " at " << maxIndex << endl; | |
390 | |
391 if (rmsDiff >= rmsLimit) { | |
392 cerr << "ERROR: for audiofile " << audiofile << ": RMS diff = " << rmsDiff << " for channel " << c << " (limit = " << rmsLimit << ")" << endl; | |
393 QVERIFY(rmsDiff < rmsLimit); | |
394 } | |
395 if (maxDiff >= maxLimit) { | |
396 cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxDiff << " at frame " << maxIndex << " of " << read << " on channel " << c << " (limit = " << maxLimit << ", edge limit = " << edgeLimit << ", mean diff = " << meanDiff << ", rms = " << rmsDiff << ")" << endl; | |
397 QVERIFY(maxDiff < maxLimit); | |
398 } | |
399 | |
400 // and check for spurious material at end | |
401 | |
201 for (sv_frame_t i = refFrames; i + offset < read; ++i) { | 402 for (sv_frame_t i = refFrames; i + offset < read; ++i) { |
202 sv_frame_t ix = i + offset; | 403 sv_frame_t ix = i + offset; |
203 float quiet = 1e-6f; | 404 float quiet = 0.1; //!!! allow some ringing - but let's come back to this, it should tail off |
204 float mag = fabsf(test[ix * channels + c]); | 405 float mag = fabsf(test[ix * channels + c]); |
205 if (mag > quiet) { | 406 if (mag > quiet) { |
206 cerr << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << ")" << endl; | 407 cerr << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << " after reference+offset ended at " << refFrames+offset << ")" << endl; |
207 QVERIFY(mag < quiet); | 408 QVERIFY(mag < quiet); |
208 } | 409 } |
209 } | 410 } |
210 | |
211 float meandiff = totdiff / float(read); | |
212 // cerr << "meandiff on channel " << c << ": " << meandiff << endl; | |
213 // cerr << "maxdiff on channel " << c << ": " << maxdiff << " at " << maxAt << endl; | |
214 if (meandiff >= meanLimit) { | |
215 cerr << "ERROR: for audiofile " << audiofile << ": mean diff = " << meandiff << " for channel " << c << endl; | |
216 QVERIFY(meandiff < meanLimit); | |
217 } | |
218 if (maxdiff >= maxLimit) { | |
219 cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxdiff << " at frame " << maxAt << " of " << read << " on channel " << c << " (mean diff = " << meandiff << ")" << endl; | |
220 QVERIFY(maxdiff < maxLimit); | |
221 } | |
222 } | 411 } |
412 | |
413 float **ptrs = new float*[channels]; | |
414 for (int c = 0; c < channels; ++c) { | |
415 ptrs[c] = diffs[c].data(); | |
416 } | |
417 diffWriter.writeSamples(ptrs, refFrames); | |
418 delete[] ptrs; | |
223 } | 419 } |
224 }; | 420 }; |
225 | 421 |
226 #endif | 422 #endif |