comparison data/fileio/test/AudioFileReaderTest.h @ 1313:ff9697592bef 3.0-integration

Add gapless preference to prefs dialog; much work on audio read tests
author Chris Cannam
date Thu, 01 Dec 2016 17:45:40 +0000
parents 2e7fcdd5f627
children 00cae2d5ee7e
comparison
equal deleted inserted replaced
1312:079e553dc16e 1313:ff9697592bef
16 #ifndef TEST_AUDIO_FILE_READER_H 16 #ifndef TEST_AUDIO_FILE_READER_H
17 #define TEST_AUDIO_FILE_READER_H 17 #define TEST_AUDIO_FILE_READER_H
18 18
19 #include "../AudioFileReaderFactory.h" 19 #include "../AudioFileReaderFactory.h"
20 #include "../AudioFileReader.h" 20 #include "../AudioFileReader.h"
21 #include "../WavFileWriter.h"
21 22
22 #include "AudioTestData.h" 23 #include "AudioTestData.h"
23 24
24 #include <cmath> 25 #include <cmath>
25 26
30 #include <iostream> 31 #include <iostream>
31 32
32 using namespace std; 33 using namespace std;
33 34
34 static QString audioDir = "svcore/data/fileio/test/testfiles"; 35 static QString audioDir = "svcore/data/fileio/test/testfiles";
36 static QString diffDir = "svcore/data/fileio/test/diffs";
35 37
36 class AudioFileReaderTest : public QObject 38 class AudioFileReaderTest : public QObject
37 { 39 {
38 Q_OBJECT 40 Q_OBJECT
39 41
40 const char *strOf(QString s) { 42 const char *strOf(QString s) {
41 return strdup(s.toLocal8Bit().data()); 43 return strdup(s.toLocal8Bit().data());
44 }
45
46 void getFileMetadata(QString filename,
47 QString &extension,
48 sv_samplerate_t &rate,
49 int &channels,
50 int &bitdepth) {
51
52 QStringList fileAndExt = filename.split(".");
53 QStringList bits = fileAndExt[0].split("-");
54
55 extension = fileAndExt[1];
56 rate = bits[0].toInt();
57 channels = bits[1].toInt();
58 bitdepth = 16;
59 if (bits.length() > 2) {
60 bitdepth = bits[2].toInt();
61 }
62 }
63
64 void getExpectedThresholds(QString filename,
65 bool resampled,
66 bool gapless,
67 bool normalised,
68 double &maxLimit,
69 double &rmsLimit) {
70
71 QString extension;
72 sv_samplerate_t fileRate;
73 int channels;
74 int bitdepth;
75 getFileMetadata(filename, extension, fileRate, channels, bitdepth);
76
77 if (normalised) {
78
79 if (extension == "ogg") {
80
81 // Our ogg is not especially high quality and is
82 // actually further from the original if normalised
83
84 maxLimit = 0.1;
85 rmsLimit = 0.03;
86
87 } else if (extension == "m4a" || extension == "aac") {
88
89 //!!! to be worked out
90 maxLimit = 1e-10;
91 rmsLimit = 1e-10;
92
93 } else if (extension == "mp3") {
94
95 if (resampled && !gapless) {
96
97 // We expect worse figures here, because the
98 // combination of uncompensated encoder delay +
99 // resampling results in a fractional delay which
100 // means the decoded signal is slightly out of
101 // phase compared to the test signal
102
103 maxLimit = 0.1;
104 rmsLimit = 0.05;
105
106 } else {
107
108 maxLimit = 0.05;
109 rmsLimit = 0.01;
110 }
111
112 } else {
113
114 // supposed to be lossless then (wav, aiff, flac)
115
116 if (bitdepth >= 16 && !resampled) {
117 maxLimit = 1e-3;
118 rmsLimit = 3e-4;
119 } else {
120 maxLimit = 0.01;
121 rmsLimit = 5e-3;
122 }
123 }
124
125 } else { // !normalised
126
127 if (extension == "ogg") {
128
129 maxLimit = 0.06;
130 rmsLimit = 0.03;
131
132 } else if (extension == "m4a" || extension == "aac") {
133
134 //!!! to be worked out
135 maxLimit = 1e-10;
136 rmsLimit = 1e-10;
137
138 } else if (extension == "mp3") {
139
140 // all mp3 figures are worse when not normalising
141 maxLimit = 0.1;
142 rmsLimit = 0.05;
143
144 } else {
145
146 // supposed to be lossless then (wav, aiff, flac)
147
148 if (bitdepth >= 16 && !resampled) {
149 maxLimit = 1e-3;
150 rmsLimit = 3e-4;
151 } else {
152 maxLimit = 0.02;
153 rmsLimit = 0.01;
154 }
155 }
156 }
157 }
158
159 QString testName(QString filename, int rate, bool norm, bool gapless) {
160 return QString("%1 at %2%3%4")
161 .arg(filename)
162 .arg(rate)
163 .arg(norm ? " normalised": "")
164 .arg(gapless ? "" : " non-gapless");
42 } 165 }
43 166
44 private slots: 167 private slots:
45 void init() 168 void init()
46 { 169 {
47 if (!QDir(audioDir).exists()) { 170 if (!QDir(audioDir).exists()) {
48 cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist" << endl; 171 cerr << "ERROR: Audio test file directory \"" << audioDir << "\" does not exist" << endl;
49 QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found"); 172 QVERIFY2(QDir(audioDir).exists(), "Audio test file directory not found");
50 } 173 }
174 if (!QDir(diffDir).exists() && !QDir().mkpath(diffDir)) {
175 cerr << "ERROR: Audio diff directory \"" << diffDir << "\" does not exist and could not be created" << endl;
176 QVERIFY2(QDir(diffDir).exists(), "Audio diff directory not found and could not be created");
177 }
51 } 178 }
52 179
53 void read_data() 180 void read_data()
54 { 181 {
55 QTest::addColumn<QString>("audiofile"); 182 QTest::addColumn<QString>("audiofile");
183 QTest::addColumn<int>("rate");
184 QTest::addColumn<bool>("normalised");
185 QTest::addColumn<bool>("gapless");
56 QStringList files = QDir(audioDir).entryList(QDir::Files); 186 QStringList files = QDir(audioDir).entryList(QDir::Files);
187 int readRates[] = { 44100, 48000 };
188 bool norms[] = { false, true };
189 bool gaplesses[] = { true, false };
57 foreach (QString filename, files) { 190 foreach (QString filename, files) {
58 QTest::newRow(strOf(filename)) << filename; 191 for (int rate: readRates) {
192 for (bool norm: norms) {
193 for (bool gapless: gaplesses) {
194
195 if (QFileInfo(filename).suffix() != "mp3" &&
196 !gapless) {
197 continue;
198 }
199
200 QString desc = testName(filename, rate, norm, gapless);
201
202 QTest::newRow(strOf(desc))
203 << filename << rate << norm << gapless;
204 }
205 }
206 }
59 } 207 }
60 } 208 }
61 209
62 void read() 210 void read()
63 { 211 {
64 QFETCH(QString, audiofile); 212 QFETCH(QString, audiofile);
65 213 QFETCH(int, rate);
66 sv_samplerate_t readRate = 48000; 214 QFETCH(bool, normalised);
215 QFETCH(bool, gapless);
216
217 sv_samplerate_t readRate(rate);
218
219 cerr << "\naudiofile = " << audiofile << endl;
220
221 AudioFileReaderFactory::Parameters params;
222 params.targetRate = readRate;
223 params.normalisation = (normalised ?
224 AudioFileReaderFactory::Normalisation::Peak :
225 AudioFileReaderFactory::Normalisation::None);
226 params.gaplessMode = (gapless ?
227 AudioFileReaderFactory::GaplessMode::Gapless :
228 AudioFileReaderFactory::GaplessMode::Gappy);
67 229
68 AudioFileReader *reader = 230 AudioFileReader *reader =
69 AudioFileReaderFactory::createReader 231 AudioFileReaderFactory::createReader
70 (audioDir + "/" + audiofile, readRate); 232 (audioDir + "/" + audiofile, params);
71 233
72 QStringList fileAndExt = audiofile.split(".");
73 QStringList bits = fileAndExt[0].split("-");
74 QString extension = fileAndExt[1];
75 sv_samplerate_t nominalRate = bits[0].toInt();
76 int nominalChannels = bits[1].toInt();
77 int nominalDepth = 16;
78 if (bits.length() > 2) nominalDepth = bits[2].toInt();
79
80 if (!reader) { 234 if (!reader) {
81 #if ( QT_VERSION >= 0x050000 ) 235 #if ( QT_VERSION >= 0x050000 )
82 QSKIP("Unsupported file, skipping"); 236 QSKIP("Unsupported file, skipping");
83 #else 237 #else
84 QSKIP("Unsupported file, skipping", SkipSingle); 238 QSKIP("Unsupported file, skipping", SkipSingle);
85 #endif 239 #endif
86 } 240 }
87 241
88 QCOMPARE((int)reader->getChannelCount(), nominalChannels); 242 QString extension;
89 QCOMPARE(reader->getNativeRate(), nominalRate); 243 sv_samplerate_t fileRate;
244 int channels;
245 int fileBitdepth;
246 getFileMetadata(audiofile, extension, fileRate, channels, fileBitdepth);
247
248 QString diffFile = testName(audiofile, rate, normalised, gapless);
249 diffFile.replace(".", "_");
250 diffFile.replace(" ", "_");
251 diffFile += ".wav";
252 diffFile = QDir(diffDir).filePath(diffFile);
253 WavFileWriter diffWriter(diffFile, readRate, channels,
254 WavFileWriter::WriteToTarget); //!!! NB WriteToTemporary not working, why?
255 QVERIFY(diffWriter.isOK());
256
257 QCOMPARE((int)reader->getChannelCount(), channels);
258 QCOMPARE(reader->getNativeRate(), fileRate);
90 QCOMPARE(reader->getSampleRate(), readRate); 259 QCOMPARE(reader->getSampleRate(), readRate);
91 260
92 int channels = reader->getChannelCount();
93 AudioTestData tdata(readRate, channels); 261 AudioTestData tdata(readRate, channels);
94 262
95 float *reference = tdata.getInterleavedData(); 263 float *reference = tdata.getInterleavedData();
96 sv_frame_t refFrames = tdata.getFrameCount(); 264 sv_frame_t refFrames = tdata.getFrameCount();
97 265
101 // expected number back (if this is not mp3/aac) or (b) take 269 // expected number back (if this is not mp3/aac) or (b) take
102 // into account silence at beginning and end (if it is). 270 // into account silence at beginning and end (if it is).
103 vector<float> test = reader->getInterleavedFrames(0, refFrames + 5000); 271 vector<float> test = reader->getInterleavedFrames(0, refFrames + 5000);
104 sv_frame_t read = test.size() / channels; 272 sv_frame_t read = test.size() / channels;
105 273
106 if (extension == "mp3" || extension == "aac" || extension == "m4a") { 274 bool perceptual = (extension == "mp3" ||
107 // mp3s and aacs can have silence at start and end 275 extension == "aac" ||
276 extension == "m4a");
277
278 if (perceptual && !gapless) {
279 // allow silence at start and end
108 QVERIFY(read >= refFrames); 280 QVERIFY(read >= refFrames);
109 } else { 281 } else {
110 QCOMPARE(read, refFrames); 282 QCOMPARE(read, refFrames);
111 } 283 }
112 284
113 // Our limits are pretty relaxed -- we're not testing decoder 285 bool resampled = readRate != fileRate;
114 // or resampler quality here, just whether the results are 286 double maxLimit, rmsLimit;
115 // plainly wrong (e.g. at wrong samplerate or with an offset). 287 getExpectedThresholds(audiofile,
116 288 resampled,
117 double maxLimit = 0.01; 289 gapless,
118 double meanLimit = 0.001; 290 normalised,
119 double edgeLimit = maxLimit * 10; // in first or final edgeSize frames 291 maxLimit, rmsLimit);
292
293 double edgeLimit = maxLimit * 3; // in first or final edgeSize frames
294 if (resampled && edgeLimit < 0.1) edgeLimit = 0.1;
120 int edgeSize = 100; 295 int edgeSize = 100;
121 296
122 if (nominalDepth < 16) {
123 maxLimit = 0.02;
124 meanLimit = 0.02;
125 } else if (extension == "ogg" || extension == "mp3") {
126 maxLimit = 0.1;
127 meanLimit = 0.035;
128 edgeLimit = maxLimit * 3;
129 } else if (extension == "aac" || extension == "m4a") {
130 maxLimit = 0.3; // seems max diff can be quite large here
131 // even when mean is fairly small
132 meanLimit = 0.01;
133 edgeLimit = maxLimit * 3;
134 }
135
136 // And we ignore completely the last few frames when upsampling 297 // And we ignore completely the last few frames when upsampling
137 int discard = 1 + int(round(readRate / nominalRate)); 298 int discard = 1 + int(round(readRate / fileRate));
138 299
139 int offset = 0; 300 int offset = 0;
140 301
141 if (extension == "aac" || extension == "m4a") { 302 if (perceptual) {
142 // our m4a file appears to have a fixed offset of 1024 (at 303
143 // file sample rate) 304 // Look for an initial offset. What we're looking for is
144 // offset = int(round((1024 / nominalRate) * readRate));
145 offset = 0;
146 }
147
148 if (extension == "mp3") {
149 // ...while mp3s appear to vary. What we're looking for is
150 // the first peak of the sinusoid in the first channel 305 // the first peak of the sinusoid in the first channel
151 // (since we may have only the one channel). This should 306 // (since we may have only the one channel). This should
152 // appear at 0.4ms (see AudioTestData.h) 307 // appear at 0.4ms (see AudioTestData.h).
308
153 int expectedPeak = int(0.0004 * readRate); 309 int expectedPeak = int(0.0004 * readRate);
154 // std::cerr << "expectedPeak = " << expectedPeak << std::endl;
155 for (int i = 1; i < read; ++i) { 310 for (int i = 1; i < read; ++i) {
156 if (test[i * channels] > 0.8 && 311 if (test[i * channels] > 0.8 &&
157 test[(i+1) * channels] < test[i * channels]) { 312 test[(i+1) * channels] < test[i * channels]) {
158 offset = i - expectedPeak - 1; 313 offset = i - expectedPeak - 1;
159 // std::cerr << "actual peak = " << i-1 << std::endl;
160 break; 314 break;
161 } 315 }
162 } 316 }
163 // std::cerr << "offset = " << offset << std::endl; 317
164 } 318 std::cerr << "offset = " << offset << std::endl;
165 319 std::cerr << "at file rate would be " << (offset / readRate) * fileRate << std::endl;
320
321 // Previously our m4a test file had a fixed offset of 1024
322 // at the file sample rate -- this may be because it was
323 // produced by FAAC which did not write in the delay as
324 // metadata? We now have an m4a produced by Core Audio
325 // which gives a 0 offset. What to do...
326
327 // Anyway, mp3s should have 0 offset in gapless mode and
328 // "something else" otherwise.
329
330 if (gapless) {
331 QCOMPARE(offset, 0);
332 }
333 }
334
335 vector<vector<float>> diffs(channels);
336
166 for (int c = 0; c < channels; ++c) { 337 for (int c = 0; c < channels; ++c) {
167 338
168 float maxdiff = 0.f; 339 double maxDiff = 0.0;
169 int maxAt = 0; 340 double totalDiff = 0.0;
170 float totdiff = 0.f; 341 double totalSqrDiff = 0.0;
171 342 int maxIndex = 0;
343
344 // cerr << "\nchannel " << c << ": ";
345
172 for (int i = 0; i < refFrames; ++i) { 346 for (int i = 0; i < refFrames; ++i) {
173 int ix = i + offset; 347 int ix = i + offset;
174 if (ix >= read) { 348 if (ix >= read) {
175 cerr << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl; 349 cerr << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl;
176 QVERIFY(ix < read); 350 QVERIFY(ix < read);
177 } 351 }
352
353 float signeddiff =
354 test[ix * channels + c] -
355 reference[i * channels + c];
356
357 diffs[c].push_back(signeddiff);
358
178 if (ix + discard >= read) { 359 if (ix + discard >= read) {
179 // we forgive the very edge samples when 360 // we forgive the very edge samples when
180 // resampling (discard > 0) 361 // resampling (discard > 0)
181 continue; 362 continue;
182 } 363 }
183 float diff = fabsf(test[ix * channels + c] - 364
184 reference[i * channels + c]); 365 double diff = fabs(signeddiff);
185 totdiff += diff; 366
367 totalDiff += diff;
368 totalSqrDiff += diff * diff;
369
186 // in edge areas, record this only if it exceeds edgeLimit 370 // in edge areas, record this only if it exceeds edgeLimit
187 if (i < edgeSize || i + edgeSize >= read - offset) { 371 if (i < edgeSize || i + edgeSize >= refFrames) {
188 if (diff > edgeLimit && diff > maxdiff) { 372 if (diff > edgeLimit && diff > maxDiff) {
189 maxdiff = diff; 373 maxDiff = diff;
190 maxAt = i; 374 maxIndex = i;
191 } 375 }
192 } else { 376 } else {
193 if (diff > maxdiff) { 377 if (diff > maxDiff) {
194 maxdiff = diff; 378 maxDiff = diff;
195 maxAt = i; 379 maxIndex = i;
196 } 380 }
197 } 381 }
198 } 382 }
199 383
200 // check for spurious material at end 384 double meanDiff = totalDiff / double(refFrames);
385 double rmsDiff = sqrt(totalSqrDiff / double(refFrames));
386
387 cerr << "channel " << c << ": mean diff " << meanDiff << endl;
388 cerr << "channel " << c << ": rms diff " << rmsDiff << endl;
389 cerr << "channel " << c << ": max diff " << maxDiff << " at " << maxIndex << endl;
390
391 if (rmsDiff >= rmsLimit) {
392 cerr << "ERROR: for audiofile " << audiofile << ": RMS diff = " << rmsDiff << " for channel " << c << " (limit = " << rmsLimit << ")" << endl;
393 QVERIFY(rmsDiff < rmsLimit);
394 }
395 if (maxDiff >= maxLimit) {
396 cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxDiff << " at frame " << maxIndex << " of " << read << " on channel " << c << " (limit = " << maxLimit << ", edge limit = " << edgeLimit << ", mean diff = " << meanDiff << ", rms = " << rmsDiff << ")" << endl;
397 QVERIFY(maxDiff < maxLimit);
398 }
399
400 // and check for spurious material at end
401
201 for (sv_frame_t i = refFrames; i + offset < read; ++i) { 402 for (sv_frame_t i = refFrames; i + offset < read; ++i) {
202 sv_frame_t ix = i + offset; 403 sv_frame_t ix = i + offset;
203 float quiet = 1e-6f; 404 float quiet = 0.1; //!!! allow some ringing - but let's come back to this, it should tail off
204 float mag = fabsf(test[ix * channels + c]); 405 float mag = fabsf(test[ix * channels + c]);
205 if (mag > quiet) { 406 if (mag > quiet) {
206 cerr << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << ")" << endl; 407 cerr << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << " after reference+offset ended at " << refFrames+offset << ")" << endl;
207 QVERIFY(mag < quiet); 408 QVERIFY(mag < quiet);
208 } 409 }
209 } 410 }
210
211 float meandiff = totdiff / float(read);
212 // cerr << "meandiff on channel " << c << ": " << meandiff << endl;
213 // cerr << "maxdiff on channel " << c << ": " << maxdiff << " at " << maxAt << endl;
214 if (meandiff >= meanLimit) {
215 cerr << "ERROR: for audiofile " << audiofile << ": mean diff = " << meandiff << " for channel " << c << endl;
216 QVERIFY(meandiff < meanLimit);
217 }
218 if (maxdiff >= maxLimit) {
219 cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxdiff << " at frame " << maxAt << " of " << read << " on channel " << c << " (mean diff = " << meandiff << ")" << endl;
220 QVERIFY(maxdiff < maxLimit);
221 }
222 } 411 }
412
413 float **ptrs = new float*[channels];
414 for (int c = 0; c < channels; ++c) {
415 ptrs[c] = diffs[c].data();
416 }
417 diffWriter.writeSamples(ptrs, refFrames);
418 delete[] ptrs;
223 } 419 }
224 }; 420 };
225 421
226 #endif 422 #endif