Mercurial > hg > sv-dependency-builds
comparison src/fftw-3.3.5/libbench2/verify-lib.c @ 42:2cd0e3b3e1fd
Current fftw source
author | Chris Cannam |
---|---|
date | Tue, 18 Oct 2016 13:40:26 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
41:481f5f8c5634 | 42:2cd0e3b3e1fd |
---|---|
1 /* | |
2 * Copyright (c) 2003, 2007-14 Matteo Frigo | |
3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License as published by | |
7 * the Free Software Foundation; either version 2 of the License, or | |
8 * (at your option) any later version. | |
9 * | |
10 * This program is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 * GNU General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU General Public License | |
16 * along with this program; if not, write to the Free Software | |
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 * | |
19 */ | |
20 | |
21 | |
22 #include "verify.h" | |
23 #include <math.h> | |
24 #include <stdlib.h> | |
25 #include <stdio.h> | |
26 | |
27 /* | |
28 * Utility functions: | |
29 */ | |
30 static double dabs(double x) { return (x < 0.0) ? -x : x; } | |
31 static double dmin(double x, double y) { return (x < y) ? x : y; } | |
32 static double norm2(double x, double y) { return dmax(dabs(x), dabs(y)); } | |
33 | |
34 double dmax(double x, double y) { return (x > y) ? x : y; } | |
35 | |
36 static double aerror(C *a, C *b, int n) | |
37 { | |
38 if (n > 0) { | |
39 /* compute the relative Linf error */ | |
40 double e = 0.0, mag = 0.0; | |
41 int i; | |
42 | |
43 for (i = 0; i < n; ++i) { | |
44 e = dmax(e, norm2(c_re(a[i]) - c_re(b[i]), | |
45 c_im(a[i]) - c_im(b[i]))); | |
46 mag = dmax(mag, | |
47 dmin(norm2(c_re(a[i]), c_im(a[i])), | |
48 norm2(c_re(b[i]), c_im(b[i])))); | |
49 } | |
50 e /= mag; | |
51 | |
52 #ifdef HAVE_ISNAN | |
53 BENCH_ASSERT(!isnan(e)); | |
54 #endif | |
55 return e; | |
56 } else | |
57 return 0.0; | |
58 } | |
59 | |
60 #ifdef HAVE_DRAND48 | |
61 # if defined(HAVE_DECL_DRAND48) && !HAVE_DECL_DRAND48 | |
62 extern double drand48(void); | |
63 # endif | |
64 double mydrand(void) | |
65 { | |
66 return drand48() - 0.5; | |
67 } | |
68 #else | |
69 double mydrand(void) | |
70 { | |
71 double d = rand(); | |
72 return (d / (double) RAND_MAX) - 0.5; | |
73 } | |
74 #endif | |
75 | |
76 void arand(C *a, int n) | |
77 { | |
78 int i; | |
79 | |
80 /* generate random inputs */ | |
81 for (i = 0; i < n; ++i) { | |
82 c_re(a[i]) = mydrand(); | |
83 c_im(a[i]) = mydrand(); | |
84 } | |
85 } | |
86 | |
87 /* make array real */ | |
88 void mkreal(C *A, int n) | |
89 { | |
90 int i; | |
91 | |
92 for (i = 0; i < n; ++i) { | |
93 c_im(A[i]) = 0.0; | |
94 } | |
95 } | |
96 | |
97 static void assign_conj(C *Ac, C *A, int rank, const bench_iodim *dim, int stride) | |
98 { | |
99 if (rank == 0) { | |
100 c_re(*Ac) = c_re(*A); | |
101 c_im(*Ac) = -c_im(*A); | |
102 } | |
103 else { | |
104 int i, n0 = dim[rank - 1].n, s = stride; | |
105 rank -= 1; | |
106 stride *= n0; | |
107 assign_conj(Ac, A, rank, dim, stride); | |
108 for (i = 1; i < n0; ++i) | |
109 assign_conj(Ac + (n0 - i) * s, A + i * s, rank, dim, stride); | |
110 } | |
111 } | |
112 | |
113 /* make array hermitian */ | |
114 void mkhermitian(C *A, int rank, const bench_iodim *dim, int stride) | |
115 { | |
116 if (rank == 0) | |
117 c_im(*A) = 0.0; | |
118 else { | |
119 int i, n0 = dim[rank - 1].n, s = stride; | |
120 rank -= 1; | |
121 stride *= n0; | |
122 mkhermitian(A, rank, dim, stride); | |
123 for (i = 1; 2*i < n0; ++i) | |
124 assign_conj(A + (n0 - i) * s, A + i * s, rank, dim, stride); | |
125 if (2*i == n0) | |
126 mkhermitian(A + i * s, rank, dim, stride); | |
127 } | |
128 } | |
129 | |
130 void mkhermitian1(C *a, int n) | |
131 { | |
132 bench_iodim d; | |
133 | |
134 d.n = n; | |
135 d.is = d.os = 1; | |
136 mkhermitian(a, 1, &d, 1); | |
137 } | |
138 | |
139 /* C = A */ | |
140 void acopy(C *c, C *a, int n) | |
141 { | |
142 int i; | |
143 | |
144 for (i = 0; i < n; ++i) { | |
145 c_re(c[i]) = c_re(a[i]); | |
146 c_im(c[i]) = c_im(a[i]); | |
147 } | |
148 } | |
149 | |
150 /* C = A + B */ | |
151 void aadd(C *c, C *a, C *b, int n) | |
152 { | |
153 int i; | |
154 | |
155 for (i = 0; i < n; ++i) { | |
156 c_re(c[i]) = c_re(a[i]) + c_re(b[i]); | |
157 c_im(c[i]) = c_im(a[i]) + c_im(b[i]); | |
158 } | |
159 } | |
160 | |
161 /* C = A - B */ | |
162 void asub(C *c, C *a, C *b, int n) | |
163 { | |
164 int i; | |
165 | |
166 for (i = 0; i < n; ++i) { | |
167 c_re(c[i]) = c_re(a[i]) - c_re(b[i]); | |
168 c_im(c[i]) = c_im(a[i]) - c_im(b[i]); | |
169 } | |
170 } | |
171 | |
172 /* B = rotate left A (complex) */ | |
173 void arol(C *b, C *a, int n, int nb, int na) | |
174 { | |
175 int i, ib, ia; | |
176 | |
177 for (ib = 0; ib < nb; ++ib) { | |
178 for (i = 0; i < n - 1; ++i) | |
179 for (ia = 0; ia < na; ++ia) { | |
180 C *pb = b + (ib * n + i) * na + ia; | |
181 C *pa = a + (ib * n + i + 1) * na + ia; | |
182 c_re(*pb) = c_re(*pa); | |
183 c_im(*pb) = c_im(*pa); | |
184 } | |
185 | |
186 for (ia = 0; ia < na; ++ia) { | |
187 C *pb = b + (ib * n + n - 1) * na + ia; | |
188 C *pa = a + ib * n * na + ia; | |
189 c_re(*pb) = c_re(*pa); | |
190 c_im(*pb) = c_im(*pa); | |
191 } | |
192 } | |
193 } | |
194 | |
195 void aphase_shift(C *b, C *a, int n, int nb, int na, double sign) | |
196 { | |
197 int j, jb, ja; | |
198 trigreal twopin; | |
199 twopin = K2PI / n; | |
200 | |
201 for (jb = 0; jb < nb; ++jb) | |
202 for (j = 0; j < n; ++j) { | |
203 trigreal s = sign * SIN(j * twopin); | |
204 trigreal c = COS(j * twopin); | |
205 | |
206 for (ja = 0; ja < na; ++ja) { | |
207 int k = (jb * n + j) * na + ja; | |
208 c_re(b[k]) = c_re(a[k]) * c - c_im(a[k]) * s; | |
209 c_im(b[k]) = c_re(a[k]) * s + c_im(a[k]) * c; | |
210 } | |
211 } | |
212 } | |
213 | |
214 /* A = alpha * A (complex, in place) */ | |
215 void ascale(C *a, C alpha, int n) | |
216 { | |
217 int i; | |
218 | |
219 for (i = 0; i < n; ++i) { | |
220 R xr = c_re(a[i]), xi = c_im(a[i]); | |
221 c_re(a[i]) = xr * c_re(alpha) - xi * c_im(alpha); | |
222 c_im(a[i]) = xr * c_im(alpha) + xi * c_re(alpha); | |
223 } | |
224 } | |
225 | |
226 | |
227 double acmp(C *a, C *b, int n, const char *test, double tol) | |
228 { | |
229 double d = aerror(a, b, n); | |
230 if (d > tol) { | |
231 ovtpvt_err("Found relative error %e (%s)\n", d, test); | |
232 | |
233 { | |
234 int i, N; | |
235 N = n > 300 && verbose <= 2 ? 300 : n; | |
236 for (i = 0; i < N; ++i) | |
237 ovtpvt_err("%8d %16.12f %16.12f %16.12f %16.12f\n", i, | |
238 (double) c_re(a[i]), (double) c_im(a[i]), | |
239 (double) c_re(b[i]), (double) c_im(b[i])); | |
240 } | |
241 | |
242 bench_exit(EXIT_FAILURE); | |
243 } | |
244 return d; | |
245 } | |
246 | |
247 | |
248 /* | |
249 * Implementation of the FFT tester described in | |
250 * | |
251 * Funda Ergün. Testing multivariate linear functions: Overcoming the | |
252 * generator bottleneck. In Proceedings of the Twenty-Seventh Annual | |
253 * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas, | |
254 * Nevada, 29 May--1 June 1995. | |
255 * | |
256 * Also: F. Ergun, S. R. Kumar, and D. Sivakumar, "Self-testing without | |
257 * the generator bottleneck," SIAM J. on Computing 29 (5), 1630-51 (2000). | |
258 */ | |
259 | |
260 static double impulse0(dofft_closure *k, | |
261 int n, int vecn, | |
262 C *inA, C *inB, C *inC, | |
263 C *outA, C *outB, C *outC, | |
264 C *tmp, int rounds, double tol) | |
265 { | |
266 int N = n * vecn; | |
267 double e = 0.0; | |
268 int j; | |
269 | |
270 k->apply(k, inA, tmp); | |
271 e = dmax(e, acmp(tmp, outA, N, "impulse 1", tol)); | |
272 | |
273 for (j = 0; j < rounds; ++j) { | |
274 arand(inB, N); | |
275 asub(inC, inA, inB, N); | |
276 k->apply(k, inB, outB); | |
277 k->apply(k, inC, outC); | |
278 aadd(tmp, outB, outC, N); | |
279 e = dmax(e, acmp(tmp, outA, N, "impulse", tol)); | |
280 } | |
281 return e; | |
282 } | |
283 | |
284 double impulse(dofft_closure *k, | |
285 int n, int vecn, | |
286 C *inA, C *inB, C *inC, | |
287 C *outA, C *outB, C *outC, | |
288 C *tmp, int rounds, double tol) | |
289 { | |
290 int i, j; | |
291 double e = 0.0; | |
292 | |
293 /* check impulsive input */ | |
294 for (i = 0; i < vecn; ++i) { | |
295 R x = (sqrt(n)*(i+1)) / (double)(vecn+1); | |
296 for (j = 0; j < n; ++j) { | |
297 c_re(inA[j + i * n]) = 0; | |
298 c_im(inA[j + i * n]) = 0; | |
299 c_re(outA[j + i * n]) = x; | |
300 c_im(outA[j + i * n]) = 0; | |
301 } | |
302 c_re(inA[i * n]) = x; | |
303 c_im(inA[i * n]) = 0; | |
304 } | |
305 | |
306 e = dmax(e, impulse0(k, n, vecn, inA, inB, inC, outA, outB, outC, | |
307 tmp, rounds, tol)); | |
308 | |
309 /* check constant input */ | |
310 for (i = 0; i < vecn; ++i) { | |
311 R x = (i+1) / ((double)(vecn+1) * sqrt(n)); | |
312 for (j = 0; j < n; ++j) { | |
313 c_re(inA[j + i * n]) = x; | |
314 c_im(inA[j + i * n]) = 0; | |
315 c_re(outA[j + i * n]) = 0; | |
316 c_im(outA[j + i * n]) = 0; | |
317 } | |
318 c_re(outA[i * n]) = n * x; | |
319 c_im(outA[i * n]) = 0; | |
320 } | |
321 | |
322 e = dmax(e, impulse0(k, n, vecn, inA, inB, inC, outA, outB, outC, | |
323 tmp, rounds, tol)); | |
324 return e; | |
325 } | |
326 | |
327 double linear(dofft_closure *k, int realp, | |
328 int n, C *inA, C *inB, C *inC, C *outA, | |
329 C *outB, C *outC, C *tmp, int rounds, double tol) | |
330 { | |
331 int j; | |
332 double e = 0.0; | |
333 | |
334 for (j = 0; j < rounds; ++j) { | |
335 C alpha, beta; | |
336 c_re(alpha) = mydrand(); | |
337 c_im(alpha) = realp ? 0.0 : mydrand(); | |
338 c_re(beta) = mydrand(); | |
339 c_im(beta) = realp ? 0.0 : mydrand(); | |
340 arand(inA, n); | |
341 arand(inB, n); | |
342 k->apply(k, inA, outA); | |
343 k->apply(k, inB, outB); | |
344 | |
345 ascale(outA, alpha, n); | |
346 ascale(outB, beta, n); | |
347 aadd(tmp, outA, outB, n); | |
348 ascale(inA, alpha, n); | |
349 ascale(inB, beta, n); | |
350 aadd(inC, inA, inB, n); | |
351 k->apply(k, inC, outC); | |
352 | |
353 e = dmax(e, acmp(outC, tmp, n, "linear", tol)); | |
354 } | |
355 return e; | |
356 } | |
357 | |
358 | |
359 | |
360 double tf_shift(dofft_closure *k, | |
361 int realp, const bench_tensor *sz, | |
362 int n, int vecn, double sign, | |
363 C *inA, C *inB, C *outA, C *outB, C *tmp, | |
364 int rounds, double tol, int which_shift) | |
365 { | |
366 int nb, na, dim, N = n * vecn; | |
367 int i, j; | |
368 double e = 0.0; | |
369 | |
370 /* test 3: check the time-shift property */ | |
371 /* the paper performs more tests, but this code should be fine too */ | |
372 | |
373 nb = 1; | |
374 na = n; | |
375 | |
376 /* check shifts across all SZ dimensions */ | |
377 for (dim = 0; dim < sz->rnk; ++dim) { | |
378 int ncur = sz->dims[dim].n; | |
379 | |
380 na /= ncur; | |
381 | |
382 for (j = 0; j < rounds; ++j) { | |
383 arand(inA, N); | |
384 | |
385 if (which_shift == TIME_SHIFT) { | |
386 for (i = 0; i < vecn; ++i) { | |
387 if (realp) mkreal(inA + i * n, n); | |
388 arol(inB + i * n, inA + i * n, ncur, nb, na); | |
389 } | |
390 k->apply(k, inA, outA); | |
391 k->apply(k, inB, outB); | |
392 for (i = 0; i < vecn; ++i) | |
393 aphase_shift(tmp + i * n, outB + i * n, ncur, | |
394 nb, na, sign); | |
395 e = dmax(e, acmp(tmp, outA, N, "time shift", tol)); | |
396 } else { | |
397 for (i = 0; i < vecn; ++i) { | |
398 if (realp) | |
399 mkhermitian(inA + i * n, sz->rnk, sz->dims, 1); | |
400 aphase_shift(inB + i * n, inA + i * n, ncur, | |
401 nb, na, -sign); | |
402 } | |
403 k->apply(k, inA, outA); | |
404 k->apply(k, inB, outB); | |
405 for (i = 0; i < vecn; ++i) | |
406 arol(tmp + i * n, outB + i * n, ncur, nb, na); | |
407 e = dmax(e, acmp(tmp, outA, N, "freq shift", tol)); | |
408 } | |
409 } | |
410 | |
411 nb *= ncur; | |
412 } | |
413 return e; | |
414 } | |
415 | |
416 | |
417 void preserves_input(dofft_closure *k, aconstrain constrain, | |
418 int n, C *inA, C *inB, C *outB, int rounds) | |
419 { | |
420 int j; | |
421 int recopy_input = k->recopy_input; | |
422 | |
423 k->recopy_input = 1; | |
424 for (j = 0; j < rounds; ++j) { | |
425 arand(inA, n); | |
426 if (constrain) | |
427 constrain(inA, n); | |
428 | |
429 acopy(inB, inA, n); | |
430 k->apply(k, inB, outB); | |
431 acmp(inB, inA, n, "preserves_input", 0.0); | |
432 } | |
433 k->recopy_input = recopy_input; | |
434 } | |
435 | |
436 | |
437 /* Make a copy of the size tensor, with the same dimensions, but with | |
438 the strides corresponding to a "packed" row-major array with the | |
439 given stride. */ | |
440 bench_tensor *verify_pack(const bench_tensor *sz, int s) | |
441 { | |
442 bench_tensor *x = tensor_copy(sz); | |
443 if (BENCH_FINITE_RNK(x->rnk) && x->rnk > 0) { | |
444 int i; | |
445 x->dims[x->rnk - 1].is = s; | |
446 x->dims[x->rnk - 1].os = s; | |
447 for (i = x->rnk - 1; i > 0; --i) { | |
448 x->dims[i - 1].is = x->dims[i].is * x->dims[i].n; | |
449 x->dims[i - 1].os = x->dims[i].os * x->dims[i].n; | |
450 } | |
451 } | |
452 return x; | |
453 } | |
454 | |
455 static int all_zero(C *a, int n) | |
456 { | |
457 int i; | |
458 for (i = 0; i < n; ++i) | |
459 if (c_re(a[i]) != 0.0 || c_im(a[i]) != 0.0) | |
460 return 0; | |
461 return 1; | |
462 } | |
463 | |
464 static int one_accuracy_test(dofft_closure *k, aconstrain constrain, | |
465 int sign, int n, C *a, C *b, | |
466 double t[6]) | |
467 { | |
468 double err[6]; | |
469 | |
470 if (constrain) | |
471 constrain(a, n); | |
472 | |
473 if (all_zero(a, n)) | |
474 return 0; | |
475 | |
476 k->apply(k, a, b); | |
477 fftaccuracy(n, a, b, sign, err); | |
478 | |
479 t[0] += err[0]; | |
480 t[1] += err[1] * err[1]; | |
481 t[2] = dmax(t[2], err[2]); | |
482 t[3] += err[3]; | |
483 t[4] += err[4] * err[4]; | |
484 t[5] = dmax(t[5], err[5]); | |
485 | |
486 return 1; | |
487 } | |
488 | |
489 void accuracy_test(dofft_closure *k, aconstrain constrain, | |
490 int sign, int n, C *a, C *b, int rounds, int impulse_rounds, | |
491 double t[6]) | |
492 { | |
493 int r, i; | |
494 int ntests = 0; | |
495 bench_complex czero = {0, 0}; | |
496 | |
497 for (i = 0; i < 6; ++i) t[i] = 0.0; | |
498 | |
499 for (r = 0; r < rounds; ++r) { | |
500 arand(a, n); | |
501 if (one_accuracy_test(k, constrain, sign, n, a, b, t)) | |
502 ++ntests; | |
503 } | |
504 | |
505 /* impulses at beginning of array */ | |
506 for (r = 0; r < impulse_rounds; ++r) { | |
507 if (r > n - r - 1) | |
508 continue; | |
509 | |
510 caset(a, n, czero); | |
511 c_re(a[r]) = c_im(a[r]) = 1.0; | |
512 | |
513 if (one_accuracy_test(k, constrain, sign, n, a, b, t)) | |
514 ++ntests; | |
515 } | |
516 | |
517 /* impulses at end of array */ | |
518 for (r = 0; r < impulse_rounds; ++r) { | |
519 if (r <= n - r - 1) | |
520 continue; | |
521 | |
522 caset(a, n, czero); | |
523 c_re(a[n - r - 1]) = c_im(a[n - r - 1]) = 1.0; | |
524 | |
525 if (one_accuracy_test(k, constrain, sign, n, a, b, t)) | |
526 ++ntests; | |
527 } | |
528 | |
529 /* randomly-located impulses */ | |
530 for (r = 0; r < impulse_rounds; ++r) { | |
531 caset(a, n, czero); | |
532 i = rand() % n; | |
533 c_re(a[i]) = c_im(a[i]) = 1.0; | |
534 | |
535 if (one_accuracy_test(k, constrain, sign, n, a, b, t)) | |
536 ++ntests; | |
537 } | |
538 | |
539 t[0] /= ntests; | |
540 t[1] = sqrt(t[1] / ntests); | |
541 t[3] /= ntests; | |
542 t[4] = sqrt(t[4] / ntests); | |
543 | |
544 fftaccuracy_done(); | |
545 } |