cannam@167
|
1 /*
|
cannam@167
|
2 * Copyright (c) 2003, 2007-14 Matteo Frigo
|
cannam@167
|
3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
cannam@167
|
4 *
|
cannam@167
|
5 * This program is free software; you can redistribute it and/or modify
|
cannam@167
|
6 * it under the terms of the GNU General Public License as published by
|
cannam@167
|
7 * the Free Software Foundation; either version 2 of the License, or
|
cannam@167
|
8 * (at your option) any later version.
|
cannam@167
|
9 *
|
cannam@167
|
10 * This program is distributed in the hope that it will be useful,
|
cannam@167
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
cannam@167
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
cannam@167
|
13 * GNU General Public License for more details.
|
cannam@167
|
14 *
|
cannam@167
|
15 * You should have received a copy of the GNU General Public License
|
cannam@167
|
16 * along with this program; if not, write to the Free Software
|
cannam@167
|
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
cannam@167
|
18 *
|
cannam@167
|
19 */
|
cannam@167
|
20
|
cannam@167
|
21
|
cannam@167
|
22 #include "verify.h"
|
cannam@167
|
23 #include <math.h>
|
cannam@167
|
24 #include <stdlib.h>
|
cannam@167
|
25 #include <stdio.h>
|
cannam@167
|
26
|
cannam@167
|
27 /*
|
cannam@167
|
28 * Utility functions:
|
cannam@167
|
29 */
|
cannam@167
|
30 static double dabs(double x) { return (x < 0.0) ? -x : x; }
|
cannam@167
|
31 static double dmin(double x, double y) { return (x < y) ? x : y; }
|
cannam@167
|
32 static double norm2(double x, double y) { return dmax(dabs(x), dabs(y)); }
|
cannam@167
|
33
|
cannam@167
|
34 double dmax(double x, double y) { return (x > y) ? x : y; }
|
cannam@167
|
35
|
cannam@167
|
36 static double aerror(C *a, C *b, int n)
|
cannam@167
|
37 {
|
cannam@167
|
38 if (n > 0) {
|
cannam@167
|
39 /* compute the relative Linf error */
|
cannam@167
|
40 double e = 0.0, mag = 0.0;
|
cannam@167
|
41 int i;
|
cannam@167
|
42
|
cannam@167
|
43 for (i = 0; i < n; ++i) {
|
cannam@167
|
44 e = dmax(e, norm2(c_re(a[i]) - c_re(b[i]),
|
cannam@167
|
45 c_im(a[i]) - c_im(b[i])));
|
cannam@167
|
46 mag = dmax(mag,
|
cannam@167
|
47 dmin(norm2(c_re(a[i]), c_im(a[i])),
|
cannam@167
|
48 norm2(c_re(b[i]), c_im(b[i]))));
|
cannam@167
|
49 }
|
cannam@167
|
50 e /= mag;
|
cannam@167
|
51
|
cannam@167
|
52 #ifdef HAVE_ISNAN
|
cannam@167
|
53 BENCH_ASSERT(!isnan(e));
|
cannam@167
|
54 #endif
|
cannam@167
|
55 return e;
|
cannam@167
|
56 } else
|
cannam@167
|
57 return 0.0;
|
cannam@167
|
58 }
|
cannam@167
|
59
|
cannam@167
|
60 #ifdef HAVE_DRAND48
|
cannam@167
|
61 # if defined(HAVE_DECL_DRAND48) && !HAVE_DECL_DRAND48
|
cannam@167
|
62 extern double drand48(void);
|
cannam@167
|
63 # endif
|
cannam@167
|
64 double mydrand(void)
|
cannam@167
|
65 {
|
cannam@167
|
66 return drand48() - 0.5;
|
cannam@167
|
67 }
|
cannam@167
|
68 #else
|
cannam@167
|
69 double mydrand(void)
|
cannam@167
|
70 {
|
cannam@167
|
71 double d = rand();
|
cannam@167
|
72 return (d / (double) RAND_MAX) - 0.5;
|
cannam@167
|
73 }
|
cannam@167
|
74 #endif
|
cannam@167
|
75
|
cannam@167
|
76 void arand(C *a, int n)
|
cannam@167
|
77 {
|
cannam@167
|
78 int i;
|
cannam@167
|
79
|
cannam@167
|
80 /* generate random inputs */
|
cannam@167
|
81 for (i = 0; i < n; ++i) {
|
cannam@167
|
82 c_re(a[i]) = mydrand();
|
cannam@167
|
83 c_im(a[i]) = mydrand();
|
cannam@167
|
84 }
|
cannam@167
|
85 }
|
cannam@167
|
86
|
cannam@167
|
87 /* make array real */
|
cannam@167
|
88 void mkreal(C *A, int n)
|
cannam@167
|
89 {
|
cannam@167
|
90 int i;
|
cannam@167
|
91
|
cannam@167
|
92 for (i = 0; i < n; ++i) {
|
cannam@167
|
93 c_im(A[i]) = 0.0;
|
cannam@167
|
94 }
|
cannam@167
|
95 }
|
cannam@167
|
96
|
cannam@167
|
97 static void assign_conj(C *Ac, C *A, int rank, const bench_iodim *dim, int stride)
|
cannam@167
|
98 {
|
cannam@167
|
99 if (rank == 0) {
|
cannam@167
|
100 c_re(*Ac) = c_re(*A);
|
cannam@167
|
101 c_im(*Ac) = -c_im(*A);
|
cannam@167
|
102 }
|
cannam@167
|
103 else {
|
cannam@167
|
104 int i, n0 = dim[rank - 1].n, s = stride;
|
cannam@167
|
105 rank -= 1;
|
cannam@167
|
106 stride *= n0;
|
cannam@167
|
107 assign_conj(Ac, A, rank, dim, stride);
|
cannam@167
|
108 for (i = 1; i < n0; ++i)
|
cannam@167
|
109 assign_conj(Ac + (n0 - i) * s, A + i * s, rank, dim, stride);
|
cannam@167
|
110 }
|
cannam@167
|
111 }
|
cannam@167
|
112
|
cannam@167
|
113 /* make array hermitian */
|
cannam@167
|
114 void mkhermitian(C *A, int rank, const bench_iodim *dim, int stride)
|
cannam@167
|
115 {
|
cannam@167
|
116 if (rank == 0)
|
cannam@167
|
117 c_im(*A) = 0.0;
|
cannam@167
|
118 else {
|
cannam@167
|
119 int i, n0 = dim[rank - 1].n, s = stride;
|
cannam@167
|
120 rank -= 1;
|
cannam@167
|
121 stride *= n0;
|
cannam@167
|
122 mkhermitian(A, rank, dim, stride);
|
cannam@167
|
123 for (i = 1; 2*i < n0; ++i)
|
cannam@167
|
124 assign_conj(A + (n0 - i) * s, A + i * s, rank, dim, stride);
|
cannam@167
|
125 if (2*i == n0)
|
cannam@167
|
126 mkhermitian(A + i * s, rank, dim, stride);
|
cannam@167
|
127 }
|
cannam@167
|
128 }
|
cannam@167
|
129
|
cannam@167
|
130 void mkhermitian1(C *a, int n)
|
cannam@167
|
131 {
|
cannam@167
|
132 bench_iodim d;
|
cannam@167
|
133
|
cannam@167
|
134 d.n = n;
|
cannam@167
|
135 d.is = d.os = 1;
|
cannam@167
|
136 mkhermitian(a, 1, &d, 1);
|
cannam@167
|
137 }
|
cannam@167
|
138
|
cannam@167
|
139 /* C = A */
|
cannam@167
|
140 void acopy(C *c, C *a, int n)
|
cannam@167
|
141 {
|
cannam@167
|
142 int i;
|
cannam@167
|
143
|
cannam@167
|
144 for (i = 0; i < n; ++i) {
|
cannam@167
|
145 c_re(c[i]) = c_re(a[i]);
|
cannam@167
|
146 c_im(c[i]) = c_im(a[i]);
|
cannam@167
|
147 }
|
cannam@167
|
148 }
|
cannam@167
|
149
|
cannam@167
|
150 /* C = A + B */
|
cannam@167
|
151 void aadd(C *c, C *a, C *b, int n)
|
cannam@167
|
152 {
|
cannam@167
|
153 int i;
|
cannam@167
|
154
|
cannam@167
|
155 for (i = 0; i < n; ++i) {
|
cannam@167
|
156 c_re(c[i]) = c_re(a[i]) + c_re(b[i]);
|
cannam@167
|
157 c_im(c[i]) = c_im(a[i]) + c_im(b[i]);
|
cannam@167
|
158 }
|
cannam@167
|
159 }
|
cannam@167
|
160
|
cannam@167
|
161 /* C = A - B */
|
cannam@167
|
162 void asub(C *c, C *a, C *b, int n)
|
cannam@167
|
163 {
|
cannam@167
|
164 int i;
|
cannam@167
|
165
|
cannam@167
|
166 for (i = 0; i < n; ++i) {
|
cannam@167
|
167 c_re(c[i]) = c_re(a[i]) - c_re(b[i]);
|
cannam@167
|
168 c_im(c[i]) = c_im(a[i]) - c_im(b[i]);
|
cannam@167
|
169 }
|
cannam@167
|
170 }
|
cannam@167
|
171
|
cannam@167
|
172 /* B = rotate left A (complex) */
|
cannam@167
|
173 void arol(C *b, C *a, int n, int nb, int na)
|
cannam@167
|
174 {
|
cannam@167
|
175 int i, ib, ia;
|
cannam@167
|
176
|
cannam@167
|
177 for (ib = 0; ib < nb; ++ib) {
|
cannam@167
|
178 for (i = 0; i < n - 1; ++i)
|
cannam@167
|
179 for (ia = 0; ia < na; ++ia) {
|
cannam@167
|
180 C *pb = b + (ib * n + i) * na + ia;
|
cannam@167
|
181 C *pa = a + (ib * n + i + 1) * na + ia;
|
cannam@167
|
182 c_re(*pb) = c_re(*pa);
|
cannam@167
|
183 c_im(*pb) = c_im(*pa);
|
cannam@167
|
184 }
|
cannam@167
|
185
|
cannam@167
|
186 for (ia = 0; ia < na; ++ia) {
|
cannam@167
|
187 C *pb = b + (ib * n + n - 1) * na + ia;
|
cannam@167
|
188 C *pa = a + ib * n * na + ia;
|
cannam@167
|
189 c_re(*pb) = c_re(*pa);
|
cannam@167
|
190 c_im(*pb) = c_im(*pa);
|
cannam@167
|
191 }
|
cannam@167
|
192 }
|
cannam@167
|
193 }
|
cannam@167
|
194
|
cannam@167
|
195 void aphase_shift(C *b, C *a, int n, int nb, int na, double sign)
|
cannam@167
|
196 {
|
cannam@167
|
197 int j, jb, ja;
|
cannam@167
|
198 trigreal twopin;
|
cannam@167
|
199 twopin = K2PI / n;
|
cannam@167
|
200
|
cannam@167
|
201 for (jb = 0; jb < nb; ++jb)
|
cannam@167
|
202 for (j = 0; j < n; ++j) {
|
cannam@167
|
203 trigreal s = sign * SIN(j * twopin);
|
cannam@167
|
204 trigreal c = COS(j * twopin);
|
cannam@167
|
205
|
cannam@167
|
206 for (ja = 0; ja < na; ++ja) {
|
cannam@167
|
207 int k = (jb * n + j) * na + ja;
|
cannam@167
|
208 c_re(b[k]) = c_re(a[k]) * c - c_im(a[k]) * s;
|
cannam@167
|
209 c_im(b[k]) = c_re(a[k]) * s + c_im(a[k]) * c;
|
cannam@167
|
210 }
|
cannam@167
|
211 }
|
cannam@167
|
212 }
|
cannam@167
|
213
|
cannam@167
|
214 /* A = alpha * A (complex, in place) */
|
cannam@167
|
215 void ascale(C *a, C alpha, int n)
|
cannam@167
|
216 {
|
cannam@167
|
217 int i;
|
cannam@167
|
218
|
cannam@167
|
219 for (i = 0; i < n; ++i) {
|
cannam@167
|
220 R xr = c_re(a[i]), xi = c_im(a[i]);
|
cannam@167
|
221 c_re(a[i]) = xr * c_re(alpha) - xi * c_im(alpha);
|
cannam@167
|
222 c_im(a[i]) = xr * c_im(alpha) + xi * c_re(alpha);
|
cannam@167
|
223 }
|
cannam@167
|
224 }
|
cannam@167
|
225
|
cannam@167
|
226
|
cannam@167
|
227 double acmp(C *a, C *b, int n, const char *test, double tol)
|
cannam@167
|
228 {
|
cannam@167
|
229 double d = aerror(a, b, n);
|
cannam@167
|
230 if (d > tol) {
|
cannam@167
|
231 ovtpvt_err("Found relative error %e (%s)\n", d, test);
|
cannam@167
|
232
|
cannam@167
|
233 {
|
cannam@167
|
234 int i, N;
|
cannam@167
|
235 N = n > 300 && verbose <= 2 ? 300 : n;
|
cannam@167
|
236 for (i = 0; i < N; ++i)
|
cannam@167
|
237 ovtpvt_err("%8d %16.12f %16.12f %16.12f %16.12f\n", i,
|
cannam@167
|
238 (double) c_re(a[i]), (double) c_im(a[i]),
|
cannam@167
|
239 (double) c_re(b[i]), (double) c_im(b[i]));
|
cannam@167
|
240 }
|
cannam@167
|
241
|
cannam@167
|
242 bench_exit(EXIT_FAILURE);
|
cannam@167
|
243 }
|
cannam@167
|
244 return d;
|
cannam@167
|
245 }
|
cannam@167
|
246
|
cannam@167
|
247
|
cannam@167
|
248 /*
|
cannam@167
|
249 * Implementation of the FFT tester described in
|
cannam@167
|
250 *
|
cannam@167
|
251 * Funda Ergün. Testing multivariate linear functions: Overcoming the
|
cannam@167
|
252 * generator bottleneck. In Proceedings of the Twenty-Seventh Annual
|
cannam@167
|
253 * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas,
|
cannam@167
|
254 * Nevada, 29 May--1 June 1995.
|
cannam@167
|
255 *
|
cannam@167
|
256 * Also: F. Ergun, S. R. Kumar, and D. Sivakumar, "Self-testing without
|
cannam@167
|
257 * the generator bottleneck," SIAM J. on Computing 29 (5), 1630-51 (2000).
|
cannam@167
|
258 */
|
cannam@167
|
259
|
cannam@167
|
260 static double impulse0(dofft_closure *k,
|
cannam@167
|
261 int n, int vecn,
|
cannam@167
|
262 C *inA, C *inB, C *inC,
|
cannam@167
|
263 C *outA, C *outB, C *outC,
|
cannam@167
|
264 C *tmp, int rounds, double tol)
|
cannam@167
|
265 {
|
cannam@167
|
266 int N = n * vecn;
|
cannam@167
|
267 double e = 0.0;
|
cannam@167
|
268 int j;
|
cannam@167
|
269
|
cannam@167
|
270 k->apply(k, inA, tmp);
|
cannam@167
|
271 e = dmax(e, acmp(tmp, outA, N, "impulse 1", tol));
|
cannam@167
|
272
|
cannam@167
|
273 for (j = 0; j < rounds; ++j) {
|
cannam@167
|
274 arand(inB, N);
|
cannam@167
|
275 asub(inC, inA, inB, N);
|
cannam@167
|
276 k->apply(k, inB, outB);
|
cannam@167
|
277 k->apply(k, inC, outC);
|
cannam@167
|
278 aadd(tmp, outB, outC, N);
|
cannam@167
|
279 e = dmax(e, acmp(tmp, outA, N, "impulse", tol));
|
cannam@167
|
280 }
|
cannam@167
|
281 return e;
|
cannam@167
|
282 }
|
cannam@167
|
283
|
cannam@167
|
284 double impulse(dofft_closure *k,
|
cannam@167
|
285 int n, int vecn,
|
cannam@167
|
286 C *inA, C *inB, C *inC,
|
cannam@167
|
287 C *outA, C *outB, C *outC,
|
cannam@167
|
288 C *tmp, int rounds, double tol)
|
cannam@167
|
289 {
|
cannam@167
|
290 int i, j;
|
cannam@167
|
291 double e = 0.0;
|
cannam@167
|
292
|
cannam@167
|
293 /* check impulsive input */
|
cannam@167
|
294 for (i = 0; i < vecn; ++i) {
|
cannam@167
|
295 R x = (sqrt(n)*(i+1)) / (double)(vecn+1);
|
cannam@167
|
296 for (j = 0; j < n; ++j) {
|
cannam@167
|
297 c_re(inA[j + i * n]) = 0;
|
cannam@167
|
298 c_im(inA[j + i * n]) = 0;
|
cannam@167
|
299 c_re(outA[j + i * n]) = x;
|
cannam@167
|
300 c_im(outA[j + i * n]) = 0;
|
cannam@167
|
301 }
|
cannam@167
|
302 c_re(inA[i * n]) = x;
|
cannam@167
|
303 c_im(inA[i * n]) = 0;
|
cannam@167
|
304 }
|
cannam@167
|
305
|
cannam@167
|
306 e = dmax(e, impulse0(k, n, vecn, inA, inB, inC, outA, outB, outC,
|
cannam@167
|
307 tmp, rounds, tol));
|
cannam@167
|
308
|
cannam@167
|
309 /* check constant input */
|
cannam@167
|
310 for (i = 0; i < vecn; ++i) {
|
cannam@167
|
311 R x = (i+1) / ((double)(vecn+1) * sqrt(n));
|
cannam@167
|
312 for (j = 0; j < n; ++j) {
|
cannam@167
|
313 c_re(inA[j + i * n]) = x;
|
cannam@167
|
314 c_im(inA[j + i * n]) = 0;
|
cannam@167
|
315 c_re(outA[j + i * n]) = 0;
|
cannam@167
|
316 c_im(outA[j + i * n]) = 0;
|
cannam@167
|
317 }
|
cannam@167
|
318 c_re(outA[i * n]) = n * x;
|
cannam@167
|
319 c_im(outA[i * n]) = 0;
|
cannam@167
|
320 }
|
cannam@167
|
321
|
cannam@167
|
322 e = dmax(e, impulse0(k, n, vecn, inA, inB, inC, outA, outB, outC,
|
cannam@167
|
323 tmp, rounds, tol));
|
cannam@167
|
324 return e;
|
cannam@167
|
325 }
|
cannam@167
|
326
|
cannam@167
|
327 double linear(dofft_closure *k, int realp,
|
cannam@167
|
328 int n, C *inA, C *inB, C *inC, C *outA,
|
cannam@167
|
329 C *outB, C *outC, C *tmp, int rounds, double tol)
|
cannam@167
|
330 {
|
cannam@167
|
331 int j;
|
cannam@167
|
332 double e = 0.0;
|
cannam@167
|
333
|
cannam@167
|
334 for (j = 0; j < rounds; ++j) {
|
cannam@167
|
335 C alpha, beta;
|
cannam@167
|
336 c_re(alpha) = mydrand();
|
cannam@167
|
337 c_im(alpha) = realp ? 0.0 : mydrand();
|
cannam@167
|
338 c_re(beta) = mydrand();
|
cannam@167
|
339 c_im(beta) = realp ? 0.0 : mydrand();
|
cannam@167
|
340 arand(inA, n);
|
cannam@167
|
341 arand(inB, n);
|
cannam@167
|
342 k->apply(k, inA, outA);
|
cannam@167
|
343 k->apply(k, inB, outB);
|
cannam@167
|
344
|
cannam@167
|
345 ascale(outA, alpha, n);
|
cannam@167
|
346 ascale(outB, beta, n);
|
cannam@167
|
347 aadd(tmp, outA, outB, n);
|
cannam@167
|
348 ascale(inA, alpha, n);
|
cannam@167
|
349 ascale(inB, beta, n);
|
cannam@167
|
350 aadd(inC, inA, inB, n);
|
cannam@167
|
351 k->apply(k, inC, outC);
|
cannam@167
|
352
|
cannam@167
|
353 e = dmax(e, acmp(outC, tmp, n, "linear", tol));
|
cannam@167
|
354 }
|
cannam@167
|
355 return e;
|
cannam@167
|
356 }
|
cannam@167
|
357
|
cannam@167
|
358
|
cannam@167
|
359
|
cannam@167
|
360 double tf_shift(dofft_closure *k,
|
cannam@167
|
361 int realp, const bench_tensor *sz,
|
cannam@167
|
362 int n, int vecn, double sign,
|
cannam@167
|
363 C *inA, C *inB, C *outA, C *outB, C *tmp,
|
cannam@167
|
364 int rounds, double tol, int which_shift)
|
cannam@167
|
365 {
|
cannam@167
|
366 int nb, na, dim, N = n * vecn;
|
cannam@167
|
367 int i, j;
|
cannam@167
|
368 double e = 0.0;
|
cannam@167
|
369
|
cannam@167
|
370 /* test 3: check the time-shift property */
|
cannam@167
|
371 /* the paper performs more tests, but this code should be fine too */
|
cannam@167
|
372
|
cannam@167
|
373 nb = 1;
|
cannam@167
|
374 na = n;
|
cannam@167
|
375
|
cannam@167
|
376 /* check shifts across all SZ dimensions */
|
cannam@167
|
377 for (dim = 0; dim < sz->rnk; ++dim) {
|
cannam@167
|
378 int ncur = sz->dims[dim].n;
|
cannam@167
|
379
|
cannam@167
|
380 na /= ncur;
|
cannam@167
|
381
|
cannam@167
|
382 for (j = 0; j < rounds; ++j) {
|
cannam@167
|
383 arand(inA, N);
|
cannam@167
|
384
|
cannam@167
|
385 if (which_shift == TIME_SHIFT) {
|
cannam@167
|
386 for (i = 0; i < vecn; ++i) {
|
cannam@167
|
387 if (realp) mkreal(inA + i * n, n);
|
cannam@167
|
388 arol(inB + i * n, inA + i * n, ncur, nb, na);
|
cannam@167
|
389 }
|
cannam@167
|
390 k->apply(k, inA, outA);
|
cannam@167
|
391 k->apply(k, inB, outB);
|
cannam@167
|
392 for (i = 0; i < vecn; ++i)
|
cannam@167
|
393 aphase_shift(tmp + i * n, outB + i * n, ncur,
|
cannam@167
|
394 nb, na, sign);
|
cannam@167
|
395 e = dmax(e, acmp(tmp, outA, N, "time shift", tol));
|
cannam@167
|
396 } else {
|
cannam@167
|
397 for (i = 0; i < vecn; ++i) {
|
cannam@167
|
398 if (realp)
|
cannam@167
|
399 mkhermitian(inA + i * n, sz->rnk, sz->dims, 1);
|
cannam@167
|
400 aphase_shift(inB + i * n, inA + i * n, ncur,
|
cannam@167
|
401 nb, na, -sign);
|
cannam@167
|
402 }
|
cannam@167
|
403 k->apply(k, inA, outA);
|
cannam@167
|
404 k->apply(k, inB, outB);
|
cannam@167
|
405 for (i = 0; i < vecn; ++i)
|
cannam@167
|
406 arol(tmp + i * n, outB + i * n, ncur, nb, na);
|
cannam@167
|
407 e = dmax(e, acmp(tmp, outA, N, "freq shift", tol));
|
cannam@167
|
408 }
|
cannam@167
|
409 }
|
cannam@167
|
410
|
cannam@167
|
411 nb *= ncur;
|
cannam@167
|
412 }
|
cannam@167
|
413 return e;
|
cannam@167
|
414 }
|
cannam@167
|
415
|
cannam@167
|
416
|
cannam@167
|
417 void preserves_input(dofft_closure *k, aconstrain constrain,
|
cannam@167
|
418 int n, C *inA, C *inB, C *outB, int rounds)
|
cannam@167
|
419 {
|
cannam@167
|
420 int j;
|
cannam@167
|
421 int recopy_input = k->recopy_input;
|
cannam@167
|
422
|
cannam@167
|
423 k->recopy_input = 1;
|
cannam@167
|
424 for (j = 0; j < rounds; ++j) {
|
cannam@167
|
425 arand(inA, n);
|
cannam@167
|
426 if (constrain)
|
cannam@167
|
427 constrain(inA, n);
|
cannam@167
|
428
|
cannam@167
|
429 acopy(inB, inA, n);
|
cannam@167
|
430 k->apply(k, inB, outB);
|
cannam@167
|
431 acmp(inB, inA, n, "preserves_input", 0.0);
|
cannam@167
|
432 }
|
cannam@167
|
433 k->recopy_input = recopy_input;
|
cannam@167
|
434 }
|
cannam@167
|
435
|
cannam@167
|
436
|
cannam@167
|
437 /* Make a copy of the size tensor, with the same dimensions, but with
|
cannam@167
|
438 the strides corresponding to a "packed" row-major array with the
|
cannam@167
|
439 given stride. */
|
cannam@167
|
440 bench_tensor *verify_pack(const bench_tensor *sz, int s)
|
cannam@167
|
441 {
|
cannam@167
|
442 bench_tensor *x = tensor_copy(sz);
|
cannam@167
|
443 if (BENCH_FINITE_RNK(x->rnk) && x->rnk > 0) {
|
cannam@167
|
444 int i;
|
cannam@167
|
445 x->dims[x->rnk - 1].is = s;
|
cannam@167
|
446 x->dims[x->rnk - 1].os = s;
|
cannam@167
|
447 for (i = x->rnk - 1; i > 0; --i) {
|
cannam@167
|
448 x->dims[i - 1].is = x->dims[i].is * x->dims[i].n;
|
cannam@167
|
449 x->dims[i - 1].os = x->dims[i].os * x->dims[i].n;
|
cannam@167
|
450 }
|
cannam@167
|
451 }
|
cannam@167
|
452 return x;
|
cannam@167
|
453 }
|
cannam@167
|
454
|
cannam@167
|
455 static int all_zero(C *a, int n)
|
cannam@167
|
456 {
|
cannam@167
|
457 int i;
|
cannam@167
|
458 for (i = 0; i < n; ++i)
|
cannam@167
|
459 if (c_re(a[i]) != 0.0 || c_im(a[i]) != 0.0)
|
cannam@167
|
460 return 0;
|
cannam@167
|
461 return 1;
|
cannam@167
|
462 }
|
cannam@167
|
463
|
cannam@167
|
464 static int one_accuracy_test(dofft_closure *k, aconstrain constrain,
|
cannam@167
|
465 int sign, int n, C *a, C *b,
|
cannam@167
|
466 double t[6])
|
cannam@167
|
467 {
|
cannam@167
|
468 double err[6];
|
cannam@167
|
469
|
cannam@167
|
470 if (constrain)
|
cannam@167
|
471 constrain(a, n);
|
cannam@167
|
472
|
cannam@167
|
473 if (all_zero(a, n))
|
cannam@167
|
474 return 0;
|
cannam@167
|
475
|
cannam@167
|
476 k->apply(k, a, b);
|
cannam@167
|
477 fftaccuracy(n, a, b, sign, err);
|
cannam@167
|
478
|
cannam@167
|
479 t[0] += err[0];
|
cannam@167
|
480 t[1] += err[1] * err[1];
|
cannam@167
|
481 t[2] = dmax(t[2], err[2]);
|
cannam@167
|
482 t[3] += err[3];
|
cannam@167
|
483 t[4] += err[4] * err[4];
|
cannam@167
|
484 t[5] = dmax(t[5], err[5]);
|
cannam@167
|
485
|
cannam@167
|
486 return 1;
|
cannam@167
|
487 }
|
cannam@167
|
488
|
cannam@167
|
489 void accuracy_test(dofft_closure *k, aconstrain constrain,
|
cannam@167
|
490 int sign, int n, C *a, C *b, int rounds, int impulse_rounds,
|
cannam@167
|
491 double t[6])
|
cannam@167
|
492 {
|
cannam@167
|
493 int r, i;
|
cannam@167
|
494 int ntests = 0;
|
cannam@167
|
495 bench_complex czero = {0, 0};
|
cannam@167
|
496
|
cannam@167
|
497 for (i = 0; i < 6; ++i) t[i] = 0.0;
|
cannam@167
|
498
|
cannam@167
|
499 for (r = 0; r < rounds; ++r) {
|
cannam@167
|
500 arand(a, n);
|
cannam@167
|
501 if (one_accuracy_test(k, constrain, sign, n, a, b, t))
|
cannam@167
|
502 ++ntests;
|
cannam@167
|
503 }
|
cannam@167
|
504
|
cannam@167
|
505 /* impulses at beginning of array */
|
cannam@167
|
506 for (r = 0; r < impulse_rounds; ++r) {
|
cannam@167
|
507 if (r > n - r - 1)
|
cannam@167
|
508 continue;
|
cannam@167
|
509
|
cannam@167
|
510 caset(a, n, czero);
|
cannam@167
|
511 c_re(a[r]) = c_im(a[r]) = 1.0;
|
cannam@167
|
512
|
cannam@167
|
513 if (one_accuracy_test(k, constrain, sign, n, a, b, t))
|
cannam@167
|
514 ++ntests;
|
cannam@167
|
515 }
|
cannam@167
|
516
|
cannam@167
|
517 /* impulses at end of array */
|
cannam@167
|
518 for (r = 0; r < impulse_rounds; ++r) {
|
cannam@167
|
519 if (r <= n - r - 1)
|
cannam@167
|
520 continue;
|
cannam@167
|
521
|
cannam@167
|
522 caset(a, n, czero);
|
cannam@167
|
523 c_re(a[n - r - 1]) = c_im(a[n - r - 1]) = 1.0;
|
cannam@167
|
524
|
cannam@167
|
525 if (one_accuracy_test(k, constrain, sign, n, a, b, t))
|
cannam@167
|
526 ++ntests;
|
cannam@167
|
527 }
|
cannam@167
|
528
|
cannam@167
|
529 /* randomly-located impulses */
|
cannam@167
|
530 for (r = 0; r < impulse_rounds; ++r) {
|
cannam@167
|
531 caset(a, n, czero);
|
cannam@167
|
532 i = rand() % n;
|
cannam@167
|
533 c_re(a[i]) = c_im(a[i]) = 1.0;
|
cannam@167
|
534
|
cannam@167
|
535 if (one_accuracy_test(k, constrain, sign, n, a, b, t))
|
cannam@167
|
536 ++ntests;
|
cannam@167
|
537 }
|
cannam@167
|
538
|
cannam@167
|
539 t[0] /= ntests;
|
cannam@167
|
540 t[1] = sqrt(t[1] / ntests);
|
cannam@167
|
541 t[3] /= ntests;
|
cannam@167
|
542 t[4] = sqrt(t[4] / ntests);
|
cannam@167
|
543
|
cannam@167
|
544 fftaccuracy_done();
|
cannam@167
|
545 }
|