Mercurial > hg > sv-dependency-builds
diff src/fftw-3.3.5/libbench2/verify-lib.c @ 42:2cd0e3b3e1fd
Current fftw source
author | Chris Cannam |
---|---|
date | Tue, 18 Oct 2016 13:40:26 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/fftw-3.3.5/libbench2/verify-lib.c Tue Oct 18 13:40:26 2016 +0100 @@ -0,0 +1,545 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "verify.h" +#include <math.h> +#include <stdlib.h> +#include <stdio.h> + +/* + * Utility functions: + */ +static double dabs(double x) { return (x < 0.0) ? -x : x; } +static double dmin(double x, double y) { return (x < y) ? x : y; } +static double norm2(double x, double y) { return dmax(dabs(x), dabs(y)); } + +double dmax(double x, double y) { return (x > y) ? x : y; } + +static double aerror(C *a, C *b, int n) +{ + if (n > 0) { + /* compute the relative Linf error */ + double e = 0.0, mag = 0.0; + int i; + + for (i = 0; i < n; ++i) { + e = dmax(e, norm2(c_re(a[i]) - c_re(b[i]), + c_im(a[i]) - c_im(b[i]))); + mag = dmax(mag, + dmin(norm2(c_re(a[i]), c_im(a[i])), + norm2(c_re(b[i]), c_im(b[i])))); + } + e /= mag; + +#ifdef HAVE_ISNAN + BENCH_ASSERT(!isnan(e)); +#endif + return e; + } else + return 0.0; +} + +#ifdef HAVE_DRAND48 +# if defined(HAVE_DECL_DRAND48) && !HAVE_DECL_DRAND48 +extern double drand48(void); +# endif +double mydrand(void) +{ + return drand48() - 0.5; +} +#else +double mydrand(void) +{ + double d = rand(); + return (d / (double) RAND_MAX) - 0.5; +} +#endif + +void arand(C *a, int n) +{ + int i; + + /* generate random inputs */ + for (i = 0; i < n; ++i) { + c_re(a[i]) = mydrand(); + c_im(a[i]) = mydrand(); + } +} + +/* make array real */ +void mkreal(C *A, int n) +{ + int i; + + for (i = 0; i < n; ++i) { + c_im(A[i]) = 0.0; + } +} + +static void assign_conj(C *Ac, C *A, int rank, const bench_iodim *dim, int stride) +{ + if (rank == 0) { + c_re(*Ac) = c_re(*A); + c_im(*Ac) = -c_im(*A); + } + else { + int i, n0 = dim[rank - 1].n, s = stride; + rank -= 1; + stride *= n0; + assign_conj(Ac, A, rank, dim, stride); + for (i = 1; i < n0; ++i) + assign_conj(Ac + (n0 - i) * s, A + i * s, rank, dim, stride); + } +} + +/* make array hermitian */ +void mkhermitian(C *A, int rank, const bench_iodim *dim, int stride) +{ + if (rank == 0) + c_im(*A) = 0.0; + else { + int i, n0 = dim[rank - 1].n, s = stride; + rank -= 1; + stride *= n0; + mkhermitian(A, rank, dim, stride); + for (i = 1; 2*i < n0; ++i) + assign_conj(A + (n0 - i) * s, A + i * s, rank, dim, stride); + if (2*i == n0) + mkhermitian(A + i * s, rank, dim, stride); + } +} + +void mkhermitian1(C *a, int n) +{ + bench_iodim d; + + d.n = n; + d.is = d.os = 1; + mkhermitian(a, 1, &d, 1); +} + +/* C = A */ +void acopy(C *c, C *a, int n) +{ + int i; + + for (i = 0; i < n; ++i) { + c_re(c[i]) = c_re(a[i]); + c_im(c[i]) = c_im(a[i]); + } +} + +/* C = A + B */ +void aadd(C *c, C *a, C *b, int n) +{ + int i; + + for (i = 0; i < n; ++i) { + c_re(c[i]) = c_re(a[i]) + c_re(b[i]); + c_im(c[i]) = c_im(a[i]) + c_im(b[i]); + } +} + +/* C = A - B */ +void asub(C *c, C *a, C *b, int n) +{ + int i; + + for (i = 0; i < n; ++i) { + c_re(c[i]) = c_re(a[i]) - c_re(b[i]); + c_im(c[i]) = c_im(a[i]) - c_im(b[i]); + } +} + +/* B = rotate left A (complex) */ +void arol(C *b, C *a, int n, int nb, int na) +{ + int i, ib, ia; + + for (ib = 0; ib < nb; ++ib) { + for (i = 0; i < n - 1; ++i) + for (ia = 0; ia < na; ++ia) { + C *pb = b + (ib * n + i) * na + ia; + C *pa = a + (ib * n + i + 1) * na + ia; + c_re(*pb) = c_re(*pa); + c_im(*pb) = c_im(*pa); + } + + for (ia = 0; ia < na; ++ia) { + C *pb = b + (ib * n + n - 1) * na + ia; + C *pa = a + ib * n * na + ia; + c_re(*pb) = c_re(*pa); + c_im(*pb) = c_im(*pa); + } + } +} + +void aphase_shift(C *b, C *a, int n, int nb, int na, double sign) +{ + int j, jb, ja; + trigreal twopin; + twopin = K2PI / n; + + for (jb = 0; jb < nb; ++jb) + for (j = 0; j < n; ++j) { + trigreal s = sign * SIN(j * twopin); + trigreal c = COS(j * twopin); + + for (ja = 0; ja < na; ++ja) { + int k = (jb * n + j) * na + ja; + c_re(b[k]) = c_re(a[k]) * c - c_im(a[k]) * s; + c_im(b[k]) = c_re(a[k]) * s + c_im(a[k]) * c; + } + } +} + +/* A = alpha * A (complex, in place) */ +void ascale(C *a, C alpha, int n) +{ + int i; + + for (i = 0; i < n; ++i) { + R xr = c_re(a[i]), xi = c_im(a[i]); + c_re(a[i]) = xr * c_re(alpha) - xi * c_im(alpha); + c_im(a[i]) = xr * c_im(alpha) + xi * c_re(alpha); + } +} + + +double acmp(C *a, C *b, int n, const char *test, double tol) +{ + double d = aerror(a, b, n); + if (d > tol) { + ovtpvt_err("Found relative error %e (%s)\n", d, test); + + { + int i, N; + N = n > 300 && verbose <= 2 ? 300 : n; + for (i = 0; i < N; ++i) + ovtpvt_err("%8d %16.12f %16.12f %16.12f %16.12f\n", i, + (double) c_re(a[i]), (double) c_im(a[i]), + (double) c_re(b[i]), (double) c_im(b[i])); + } + + bench_exit(EXIT_FAILURE); + } + return d; +} + + +/* + * Implementation of the FFT tester described in + * + * Funda Ergün. Testing multivariate linear functions: Overcoming the + * generator bottleneck. In Proceedings of the Twenty-Seventh Annual + * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas, + * Nevada, 29 May--1 June 1995. + * + * Also: F. Ergun, S. R. Kumar, and D. Sivakumar, "Self-testing without + * the generator bottleneck," SIAM J. on Computing 29 (5), 1630-51 (2000). + */ + +static double impulse0(dofft_closure *k, + int n, int vecn, + C *inA, C *inB, C *inC, + C *outA, C *outB, C *outC, + C *tmp, int rounds, double tol) +{ + int N = n * vecn; + double e = 0.0; + int j; + + k->apply(k, inA, tmp); + e = dmax(e, acmp(tmp, outA, N, "impulse 1", tol)); + + for (j = 0; j < rounds; ++j) { + arand(inB, N); + asub(inC, inA, inB, N); + k->apply(k, inB, outB); + k->apply(k, inC, outC); + aadd(tmp, outB, outC, N); + e = dmax(e, acmp(tmp, outA, N, "impulse", tol)); + } + return e; +} + +double impulse(dofft_closure *k, + int n, int vecn, + C *inA, C *inB, C *inC, + C *outA, C *outB, C *outC, + C *tmp, int rounds, double tol) +{ + int i, j; + double e = 0.0; + + /* check impulsive input */ + for (i = 0; i < vecn; ++i) { + R x = (sqrt(n)*(i+1)) / (double)(vecn+1); + for (j = 0; j < n; ++j) { + c_re(inA[j + i * n]) = 0; + c_im(inA[j + i * n]) = 0; + c_re(outA[j + i * n]) = x; + c_im(outA[j + i * n]) = 0; + } + c_re(inA[i * n]) = x; + c_im(inA[i * n]) = 0; + } + + e = dmax(e, impulse0(k, n, vecn, inA, inB, inC, outA, outB, outC, + tmp, rounds, tol)); + + /* check constant input */ + for (i = 0; i < vecn; ++i) { + R x = (i+1) / ((double)(vecn+1) * sqrt(n)); + for (j = 0; j < n; ++j) { + c_re(inA[j + i * n]) = x; + c_im(inA[j + i * n]) = 0; + c_re(outA[j + i * n]) = 0; + c_im(outA[j + i * n]) = 0; + } + c_re(outA[i * n]) = n * x; + c_im(outA[i * n]) = 0; + } + + e = dmax(e, impulse0(k, n, vecn, inA, inB, inC, outA, outB, outC, + tmp, rounds, tol)); + return e; +} + +double linear(dofft_closure *k, int realp, + int n, C *inA, C *inB, C *inC, C *outA, + C *outB, C *outC, C *tmp, int rounds, double tol) +{ + int j; + double e = 0.0; + + for (j = 0; j < rounds; ++j) { + C alpha, beta; + c_re(alpha) = mydrand(); + c_im(alpha) = realp ? 0.0 : mydrand(); + c_re(beta) = mydrand(); + c_im(beta) = realp ? 0.0 : mydrand(); + arand(inA, n); + arand(inB, n); + k->apply(k, inA, outA); + k->apply(k, inB, outB); + + ascale(outA, alpha, n); + ascale(outB, beta, n); + aadd(tmp, outA, outB, n); + ascale(inA, alpha, n); + ascale(inB, beta, n); + aadd(inC, inA, inB, n); + k->apply(k, inC, outC); + + e = dmax(e, acmp(outC, tmp, n, "linear", tol)); + } + return e; +} + + + +double tf_shift(dofft_closure *k, + int realp, const bench_tensor *sz, + int n, int vecn, double sign, + C *inA, C *inB, C *outA, C *outB, C *tmp, + int rounds, double tol, int which_shift) +{ + int nb, na, dim, N = n * vecn; + int i, j; + double e = 0.0; + + /* test 3: check the time-shift property */ + /* the paper performs more tests, but this code should be fine too */ + + nb = 1; + na = n; + + /* check shifts across all SZ dimensions */ + for (dim = 0; dim < sz->rnk; ++dim) { + int ncur = sz->dims[dim].n; + + na /= ncur; + + for (j = 0; j < rounds; ++j) { + arand(inA, N); + + if (which_shift == TIME_SHIFT) { + for (i = 0; i < vecn; ++i) { + if (realp) mkreal(inA + i * n, n); + arol(inB + i * n, inA + i * n, ncur, nb, na); + } + k->apply(k, inA, outA); + k->apply(k, inB, outB); + for (i = 0; i < vecn; ++i) + aphase_shift(tmp + i * n, outB + i * n, ncur, + nb, na, sign); + e = dmax(e, acmp(tmp, outA, N, "time shift", tol)); + } else { + for (i = 0; i < vecn; ++i) { + if (realp) + mkhermitian(inA + i * n, sz->rnk, sz->dims, 1); + aphase_shift(inB + i * n, inA + i * n, ncur, + nb, na, -sign); + } + k->apply(k, inA, outA); + k->apply(k, inB, outB); + for (i = 0; i < vecn; ++i) + arol(tmp + i * n, outB + i * n, ncur, nb, na); + e = dmax(e, acmp(tmp, outA, N, "freq shift", tol)); + } + } + + nb *= ncur; + } + return e; +} + + +void preserves_input(dofft_closure *k, aconstrain constrain, + int n, C *inA, C *inB, C *outB, int rounds) +{ + int j; + int recopy_input = k->recopy_input; + + k->recopy_input = 1; + for (j = 0; j < rounds; ++j) { + arand(inA, n); + if (constrain) + constrain(inA, n); + + acopy(inB, inA, n); + k->apply(k, inB, outB); + acmp(inB, inA, n, "preserves_input", 0.0); + } + k->recopy_input = recopy_input; +} + + +/* Make a copy of the size tensor, with the same dimensions, but with + the strides corresponding to a "packed" row-major array with the + given stride. */ +bench_tensor *verify_pack(const bench_tensor *sz, int s) +{ + bench_tensor *x = tensor_copy(sz); + if (BENCH_FINITE_RNK(x->rnk) && x->rnk > 0) { + int i; + x->dims[x->rnk - 1].is = s; + x->dims[x->rnk - 1].os = s; + for (i = x->rnk - 1; i > 0; --i) { + x->dims[i - 1].is = x->dims[i].is * x->dims[i].n; + x->dims[i - 1].os = x->dims[i].os * x->dims[i].n; + } + } + return x; +} + +static int all_zero(C *a, int n) +{ + int i; + for (i = 0; i < n; ++i) + if (c_re(a[i]) != 0.0 || c_im(a[i]) != 0.0) + return 0; + return 1; +} + +static int one_accuracy_test(dofft_closure *k, aconstrain constrain, + int sign, int n, C *a, C *b, + double t[6]) +{ + double err[6]; + + if (constrain) + constrain(a, n); + + if (all_zero(a, n)) + return 0; + + k->apply(k, a, b); + fftaccuracy(n, a, b, sign, err); + + t[0] += err[0]; + t[1] += err[1] * err[1]; + t[2] = dmax(t[2], err[2]); + t[3] += err[3]; + t[4] += err[4] * err[4]; + t[5] = dmax(t[5], err[5]); + + return 1; +} + +void accuracy_test(dofft_closure *k, aconstrain constrain, + int sign, int n, C *a, C *b, int rounds, int impulse_rounds, + double t[6]) +{ + int r, i; + int ntests = 0; + bench_complex czero = {0, 0}; + + for (i = 0; i < 6; ++i) t[i] = 0.0; + + for (r = 0; r < rounds; ++r) { + arand(a, n); + if (one_accuracy_test(k, constrain, sign, n, a, b, t)) + ++ntests; + } + + /* impulses at beginning of array */ + for (r = 0; r < impulse_rounds; ++r) { + if (r > n - r - 1) + continue; + + caset(a, n, czero); + c_re(a[r]) = c_im(a[r]) = 1.0; + + if (one_accuracy_test(k, constrain, sign, n, a, b, t)) + ++ntests; + } + + /* impulses at end of array */ + for (r = 0; r < impulse_rounds; ++r) { + if (r <= n - r - 1) + continue; + + caset(a, n, czero); + c_re(a[n - r - 1]) = c_im(a[n - r - 1]) = 1.0; + + if (one_accuracy_test(k, constrain, sign, n, a, b, t)) + ++ntests; + } + + /* randomly-located impulses */ + for (r = 0; r < impulse_rounds; ++r) { + caset(a, n, czero); + i = rand() % n; + c_re(a[i]) = c_im(a[i]) = 1.0; + + if (one_accuracy_test(k, constrain, sign, n, a, b, t)) + ++ntests; + } + + t[0] /= ntests; + t[1] = sqrt(t[1] / ntests); + t[3] /= ntests; + t[4] = sqrt(t[4] / ntests); + + fftaccuracy_done(); +}