cannam@167: /* cannam@167: * Copyright (c) 2003, 2007-14 Matteo Frigo cannam@167: * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology cannam@167: * cannam@167: * This program is free software; you can redistribute it and/or modify cannam@167: * it under the terms of the GNU General Public License as published by cannam@167: * the Free Software Foundation; either version 2 of the License, or cannam@167: * (at your option) any later version. cannam@167: * cannam@167: * This program is distributed in the hope that it will be useful, cannam@167: * but WITHOUT ANY WARRANTY; without even the implied warranty of cannam@167: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the cannam@167: * GNU General Public License for more details. cannam@167: * cannam@167: * You should have received a copy of the GNU General Public License cannam@167: * along with this program; if not, write to the Free Software cannam@167: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA cannam@167: * cannam@167: */ cannam@167: cannam@167: cannam@167: #include "verify.h" cannam@167: #include cannam@167: #include cannam@167: #include cannam@167: cannam@167: /* cannam@167: * Utility functions: cannam@167: */ cannam@167: static double dabs(double x) { return (x < 0.0) ? -x : x; } cannam@167: static double dmin(double x, double y) { return (x < y) ? x : y; } cannam@167: static double norm2(double x, double y) { return dmax(dabs(x), dabs(y)); } cannam@167: cannam@167: double dmax(double x, double y) { return (x > y) ? x : y; } cannam@167: cannam@167: static double aerror(C *a, C *b, int n) cannam@167: { cannam@167: if (n > 0) { cannam@167: /* compute the relative Linf error */ cannam@167: double e = 0.0, mag = 0.0; cannam@167: int i; cannam@167: cannam@167: for (i = 0; i < n; ++i) { cannam@167: e = dmax(e, norm2(c_re(a[i]) - c_re(b[i]), cannam@167: c_im(a[i]) - c_im(b[i]))); cannam@167: mag = dmax(mag, cannam@167: dmin(norm2(c_re(a[i]), c_im(a[i])), cannam@167: norm2(c_re(b[i]), c_im(b[i])))); cannam@167: } cannam@167: e /= mag; cannam@167: cannam@167: #ifdef HAVE_ISNAN cannam@167: BENCH_ASSERT(!isnan(e)); cannam@167: #endif cannam@167: return e; cannam@167: } else cannam@167: return 0.0; cannam@167: } cannam@167: cannam@167: #ifdef HAVE_DRAND48 cannam@167: # if defined(HAVE_DECL_DRAND48) && !HAVE_DECL_DRAND48 cannam@167: extern double drand48(void); cannam@167: # endif cannam@167: double mydrand(void) cannam@167: { cannam@167: return drand48() - 0.5; cannam@167: } cannam@167: #else cannam@167: double mydrand(void) cannam@167: { cannam@167: double d = rand(); cannam@167: return (d / (double) RAND_MAX) - 0.5; cannam@167: } cannam@167: #endif cannam@167: cannam@167: void arand(C *a, int n) cannam@167: { cannam@167: int i; cannam@167: cannam@167: /* generate random inputs */ cannam@167: for (i = 0; i < n; ++i) { cannam@167: c_re(a[i]) = mydrand(); cannam@167: c_im(a[i]) = mydrand(); cannam@167: } cannam@167: } cannam@167: cannam@167: /* make array real */ cannam@167: void mkreal(C *A, int n) cannam@167: { cannam@167: int i; cannam@167: cannam@167: for (i = 0; i < n; ++i) { cannam@167: c_im(A[i]) = 0.0; cannam@167: } cannam@167: } cannam@167: cannam@167: static void assign_conj(C *Ac, C *A, int rank, const bench_iodim *dim, int stride) cannam@167: { cannam@167: if (rank == 0) { cannam@167: c_re(*Ac) = c_re(*A); cannam@167: c_im(*Ac) = -c_im(*A); cannam@167: } cannam@167: else { cannam@167: int i, n0 = dim[rank - 1].n, s = stride; cannam@167: rank -= 1; cannam@167: stride *= n0; cannam@167: assign_conj(Ac, A, rank, dim, stride); cannam@167: for (i = 1; i < n0; ++i) cannam@167: assign_conj(Ac + (n0 - i) * s, A + i * s, rank, dim, stride); cannam@167: } cannam@167: } cannam@167: cannam@167: /* make array hermitian */ cannam@167: void mkhermitian(C *A, int rank, const bench_iodim *dim, int stride) cannam@167: { cannam@167: if (rank == 0) cannam@167: c_im(*A) = 0.0; cannam@167: else { cannam@167: int i, n0 = dim[rank - 1].n, s = stride; cannam@167: rank -= 1; cannam@167: stride *= n0; cannam@167: mkhermitian(A, rank, dim, stride); cannam@167: for (i = 1; 2*i < n0; ++i) cannam@167: assign_conj(A + (n0 - i) * s, A + i * s, rank, dim, stride); cannam@167: if (2*i == n0) cannam@167: mkhermitian(A + i * s, rank, dim, stride); cannam@167: } cannam@167: } cannam@167: cannam@167: void mkhermitian1(C *a, int n) cannam@167: { cannam@167: bench_iodim d; cannam@167: cannam@167: d.n = n; cannam@167: d.is = d.os = 1; cannam@167: mkhermitian(a, 1, &d, 1); cannam@167: } cannam@167: cannam@167: /* C = A */ cannam@167: void acopy(C *c, C *a, int n) cannam@167: { cannam@167: int i; cannam@167: cannam@167: for (i = 0; i < n; ++i) { cannam@167: c_re(c[i]) = c_re(a[i]); cannam@167: c_im(c[i]) = c_im(a[i]); cannam@167: } cannam@167: } cannam@167: cannam@167: /* C = A + B */ cannam@167: void aadd(C *c, C *a, C *b, int n) cannam@167: { cannam@167: int i; cannam@167: cannam@167: for (i = 0; i < n; ++i) { cannam@167: c_re(c[i]) = c_re(a[i]) + c_re(b[i]); cannam@167: c_im(c[i]) = c_im(a[i]) + c_im(b[i]); cannam@167: } cannam@167: } cannam@167: cannam@167: /* C = A - B */ cannam@167: void asub(C *c, C *a, C *b, int n) cannam@167: { cannam@167: int i; cannam@167: cannam@167: for (i = 0; i < n; ++i) { cannam@167: c_re(c[i]) = c_re(a[i]) - c_re(b[i]); cannam@167: c_im(c[i]) = c_im(a[i]) - c_im(b[i]); cannam@167: } cannam@167: } cannam@167: cannam@167: /* B = rotate left A (complex) */ cannam@167: void arol(C *b, C *a, int n, int nb, int na) cannam@167: { cannam@167: int i, ib, ia; cannam@167: cannam@167: for (ib = 0; ib < nb; ++ib) { cannam@167: for (i = 0; i < n - 1; ++i) cannam@167: for (ia = 0; ia < na; ++ia) { cannam@167: C *pb = b + (ib * n + i) * na + ia; cannam@167: C *pa = a + (ib * n + i + 1) * na + ia; cannam@167: c_re(*pb) = c_re(*pa); cannam@167: c_im(*pb) = c_im(*pa); cannam@167: } cannam@167: cannam@167: for (ia = 0; ia < na; ++ia) { cannam@167: C *pb = b + (ib * n + n - 1) * na + ia; cannam@167: C *pa = a + ib * n * na + ia; cannam@167: c_re(*pb) = c_re(*pa); cannam@167: c_im(*pb) = c_im(*pa); cannam@167: } cannam@167: } cannam@167: } cannam@167: cannam@167: void aphase_shift(C *b, C *a, int n, int nb, int na, double sign) cannam@167: { cannam@167: int j, jb, ja; cannam@167: trigreal twopin; cannam@167: twopin = K2PI / n; cannam@167: cannam@167: for (jb = 0; jb < nb; ++jb) cannam@167: for (j = 0; j < n; ++j) { cannam@167: trigreal s = sign * SIN(j * twopin); cannam@167: trigreal c = COS(j * twopin); cannam@167: cannam@167: for (ja = 0; ja < na; ++ja) { cannam@167: int k = (jb * n + j) * na + ja; cannam@167: c_re(b[k]) = c_re(a[k]) * c - c_im(a[k]) * s; cannam@167: c_im(b[k]) = c_re(a[k]) * s + c_im(a[k]) * c; cannam@167: } cannam@167: } cannam@167: } cannam@167: cannam@167: /* A = alpha * A (complex, in place) */ cannam@167: void ascale(C *a, C alpha, int n) cannam@167: { cannam@167: int i; cannam@167: cannam@167: for (i = 0; i < n; ++i) { cannam@167: R xr = c_re(a[i]), xi = c_im(a[i]); cannam@167: c_re(a[i]) = xr * c_re(alpha) - xi * c_im(alpha); cannam@167: c_im(a[i]) = xr * c_im(alpha) + xi * c_re(alpha); cannam@167: } cannam@167: } cannam@167: cannam@167: cannam@167: double acmp(C *a, C *b, int n, const char *test, double tol) cannam@167: { cannam@167: double d = aerror(a, b, n); cannam@167: if (d > tol) { cannam@167: ovtpvt_err("Found relative error %e (%s)\n", d, test); cannam@167: cannam@167: { cannam@167: int i, N; cannam@167: N = n > 300 && verbose <= 2 ? 300 : n; cannam@167: for (i = 0; i < N; ++i) cannam@167: ovtpvt_err("%8d %16.12f %16.12f %16.12f %16.12f\n", i, cannam@167: (double) c_re(a[i]), (double) c_im(a[i]), cannam@167: (double) c_re(b[i]), (double) c_im(b[i])); cannam@167: } cannam@167: cannam@167: bench_exit(EXIT_FAILURE); cannam@167: } cannam@167: return d; cannam@167: } cannam@167: cannam@167: cannam@167: /* cannam@167: * Implementation of the FFT tester described in cannam@167: * cannam@167: * Funda Ergün. Testing multivariate linear functions: Overcoming the cannam@167: * generator bottleneck. In Proceedings of the Twenty-Seventh Annual cannam@167: * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas, cannam@167: * Nevada, 29 May--1 June 1995. cannam@167: * cannam@167: * Also: F. Ergun, S. R. Kumar, and D. Sivakumar, "Self-testing without cannam@167: * the generator bottleneck," SIAM J. on Computing 29 (5), 1630-51 (2000). cannam@167: */ cannam@167: cannam@167: static double impulse0(dofft_closure *k, cannam@167: int n, int vecn, cannam@167: C *inA, C *inB, C *inC, cannam@167: C *outA, C *outB, C *outC, cannam@167: C *tmp, int rounds, double tol) cannam@167: { cannam@167: int N = n * vecn; cannam@167: double e = 0.0; cannam@167: int j; cannam@167: cannam@167: k->apply(k, inA, tmp); cannam@167: e = dmax(e, acmp(tmp, outA, N, "impulse 1", tol)); cannam@167: cannam@167: for (j = 0; j < rounds; ++j) { cannam@167: arand(inB, N); cannam@167: asub(inC, inA, inB, N); cannam@167: k->apply(k, inB, outB); cannam@167: k->apply(k, inC, outC); cannam@167: aadd(tmp, outB, outC, N); cannam@167: e = dmax(e, acmp(tmp, outA, N, "impulse", tol)); cannam@167: } cannam@167: return e; cannam@167: } cannam@167: cannam@167: double impulse(dofft_closure *k, cannam@167: int n, int vecn, cannam@167: C *inA, C *inB, C *inC, cannam@167: C *outA, C *outB, C *outC, cannam@167: C *tmp, int rounds, double tol) cannam@167: { cannam@167: int i, j; cannam@167: double e = 0.0; cannam@167: cannam@167: /* check impulsive input */ cannam@167: for (i = 0; i < vecn; ++i) { cannam@167: R x = (sqrt(n)*(i+1)) / (double)(vecn+1); cannam@167: for (j = 0; j < n; ++j) { cannam@167: c_re(inA[j + i * n]) = 0; cannam@167: c_im(inA[j + i * n]) = 0; cannam@167: c_re(outA[j + i * n]) = x; cannam@167: c_im(outA[j + i * n]) = 0; cannam@167: } cannam@167: c_re(inA[i * n]) = x; cannam@167: c_im(inA[i * n]) = 0; cannam@167: } cannam@167: cannam@167: e = dmax(e, impulse0(k, n, vecn, inA, inB, inC, outA, outB, outC, cannam@167: tmp, rounds, tol)); cannam@167: cannam@167: /* check constant input */ cannam@167: for (i = 0; i < vecn; ++i) { cannam@167: R x = (i+1) / ((double)(vecn+1) * sqrt(n)); cannam@167: for (j = 0; j < n; ++j) { cannam@167: c_re(inA[j + i * n]) = x; cannam@167: c_im(inA[j + i * n]) = 0; cannam@167: c_re(outA[j + i * n]) = 0; cannam@167: c_im(outA[j + i * n]) = 0; cannam@167: } cannam@167: c_re(outA[i * n]) = n * x; cannam@167: c_im(outA[i * n]) = 0; cannam@167: } cannam@167: cannam@167: e = dmax(e, impulse0(k, n, vecn, inA, inB, inC, outA, outB, outC, cannam@167: tmp, rounds, tol)); cannam@167: return e; cannam@167: } cannam@167: cannam@167: double linear(dofft_closure *k, int realp, cannam@167: int n, C *inA, C *inB, C *inC, C *outA, cannam@167: C *outB, C *outC, C *tmp, int rounds, double tol) cannam@167: { cannam@167: int j; cannam@167: double e = 0.0; cannam@167: cannam@167: for (j = 0; j < rounds; ++j) { cannam@167: C alpha, beta; cannam@167: c_re(alpha) = mydrand(); cannam@167: c_im(alpha) = realp ? 0.0 : mydrand(); cannam@167: c_re(beta) = mydrand(); cannam@167: c_im(beta) = realp ? 0.0 : mydrand(); cannam@167: arand(inA, n); cannam@167: arand(inB, n); cannam@167: k->apply(k, inA, outA); cannam@167: k->apply(k, inB, outB); cannam@167: cannam@167: ascale(outA, alpha, n); cannam@167: ascale(outB, beta, n); cannam@167: aadd(tmp, outA, outB, n); cannam@167: ascale(inA, alpha, n); cannam@167: ascale(inB, beta, n); cannam@167: aadd(inC, inA, inB, n); cannam@167: k->apply(k, inC, outC); cannam@167: cannam@167: e = dmax(e, acmp(outC, tmp, n, "linear", tol)); cannam@167: } cannam@167: return e; cannam@167: } cannam@167: cannam@167: cannam@167: cannam@167: double tf_shift(dofft_closure *k, cannam@167: int realp, const bench_tensor *sz, cannam@167: int n, int vecn, double sign, cannam@167: C *inA, C *inB, C *outA, C *outB, C *tmp, cannam@167: int rounds, double tol, int which_shift) cannam@167: { cannam@167: int nb, na, dim, N = n * vecn; cannam@167: int i, j; cannam@167: double e = 0.0; cannam@167: cannam@167: /* test 3: check the time-shift property */ cannam@167: /* the paper performs more tests, but this code should be fine too */ cannam@167: cannam@167: nb = 1; cannam@167: na = n; cannam@167: cannam@167: /* check shifts across all SZ dimensions */ cannam@167: for (dim = 0; dim < sz->rnk; ++dim) { cannam@167: int ncur = sz->dims[dim].n; cannam@167: cannam@167: na /= ncur; cannam@167: cannam@167: for (j = 0; j < rounds; ++j) { cannam@167: arand(inA, N); cannam@167: cannam@167: if (which_shift == TIME_SHIFT) { cannam@167: for (i = 0; i < vecn; ++i) { cannam@167: if (realp) mkreal(inA + i * n, n); cannam@167: arol(inB + i * n, inA + i * n, ncur, nb, na); cannam@167: } cannam@167: k->apply(k, inA, outA); cannam@167: k->apply(k, inB, outB); cannam@167: for (i = 0; i < vecn; ++i) cannam@167: aphase_shift(tmp + i * n, outB + i * n, ncur, cannam@167: nb, na, sign); cannam@167: e = dmax(e, acmp(tmp, outA, N, "time shift", tol)); cannam@167: } else { cannam@167: for (i = 0; i < vecn; ++i) { cannam@167: if (realp) cannam@167: mkhermitian(inA + i * n, sz->rnk, sz->dims, 1); cannam@167: aphase_shift(inB + i * n, inA + i * n, ncur, cannam@167: nb, na, -sign); cannam@167: } cannam@167: k->apply(k, inA, outA); cannam@167: k->apply(k, inB, outB); cannam@167: for (i = 0; i < vecn; ++i) cannam@167: arol(tmp + i * n, outB + i * n, ncur, nb, na); cannam@167: e = dmax(e, acmp(tmp, outA, N, "freq shift", tol)); cannam@167: } cannam@167: } cannam@167: cannam@167: nb *= ncur; cannam@167: } cannam@167: return e; cannam@167: } cannam@167: cannam@167: cannam@167: void preserves_input(dofft_closure *k, aconstrain constrain, cannam@167: int n, C *inA, C *inB, C *outB, int rounds) cannam@167: { cannam@167: int j; cannam@167: int recopy_input = k->recopy_input; cannam@167: cannam@167: k->recopy_input = 1; cannam@167: for (j = 0; j < rounds; ++j) { cannam@167: arand(inA, n); cannam@167: if (constrain) cannam@167: constrain(inA, n); cannam@167: cannam@167: acopy(inB, inA, n); cannam@167: k->apply(k, inB, outB); cannam@167: acmp(inB, inA, n, "preserves_input", 0.0); cannam@167: } cannam@167: k->recopy_input = recopy_input; cannam@167: } cannam@167: cannam@167: cannam@167: /* Make a copy of the size tensor, with the same dimensions, but with cannam@167: the strides corresponding to a "packed" row-major array with the cannam@167: given stride. */ cannam@167: bench_tensor *verify_pack(const bench_tensor *sz, int s) cannam@167: { cannam@167: bench_tensor *x = tensor_copy(sz); cannam@167: if (BENCH_FINITE_RNK(x->rnk) && x->rnk > 0) { cannam@167: int i; cannam@167: x->dims[x->rnk - 1].is = s; cannam@167: x->dims[x->rnk - 1].os = s; cannam@167: for (i = x->rnk - 1; i > 0; --i) { cannam@167: x->dims[i - 1].is = x->dims[i].is * x->dims[i].n; cannam@167: x->dims[i - 1].os = x->dims[i].os * x->dims[i].n; cannam@167: } cannam@167: } cannam@167: return x; cannam@167: } cannam@167: cannam@167: static int all_zero(C *a, int n) cannam@167: { cannam@167: int i; cannam@167: for (i = 0; i < n; ++i) cannam@167: if (c_re(a[i]) != 0.0 || c_im(a[i]) != 0.0) cannam@167: return 0; cannam@167: return 1; cannam@167: } cannam@167: cannam@167: static int one_accuracy_test(dofft_closure *k, aconstrain constrain, cannam@167: int sign, int n, C *a, C *b, cannam@167: double t[6]) cannam@167: { cannam@167: double err[6]; cannam@167: cannam@167: if (constrain) cannam@167: constrain(a, n); cannam@167: cannam@167: if (all_zero(a, n)) cannam@167: return 0; cannam@167: cannam@167: k->apply(k, a, b); cannam@167: fftaccuracy(n, a, b, sign, err); cannam@167: cannam@167: t[0] += err[0]; cannam@167: t[1] += err[1] * err[1]; cannam@167: t[2] = dmax(t[2], err[2]); cannam@167: t[3] += err[3]; cannam@167: t[4] += err[4] * err[4]; cannam@167: t[5] = dmax(t[5], err[5]); cannam@167: cannam@167: return 1; cannam@167: } cannam@167: cannam@167: void accuracy_test(dofft_closure *k, aconstrain constrain, cannam@167: int sign, int n, C *a, C *b, int rounds, int impulse_rounds, cannam@167: double t[6]) cannam@167: { cannam@167: int r, i; cannam@167: int ntests = 0; cannam@167: bench_complex czero = {0, 0}; cannam@167: cannam@167: for (i = 0; i < 6; ++i) t[i] = 0.0; cannam@167: cannam@167: for (r = 0; r < rounds; ++r) { cannam@167: arand(a, n); cannam@167: if (one_accuracy_test(k, constrain, sign, n, a, b, t)) cannam@167: ++ntests; cannam@167: } cannam@167: cannam@167: /* impulses at beginning of array */ cannam@167: for (r = 0; r < impulse_rounds; ++r) { cannam@167: if (r > n - r - 1) cannam@167: continue; cannam@167: cannam@167: caset(a, n, czero); cannam@167: c_re(a[r]) = c_im(a[r]) = 1.0; cannam@167: cannam@167: if (one_accuracy_test(k, constrain, sign, n, a, b, t)) cannam@167: ++ntests; cannam@167: } cannam@167: cannam@167: /* impulses at end of array */ cannam@167: for (r = 0; r < impulse_rounds; ++r) { cannam@167: if (r <= n - r - 1) cannam@167: continue; cannam@167: cannam@167: caset(a, n, czero); cannam@167: c_re(a[n - r - 1]) = c_im(a[n - r - 1]) = 1.0; cannam@167: cannam@167: if (one_accuracy_test(k, constrain, sign, n, a, b, t)) cannam@167: ++ntests; cannam@167: } cannam@167: cannam@167: /* randomly-located impulses */ cannam@167: for (r = 0; r < impulse_rounds; ++r) { cannam@167: caset(a, n, czero); cannam@167: i = rand() % n; cannam@167: c_re(a[i]) = c_im(a[i]) = 1.0; cannam@167: cannam@167: if (one_accuracy_test(k, constrain, sign, n, a, b, t)) cannam@167: ++ntests; cannam@167: } cannam@167: cannam@167: t[0] /= ntests; cannam@167: t[1] = sqrt(t[1] / ntests); cannam@167: t[3] /= ntests; cannam@167: t[4] = sqrt(t[4] / ntests); cannam@167: cannam@167: fftaccuracy_done(); cannam@167: }