annotate src/fftw-3.3.8/libbench2/verify-dft.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21
Chris@82 22 #include "verify.h"
Chris@82 23
Chris@82 24 /* copy A into B, using output stride of A and input stride of B */
Chris@82 25 typedef struct {
Chris@82 26 dotens2_closure k;
Chris@82 27 R *ra; R *ia;
Chris@82 28 R *rb; R *ib;
Chris@82 29 int scalea, scaleb;
Chris@82 30 } cpy_closure;
Chris@82 31
Chris@82 32 static void cpy0(dotens2_closure *k_,
Chris@82 33 int indxa, int ondxa, int indxb, int ondxb)
Chris@82 34 {
Chris@82 35 cpy_closure *k = (cpy_closure *)k_;
Chris@82 36 k->rb[indxb * k->scaleb] = k->ra[ondxa * k->scalea];
Chris@82 37 k->ib[indxb * k->scaleb] = k->ia[ondxa * k->scalea];
Chris@82 38 UNUSED(indxa); UNUSED(ondxb);
Chris@82 39 }
Chris@82 40
Chris@82 41 static void cpy(R *ra, R *ia, const bench_tensor *sza, int scalea,
Chris@82 42 R *rb, R *ib, const bench_tensor *szb, int scaleb)
Chris@82 43 {
Chris@82 44 cpy_closure k;
Chris@82 45 k.k.apply = cpy0;
Chris@82 46 k.ra = ra; k.ia = ia; k.rb = rb; k.ib = ib;
Chris@82 47 k.scalea = scalea; k.scaleb = scaleb;
Chris@82 48 bench_dotens2(sza, szb, &k.k);
Chris@82 49 }
Chris@82 50
Chris@82 51 typedef struct {
Chris@82 52 dofft_closure k;
Chris@82 53 bench_problem *p;
Chris@82 54 } dofft_dft_closure;
Chris@82 55
Chris@82 56 static void dft_apply(dofft_closure *k_, bench_complex *in, bench_complex *out)
Chris@82 57 {
Chris@82 58 dofft_dft_closure *k = (dofft_dft_closure *)k_;
Chris@82 59 bench_problem *p = k->p;
Chris@82 60 bench_tensor *totalsz, *pckdsz;
Chris@82 61 bench_tensor *totalsz_swap, *pckdsz_swap;
Chris@82 62 bench_real *ri, *ii, *ro, *io;
Chris@82 63 int totalscale;
Chris@82 64
Chris@82 65 totalsz = tensor_append(p->vecsz, p->sz);
Chris@82 66 pckdsz = verify_pack(totalsz, 2);
Chris@82 67 ri = (bench_real *) p->in;
Chris@82 68 ro = (bench_real *) p->out;
Chris@82 69
Chris@82 70 totalsz_swap = tensor_copy_swapio(totalsz);
Chris@82 71 pckdsz_swap = tensor_copy_swapio(pckdsz);
Chris@82 72
Chris@82 73 /* confusion: the stride is the distance between complex elements
Chris@82 74 when using interleaved format, but it is the distance between
Chris@82 75 real elements when using split format */
Chris@82 76 if (p->split) {
Chris@82 77 ii = p->ini ? (bench_real *) p->ini : ri + p->iphyssz;
Chris@82 78 io = p->outi ? (bench_real *) p->outi : ro + p->ophyssz;
Chris@82 79 totalscale = 1;
Chris@82 80 } else {
Chris@82 81 ii = p->ini ? (bench_real *) p->ini : ri + 1;
Chris@82 82 io = p->outi ? (bench_real *) p->outi : ro + 1;
Chris@82 83 totalscale = 2;
Chris@82 84 }
Chris@82 85
Chris@82 86 cpy(&c_re(in[0]), &c_im(in[0]), pckdsz, 1,
Chris@82 87 ri, ii, totalsz, totalscale);
Chris@82 88 after_problem_ccopy_from(p, ri, ii);
Chris@82 89 doit(1, p);
Chris@82 90 after_problem_ccopy_to(p, ro, io);
Chris@82 91 if (k->k.recopy_input)
Chris@82 92 cpy(ri, ii, totalsz_swap, totalscale,
Chris@82 93 &c_re(in[0]), &c_im(in[0]), pckdsz_swap, 1);
Chris@82 94 cpy(ro, io, totalsz, totalscale,
Chris@82 95 &c_re(out[0]), &c_im(out[0]), pckdsz, 1);
Chris@82 96
Chris@82 97 tensor_destroy(totalsz);
Chris@82 98 tensor_destroy(pckdsz);
Chris@82 99 tensor_destroy(totalsz_swap);
Chris@82 100 tensor_destroy(pckdsz_swap);
Chris@82 101 }
Chris@82 102
Chris@82 103 void verify_dft(bench_problem *p, int rounds, double tol, errors *e)
Chris@82 104 {
Chris@82 105 C *inA, *inB, *inC, *outA, *outB, *outC, *tmp;
Chris@82 106 int n, vecn, N;
Chris@82 107 dofft_dft_closure k;
Chris@82 108
Chris@82 109 BENCH_ASSERT(p->kind == PROBLEM_COMPLEX);
Chris@82 110
Chris@82 111 k.k.apply = dft_apply;
Chris@82 112 k.k.recopy_input = 0;
Chris@82 113 k.p = p;
Chris@82 114
Chris@82 115 if (rounds == 0)
Chris@82 116 rounds = 20; /* default value */
Chris@82 117
Chris@82 118 n = tensor_sz(p->sz);
Chris@82 119 vecn = tensor_sz(p->vecsz);
Chris@82 120 N = n * vecn;
Chris@82 121
Chris@82 122 inA = (C *) bench_malloc(N * sizeof(C));
Chris@82 123 inB = (C *) bench_malloc(N * sizeof(C));
Chris@82 124 inC = (C *) bench_malloc(N * sizeof(C));
Chris@82 125 outA = (C *) bench_malloc(N * sizeof(C));
Chris@82 126 outB = (C *) bench_malloc(N * sizeof(C));
Chris@82 127 outC = (C *) bench_malloc(N * sizeof(C));
Chris@82 128 tmp = (C *) bench_malloc(N * sizeof(C));
Chris@82 129
Chris@82 130 e->i = impulse(&k.k, n, vecn, inA, inB, inC, outA, outB, outC,
Chris@82 131 tmp, rounds, tol);
Chris@82 132 e->l = linear(&k.k, 0, N, inA, inB, inC, outA, outB, outC,
Chris@82 133 tmp, rounds, tol);
Chris@82 134
Chris@82 135 e->s = 0.0;
Chris@82 136 e->s = dmax(e->s, tf_shift(&k.k, 0, p->sz, n, vecn, p->sign,
Chris@82 137 inA, inB, outA, outB,
Chris@82 138 tmp, rounds, tol, TIME_SHIFT));
Chris@82 139 e->s = dmax(e->s, tf_shift(&k.k, 0, p->sz, n, vecn, p->sign,
Chris@82 140 inA, inB, outA, outB,
Chris@82 141 tmp, rounds, tol, FREQ_SHIFT));
Chris@82 142
Chris@82 143 if (!p->in_place && !p->destroy_input)
Chris@82 144 preserves_input(&k.k, 0, N, inA, inB, outB, rounds);
Chris@82 145
Chris@82 146 bench_free(tmp);
Chris@82 147 bench_free(outC);
Chris@82 148 bench_free(outB);
Chris@82 149 bench_free(outA);
Chris@82 150 bench_free(inC);
Chris@82 151 bench_free(inB);
Chris@82 152 bench_free(inA);
Chris@82 153 }
Chris@82 154
Chris@82 155
Chris@82 156 void accuracy_dft(bench_problem *p, int rounds, int impulse_rounds,
Chris@82 157 double t[6])
Chris@82 158 {
Chris@82 159 dofft_dft_closure k;
Chris@82 160 int n;
Chris@82 161 C *a, *b;
Chris@82 162
Chris@82 163 BENCH_ASSERT(p->kind == PROBLEM_COMPLEX);
Chris@82 164 BENCH_ASSERT(p->sz->rnk == 1);
Chris@82 165 BENCH_ASSERT(p->vecsz->rnk == 0);
Chris@82 166
Chris@82 167 k.k.apply = dft_apply;
Chris@82 168 k.k.recopy_input = 0;
Chris@82 169 k.p = p;
Chris@82 170 n = tensor_sz(p->sz);
Chris@82 171
Chris@82 172 a = (C *) bench_malloc(n * sizeof(C));
Chris@82 173 b = (C *) bench_malloc(n * sizeof(C));
Chris@82 174 accuracy_test(&k.k, 0, p->sign, n, a, b, rounds, impulse_rounds, t);
Chris@82 175 bench_free(b);
Chris@82 176 bench_free(a);
Chris@82 177 }