cannam@167: /* fftw hook to be used in the benchmark program. cannam@167: cannam@167: We keep it in a separate file because cannam@167: cannam@167: 1) bench.c is supposed to test the API---we do not want to #include cannam@167: "ifftw.h" and accidentally use internal symbols/macros. cannam@167: 2) this code is a royal mess. The messiness is due to cannam@167: A) confusion between internal fftw tensors and bench_tensor's cannam@167: (which we want to keep separate because the benchmark cannam@167: program tests other routines too) cannam@167: B) despite A), our desire to recycle the libbench verifier. cannam@167: */ cannam@167: cannam@167: #include cannam@167: #include "libbench2/bench-user.h" cannam@167: cannam@167: #define CALLING_FFTW /* hack for Windows DLL nonsense */ cannam@167: #include "api/api.h" cannam@167: #include "dft/dft.h" cannam@167: #include "rdft/rdft.h" cannam@167: cannam@167: extern int paranoid; /* in bench.c */ cannam@167: extern X(plan) the_plan; /* in bench.c */ cannam@167: cannam@167: /* cannam@167: transform an fftw tensor into a bench_tensor. cannam@167: */ cannam@167: static bench_tensor *fftw_tensor_to_bench_tensor(tensor *t) cannam@167: { cannam@167: bench_tensor *bt = mktensor(t->rnk); cannam@167: cannam@167: if (FINITE_RNK(t->rnk)) { cannam@167: int i; cannam@167: for (i = 0; i < t->rnk; ++i) { cannam@167: /* FIXME: 64-bit unclean because of INT -> int conversion */ cannam@167: bt->dims[i].n = t->dims[i].n; cannam@167: bt->dims[i].is = t->dims[i].is; cannam@167: bt->dims[i].os = t->dims[i].os; cannam@167: BENCH_ASSERT(bt->dims[i].n == t->dims[i].n); cannam@167: BENCH_ASSERT(bt->dims[i].is == t->dims[i].is); cannam@167: BENCH_ASSERT(bt->dims[i].os == t->dims[i].os); cannam@167: } cannam@167: } cannam@167: return bt; cannam@167: } cannam@167: cannam@167: /* cannam@167: transform an fftw problem into a bench_problem. cannam@167: */ cannam@167: static bench_problem *fftw_problem_to_bench_problem(planner *plnr, cannam@167: const problem *p_) cannam@167: { cannam@167: bench_problem *bp = 0; cannam@167: switch (p_->adt->problem_kind) { cannam@167: case PROBLEM_DFT: cannam@167: { cannam@167: const problem_dft *p = (const problem_dft *) p_; cannam@167: cannam@167: if (!p->ri || !p->ii) cannam@167: abort(); cannam@167: cannam@167: bp = (bench_problem *) bench_malloc(sizeof(bench_problem)); cannam@167: cannam@167: bp->kind = PROBLEM_COMPLEX; cannam@167: bp->sign = FFT_SIGN; cannam@167: bp->split = 1; /* tensor strides are in R's, not C's */ cannam@167: bp->in = UNTAINT(p->ri); cannam@167: bp->out = UNTAINT(p->ro); cannam@167: bp->ini = UNTAINT(p->ii); cannam@167: bp->outi = UNTAINT(p->io); cannam@167: bp->inphys = bp->outphys = 0; cannam@167: bp->iphyssz = bp->ophyssz = 0; cannam@167: bp->in_place = p->ri == p->ro; cannam@167: bp->sz = fftw_tensor_to_bench_tensor(p->sz); cannam@167: bp->vecsz = fftw_tensor_to_bench_tensor(p->vecsz); cannam@167: bp->k = 0; cannam@167: break; cannam@167: } cannam@167: case PROBLEM_RDFT: cannam@167: { cannam@167: const problem_rdft *p = (const problem_rdft *) p_; cannam@167: int i; cannam@167: cannam@167: if (!p->I || !p->O) cannam@167: abort(); cannam@167: cannam@167: for (i = 0; i < p->sz->rnk; ++i) cannam@167: switch (p->kind[i]) { cannam@167: case R2HC01: cannam@167: case R2HC10: cannam@167: case R2HC11: cannam@167: case HC2R01: cannam@167: case HC2R10: cannam@167: case HC2R11: cannam@167: return bp; cannam@167: default: cannam@167: ; cannam@167: } cannam@167: cannam@167: bp = (bench_problem *) bench_malloc(sizeof(bench_problem)); cannam@167: cannam@167: bp->kind = PROBLEM_R2R; cannam@167: bp->sign = FFT_SIGN; cannam@167: bp->split = 0; cannam@167: bp->in = UNTAINT(p->I); cannam@167: bp->out = UNTAINT(p->O); cannam@167: bp->ini = bp->outi = 0; cannam@167: bp->inphys = bp->outphys = 0; cannam@167: bp->iphyssz = bp->ophyssz = 0; cannam@167: bp->in_place = p->I == p->O; cannam@167: bp->sz = fftw_tensor_to_bench_tensor(p->sz); cannam@167: bp->vecsz = fftw_tensor_to_bench_tensor(p->vecsz); cannam@167: bp->k = (r2r_kind_t *) bench_malloc(sizeof(r2r_kind_t) * p->sz->rnk); cannam@167: for (i = 0; i < p->sz->rnk; ++i) cannam@167: switch (p->kind[i]) { cannam@167: case R2HC: bp->k[i] = R2R_R2HC; break; cannam@167: case HC2R: bp->k[i] = R2R_HC2R; break; cannam@167: case DHT: bp->k[i] = R2R_DHT; break; cannam@167: case REDFT00: bp->k[i] = R2R_REDFT00; break; cannam@167: case REDFT01: bp->k[i] = R2R_REDFT01; break; cannam@167: case REDFT10: bp->k[i] = R2R_REDFT10; break; cannam@167: case REDFT11: bp->k[i] = R2R_REDFT11; break; cannam@167: case RODFT00: bp->k[i] = R2R_RODFT00; break; cannam@167: case RODFT01: bp->k[i] = R2R_RODFT01; break; cannam@167: case RODFT10: bp->k[i] = R2R_RODFT10; break; cannam@167: case RODFT11: bp->k[i] = R2R_RODFT11; break; cannam@167: default: CK(0); cannam@167: } cannam@167: break; cannam@167: } cannam@167: case PROBLEM_RDFT2: cannam@167: { cannam@167: const problem_rdft2 *p = (const problem_rdft2 *) p_; cannam@167: int rnk = p->sz->rnk; cannam@167: cannam@167: if (!p->r0 || !p->r1 || !p->cr || !p->ci) cannam@167: abort(); cannam@167: cannam@167: /* give up verifying rdft2 R2HCII */ cannam@167: if (p->kind != R2HC && p->kind != HC2R) cannam@167: return bp; cannam@167: cannam@167: if (rnk > 0) { cannam@167: /* can't verify separate even/odd arrays for now */ cannam@167: if (2 * (p->r1 - p->r0) != cannam@167: ((p->kind == R2HC) ? cannam@167: p->sz->dims[rnk-1].is : p->sz->dims[rnk-1].os)) cannam@167: return bp; cannam@167: } cannam@167: cannam@167: bp = (bench_problem *) bench_malloc(sizeof(bench_problem)); cannam@167: cannam@167: bp->kind = PROBLEM_REAL; cannam@167: bp->sign = p->kind == R2HC ? FFT_SIGN : -FFT_SIGN; cannam@167: bp->split = 1; /* tensor strides are in R's, not C's */ cannam@167: if (p->kind == R2HC) { cannam@167: bp->sign = FFT_SIGN; cannam@167: bp->in = UNTAINT(p->r0); cannam@167: bp->out = UNTAINT(p->cr); cannam@167: bp->ini = 0; cannam@167: bp->outi = UNTAINT(p->ci); cannam@167: } cannam@167: else { cannam@167: bp->sign = -FFT_SIGN; cannam@167: bp->out = UNTAINT(p->r0); cannam@167: bp->in = UNTAINT(p->cr); cannam@167: bp->outi = 0; cannam@167: bp->ini = UNTAINT(p->ci); cannam@167: } cannam@167: bp->inphys = bp->outphys = 0; cannam@167: bp->iphyssz = bp->ophyssz = 0; cannam@167: bp->in_place = p->r0 == p->cr; cannam@167: bp->sz = fftw_tensor_to_bench_tensor(p->sz); cannam@167: if (rnk > 0) { cannam@167: if (p->kind == R2HC) cannam@167: bp->sz->dims[rnk-1].is /= 2; cannam@167: else cannam@167: bp->sz->dims[rnk-1].os /= 2; cannam@167: } cannam@167: bp->vecsz = fftw_tensor_to_bench_tensor(p->vecsz); cannam@167: bp->k = 0; cannam@167: break; cannam@167: } cannam@167: default: cannam@167: abort(); cannam@167: } cannam@167: cannam@167: bp->userinfo = 0; cannam@167: bp->pstring = 0; cannam@167: bp->destroy_input = !NO_DESTROY_INPUTP(plnr); cannam@167: cannam@167: return bp; cannam@167: } cannam@167: cannam@167: static void hook(planner *plnr, plan *pln, const problem *p_, int optimalp) cannam@167: { cannam@167: int rounds = 5; cannam@167: double tol = SINGLE_PRECISION ? 1.0e-3 : 1.0e-10; cannam@167: UNUSED(optimalp); cannam@167: cannam@167: if (verbose > 5) { cannam@167: printer *pr = X(mkprinter_file)(stdout); cannam@167: pr->print(pr, "%P:%(%p%)\n", p_, pln); cannam@167: X(printer_destroy)(pr); cannam@167: printf("cost %g \n\n", pln->pcost); cannam@167: } cannam@167: cannam@167: if (paranoid) { cannam@167: bench_problem *bp; cannam@167: cannam@167: bp = fftw_problem_to_bench_problem(plnr, p_); cannam@167: if (bp) { cannam@167: X(plan) the_plan_save = the_plan; cannam@167: cannam@167: the_plan = (apiplan *) MALLOC(sizeof(apiplan), PLANS); cannam@167: the_plan->pln = pln; cannam@167: the_plan->prb = (problem *) p_; cannam@167: cannam@167: X(plan_awake)(pln, AWAKE_SQRTN_TABLE); cannam@167: verify_problem(bp, rounds, tol); cannam@167: X(plan_awake)(pln, SLEEPY); cannam@167: cannam@167: X(ifree)(the_plan); cannam@167: the_plan = the_plan_save; cannam@167: cannam@167: problem_destroy(bp); cannam@167: } cannam@167: cannam@167: } cannam@167: } cannam@167: cannam@167: static void paranoid_checks(void) cannam@167: { cannam@167: /* FIXME: assumes char = 8 bits, which is false on at least one cannam@167: DSP I know of. */ cannam@167: #if 0 cannam@167: /* if flags_t is not 64 bits i want to know it. */ cannam@167: CK(sizeof(flags_t) == 8); cannam@167: cannam@167: CK(sizeof(md5uint) >= 4); cannam@167: #endif cannam@167: cannam@167: CK(sizeof(uintptr_t) >= sizeof(R *)); cannam@167: cannam@167: CK(sizeof(INT) >= sizeof(R *)); cannam@167: } cannam@167: cannam@167: void install_hook(void) cannam@167: { cannam@167: planner *plnr = X(the_planner)(); cannam@167: plnr->hook = hook; cannam@167: paranoid_checks(); cannam@167: } cannam@167: cannam@167: void uninstall_hook(void) cannam@167: { cannam@167: planner *plnr = X(the_planner)(); cannam@167: plnr->hook = 0; cannam@167: }