cannam@167: /* See bench.c. We keep a few common subroutines in this file so cannam@167: that they can be re-used in the MPI test program. */ cannam@167: cannam@167: #include cannam@167: #include cannam@167: #include cannam@167: #include "tests/fftw-bench.h" cannam@167: cannam@167: /* define to enable code that traps floating-point exceptions. cannam@167: Disabled by default because I don't want to worry about the cannam@167: portability of such code. feenableexcept() seems to be a GNU cannam@167: thing */ cannam@167: #undef TRAP_FP_EXCEPTIONS cannam@167: cannam@167: #ifdef TRAP_FP_EXCEPTIONS cannam@167: # include cannam@167: # include cannam@167: #endif cannam@167: cannam@167: #ifdef _OPENMP cannam@167: # include cannam@167: #endif cannam@167: cannam@167: #ifdef HAVE_SMP cannam@167: int threads_ok = 1; cannam@167: #endif cannam@167: cannam@167: FFTW(plan) the_plan = 0; cannam@167: cannam@167: static const char *wisdat = "wis.dat"; cannam@167: unsigned the_flags = 0; cannam@167: int paranoid = 0; cannam@167: int usewisdom = 0; cannam@167: int havewisdom = 0; cannam@167: int nthreads = 1; cannam@167: int amnesia = 0; cannam@167: cannam@167: extern void install_hook(void); /* in hook.c */ cannam@167: extern void uninstall_hook(void); /* in hook.c */ cannam@167: cannam@167: #ifdef FFTW_RANDOM_ESTIMATOR cannam@167: extern unsigned FFTW(random_estimate_seed); cannam@167: #endif cannam@167: cannam@167: #ifdef TRAP_FP_EXCEPTIONS cannam@167: static void sigfpe_handler(int sig, siginfo_t *info, void *context) cannam@167: { cannam@167: /* fftw code is not supposed to generate FP exceptions */ cannam@167: UNUSED(sig); UNUSED(info); UNUSED(context); cannam@167: fprintf(stderr, "caught FPE, aborting\n"); cannam@167: abort(); cannam@167: } cannam@167: cannam@167: static void setup_sigfpe_handler(void) cannam@167: { cannam@167: struct sigaction a; cannam@167: feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW); cannam@167: memset(&a, 0, sizeof(a)); cannam@167: a.sa_sigaction = sigfpe_handler; cannam@167: a.sa_flags = SA_SIGINFO; cannam@167: if (sigaction(SIGFPE, &a, NULL) == -1) { cannam@167: fprintf(stderr, "cannot install sigfpe handler\n"); cannam@167: exit(1); cannam@167: } cannam@167: } cannam@167: #else cannam@167: static void setup_sigfpe_handler(void) cannam@167: { cannam@167: } cannam@167: #endif cannam@167: cannam@167: void useropt(const char *arg) cannam@167: { cannam@167: int x; cannam@167: double y; cannam@167: cannam@167: if (!strcmp(arg, "patient")) the_flags |= FFTW_PATIENT; cannam@167: else if (!strcmp(arg, "estimate")) the_flags |= FFTW_ESTIMATE; cannam@167: else if (!strcmp(arg, "estimatepat")) the_flags |= FFTW_ESTIMATE_PATIENT; cannam@167: else if (!strcmp(arg, "exhaustive")) the_flags |= FFTW_EXHAUSTIVE; cannam@167: else if (!strcmp(arg, "unaligned")) the_flags |= FFTW_UNALIGNED; cannam@167: else if (!strcmp(arg, "nosimd")) the_flags |= FFTW_NO_SIMD; cannam@167: else if (!strcmp(arg, "noindirectop")) the_flags |= FFTW_NO_INDIRECT_OP; cannam@167: else if (!strcmp(arg, "wisdom-only")) the_flags |= FFTW_WISDOM_ONLY; cannam@167: else if (sscanf(arg, "flag=%d", &x) == 1) the_flags |= x; cannam@167: else if (sscanf(arg, "bflag=%d", &x) == 1) the_flags |= 1U << x; cannam@167: else if (!strcmp(arg, "paranoid")) paranoid = 1; cannam@167: else if (!strcmp(arg, "wisdom")) usewisdom = 1; cannam@167: else if (!strcmp(arg, "amnesia")) amnesia = 1; cannam@167: else if (sscanf(arg, "nthreads=%d", &x) == 1) nthreads = x; cannam@167: #ifdef FFTW_RANDOM_ESTIMATOR cannam@167: else if (sscanf(arg, "eseed=%d", &x) == 1) FFTW(random_estimate_seed) = x; cannam@167: #endif cannam@167: else if (sscanf(arg, "timelimit=%lg", &y) == 1) { cannam@167: FFTW(set_timelimit)(y); cannam@167: } cannam@167: cannam@167: else fprintf(stderr, "unknown user option: %s. Ignoring.\n", arg); cannam@167: } cannam@167: cannam@167: void rdwisdom(void) cannam@167: { cannam@167: FILE *f; cannam@167: double tim; cannam@167: int success = 0; cannam@167: cannam@167: if (havewisdom) return; cannam@167: cannam@167: #ifdef HAVE_SMP cannam@167: if (threads_ok) { cannam@167: BENCH_ASSERT(FFTW(init_threads)()); cannam@167: FFTW(plan_with_nthreads)(nthreads); cannam@167: FFTW(make_planner_thread_safe)(); cannam@167: #ifdef _OPENMP cannam@167: omp_set_num_threads(nthreads); cannam@167: #endif cannam@167: } cannam@167: else if (nthreads > 1 && verbose > 1) { cannam@167: fprintf(stderr, "bench: WARNING - nthreads = %d, but threads not supported\n", nthreads); cannam@167: nthreads = 1; cannam@167: } cannam@167: #endif cannam@167: cannam@167: if (!usewisdom) return; cannam@167: cannam@167: timer_start(USER_TIMER); cannam@167: if ((f = fopen(wisdat, "r"))) { cannam@167: if (!import_wisdom(f)) cannam@167: fprintf(stderr, "bench: ERROR reading wisdom\n"); cannam@167: else cannam@167: success = 1; cannam@167: fclose(f); cannam@167: } cannam@167: tim = timer_stop(USER_TIMER); cannam@167: cannam@167: if (success) { cannam@167: if (verbose > 1) printf("READ WISDOM (%g seconds): ", tim); cannam@167: cannam@167: if (verbose > 3) cannam@167: export_wisdom(stdout); cannam@167: if (verbose > 1) cannam@167: printf("\n"); cannam@167: } cannam@167: havewisdom = 1; cannam@167: } cannam@167: cannam@167: void wrwisdom(void) cannam@167: { cannam@167: FILE *f; cannam@167: double tim; cannam@167: if (!havewisdom) return; cannam@167: cannam@167: timer_start(USER_TIMER); cannam@167: if ((f = fopen(wisdat, "w"))) { cannam@167: export_wisdom(f); cannam@167: fclose(f); cannam@167: } cannam@167: tim = timer_stop(USER_TIMER); cannam@167: if (verbose > 1) printf("write wisdom took %g seconds\n", tim); cannam@167: } cannam@167: cannam@167: static unsigned preserve_input_flags(bench_problem *p) cannam@167: { cannam@167: /* cannam@167: * fftw3 cannot preserve input for multidimensional c2r transforms. cannam@167: * Enforce FFTW_DESTROY_INPUT cannam@167: */ cannam@167: if (p->kind == PROBLEM_REAL && cannam@167: p->sign > 0 && cannam@167: !p->in_place && cannam@167: p->sz->rnk > 1) cannam@167: p->destroy_input = 1; cannam@167: cannam@167: if (p->destroy_input) cannam@167: return FFTW_DESTROY_INPUT; cannam@167: else cannam@167: return FFTW_PRESERVE_INPUT; cannam@167: } cannam@167: cannam@167: int can_do(bench_problem *p) cannam@167: { cannam@167: double tim; cannam@167: cannam@167: if (verbose > 2 && p->pstring) cannam@167: printf("Planning %s...\n", p->pstring); cannam@167: rdwisdom(); cannam@167: cannam@167: timer_start(USER_TIMER); cannam@167: the_plan = mkplan(p, preserve_input_flags(p) | the_flags | FFTW_ESTIMATE); cannam@167: tim = timer_stop(USER_TIMER); cannam@167: if (verbose > 2) printf("estimate-planner time: %g s\n", tim); cannam@167: cannam@167: if (the_plan) { cannam@167: FFTW(destroy_plan)(the_plan); cannam@167: return 1; cannam@167: } cannam@167: return 0; cannam@167: } cannam@167: cannam@167: void setup(bench_problem *p) cannam@167: { cannam@167: double tim; cannam@167: cannam@167: setup_sigfpe_handler(); cannam@167: cannam@167: if (amnesia) { cannam@167: FFTW(forget_wisdom)(); cannam@167: havewisdom = 0; cannam@167: } cannam@167: cannam@167: /* Regression test: check that fftw_malloc exists and links cannam@167: * properly */ cannam@167: { cannam@167: void *ptr = FFTW(malloc(42)); cannam@167: BENCH_ASSERT(FFTW(alignment_of)(ptr) == 0); cannam@167: FFTW(free(ptr)); cannam@167: } cannam@167: cannam@167: rdwisdom(); cannam@167: install_hook(); cannam@167: cannam@167: #ifdef HAVE_SMP cannam@167: if (verbose > 1 && nthreads > 1) printf("NTHREADS = %d\n", nthreads); cannam@167: #endif cannam@167: cannam@167: timer_start(USER_TIMER); cannam@167: the_plan = mkplan(p, preserve_input_flags(p) | the_flags); cannam@167: tim = timer_stop(USER_TIMER); cannam@167: if (verbose > 1) printf("planner time: %g s\n", tim); cannam@167: cannam@167: BENCH_ASSERT(the_plan); cannam@167: cannam@167: { cannam@167: double add, mul, nfma, cost, pcost; cannam@167: FFTW(flops)(the_plan, &add, &mul, &nfma); cannam@167: cost = FFTW(estimate_cost)(the_plan); cannam@167: pcost = FFTW(cost)(the_plan); cannam@167: if (verbose > 1) { cannam@167: FFTW(print_plan)(the_plan); cannam@167: printf("\n"); cannam@167: printf("flops: %0.0f add, %0.0f mul, %0.0f fma\n", cannam@167: add, mul, nfma); cannam@167: printf("estimated cost: %f, pcost = %f\n", cost, pcost); cannam@167: } cannam@167: } cannam@167: } cannam@167: cannam@167: cannam@167: void doit(int iter, bench_problem *p) cannam@167: { cannam@167: int i; cannam@167: FFTW(plan) q = the_plan; cannam@167: cannam@167: UNUSED(p); cannam@167: for (i = 0; i < iter; ++i) cannam@167: FFTW(execute)(q); cannam@167: } cannam@167: cannam@167: void done(bench_problem *p) cannam@167: { cannam@167: UNUSED(p); cannam@167: cannam@167: FFTW(destroy_plan)(the_plan); cannam@167: uninstall_hook(); cannam@167: } cannam@167: cannam@167: void cleanup(void) cannam@167: { cannam@167: initial_cleanup(); cannam@167: cannam@167: wrwisdom(); cannam@167: #ifdef HAVE_SMP cannam@167: FFTW(cleanup_threads)(); cannam@167: #else cannam@167: FFTW(cleanup)(); cannam@167: #endif cannam@167: cannam@167: # ifdef FFTW_DEBUG_MALLOC cannam@167: { cannam@167: /* undocumented memory checker */ cannam@167: FFTW_EXTERN void FFTW(malloc_print_minfo)(int v); cannam@167: FFTW(malloc_print_minfo)(verbose); cannam@167: } cannam@167: # endif cannam@167: cannam@167: final_cleanup(); cannam@167: }