Chris@42
|
1 /* See bench.c. We keep a few common subroutines in this file so
|
Chris@42
|
2 that they can be re-used in the MPI test program. */
|
Chris@42
|
3
|
Chris@42
|
4 #include <math.h>
|
Chris@42
|
5 #include <stdio.h>
|
Chris@42
|
6 #include <string.h>
|
Chris@42
|
7 #include "fftw-bench.h"
|
Chris@42
|
8
|
Chris@42
|
9 /* define to enable code that traps floating-point exceptions.
|
Chris@42
|
10 Disabled by default because I don't want to worry about the
|
Chris@42
|
11 portability of such code. feenableexcept() seems to be a GNU
|
Chris@42
|
12 thing */
|
Chris@42
|
13 #undef TRAP_FP_EXCEPTIONS
|
Chris@42
|
14
|
Chris@42
|
15 #ifdef TRAP_FP_EXCEPTIONS
|
Chris@42
|
16 # include <signal.h>
|
Chris@42
|
17 # include <fenv.h>
|
Chris@42
|
18 #endif
|
Chris@42
|
19
|
Chris@42
|
20 #ifdef _OPENMP
|
Chris@42
|
21 # include <omp.h>
|
Chris@42
|
22 #endif
|
Chris@42
|
23
|
Chris@42
|
24 #ifdef HAVE_SMP
|
Chris@42
|
25 int threads_ok = 1;
|
Chris@42
|
26 #endif
|
Chris@42
|
27
|
Chris@42
|
28 FFTW(plan) the_plan = 0;
|
Chris@42
|
29
|
Chris@42
|
30 static const char *wisdat = "wis.dat";
|
Chris@42
|
31 unsigned the_flags = 0;
|
Chris@42
|
32 int paranoid = 0;
|
Chris@42
|
33 int usewisdom = 0;
|
Chris@42
|
34 int havewisdom = 0;
|
Chris@42
|
35 int nthreads = 1;
|
Chris@42
|
36 int amnesia = 0;
|
Chris@42
|
37
|
Chris@42
|
38 extern void install_hook(void); /* in hook.c */
|
Chris@42
|
39 extern void uninstall_hook(void); /* in hook.c */
|
Chris@42
|
40
|
Chris@42
|
41 #ifdef FFTW_RANDOM_ESTIMATOR
|
Chris@42
|
42 extern unsigned FFTW(random_estimate_seed);
|
Chris@42
|
43 #endif
|
Chris@42
|
44
|
Chris@42
|
45 #ifdef TRAP_FP_EXCEPTIONS
|
Chris@42
|
46 static void sigfpe_handler(int sig, siginfo_t *info, void *context)
|
Chris@42
|
47 {
|
Chris@42
|
48 /* fftw code is not supposed to generate FP exceptions */
|
Chris@42
|
49 UNUSED(sig); UNUSED(info); UNUSED(context);
|
Chris@42
|
50 fprintf(stderr, "caught FPE, aborting\n");
|
Chris@42
|
51 abort();
|
Chris@42
|
52 }
|
Chris@42
|
53
|
Chris@42
|
54 static void setup_sigfpe_handler(void)
|
Chris@42
|
55 {
|
Chris@42
|
56 struct sigaction a;
|
Chris@42
|
57 feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW);
|
Chris@42
|
58 memset(&a, 0, sizeof(a));
|
Chris@42
|
59 a.sa_sigaction = sigfpe_handler;
|
Chris@42
|
60 a.sa_flags = SA_SIGINFO;
|
Chris@42
|
61 if (sigaction(SIGFPE, &a, NULL) == -1) {
|
Chris@42
|
62 fprintf(stderr, "cannot install sigfpe handler\n");
|
Chris@42
|
63 exit(1);
|
Chris@42
|
64 }
|
Chris@42
|
65 }
|
Chris@42
|
66 #else
|
Chris@42
|
/* Floating-point exception trapping is compiled out: nothing to do. */
static void setup_sigfpe_handler(void)
{
     /* intentionally empty */
}
|
Chris@42
|
70 #endif
|
Chris@42
|
71
|
Chris@42
|
72 void useropt(const char *arg)
|
Chris@42
|
73 {
|
Chris@42
|
74 int x;
|
Chris@42
|
75 double y;
|
Chris@42
|
76
|
Chris@42
|
77 if (!strcmp(arg, "patient")) the_flags |= FFTW_PATIENT;
|
Chris@42
|
78 else if (!strcmp(arg, "estimate")) the_flags |= FFTW_ESTIMATE;
|
Chris@42
|
79 else if (!strcmp(arg, "estimatepat")) the_flags |= FFTW_ESTIMATE_PATIENT;
|
Chris@42
|
80 else if (!strcmp(arg, "exhaustive")) the_flags |= FFTW_EXHAUSTIVE;
|
Chris@42
|
81 else if (!strcmp(arg, "unaligned")) the_flags |= FFTW_UNALIGNED;
|
Chris@42
|
82 else if (!strcmp(arg, "nosimd")) the_flags |= FFTW_NO_SIMD;
|
Chris@42
|
83 else if (!strcmp(arg, "noindirectop")) the_flags |= FFTW_NO_INDIRECT_OP;
|
Chris@42
|
84 else if (!strcmp(arg, "wisdom-only")) the_flags |= FFTW_WISDOM_ONLY;
|
Chris@42
|
85 else if (sscanf(arg, "flag=%d", &x) == 1) the_flags |= x;
|
Chris@42
|
86 else if (sscanf(arg, "bflag=%d", &x) == 1) the_flags |= 1U << x;
|
Chris@42
|
87 else if (!strcmp(arg, "paranoid")) paranoid = 1;
|
Chris@42
|
88 else if (!strcmp(arg, "wisdom")) usewisdom = 1;
|
Chris@42
|
89 else if (!strcmp(arg, "amnesia")) amnesia = 1;
|
Chris@42
|
90 else if (sscanf(arg, "nthreads=%d", &x) == 1) nthreads = x;
|
Chris@42
|
91 #ifdef FFTW_RANDOM_ESTIMATOR
|
Chris@42
|
92 else if (sscanf(arg, "eseed=%d", &x) == 1) FFTW(random_estimate_seed) = x;
|
Chris@42
|
93 #endif
|
Chris@42
|
94 else if (sscanf(arg, "timelimit=%lg", &y) == 1) {
|
Chris@42
|
95 FFTW(set_timelimit)(y);
|
Chris@42
|
96 }
|
Chris@42
|
97
|
Chris@42
|
98 else fprintf(stderr, "unknown user option: %s. Ignoring.\n", arg);
|
Chris@42
|
99 }
|
Chris@42
|
100
|
Chris@42
|
101 void rdwisdom(void)
|
Chris@42
|
102 {
|
Chris@42
|
103 FILE *f;
|
Chris@42
|
104 double tim;
|
Chris@42
|
105 int success = 0;
|
Chris@42
|
106
|
Chris@42
|
107 if (havewisdom) return;
|
Chris@42
|
108
|
Chris@42
|
109 #ifdef HAVE_SMP
|
Chris@42
|
110 if (threads_ok) {
|
Chris@42
|
111 BENCH_ASSERT(FFTW(init_threads)());
|
Chris@42
|
112 FFTW(plan_with_nthreads)(nthreads);
|
Chris@42
|
113 FFTW(make_planner_thread_safe)();
|
Chris@42
|
114 #ifdef _OPENMP
|
Chris@42
|
115 omp_set_num_threads(nthreads);
|
Chris@42
|
116 #endif
|
Chris@42
|
117 }
|
Chris@42
|
118 else if (nthreads > 1 && verbose > 1) {
|
Chris@42
|
119 fprintf(stderr, "bench: WARNING - nthreads = %d, but threads not supported\n", nthreads);
|
Chris@42
|
120 nthreads = 1;
|
Chris@42
|
121 }
|
Chris@42
|
122 #endif
|
Chris@42
|
123
|
Chris@42
|
124 if (!usewisdom) return;
|
Chris@42
|
125
|
Chris@42
|
126 timer_start(USER_TIMER);
|
Chris@42
|
127 if ((f = fopen(wisdat, "r"))) {
|
Chris@42
|
128 if (!import_wisdom(f))
|
Chris@42
|
129 fprintf(stderr, "bench: ERROR reading wisdom\n");
|
Chris@42
|
130 else
|
Chris@42
|
131 success = 1;
|
Chris@42
|
132 fclose(f);
|
Chris@42
|
133 }
|
Chris@42
|
134 tim = timer_stop(USER_TIMER);
|
Chris@42
|
135
|
Chris@42
|
136 if (success) {
|
Chris@42
|
137 if (verbose > 1) printf("READ WISDOM (%g seconds): ", tim);
|
Chris@42
|
138
|
Chris@42
|
139 if (verbose > 3)
|
Chris@42
|
140 export_wisdom(stdout);
|
Chris@42
|
141 if (verbose > 1)
|
Chris@42
|
142 printf("\n");
|
Chris@42
|
143 }
|
Chris@42
|
144 havewisdom = 1;
|
Chris@42
|
145 }
|
Chris@42
|
146
|
Chris@42
|
147 void wrwisdom(void)
|
Chris@42
|
148 {
|
Chris@42
|
149 FILE *f;
|
Chris@42
|
150 double tim;
|
Chris@42
|
151 if (!havewisdom) return;
|
Chris@42
|
152
|
Chris@42
|
153 timer_start(USER_TIMER);
|
Chris@42
|
154 if ((f = fopen(wisdat, "w"))) {
|
Chris@42
|
155 export_wisdom(f);
|
Chris@42
|
156 fclose(f);
|
Chris@42
|
157 }
|
Chris@42
|
158 tim = timer_stop(USER_TIMER);
|
Chris@42
|
159 if (verbose > 1) printf("write wisdom took %g seconds\n", tim);
|
Chris@42
|
160 }
|
Chris@42
|
161
|
Chris@42
|
162 static unsigned preserve_input_flags(bench_problem *p)
|
Chris@42
|
163 {
|
Chris@42
|
164 /*
|
Chris@42
|
165 * fftw3 cannot preserve input for multidimensional c2r transforms.
|
Chris@42
|
166 * Enforce FFTW_DESTROY_INPUT
|
Chris@42
|
167 */
|
Chris@42
|
168 if (p->kind == PROBLEM_REAL &&
|
Chris@42
|
169 p->sign > 0 &&
|
Chris@42
|
170 !p->in_place &&
|
Chris@42
|
171 p->sz->rnk > 1)
|
Chris@42
|
172 p->destroy_input = 1;
|
Chris@42
|
173
|
Chris@42
|
174 if (p->destroy_input)
|
Chris@42
|
175 return FFTW_DESTROY_INPUT;
|
Chris@42
|
176 else
|
Chris@42
|
177 return FFTW_PRESERVE_INPUT;
|
Chris@42
|
178 }
|
Chris@42
|
179
|
Chris@42
|
180 int can_do(bench_problem *p)
|
Chris@42
|
181 {
|
Chris@42
|
182 double tim;
|
Chris@42
|
183
|
Chris@42
|
184 if (verbose > 2 && p->pstring)
|
Chris@42
|
185 printf("Planning %s...\n", p->pstring);
|
Chris@42
|
186 rdwisdom();
|
Chris@42
|
187
|
Chris@42
|
188 timer_start(USER_TIMER);
|
Chris@42
|
189 the_plan = mkplan(p, preserve_input_flags(p) | the_flags | FFTW_ESTIMATE);
|
Chris@42
|
190 tim = timer_stop(USER_TIMER);
|
Chris@42
|
191 if (verbose > 2) printf("estimate-planner time: %g s\n", tim);
|
Chris@42
|
192
|
Chris@42
|
193 if (the_plan) {
|
Chris@42
|
194 FFTW(destroy_plan)(the_plan);
|
Chris@42
|
195 return 1;
|
Chris@42
|
196 }
|
Chris@42
|
197 return 0;
|
Chris@42
|
198 }
|
Chris@42
|
199
|
Chris@42
|
200 void setup(bench_problem *p)
|
Chris@42
|
201 {
|
Chris@42
|
202 double tim;
|
Chris@42
|
203
|
Chris@42
|
204 setup_sigfpe_handler();
|
Chris@42
|
205
|
Chris@42
|
206 if (amnesia) {
|
Chris@42
|
207 FFTW(forget_wisdom)();
|
Chris@42
|
208 havewisdom = 0;
|
Chris@42
|
209 }
|
Chris@42
|
210
|
Chris@42
|
211 /* Regression test: check that fftw_malloc exists and links
|
Chris@42
|
212 * properly */
|
Chris@42
|
213 {
|
Chris@42
|
214 void *ptr = FFTW(malloc(42));
|
Chris@42
|
215 BENCH_ASSERT(FFTW(alignment_of)(ptr) == 0);
|
Chris@42
|
216 FFTW(free(ptr));
|
Chris@42
|
217 }
|
Chris@42
|
218
|
Chris@42
|
219 rdwisdom();
|
Chris@42
|
220 install_hook();
|
Chris@42
|
221
|
Chris@42
|
222 #ifdef HAVE_SMP
|
Chris@42
|
223 if (verbose > 1 && nthreads > 1) printf("NTHREADS = %d\n", nthreads);
|
Chris@42
|
224 #endif
|
Chris@42
|
225
|
Chris@42
|
226 timer_start(USER_TIMER);
|
Chris@42
|
227 the_plan = mkplan(p, preserve_input_flags(p) | the_flags);
|
Chris@42
|
228 tim = timer_stop(USER_TIMER);
|
Chris@42
|
229 if (verbose > 1) printf("planner time: %g s\n", tim);
|
Chris@42
|
230
|
Chris@42
|
231 BENCH_ASSERT(the_plan);
|
Chris@42
|
232
|
Chris@42
|
233 {
|
Chris@42
|
234 double add, mul, nfma, cost, pcost;
|
Chris@42
|
235 FFTW(flops)(the_plan, &add, &mul, &nfma);
|
Chris@42
|
236 cost = FFTW(estimate_cost)(the_plan);
|
Chris@42
|
237 pcost = FFTW(cost)(the_plan);
|
Chris@42
|
238 if (verbose > 1) {
|
Chris@42
|
239 FFTW(print_plan)(the_plan);
|
Chris@42
|
240 printf("\n");
|
Chris@42
|
241 printf("flops: %0.0f add, %0.0f mul, %0.0f fma\n",
|
Chris@42
|
242 add, mul, nfma);
|
Chris@42
|
243 printf("estimated cost: %f, pcost = %f\n", cost, pcost);
|
Chris@42
|
244 }
|
Chris@42
|
245 }
|
Chris@42
|
246 }
|
Chris@42
|
247
|
Chris@42
|
248
|
Chris@42
|
249 void doit(int iter, bench_problem *p)
|
Chris@42
|
250 {
|
Chris@42
|
251 int i;
|
Chris@42
|
252 FFTW(plan) q = the_plan;
|
Chris@42
|
253
|
Chris@42
|
254 UNUSED(p);
|
Chris@42
|
255 for (i = 0; i < iter; ++i)
|
Chris@42
|
256 FFTW(execute)(q);
|
Chris@42
|
257 }
|
Chris@42
|
258
|
Chris@42
|
259 void done(bench_problem *p)
|
Chris@42
|
260 {
|
Chris@42
|
261 UNUSED(p);
|
Chris@42
|
262
|
Chris@42
|
263 FFTW(destroy_plan)(the_plan);
|
Chris@42
|
264 uninstall_hook();
|
Chris@42
|
265 }
|
Chris@42
|
266
|
Chris@42
|
267 void cleanup(void)
|
Chris@42
|
268 {
|
Chris@42
|
269 initial_cleanup();
|
Chris@42
|
270
|
Chris@42
|
271 wrwisdom();
|
Chris@42
|
272 #ifdef HAVE_SMP
|
Chris@42
|
273 FFTW(cleanup_threads)();
|
Chris@42
|
274 #else
|
Chris@42
|
275 FFTW(cleanup)();
|
Chris@42
|
276 #endif
|
Chris@42
|
277
|
Chris@42
|
278 # ifdef FFTW_DEBUG_MALLOC
|
Chris@42
|
279 {
|
Chris@42
|
280 /* undocumented memory checker */
|
Chris@42
|
281 FFTW_EXTERN void FFTW(malloc_print_minfo)(int v);
|
Chris@42
|
282 FFTW(malloc_print_minfo)(verbose);
|
Chris@42
|
283 }
|
Chris@42
|
284 # endif
|
Chris@42
|
285
|
Chris@42
|
286 final_cleanup();
|
Chris@42
|
287 }
|