Chris@10
|
1 /* See bench.c. We keep a few common subroutines in this file so
|
Chris@10
|
2 that they can be re-used in the MPI test program. */
|
Chris@10
|
3
|
Chris@10
|
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include "fftw-bench.h"
|
Chris@10
|
8
|
Chris@10
|
9 #ifdef _OPENMP
|
Chris@10
|
10 # include <omp.h>
|
Chris@10
|
11 #endif
|
Chris@10
|
12
|
Chris@10
|
13 #ifdef HAVE_SMP
|
Chris@10
|
14 int threads_ok = 1;
|
Chris@10
|
15 #endif
|
Chris@10
|
16
|
Chris@10
|
17 FFTW(plan) the_plan = 0;
|
Chris@10
|
18
|
Chris@10
|
19 static const char *wisdat = "wis.dat";
|
Chris@10
|
20 unsigned the_flags = 0;
|
Chris@10
|
21 int paranoid = 0;
|
Chris@10
|
22 int usewisdom = 0;
|
Chris@10
|
23 int havewisdom = 0;
|
Chris@10
|
24 int nthreads = 1;
|
Chris@10
|
25 int amnesia = 0;
|
Chris@10
|
26
|
Chris@10
|
27 extern void install_hook(void); /* in hook.c */
|
Chris@10
|
28 extern void uninstall_hook(void); /* in hook.c */
|
Chris@10
|
29
|
Chris@10
|
30 #ifdef FFTW_RANDOM_ESTIMATOR
|
Chris@10
|
31 extern unsigned FFTW(random_estimate_seed);
|
Chris@10
|
32 #endif
|
Chris@10
|
33
|
Chris@10
|
34 void useropt(const char *arg)
|
Chris@10
|
35 {
|
Chris@10
|
36 int x;
|
Chris@10
|
37 double y;
|
Chris@10
|
38
|
Chris@10
|
39 if (!strcmp(arg, "patient")) the_flags |= FFTW_PATIENT;
|
Chris@10
|
40 else if (!strcmp(arg, "estimate")) the_flags |= FFTW_ESTIMATE;
|
Chris@10
|
41 else if (!strcmp(arg, "estimatepat")) the_flags |= FFTW_ESTIMATE_PATIENT;
|
Chris@10
|
42 else if (!strcmp(arg, "exhaustive")) the_flags |= FFTW_EXHAUSTIVE;
|
Chris@10
|
43 else if (!strcmp(arg, "unaligned")) the_flags |= FFTW_UNALIGNED;
|
Chris@10
|
44 else if (!strcmp(arg, "nosimd")) the_flags |= FFTW_NO_SIMD;
|
Chris@10
|
45 else if (!strcmp(arg, "noindirectop")) the_flags |= FFTW_NO_INDIRECT_OP;
|
Chris@10
|
46 else if (!strcmp(arg, "wisdom-only")) the_flags |= FFTW_WISDOM_ONLY;
|
Chris@10
|
47 else if (sscanf(arg, "flag=%d", &x) == 1) the_flags |= x;
|
Chris@10
|
48 else if (sscanf(arg, "bflag=%d", &x) == 1) the_flags |= 1U << x;
|
Chris@10
|
49 else if (!strcmp(arg, "paranoid")) paranoid = 1;
|
Chris@10
|
50 else if (!strcmp(arg, "wisdom")) usewisdom = 1;
|
Chris@10
|
51 else if (!strcmp(arg, "amnesia")) amnesia = 1;
|
Chris@10
|
52 else if (sscanf(arg, "nthreads=%d", &x) == 1) nthreads = x;
|
Chris@10
|
53 #ifdef FFTW_RANDOM_ESTIMATOR
|
Chris@10
|
54 else if (sscanf(arg, "eseed=%d", &x) == 1) FFTW(random_estimate_seed) = x;
|
Chris@10
|
55 #endif
|
Chris@10
|
56 else if (sscanf(arg, "timelimit=%lg", &y) == 1) {
|
Chris@10
|
57 FFTW(set_timelimit)(y);
|
Chris@10
|
58 }
|
Chris@10
|
59
|
Chris@10
|
60 else fprintf(stderr, "unknown user option: %s. Ignoring.\n", arg);
|
Chris@10
|
61 }
|
Chris@10
|
62
|
Chris@10
|
63 void rdwisdom(void)
|
Chris@10
|
64 {
|
Chris@10
|
65 FILE *f;
|
Chris@10
|
66 double tim;
|
Chris@10
|
67 int success = 0;
|
Chris@10
|
68
|
Chris@10
|
69 if (havewisdom) return;
|
Chris@10
|
70
|
Chris@10
|
71 #ifdef HAVE_SMP
|
Chris@10
|
72 if (threads_ok) {
|
Chris@10
|
73 BENCH_ASSERT(FFTW(init_threads)());
|
Chris@10
|
74 FFTW(plan_with_nthreads)(nthreads);
|
Chris@10
|
75 #ifdef _OPENMP
|
Chris@10
|
76 omp_set_num_threads(nthreads);
|
Chris@10
|
77 #endif
|
Chris@10
|
78 }
|
Chris@10
|
79 else if (nthreads > 1 && verbose > 1) {
|
Chris@10
|
80 fprintf(stderr, "bench: WARNING - nthreads = %d, but threads not supported\n", nthreads);
|
Chris@10
|
81 nthreads = 1;
|
Chris@10
|
82 }
|
Chris@10
|
83 #endif
|
Chris@10
|
84
|
Chris@10
|
85 if (!usewisdom) return;
|
Chris@10
|
86
|
Chris@10
|
87 timer_start(USER_TIMER);
|
Chris@10
|
88 if ((f = fopen(wisdat, "r"))) {
|
Chris@10
|
89 if (!import_wisdom(f))
|
Chris@10
|
90 fprintf(stderr, "bench: ERROR reading wisdom\n");
|
Chris@10
|
91 else
|
Chris@10
|
92 success = 1;
|
Chris@10
|
93 fclose(f);
|
Chris@10
|
94 }
|
Chris@10
|
95 tim = timer_stop(USER_TIMER);
|
Chris@10
|
96
|
Chris@10
|
97 if (success) {
|
Chris@10
|
98 if (verbose > 1) printf("READ WISDOM (%g seconds): ", tim);
|
Chris@10
|
99
|
Chris@10
|
100 if (verbose > 3)
|
Chris@10
|
101 export_wisdom(stdout);
|
Chris@10
|
102 if (verbose > 1)
|
Chris@10
|
103 printf("\n");
|
Chris@10
|
104 }
|
Chris@10
|
105 havewisdom = 1;
|
Chris@10
|
106 }
|
Chris@10
|
107
|
Chris@10
|
108 void wrwisdom(void)
|
Chris@10
|
109 {
|
Chris@10
|
110 FILE *f;
|
Chris@10
|
111 double tim;
|
Chris@10
|
112 if (!havewisdom) return;
|
Chris@10
|
113
|
Chris@10
|
114 timer_start(USER_TIMER);
|
Chris@10
|
115 if ((f = fopen(wisdat, "w"))) {
|
Chris@10
|
116 export_wisdom(f);
|
Chris@10
|
117 fclose(f);
|
Chris@10
|
118 }
|
Chris@10
|
119 tim = timer_stop(USER_TIMER);
|
Chris@10
|
120 if (verbose > 1) printf("write wisdom took %g seconds\n", tim);
|
Chris@10
|
121 }
|
Chris@10
|
122
|
Chris@10
|
123 static unsigned preserve_input_flags(bench_problem *p)
|
Chris@10
|
124 {
|
Chris@10
|
125 /*
|
Chris@10
|
126 * fftw3 cannot preserve input for multidimensional c2r transforms.
|
Chris@10
|
127 * Enforce FFTW_DESTROY_INPUT
|
Chris@10
|
128 */
|
Chris@10
|
129 if (p->kind == PROBLEM_REAL &&
|
Chris@10
|
130 p->sign > 0 &&
|
Chris@10
|
131 !p->in_place &&
|
Chris@10
|
132 p->sz->rnk > 1)
|
Chris@10
|
133 p->destroy_input = 1;
|
Chris@10
|
134
|
Chris@10
|
135 if (p->destroy_input)
|
Chris@10
|
136 return FFTW_DESTROY_INPUT;
|
Chris@10
|
137 else
|
Chris@10
|
138 return FFTW_PRESERVE_INPUT;
|
Chris@10
|
139 }
|
Chris@10
|
140
|
Chris@10
|
141 int can_do(bench_problem *p)
|
Chris@10
|
142 {
|
Chris@10
|
143 double tim;
|
Chris@10
|
144
|
Chris@10
|
145 if (verbose > 2 && p->pstring)
|
Chris@10
|
146 printf("Planning %s...\n", p->pstring);
|
Chris@10
|
147 rdwisdom();
|
Chris@10
|
148
|
Chris@10
|
149 timer_start(USER_TIMER);
|
Chris@10
|
150 the_plan = mkplan(p, preserve_input_flags(p) | the_flags | FFTW_ESTIMATE);
|
Chris@10
|
151 tim = timer_stop(USER_TIMER);
|
Chris@10
|
152 if (verbose > 2) printf("estimate-planner time: %g s\n", tim);
|
Chris@10
|
153
|
Chris@10
|
154 if (the_plan) {
|
Chris@10
|
155 FFTW(destroy_plan)(the_plan);
|
Chris@10
|
156 return 1;
|
Chris@10
|
157 }
|
Chris@10
|
158 return 0;
|
Chris@10
|
159 }
|
Chris@10
|
160
|
Chris@10
|
161 void setup(bench_problem *p)
|
Chris@10
|
162 {
|
Chris@10
|
163 double tim;
|
Chris@10
|
164
|
Chris@10
|
165 if (amnesia) {
|
Chris@10
|
166 FFTW(forget_wisdom)();
|
Chris@10
|
167 havewisdom = 0;
|
Chris@10
|
168 }
|
Chris@10
|
169
|
Chris@10
|
170 /* Regression test: check that fftw_malloc exists and links
|
Chris@10
|
171 * properly */
|
Chris@10
|
172 FFTW(free(FFTW(malloc(42))));
|
Chris@10
|
173
|
Chris@10
|
174 rdwisdom();
|
Chris@10
|
175 install_hook();
|
Chris@10
|
176
|
Chris@10
|
177 #ifdef HAVE_SMP
|
Chris@10
|
178 if (verbose > 1 && nthreads > 1) printf("NTHREADS = %d\n", nthreads);
|
Chris@10
|
179 #endif
|
Chris@10
|
180
|
Chris@10
|
181 timer_start(USER_TIMER);
|
Chris@10
|
182 the_plan = mkplan(p, preserve_input_flags(p) | the_flags);
|
Chris@10
|
183 tim = timer_stop(USER_TIMER);
|
Chris@10
|
184 if (verbose > 1) printf("planner time: %g s\n", tim);
|
Chris@10
|
185
|
Chris@10
|
186 BENCH_ASSERT(the_plan);
|
Chris@10
|
187
|
Chris@10
|
188 {
|
Chris@10
|
189 double add, mul, nfma, cost, pcost;
|
Chris@10
|
190 FFTW(flops)(the_plan, &add, &mul, &nfma);
|
Chris@10
|
191 cost = FFTW(estimate_cost)(the_plan);
|
Chris@10
|
192 pcost = FFTW(cost)(the_plan);
|
Chris@10
|
193 if (verbose > 1) {
|
Chris@10
|
194 FFTW(print_plan)(the_plan);
|
Chris@10
|
195 printf("\n");
|
Chris@10
|
196 printf("flops: %0.0f add, %0.0f mul, %0.0f fma\n",
|
Chris@10
|
197 add, mul, nfma);
|
Chris@10
|
198 printf("estimated cost: %f, pcost = %f\n", cost, pcost);
|
Chris@10
|
199 }
|
Chris@10
|
200 }
|
Chris@10
|
201 }
|
Chris@10
|
202
|
Chris@10
|
203
|
Chris@10
|
204 void doit(int iter, bench_problem *p)
|
Chris@10
|
205 {
|
Chris@10
|
206 int i;
|
Chris@10
|
207 FFTW(plan) q = the_plan;
|
Chris@10
|
208
|
Chris@10
|
209 UNUSED(p);
|
Chris@10
|
210 for (i = 0; i < iter; ++i)
|
Chris@10
|
211 FFTW(execute)(q);
|
Chris@10
|
212 }
|
Chris@10
|
213
|
Chris@10
|
214 void done(bench_problem *p)
|
Chris@10
|
215 {
|
Chris@10
|
216 UNUSED(p);
|
Chris@10
|
217
|
Chris@10
|
218 FFTW(destroy_plan)(the_plan);
|
Chris@10
|
219 uninstall_hook();
|
Chris@10
|
220 }
|
Chris@10
|
221
|
Chris@10
|
222 void cleanup(void)
|
Chris@10
|
223 {
|
Chris@10
|
224 initial_cleanup();
|
Chris@10
|
225
|
Chris@10
|
226 wrwisdom();
|
Chris@10
|
227 #ifdef HAVE_SMP
|
Chris@10
|
228 FFTW(cleanup_threads)();
|
Chris@10
|
229 #else
|
Chris@10
|
230 FFTW(cleanup)();
|
Chris@10
|
231 #endif
|
Chris@10
|
232
|
Chris@10
|
233 # ifdef FFTW_DEBUG_MALLOC
|
Chris@10
|
234 {
|
Chris@10
|
235 /* undocumented memory checker */
|
Chris@10
|
236 FFTW_EXTERN void FFTW(malloc_print_minfo)(int v);
|
Chris@10
|
237 FFTW(malloc_print_minfo)(verbose);
|
Chris@10
|
238 }
|
Chris@10
|
239 # endif
|
Chris@10
|
240
|
Chris@10
|
241 final_cleanup();
|
Chris@10
|
242 }
|