Chris@42
|
/**************************************************************************/
/* NOTE to users: this is the FFTW self-test and benchmark program.
   It is probably NOT a good place to learn FFTW usage, since it has a
   lot of added complexity in order to exercise and test the full API,
   et cetera.  We suggest reading the manual.

   (Some of the self-test code is split off into fftw-bench.c and
   hook.c.) */
/**************************************************************************/
|
Chris@42
|
10
|
Chris@42
|
11 #include <math.h>
|
Chris@42
|
12 #include <stdio.h>
|
Chris@42
|
13 #include <string.h>
|
Chris@42
|
14 #include "fftw-bench.h"
|
Chris@42
|
15
|
Chris@42
|
16 static const char *mkversion(void) { return FFTW(version); }
|
Chris@42
|
17 static const char *mkcc(void) { return FFTW(cc); }
|
Chris@42
|
18 static const char *mkcodelet_optim(void) { return FFTW(codelet_optim); }
|
Chris@42
|
19
|
Chris@42
|
20 BEGIN_BENCH_DOC
|
Chris@42
|
21 BENCH_DOC("name", "fftw3")
|
Chris@42
|
22 BENCH_DOCF("version", mkversion)
|
Chris@42
|
23 BENCH_DOCF("cc", mkcc)
|
Chris@42
|
24 BENCH_DOCF("codelet-optim", mkcodelet_optim)
|
Chris@42
|
25 END_BENCH_DOC
|
Chris@42
|
26
|
Chris@42
|
27 static FFTW(iodim) *bench_tensor_to_fftw_iodim(bench_tensor *t)
|
Chris@42
|
28 {
|
Chris@42
|
29 FFTW(iodim) *d;
|
Chris@42
|
30 int i;
|
Chris@42
|
31
|
Chris@42
|
32 BENCH_ASSERT(t->rnk >= 0);
|
Chris@42
|
33 if (t->rnk == 0) return 0;
|
Chris@42
|
34
|
Chris@42
|
35 d = (FFTW(iodim) *)bench_malloc(sizeof(FFTW(iodim)) * t->rnk);
|
Chris@42
|
36 for (i = 0; i < t->rnk; ++i) {
|
Chris@42
|
37 d[i].n = t->dims[i].n;
|
Chris@42
|
38 d[i].is = t->dims[i].is;
|
Chris@42
|
39 d[i].os = t->dims[i].os;
|
Chris@42
|
40 }
|
Chris@42
|
41
|
Chris@42
|
42 return d;
|
Chris@42
|
43 }
|
Chris@42
|
44
|
Chris@42
|
45 static void extract_reim_split(int sign, int size, bench_real *p,
|
Chris@42
|
46 bench_real **r, bench_real **i)
|
Chris@42
|
47 {
|
Chris@42
|
48 if (sign == FFTW_FORWARD) {
|
Chris@42
|
49 *r = p + 0;
|
Chris@42
|
50 *i = p + size;
|
Chris@42
|
51 } else {
|
Chris@42
|
52 *r = p + size;
|
Chris@42
|
53 *i = p + 0;
|
Chris@42
|
54 }
|
Chris@42
|
55 }
|
Chris@42
|
56
|
Chris@42
|
57 static int sizeof_problem(bench_problem *p)
|
Chris@42
|
58 {
|
Chris@42
|
59 return tensor_sz(p->sz) * tensor_sz(p->vecsz);
|
Chris@42
|
60 }
|
Chris@42
|
61
|
Chris@42
|
62 /* ouch */
|
Chris@42
|
63 static int expressible_as_api_many(bench_tensor *t)
|
Chris@42
|
64 {
|
Chris@42
|
65 int i;
|
Chris@42
|
66
|
Chris@42
|
67 BENCH_ASSERT(BENCH_FINITE_RNK(t->rnk));
|
Chris@42
|
68
|
Chris@42
|
69 i = t->rnk - 1;
|
Chris@42
|
70 while (--i >= 0) {
|
Chris@42
|
71 bench_iodim *d = t->dims + i;
|
Chris@42
|
72 if (d[0].is % d[1].is) return 0;
|
Chris@42
|
73 if (d[0].os % d[1].os) return 0;
|
Chris@42
|
74 }
|
Chris@42
|
75 return 1;
|
Chris@42
|
76 }
|
Chris@42
|
77
|
Chris@42
|
78 static int *mkn(bench_tensor *t)
|
Chris@42
|
79 {
|
Chris@42
|
80 int *n = (int *) bench_malloc(sizeof(int *) * t->rnk);
|
Chris@42
|
81 int i;
|
Chris@42
|
82 for (i = 0; i < t->rnk; ++i)
|
Chris@42
|
83 n[i] = t->dims[i].n;
|
Chris@42
|
84 return n;
|
Chris@42
|
85 }
|
Chris@42
|
86
|
Chris@42
|
87 static void mknembed_many(bench_tensor *t, int **inembedp, int **onembedp)
|
Chris@42
|
88 {
|
Chris@42
|
89 int i;
|
Chris@42
|
90 bench_iodim *d;
|
Chris@42
|
91 int *inembed = (int *) bench_malloc(sizeof(int *) * t->rnk);
|
Chris@42
|
92 int *onembed = (int *) bench_malloc(sizeof(int *) * t->rnk);
|
Chris@42
|
93
|
Chris@42
|
94 BENCH_ASSERT(BENCH_FINITE_RNK(t->rnk));
|
Chris@42
|
95 *inembedp = inembed; *onembedp = onembed;
|
Chris@42
|
96
|
Chris@42
|
97 i = t->rnk - 1;
|
Chris@42
|
98 while (--i >= 0) {
|
Chris@42
|
99 d = t->dims + i;
|
Chris@42
|
100 inembed[i+1] = d[0].is / d[1].is;
|
Chris@42
|
101 onembed[i+1] = d[0].os / d[1].os;
|
Chris@42
|
102 }
|
Chris@42
|
103 }
|
Chris@42
|
104
|
Chris@42
|
105 /* try to use the most appropriate API function. Big mess. */
|
Chris@42
|
106
|
Chris@42
|
/* Larger of two ints; when they compare equal, b is returned. */
static int imax(int a, int b)
{
    if (b < a)
        return a;
    return b;
}
|
Chris@42
|
108
|
Chris@42
|
109 static int halfish_sizeof_problem(bench_problem *p)
|
Chris@42
|
110 {
|
Chris@42
|
111 int n2 = sizeof_problem(p);
|
Chris@42
|
112 if (BENCH_FINITE_RNK(p->sz->rnk) && p->sz->rnk > 0)
|
Chris@42
|
113 n2 = (n2 / imax(p->sz->dims[p->sz->rnk - 1].n, 1)) *
|
Chris@42
|
114 (p->sz->dims[p->sz->rnk - 1].n / 2 + 1);
|
Chris@42
|
115 return n2;
|
Chris@42
|
116 }
|
Chris@42
|
117
|
Chris@42
|
118 static FFTW(plan) mkplan_real_split(bench_problem *p, unsigned flags)
|
Chris@42
|
119 {
|
Chris@42
|
120 FFTW(plan) pln;
|
Chris@42
|
121 bench_tensor *sz = p->sz, *vecsz = p->vecsz;
|
Chris@42
|
122 FFTW(iodim) *dims, *howmany_dims;
|
Chris@42
|
123 bench_real *ri, *ii, *ro, *io;
|
Chris@42
|
124 int n2 = halfish_sizeof_problem(p);
|
Chris@42
|
125
|
Chris@42
|
126 extract_reim_split(FFTW_FORWARD, n2, (bench_real *) p->in, &ri, &ii);
|
Chris@42
|
127 extract_reim_split(FFTW_FORWARD, n2, (bench_real *) p->out, &ro, &io);
|
Chris@42
|
128
|
Chris@42
|
129 dims = bench_tensor_to_fftw_iodim(sz);
|
Chris@42
|
130 howmany_dims = bench_tensor_to_fftw_iodim(vecsz);
|
Chris@42
|
131 if (p->sign < 0) {
|
Chris@42
|
132 if (verbose > 2) printf("using plan_guru_split_dft_r2c\n");
|
Chris@42
|
133 pln = FFTW(plan_guru_split_dft_r2c)(sz->rnk, dims,
|
Chris@42
|
134 vecsz->rnk, howmany_dims,
|
Chris@42
|
135 ri, ro, io, flags);
|
Chris@42
|
136 }
|
Chris@42
|
137 else {
|
Chris@42
|
138 if (verbose > 2) printf("using plan_guru_split_dft_c2r\n");
|
Chris@42
|
139 pln = FFTW(plan_guru_split_dft_c2r)(sz->rnk, dims,
|
Chris@42
|
140 vecsz->rnk, howmany_dims,
|
Chris@42
|
141 ri, ii, ro, flags);
|
Chris@42
|
142 }
|
Chris@42
|
143 bench_free(dims);
|
Chris@42
|
144 bench_free(howmany_dims);
|
Chris@42
|
145 return pln;
|
Chris@42
|
146 }
|
Chris@42
|
147
|
Chris@42
|
148 static FFTW(plan) mkplan_real_interleaved(bench_problem *p, unsigned flags)
|
Chris@42
|
149 {
|
Chris@42
|
150 FFTW(plan) pln;
|
Chris@42
|
151 bench_tensor *sz = p->sz, *vecsz = p->vecsz;
|
Chris@42
|
152
|
Chris@42
|
153 if (vecsz->rnk == 0 && tensor_unitstridep(sz)
|
Chris@42
|
154 && tensor_real_rowmajorp(sz, p->sign, p->in_place))
|
Chris@42
|
155 goto api_simple;
|
Chris@42
|
156
|
Chris@42
|
157 if (vecsz->rnk == 1 && expressible_as_api_many(sz))
|
Chris@42
|
158 goto api_many;
|
Chris@42
|
159
|
Chris@42
|
160 goto api_guru;
|
Chris@42
|
161
|
Chris@42
|
162 api_simple:
|
Chris@42
|
163 switch (sz->rnk) {
|
Chris@42
|
164 case 1:
|
Chris@42
|
165 if (p->sign < 0) {
|
Chris@42
|
166 if (verbose > 2) printf("using plan_dft_r2c_1d\n");
|
Chris@42
|
167 return FFTW(plan_dft_r2c_1d)(sz->dims[0].n,
|
Chris@42
|
168 (bench_real *) p->in,
|
Chris@42
|
169 (bench_complex *) p->out,
|
Chris@42
|
170 flags);
|
Chris@42
|
171 }
|
Chris@42
|
172 else {
|
Chris@42
|
173 if (verbose > 2) printf("using plan_dft_c2r_1d\n");
|
Chris@42
|
174 return FFTW(plan_dft_c2r_1d)(sz->dims[0].n,
|
Chris@42
|
175 (bench_complex *) p->in,
|
Chris@42
|
176 (bench_real *) p->out,
|
Chris@42
|
177 flags);
|
Chris@42
|
178 }
|
Chris@42
|
179 break;
|
Chris@42
|
180 case 2:
|
Chris@42
|
181 if (p->sign < 0) {
|
Chris@42
|
182 if (verbose > 2) printf("using plan_dft_r2c_2d\n");
|
Chris@42
|
183 return FFTW(plan_dft_r2c_2d)(sz->dims[0].n, sz->dims[1].n,
|
Chris@42
|
184 (bench_real *) p->in,
|
Chris@42
|
185 (bench_complex *) p->out,
|
Chris@42
|
186 flags);
|
Chris@42
|
187 }
|
Chris@42
|
188 else {
|
Chris@42
|
189 if (verbose > 2) printf("using plan_dft_c2r_2d\n");
|
Chris@42
|
190 return FFTW(plan_dft_c2r_2d)(sz->dims[0].n, sz->dims[1].n,
|
Chris@42
|
191 (bench_complex *) p->in,
|
Chris@42
|
192 (bench_real *) p->out,
|
Chris@42
|
193 flags);
|
Chris@42
|
194 }
|
Chris@42
|
195 break;
|
Chris@42
|
196 case 3:
|
Chris@42
|
197 if (p->sign < 0) {
|
Chris@42
|
198 if (verbose > 2) printf("using plan_dft_r2c_3d\n");
|
Chris@42
|
199 return FFTW(plan_dft_r2c_3d)(
|
Chris@42
|
200 sz->dims[0].n, sz->dims[1].n, sz->dims[2].n,
|
Chris@42
|
201 (bench_real *) p->in, (bench_complex *) p->out,
|
Chris@42
|
202 flags);
|
Chris@42
|
203 }
|
Chris@42
|
204 else {
|
Chris@42
|
205 if (verbose > 2) printf("using plan_dft_c2r_3d\n");
|
Chris@42
|
206 return FFTW(plan_dft_c2r_3d)(
|
Chris@42
|
207 sz->dims[0].n, sz->dims[1].n, sz->dims[2].n,
|
Chris@42
|
208 (bench_complex *) p->in, (bench_real *) p->out,
|
Chris@42
|
209 flags);
|
Chris@42
|
210 }
|
Chris@42
|
211 break;
|
Chris@42
|
212 default: {
|
Chris@42
|
213 int *n = mkn(sz);
|
Chris@42
|
214 if (p->sign < 0) {
|
Chris@42
|
215 if (verbose > 2) printf("using plan_dft_r2c\n");
|
Chris@42
|
216 pln = FFTW(plan_dft_r2c)(sz->rnk, n,
|
Chris@42
|
217 (bench_real *) p->in,
|
Chris@42
|
218 (bench_complex *) p->out,
|
Chris@42
|
219 flags);
|
Chris@42
|
220 }
|
Chris@42
|
221 else {
|
Chris@42
|
222 if (verbose > 2) printf("using plan_dft_c2r\n");
|
Chris@42
|
223 pln = FFTW(plan_dft_c2r)(sz->rnk, n,
|
Chris@42
|
224 (bench_complex *) p->in,
|
Chris@42
|
225 (bench_real *) p->out,
|
Chris@42
|
226 flags);
|
Chris@42
|
227 }
|
Chris@42
|
228 bench_free(n);
|
Chris@42
|
229 return pln;
|
Chris@42
|
230 }
|
Chris@42
|
231 }
|
Chris@42
|
232
|
Chris@42
|
233 api_many:
|
Chris@42
|
234 {
|
Chris@42
|
235 int *n, *inembed, *onembed;
|
Chris@42
|
236 BENCH_ASSERT(vecsz->rnk == 1);
|
Chris@42
|
237 n = mkn(sz);
|
Chris@42
|
238 mknembed_many(sz, &inembed, &onembed);
|
Chris@42
|
239 if (p->sign < 0) {
|
Chris@42
|
240 if (verbose > 2) printf("using plan_many_dft_r2c\n");
|
Chris@42
|
241 pln = FFTW(plan_many_dft_r2c)(
|
Chris@42
|
242 sz->rnk, n, vecsz->dims[0].n,
|
Chris@42
|
243 (bench_real *) p->in, inembed,
|
Chris@42
|
244 sz->dims[sz->rnk - 1].is, vecsz->dims[0].is,
|
Chris@42
|
245 (bench_complex *) p->out, onembed,
|
Chris@42
|
246 sz->dims[sz->rnk - 1].os, vecsz->dims[0].os,
|
Chris@42
|
247 flags);
|
Chris@42
|
248 }
|
Chris@42
|
249 else {
|
Chris@42
|
250 if (verbose > 2) printf("using plan_many_dft_c2r\n");
|
Chris@42
|
251 pln = FFTW(plan_many_dft_c2r)(
|
Chris@42
|
252 sz->rnk, n, vecsz->dims[0].n,
|
Chris@42
|
253 (bench_complex *) p->in, inembed,
|
Chris@42
|
254 sz->dims[sz->rnk - 1].is, vecsz->dims[0].is,
|
Chris@42
|
255 (bench_real *) p->out, onembed,
|
Chris@42
|
256 sz->dims[sz->rnk - 1].os, vecsz->dims[0].os,
|
Chris@42
|
257 flags);
|
Chris@42
|
258 }
|
Chris@42
|
259 bench_free(n); bench_free(inembed); bench_free(onembed);
|
Chris@42
|
260 return pln;
|
Chris@42
|
261 }
|
Chris@42
|
262
|
Chris@42
|
263 api_guru:
|
Chris@42
|
264 {
|
Chris@42
|
265 FFTW(iodim) *dims, *howmany_dims;
|
Chris@42
|
266
|
Chris@42
|
267 if (p->sign < 0) {
|
Chris@42
|
268 dims = bench_tensor_to_fftw_iodim(sz);
|
Chris@42
|
269 howmany_dims = bench_tensor_to_fftw_iodim(vecsz);
|
Chris@42
|
270 if (verbose > 2) printf("using plan_guru_dft_r2c\n");
|
Chris@42
|
271 pln = FFTW(plan_guru_dft_r2c)(sz->rnk, dims,
|
Chris@42
|
272 vecsz->rnk, howmany_dims,
|
Chris@42
|
273 (bench_real *) p->in,
|
Chris@42
|
274 (bench_complex *) p->out,
|
Chris@42
|
275 flags);
|
Chris@42
|
276 }
|
Chris@42
|
277 else {
|
Chris@42
|
278 dims = bench_tensor_to_fftw_iodim(sz);
|
Chris@42
|
279 howmany_dims = bench_tensor_to_fftw_iodim(vecsz);
|
Chris@42
|
280 if (verbose > 2) printf("using plan_guru_dft_c2r\n");
|
Chris@42
|
281 pln = FFTW(plan_guru_dft_c2r)(sz->rnk, dims,
|
Chris@42
|
282 vecsz->rnk, howmany_dims,
|
Chris@42
|
283 (bench_complex *) p->in,
|
Chris@42
|
284 (bench_real *) p->out,
|
Chris@42
|
285 flags);
|
Chris@42
|
286 }
|
Chris@42
|
287 bench_free(dims);
|
Chris@42
|
288 bench_free(howmany_dims);
|
Chris@42
|
289 return pln;
|
Chris@42
|
290 }
|
Chris@42
|
291 }
|
Chris@42
|
292
|
Chris@42
|
293 static FFTW(plan) mkplan_real(bench_problem *p, unsigned flags)
|
Chris@42
|
294 {
|
Chris@42
|
295 if (p->split)
|
Chris@42
|
296 return mkplan_real_split(p, flags);
|
Chris@42
|
297 else
|
Chris@42
|
298 return mkplan_real_interleaved(p, flags);
|
Chris@42
|
299 }
|
Chris@42
|
300
|
Chris@42
|
301 static FFTW(plan) mkplan_complex_split(bench_problem *p, unsigned flags)
|
Chris@42
|
302 {
|
Chris@42
|
303 FFTW(plan) pln;
|
Chris@42
|
304 bench_tensor *sz = p->sz, *vecsz = p->vecsz;
|
Chris@42
|
305 FFTW(iodim) *dims, *howmany_dims;
|
Chris@42
|
306 bench_real *ri, *ii, *ro, *io;
|
Chris@42
|
307
|
Chris@42
|
308 extract_reim_split(p->sign, p->iphyssz, (bench_real *) p->in, &ri, &ii);
|
Chris@42
|
309 extract_reim_split(p->sign, p->ophyssz, (bench_real *) p->out, &ro, &io);
|
Chris@42
|
310
|
Chris@42
|
311 dims = bench_tensor_to_fftw_iodim(sz);
|
Chris@42
|
312 howmany_dims = bench_tensor_to_fftw_iodim(vecsz);
|
Chris@42
|
313 if (verbose > 2) printf("using plan_guru_split_dft\n");
|
Chris@42
|
314 pln = FFTW(plan_guru_split_dft)(sz->rnk, dims,
|
Chris@42
|
315 vecsz->rnk, howmany_dims,
|
Chris@42
|
316 ri, ii, ro, io, flags);
|
Chris@42
|
317 bench_free(dims);
|
Chris@42
|
318 bench_free(howmany_dims);
|
Chris@42
|
319 return pln;
|
Chris@42
|
320 }
|
Chris@42
|
321
|
Chris@42
|
322 static FFTW(plan) mkplan_complex_interleaved(bench_problem *p, unsigned flags)
|
Chris@42
|
323 {
|
Chris@42
|
324 FFTW(plan) pln;
|
Chris@42
|
325 bench_tensor *sz = p->sz, *vecsz = p->vecsz;
|
Chris@42
|
326
|
Chris@42
|
327 if (vecsz->rnk == 0 && tensor_unitstridep(sz) && tensor_rowmajorp(sz))
|
Chris@42
|
328 goto api_simple;
|
Chris@42
|
329
|
Chris@42
|
330 if (vecsz->rnk == 1 && expressible_as_api_many(sz))
|
Chris@42
|
331 goto api_many;
|
Chris@42
|
332
|
Chris@42
|
333 goto api_guru;
|
Chris@42
|
334
|
Chris@42
|
335 api_simple:
|
Chris@42
|
336 switch (sz->rnk) {
|
Chris@42
|
337 case 1:
|
Chris@42
|
338 if (verbose > 2) printf("using plan_dft_1d\n");
|
Chris@42
|
339 return FFTW(plan_dft_1d)(sz->dims[0].n,
|
Chris@42
|
340 (bench_complex *) p->in,
|
Chris@42
|
341 (bench_complex *) p->out,
|
Chris@42
|
342 p->sign, flags);
|
Chris@42
|
343 break;
|
Chris@42
|
344 case 2:
|
Chris@42
|
345 if (verbose > 2) printf("using plan_dft_2d\n");
|
Chris@42
|
346 return FFTW(plan_dft_2d)(sz->dims[0].n, sz->dims[1].n,
|
Chris@42
|
347 (bench_complex *) p->in,
|
Chris@42
|
348 (bench_complex *) p->out,
|
Chris@42
|
349 p->sign, flags);
|
Chris@42
|
350 break;
|
Chris@42
|
351 case 3:
|
Chris@42
|
352 if (verbose > 2) printf("using plan_dft_3d\n");
|
Chris@42
|
353 return FFTW(plan_dft_3d)(
|
Chris@42
|
354 sz->dims[0].n, sz->dims[1].n, sz->dims[2].n,
|
Chris@42
|
355 (bench_complex *) p->in, (bench_complex *) p->out,
|
Chris@42
|
356 p->sign, flags);
|
Chris@42
|
357 break;
|
Chris@42
|
358 default: {
|
Chris@42
|
359 int *n = mkn(sz);
|
Chris@42
|
360 if (verbose > 2) printf("using plan_dft\n");
|
Chris@42
|
361 pln = FFTW(plan_dft)(sz->rnk, n,
|
Chris@42
|
362 (bench_complex *) p->in,
|
Chris@42
|
363 (bench_complex *) p->out, p->sign, flags);
|
Chris@42
|
364 bench_free(n);
|
Chris@42
|
365 return pln;
|
Chris@42
|
366 }
|
Chris@42
|
367 }
|
Chris@42
|
368
|
Chris@42
|
369 api_many:
|
Chris@42
|
370 {
|
Chris@42
|
371 int *n, *inembed, *onembed;
|
Chris@42
|
372 BENCH_ASSERT(vecsz->rnk == 1);
|
Chris@42
|
373 n = mkn(sz);
|
Chris@42
|
374 mknembed_many(sz, &inembed, &onembed);
|
Chris@42
|
375 if (verbose > 2) printf("using plan_many_dft\n");
|
Chris@42
|
376 pln = FFTW(plan_many_dft)(
|
Chris@42
|
377 sz->rnk, n, vecsz->dims[0].n,
|
Chris@42
|
378 (bench_complex *) p->in,
|
Chris@42
|
379 inembed, sz->dims[sz->rnk - 1].is, vecsz->dims[0].is,
|
Chris@42
|
380 (bench_complex *) p->out,
|
Chris@42
|
381 onembed, sz->dims[sz->rnk - 1].os, vecsz->dims[0].os,
|
Chris@42
|
382 p->sign, flags);
|
Chris@42
|
383 bench_free(n); bench_free(inembed); bench_free(onembed);
|
Chris@42
|
384 return pln;
|
Chris@42
|
385 }
|
Chris@42
|
386
|
Chris@42
|
387 api_guru:
|
Chris@42
|
388 {
|
Chris@42
|
389 FFTW(iodim) *dims, *howmany_dims;
|
Chris@42
|
390
|
Chris@42
|
391 dims = bench_tensor_to_fftw_iodim(sz);
|
Chris@42
|
392 howmany_dims = bench_tensor_to_fftw_iodim(vecsz);
|
Chris@42
|
393 if (verbose > 2) printf("using plan_guru_dft\n");
|
Chris@42
|
394 pln = FFTW(plan_guru_dft)(sz->rnk, dims,
|
Chris@42
|
395 vecsz->rnk, howmany_dims,
|
Chris@42
|
396 (bench_complex *) p->in,
|
Chris@42
|
397 (bench_complex *) p->out,
|
Chris@42
|
398 p->sign, flags);
|
Chris@42
|
399 bench_free(dims);
|
Chris@42
|
400 bench_free(howmany_dims);
|
Chris@42
|
401 return pln;
|
Chris@42
|
402 }
|
Chris@42
|
403 }
|
Chris@42
|
404
|
Chris@42
|
405 static FFTW(plan) mkplan_complex(bench_problem *p, unsigned flags)
|
Chris@42
|
406 {
|
Chris@42
|
407 if (p->split)
|
Chris@42
|
408 return mkplan_complex_split(p, flags);
|
Chris@42
|
409 else
|
Chris@42
|
410 return mkplan_complex_interleaved(p, flags);
|
Chris@42
|
411 }
|
Chris@42
|
412
|
Chris@42
|
413 static FFTW(plan) mkplan_r2r(bench_problem *p, unsigned flags)
|
Chris@42
|
414 {
|
Chris@42
|
415 FFTW(plan) pln;
|
Chris@42
|
416 bench_tensor *sz = p->sz, *vecsz = p->vecsz;
|
Chris@42
|
417 FFTW(r2r_kind) *k;
|
Chris@42
|
418
|
Chris@42
|
419 k = (FFTW(r2r_kind) *) bench_malloc(sizeof(FFTW(r2r_kind)) * sz->rnk);
|
Chris@42
|
420 {
|
Chris@42
|
421 int i;
|
Chris@42
|
422 for (i = 0; i < sz->rnk; ++i)
|
Chris@42
|
423 switch (p->k[i]) {
|
Chris@42
|
424 case R2R_R2HC: k[i] = FFTW_R2HC; break;
|
Chris@42
|
425 case R2R_HC2R: k[i] = FFTW_HC2R; break;
|
Chris@42
|
426 case R2R_DHT: k[i] = FFTW_DHT; break;
|
Chris@42
|
427 case R2R_REDFT00: k[i] = FFTW_REDFT00; break;
|
Chris@42
|
428 case R2R_REDFT01: k[i] = FFTW_REDFT01; break;
|
Chris@42
|
429 case R2R_REDFT10: k[i] = FFTW_REDFT10; break;
|
Chris@42
|
430 case R2R_REDFT11: k[i] = FFTW_REDFT11; break;
|
Chris@42
|
431 case R2R_RODFT00: k[i] = FFTW_RODFT00; break;
|
Chris@42
|
432 case R2R_RODFT01: k[i] = FFTW_RODFT01; break;
|
Chris@42
|
433 case R2R_RODFT10: k[i] = FFTW_RODFT10; break;
|
Chris@42
|
434 case R2R_RODFT11: k[i] = FFTW_RODFT11; break;
|
Chris@42
|
435 default: BENCH_ASSERT(0);
|
Chris@42
|
436 }
|
Chris@42
|
437 }
|
Chris@42
|
438
|
Chris@42
|
439 if (vecsz->rnk == 0 && tensor_unitstridep(sz) && tensor_rowmajorp(sz))
|
Chris@42
|
440 goto api_simple;
|
Chris@42
|
441
|
Chris@42
|
442 if (vecsz->rnk == 1 && expressible_as_api_many(sz))
|
Chris@42
|
443 goto api_many;
|
Chris@42
|
444
|
Chris@42
|
445 goto api_guru;
|
Chris@42
|
446
|
Chris@42
|
447 api_simple:
|
Chris@42
|
448 switch (sz->rnk) {
|
Chris@42
|
449 case 1:
|
Chris@42
|
450 if (verbose > 2) printf("using plan_r2r_1d\n");
|
Chris@42
|
451 pln = FFTW(plan_r2r_1d)(sz->dims[0].n,
|
Chris@42
|
452 (bench_real *) p->in,
|
Chris@42
|
453 (bench_real *) p->out,
|
Chris@42
|
454 k[0], flags);
|
Chris@42
|
455 goto done;
|
Chris@42
|
456 case 2:
|
Chris@42
|
457 if (verbose > 2) printf("using plan_r2r_2d\n");
|
Chris@42
|
458 pln = FFTW(plan_r2r_2d)(sz->dims[0].n, sz->dims[1].n,
|
Chris@42
|
459 (bench_real *) p->in,
|
Chris@42
|
460 (bench_real *) p->out,
|
Chris@42
|
461 k[0], k[1], flags);
|
Chris@42
|
462 goto done;
|
Chris@42
|
463 case 3:
|
Chris@42
|
464 if (verbose > 2) printf("using plan_r2r_3d\n");
|
Chris@42
|
465 pln = FFTW(plan_r2r_3d)(
|
Chris@42
|
466 sz->dims[0].n, sz->dims[1].n, sz->dims[2].n,
|
Chris@42
|
467 (bench_real *) p->in, (bench_real *) p->out,
|
Chris@42
|
468 k[0], k[1], k[2], flags);
|
Chris@42
|
469 goto done;
|
Chris@42
|
470 default: {
|
Chris@42
|
471 int *n = mkn(sz);
|
Chris@42
|
472 if (verbose > 2) printf("using plan_r2r\n");
|
Chris@42
|
473 pln = FFTW(plan_r2r)(sz->rnk, n,
|
Chris@42
|
474 (bench_real *) p->in, (bench_real *) p->out,
|
Chris@42
|
475 k, flags);
|
Chris@42
|
476 bench_free(n);
|
Chris@42
|
477 goto done;
|
Chris@42
|
478 }
|
Chris@42
|
479 }
|
Chris@42
|
480
|
Chris@42
|
481 api_many:
|
Chris@42
|
482 {
|
Chris@42
|
483 int *n, *inembed, *onembed;
|
Chris@42
|
484 BENCH_ASSERT(vecsz->rnk == 1);
|
Chris@42
|
485 n = mkn(sz);
|
Chris@42
|
486 mknembed_many(sz, &inembed, &onembed);
|
Chris@42
|
487 if (verbose > 2) printf("using plan_many_r2r\n");
|
Chris@42
|
488 pln = FFTW(plan_many_r2r)(
|
Chris@42
|
489 sz->rnk, n, vecsz->dims[0].n,
|
Chris@42
|
490 (bench_real *) p->in,
|
Chris@42
|
491 inembed, sz->dims[sz->rnk - 1].is, vecsz->dims[0].is,
|
Chris@42
|
492 (bench_real *) p->out,
|
Chris@42
|
493 onembed, sz->dims[sz->rnk - 1].os, vecsz->dims[0].os,
|
Chris@42
|
494 k, flags);
|
Chris@42
|
495 bench_free(n); bench_free(inembed); bench_free(onembed);
|
Chris@42
|
496 goto done;
|
Chris@42
|
497 }
|
Chris@42
|
498
|
Chris@42
|
499 api_guru:
|
Chris@42
|
500 {
|
Chris@42
|
501 FFTW(iodim) *dims, *howmany_dims;
|
Chris@42
|
502
|
Chris@42
|
503 dims = bench_tensor_to_fftw_iodim(sz);
|
Chris@42
|
504 howmany_dims = bench_tensor_to_fftw_iodim(vecsz);
|
Chris@42
|
505 if (verbose > 2) printf("using plan_guru_r2r\n");
|
Chris@42
|
506 pln = FFTW(plan_guru_r2r)(sz->rnk, dims,
|
Chris@42
|
507 vecsz->rnk, howmany_dims,
|
Chris@42
|
508 (bench_real *) p->in,
|
Chris@42
|
509 (bench_real *) p->out, k, flags);
|
Chris@42
|
510 bench_free(dims);
|
Chris@42
|
511 bench_free(howmany_dims);
|
Chris@42
|
512 goto done;
|
Chris@42
|
513 }
|
Chris@42
|
514
|
Chris@42
|
515 done:
|
Chris@42
|
516 bench_free(k);
|
Chris@42
|
517 return pln;
|
Chris@42
|
518 }
|
Chris@42
|
519
|
Chris@42
|
520 FFTW(plan) mkplan(bench_problem *p, unsigned flags)
|
Chris@42
|
521 {
|
Chris@42
|
522 switch (p->kind) {
|
Chris@42
|
523 case PROBLEM_COMPLEX: return mkplan_complex(p, flags);
|
Chris@42
|
524 case PROBLEM_REAL: return mkplan_real(p, flags);
|
Chris@42
|
525 case PROBLEM_R2R: return mkplan_r2r(p, flags);
|
Chris@42
|
526 default: BENCH_ASSERT(0); return 0;
|
Chris@42
|
527 }
|
Chris@42
|
528 }
|
Chris@42
|
529
|
Chris@42
|
/* Start-up hook: this program has nothing to do here, so both
   arguments are only marked as unused. */
void main_init(int *argc, char ***argv)
{
    UNUSED(argv);
    UNUSED(argc);
}
|
Chris@42
|
535
|
Chris@42
|
/* Cleanup hook; intentionally does nothing in this program. */
void initial_cleanup(void)
{
    return;
}
|
Chris@42
|
539
|
Chris@42
|
/* Cleanup hook; intentionally does nothing in this program. */
void final_cleanup(void)
{
    return;
}
|
Chris@42
|
543
|
Chris@42
|
544 int import_wisdom(FILE *f)
|
Chris@42
|
545 {
|
Chris@42
|
546 return FFTW(import_wisdom_from_file)(f);
|
Chris@42
|
547 }
|
Chris@42
|
548
|
Chris@42
|
549 void export_wisdom(FILE *f)
|
Chris@42
|
550 {
|
Chris@42
|
551 FFTW(export_wisdom_to_file)(f);
|
Chris@42
|
552 }
|