cannam@167
|
/**************************************************************************/
/* NOTE to users: this is the FFTW self-test and benchmark program.
   It is probably NOT a good place to learn FFTW usage, since it has a
   lot of added complexity in order to exercise and test the full API,
   et cetera.  We suggest reading the manual.

   (Some of the self-test code is split off into fftw-bench.c and
   hook.c.) */
/**************************************************************************/
|
cannam@167
|
10
|
cannam@167
|
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#include "tests/fftw-bench.h"
|
cannam@167
|
15
|
cannam@167
|
16 static const char *mkversion(void) { return FFTW(version); }
|
cannam@167
|
17 static const char *mkcc(void) { return FFTW(cc); }
|
cannam@167
|
18 static const char *mkcodelet_optim(void) { return FFTW(codelet_optim); }
|
cannam@167
|
19
|
cannam@167
|
20 BEGIN_BENCH_DOC
|
cannam@167
|
21 BENCH_DOC("name", "fftw3")
|
cannam@167
|
22 BENCH_DOCF("version", mkversion)
|
cannam@167
|
23 BENCH_DOCF("cc", mkcc)
|
cannam@167
|
24 BENCH_DOCF("codelet-optim", mkcodelet_optim)
|
cannam@167
|
25 END_BENCH_DOC
|
cannam@167
|
26
|
cannam@167
|
27 static FFTW(iodim) *bench_tensor_to_fftw_iodim(bench_tensor *t)
|
cannam@167
|
28 {
|
cannam@167
|
29 FFTW(iodim) *d;
|
cannam@167
|
30 int i;
|
cannam@167
|
31
|
cannam@167
|
32 BENCH_ASSERT(t->rnk >= 0);
|
cannam@167
|
33 if (t->rnk == 0) return 0;
|
cannam@167
|
34
|
cannam@167
|
35 d = (FFTW(iodim) *)bench_malloc(sizeof(FFTW(iodim)) * t->rnk);
|
cannam@167
|
36 for (i = 0; i < t->rnk; ++i) {
|
cannam@167
|
37 d[i].n = t->dims[i].n;
|
cannam@167
|
38 d[i].is = t->dims[i].is;
|
cannam@167
|
39 d[i].os = t->dims[i].os;
|
cannam@167
|
40 }
|
cannam@167
|
41
|
cannam@167
|
42 return d;
|
cannam@167
|
43 }
|
cannam@167
|
44
|
cannam@167
|
45 static void extract_reim_split(int sign, int size, bench_real *p,
|
cannam@167
|
46 bench_real **r, bench_real **i)
|
cannam@167
|
47 {
|
cannam@167
|
48 if (sign == FFTW_FORWARD) {
|
cannam@167
|
49 *r = p + 0;
|
cannam@167
|
50 *i = p + size;
|
cannam@167
|
51 } else {
|
cannam@167
|
52 *r = p + size;
|
cannam@167
|
53 *i = p + 0;
|
cannam@167
|
54 }
|
cannam@167
|
55 }
|
cannam@167
|
56
|
cannam@167
|
57 static int sizeof_problem(bench_problem *p)
|
cannam@167
|
58 {
|
cannam@167
|
59 return tensor_sz(p->sz) * tensor_sz(p->vecsz);
|
cannam@167
|
60 }
|
cannam@167
|
61
|
cannam@167
|
62 /* ouch */
|
cannam@167
|
63 static int expressible_as_api_many(bench_tensor *t)
|
cannam@167
|
64 {
|
cannam@167
|
65 int i;
|
cannam@167
|
66
|
cannam@167
|
67 BENCH_ASSERT(BENCH_FINITE_RNK(t->rnk));
|
cannam@167
|
68
|
cannam@167
|
69 i = t->rnk - 1;
|
cannam@167
|
70 while (--i >= 0) {
|
cannam@167
|
71 bench_iodim *d = t->dims + i;
|
cannam@167
|
72 if (d[0].is % d[1].is) return 0;
|
cannam@167
|
73 if (d[0].os % d[1].os) return 0;
|
cannam@167
|
74 }
|
cannam@167
|
75 return 1;
|
cannam@167
|
76 }
|
cannam@167
|
77
|
cannam@167
|
78 static int *mkn(bench_tensor *t)
|
cannam@167
|
79 {
|
cannam@167
|
80 int *n = (int *) bench_malloc(sizeof(int *) * t->rnk);
|
cannam@167
|
81 int i;
|
cannam@167
|
82 for (i = 0; i < t->rnk; ++i)
|
cannam@167
|
83 n[i] = t->dims[i].n;
|
cannam@167
|
84 return n;
|
cannam@167
|
85 }
|
cannam@167
|
86
|
cannam@167
|
87 static void mknembed_many(bench_tensor *t, int **inembedp, int **onembedp)
|
cannam@167
|
88 {
|
cannam@167
|
89 int i;
|
cannam@167
|
90 bench_iodim *d;
|
cannam@167
|
91 int *inembed = (int *) bench_malloc(sizeof(int *) * t->rnk);
|
cannam@167
|
92 int *onembed = (int *) bench_malloc(sizeof(int *) * t->rnk);
|
cannam@167
|
93
|
cannam@167
|
94 BENCH_ASSERT(BENCH_FINITE_RNK(t->rnk));
|
cannam@167
|
95 *inembedp = inembed; *onembedp = onembed;
|
cannam@167
|
96
|
cannam@167
|
97 i = t->rnk - 1;
|
cannam@167
|
98 while (--i >= 0) {
|
cannam@167
|
99 d = t->dims + i;
|
cannam@167
|
100 inembed[i+1] = d[0].is / d[1].is;
|
cannam@167
|
101 onembed[i+1] = d[0].os / d[1].os;
|
cannam@167
|
102 }
|
cannam@167
|
103 }
|
cannam@167
|
104
|
cannam@167
|
/* Try to use the most appropriate API function.  Big mess. */
|
cannam@167
|
106
|
cannam@167
|
/* Returns the larger of a and b (b when they are equal). */
static int imax(int a, int b)
{
    return (a > b ? a : b);
}
|
cannam@167
|
108
|
cannam@167
|
109 static int halfish_sizeof_problem(bench_problem *p)
|
cannam@167
|
110 {
|
cannam@167
|
111 int n2 = sizeof_problem(p);
|
cannam@167
|
112 if (BENCH_FINITE_RNK(p->sz->rnk) && p->sz->rnk > 0)
|
cannam@167
|
113 n2 = (n2 / imax(p->sz->dims[p->sz->rnk - 1].n, 1)) *
|
cannam@167
|
114 (p->sz->dims[p->sz->rnk - 1].n / 2 + 1);
|
cannam@167
|
115 return n2;
|
cannam@167
|
116 }
|
cannam@167
|
117
|
cannam@167
|
118 static FFTW(plan) mkplan_real_split(bench_problem *p, unsigned flags)
|
cannam@167
|
119 {
|
cannam@167
|
120 FFTW(plan) pln;
|
cannam@167
|
121 bench_tensor *sz = p->sz, *vecsz = p->vecsz;
|
cannam@167
|
122 FFTW(iodim) *dims, *howmany_dims;
|
cannam@167
|
123 bench_real *ri, *ii, *ro, *io;
|
cannam@167
|
124 int n2 = halfish_sizeof_problem(p);
|
cannam@167
|
125
|
cannam@167
|
126 extract_reim_split(FFTW_FORWARD, n2, (bench_real *) p->in, &ri, &ii);
|
cannam@167
|
127 extract_reim_split(FFTW_FORWARD, n2, (bench_real *) p->out, &ro, &io);
|
cannam@167
|
128
|
cannam@167
|
129 dims = bench_tensor_to_fftw_iodim(sz);
|
cannam@167
|
130 howmany_dims = bench_tensor_to_fftw_iodim(vecsz);
|
cannam@167
|
131 if (p->sign < 0) {
|
cannam@167
|
132 if (verbose > 2) printf("using plan_guru_split_dft_r2c\n");
|
cannam@167
|
133 pln = FFTW(plan_guru_split_dft_r2c)(sz->rnk, dims,
|
cannam@167
|
134 vecsz->rnk, howmany_dims,
|
cannam@167
|
135 ri, ro, io, flags);
|
cannam@167
|
136 }
|
cannam@167
|
137 else {
|
cannam@167
|
138 if (verbose > 2) printf("using plan_guru_split_dft_c2r\n");
|
cannam@167
|
139 pln = FFTW(plan_guru_split_dft_c2r)(sz->rnk, dims,
|
cannam@167
|
140 vecsz->rnk, howmany_dims,
|
cannam@167
|
141 ri, ii, ro, flags);
|
cannam@167
|
142 }
|
cannam@167
|
143 bench_free(dims);
|
cannam@167
|
144 bench_free(howmany_dims);
|
cannam@167
|
145 return pln;
|
cannam@167
|
146 }
|
cannam@167
|
147
|
cannam@167
|
148 static FFTW(plan) mkplan_real_interleaved(bench_problem *p, unsigned flags)
|
cannam@167
|
149 {
|
cannam@167
|
150 FFTW(plan) pln;
|
cannam@167
|
151 bench_tensor *sz = p->sz, *vecsz = p->vecsz;
|
cannam@167
|
152
|
cannam@167
|
153 if (vecsz->rnk == 0 && tensor_unitstridep(sz)
|
cannam@167
|
154 && tensor_real_rowmajorp(sz, p->sign, p->in_place))
|
cannam@167
|
155 goto api_simple;
|
cannam@167
|
156
|
cannam@167
|
157 if (vecsz->rnk == 1 && expressible_as_api_many(sz))
|
cannam@167
|
158 goto api_many;
|
cannam@167
|
159
|
cannam@167
|
160 goto api_guru;
|
cannam@167
|
161
|
cannam@167
|
162 api_simple:
|
cannam@167
|
163 switch (sz->rnk) {
|
cannam@167
|
164 case 1:
|
cannam@167
|
165 if (p->sign < 0) {
|
cannam@167
|
166 if (verbose > 2) printf("using plan_dft_r2c_1d\n");
|
cannam@167
|
167 return FFTW(plan_dft_r2c_1d)(sz->dims[0].n,
|
cannam@167
|
168 (bench_real *) p->in,
|
cannam@167
|
169 (bench_complex *) p->out,
|
cannam@167
|
170 flags);
|
cannam@167
|
171 }
|
cannam@167
|
172 else {
|
cannam@167
|
173 if (verbose > 2) printf("using plan_dft_c2r_1d\n");
|
cannam@167
|
174 return FFTW(plan_dft_c2r_1d)(sz->dims[0].n,
|
cannam@167
|
175 (bench_complex *) p->in,
|
cannam@167
|
176 (bench_real *) p->out,
|
cannam@167
|
177 flags);
|
cannam@167
|
178 }
|
cannam@167
|
179 break;
|
cannam@167
|
180 case 2:
|
cannam@167
|
181 if (p->sign < 0) {
|
cannam@167
|
182 if (verbose > 2) printf("using plan_dft_r2c_2d\n");
|
cannam@167
|
183 return FFTW(plan_dft_r2c_2d)(sz->dims[0].n, sz->dims[1].n,
|
cannam@167
|
184 (bench_real *) p->in,
|
cannam@167
|
185 (bench_complex *) p->out,
|
cannam@167
|
186 flags);
|
cannam@167
|
187 }
|
cannam@167
|
188 else {
|
cannam@167
|
189 if (verbose > 2) printf("using plan_dft_c2r_2d\n");
|
cannam@167
|
190 return FFTW(plan_dft_c2r_2d)(sz->dims[0].n, sz->dims[1].n,
|
cannam@167
|
191 (bench_complex *) p->in,
|
cannam@167
|
192 (bench_real *) p->out,
|
cannam@167
|
193 flags);
|
cannam@167
|
194 }
|
cannam@167
|
195 break;
|
cannam@167
|
196 case 3:
|
cannam@167
|
197 if (p->sign < 0) {
|
cannam@167
|
198 if (verbose > 2) printf("using plan_dft_r2c_3d\n");
|
cannam@167
|
199 return FFTW(plan_dft_r2c_3d)(
|
cannam@167
|
200 sz->dims[0].n, sz->dims[1].n, sz->dims[2].n,
|
cannam@167
|
201 (bench_real *) p->in, (bench_complex *) p->out,
|
cannam@167
|
202 flags);
|
cannam@167
|
203 }
|
cannam@167
|
204 else {
|
cannam@167
|
205 if (verbose > 2) printf("using plan_dft_c2r_3d\n");
|
cannam@167
|
206 return FFTW(plan_dft_c2r_3d)(
|
cannam@167
|
207 sz->dims[0].n, sz->dims[1].n, sz->dims[2].n,
|
cannam@167
|
208 (bench_complex *) p->in, (bench_real *) p->out,
|
cannam@167
|
209 flags);
|
cannam@167
|
210 }
|
cannam@167
|
211 break;
|
cannam@167
|
212 default: {
|
cannam@167
|
213 int *n = mkn(sz);
|
cannam@167
|
214 if (p->sign < 0) {
|
cannam@167
|
215 if (verbose > 2) printf("using plan_dft_r2c\n");
|
cannam@167
|
216 pln = FFTW(plan_dft_r2c)(sz->rnk, n,
|
cannam@167
|
217 (bench_real *) p->in,
|
cannam@167
|
218 (bench_complex *) p->out,
|
cannam@167
|
219 flags);
|
cannam@167
|
220 }
|
cannam@167
|
221 else {
|
cannam@167
|
222 if (verbose > 2) printf("using plan_dft_c2r\n");
|
cannam@167
|
223 pln = FFTW(plan_dft_c2r)(sz->rnk, n,
|
cannam@167
|
224 (bench_complex *) p->in,
|
cannam@167
|
225 (bench_real *) p->out,
|
cannam@167
|
226 flags);
|
cannam@167
|
227 }
|
cannam@167
|
228 bench_free(n);
|
cannam@167
|
229 return pln;
|
cannam@167
|
230 }
|
cannam@167
|
231 }
|
cannam@167
|
232
|
cannam@167
|
233 api_many:
|
cannam@167
|
234 {
|
cannam@167
|
235 int *n, *inembed, *onembed;
|
cannam@167
|
236 BENCH_ASSERT(vecsz->rnk == 1);
|
cannam@167
|
237 n = mkn(sz);
|
cannam@167
|
238 mknembed_many(sz, &inembed, &onembed);
|
cannam@167
|
239 if (p->sign < 0) {
|
cannam@167
|
240 if (verbose > 2) printf("using plan_many_dft_r2c\n");
|
cannam@167
|
241 pln = FFTW(plan_many_dft_r2c)(
|
cannam@167
|
242 sz->rnk, n, vecsz->dims[0].n,
|
cannam@167
|
243 (bench_real *) p->in, inembed,
|
cannam@167
|
244 sz->dims[sz->rnk - 1].is, vecsz->dims[0].is,
|
cannam@167
|
245 (bench_complex *) p->out, onembed,
|
cannam@167
|
246 sz->dims[sz->rnk - 1].os, vecsz->dims[0].os,
|
cannam@167
|
247 flags);
|
cannam@167
|
248 }
|
cannam@167
|
249 else {
|
cannam@167
|
250 if (verbose > 2) printf("using plan_many_dft_c2r\n");
|
cannam@167
|
251 pln = FFTW(plan_many_dft_c2r)(
|
cannam@167
|
252 sz->rnk, n, vecsz->dims[0].n,
|
cannam@167
|
253 (bench_complex *) p->in, inembed,
|
cannam@167
|
254 sz->dims[sz->rnk - 1].is, vecsz->dims[0].is,
|
cannam@167
|
255 (bench_real *) p->out, onembed,
|
cannam@167
|
256 sz->dims[sz->rnk - 1].os, vecsz->dims[0].os,
|
cannam@167
|
257 flags);
|
cannam@167
|
258 }
|
cannam@167
|
259 bench_free(n); bench_free(inembed); bench_free(onembed);
|
cannam@167
|
260 return pln;
|
cannam@167
|
261 }
|
cannam@167
|
262
|
cannam@167
|
263 api_guru:
|
cannam@167
|
264 {
|
cannam@167
|
265 FFTW(iodim) *dims, *howmany_dims;
|
cannam@167
|
266
|
cannam@167
|
267 if (p->sign < 0) {
|
cannam@167
|
268 dims = bench_tensor_to_fftw_iodim(sz);
|
cannam@167
|
269 howmany_dims = bench_tensor_to_fftw_iodim(vecsz);
|
cannam@167
|
270 if (verbose > 2) printf("using plan_guru_dft_r2c\n");
|
cannam@167
|
271 pln = FFTW(plan_guru_dft_r2c)(sz->rnk, dims,
|
cannam@167
|
272 vecsz->rnk, howmany_dims,
|
cannam@167
|
273 (bench_real *) p->in,
|
cannam@167
|
274 (bench_complex *) p->out,
|
cannam@167
|
275 flags);
|
cannam@167
|
276 }
|
cannam@167
|
277 else {
|
cannam@167
|
278 dims = bench_tensor_to_fftw_iodim(sz);
|
cannam@167
|
279 howmany_dims = bench_tensor_to_fftw_iodim(vecsz);
|
cannam@167
|
280 if (verbose > 2) printf("using plan_guru_dft_c2r\n");
|
cannam@167
|
281 pln = FFTW(plan_guru_dft_c2r)(sz->rnk, dims,
|
cannam@167
|
282 vecsz->rnk, howmany_dims,
|
cannam@167
|
283 (bench_complex *) p->in,
|
cannam@167
|
284 (bench_real *) p->out,
|
cannam@167
|
285 flags);
|
cannam@167
|
286 }
|
cannam@167
|
287 bench_free(dims);
|
cannam@167
|
288 bench_free(howmany_dims);
|
cannam@167
|
289 return pln;
|
cannam@167
|
290 }
|
cannam@167
|
291 }
|
cannam@167
|
292
|
cannam@167
|
293 static FFTW(plan) mkplan_real(bench_problem *p, unsigned flags)
|
cannam@167
|
294 {
|
cannam@167
|
295 if (p->split)
|
cannam@167
|
296 return mkplan_real_split(p, flags);
|
cannam@167
|
297 else
|
cannam@167
|
298 return mkplan_real_interleaved(p, flags);
|
cannam@167
|
299 }
|
cannam@167
|
300
|
cannam@167
|
301 static FFTW(plan) mkplan_complex_split(bench_problem *p, unsigned flags)
|
cannam@167
|
302 {
|
cannam@167
|
303 FFTW(plan) pln;
|
cannam@167
|
304 bench_tensor *sz = p->sz, *vecsz = p->vecsz;
|
cannam@167
|
305 FFTW(iodim) *dims, *howmany_dims;
|
cannam@167
|
306 bench_real *ri, *ii, *ro, *io;
|
cannam@167
|
307
|
cannam@167
|
308 extract_reim_split(p->sign, p->iphyssz, (bench_real *) p->in, &ri, &ii);
|
cannam@167
|
309 extract_reim_split(p->sign, p->ophyssz, (bench_real *) p->out, &ro, &io);
|
cannam@167
|
310
|
cannam@167
|
311 dims = bench_tensor_to_fftw_iodim(sz);
|
cannam@167
|
312 howmany_dims = bench_tensor_to_fftw_iodim(vecsz);
|
cannam@167
|
313 if (verbose > 2) printf("using plan_guru_split_dft\n");
|
cannam@167
|
314 pln = FFTW(plan_guru_split_dft)(sz->rnk, dims,
|
cannam@167
|
315 vecsz->rnk, howmany_dims,
|
cannam@167
|
316 ri, ii, ro, io, flags);
|
cannam@167
|
317 bench_free(dims);
|
cannam@167
|
318 bench_free(howmany_dims);
|
cannam@167
|
319 return pln;
|
cannam@167
|
320 }
|
cannam@167
|
321
|
cannam@167
|
322 static FFTW(plan) mkplan_complex_interleaved(bench_problem *p, unsigned flags)
|
cannam@167
|
323 {
|
cannam@167
|
324 FFTW(plan) pln;
|
cannam@167
|
325 bench_tensor *sz = p->sz, *vecsz = p->vecsz;
|
cannam@167
|
326
|
cannam@167
|
327 if (vecsz->rnk == 0 && tensor_unitstridep(sz) && tensor_rowmajorp(sz))
|
cannam@167
|
328 goto api_simple;
|
cannam@167
|
329
|
cannam@167
|
330 if (vecsz->rnk == 1 && expressible_as_api_many(sz))
|
cannam@167
|
331 goto api_many;
|
cannam@167
|
332
|
cannam@167
|
333 goto api_guru;
|
cannam@167
|
334
|
cannam@167
|
335 api_simple:
|
cannam@167
|
336 switch (sz->rnk) {
|
cannam@167
|
337 case 1:
|
cannam@167
|
338 if (verbose > 2) printf("using plan_dft_1d\n");
|
cannam@167
|
339 return FFTW(plan_dft_1d)(sz->dims[0].n,
|
cannam@167
|
340 (bench_complex *) p->in,
|
cannam@167
|
341 (bench_complex *) p->out,
|
cannam@167
|
342 p->sign, flags);
|
cannam@167
|
343 break;
|
cannam@167
|
344 case 2:
|
cannam@167
|
345 if (verbose > 2) printf("using plan_dft_2d\n");
|
cannam@167
|
346 return FFTW(plan_dft_2d)(sz->dims[0].n, sz->dims[1].n,
|
cannam@167
|
347 (bench_complex *) p->in,
|
cannam@167
|
348 (bench_complex *) p->out,
|
cannam@167
|
349 p->sign, flags);
|
cannam@167
|
350 break;
|
cannam@167
|
351 case 3:
|
cannam@167
|
352 if (verbose > 2) printf("using plan_dft_3d\n");
|
cannam@167
|
353 return FFTW(plan_dft_3d)(
|
cannam@167
|
354 sz->dims[0].n, sz->dims[1].n, sz->dims[2].n,
|
cannam@167
|
355 (bench_complex *) p->in, (bench_complex *) p->out,
|
cannam@167
|
356 p->sign, flags);
|
cannam@167
|
357 break;
|
cannam@167
|
358 default: {
|
cannam@167
|
359 int *n = mkn(sz);
|
cannam@167
|
360 if (verbose > 2) printf("using plan_dft\n");
|
cannam@167
|
361 pln = FFTW(plan_dft)(sz->rnk, n,
|
cannam@167
|
362 (bench_complex *) p->in,
|
cannam@167
|
363 (bench_complex *) p->out, p->sign, flags);
|
cannam@167
|
364 bench_free(n);
|
cannam@167
|
365 return pln;
|
cannam@167
|
366 }
|
cannam@167
|
367 }
|
cannam@167
|
368
|
cannam@167
|
369 api_many:
|
cannam@167
|
370 {
|
cannam@167
|
371 int *n, *inembed, *onembed;
|
cannam@167
|
372 BENCH_ASSERT(vecsz->rnk == 1);
|
cannam@167
|
373 n = mkn(sz);
|
cannam@167
|
374 mknembed_many(sz, &inembed, &onembed);
|
cannam@167
|
375 if (verbose > 2) printf("using plan_many_dft\n");
|
cannam@167
|
376 pln = FFTW(plan_many_dft)(
|
cannam@167
|
377 sz->rnk, n, vecsz->dims[0].n,
|
cannam@167
|
378 (bench_complex *) p->in,
|
cannam@167
|
379 inembed, sz->dims[sz->rnk - 1].is, vecsz->dims[0].is,
|
cannam@167
|
380 (bench_complex *) p->out,
|
cannam@167
|
381 onembed, sz->dims[sz->rnk - 1].os, vecsz->dims[0].os,
|
cannam@167
|
382 p->sign, flags);
|
cannam@167
|
383 bench_free(n); bench_free(inembed); bench_free(onembed);
|
cannam@167
|
384 return pln;
|
cannam@167
|
385 }
|
cannam@167
|
386
|
cannam@167
|
387 api_guru:
|
cannam@167
|
388 {
|
cannam@167
|
389 FFTW(iodim) *dims, *howmany_dims;
|
cannam@167
|
390
|
cannam@167
|
391 dims = bench_tensor_to_fftw_iodim(sz);
|
cannam@167
|
392 howmany_dims = bench_tensor_to_fftw_iodim(vecsz);
|
cannam@167
|
393 if (verbose > 2) printf("using plan_guru_dft\n");
|
cannam@167
|
394 pln = FFTW(plan_guru_dft)(sz->rnk, dims,
|
cannam@167
|
395 vecsz->rnk, howmany_dims,
|
cannam@167
|
396 (bench_complex *) p->in,
|
cannam@167
|
397 (bench_complex *) p->out,
|
cannam@167
|
398 p->sign, flags);
|
cannam@167
|
399 bench_free(dims);
|
cannam@167
|
400 bench_free(howmany_dims);
|
cannam@167
|
401 return pln;
|
cannam@167
|
402 }
|
cannam@167
|
403 }
|
cannam@167
|
404
|
cannam@167
|
405 static FFTW(plan) mkplan_complex(bench_problem *p, unsigned flags)
|
cannam@167
|
406 {
|
cannam@167
|
407 if (p->split)
|
cannam@167
|
408 return mkplan_complex_split(p, flags);
|
cannam@167
|
409 else
|
cannam@167
|
410 return mkplan_complex_interleaved(p, flags);
|
cannam@167
|
411 }
|
cannam@167
|
412
|
cannam@167
|
413 static FFTW(plan) mkplan_r2r(bench_problem *p, unsigned flags)
|
cannam@167
|
414 {
|
cannam@167
|
415 FFTW(plan) pln;
|
cannam@167
|
416 bench_tensor *sz = p->sz, *vecsz = p->vecsz;
|
cannam@167
|
417 FFTW(r2r_kind) *k;
|
cannam@167
|
418
|
cannam@167
|
419 k = (FFTW(r2r_kind) *) bench_malloc(sizeof(FFTW(r2r_kind)) * sz->rnk);
|
cannam@167
|
420 {
|
cannam@167
|
421 int i;
|
cannam@167
|
422 for (i = 0; i < sz->rnk; ++i)
|
cannam@167
|
423 switch (p->k[i]) {
|
cannam@167
|
424 case R2R_R2HC: k[i] = FFTW_R2HC; break;
|
cannam@167
|
425 case R2R_HC2R: k[i] = FFTW_HC2R; break;
|
cannam@167
|
426 case R2R_DHT: k[i] = FFTW_DHT; break;
|
cannam@167
|
427 case R2R_REDFT00: k[i] = FFTW_REDFT00; break;
|
cannam@167
|
428 case R2R_REDFT01: k[i] = FFTW_REDFT01; break;
|
cannam@167
|
429 case R2R_REDFT10: k[i] = FFTW_REDFT10; break;
|
cannam@167
|
430 case R2R_REDFT11: k[i] = FFTW_REDFT11; break;
|
cannam@167
|
431 case R2R_RODFT00: k[i] = FFTW_RODFT00; break;
|
cannam@167
|
432 case R2R_RODFT01: k[i] = FFTW_RODFT01; break;
|
cannam@167
|
433 case R2R_RODFT10: k[i] = FFTW_RODFT10; break;
|
cannam@167
|
434 case R2R_RODFT11: k[i] = FFTW_RODFT11; break;
|
cannam@167
|
435 default: BENCH_ASSERT(0);
|
cannam@167
|
436 }
|
cannam@167
|
437 }
|
cannam@167
|
438
|
cannam@167
|
439 if (vecsz->rnk == 0 && tensor_unitstridep(sz) && tensor_rowmajorp(sz))
|
cannam@167
|
440 goto api_simple;
|
cannam@167
|
441
|
cannam@167
|
442 if (vecsz->rnk == 1 && expressible_as_api_many(sz))
|
cannam@167
|
443 goto api_many;
|
cannam@167
|
444
|
cannam@167
|
445 goto api_guru;
|
cannam@167
|
446
|
cannam@167
|
447 api_simple:
|
cannam@167
|
448 switch (sz->rnk) {
|
cannam@167
|
449 case 1:
|
cannam@167
|
450 if (verbose > 2) printf("using plan_r2r_1d\n");
|
cannam@167
|
451 pln = FFTW(plan_r2r_1d)(sz->dims[0].n,
|
cannam@167
|
452 (bench_real *) p->in,
|
cannam@167
|
453 (bench_real *) p->out,
|
cannam@167
|
454 k[0], flags);
|
cannam@167
|
455 goto done;
|
cannam@167
|
456 case 2:
|
cannam@167
|
457 if (verbose > 2) printf("using plan_r2r_2d\n");
|
cannam@167
|
458 pln = FFTW(plan_r2r_2d)(sz->dims[0].n, sz->dims[1].n,
|
cannam@167
|
459 (bench_real *) p->in,
|
cannam@167
|
460 (bench_real *) p->out,
|
cannam@167
|
461 k[0], k[1], flags);
|
cannam@167
|
462 goto done;
|
cannam@167
|
463 case 3:
|
cannam@167
|
464 if (verbose > 2) printf("using plan_r2r_3d\n");
|
cannam@167
|
465 pln = FFTW(plan_r2r_3d)(
|
cannam@167
|
466 sz->dims[0].n, sz->dims[1].n, sz->dims[2].n,
|
cannam@167
|
467 (bench_real *) p->in, (bench_real *) p->out,
|
cannam@167
|
468 k[0], k[1], k[2], flags);
|
cannam@167
|
469 goto done;
|
cannam@167
|
470 default: {
|
cannam@167
|
471 int *n = mkn(sz);
|
cannam@167
|
472 if (verbose > 2) printf("using plan_r2r\n");
|
cannam@167
|
473 pln = FFTW(plan_r2r)(sz->rnk, n,
|
cannam@167
|
474 (bench_real *) p->in, (bench_real *) p->out,
|
cannam@167
|
475 k, flags);
|
cannam@167
|
476 bench_free(n);
|
cannam@167
|
477 goto done;
|
cannam@167
|
478 }
|
cannam@167
|
479 }
|
cannam@167
|
480
|
cannam@167
|
481 api_many:
|
cannam@167
|
482 {
|
cannam@167
|
483 int *n, *inembed, *onembed;
|
cannam@167
|
484 BENCH_ASSERT(vecsz->rnk == 1);
|
cannam@167
|
485 n = mkn(sz);
|
cannam@167
|
486 mknembed_many(sz, &inembed, &onembed);
|
cannam@167
|
487 if (verbose > 2) printf("using plan_many_r2r\n");
|
cannam@167
|
488 pln = FFTW(plan_many_r2r)(
|
cannam@167
|
489 sz->rnk, n, vecsz->dims[0].n,
|
cannam@167
|
490 (bench_real *) p->in,
|
cannam@167
|
491 inembed, sz->dims[sz->rnk - 1].is, vecsz->dims[0].is,
|
cannam@167
|
492 (bench_real *) p->out,
|
cannam@167
|
493 onembed, sz->dims[sz->rnk - 1].os, vecsz->dims[0].os,
|
cannam@167
|
494 k, flags);
|
cannam@167
|
495 bench_free(n); bench_free(inembed); bench_free(onembed);
|
cannam@167
|
496 goto done;
|
cannam@167
|
497 }
|
cannam@167
|
498
|
cannam@167
|
499 api_guru:
|
cannam@167
|
500 {
|
cannam@167
|
501 FFTW(iodim) *dims, *howmany_dims;
|
cannam@167
|
502
|
cannam@167
|
503 dims = bench_tensor_to_fftw_iodim(sz);
|
cannam@167
|
504 howmany_dims = bench_tensor_to_fftw_iodim(vecsz);
|
cannam@167
|
505 if (verbose > 2) printf("using plan_guru_r2r\n");
|
cannam@167
|
506 pln = FFTW(plan_guru_r2r)(sz->rnk, dims,
|
cannam@167
|
507 vecsz->rnk, howmany_dims,
|
cannam@167
|
508 (bench_real *) p->in,
|
cannam@167
|
509 (bench_real *) p->out, k, flags);
|
cannam@167
|
510 bench_free(dims);
|
cannam@167
|
511 bench_free(howmany_dims);
|
cannam@167
|
512 goto done;
|
cannam@167
|
513 }
|
cannam@167
|
514
|
cannam@167
|
515 done:
|
cannam@167
|
516 bench_free(k);
|
cannam@167
|
517 return pln;
|
cannam@167
|
518 }
|
cannam@167
|
519
|
cannam@167
|
520 FFTW(plan) mkplan(bench_problem *p, unsigned flags)
|
cannam@167
|
521 {
|
cannam@167
|
522 switch (p->kind) {
|
cannam@167
|
523 case PROBLEM_COMPLEX: return mkplan_complex(p, flags);
|
cannam@167
|
524 case PROBLEM_REAL: return mkplan_real(p, flags);
|
cannam@167
|
525 case PROBLEM_R2R: return mkplan_r2r(p, flags);
|
cannam@167
|
526 default: BENCH_ASSERT(0); return 0;
|
cannam@167
|
527 }
|
cannam@167
|
528 }
|
cannam@167
|
529
|
cannam@167
|
/* Start-up hook receiving the program's argument vector.  This
   implementation does nothing with it; both parameters are passed to
   UNUSED(), presumably to mark them as intentionally ignored. */
void main_init(int *argc, char ***argv)
{
    UNUSED(argc);
    UNUSED(argv);
}
|
cannam@167
|
535
|
cannam@167
|
/* Cleanup hook; intentionally empty in this program. */
void initial_cleanup(void)
{
}
|
cannam@167
|
539
|
cannam@167
|
/* Cleanup hook; intentionally empty in this program. */
void final_cleanup(void)
{
}
|
cannam@167
|
543
|
cannam@167
|
544 int import_wisdom(FILE *f)
|
cannam@167
|
545 {
|
cannam@167
|
546 return FFTW(import_wisdom_from_file)(f);
|
cannam@167
|
547 }
|
cannam@167
|
548
|
cannam@167
|
549 void export_wisdom(FILE *f)
|
cannam@167
|
550 {
|
cannam@167
|
551 FFTW(export_wisdom_to_file)(f);
|
cannam@167
|
552 }
|