Mercurial > hg > sv-dependency-builds
comparison src/fftw-3.3.3/tests/bench.c @ 10:37bf6b4a2645
Add FFTW3
author | Chris Cannam |
---|---|
date | Wed, 20 Mar 2013 15:35:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:c0fb53affa76 | 10:37bf6b4a2645 |
---|---|
1 /**************************************************************************/ | |
2 /* NOTE to users: this is the FFTW self-test and benchmark program. | |
3 It is probably NOT a good place to learn FFTW usage, since it has a | |
4 lot of added complexity in order to exercise and test the full API, | |
5 etcetera. We suggest reading the manual. | |
6 | |
7 (Some of the self-test code is split off into fftw-bench.c and | |
8 hook.c.) */ | |
9 /**************************************************************************/ | |
10 | |
11 #include <math.h> | |
12 #include <stdio.h> | |
13 #include <string.h> | |
14 #include "fftw-bench.h" | |
15 | |
16 static const char *mkversion(void) { return FFTW(version); } | |
17 static const char *mkcc(void) { return FFTW(cc); } | |
18 static const char *mkcodelet_optim(void) { return FFTW(codelet_optim); } | |
19 | |
20 BEGIN_BENCH_DOC | |
21 BENCH_DOC("name", "fftw3") | |
22 BENCH_DOCF("version", mkversion) | |
23 BENCH_DOCF("cc", mkcc) | |
24 BENCH_DOCF("codelet-optim", mkcodelet_optim) | |
25 END_BENCH_DOC | |
26 | |
27 static FFTW(iodim) *bench_tensor_to_fftw_iodim(bench_tensor *t) | |
28 { | |
29 FFTW(iodim) *d; | |
30 int i; | |
31 | |
32 BENCH_ASSERT(t->rnk >= 0); | |
33 if (t->rnk == 0) return 0; | |
34 | |
35 d = (FFTW(iodim) *)bench_malloc(sizeof(FFTW(iodim)) * t->rnk); | |
36 for (i = 0; i < t->rnk; ++i) { | |
37 d[i].n = t->dims[i].n; | |
38 d[i].is = t->dims[i].is; | |
39 d[i].os = t->dims[i].os; | |
40 } | |
41 | |
42 return d; | |
43 } | |
44 | |
45 static void extract_reim_split(int sign, int size, bench_real *p, | |
46 bench_real **r, bench_real **i) | |
47 { | |
48 if (sign == FFTW_FORWARD) { | |
49 *r = p + 0; | |
50 *i = p + size; | |
51 } else { | |
52 *r = p + size; | |
53 *i = p + 0; | |
54 } | |
55 } | |
56 | |
57 static int sizeof_problem(bench_problem *p) | |
58 { | |
59 return tensor_sz(p->sz) * tensor_sz(p->vecsz); | |
60 } | |
61 | |
62 /* ouch */ | |
63 static int expressible_as_api_many(bench_tensor *t) | |
64 { | |
65 int i; | |
66 | |
67 BENCH_ASSERT(FINITE_RNK(t->rnk)); | |
68 | |
69 i = t->rnk - 1; | |
70 while (--i >= 0) { | |
71 bench_iodim *d = t->dims + i; | |
72 if (d[0].is % d[1].is) return 0; | |
73 if (d[0].os % d[1].os) return 0; | |
74 } | |
75 return 1; | |
76 } | |
77 | |
78 static int *mkn(bench_tensor *t) | |
79 { | |
80 int *n = (int *) bench_malloc(sizeof(int *) * t->rnk); | |
81 int i; | |
82 for (i = 0; i < t->rnk; ++i) | |
83 n[i] = t->dims[i].n; | |
84 return n; | |
85 } | |
86 | |
87 static void mknembed_many(bench_tensor *t, int **inembedp, int **onembedp) | |
88 { | |
89 int i; | |
90 bench_iodim *d; | |
91 int *inembed = (int *) bench_malloc(sizeof(int *) * t->rnk); | |
92 int *onembed = (int *) bench_malloc(sizeof(int *) * t->rnk); | |
93 | |
94 BENCH_ASSERT(FINITE_RNK(t->rnk)); | |
95 *inembedp = inembed; *onembedp = onembed; | |
96 | |
97 i = t->rnk - 1; | |
98 while (--i >= 0) { | |
99 d = t->dims + i; | |
100 inembed[i+1] = d[0].is / d[1].is; | |
101 onembed[i+1] = d[0].os / d[1].os; | |
102 } | |
103 } | |
104 | |
105 /* try to use the most appropriate API function. Big mess. */ | |
106 | |
/* Larger of two ints; returns b when the two are equal. */
static int imax(int a, int b)
{
     if (a > b)
          return a;
     return b;
}
108 | |
109 static int halfish_sizeof_problem(bench_problem *p) | |
110 { | |
111 int n2 = sizeof_problem(p); | |
112 if (FINITE_RNK(p->sz->rnk) && p->sz->rnk > 0) | |
113 n2 = (n2 / imax(p->sz->dims[p->sz->rnk - 1].n, 1)) * | |
114 (p->sz->dims[p->sz->rnk - 1].n / 2 + 1); | |
115 return n2; | |
116 } | |
117 | |
118 static FFTW(plan) mkplan_real_split(bench_problem *p, unsigned flags) | |
119 { | |
120 FFTW(plan) pln; | |
121 bench_tensor *sz = p->sz, *vecsz = p->vecsz; | |
122 FFTW(iodim) *dims, *howmany_dims; | |
123 bench_real *ri, *ii, *ro, *io; | |
124 int n2 = halfish_sizeof_problem(p); | |
125 | |
126 extract_reim_split(FFTW_FORWARD, n2, (bench_real *) p->in, &ri, &ii); | |
127 extract_reim_split(FFTW_FORWARD, n2, (bench_real *) p->out, &ro, &io); | |
128 | |
129 dims = bench_tensor_to_fftw_iodim(sz); | |
130 howmany_dims = bench_tensor_to_fftw_iodim(vecsz); | |
131 if (p->sign < 0) { | |
132 if (verbose > 2) printf("using plan_guru_split_dft_r2c\n"); | |
133 pln = FFTW(plan_guru_split_dft_r2c)(sz->rnk, dims, | |
134 vecsz->rnk, howmany_dims, | |
135 ri, ro, io, flags); | |
136 } | |
137 else { | |
138 if (verbose > 2) printf("using plan_guru_split_dft_c2r\n"); | |
139 pln = FFTW(plan_guru_split_dft_c2r)(sz->rnk, dims, | |
140 vecsz->rnk, howmany_dims, | |
141 ri, ii, ro, flags); | |
142 } | |
143 bench_free(dims); | |
144 bench_free(howmany_dims); | |
145 return pln; | |
146 } | |
147 | |
148 static FFTW(plan) mkplan_real_interleaved(bench_problem *p, unsigned flags) | |
149 { | |
150 FFTW(plan) pln; | |
151 bench_tensor *sz = p->sz, *vecsz = p->vecsz; | |
152 | |
153 if (vecsz->rnk == 0 && tensor_unitstridep(sz) | |
154 && tensor_real_rowmajorp(sz, p->sign, p->in_place)) | |
155 goto api_simple; | |
156 | |
157 if (vecsz->rnk == 1 && expressible_as_api_many(sz)) | |
158 goto api_many; | |
159 | |
160 goto api_guru; | |
161 | |
162 api_simple: | |
163 switch (sz->rnk) { | |
164 case 1: | |
165 if (p->sign < 0) { | |
166 if (verbose > 2) printf("using plan_dft_r2c_1d\n"); | |
167 return FFTW(plan_dft_r2c_1d)(sz->dims[0].n, | |
168 (bench_real *) p->in, | |
169 (bench_complex *) p->out, | |
170 flags); | |
171 } | |
172 else { | |
173 if (verbose > 2) printf("using plan_dft_c2r_1d\n"); | |
174 return FFTW(plan_dft_c2r_1d)(sz->dims[0].n, | |
175 (bench_complex *) p->in, | |
176 (bench_real *) p->out, | |
177 flags); | |
178 } | |
179 break; | |
180 case 2: | |
181 if (p->sign < 0) { | |
182 if (verbose > 2) printf("using plan_dft_r2c_2d\n"); | |
183 return FFTW(plan_dft_r2c_2d)(sz->dims[0].n, sz->dims[1].n, | |
184 (bench_real *) p->in, | |
185 (bench_complex *) p->out, | |
186 flags); | |
187 } | |
188 else { | |
189 if (verbose > 2) printf("using plan_dft_c2r_2d\n"); | |
190 return FFTW(plan_dft_c2r_2d)(sz->dims[0].n, sz->dims[1].n, | |
191 (bench_complex *) p->in, | |
192 (bench_real *) p->out, | |
193 flags); | |
194 } | |
195 break; | |
196 case 3: | |
197 if (p->sign < 0) { | |
198 if (verbose > 2) printf("using plan_dft_r2c_3d\n"); | |
199 return FFTW(plan_dft_r2c_3d)( | |
200 sz->dims[0].n, sz->dims[1].n, sz->dims[2].n, | |
201 (bench_real *) p->in, (bench_complex *) p->out, | |
202 flags); | |
203 } | |
204 else { | |
205 if (verbose > 2) printf("using plan_dft_c2r_3d\n"); | |
206 return FFTW(plan_dft_c2r_3d)( | |
207 sz->dims[0].n, sz->dims[1].n, sz->dims[2].n, | |
208 (bench_complex *) p->in, (bench_real *) p->out, | |
209 flags); | |
210 } | |
211 break; | |
212 default: { | |
213 int *n = mkn(sz); | |
214 if (p->sign < 0) { | |
215 if (verbose > 2) printf("using plan_dft_r2c\n"); | |
216 pln = FFTW(plan_dft_r2c)(sz->rnk, n, | |
217 (bench_real *) p->in, | |
218 (bench_complex *) p->out, | |
219 flags); | |
220 } | |
221 else { | |
222 if (verbose > 2) printf("using plan_dft_c2r\n"); | |
223 pln = FFTW(plan_dft_c2r)(sz->rnk, n, | |
224 (bench_complex *) p->in, | |
225 (bench_real *) p->out, | |
226 flags); | |
227 } | |
228 bench_free(n); | |
229 return pln; | |
230 } | |
231 } | |
232 | |
233 api_many: | |
234 { | |
235 int *n, *inembed, *onembed; | |
236 BENCH_ASSERT(vecsz->rnk == 1); | |
237 n = mkn(sz); | |
238 mknembed_many(sz, &inembed, &onembed); | |
239 if (p->sign < 0) { | |
240 if (verbose > 2) printf("using plan_many_dft_r2c\n"); | |
241 pln = FFTW(plan_many_dft_r2c)( | |
242 sz->rnk, n, vecsz->dims[0].n, | |
243 (bench_real *) p->in, inembed, | |
244 sz->dims[sz->rnk - 1].is, vecsz->dims[0].is, | |
245 (bench_complex *) p->out, onembed, | |
246 sz->dims[sz->rnk - 1].os, vecsz->dims[0].os, | |
247 flags); | |
248 } | |
249 else { | |
250 if (verbose > 2) printf("using plan_many_dft_c2r\n"); | |
251 pln = FFTW(plan_many_dft_c2r)( | |
252 sz->rnk, n, vecsz->dims[0].n, | |
253 (bench_complex *) p->in, inembed, | |
254 sz->dims[sz->rnk - 1].is, vecsz->dims[0].is, | |
255 (bench_real *) p->out, onembed, | |
256 sz->dims[sz->rnk - 1].os, vecsz->dims[0].os, | |
257 flags); | |
258 } | |
259 bench_free(n); bench_free(inembed); bench_free(onembed); | |
260 return pln; | |
261 } | |
262 | |
263 api_guru: | |
264 { | |
265 FFTW(iodim) *dims, *howmany_dims; | |
266 | |
267 if (p->sign < 0) { | |
268 dims = bench_tensor_to_fftw_iodim(sz); | |
269 howmany_dims = bench_tensor_to_fftw_iodim(vecsz); | |
270 if (verbose > 2) printf("using plan_guru_dft_r2c\n"); | |
271 pln = FFTW(plan_guru_dft_r2c)(sz->rnk, dims, | |
272 vecsz->rnk, howmany_dims, | |
273 (bench_real *) p->in, | |
274 (bench_complex *) p->out, | |
275 flags); | |
276 } | |
277 else { | |
278 dims = bench_tensor_to_fftw_iodim(sz); | |
279 howmany_dims = bench_tensor_to_fftw_iodim(vecsz); | |
280 if (verbose > 2) printf("using plan_guru_dft_c2r\n"); | |
281 pln = FFTW(plan_guru_dft_c2r)(sz->rnk, dims, | |
282 vecsz->rnk, howmany_dims, | |
283 (bench_complex *) p->in, | |
284 (bench_real *) p->out, | |
285 flags); | |
286 } | |
287 bench_free(dims); | |
288 bench_free(howmany_dims); | |
289 return pln; | |
290 } | |
291 } | |
292 | |
293 static FFTW(plan) mkplan_real(bench_problem *p, unsigned flags) | |
294 { | |
295 if (p->split) | |
296 return mkplan_real_split(p, flags); | |
297 else | |
298 return mkplan_real_interleaved(p, flags); | |
299 } | |
300 | |
301 static FFTW(plan) mkplan_complex_split(bench_problem *p, unsigned flags) | |
302 { | |
303 FFTW(plan) pln; | |
304 bench_tensor *sz = p->sz, *vecsz = p->vecsz; | |
305 FFTW(iodim) *dims, *howmany_dims; | |
306 bench_real *ri, *ii, *ro, *io; | |
307 | |
308 extract_reim_split(p->sign, p->iphyssz, (bench_real *) p->in, &ri, &ii); | |
309 extract_reim_split(p->sign, p->ophyssz, (bench_real *) p->out, &ro, &io); | |
310 | |
311 dims = bench_tensor_to_fftw_iodim(sz); | |
312 howmany_dims = bench_tensor_to_fftw_iodim(vecsz); | |
313 if (verbose > 2) printf("using plan_guru_split_dft\n"); | |
314 pln = FFTW(plan_guru_split_dft)(sz->rnk, dims, | |
315 vecsz->rnk, howmany_dims, | |
316 ri, ii, ro, io, flags); | |
317 bench_free(dims); | |
318 bench_free(howmany_dims); | |
319 return pln; | |
320 } | |
321 | |
322 static FFTW(plan) mkplan_complex_interleaved(bench_problem *p, unsigned flags) | |
323 { | |
324 FFTW(plan) pln; | |
325 bench_tensor *sz = p->sz, *vecsz = p->vecsz; | |
326 | |
327 if (vecsz->rnk == 0 && tensor_unitstridep(sz) && tensor_rowmajorp(sz)) | |
328 goto api_simple; | |
329 | |
330 if (vecsz->rnk == 1 && expressible_as_api_many(sz)) | |
331 goto api_many; | |
332 | |
333 goto api_guru; | |
334 | |
335 api_simple: | |
336 switch (sz->rnk) { | |
337 case 1: | |
338 if (verbose > 2) printf("using plan_dft_1d\n"); | |
339 return FFTW(plan_dft_1d)(sz->dims[0].n, | |
340 (bench_complex *) p->in, | |
341 (bench_complex *) p->out, | |
342 p->sign, flags); | |
343 break; | |
344 case 2: | |
345 if (verbose > 2) printf("using plan_dft_2d\n"); | |
346 return FFTW(plan_dft_2d)(sz->dims[0].n, sz->dims[1].n, | |
347 (bench_complex *) p->in, | |
348 (bench_complex *) p->out, | |
349 p->sign, flags); | |
350 break; | |
351 case 3: | |
352 if (verbose > 2) printf("using plan_dft_3d\n"); | |
353 return FFTW(plan_dft_3d)( | |
354 sz->dims[0].n, sz->dims[1].n, sz->dims[2].n, | |
355 (bench_complex *) p->in, (bench_complex *) p->out, | |
356 p->sign, flags); | |
357 break; | |
358 default: { | |
359 int *n = mkn(sz); | |
360 if (verbose > 2) printf("using plan_dft\n"); | |
361 pln = FFTW(plan_dft)(sz->rnk, n, | |
362 (bench_complex *) p->in, | |
363 (bench_complex *) p->out, p->sign, flags); | |
364 bench_free(n); | |
365 return pln; | |
366 } | |
367 } | |
368 | |
369 api_many: | |
370 { | |
371 int *n, *inembed, *onembed; | |
372 BENCH_ASSERT(vecsz->rnk == 1); | |
373 n = mkn(sz); | |
374 mknembed_many(sz, &inembed, &onembed); | |
375 if (verbose > 2) printf("using plan_many_dft\n"); | |
376 pln = FFTW(plan_many_dft)( | |
377 sz->rnk, n, vecsz->dims[0].n, | |
378 (bench_complex *) p->in, | |
379 inembed, sz->dims[sz->rnk - 1].is, vecsz->dims[0].is, | |
380 (bench_complex *) p->out, | |
381 onembed, sz->dims[sz->rnk - 1].os, vecsz->dims[0].os, | |
382 p->sign, flags); | |
383 bench_free(n); bench_free(inembed); bench_free(onembed); | |
384 return pln; | |
385 } | |
386 | |
387 api_guru: | |
388 { | |
389 FFTW(iodim) *dims, *howmany_dims; | |
390 | |
391 dims = bench_tensor_to_fftw_iodim(sz); | |
392 howmany_dims = bench_tensor_to_fftw_iodim(vecsz); | |
393 if (verbose > 2) printf("using plan_guru_dft\n"); | |
394 pln = FFTW(plan_guru_dft)(sz->rnk, dims, | |
395 vecsz->rnk, howmany_dims, | |
396 (bench_complex *) p->in, | |
397 (bench_complex *) p->out, | |
398 p->sign, flags); | |
399 bench_free(dims); | |
400 bench_free(howmany_dims); | |
401 return pln; | |
402 } | |
403 } | |
404 | |
405 static FFTW(plan) mkplan_complex(bench_problem *p, unsigned flags) | |
406 { | |
407 if (p->split) | |
408 return mkplan_complex_split(p, flags); | |
409 else | |
410 return mkplan_complex_interleaved(p, flags); | |
411 } | |
412 | |
413 static FFTW(plan) mkplan_r2r(bench_problem *p, unsigned flags) | |
414 { | |
415 FFTW(plan) pln; | |
416 bench_tensor *sz = p->sz, *vecsz = p->vecsz; | |
417 FFTW(r2r_kind) *k; | |
418 | |
419 k = (FFTW(r2r_kind) *) bench_malloc(sizeof(FFTW(r2r_kind)) * sz->rnk); | |
420 { | |
421 int i; | |
422 for (i = 0; i < sz->rnk; ++i) | |
423 switch (p->k[i]) { | |
424 case R2R_R2HC: k[i] = FFTW_R2HC; break; | |
425 case R2R_HC2R: k[i] = FFTW_HC2R; break; | |
426 case R2R_DHT: k[i] = FFTW_DHT; break; | |
427 case R2R_REDFT00: k[i] = FFTW_REDFT00; break; | |
428 case R2R_REDFT01: k[i] = FFTW_REDFT01; break; | |
429 case R2R_REDFT10: k[i] = FFTW_REDFT10; break; | |
430 case R2R_REDFT11: k[i] = FFTW_REDFT11; break; | |
431 case R2R_RODFT00: k[i] = FFTW_RODFT00; break; | |
432 case R2R_RODFT01: k[i] = FFTW_RODFT01; break; | |
433 case R2R_RODFT10: k[i] = FFTW_RODFT10; break; | |
434 case R2R_RODFT11: k[i] = FFTW_RODFT11; break; | |
435 default: BENCH_ASSERT(0); | |
436 } | |
437 } | |
438 | |
439 if (vecsz->rnk == 0 && tensor_unitstridep(sz) && tensor_rowmajorp(sz)) | |
440 goto api_simple; | |
441 | |
442 if (vecsz->rnk == 1 && expressible_as_api_many(sz)) | |
443 goto api_many; | |
444 | |
445 goto api_guru; | |
446 | |
447 api_simple: | |
448 switch (sz->rnk) { | |
449 case 1: | |
450 if (verbose > 2) printf("using plan_r2r_1d\n"); | |
451 pln = FFTW(plan_r2r_1d)(sz->dims[0].n, | |
452 (bench_real *) p->in, | |
453 (bench_real *) p->out, | |
454 k[0], flags); | |
455 goto done; | |
456 case 2: | |
457 if (verbose > 2) printf("using plan_r2r_2d\n"); | |
458 pln = FFTW(plan_r2r_2d)(sz->dims[0].n, sz->dims[1].n, | |
459 (bench_real *) p->in, | |
460 (bench_real *) p->out, | |
461 k[0], k[1], flags); | |
462 goto done; | |
463 case 3: | |
464 if (verbose > 2) printf("using plan_r2r_3d\n"); | |
465 pln = FFTW(plan_r2r_3d)( | |
466 sz->dims[0].n, sz->dims[1].n, sz->dims[2].n, | |
467 (bench_real *) p->in, (bench_real *) p->out, | |
468 k[0], k[1], k[2], flags); | |
469 goto done; | |
470 default: { | |
471 int *n = mkn(sz); | |
472 if (verbose > 2) printf("using plan_r2r\n"); | |
473 pln = FFTW(plan_r2r)(sz->rnk, n, | |
474 (bench_real *) p->in, (bench_real *) p->out, | |
475 k, flags); | |
476 bench_free(n); | |
477 goto done; | |
478 } | |
479 } | |
480 | |
481 api_many: | |
482 { | |
483 int *n, *inembed, *onembed; | |
484 BENCH_ASSERT(vecsz->rnk == 1); | |
485 n = mkn(sz); | |
486 mknembed_many(sz, &inembed, &onembed); | |
487 if (verbose > 2) printf("using plan_many_r2r\n"); | |
488 pln = FFTW(plan_many_r2r)( | |
489 sz->rnk, n, vecsz->dims[0].n, | |
490 (bench_real *) p->in, | |
491 inembed, sz->dims[sz->rnk - 1].is, vecsz->dims[0].is, | |
492 (bench_real *) p->out, | |
493 onembed, sz->dims[sz->rnk - 1].os, vecsz->dims[0].os, | |
494 k, flags); | |
495 bench_free(n); bench_free(inembed); bench_free(onembed); | |
496 goto done; | |
497 } | |
498 | |
499 api_guru: | |
500 { | |
501 FFTW(iodim) *dims, *howmany_dims; | |
502 | |
503 dims = bench_tensor_to_fftw_iodim(sz); | |
504 howmany_dims = bench_tensor_to_fftw_iodim(vecsz); | |
505 if (verbose > 2) printf("using plan_guru_r2r\n"); | |
506 pln = FFTW(plan_guru_r2r)(sz->rnk, dims, | |
507 vecsz->rnk, howmany_dims, | |
508 (bench_real *) p->in, | |
509 (bench_real *) p->out, k, flags); | |
510 bench_free(dims); | |
511 bench_free(howmany_dims); | |
512 goto done; | |
513 } | |
514 | |
515 done: | |
516 bench_free(k); | |
517 return pln; | |
518 } | |
519 | |
520 FFTW(plan) mkplan(bench_problem *p, unsigned flags) | |
521 { | |
522 switch (p->kind) { | |
523 case PROBLEM_COMPLEX: return mkplan_complex(p, flags); | |
524 case PROBLEM_REAL: return mkplan_real(p, flags); | |
525 case PROBLEM_R2R: return mkplan_r2r(p, flags); | |
526 default: BENCH_ASSERT(0); return 0; | |
527 } | |
528 } | |
529 | |
/* Start-up hook; this program does nothing with the command line here,
   so both arguments are only marked as unused. */
void main_init(int *argc, char ***argv)
{
     UNUSED(argc);
     UNUSED(argv);
}
535 | |
/* Cleanup hook; intentionally a no-op in this program. */
void initial_cleanup(void)
{
     /* nothing to do */
}
539 | |
/* Cleanup hook; intentionally a no-op in this program. */
void final_cleanup(void)
{
     /* nothing to do */
}
543 | |
544 int import_wisdom(FILE *f) | |
545 { | |
546 return FFTW(import_wisdom_from_file)(f); | |
547 } | |
548 | |
549 void export_wisdom(FILE *f) | |
550 { | |
551 FFTW(export_wisdom_to_file)(f); | |
552 } |