cannam@127
|
/* fftw hook to be used in the benchmark program.

   We keep it in a separate file because

   1) bench.c is supposed to test the API---we do not want to #include
      "ifftw.h" and accidentally use internal symbols/macros.
   2) this code is a royal mess.  The messiness is due to
      A) confusion between internal fftw tensors and bench_tensors
         (which we want to keep separate because the benchmark
         program tests other routines too)
      B) despite A), our desire to recycle the libbench verifier.
*/
|
cannam@127
|
13
|
cannam@127
|
14 #include <stdio.h>
|
cannam@127
|
15 #include "bench-user.h"
|
cannam@127
|
16
|
cannam@127
|
17 #define CALLING_FFTW /* hack for Windows DLL nonsense */
|
cannam@127
|
18 #include "api.h"
|
cannam@127
|
19 #include "dft.h"
|
cannam@127
|
20 #include "rdft.h"
|
cannam@127
|
21
|
cannam@127
|
22 extern int paranoid; /* in bench.c */
|
cannam@127
|
23 extern X(plan) the_plan; /* in bench.c */
|
cannam@127
|
24
|
cannam@127
|
25 /*
|
cannam@127
|
26 transform an fftw tensor into a bench_tensor.
|
cannam@127
|
27 */
|
cannam@127
|
28 static bench_tensor *fftw_tensor_to_bench_tensor(tensor *t)
|
cannam@127
|
29 {
|
cannam@127
|
30 bench_tensor *bt = mktensor(t->rnk);
|
cannam@127
|
31
|
cannam@127
|
32 if (FINITE_RNK(t->rnk)) {
|
cannam@127
|
33 int i;
|
cannam@127
|
34 for (i = 0; i < t->rnk; ++i) {
|
cannam@127
|
35 /* FIXME: 64-bit unclean because of INT -> int conversion */
|
cannam@127
|
36 bt->dims[i].n = t->dims[i].n;
|
cannam@127
|
37 bt->dims[i].is = t->dims[i].is;
|
cannam@127
|
38 bt->dims[i].os = t->dims[i].os;
|
cannam@127
|
39 BENCH_ASSERT(bt->dims[i].n == t->dims[i].n);
|
cannam@127
|
40 BENCH_ASSERT(bt->dims[i].is == t->dims[i].is);
|
cannam@127
|
41 BENCH_ASSERT(bt->dims[i].os == t->dims[i].os);
|
cannam@127
|
42 }
|
cannam@127
|
43 }
|
cannam@127
|
44 return bt;
|
cannam@127
|
45 }
|
cannam@127
|
46
|
cannam@127
|
47 /*
|
cannam@127
|
48 transform an fftw problem into a bench_problem.
|
cannam@127
|
49 */
|
cannam@127
|
50 static bench_problem *fftw_problem_to_bench_problem(planner *plnr,
|
cannam@127
|
51 const problem *p_)
|
cannam@127
|
52 {
|
cannam@127
|
53 bench_problem *bp = 0;
|
cannam@127
|
54 switch (p_->adt->problem_kind) {
|
cannam@127
|
55 case PROBLEM_DFT:
|
cannam@127
|
56 {
|
cannam@127
|
57 const problem_dft *p = (const problem_dft *) p_;
|
cannam@127
|
58
|
cannam@127
|
59 if (!p->ri || !p->ii)
|
cannam@127
|
60 abort();
|
cannam@127
|
61
|
cannam@127
|
62 bp = (bench_problem *) bench_malloc(sizeof(bench_problem));
|
cannam@127
|
63
|
cannam@127
|
64 bp->kind = PROBLEM_COMPLEX;
|
cannam@127
|
65 bp->sign = FFT_SIGN;
|
cannam@127
|
66 bp->split = 1; /* tensor strides are in R's, not C's */
|
cannam@127
|
67 bp->in = UNTAINT(p->ri);
|
cannam@127
|
68 bp->out = UNTAINT(p->ro);
|
cannam@127
|
69 bp->ini = UNTAINT(p->ii);
|
cannam@127
|
70 bp->outi = UNTAINT(p->io);
|
cannam@127
|
71 bp->inphys = bp->outphys = 0;
|
cannam@127
|
72 bp->iphyssz = bp->ophyssz = 0;
|
cannam@127
|
73 bp->in_place = p->ri == p->ro;
|
cannam@127
|
74 bp->sz = fftw_tensor_to_bench_tensor(p->sz);
|
cannam@127
|
75 bp->vecsz = fftw_tensor_to_bench_tensor(p->vecsz);
|
cannam@127
|
76 bp->k = 0;
|
cannam@127
|
77 break;
|
cannam@127
|
78 }
|
cannam@127
|
79 case PROBLEM_RDFT:
|
cannam@127
|
80 {
|
cannam@127
|
81 const problem_rdft *p = (const problem_rdft *) p_;
|
cannam@127
|
82 int i;
|
cannam@127
|
83
|
cannam@127
|
84 if (!p->I || !p->O)
|
cannam@127
|
85 abort();
|
cannam@127
|
86
|
cannam@127
|
87 for (i = 0; i < p->sz->rnk; ++i)
|
cannam@127
|
88 switch (p->kind[i]) {
|
cannam@127
|
89 case R2HC01:
|
cannam@127
|
90 case R2HC10:
|
cannam@127
|
91 case R2HC11:
|
cannam@127
|
92 case HC2R01:
|
cannam@127
|
93 case HC2R10:
|
cannam@127
|
94 case HC2R11:
|
cannam@127
|
95 return bp;
|
cannam@127
|
96 default:
|
cannam@127
|
97 ;
|
cannam@127
|
98 }
|
cannam@127
|
99
|
cannam@127
|
100 bp = (bench_problem *) bench_malloc(sizeof(bench_problem));
|
cannam@127
|
101
|
cannam@127
|
102 bp->kind = PROBLEM_R2R;
|
cannam@127
|
103 bp->sign = FFT_SIGN;
|
cannam@127
|
104 bp->split = 0;
|
cannam@127
|
105 bp->in = UNTAINT(p->I);
|
cannam@127
|
106 bp->out = UNTAINT(p->O);
|
cannam@127
|
107 bp->ini = bp->outi = 0;
|
cannam@127
|
108 bp->inphys = bp->outphys = 0;
|
cannam@127
|
109 bp->iphyssz = bp->ophyssz = 0;
|
cannam@127
|
110 bp->in_place = p->I == p->O;
|
cannam@127
|
111 bp->sz = fftw_tensor_to_bench_tensor(p->sz);
|
cannam@127
|
112 bp->vecsz = fftw_tensor_to_bench_tensor(p->vecsz);
|
cannam@127
|
113 bp->k = (r2r_kind_t *) bench_malloc(sizeof(r2r_kind_t) * p->sz->rnk);
|
cannam@127
|
114 for (i = 0; i < p->sz->rnk; ++i)
|
cannam@127
|
115 switch (p->kind[i]) {
|
cannam@127
|
116 case R2HC: bp->k[i] = R2R_R2HC; break;
|
cannam@127
|
117 case HC2R: bp->k[i] = R2R_HC2R; break;
|
cannam@127
|
118 case DHT: bp->k[i] = R2R_DHT; break;
|
cannam@127
|
119 case REDFT00: bp->k[i] = R2R_REDFT00; break;
|
cannam@127
|
120 case REDFT01: bp->k[i] = R2R_REDFT01; break;
|
cannam@127
|
121 case REDFT10: bp->k[i] = R2R_REDFT10; break;
|
cannam@127
|
122 case REDFT11: bp->k[i] = R2R_REDFT11; break;
|
cannam@127
|
123 case RODFT00: bp->k[i] = R2R_RODFT00; break;
|
cannam@127
|
124 case RODFT01: bp->k[i] = R2R_RODFT01; break;
|
cannam@127
|
125 case RODFT10: bp->k[i] = R2R_RODFT10; break;
|
cannam@127
|
126 case RODFT11: bp->k[i] = R2R_RODFT11; break;
|
cannam@127
|
127 default: CK(0);
|
cannam@127
|
128 }
|
cannam@127
|
129 break;
|
cannam@127
|
130 }
|
cannam@127
|
131 case PROBLEM_RDFT2:
|
cannam@127
|
132 {
|
cannam@127
|
133 const problem_rdft2 *p = (const problem_rdft2 *) p_;
|
cannam@127
|
134 int rnk = p->sz->rnk;
|
cannam@127
|
135
|
cannam@127
|
136 if (!p->r0 || !p->r1 || !p->cr || !p->ci)
|
cannam@127
|
137 abort();
|
cannam@127
|
138
|
cannam@127
|
139 /* give up verifying rdft2 R2HCII */
|
cannam@127
|
140 if (p->kind != R2HC && p->kind != HC2R)
|
cannam@127
|
141 return bp;
|
cannam@127
|
142
|
cannam@127
|
143 if (rnk > 0) {
|
cannam@127
|
144 /* can't verify separate even/odd arrays for now */
|
cannam@127
|
145 if (2 * (p->r1 - p->r0) !=
|
cannam@127
|
146 ((p->kind == R2HC) ?
|
cannam@127
|
147 p->sz->dims[rnk-1].is : p->sz->dims[rnk-1].os))
|
cannam@127
|
148 return bp;
|
cannam@127
|
149 }
|
cannam@127
|
150
|
cannam@127
|
151 bp = (bench_problem *) bench_malloc(sizeof(bench_problem));
|
cannam@127
|
152
|
cannam@127
|
153 bp->kind = PROBLEM_REAL;
|
cannam@127
|
154 bp->sign = p->kind == R2HC ? FFT_SIGN : -FFT_SIGN;
|
cannam@127
|
155 bp->split = 1; /* tensor strides are in R's, not C's */
|
cannam@127
|
156 if (p->kind == R2HC) {
|
cannam@127
|
157 bp->sign = FFT_SIGN;
|
cannam@127
|
158 bp->in = UNTAINT(p->r0);
|
cannam@127
|
159 bp->out = UNTAINT(p->cr);
|
cannam@127
|
160 bp->ini = 0;
|
cannam@127
|
161 bp->outi = UNTAINT(p->ci);
|
cannam@127
|
162 }
|
cannam@127
|
163 else {
|
cannam@127
|
164 bp->sign = -FFT_SIGN;
|
cannam@127
|
165 bp->out = UNTAINT(p->r0);
|
cannam@127
|
166 bp->in = UNTAINT(p->cr);
|
cannam@127
|
167 bp->outi = 0;
|
cannam@127
|
168 bp->ini = UNTAINT(p->ci);
|
cannam@127
|
169 }
|
cannam@127
|
170 bp->inphys = bp->outphys = 0;
|
cannam@127
|
171 bp->iphyssz = bp->ophyssz = 0;
|
cannam@127
|
172 bp->in_place = p->r0 == p->cr;
|
cannam@127
|
173 bp->sz = fftw_tensor_to_bench_tensor(p->sz);
|
cannam@127
|
174 if (rnk > 0) {
|
cannam@127
|
175 if (p->kind == R2HC)
|
cannam@127
|
176 bp->sz->dims[rnk-1].is /= 2;
|
cannam@127
|
177 else
|
cannam@127
|
178 bp->sz->dims[rnk-1].os /= 2;
|
cannam@127
|
179 }
|
cannam@127
|
180 bp->vecsz = fftw_tensor_to_bench_tensor(p->vecsz);
|
cannam@127
|
181 bp->k = 0;
|
cannam@127
|
182 break;
|
cannam@127
|
183 }
|
cannam@127
|
184 default:
|
cannam@127
|
185 abort();
|
cannam@127
|
186 }
|
cannam@127
|
187
|
cannam@127
|
188 bp->userinfo = 0;
|
cannam@127
|
189 bp->pstring = 0;
|
cannam@127
|
190 bp->destroy_input = !NO_DESTROY_INPUTP(plnr);
|
cannam@127
|
191
|
cannam@127
|
192 return bp;
|
cannam@127
|
193 }
|
cannam@127
|
194
|
cannam@127
|
195 static void hook(planner *plnr, plan *pln, const problem *p_, int optimalp)
|
cannam@127
|
196 {
|
cannam@127
|
197 int rounds = 5;
|
cannam@127
|
198 double tol = SINGLE_PRECISION ? 1.0e-3 : 1.0e-10;
|
cannam@127
|
199 UNUSED(optimalp);
|
cannam@127
|
200
|
cannam@127
|
201 if (verbose > 5) {
|
cannam@127
|
202 printer *pr = X(mkprinter_file)(stdout);
|
cannam@127
|
203 pr->print(pr, "%P:%(%p%)\n", p_, pln);
|
cannam@127
|
204 X(printer_destroy)(pr);
|
cannam@127
|
205 printf("cost %g \n\n", pln->pcost);
|
cannam@127
|
206 }
|
cannam@127
|
207
|
cannam@127
|
208 if (paranoid) {
|
cannam@127
|
209 bench_problem *bp;
|
cannam@127
|
210
|
cannam@127
|
211 bp = fftw_problem_to_bench_problem(plnr, p_);
|
cannam@127
|
212 if (bp) {
|
cannam@127
|
213 X(plan) the_plan_save = the_plan;
|
cannam@127
|
214
|
cannam@127
|
215 the_plan = (apiplan *) MALLOC(sizeof(apiplan), PLANS);
|
cannam@127
|
216 the_plan->pln = pln;
|
cannam@127
|
217 the_plan->prb = (problem *) p_;
|
cannam@127
|
218
|
cannam@127
|
219 X(plan_awake)(pln, AWAKE_SQRTN_TABLE);
|
cannam@127
|
220 verify_problem(bp, rounds, tol);
|
cannam@127
|
221 X(plan_awake)(pln, SLEEPY);
|
cannam@127
|
222
|
cannam@127
|
223 X(ifree)(the_plan);
|
cannam@127
|
224 the_plan = the_plan_save;
|
cannam@127
|
225
|
cannam@127
|
226 problem_destroy(bp);
|
cannam@127
|
227 }
|
cannam@127
|
228
|
cannam@127
|
229 }
|
cannam@127
|
230 }
|
cannam@127
|
231
|
cannam@127
|
232 static void paranoid_checks(void)
|
cannam@127
|
233 {
|
cannam@127
|
234 /* FIXME: assumes char = 8 bits, which is false on at least one
|
cannam@127
|
235 DSP I know of. */
|
cannam@127
|
236 #if 0
|
cannam@127
|
237 /* if flags_t is not 64 bits i want to know it. */
|
cannam@127
|
238 CK(sizeof(flags_t) == 8);
|
cannam@127
|
239
|
cannam@127
|
240 CK(sizeof(md5uint) >= 4);
|
cannam@127
|
241 #endif
|
cannam@127
|
242
|
cannam@127
|
243 CK(sizeof(uintptr_t) >= sizeof(R *));
|
cannam@127
|
244
|
cannam@127
|
245 CK(sizeof(INT) >= sizeof(R *));
|
cannam@127
|
246 }
|
cannam@127
|
247
|
cannam@127
|
248 void install_hook(void)
|
cannam@127
|
249 {
|
cannam@127
|
250 planner *plnr = X(the_planner)();
|
cannam@127
|
251 plnr->hook = hook;
|
cannam@127
|
252 paranoid_checks();
|
cannam@127
|
253 }
|
cannam@127
|
254
|
cannam@127
|
255 void uninstall_hook(void)
|
cannam@127
|
256 {
|
cannam@127
|
257 planner *plnr = X(the_planner)();
|
cannam@127
|
258 plnr->hook = 0;
|
cannam@127
|
259 }
|