cannam@95
|
1 /*
|
cannam@95
|
2 * Copyright (c) 2001 Matteo Frigo
|
cannam@95
|
3 * Copyright (c) 2001 Massachusetts Institute of Technology
|
cannam@95
|
4 *
|
cannam@95
|
5 * This program is free software; you can redistribute it and/or modify
|
cannam@95
|
6 * it under the terms of the GNU General Public License as published by
|
cannam@95
|
7 * the Free Software Foundation; either version 2 of the License, or
|
cannam@95
|
8 * (at your option) any later version.
|
cannam@95
|
9 *
|
cannam@95
|
10 * This program is distributed in the hope that it will be useful,
|
cannam@95
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
cannam@95
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
cannam@95
|
13 * GNU General Public License for more details.
|
cannam@95
|
14 *
|
cannam@95
|
15 * You should have received a copy of the GNU General Public License
|
cannam@95
|
16 * along with this program; if not, write to the Free Software
|
cannam@95
|
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
cannam@95
|
18 *
|
cannam@95
|
19 */
|
cannam@95
|
20
|
cannam@95
|
21
|
cannam@95
|
22 #include "config.h"
|
cannam@95
|
23 #include "bench.h"
|
cannam@95
|
24 #include <stdio.h>
|
cannam@95
|
25 #include <stdlib.h>
|
cannam@95
|
26 #include <string.h>
|
cannam@95
|
27 #include <ctype.h>
|
cannam@95
|
28
|
cannam@95
|
/*
 * When nonzero, the last dimension of a PROBLEM_REAL array is padded
 * for out-of-place transforms as well; by default only the in-place
 * case is padded.
 */
int always_pad_real = 0;

/* Selects how the extent of a dimension is adjusted by transform_n(). */
typedef enum {
     SAME,     /* n, unchanged */
     PADDED,   /* 2*(n/2+1)    */
     HALFISH   /* n/2+1        */
} n_transform;
|
cannam@95
|
34
|
cannam@95
|
35 /* funny transformations for last dimension of PROBLEM_REAL */
|
cannam@95
|
36 static int transform_n(int n, n_transform nt)
|
cannam@95
|
37 {
|
cannam@95
|
38 switch (nt) {
|
cannam@95
|
39 case SAME: return n;
|
cannam@95
|
40 case PADDED: return 2*(n/2+1);
|
cannam@95
|
41 case HALFISH: return (n/2+1);
|
cannam@95
|
42 default: BENCH_ASSERT(0); return 0;
|
cannam@95
|
43 }
|
cannam@95
|
44 }
|
cannam@95
|
45
|
cannam@95
|
46 /* do what I mean */
|
cannam@95
|
47 static bench_tensor *dwim(bench_tensor *t, bench_iodim **last_iodim,
|
cannam@95
|
48 n_transform nti, n_transform nto,
|
cannam@95
|
49 bench_iodim *dt)
|
cannam@95
|
50 {
|
cannam@95
|
51 int i;
|
cannam@95
|
52 bench_iodim *d, *d1;
|
cannam@95
|
53
|
cannam@95
|
54 if (!FINITE_RNK(t->rnk) || t->rnk < 1)
|
cannam@95
|
55 return t;
|
cannam@95
|
56
|
cannam@95
|
57 i = t->rnk;
|
cannam@95
|
58 d1 = *last_iodim;
|
cannam@95
|
59
|
cannam@95
|
60 while (--i >= 0) {
|
cannam@95
|
61 d = t->dims + i;
|
cannam@95
|
62 if (!d->is)
|
cannam@95
|
63 d->is = d1->is * transform_n(d1->n, d1==dt ? nti : SAME);
|
cannam@95
|
64 if (!d->os)
|
cannam@95
|
65 d->os = d1->os * transform_n(d1->n, d1==dt ? nto : SAME);
|
cannam@95
|
66 d1 = d;
|
cannam@95
|
67 }
|
cannam@95
|
68
|
cannam@95
|
69 *last_iodim = d1;
|
cannam@95
|
70 return t;
|
cannam@95
|
71 }
|
cannam@95
|
72
|
cannam@95
|
73 static void transpose_tensor(bench_tensor *t)
|
cannam@95
|
74 {
|
cannam@95
|
75 if (!FINITE_RNK(t->rnk) || t->rnk < 2)
|
cannam@95
|
76 return;
|
cannam@95
|
77
|
cannam@95
|
78 t->dims[0].os = t->dims[1].os;
|
cannam@95
|
79 t->dims[1].os = t->dims[0].os * t->dims[0].n;
|
cannam@95
|
80 }
|
cannam@95
|
81
|
cannam@95
|
/*
 * Parse an optionally signed decimal integer starting at S and store
 * the result in *N.
 *
 * Accepted syntax: ['+' | '-'] digit+ ['k' | 'K'] ['m' | 'M'].
 * A 'k' suffix multiplies the value by 1024 and an 'm' suffix by
 * 1024*1024; the suffixes are tested one after the other, so both may
 * be present.  At least one digit is required (checked with
 * BENCH_ASSERT).  No overflow checking is performed.
 *
 * Returns a pointer to the first character that was not consumed.
 */
static const char *parseint(const char *s, int *n)
{
     int sign = 1;

     *n = 0;

     if (*s == '-') {
          sign = -1;
          ++s;
     } else if (*s == '+') {
          ++s;
     }

     /* The <ctype.h> functions require an argument representable as
        unsigned char (or EOF); plain char may be negative, so convert
        explicitly before classifying. */
     BENCH_ASSERT(isdigit((unsigned char) *s));
     while (isdigit((unsigned char) *s)) {
          *n = *n * 10 + (*s - '0');
          ++s;
     }

     *n *= sign;

     if (*s == 'k' || *s == 'K') {
          *n *= 1024;
          ++s;
     }

     if (*s == 'm' || *s == 'M') {
          *n *= 1024 * 1024;
          ++s;
     }

     return s;
}
|
cannam@95
|
116
|
cannam@95
|
117 struct dimlist { bench_iodim car; r2r_kind_t k; struct dimlist *cdr; };
|
cannam@95
|
118
|
cannam@95
|
119 static const char *parsetensor(const char *s, bench_tensor **tp,
|
cannam@95
|
120 r2r_kind_t **k)
|
cannam@95
|
121 {
|
cannam@95
|
122 struct dimlist *l = 0, *m;
|
cannam@95
|
123 bench_tensor *t;
|
cannam@95
|
124 int rnk = 0;
|
cannam@95
|
125
|
cannam@95
|
126 L1:
|
cannam@95
|
127 m = (struct dimlist *)bench_malloc(sizeof(struct dimlist));
|
cannam@95
|
128 /* nconc onto l */
|
cannam@95
|
129 m->cdr = l; l = m;
|
cannam@95
|
130 ++rnk;
|
cannam@95
|
131
|
cannam@95
|
132 s = parseint(s, &m->car.n);
|
cannam@95
|
133
|
cannam@95
|
134 if (*s == ':') {
|
cannam@95
|
135 /* read input stride */
|
cannam@95
|
136 ++s;
|
cannam@95
|
137 s = parseint(s, &m->car.is);
|
cannam@95
|
138 if (*s == ':') {
|
cannam@95
|
139 /* read output stride */
|
cannam@95
|
140 ++s;
|
cannam@95
|
141 s = parseint(s, &m->car.os);
|
cannam@95
|
142 } else {
|
cannam@95
|
143 /* default */
|
cannam@95
|
144 m->car.os = m->car.is;
|
cannam@95
|
145 }
|
cannam@95
|
146 } else {
|
cannam@95
|
147 m->car.is = 0;
|
cannam@95
|
148 m->car.os = 0;
|
cannam@95
|
149 }
|
cannam@95
|
150
|
cannam@95
|
151 if (*s == 'f' || *s == 'F') {
|
cannam@95
|
152 m->k = R2R_R2HC;
|
cannam@95
|
153 ++s;
|
cannam@95
|
154 }
|
cannam@95
|
155 else if (*s == 'b' || *s == 'B') {
|
cannam@95
|
156 m->k = R2R_HC2R;
|
cannam@95
|
157 ++s;
|
cannam@95
|
158 }
|
cannam@95
|
159 else if (*s == 'h' || *s == 'H') {
|
cannam@95
|
160 m->k = R2R_DHT;
|
cannam@95
|
161 ++s;
|
cannam@95
|
162 }
|
cannam@95
|
163 else if (*s == 'e' || *s == 'E' || *s == 'o' || *s == 'O') {
|
cannam@95
|
164 char c = *(s++);
|
cannam@95
|
165 int ab;
|
cannam@95
|
166
|
cannam@95
|
167 s = parseint(s, &ab);
|
cannam@95
|
168
|
cannam@95
|
169 if (c == 'e' || c == 'E') {
|
cannam@95
|
170 if (ab == 0)
|
cannam@95
|
171 m->k = R2R_REDFT00;
|
cannam@95
|
172 else if (ab == 1)
|
cannam@95
|
173 m->k = R2R_REDFT01;
|
cannam@95
|
174 else if (ab == 10)
|
cannam@95
|
175 m->k = R2R_REDFT10;
|
cannam@95
|
176 else if (ab == 11)
|
cannam@95
|
177 m->k = R2R_REDFT11;
|
cannam@95
|
178 else
|
cannam@95
|
179 BENCH_ASSERT(0);
|
cannam@95
|
180 }
|
cannam@95
|
181 else {
|
cannam@95
|
182 if (ab == 0)
|
cannam@95
|
183 m->k = R2R_RODFT00;
|
cannam@95
|
184 else if (ab == 1)
|
cannam@95
|
185 m->k = R2R_RODFT01;
|
cannam@95
|
186 else if (ab == 10)
|
cannam@95
|
187 m->k = R2R_RODFT10;
|
cannam@95
|
188 else if (ab == 11)
|
cannam@95
|
189 m->k = R2R_RODFT11;
|
cannam@95
|
190 else
|
cannam@95
|
191 BENCH_ASSERT(0);
|
cannam@95
|
192 }
|
cannam@95
|
193 }
|
cannam@95
|
194 else
|
cannam@95
|
195 m->k = R2R_R2HC;
|
cannam@95
|
196
|
cannam@95
|
197 if (*s == 'x' || *s == 'X') {
|
cannam@95
|
198 ++s;
|
cannam@95
|
199 goto L1;
|
cannam@95
|
200 }
|
cannam@95
|
201
|
cannam@95
|
202 /* now we have a dimlist. Build bench_tensor, etc. */
|
cannam@95
|
203
|
cannam@95
|
204 if (k && rnk > 0) {
|
cannam@95
|
205 int i;
|
cannam@95
|
206 *k = (r2r_kind_t *) bench_malloc(sizeof(r2r_kind_t) * rnk);
|
cannam@95
|
207 for (m = l, i = rnk - 1; i >= 0; --i, m = m->cdr) {
|
cannam@95
|
208 BENCH_ASSERT(m);
|
cannam@95
|
209 (*k)[i] = m->k;
|
cannam@95
|
210 }
|
cannam@95
|
211 }
|
cannam@95
|
212
|
cannam@95
|
213 t = mktensor(rnk);
|
cannam@95
|
214 while (--rnk >= 0) {
|
cannam@95
|
215 bench_iodim *d = t->dims + rnk;
|
cannam@95
|
216 BENCH_ASSERT(l);
|
cannam@95
|
217 m = l; l = m->cdr;
|
cannam@95
|
218 d->n = m->car.n;
|
cannam@95
|
219 d->is = m->car.is;
|
cannam@95
|
220 d->os = m->car.os;
|
cannam@95
|
221 bench_free(m);
|
cannam@95
|
222 }
|
cannam@95
|
223
|
cannam@95
|
224 *tp = t;
|
cannam@95
|
225 return s;
|
cannam@95
|
226 }
|
cannam@95
|
227
|
cannam@95
|
228 /* parse a problem description, return a problem */
|
cannam@95
|
229 bench_problem *problem_parse(const char *s)
|
cannam@95
|
230 {
|
cannam@95
|
231 bench_problem *p;
|
cannam@95
|
232 bench_iodim last_iodim0 = {1,1,1}, *last_iodim = &last_iodim0;
|
cannam@95
|
233 bench_iodim *sz_last_iodim;
|
cannam@95
|
234 bench_tensor *sz;
|
cannam@95
|
235 n_transform nti = SAME, nto = SAME;
|
cannam@95
|
236 int transpose = 0;
|
cannam@95
|
237
|
cannam@95
|
238 p = (bench_problem *) bench_malloc(sizeof(bench_problem));
|
cannam@95
|
239 p->kind = PROBLEM_COMPLEX;
|
cannam@95
|
240 p->k = 0;
|
cannam@95
|
241 p->sign = -1;
|
cannam@95
|
242 p->in = p->out = 0;
|
cannam@95
|
243 p->inphys = p->outphys = 0;
|
cannam@95
|
244 p->iphyssz = p->ophyssz = 0;
|
cannam@95
|
245 p->in_place = 0;
|
cannam@95
|
246 p->destroy_input = 0;
|
cannam@95
|
247 p->split = 0;
|
cannam@95
|
248 p->userinfo = 0;
|
cannam@95
|
249 p->scrambled_in = p->scrambled_out = 0;
|
cannam@95
|
250 p->sz = p->vecsz = 0;
|
cannam@95
|
251 p->ini = p->outi = 0;
|
cannam@95
|
252 p->pstring = (char *) bench_malloc(sizeof(char) * (strlen(s) + 1));
|
cannam@95
|
253 strcpy(p->pstring, s);
|
cannam@95
|
254
|
cannam@95
|
255 L1:
|
cannam@95
|
256 switch (tolower(*s)) {
|
cannam@95
|
257 case 'i': p->in_place = 1; ++s; goto L1;
|
cannam@95
|
258 case 'o': p->in_place = 0; ++s; goto L1;
|
cannam@95
|
259 case 'd': p->destroy_input = 1; ++s; goto L1;
|
cannam@95
|
260 case '/': p->split = 1; ++s; goto L1;
|
cannam@95
|
261 case 'f':
|
cannam@95
|
262 case '-': p->sign = -1; ++s; goto L1;
|
cannam@95
|
263 case 'b':
|
cannam@95
|
264 case '+': p->sign = 1; ++s; goto L1;
|
cannam@95
|
265 case 'r': p->kind = PROBLEM_REAL; ++s; goto L1;
|
cannam@95
|
266 case 'c': p->kind = PROBLEM_COMPLEX; ++s; goto L1;
|
cannam@95
|
267 case 'k': p->kind = PROBLEM_R2R; ++s; goto L1;
|
cannam@95
|
268 case 't': transpose = 1; ++s; goto L1;
|
cannam@95
|
269
|
cannam@95
|
270 /* hack for MPI: */
|
cannam@95
|
271 case '[': p->scrambled_in = 1; ++s; goto L1;
|
cannam@95
|
272 case ']': p->scrambled_out = 1; ++s; goto L1;
|
cannam@95
|
273
|
cannam@95
|
274 default : ;
|
cannam@95
|
275 }
|
cannam@95
|
276
|
cannam@95
|
277 s = parsetensor(s, &sz, p->kind == PROBLEM_R2R ? &p->k : 0);
|
cannam@95
|
278
|
cannam@95
|
279 if (p->kind == PROBLEM_REAL) {
|
cannam@95
|
280 if (p->sign < 0) {
|
cannam@95
|
281 nti = p->in_place || always_pad_real ? PADDED : SAME;
|
cannam@95
|
282 nto = HALFISH;
|
cannam@95
|
283 }
|
cannam@95
|
284 else {
|
cannam@95
|
285 nti = HALFISH;
|
cannam@95
|
286 nto = p->in_place || always_pad_real ? PADDED : SAME;
|
cannam@95
|
287 }
|
cannam@95
|
288 }
|
cannam@95
|
289
|
cannam@95
|
290 sz_last_iodim = sz->dims + sz->rnk - 1;
|
cannam@95
|
291 if (*s == '*') { /* "external" vector */
|
cannam@95
|
292 ++s;
|
cannam@95
|
293 p->sz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim);
|
cannam@95
|
294 s = parsetensor(s, &sz, 0);
|
cannam@95
|
295 p->vecsz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim);
|
cannam@95
|
296 } else if (*s == 'v' || *s == 'V') { /* "internal" vector */
|
cannam@95
|
297 bench_tensor *vecsz;
|
cannam@95
|
298 ++s;
|
cannam@95
|
299 s = parsetensor(s, &vecsz, 0);
|
cannam@95
|
300 p->vecsz = dwim(vecsz, &last_iodim, nti, nto, sz_last_iodim);
|
cannam@95
|
301 p->sz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim);
|
cannam@95
|
302 } else {
|
cannam@95
|
303 p->sz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim);
|
cannam@95
|
304 p->vecsz = mktensor(0);
|
cannam@95
|
305 }
|
cannam@95
|
306
|
cannam@95
|
307 if (transpose) {
|
cannam@95
|
308 transpose_tensor(p->sz);
|
cannam@95
|
309 transpose_tensor(p->vecsz);
|
cannam@95
|
310 }
|
cannam@95
|
311
|
cannam@95
|
312 if (!p->in_place)
|
cannam@95
|
313 p->out = ((bench_real *) p->in) + (1 << 20); /* whatever */
|
cannam@95
|
314
|
cannam@95
|
315 BENCH_ASSERT(p->sz && p->vecsz);
|
cannam@95
|
316 BENCH_ASSERT(!*s);
|
cannam@95
|
317 return p;
|
cannam@95
|
318 }
|
cannam@95
|
319
|
cannam@95
|
320 void problem_destroy(bench_problem *p)
|
cannam@95
|
321 {
|
cannam@95
|
322 BENCH_ASSERT(p);
|
cannam@95
|
323 problem_free(p);
|
cannam@95
|
324 bench_free0(p->k);
|
cannam@95
|
325 bench_free0(p->pstring);
|
cannam@95
|
326 bench_free(p);
|
cannam@95
|
327 }
|
cannam@95
|
328
|