Mercurial > hg > sv-dependency-builds
comparison src/fftw-3.3.3/rdft/direct-r2c.c @ 10:37bf6b4a2645
Add FFTW3
author | Chris Cannam |
---|---|
date | Wed, 20 Mar 2013 15:35:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:c0fb53affa76 | 10:37bf6b4a2645 |
---|---|
1 /* | |
2 * Copyright (c) 2003, 2007-11 Matteo Frigo | |
3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License as published by | |
7 * the Free Software Foundation; either version 2 of the License, or | |
8 * (at your option) any later version. | |
9 * | |
10 * This program is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 * GNU General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU General Public License | |
16 * along with this program; if not, write to the Free Software | |
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 * | |
19 */ | |
20 | |
21 | |
22 /* direct RDFT solver, using r2c codelets */ | |
23 | |
24 #include "rdft.h" | |
25 | |
26 typedef struct { | |
27 solver super; | |
28 const kr2c_desc *desc; | |
29 kr2c k; | |
30 int bufferedp; | |
31 } S; | |
32 | |
33 typedef struct { | |
34 plan_rdft super; | |
35 | |
36 stride rs, csr, csi; | |
37 stride brs, bcsr, bcsi; | |
38 INT n, vl, rs0, ivs, ovs, ioffset, bioffset; | |
39 kr2c k; | |
40 const S *slv; | |
41 } P; | |
42 | |
43 /************************************************************* | |
44 Nonbuffered code | |
45 *************************************************************/ | |
46 static void apply_r2hc(const plan *ego_, R *I, R *O) | |
47 { | |
48 const P *ego = (const P *) ego_; | |
49 ASSERT_ALIGNED_DOUBLE; | |
50 ego->k(I, I + ego->rs0, O, O + ego->ioffset, | |
51 ego->rs, ego->csr, ego->csi, | |
52 ego->vl, ego->ivs, ego->ovs); | |
53 } | |
54 | |
55 static void apply_hc2r(const plan *ego_, R *I, R *O) | |
56 { | |
57 const P *ego = (const P *) ego_; | |
58 ASSERT_ALIGNED_DOUBLE; | |
59 ego->k(O, O + ego->rs0, I, I + ego->ioffset, | |
60 ego->rs, ego->csr, ego->csi, | |
61 ego->vl, ego->ivs, ego->ovs); | |
62 } | |
63 | |
64 /************************************************************* | |
65 Buffered code | |
66 *************************************************************/ | |
67 /* should not be 2^k to avoid associativity conflicts */ | |
68 static INT compute_batchsize(INT radix) | |
69 { | |
70 /* round up to multiple of 4 */ | |
71 radix += 3; | |
72 radix &= -4; | |
73 | |
74 return (radix + 2); | |
75 } | |
76 | |
77 static void dobatch_r2hc(const P *ego, R *I, R *O, R *buf, INT batchsz) | |
78 { | |
79 X(cpy2d_ci)(I, buf, | |
80 ego->n, ego->rs0, WS(ego->bcsr /* hack */, 1), | |
81 batchsz, ego->ivs, 1, 1); | |
82 | |
83 if (IABS(WS(ego->csr, 1)) < IABS(ego->ovs)) { | |
84 /* transform directly to output */ | |
85 ego->k(buf, buf + WS(ego->bcsr /* hack */, 1), | |
86 O, O + ego->ioffset, | |
87 ego->brs, ego->csr, ego->csi, | |
88 batchsz, 1, ego->ovs); | |
89 } else { | |
90 /* transform to buffer and copy back */ | |
91 ego->k(buf, buf + WS(ego->bcsr /* hack */, 1), | |
92 buf, buf + ego->bioffset, | |
93 ego->brs, ego->bcsr, ego->bcsi, | |
94 batchsz, 1, 1); | |
95 X(cpy2d_co)(buf, O, | |
96 ego->n, WS(ego->bcsr, 1), WS(ego->csr, 1), | |
97 batchsz, 1, ego->ovs, 1); | |
98 } | |
99 } | |
100 | |
101 static void dobatch_hc2r(const P *ego, R *I, R *O, R *buf, INT batchsz) | |
102 { | |
103 if (IABS(WS(ego->csr, 1)) < IABS(ego->ivs)) { | |
104 /* transform directly from input */ | |
105 ego->k(buf, buf + WS(ego->bcsr /* hack */, 1), | |
106 I, I + ego->ioffset, | |
107 ego->brs, ego->csr, ego->csi, | |
108 batchsz, ego->ivs, 1); | |
109 } else { | |
110 /* copy into buffer and transform in place */ | |
111 X(cpy2d_ci)(I, buf, | |
112 ego->n, WS(ego->csr, 1), WS(ego->bcsr, 1), | |
113 batchsz, ego->ivs, 1, 1); | |
114 ego->k(buf, buf + WS(ego->bcsr /* hack */, 1), | |
115 buf, buf + ego->bioffset, | |
116 ego->brs, ego->bcsr, ego->bcsi, | |
117 batchsz, 1, 1); | |
118 } | |
119 X(cpy2d_co)(buf, O, | |
120 ego->n, WS(ego->bcsr /* hack */, 1), ego->rs0, | |
121 batchsz, 1, ego->ovs, 1); | |
122 } | |
123 | |
124 static void iterate(const P *ego, R *I, R *O, | |
125 void (*dobatch)(const P *ego, R *I, R *O, | |
126 R *buf, INT batchsz)) | |
127 { | |
128 R *buf; | |
129 INT vl = ego->vl; | |
130 INT n = ego->n; | |
131 INT i; | |
132 INT batchsz = compute_batchsize(n); | |
133 size_t bufsz = n * batchsz * sizeof(R); | |
134 | |
135 BUF_ALLOC(R *, buf, bufsz); | |
136 | |
137 for (i = 0; i < vl - batchsz; i += batchsz) { | |
138 dobatch(ego, I, O, buf, batchsz); | |
139 I += batchsz * ego->ivs; | |
140 O += batchsz * ego->ovs; | |
141 } | |
142 dobatch(ego, I, O, buf, vl - i); | |
143 | |
144 BUF_FREE(buf, bufsz); | |
145 } | |
146 | |
147 static void apply_buf_r2hc(const plan *ego_, R *I, R *O) | |
148 { | |
149 iterate((const P *) ego_, I, O, dobatch_r2hc); | |
150 } | |
151 | |
152 static void apply_buf_hc2r(const plan *ego_, R *I, R *O) | |
153 { | |
154 iterate((const P *) ego_, I, O, dobatch_hc2r); | |
155 } | |
156 | |
157 static void destroy(plan *ego_) | |
158 { | |
159 P *ego = (P *) ego_; | |
160 X(stride_destroy)(ego->rs); | |
161 X(stride_destroy)(ego->csr); | |
162 X(stride_destroy)(ego->csi); | |
163 X(stride_destroy)(ego->brs); | |
164 X(stride_destroy)(ego->bcsr); | |
165 X(stride_destroy)(ego->bcsi); | |
166 } | |
167 | |
168 static void print(const plan *ego_, printer *p) | |
169 { | |
170 const P *ego = (const P *) ego_; | |
171 const S *s = ego->slv; | |
172 | |
173 if (ego->slv->bufferedp) | |
174 p->print(p, "(rdft-%s-directbuf/%D-r2c-%D%v \"%s\")", | |
175 X(rdft_kind_str)(s->desc->genus->kind), | |
176 /* hack */ WS(ego->bcsr, 1), ego->n, | |
177 ego->vl, s->desc->nam); | |
178 | |
179 else | |
180 p->print(p, "(rdft-%s-direct-r2c-%D%v \"%s\")", | |
181 X(rdft_kind_str)(s->desc->genus->kind), ego->n, | |
182 ego->vl, s->desc->nam); | |
183 } | |
184 | |
185 static INT ioffset(rdft_kind kind, INT sz, INT s) | |
186 { | |
187 return(s * ((kind == R2HC || kind == HC2R) ? sz : (sz - 1))); | |
188 } | |
189 | |
190 static int applicable(const solver *ego_, const problem *p_) | |
191 { | |
192 const S *ego = (const S *) ego_; | |
193 const kr2c_desc *desc = ego->desc; | |
194 const problem_rdft *p = (const problem_rdft *) p_; | |
195 INT vl, ivs, ovs; | |
196 | |
197 return ( | |
198 1 | |
199 && p->sz->rnk == 1 | |
200 && p->vecsz->rnk <= 1 | |
201 && p->sz->dims[0].n == desc->n | |
202 && p->kind[0] == desc->genus->kind | |
203 | |
204 /* check strides etc */ | |
205 && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs) | |
206 | |
207 && (0 | |
208 /* can operate out-of-place */ | |
209 || p->I != p->O | |
210 | |
211 /* computing one transform */ | |
212 || vl == 1 | |
213 | |
214 /* can operate in-place as long as strides are the same */ | |
215 || X(tensor_inplace_strides2)(p->sz, p->vecsz) | |
216 ) | |
217 ); | |
218 } | |
219 | |
220 static int applicable_buf(const solver *ego_, const problem *p_) | |
221 { | |
222 const S *ego = (const S *) ego_; | |
223 const kr2c_desc *desc = ego->desc; | |
224 const problem_rdft *p = (const problem_rdft *) p_; | |
225 INT vl, ivs, ovs, batchsz; | |
226 | |
227 return ( | |
228 1 | |
229 && p->sz->rnk == 1 | |
230 && p->vecsz->rnk <= 1 | |
231 && p->sz->dims[0].n == desc->n | |
232 && p->kind[0] == desc->genus->kind | |
233 | |
234 /* check strides etc */ | |
235 && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs) | |
236 | |
237 && (batchsz = compute_batchsize(desc->n), 1) | |
238 | |
239 && (0 | |
240 /* can operate out-of-place */ | |
241 || p->I != p->O | |
242 | |
243 /* can operate in-place as long as strides are the same */ | |
244 || X(tensor_inplace_strides2)(p->sz, p->vecsz) | |
245 | |
246 /* can do it if the problem fits in the buffer, no matter | |
247 what the strides are */ | |
248 || vl <= batchsz | |
249 ) | |
250 ); | |
251 } | |
252 | |
253 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) | |
254 { | |
255 const S *ego = (const S *) ego_; | |
256 P *pln; | |
257 const problem_rdft *p; | |
258 iodim *d; | |
259 INT rs, cs, b, n; | |
260 | |
261 static const plan_adt padt = { | |
262 X(rdft_solve), X(null_awake), print, destroy | |
263 }; | |
264 | |
265 UNUSED(plnr); | |
266 | |
267 if (ego->bufferedp) { | |
268 if (!applicable_buf(ego_, p_)) | |
269 return (plan *)0; | |
270 } else { | |
271 if (!applicable(ego_, p_)) | |
272 return (plan *)0; | |
273 } | |
274 | |
275 p = (const problem_rdft *) p_; | |
276 | |
277 if (R2HC_KINDP(p->kind[0])) { | |
278 rs = p->sz->dims[0].is; cs = p->sz->dims[0].os; | |
279 pln = MKPLAN_RDFT(P, &padt, | |
280 ego->bufferedp ? apply_buf_r2hc : apply_r2hc); | |
281 } else { | |
282 rs = p->sz->dims[0].os; cs = p->sz->dims[0].is; | |
283 pln = MKPLAN_RDFT(P, &padt, | |
284 ego->bufferedp ? apply_buf_hc2r : apply_hc2r); | |
285 } | |
286 | |
287 d = p->sz->dims; | |
288 n = d[0].n; | |
289 | |
290 pln->k = ego->k; | |
291 pln->n = n; | |
292 | |
293 pln->rs0 = rs; | |
294 pln->rs = X(mkstride)(n, 2 * rs); | |
295 pln->csr = X(mkstride)(n, cs); | |
296 pln->csi = X(mkstride)(n, -cs); | |
297 pln->ioffset = ioffset(p->kind[0], n, cs); | |
298 | |
299 b = compute_batchsize(n); | |
300 pln->brs = X(mkstride)(n, 2 * b); | |
301 pln->bcsr = X(mkstride)(n, b); | |
302 pln->bcsi = X(mkstride)(n, -b); | |
303 pln->bioffset = ioffset(p->kind[0], n, b); | |
304 | |
305 X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs); | |
306 | |
307 pln->slv = ego; | |
308 X(ops_zero)(&pln->super.super.ops); | |
309 | |
310 X(ops_madd2)(pln->vl / ego->desc->genus->vl, | |
311 &ego->desc->ops, | |
312 &pln->super.super.ops); | |
313 | |
314 if (ego->bufferedp) | |
315 pln->super.super.ops.other += 2 * n * pln->vl; | |
316 | |
317 pln->super.super.could_prune_now_p = !ego->bufferedp; | |
318 | |
319 return &(pln->super.super); | |
320 } | |
321 | |
322 /* constructor */ | |
323 static solver *mksolver(kr2c k, const kr2c_desc *desc, int bufferedp) | |
324 { | |
325 static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; | |
326 S *slv = MKSOLVER(S, &sadt); | |
327 slv->k = k; | |
328 slv->desc = desc; | |
329 slv->bufferedp = bufferedp; | |
330 return &(slv->super); | |
331 } | |
332 | |
333 solver *X(mksolver_rdft_r2c_direct)(kr2c k, const kr2c_desc *desc) | |
334 { | |
335 return mksolver(k, desc, 0); | |
336 } | |
337 | |
338 solver *X(mksolver_rdft_r2c_directbuf)(kr2c k, const kr2c_desc *desc) | |
339 { | |
340 return mksolver(k, desc, 1); | |
341 } |