comparison src/fftw-3.3.8/rdft/buffered2.c @ 167:bd3cc4d1df30

Add FFTW 3.3.8 source, and a Linux build
author Chris Cannam <cannam@all-day-breakfast.com>
date Tue, 19 Nov 2019 14:52:55 +0000
parents
children
comparison
equal deleted inserted replaced
166:cbd6d7e562c7 167:bd3cc4d1df30
1 /*
2 * Copyright (c) 2003, 2007-14 Matteo Frigo
3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 */
20
21
22 /* buffering of rdft2. We always buffer the complex array */
23
24 #include "rdft/rdft.h"
25 #include "dft/dft.h"
26
27 typedef struct {
28 solver super;
29 size_t maxnbuf_ndx;
30 } S;
31
32 static const INT maxnbufs[] = { 8, 256 };
33
34 typedef struct {
35 plan_rdft2 super;
36
37 plan *cld, *cldcpy, *cldrest;
38 INT n, vl, nbuf, bufdist;
39 INT ivs_by_nbuf, ovs_by_nbuf;
40 INT ioffset, roffset;
41 } P;
42
43 /* transform a vector input with the help of bufs */
44 static void apply_r2hc(const plan *ego_, R *r0, R *r1, R *cr, R *ci)
45 {
46 const P *ego = (const P *) ego_;
47 plan_rdft2 *cld = (plan_rdft2 *) ego->cld;
48 plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
49 INT i, vl = ego->vl, nbuf = ego->nbuf;
50 INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
51 R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS);
52 R *bufr = bufs + ego->roffset;
53 R *bufi = bufs + ego->ioffset;
54 plan_rdft2 *cldrest;
55
56 for (i = nbuf; i <= vl; i += nbuf) {
57 /* transform to bufs: */
58 cld->apply((plan *) cld, r0, r1, bufr, bufi);
59 r0 += ivs_by_nbuf; r1 += ivs_by_nbuf;
60
61 /* copy back */
62 cldcpy->apply((plan *) cldcpy, bufr, bufi, cr, ci);
63 cr += ovs_by_nbuf; ci += ovs_by_nbuf;
64 }
65
66 X(ifree)(bufs);
67
68 /* Do the remaining transforms, if any: */
69 cldrest = (plan_rdft2 *) ego->cldrest;
70 cldrest->apply((plan *) cldrest, r0, r1, cr, ci);
71 }
72
73 /* for hc2r problems, copy the input into buffer, and then
74 transform buffer->output, which allows for destruction of the
75 buffer */
76 static void apply_hc2r(const plan *ego_, R *r0, R *r1, R *cr, R *ci)
77 {
78 const P *ego = (const P *) ego_;
79 plan_rdft2 *cld = (plan_rdft2 *) ego->cld;
80 plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
81 INT i, vl = ego->vl, nbuf = ego->nbuf;
82 INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
83 R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS);
84 R *bufr = bufs + ego->roffset;
85 R *bufi = bufs + ego->ioffset;
86 plan_rdft2 *cldrest;
87
88 for (i = nbuf; i <= vl; i += nbuf) {
89 /* copy input into bufs: */
90 cldcpy->apply((plan *) cldcpy, cr, ci, bufr, bufi);
91 cr += ivs_by_nbuf; ci += ivs_by_nbuf;
92
93 /* transform to output */
94 cld->apply((plan *) cld, r0, r1, bufr, bufi);
95 r0 += ovs_by_nbuf; r1 += ovs_by_nbuf;
96 }
97
98 X(ifree)(bufs);
99
100 /* Do the remaining transforms, if any: */
101 cldrest = (plan_rdft2 *) ego->cldrest;
102 cldrest->apply((plan *) cldrest, r0, r1, cr, ci);
103 }
104
105
106 static void awake(plan *ego_, enum wakefulness wakefulness)
107 {
108 P *ego = (P *) ego_;
109
110 X(plan_awake)(ego->cld, wakefulness);
111 X(plan_awake)(ego->cldcpy, wakefulness);
112 X(plan_awake)(ego->cldrest, wakefulness);
113 }
114
115 static void destroy(plan *ego_)
116 {
117 P *ego = (P *) ego_;
118 X(plan_destroy_internal)(ego->cldrest);
119 X(plan_destroy_internal)(ego->cldcpy);
120 X(plan_destroy_internal)(ego->cld);
121 }
122
123 static void print(const plan *ego_, printer *p)
124 {
125 const P *ego = (const P *) ego_;
126 p->print(p, "(rdft2-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
127 ego->n, ego->nbuf,
128 ego->vl, ego->bufdist % ego->n,
129 ego->cld, ego->cldcpy, ego->cldrest);
130 }
131
132 static int applicable0(const S *ego, const problem *p_, const planner *plnr)
133 {
134 const problem_rdft2 *p = (const problem_rdft2 *) p_;
135 iodim *d = p->sz->dims;
136
137 if (1
138 && p->vecsz->rnk <= 1
139 && p->sz->rnk == 1
140
141 /* we assume even n throughout */
142 && (d[0].n % 2) == 0
143
144 /* and we only consider these two cases */
145 && (p->kind == R2HC || p->kind == HC2R)
146
147 ) {
148 INT vl, ivs, ovs;
149 X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
150
151 if (X(toobig)(d[0].n) && CONSERVE_MEMORYP(plnr))
152 return 0;
153
154 /* if this solver is redundant, in the sense that a solver
155 of lower index generates the same plan, then prune this
156 solver */
157 if (X(nbuf_redundant)(d[0].n, vl,
158 ego->maxnbuf_ndx,
159 maxnbufs, NELEM(maxnbufs)))
160 return 0;
161
162 if (p->r0 != p->cr) {
163 if (p->kind == HC2R) {
164 /* Allow HC2R problems only if the input is to be
165 preserved. This solver sets NO_DESTROY_INPUT,
166 which prevents infinite loops */
167 return (NO_DESTROY_INPUTP(plnr));
168 } else {
169 /*
170 In principle, the buffered transforms might be useful
171 when working out of place. However, in order to
172 prevent infinite loops in the planner, we require
173 that the output stride of the buffered transforms be
174 greater than 2.
175 */
176 return (d[0].os > 2);
177 }
178 }
179
180 /*
181 * If the problem is in place, the input/output strides must
182 * be the same or the whole thing must fit in the buffer.
183 */
184 if (X(rdft2_inplace_strides(p, RNK_MINFTY)))
185 return 1;
186
187 if (/* fits into buffer: */
188 ((p->vecsz->rnk == 0)
189 ||
190 (X(nbuf)(d[0].n, p->vecsz->dims[0].n,
191 maxnbufs[ego->maxnbuf_ndx])
192 == p->vecsz->dims[0].n)))
193 return 1;
194 }
195
196 return 0;
197 }
198
199 static int applicable(const S *ego, const problem *p_, const planner *plnr)
200 {
201 const problem_rdft2 *p;
202
203 if (NO_BUFFERINGP(plnr)) return 0;
204
205 if (!applicable0(ego, p_, plnr)) return 0;
206
207 p = (const problem_rdft2 *) p_;
208 if (p->kind == HC2R) {
209 if (NO_UGLYP(plnr)) {
210 /* UGLY if in-place and too big, since the problem
211 could be solved via transpositions */
212 if (p->r0 == p->cr && X(toobig)(p->sz->dims[0].n))
213 return 0;
214 }
215 } else {
216 if (NO_UGLYP(plnr)) {
217 if (p->r0 != p->cr || X(toobig)(p->sz->dims[0].n))
218 return 0;
219 }
220 }
221 return 1;
222 }
223
224 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
225 {
226 P *pln;
227 const S *ego = (const S *)ego_;
228 plan *cld = (plan *) 0;
229 plan *cldcpy = (plan *) 0;
230 plan *cldrest = (plan *) 0;
231 const problem_rdft2 *p = (const problem_rdft2 *) p_;
232 R *bufs = (R *) 0;
233 INT nbuf = 0, bufdist, n, vl;
234 INT ivs, ovs, ioffset, roffset, id, od;
235
236 static const plan_adt padt = {
237 X(rdft2_solve), awake, print, destroy
238 };
239
240 if (!applicable(ego, p_, plnr))
241 goto nada;
242
243 n = X(tensor_sz)(p->sz);
244 X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
245
246 nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
247 bufdist = X(bufdist)(n + 2, vl); /* complex-side rdft2 stores N+2
248 real numbers */
249 A(nbuf > 0);
250
251 /* attempt to keep real and imaginary part in the same order,
252 so as to allow optimizations in the the copy plan */
253 roffset = (p->cr - p->ci > 0) ? (INT)1 : (INT)0;
254 ioffset = 1 - roffset;
255
256 /* initial allocation for the purpose of planning */
257 bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);
258
259 id = ivs * (nbuf * (vl / nbuf));
260 od = ovs * (nbuf * (vl / nbuf));
261
262 if (p->kind == R2HC) {
263 /* allow destruction of input if problem is in place */
264 cld = X(mkplan_f_d)(
265 plnr,
266 X(mkproblem_rdft2_d)(
267 X(mktensor_1d)(n, p->sz->dims[0].is, 2),
268 X(mktensor_1d)(nbuf, ivs, bufdist),
269 TAINT(p->r0, ivs * nbuf), TAINT(p->r1, ivs * nbuf),
270 bufs + roffset, bufs + ioffset, p->kind),
271 0, 0, (p->r0 == p->cr) ? NO_DESTROY_INPUT : 0);
272 if (!cld) goto nada;
273
274 /* copying back from the buffer is a rank-0 DFT: */
275 cldcpy = X(mkplan_d)(
276 plnr,
277 X(mkproblem_dft_d)(
278 X(mktensor_0d)(),
279 X(mktensor_2d)(nbuf, bufdist, ovs,
280 n/2+1, 2, p->sz->dims[0].os),
281 bufs + roffset, bufs + ioffset,
282 TAINT(p->cr, ovs * nbuf), TAINT(p->ci, ovs * nbuf) ));
283 if (!cldcpy) goto nada;
284
285 X(ifree)(bufs); bufs = 0;
286
287 cldrest = X(mkplan_d)(plnr,
288 X(mkproblem_rdft2_d)(
289 X(tensor_copy)(p->sz),
290 X(mktensor_1d)(vl % nbuf, ivs, ovs),
291 p->r0 + id, p->r1 + id,
292 p->cr + od, p->ci + od,
293 p->kind));
294 if (!cldrest) goto nada;
295 pln = MKPLAN_RDFT2(P, &padt, apply_r2hc);
296 } else {
297 /* allow destruction of buffer */
298 cld = X(mkplan_f_d)(
299 plnr,
300 X(mkproblem_rdft2_d)(
301 X(mktensor_1d)(n, 2, p->sz->dims[0].os),
302 X(mktensor_1d)(nbuf, bufdist, ovs),
303 TAINT(p->r0, ovs * nbuf), TAINT(p->r1, ovs * nbuf),
304 bufs + roffset, bufs + ioffset, p->kind),
305 0, 0, NO_DESTROY_INPUT);
306 if (!cld) goto nada;
307
308 /* copying input into buffer is a rank-0 DFT: */
309 cldcpy = X(mkplan_d)(
310 plnr,
311 X(mkproblem_dft_d)(
312 X(mktensor_0d)(),
313 X(mktensor_2d)(nbuf, ivs, bufdist,
314 n/2+1, p->sz->dims[0].is, 2),
315 TAINT(p->cr, ivs * nbuf), TAINT(p->ci, ivs * nbuf),
316 bufs + roffset, bufs + ioffset));
317 if (!cldcpy) goto nada;
318
319 X(ifree)(bufs); bufs = 0;
320
321 cldrest = X(mkplan_d)(plnr,
322 X(mkproblem_rdft2_d)(
323 X(tensor_copy)(p->sz),
324 X(mktensor_1d)(vl % nbuf, ivs, ovs),
325 p->r0 + od, p->r1 + od,
326 p->cr + id, p->ci + id,
327 p->kind));
328 if (!cldrest) goto nada;
329
330 pln = MKPLAN_RDFT2(P, &padt, apply_hc2r);
331 }
332
333 pln->cld = cld;
334 pln->cldcpy = cldcpy;
335 pln->cldrest = cldrest;
336 pln->n = n;
337 pln->vl = vl;
338 pln->ivs_by_nbuf = ivs * nbuf;
339 pln->ovs_by_nbuf = ovs * nbuf;
340 pln->roffset = roffset;
341 pln->ioffset = ioffset;
342
343 pln->nbuf = nbuf;
344 pln->bufdist = bufdist;
345
346 {
347 opcnt t;
348 X(ops_add)(&cld->ops, &cldcpy->ops, &t);
349 X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
350 }
351
352 return &(pln->super.super);
353
354 nada:
355 X(ifree0)(bufs);
356 X(plan_destroy_internal)(cldrest);
357 X(plan_destroy_internal)(cldcpy);
358 X(plan_destroy_internal)(cld);
359 return (plan *) 0;
360 }
361
362 static solver *mksolver(size_t maxnbuf_ndx)
363 {
364 static const solver_adt sadt = { PROBLEM_RDFT2, mkplan, 0 };
365 S *slv = MKSOLVER(S, &sadt);
366 slv->maxnbuf_ndx = maxnbuf_ndx;
367 return &(slv->super);
368 }
369
370 void X(rdft2_buffered_register)(planner *p)
371 {
372 size_t i;
373 for (i = 0; i < NELEM(maxnbufs); ++i)
374 REGISTER_SOLVER(p, mksolver(i));
375 }