comparison src/fftw-3.3.8/rdft/buffered.c @ 167:bd3cc4d1df30

Add FFTW 3.3.8 source, and a Linux build
author Chris Cannam <cannam@all-day-breakfast.com>
date Tue, 19 Nov 2019 14:52:55 +0000
parents
children
comparison
equal deleted inserted replaced
166:cbd6d7e562c7 167:bd3cc4d1df30
1 /*
2 * Copyright (c) 2003, 2007-14 Matteo Frigo
3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 */
20
21
22 #include "rdft/rdft.h"
23
24 typedef struct {
25 solver super;
26 size_t maxnbuf_ndx;
27 } S;
28
29 static const INT maxnbufs[] = { 8, 256 };
30
31 typedef struct {
32 plan_rdft super;
33
34 plan *cld, *cldcpy, *cldrest;
35 INT n, vl, nbuf, bufdist;
36 INT ivs_by_nbuf, ovs_by_nbuf;
37 } P;
38
39 /* transform a vector input with the help of bufs */
40 static void apply(const plan *ego_, R *I, R *O)
41 {
42 const P *ego = (const P *) ego_;
43 plan_rdft *cld = (plan_rdft *) ego->cld;
44 plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy;
45 plan_rdft *cldrest;
46 INT i, vl = ego->vl, nbuf = ego->nbuf;
47 INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
48 R *bufs;
49
50 bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS);
51
52 for (i = nbuf; i <= vl; i += nbuf) {
53 /* transform to bufs: */
54 cld->apply((plan *) cld, I, bufs);
55 I += ivs_by_nbuf;
56
57 /* copy back */
58 cldcpy->apply((plan *) cldcpy, bufs, O);
59 O += ovs_by_nbuf;
60 }
61
62 X(ifree)(bufs);
63
64 /* Do the remaining transforms, if any: */
65 cldrest = (plan_rdft *) ego->cldrest;
66 cldrest->apply((plan *) cldrest, I, O);
67 }
68
69 /* for hc2r problems, copy the input into buffer, and then
70 transform buffer->output, which allows for destruction of the
71 buffer */
72 static void apply_hc2r(const plan *ego_, R *I, R *O)
73 {
74 const P *ego = (const P *) ego_;
75 plan_rdft *cld = (plan_rdft *) ego->cld;
76 plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy;
77 plan_rdft *cldrest;
78 INT i, vl = ego->vl, nbuf = ego->nbuf;
79 INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
80 R *bufs;
81
82 bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS);
83
84 for (i = nbuf; i <= vl; i += nbuf) {
85 /* copy input into bufs: */
86 cldcpy->apply((plan *) cldcpy, I, bufs);
87 I += ivs_by_nbuf;
88
89 /* transform to output */
90 cld->apply((plan *) cld, bufs, O);
91 O += ovs_by_nbuf;
92 }
93
94 X(ifree)(bufs);
95
96 /* Do the remaining transforms, if any: */
97 cldrest = (plan_rdft *) ego->cldrest;
98 cldrest->apply((plan *) cldrest, I, O);
99 }
100
101
102 static void awake(plan *ego_, enum wakefulness wakefulness)
103 {
104 P *ego = (P *) ego_;
105
106 X(plan_awake)(ego->cld, wakefulness);
107 X(plan_awake)(ego->cldcpy, wakefulness);
108 X(plan_awake)(ego->cldrest, wakefulness);
109 }
110
111 static void destroy(plan *ego_)
112 {
113 P *ego = (P *) ego_;
114 X(plan_destroy_internal)(ego->cldrest);
115 X(plan_destroy_internal)(ego->cldcpy);
116 X(plan_destroy_internal)(ego->cld);
117 }
118
119 static void print(const plan *ego_, printer *p)
120 {
121 const P *ego = (const P *) ego_;
122 p->print(p, "(rdft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
123 ego->n, ego->nbuf,
124 ego->vl, ego->bufdist % ego->n,
125 ego->cld, ego->cldcpy, ego->cldrest);
126 }
127
128 static int applicable0(const S *ego, const problem *p_, const planner *plnr)
129 {
130 const problem_rdft *p = (const problem_rdft *) p_;
131 iodim *d = p->sz->dims;
132
133 if (1
134 && p->vecsz->rnk <= 1
135 && p->sz->rnk == 1
136 ) {
137 INT vl, ivs, ovs;
138 X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
139
140 if (X(toobig)(d[0].n) && CONSERVE_MEMORYP(plnr))
141 return 0;
142
143 /* if this solver is redundant, in the sense that a solver
144 of lower index generates the same plan, then prune this
145 solver */
146 if (X(nbuf_redundant)(d[0].n, vl,
147 ego->maxnbuf_ndx,
148 maxnbufs, NELEM(maxnbufs)))
149 return 0;
150
151 if (p->I != p->O) {
152 if (p->kind[0] == HC2R) {
153 /* Allow HC2R problems only if the input is to be
154 preserved. This solver sets NO_DESTROY_INPUT,
155 which prevents infinite loops */
156 return (NO_DESTROY_INPUTP(plnr));
157 } else {
158 /*
159 In principle, the buffered transforms might be useful
160 when working out of place. However, in order to
161 prevent infinite loops in the planner, we require
162 that the output stride of the buffered transforms be
163 greater than 1.
164 */
165 return (d[0].os > 1);
166 }
167 }
168
169 /*
170 * If the problem is in place, the input/output strides must
171 * be the same or the whole thing must fit in the buffer.
172 */
173 if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
174 return 1;
175
176 if (/* fits into buffer: */
177 ((p->vecsz->rnk == 0)
178 ||
179 (X(nbuf)(d[0].n, p->vecsz->dims[0].n,
180 maxnbufs[ego->maxnbuf_ndx])
181 == p->vecsz->dims[0].n)))
182 return 1;
183 }
184
185 return 0;
186 }
187
188 static int applicable(const S *ego, const problem *p_, const planner *plnr)
189 {
190 const problem_rdft *p;
191
192 if (NO_BUFFERINGP(plnr)) return 0;
193
194 if (!applicable0(ego, p_, plnr)) return 0;
195
196 p = (const problem_rdft *) p_;
197 if (p->kind[0] == HC2R) {
198 if (NO_UGLYP(plnr)) {
199 /* UGLY if in-place and too big, since the problem
200 could be solved via transpositions */
201 if (p->I == p->O && X(toobig)(p->sz->dims[0].n))
202 return 0;
203 }
204 } else {
205 if (NO_UGLYP(plnr)) {
206 if (p->I != p->O) return 0;
207 if (X(toobig)(p->sz->dims[0].n)) return 0;
208 }
209 }
210 return 1;
211 }
212
213 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
214 {
215 P *pln;
216 const S *ego = (const S *)ego_;
217 plan *cld = (plan *) 0;
218 plan *cldcpy = (plan *) 0;
219 plan *cldrest = (plan *) 0;
220 const problem_rdft *p = (const problem_rdft *) p_;
221 R *bufs = (R *) 0;
222 INT nbuf = 0, bufdist, n, vl;
223 INT ivs, ovs;
224 int hc2rp;
225
226 static const plan_adt padt = {
227 X(rdft_solve), awake, print, destroy
228 };
229
230 if (!applicable(ego, p_, plnr))
231 goto nada;
232
233 n = X(tensor_sz)(p->sz);
234 X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
235 hc2rp = (p->kind[0] == HC2R);
236
237 nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
238 bufdist = X(bufdist)(n, vl);
239 A(nbuf > 0);
240
241 /* initial allocation for the purpose of planning */
242 bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);
243
244 if (hc2rp) {
245 /* allow destruction of buffer */
246 cld = X(mkplan_f_d)(plnr,
247 X(mkproblem_rdft_d)(
248 X(mktensor_1d)(n, 1, p->sz->dims[0].os),
249 X(mktensor_1d)(nbuf, bufdist, ovs),
250 bufs, TAINT(p->O, ovs * nbuf), p->kind),
251 0, 0, NO_DESTROY_INPUT);
252 if (!cld) goto nada;
253
254 /* copying input into buffer buffer is a rank-0 transform: */
255 cldcpy = X(mkplan_d)(plnr,
256 X(mkproblem_rdft_0_d)(
257 X(mktensor_2d)(nbuf, ivs, bufdist,
258 n, p->sz->dims[0].is, 1),
259 TAINT(p->I, ivs * nbuf), bufs));
260 if (!cldcpy) goto nada;
261 } else {
262 /* allow destruction of input if problem is in place */
263 cld = X(mkplan_f_d)(plnr,
264 X(mkproblem_rdft_d)(
265 X(mktensor_1d)(n, p->sz->dims[0].is, 1),
266 X(mktensor_1d)(nbuf, ivs, bufdist),
267 TAINT(p->I, ivs * nbuf), bufs, p->kind),
268 0, 0, (p->I == p->O) ? NO_DESTROY_INPUT : 0);
269 if (!cld) goto nada;
270
271 /* copying back from the buffer is a rank-0 transform: */
272 cldcpy = X(mkplan_d)(plnr,
273 X(mkproblem_rdft_0_d)(
274 X(mktensor_2d)(nbuf, bufdist, ovs,
275 n, 1, p->sz->dims[0].os),
276 bufs, TAINT(p->O, ovs * nbuf)));
277 if (!cldcpy) goto nada;
278 }
279
280 /* deallocate buffers, let apply() allocate them for real */
281 X(ifree)(bufs);
282 bufs = 0;
283
284 /* plan the leftover transforms (cldrest): */
285 {
286 INT id = ivs * (nbuf * (vl / nbuf));
287 INT od = ovs * (nbuf * (vl / nbuf));
288 cldrest = X(mkplan_d)(plnr,
289 X(mkproblem_rdft_d)(
290 X(tensor_copy)(p->sz),
291 X(mktensor_1d)(vl % nbuf, ivs, ovs),
292 p->I + id, p->O + od, p->kind));
293 }
294 if (!cldrest) goto nada;
295
296 pln = MKPLAN_RDFT(P, &padt, hc2rp ? apply_hc2r : apply);
297 pln->cld = cld;
298 pln->cldcpy = cldcpy;
299 pln->cldrest = cldrest;
300 pln->n = n;
301 pln->vl = vl;
302 pln->ivs_by_nbuf = ivs * nbuf;
303 pln->ovs_by_nbuf = ovs * nbuf;
304
305 pln->nbuf = nbuf;
306 pln->bufdist = bufdist;
307
308 {
309 opcnt t;
310 X(ops_add)(&cld->ops, &cldcpy->ops, &t);
311 X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
312 }
313
314 return &(pln->super.super);
315
316 nada:
317 X(ifree0)(bufs);
318 X(plan_destroy_internal)(cldrest);
319 X(plan_destroy_internal)(cldcpy);
320 X(plan_destroy_internal)(cld);
321 return (plan *) 0;
322 }
323
324 static solver *mksolver(size_t maxnbuf_ndx)
325 {
326 static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
327 S *slv = MKSOLVER(S, &sadt);
328 slv->maxnbuf_ndx = maxnbuf_ndx;
329 return &(slv->super);
330 }
331
332 void X(rdft_buffered_register)(planner *p)
333 {
334 size_t i;
335 for (i = 0; i < NELEM(maxnbufs); ++i)
336 REGISTER_SOLVER(p, mksolver(i));
337 }