Mercurial > hg > sv-dependency-builds
comparison src/fftw-3.3.8/dft/indirect.c @ 167:bd3cc4d1df30
Add FFTW 3.3.8 source, and a Linux build
author | Chris Cannam <cannam@all-day-breakfast.com> |
---|---|
date | Tue, 19 Nov 2019 14:52:55 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
166:cbd6d7e562c7 | 167:bd3cc4d1df30 |
---|---|
1 /* | |
2 * Copyright (c) 2003, 2007-14 Matteo Frigo | |
3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License as published by | |
7 * the Free Software Foundation; either version 2 of the License, or | |
8 * (at your option) any later version. | |
9 * | |
10 * This program is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 * GNU General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU General Public License | |
16 * along with this program; if not, write to the Free Software | |
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 * | |
19 */ | |
20 | |
21 | |
22 | |
23 /* solvers/plans for vectors of small DFT's that cannot be done | |
24 in-place directly. Use a rank-0 plan to rearrange the data | |
25 before or after the transform. Can also change an out-of-place | |
26 plan into a copy + in-place (where the in-place transform | |
27 is e.g. unit stride). */ | |
28 | |
29 /* FIXME: merge with rank-geq2.c(?), since this is just a special case | |
30 of a rank split where the first/second transform has rank 0. */ | |
31 | |
32 #include "dft/dft.h" | |
33 | |
34 typedef problem *(*mkcld_t) (const problem_dft *p); | |
35 | |
36 typedef struct { | |
37 dftapply apply; | |
38 problem *(*mkcld)(const problem_dft *p); | |
39 const char *nam; | |
40 } ndrct_adt; | |
41 | |
42 typedef struct { | |
43 solver super; | |
44 const ndrct_adt *adt; | |
45 } S; | |
46 | |
47 typedef struct { | |
48 plan_dft super; | |
49 plan *cldcpy, *cld; | |
50 const S *slv; | |
51 } P; | |
52 | |
53 /*-----------------------------------------------------------------------*/ | |
54 /* first rearrange, then transform */ | |
55 static void apply_before(const plan *ego_, R *ri, R *ii, R *ro, R *io) | |
56 { | |
57 const P *ego = (const P *) ego_; | |
58 | |
59 { | |
60 plan_dft *cldcpy = (plan_dft *) ego->cldcpy; | |
61 cldcpy->apply(ego->cldcpy, ri, ii, ro, io); | |
62 } | |
63 { | |
64 plan_dft *cld = (plan_dft *) ego->cld; | |
65 cld->apply(ego->cld, ro, io, ro, io); | |
66 } | |
67 } | |
68 | |
69 static problem *mkcld_before(const problem_dft *p) | |
70 { | |
71 return X(mkproblem_dft_d)(X(tensor_copy_inplace)(p->sz, INPLACE_OS), | |
72 X(tensor_copy_inplace)(p->vecsz, INPLACE_OS), | |
73 p->ro, p->io, p->ro, p->io); | |
74 } | |
75 | |
76 static const ndrct_adt adt_before = | |
77 { | |
78 apply_before, mkcld_before, "dft-indirect-before" | |
79 }; | |
80 | |
81 /*-----------------------------------------------------------------------*/ | |
82 /* first transform, then rearrange */ | |
83 | |
84 static void apply_after(const plan *ego_, R *ri, R *ii, R *ro, R *io) | |
85 { | |
86 const P *ego = (const P *) ego_; | |
87 | |
88 { | |
89 plan_dft *cld = (plan_dft *) ego->cld; | |
90 cld->apply(ego->cld, ri, ii, ri, ii); | |
91 } | |
92 { | |
93 plan_dft *cldcpy = (plan_dft *) ego->cldcpy; | |
94 cldcpy->apply(ego->cldcpy, ri, ii, ro, io); | |
95 } | |
96 } | |
97 | |
98 static problem *mkcld_after(const problem_dft *p) | |
99 { | |
100 return X(mkproblem_dft_d)(X(tensor_copy_inplace)(p->sz, INPLACE_IS), | |
101 X(tensor_copy_inplace)(p->vecsz, INPLACE_IS), | |
102 p->ri, p->ii, p->ri, p->ii); | |
103 } | |
104 | |
105 static const ndrct_adt adt_after = | |
106 { | |
107 apply_after, mkcld_after, "dft-indirect-after" | |
108 }; | |
109 | |
110 /*-----------------------------------------------------------------------*/ | |
111 static void destroy(plan *ego_) | |
112 { | |
113 P *ego = (P *) ego_; | |
114 X(plan_destroy_internal)(ego->cld); | |
115 X(plan_destroy_internal)(ego->cldcpy); | |
116 } | |
117 | |
118 static void awake(plan *ego_, enum wakefulness wakefulness) | |
119 { | |
120 P *ego = (P *) ego_; | |
121 X(plan_awake)(ego->cldcpy, wakefulness); | |
122 X(plan_awake)(ego->cld, wakefulness); | |
123 } | |
124 | |
125 static void print(const plan *ego_, printer *p) | |
126 { | |
127 const P *ego = (const P *) ego_; | |
128 const S *s = ego->slv; | |
129 p->print(p, "(%s%(%p%)%(%p%))", s->adt->nam, ego->cld, ego->cldcpy); | |
130 } | |
131 | |
132 static int applicable0(const solver *ego_, const problem *p_, | |
133 const planner *plnr) | |
134 { | |
135 const S *ego = (const S *) ego_; | |
136 const problem_dft *p = (const problem_dft *) p_; | |
137 return (1 | |
138 && FINITE_RNK(p->vecsz->rnk) | |
139 | |
140 /* problem must be a nontrivial transform, not just a copy */ | |
141 && p->sz->rnk > 0 | |
142 | |
143 && (0 | |
144 | |
145 /* problem must be in-place & require some | |
146 rearrangement of the data; to prevent | |
147 infinite loops with indirect-transpose, we | |
148 further require that at least some transform | |
149 strides must decrease */ | |
150 || (p->ri == p->ro | |
151 && !X(tensor_inplace_strides2)(p->sz, p->vecsz) | |
152 && X(tensor_strides_decrease)( | |
153 p->sz, p->vecsz, | |
154 ego->adt->apply == apply_after ? | |
155 INPLACE_IS : INPLACE_OS)) | |
156 | |
157 /* or problem must be out of place, transforming | |
158 from stride 1/2 to bigger stride, for apply_after */ | |
159 || (p->ri != p->ro && ego->adt->apply == apply_after | |
160 && !NO_DESTROY_INPUTP(plnr) | |
161 && X(tensor_min_istride)(p->sz) <= 2 | |
162 && X(tensor_min_ostride)(p->sz) > 2) | |
163 | |
164 /* or problem must be out of place, transforming | |
165 to stride 1/2 from bigger stride, for apply_before */ | |
166 || (p->ri != p->ro && ego->adt->apply == apply_before | |
167 && X(tensor_min_ostride)(p->sz) <= 2 | |
168 && X(tensor_min_istride)(p->sz) > 2) | |
169 ) | |
170 ); | |
171 } | |
172 | |
173 static int applicable(const solver *ego_, const problem *p_, | |
174 const planner *plnr) | |
175 { | |
176 if (!applicable0(ego_, p_, plnr)) return 0; | |
177 { | |
178 const problem_dft *p = (const problem_dft *) p_; | |
179 if (NO_INDIRECT_OP_P(plnr) && p->ri != p->ro) return 0; | |
180 } | |
181 return 1; | |
182 } | |
183 | |
184 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) | |
185 { | |
186 const problem_dft *p = (const problem_dft *) p_; | |
187 const S *ego = (const S *) ego_; | |
188 P *pln; | |
189 plan *cld = 0, *cldcpy = 0; | |
190 | |
191 static const plan_adt padt = { | |
192 X(dft_solve), awake, print, destroy | |
193 }; | |
194 | |
195 if (!applicable(ego_, p_, plnr)) | |
196 return (plan *) 0; | |
197 | |
198 cldcpy = | |
199 X(mkplan_d)(plnr, | |
200 X(mkproblem_dft_d)(X(mktensor_0d)(), | |
201 X(tensor_append)(p->vecsz, p->sz), | |
202 p->ri, p->ii, p->ro, p->io)); | |
203 | |
204 if (!cldcpy) goto nada; | |
205 | |
206 cld = X(mkplan_f_d)(plnr, ego->adt->mkcld(p), NO_BUFFERING, 0, 0); | |
207 if (!cld) goto nada; | |
208 | |
209 pln = MKPLAN_DFT(P, &padt, ego->adt->apply); | |
210 pln->cld = cld; | |
211 pln->cldcpy = cldcpy; | |
212 pln->slv = ego; | |
213 X(ops_add)(&cld->ops, &cldcpy->ops, &pln->super.super.ops); | |
214 | |
215 return &(pln->super.super); | |
216 | |
217 nada: | |
218 X(plan_destroy_internal)(cld); | |
219 X(plan_destroy_internal)(cldcpy); | |
220 return (plan *)0; | |
221 } | |
222 | |
223 static solver *mksolver(const ndrct_adt *adt) | |
224 { | |
225 static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; | |
226 S *slv = MKSOLVER(S, &sadt); | |
227 slv->adt = adt; | |
228 return &(slv->super); | |
229 } | |
230 | |
231 void X(dft_indirect_register)(planner *p) | |
232 { | |
233 unsigned i; | |
234 static const ndrct_adt *const adts[] = { | |
235 &adt_before, &adt_after | |
236 }; | |
237 | |
238 for (i = 0; i < sizeof(adts) / sizeof(adts[0]); ++i) | |
239 REGISTER_SOLVER(p, mksolver(adts[i])); | |
240 } |