cannam@95
|
1 /*
|
cannam@95
|
2 * Copyright (c) 2003, 2007-11 Matteo Frigo
|
cannam@95
|
3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
|
cannam@95
|
4 *
|
cannam@95
|
5 * This program is free software; you can redistribute it and/or modify
|
cannam@95
|
6 * it under the terms of the GNU General Public License as published by
|
cannam@95
|
7 * the Free Software Foundation; either version 2 of the License, or
|
cannam@95
|
8 * (at your option) any later version.
|
cannam@95
|
9 *
|
cannam@95
|
10 * This program is distributed in the hope that it will be useful,
|
cannam@95
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
cannam@95
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
cannam@95
|
13 * GNU General Public License for more details.
|
cannam@95
|
14 *
|
cannam@95
|
15 * You should have received a copy of the GNU General Public License
|
cannam@95
|
16 * along with this program; if not, write to the Free Software
|
cannam@95
|
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
cannam@95
|
18 *
|
cannam@95
|
19 */
|
cannam@95
|
20
|
cannam@95
|
21
|
cannam@95
|
22
|
cannam@95
|
23 /* solvers/plans for vectors of small DFT's that cannot be done
|
cannam@95
|
24 in-place directly. Use a rank-0 plan to rearrange the data
|
cannam@95
|
25 before or after the transform. Can also change an out-of-place
|
cannam@95
|
26 plan into a copy + in-place (where the in-place transform
|
cannam@95
|
27 is e.g. unit stride). */
|
cannam@95
|
28
|
cannam@95
|
29 /* FIXME: merge with rank-geq2.c(?), since this is just a special case
|
cannam@95
|
30 of a rank split where the first/second transform has rank 0. */
|
cannam@95
|
31
|
cannam@95
|
32 #include "dft.h"
|
cannam@95
|
33
|
cannam@95
|
34 typedef problem *(*mkcld_t) (const problem_dft *p);
|
cannam@95
|
35
|
cannam@95
|
36 typedef struct {
|
cannam@95
|
37 dftapply apply;
|
cannam@95
|
38 problem *(*mkcld)(const problem_dft *p);
|
cannam@95
|
39 const char *nam;
|
cannam@95
|
40 } ndrct_adt;
|
cannam@95
|
41
|
cannam@95
|
42 typedef struct {
|
cannam@95
|
43 solver super;
|
cannam@95
|
44 const ndrct_adt *adt;
|
cannam@95
|
45 } S;
|
cannam@95
|
46
|
cannam@95
|
47 typedef struct {
|
cannam@95
|
48 plan_dft super;
|
cannam@95
|
49 plan *cldcpy, *cld;
|
cannam@95
|
50 const S *slv;
|
cannam@95
|
51 } P;
|
cannam@95
|
52
|
cannam@95
|
53 /*-----------------------------------------------------------------------*/
|
cannam@95
|
54 /* first rearrange, then transform */
|
cannam@95
|
55 static void apply_before(const plan *ego_, R *ri, R *ii, R *ro, R *io)
|
cannam@95
|
56 {
|
cannam@95
|
57 const P *ego = (const P *) ego_;
|
cannam@95
|
58
|
cannam@95
|
59 {
|
cannam@95
|
60 plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
|
cannam@95
|
61 cldcpy->apply(ego->cldcpy, ri, ii, ro, io);
|
cannam@95
|
62 }
|
cannam@95
|
63 {
|
cannam@95
|
64 plan_dft *cld = (plan_dft *) ego->cld;
|
cannam@95
|
65 cld->apply(ego->cld, ro, io, ro, io);
|
cannam@95
|
66 }
|
cannam@95
|
67 }
|
cannam@95
|
68
|
cannam@95
|
69 static problem *mkcld_before(const problem_dft *p)
|
cannam@95
|
70 {
|
cannam@95
|
71 return X(mkproblem_dft_d)(X(tensor_copy_inplace)(p->sz, INPLACE_OS),
|
cannam@95
|
72 X(tensor_copy_inplace)(p->vecsz, INPLACE_OS),
|
cannam@95
|
73 p->ro, p->io, p->ro, p->io);
|
cannam@95
|
74 }
|
cannam@95
|
75
|
cannam@95
|
76 static const ndrct_adt adt_before =
|
cannam@95
|
77 {
|
cannam@95
|
78 apply_before, mkcld_before, "dft-indirect-before"
|
cannam@95
|
79 };
|
cannam@95
|
80
|
cannam@95
|
81 /*-----------------------------------------------------------------------*/
|
cannam@95
|
82 /* first transform, then rearrange */
|
cannam@95
|
83
|
cannam@95
|
84 static void apply_after(const plan *ego_, R *ri, R *ii, R *ro, R *io)
|
cannam@95
|
85 {
|
cannam@95
|
86 const P *ego = (const P *) ego_;
|
cannam@95
|
87
|
cannam@95
|
88 {
|
cannam@95
|
89 plan_dft *cld = (plan_dft *) ego->cld;
|
cannam@95
|
90 cld->apply(ego->cld, ri, ii, ri, ii);
|
cannam@95
|
91 }
|
cannam@95
|
92 {
|
cannam@95
|
93 plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
|
cannam@95
|
94 cldcpy->apply(ego->cldcpy, ri, ii, ro, io);
|
cannam@95
|
95 }
|
cannam@95
|
96 }
|
cannam@95
|
97
|
cannam@95
|
98 static problem *mkcld_after(const problem_dft *p)
|
cannam@95
|
99 {
|
cannam@95
|
100 return X(mkproblem_dft_d)(X(tensor_copy_inplace)(p->sz, INPLACE_IS),
|
cannam@95
|
101 X(tensor_copy_inplace)(p->vecsz, INPLACE_IS),
|
cannam@95
|
102 p->ri, p->ii, p->ri, p->ii);
|
cannam@95
|
103 }
|
cannam@95
|
104
|
cannam@95
|
105 static const ndrct_adt adt_after =
|
cannam@95
|
106 {
|
cannam@95
|
107 apply_after, mkcld_after, "dft-indirect-after"
|
cannam@95
|
108 };
|
cannam@95
|
109
|
cannam@95
|
110 /*-----------------------------------------------------------------------*/
|
cannam@95
|
111 static void destroy(plan *ego_)
|
cannam@95
|
112 {
|
cannam@95
|
113 P *ego = (P *) ego_;
|
cannam@95
|
114 X(plan_destroy_internal)(ego->cld);
|
cannam@95
|
115 X(plan_destroy_internal)(ego->cldcpy);
|
cannam@95
|
116 }
|
cannam@95
|
117
|
cannam@95
|
118 static void awake(plan *ego_, enum wakefulness wakefulness)
|
cannam@95
|
119 {
|
cannam@95
|
120 P *ego = (P *) ego_;
|
cannam@95
|
121 X(plan_awake)(ego->cldcpy, wakefulness);
|
cannam@95
|
122 X(plan_awake)(ego->cld, wakefulness);
|
cannam@95
|
123 }
|
cannam@95
|
124
|
cannam@95
|
125 static void print(const plan *ego_, printer *p)
|
cannam@95
|
126 {
|
cannam@95
|
127 const P *ego = (const P *) ego_;
|
cannam@95
|
128 const S *s = ego->slv;
|
cannam@95
|
129 p->print(p, "(%s%(%p%)%(%p%))", s->adt->nam, ego->cld, ego->cldcpy);
|
cannam@95
|
130 }
|
cannam@95
|
131
|
cannam@95
|
132 static int applicable0(const solver *ego_, const problem *p_,
|
cannam@95
|
133 const planner *plnr)
|
cannam@95
|
134 {
|
cannam@95
|
135 const S *ego = (const S *) ego_;
|
cannam@95
|
136 const problem_dft *p = (const problem_dft *) p_;
|
cannam@95
|
137 return (1
|
cannam@95
|
138 && FINITE_RNK(p->vecsz->rnk)
|
cannam@95
|
139
|
cannam@95
|
140 /* problem must be a nontrivial transform, not just a copy */
|
cannam@95
|
141 && p->sz->rnk > 0
|
cannam@95
|
142
|
cannam@95
|
143 && (0
|
cannam@95
|
144
|
cannam@95
|
145 /* problem must be in-place & require some
|
cannam@95
|
146 rearrangement of the data; to prevent
|
cannam@95
|
147 infinite loops with indirect-transpose, we
|
cannam@95
|
148 further require that at least some transform
|
cannam@95
|
149 strides must decrease */
|
cannam@95
|
150 || (p->ri == p->ro
|
cannam@95
|
151 && !X(tensor_inplace_strides2)(p->sz, p->vecsz)
|
cannam@95
|
152 && X(tensor_strides_decrease)(
|
cannam@95
|
153 p->sz, p->vecsz,
|
cannam@95
|
154 ego->adt->apply == apply_after ?
|
cannam@95
|
155 INPLACE_IS : INPLACE_OS))
|
cannam@95
|
156
|
cannam@95
|
157 /* or problem must be out of place, transforming
|
cannam@95
|
158 from stride 1/2 to bigger stride, for apply_after */
|
cannam@95
|
159 || (p->ri != p->ro && ego->adt->apply == apply_after
|
cannam@95
|
160 && !NO_DESTROY_INPUTP(plnr)
|
cannam@95
|
161 && X(tensor_min_istride)(p->sz) <= 2
|
cannam@95
|
162 && X(tensor_min_ostride)(p->sz) > 2)
|
cannam@95
|
163
|
cannam@95
|
164 /* or problem must be out of place, transforming
|
cannam@95
|
165 to stride 1/2 from bigger stride, for apply_before */
|
cannam@95
|
166 || (p->ri != p->ro && ego->adt->apply == apply_before
|
cannam@95
|
167 && X(tensor_min_ostride)(p->sz) <= 2
|
cannam@95
|
168 && X(tensor_min_istride)(p->sz) > 2)
|
cannam@95
|
169 )
|
cannam@95
|
170 );
|
cannam@95
|
171 }
|
cannam@95
|
172
|
cannam@95
|
173 static int applicable(const solver *ego_, const problem *p_,
|
cannam@95
|
174 const planner *plnr)
|
cannam@95
|
175 {
|
cannam@95
|
176 if (!applicable0(ego_, p_, plnr)) return 0;
|
cannam@95
|
177 {
|
cannam@95
|
178 const problem_dft *p = (const problem_dft *) p_;
|
cannam@95
|
179 if (NO_INDIRECT_OP_P(plnr) && p->ri != p->ro) return 0;
|
cannam@95
|
180 }
|
cannam@95
|
181 return 1;
|
cannam@95
|
182 }
|
cannam@95
|
183
|
cannam@95
|
184 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
|
cannam@95
|
185 {
|
cannam@95
|
186 const problem_dft *p = (const problem_dft *) p_;
|
cannam@95
|
187 const S *ego = (const S *) ego_;
|
cannam@95
|
188 P *pln;
|
cannam@95
|
189 plan *cld = 0, *cldcpy = 0;
|
cannam@95
|
190
|
cannam@95
|
191 static const plan_adt padt = {
|
cannam@95
|
192 X(dft_solve), awake, print, destroy
|
cannam@95
|
193 };
|
cannam@95
|
194
|
cannam@95
|
195 if (!applicable(ego_, p_, plnr))
|
cannam@95
|
196 return (plan *) 0;
|
cannam@95
|
197
|
cannam@95
|
198 cldcpy =
|
cannam@95
|
199 X(mkplan_d)(plnr,
|
cannam@95
|
200 X(mkproblem_dft_d)(X(mktensor_0d)(),
|
cannam@95
|
201 X(tensor_append)(p->vecsz, p->sz),
|
cannam@95
|
202 p->ri, p->ii, p->ro, p->io));
|
cannam@95
|
203
|
cannam@95
|
204 if (!cldcpy) goto nada;
|
cannam@95
|
205
|
cannam@95
|
206 cld = X(mkplan_f_d)(plnr, ego->adt->mkcld(p), NO_BUFFERING, 0, 0);
|
cannam@95
|
207 if (!cld) goto nada;
|
cannam@95
|
208
|
cannam@95
|
209 pln = MKPLAN_DFT(P, &padt, ego->adt->apply);
|
cannam@95
|
210 pln->cld = cld;
|
cannam@95
|
211 pln->cldcpy = cldcpy;
|
cannam@95
|
212 pln->slv = ego;
|
cannam@95
|
213 X(ops_add)(&cld->ops, &cldcpy->ops, &pln->super.super.ops);
|
cannam@95
|
214
|
cannam@95
|
215 return &(pln->super.super);
|
cannam@95
|
216
|
cannam@95
|
217 nada:
|
cannam@95
|
218 X(plan_destroy_internal)(cld);
|
cannam@95
|
219 X(plan_destroy_internal)(cldcpy);
|
cannam@95
|
220 return (plan *)0;
|
cannam@95
|
221 }
|
cannam@95
|
222
|
cannam@95
|
223 static solver *mksolver(const ndrct_adt *adt)
|
cannam@95
|
224 {
|
cannam@95
|
225 static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
|
cannam@95
|
226 S *slv = MKSOLVER(S, &sadt);
|
cannam@95
|
227 slv->adt = adt;
|
cannam@95
|
228 return &(slv->super);
|
cannam@95
|
229 }
|
cannam@95
|
230
|
cannam@95
|
231 void X(dft_indirect_register)(planner *p)
|
cannam@95
|
232 {
|
cannam@95
|
233 unsigned i;
|
cannam@95
|
234 static const ndrct_adt *const adts[] = {
|
cannam@95
|
235 &adt_before, &adt_after
|
cannam@95
|
236 };
|
cannam@95
|
237
|
cannam@95
|
238 for (i = 0; i < sizeof(adts) / sizeof(adts[0]); ++i)
|
cannam@95
|
239 REGISTER_SOLVER(p, mksolver(adts[i]));
|
cannam@95
|
240 }
|