Mercurial > hg > sv-dependency-builds
comparison src/fftw-3.3.3/dft/indirect-transpose.c @ 10:37bf6b4a2645
Add FFTW3
author | Chris Cannam |
---|---|
date | Wed, 20 Mar 2013 15:35:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:c0fb53affa76 | 10:37bf6b4a2645 |
---|---|
1 /* | |
2 * Copyright (c) 2003, 2007-11 Matteo Frigo | |
3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License as published by | |
7 * the Free Software Foundation; either version 2 of the License, or | |
8 * (at your option) any later version. | |
9 * | |
10 * This program is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 * GNU General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU General Public License | |
16 * along with this program; if not, write to the Free Software | |
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 * | |
19 */ | |
20 | |
21 /* solvers/plans for vectors of DFTs corresponding to the columns | |
22 of a matrix: first transpose the matrix so that the DFTs are | |
23 contiguous, then do DFTs with transposed output. In particular, | |
24 we restrict ourselves to the case of a square transpose (or a | |
25 sequence thereof). */ | |
26 | |
27 #include "dft.h" | |
28 | |
29 typedef solver S; | |
30 | |
31 typedef struct { | |
32 plan_dft super; | |
33 INT vl, ivs, ovs; | |
34 plan *cldtrans, *cld, *cldrest; | |
35 } P; | |
36 | |
37 /* initial transpose is out-of-place from input to output */ | |
38 static void apply_op(const plan *ego_, R *ri, R *ii, R *ro, R *io) | |
39 { | |
40 const P *ego = (const P *) ego_; | |
41 INT vl = ego->vl, ivs = ego->ivs, ovs = ego->ovs, i; | |
42 | |
43 for (i = 0; i < vl; ++i) { | |
44 { | |
45 plan_dft *cldtrans = (plan_dft *) ego->cldtrans; | |
46 cldtrans->apply(ego->cldtrans, ri, ii, ro, io); | |
47 } | |
48 { | |
49 plan_dft *cld = (plan_dft *) ego->cld; | |
50 cld->apply(ego->cld, ro, io, ro, io); | |
51 } | |
52 ri += ivs; ii += ivs; | |
53 ro += ovs; io += ovs; | |
54 } | |
55 { | |
56 plan_dft *cldrest = (plan_dft *) ego->cldrest; | |
57 cldrest->apply(ego->cldrest, ri, ii, ro, io); | |
58 } | |
59 } | |
60 | |
61 static void destroy(plan *ego_) | |
62 { | |
63 P *ego = (P *) ego_; | |
64 X(plan_destroy_internal)(ego->cldrest); | |
65 X(plan_destroy_internal)(ego->cld); | |
66 X(plan_destroy_internal)(ego->cldtrans); | |
67 } | |
68 | |
69 static void awake(plan *ego_, enum wakefulness wakefulness) | |
70 { | |
71 P *ego = (P *) ego_; | |
72 X(plan_awake)(ego->cldtrans, wakefulness); | |
73 X(plan_awake)(ego->cld, wakefulness); | |
74 X(plan_awake)(ego->cldrest, wakefulness); | |
75 } | |
76 | |
77 static void print(const plan *ego_, printer *p) | |
78 { | |
79 const P *ego = (const P *) ego_; | |
80 p->print(p, "(indirect-transpose%v%(%p%)%(%p%)%(%p%))", | |
81 ego->vl, ego->cldtrans, ego->cld, ego->cldrest); | |
82 } | |
83 | |
84 static int pickdim(const tensor *vs, const tensor *s, int *pdim0, int *pdim1) | |
85 { | |
86 int dim0, dim1; | |
87 *pdim0 = *pdim1 = -1; | |
88 for (dim0 = 0; dim0 < vs->rnk; ++dim0) | |
89 for (dim1 = 0; dim1 < s->rnk; ++dim1) | |
90 if (vs->dims[dim0].n * X(iabs)(vs->dims[dim0].is) <= X(iabs)(s->dims[dim1].is) | |
91 && vs->dims[dim0].n >= s->dims[dim1].n | |
92 && (*pdim0 == -1 | |
93 || (X(iabs)(vs->dims[dim0].is) <= X(iabs)(vs->dims[*pdim0].is) | |
94 && X(iabs)(s->dims[dim1].is) >= X(iabs)(s->dims[*pdim1].is)))) { | |
95 *pdim0 = dim0; | |
96 *pdim1 = dim1; | |
97 } | |
98 return (*pdim0 != -1 && *pdim1 != -1); | |
99 } | |
100 | |
101 static int applicable0(const solver *ego_, const problem *p_, | |
102 const planner *plnr, | |
103 int *pdim0, int *pdim1) | |
104 { | |
105 const problem_dft *p = (const problem_dft *) p_; | |
106 UNUSED(ego_); UNUSED(plnr); | |
107 | |
108 return (1 | |
109 && FINITE_RNK(p->vecsz->rnk) && FINITE_RNK(p->sz->rnk) | |
110 | |
111 /* FIXME: can/should we relax this constraint? */ | |
112 && X(tensor_inplace_strides2)(p->vecsz, p->sz) | |
113 | |
114 && pickdim(p->vecsz, p->sz, pdim0, pdim1) | |
115 | |
116 /* output should not *already* include the transpose | |
117 (in which case we duplicate the regular indirect.c) */ | |
118 && (p->sz->dims[*pdim1].os != p->vecsz->dims[*pdim0].is) | |
119 ); | |
120 } | |
121 | |
122 static int applicable(const solver *ego_, const problem *p_, | |
123 const planner *plnr, | |
124 int *pdim0, int *pdim1) | |
125 { | |
126 if (!applicable0(ego_, p_, plnr, pdim0, pdim1)) return 0; | |
127 { | |
128 const problem_dft *p = (const problem_dft *) p_; | |
129 INT u = p->ri == p->ii + 1 || p->ii == p->ri + 1 ? (INT)2 : (INT)1; | |
130 | |
131 /* UGLY if does not result in contiguous transforms or | |
132 transforms of contiguous vectors (since the latter at | |
133 least have efficient transpositions) */ | |
134 if (NO_UGLYP(plnr) | |
135 && p->vecsz->dims[*pdim0].is != u | |
136 && !(p->vecsz->rnk == 2 | |
137 && p->vecsz->dims[1-*pdim0].is == u | |
138 && p->vecsz->dims[*pdim0].is | |
139 == u * p->vecsz->dims[1-*pdim0].n)) | |
140 return 0; | |
141 | |
142 if (NO_INDIRECT_OP_P(plnr) && p->ri != p->ro) return 0; | |
143 } | |
144 return 1; | |
145 } | |
146 | |
147 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) | |
148 { | |
149 const problem_dft *p = (const problem_dft *) p_; | |
150 P *pln; | |
151 plan *cld = 0, *cldtrans = 0, *cldrest = 0; | |
152 int pdim0, pdim1; | |
153 tensor *ts, *tv; | |
154 INT vl, ivs, ovs; | |
155 R *rit, *iit, *rot, *iot; | |
156 | |
157 static const plan_adt padt = { | |
158 X(dft_solve), awake, print, destroy | |
159 }; | |
160 | |
161 if (!applicable(ego_, p_, plnr, &pdim0, &pdim1)) | |
162 return (plan *) 0; | |
163 | |
164 vl = p->vecsz->dims[pdim0].n / p->sz->dims[pdim1].n; | |
165 A(vl >= 1); | |
166 ivs = p->sz->dims[pdim1].n * p->vecsz->dims[pdim0].is; | |
167 ovs = p->sz->dims[pdim1].n * p->vecsz->dims[pdim0].os; | |
168 rit = TAINT(p->ri, vl == 1 ? 0 : ivs); | |
169 iit = TAINT(p->ii, vl == 1 ? 0 : ivs); | |
170 rot = TAINT(p->ro, vl == 1 ? 0 : ovs); | |
171 iot = TAINT(p->io, vl == 1 ? 0 : ovs); | |
172 | |
173 ts = X(tensor_copy_inplace)(p->sz, INPLACE_IS); | |
174 ts->dims[pdim1].os = p->vecsz->dims[pdim0].is; | |
175 tv = X(tensor_copy_inplace)(p->vecsz, INPLACE_IS); | |
176 tv->dims[pdim0].os = p->sz->dims[pdim1].is; | |
177 tv->dims[pdim0].n = p->sz->dims[pdim1].n; | |
178 cldtrans = X(mkplan_d)(plnr, | |
179 X(mkproblem_dft_d)(X(mktensor_0d)(), | |
180 X(tensor_append)(tv, ts), | |
181 rit, iit, | |
182 rot, iot)); | |
183 X(tensor_destroy2)(ts, tv); | |
184 if (!cldtrans) goto nada; | |
185 | |
186 ts = X(tensor_copy)(p->sz); | |
187 ts->dims[pdim1].is = p->vecsz->dims[pdim0].is; | |
188 tv = X(tensor_copy)(p->vecsz); | |
189 tv->dims[pdim0].is = p->sz->dims[pdim1].is; | |
190 tv->dims[pdim0].n = p->sz->dims[pdim1].n; | |
191 cld = X(mkplan_d)(plnr, X(mkproblem_dft_d)(ts, tv, | |
192 rot, iot, | |
193 rot, iot)); | |
194 if (!cld) goto nada; | |
195 | |
196 tv = X(tensor_copy)(p->vecsz); | |
197 tv->dims[pdim0].n -= vl * p->sz->dims[pdim1].n; | |
198 cldrest = X(mkplan_d)(plnr, X(mkproblem_dft_d)(X(tensor_copy)(p->sz), tv, | |
199 p->ri + ivs * vl, | |
200 p->ii + ivs * vl, | |
201 p->ro + ovs * vl, | |
202 p->io + ovs * vl)); | |
203 if (!cldrest) goto nada; | |
204 | |
205 pln = MKPLAN_DFT(P, &padt, apply_op); | |
206 pln->cldtrans = cldtrans; | |
207 pln->cld = cld; | |
208 pln->cldrest = cldrest; | |
209 pln->vl = vl; | |
210 pln->ivs = ivs; | |
211 pln->ovs = ovs; | |
212 X(ops_cpy)(&cldrest->ops, &pln->super.super.ops); | |
213 X(ops_madd2)(vl, &cld->ops, &pln->super.super.ops); | |
214 X(ops_madd2)(vl, &cldtrans->ops, &pln->super.super.ops); | |
215 return &(pln->super.super); | |
216 | |
217 nada: | |
218 X(plan_destroy_internal)(cldrest); | |
219 X(plan_destroy_internal)(cld); | |
220 X(plan_destroy_internal)(cldtrans); | |
221 return (plan *)0; | |
222 } | |
223 | |
224 static solver *mksolver(void) | |
225 { | |
226 static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; | |
227 S *slv = MKSOLVER(S, &sadt); | |
228 return slv; | |
229 } | |
230 | |
231 void X(dft_indirect_transpose_register)(planner *p) | |
232 { | |
233 REGISTER_SOLVER(p, mksolver()); | |
234 } |