comparison src/fftw-3.3.3/mpi/dft-rank-geq2-transposed.c @ 10:37bf6b4a2645

Add FFTW3
author Chris Cannam
date Wed, 20 Mar 2013 15:35:50 +0000
parents
children
comparison
equal deleted inserted replaced
9:c0fb53affa76 10:37bf6b4a2645
1 /*
2 * Copyright (c) 2003, 2007-11 Matteo Frigo
3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 */
20
21 /* Complex DFTs of rank >= 2, for the case where we are distributed
22 across the first dimension only, and the output is transposed both
23 in data distribution and in ordering (for the first 2 dimensions).
24
25 (Note that we don't have to handle the case where the input is
26 transposed, since this is equivalent to transposed output with the
27 first two dimensions swapped, and is automatically canonicalized as
28 such by dft-problem.c. */
29
30 #include "mpi-dft.h"
31 #include "mpi-transpose.h"
32 #include "dft.h"
33
34 typedef struct {
35 solver super;
36 int preserve_input; /* preserve input even if DESTROY_INPUT was passed */
37 } S;
38
39 typedef struct {
40 plan_mpi_dft super;
41
42 plan *cld1, *cldt, *cld2;
43 INT roff, ioff;
44 int preserve_input;
45 } P;
46
47 static void apply(const plan *ego_, R *I, R *O)
48 {
49 const P *ego = (const P *) ego_;
50 plan_dft *cld1, *cld2;
51 plan_rdft *cldt;
52 INT roff = ego->roff, ioff = ego->ioff;
53
54 /* DFT local dimensions */
55 cld1 = (plan_dft *) ego->cld1;
56 if (ego->preserve_input) {
57 cld1->apply(ego->cld1, I+roff, I+ioff, O+roff, O+ioff);
58 I = O;
59 }
60 else
61 cld1->apply(ego->cld1, I+roff, I+ioff, I+roff, I+ioff);
62
63 /* global transpose */
64 cldt = (plan_rdft *) ego->cldt;
65 cldt->apply(ego->cldt, I, O);
66
67 /* DFT final local dimension */
68 cld2 = (plan_dft *) ego->cld2;
69 cld2->apply(ego->cld2, O+roff, O+ioff, O+roff, O+ioff);
70 }
71
72 static int applicable(const S *ego, const problem *p_,
73 const planner *plnr)
74 {
75 const problem_mpi_dft *p = (const problem_mpi_dft *) p_;
76 return (1
77 && p->sz->rnk > 1
78 && p->flags == TRANSPOSED_OUT
79 && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr)
80 && p->I != p->O))
81 && XM(is_local_after)(1, p->sz, IB)
82 && XM(is_local_after)(2, p->sz, OB)
83 && XM(num_blocks)(p->sz->dims[0].n, p->sz->dims[0].b[OB]) == 1
84 && (!NO_SLOWP(plnr) /* slow if dft-serial is applicable */
85 || !XM(dft_serial_applicable)(p))
86 );
87 }
88
89 static void awake(plan *ego_, enum wakefulness wakefulness)
90 {
91 P *ego = (P *) ego_;
92 X(plan_awake)(ego->cld1, wakefulness);
93 X(plan_awake)(ego->cldt, wakefulness);
94 X(plan_awake)(ego->cld2, wakefulness);
95 }
96
97 static void destroy(plan *ego_)
98 {
99 P *ego = (P *) ego_;
100 X(plan_destroy_internal)(ego->cld2);
101 X(plan_destroy_internal)(ego->cldt);
102 X(plan_destroy_internal)(ego->cld1);
103 }
104
105 static void print(const plan *ego_, printer *p)
106 {
107 const P *ego = (const P *) ego_;
108 p->print(p, "(mpi-dft-rank-geq2-transposed%s%(%p%)%(%p%)%(%p%))",
109 ego->preserve_input==2 ?"/p":"",
110 ego->cld1, ego->cldt, ego->cld2);
111 }
112
113 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
114 {
115 const S *ego = (const S *) ego_;
116 const problem_mpi_dft *p;
117 P *pln;
118 plan *cld1 = 0, *cldt = 0, *cld2 = 0;
119 R *ri, *ii, *ro, *io, *I, *O;
120 tensor *sz;
121 int i, my_pe, n_pes;
122 INT nrest;
123 static const plan_adt padt = {
124 XM(dft_solve), awake, print, destroy
125 };
126
127 UNUSED(ego);
128
129 if (!applicable(ego, p_, plnr))
130 return (plan *) 0;
131
132 p = (const problem_mpi_dft *) p_;
133
134 X(extract_reim)(p->sign, I = p->I, &ri, &ii);
135 X(extract_reim)(p->sign, O = p->O, &ro, &io);
136 if (ego->preserve_input || NO_DESTROY_INPUTP(plnr))
137 I = O;
138 else {
139 ro = ri;
140 io = ii;
141 }
142 MPI_Comm_rank(p->comm, &my_pe);
143 MPI_Comm_size(p->comm, &n_pes);
144
145 sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */
146 i = p->sz->rnk - 2; A(i >= 0);
147 sz->dims[i].n = p->sz->dims[i+1].n;
148 sz->dims[i].is = sz->dims[i].os = 2 * p->vn;
149 for (--i; i >= 0; --i) {
150 sz->dims[i].n = p->sz->dims[i+1].n;
151 sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is;
152 }
153 nrest = 1; for (i = 1; i < sz->rnk; ++i) nrest *= sz->dims[i].n;
154 {
155 INT is = sz->dims[0].n * sz->dims[0].is;
156 INT b = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[IB], my_pe);
157 cld1 = X(mkplan_d)(plnr,
158 X(mkproblem_dft_d)(sz,
159 X(mktensor_2d)(b, is, is,
160 p->vn, 2, 2),
161 ri, ii, ro, io));
162 if (XM(any_true)(!cld1, p->comm)) goto nada;
163 }
164
165 nrest *= p->vn;
166 cldt = X(mkplan_d)(plnr,
167 XM(mkproblem_transpose)(
168 p->sz->dims[0].n, p->sz->dims[1].n, nrest * 2,
169 I, O,
170 p->sz->dims[0].b[IB], p->sz->dims[1].b[OB],
171 p->comm, 0));
172 if (XM(any_true)(!cldt, p->comm)) goto nada;
173
174 X(extract_reim)(p->sign, O, &ro, &io);
175 {
176 INT is = p->sz->dims[0].n * nrest * 2;
177 INT b = XM(block)(p->sz->dims[1].n, p->sz->dims[1].b[OB], my_pe);
178 cld2 = X(mkplan_d)(plnr,
179 X(mkproblem_dft_d)(X(mktensor_1d)(
180 p->sz->dims[0].n,
181 nrest * 2, nrest * 2),
182 X(mktensor_2d)(b, is, is,
183 nrest, 2, 2),
184 ro, io, ro, io));
185 if (XM(any_true)(!cld2, p->comm)) goto nada;
186 }
187
188 pln = MKPLAN_MPI_DFT(P, &padt, apply);
189 pln->cld1 = cld1;
190 pln->cldt = cldt;
191 pln->cld2 = cld2;
192 pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr);
193 pln->roff = ri - p->I;
194 pln->ioff = ii - p->I;
195
196 X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
197 X(ops_add2)(&cldt->ops, &pln->super.super.ops);
198
199 return &(pln->super.super);
200
201 nada:
202 X(plan_destroy_internal)(cld2);
203 X(plan_destroy_internal)(cldt);
204 X(plan_destroy_internal)(cld1);
205 return (plan *) 0;
206 }
207
208 static solver *mksolver(int preserve_input)
209 {
210 static const solver_adt sadt = { PROBLEM_MPI_DFT, mkplan, 0 };
211 S *slv = MKSOLVER(S, &sadt);
212 slv->preserve_input = preserve_input;
213 return &(slv->super);
214 }
215
216 void XM(dft_rank_geq2_transposed_register)(planner *p)
217 {
218 int preserve_input;
219 for (preserve_input = 0; preserve_input <= 1; ++preserve_input)
220 REGISTER_SOLVER(p, mksolver(preserve_input));
221 }