comparison src/fftw-3.3.3/mpi/rdft-rank-geq2-transposed.c @ 10:37bf6b4a2645

Add FFTW3
author Chris Cannam
date Wed, 20 Mar 2013 15:35:50 +0000
parents
children
comparison
equal deleted inserted replaced
9:c0fb53affa76 10:37bf6b4a2645
1 /*
2 * Copyright (c) 2003, 2007-11 Matteo Frigo
3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 */
20
21 /* Complex RDFTs of rank >= 2, for the case where we are distributed
22 across the first dimension only, and the output is transposed both
23 in data distribution and in ordering (for the first 2 dimensions).
24
25 (Note that we don't have to handle the case where the input is
26 transposed, since this is equivalent to transposed output with the
27 first two dimensions swapped, and is automatically canonicalized as
28 such by rdft-problem.c. */
29
30 #include "mpi-rdft.h"
31 #include "mpi-transpose.h"
32
33 typedef struct {
34 solver super;
35 int preserve_input; /* preserve input even if DESTROY_INPUT was passed */
36 } S;
37
38 typedef struct {
39 plan_mpi_rdft super;
40
41 plan *cld1, *cldt, *cld2;
42 INT roff, ioff;
43 int preserve_input;
44 } P;
45
46 static void apply(const plan *ego_, R *I, R *O)
47 {
48 const P *ego = (const P *) ego_;
49 plan_rdft *cld1, *cld2, *cldt;
50
51 /* RDFT local dimensions */
52 cld1 = (plan_rdft *) ego->cld1;
53 if (ego->preserve_input) {
54 cld1->apply(ego->cld1, I, O);
55 I = O;
56 }
57 else
58 cld1->apply(ego->cld1, I, I);
59
60 /* global transpose */
61 cldt = (plan_rdft *) ego->cldt;
62 cldt->apply(ego->cldt, I, O);
63
64 /* RDFT final local dimension */
65 cld2 = (plan_rdft *) ego->cld2;
66 cld2->apply(ego->cld2, O, O);
67 }
68
69 static int applicable(const S *ego, const problem *p_,
70 const planner *plnr)
71 {
72 const problem_mpi_rdft *p = (const problem_mpi_rdft *) p_;
73 return (1
74 && p->sz->rnk > 1
75 && p->flags == TRANSPOSED_OUT
76 && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr)
77 && p->I != p->O))
78 && XM(is_local_after)(1, p->sz, IB)
79 && XM(is_local_after)(2, p->sz, OB)
80 && XM(num_blocks)(p->sz->dims[0].n, p->sz->dims[0].b[OB]) == 1
81 && (!NO_SLOWP(plnr) /* slow if rdft-serial is applicable */
82 || !XM(rdft_serial_applicable)(p))
83 );
84 }
85
86 static void awake(plan *ego_, enum wakefulness wakefulness)
87 {
88 P *ego = (P *) ego_;
89 X(plan_awake)(ego->cld1, wakefulness);
90 X(plan_awake)(ego->cldt, wakefulness);
91 X(plan_awake)(ego->cld2, wakefulness);
92 }
93
94 static void destroy(plan *ego_)
95 {
96 P *ego = (P *) ego_;
97 X(plan_destroy_internal)(ego->cld2);
98 X(plan_destroy_internal)(ego->cldt);
99 X(plan_destroy_internal)(ego->cld1);
100 }
101
102 static void print(const plan *ego_, printer *p)
103 {
104 const P *ego = (const P *) ego_;
105 p->print(p, "(mpi-rdft-rank-geq2-transposed%s%(%p%)%(%p%)%(%p%))",
106 ego->preserve_input==2 ?"/p":"",
107 ego->cld1, ego->cldt, ego->cld2);
108 }
109
110 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
111 {
112 const S *ego = (const S *) ego_;
113 const problem_mpi_rdft *p;
114 P *pln;
115 plan *cld1 = 0, *cldt = 0, *cld2 = 0;
116 R *I, *O, *I2;
117 tensor *sz;
118 int i, my_pe, n_pes;
119 INT nrest;
120 static const plan_adt padt = {
121 XM(rdft_solve), awake, print, destroy
122 };
123
124 UNUSED(ego);
125
126 if (!applicable(ego, p_, plnr))
127 return (plan *) 0;
128
129 p = (const problem_mpi_rdft *) p_;
130
131 I2 = I = p->I;
132 O = p->O;
133 if (ego->preserve_input || NO_DESTROY_INPUTP(plnr))
134 I = O;
135 MPI_Comm_rank(p->comm, &my_pe);
136 MPI_Comm_size(p->comm, &n_pes);
137
138 sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */
139 i = p->sz->rnk - 2; A(i >= 0);
140 sz->dims[i].n = p->sz->dims[i+1].n;
141 sz->dims[i].is = sz->dims[i].os = p->vn;
142 for (--i; i >= 0; --i) {
143 sz->dims[i].n = p->sz->dims[i+1].n;
144 sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is;
145 }
146 nrest = 1; for (i = 1; i < sz->rnk; ++i) nrest *= sz->dims[i].n;
147 {
148 INT is = sz->dims[0].n * sz->dims[0].is;
149 INT b = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[IB], my_pe);
150 cld1 = X(mkplan_d)(plnr,
151 X(mkproblem_rdft_d)(sz,
152 X(mktensor_2d)(b, is, is,
153 p->vn, 1, 1),
154 I2, I, p->kind + 1));
155 if (XM(any_true)(!cld1, p->comm)) goto nada;
156 }
157
158 nrest *= p->vn;
159 cldt = X(mkplan_d)(plnr,
160 XM(mkproblem_transpose)(
161 p->sz->dims[0].n, p->sz->dims[1].n, nrest,
162 I, O,
163 p->sz->dims[0].b[IB], p->sz->dims[1].b[OB],
164 p->comm, 0));
165 if (XM(any_true)(!cldt, p->comm)) goto nada;
166
167 {
168 INT is = p->sz->dims[0].n * nrest;
169 INT b = XM(block)(p->sz->dims[1].n, p->sz->dims[1].b[OB], my_pe);
170 cld2 = X(mkplan_d)(plnr,
171 X(mkproblem_rdft_1_d)(X(mktensor_1d)(
172 p->sz->dims[0].n,
173 nrest, nrest),
174 X(mktensor_2d)(b, is, is,
175 nrest, 1, 1),
176 O, O, p->kind[0]));
177 if (XM(any_true)(!cld2, p->comm)) goto nada;
178 }
179
180 pln = MKPLAN_MPI_RDFT(P, &padt, apply);
181 pln->cld1 = cld1;
182 pln->cldt = cldt;
183 pln->cld2 = cld2;
184 pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr);
185
186 X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
187 X(ops_add2)(&cldt->ops, &pln->super.super.ops);
188
189 return &(pln->super.super);
190
191 nada:
192 X(plan_destroy_internal)(cld2);
193 X(plan_destroy_internal)(cldt);
194 X(plan_destroy_internal)(cld1);
195 return (plan *) 0;
196 }
197
198 static solver *mksolver(int preserve_input)
199 {
200 static const solver_adt sadt = { PROBLEM_MPI_RDFT, mkplan, 0 };
201 S *slv = MKSOLVER(S, &sadt);
202 slv->preserve_input = preserve_input;
203 return &(slv->super);
204 }
205
206 void XM(rdft_rank_geq2_transposed_register)(planner *p)
207 {
208 int preserve_input;
209 for (preserve_input = 0; preserve_input <= 1; ++preserve_input)
210 REGISTER_SOLVER(p, mksolver(preserve_input));
211 }