annotate src/fftw-3.3.5/mpi/rdft2-rank-geq2-transposed.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* Real-input (r2c) DFTs of rank >= 2, for the case where we are distributed
Chris@42 22 across the first dimension only, and the output is transposed both
Chris@42 23 in data distribution and in ordering (for the first 2 dimensions).
Chris@42 24
Chris@42 25 Conversely, real-output (c2r) DFTs where the input is transposed.
Chris@42 26
Chris@42 27 We don't currently support transposed-input r2c or transposed-output
Chris@42 28 c2r transforms. */
Chris@42 29
Chris@42 30 #include "mpi-rdft2.h"
Chris@42 31 #include "mpi-transpose.h"
Chris@42 32 #include "rdft.h"
Chris@42 33 #include "dft.h"
Chris@42 34
Chris@42 35 typedef struct {
Chris@42 36 solver super;
Chris@42 37 int preserve_input; /* preserve input even if DESTROY_INPUT was passed */
Chris@42 38 } S;
Chris@42 39
Chris@42 40 typedef struct {
Chris@42 41 plan_mpi_rdft2 super;
Chris@42 42
Chris@42 43 plan *cld1, *cldt, *cld2;
Chris@42 44 INT vn;
Chris@42 45 int preserve_input;
Chris@42 46 } P;
Chris@42 47
Chris@42 48 static void apply_r2c(const plan *ego_, R *I, R *O)
Chris@42 49 {
Chris@42 50 const P *ego = (const P *) ego_;
Chris@42 51 plan_rdft2 *cld1;
Chris@42 52 plan_dft *cld2;
Chris@42 53 plan_rdft *cldt;
Chris@42 54
Chris@42 55 /* RDFT2 local dimensions */
Chris@42 56 cld1 = (plan_rdft2 *) ego->cld1;
Chris@42 57 if (ego->preserve_input) {
Chris@42 58 cld1->apply(ego->cld1, I, I+ego->vn, O, O+1);
Chris@42 59 I = O;
Chris@42 60 }
Chris@42 61 else
Chris@42 62 cld1->apply(ego->cld1, I, I+ego->vn, I, I+1);
Chris@42 63
Chris@42 64 /* global transpose */
Chris@42 65 cldt = (plan_rdft *) ego->cldt;
Chris@42 66 cldt->apply(ego->cldt, I, O);
Chris@42 67
Chris@42 68 /* DFT final local dimension */
Chris@42 69 cld2 = (plan_dft *) ego->cld2;
Chris@42 70 cld2->apply(ego->cld2, O, O+1, O, O+1);
Chris@42 71 }
Chris@42 72
Chris@42 73 static void apply_c2r(const plan *ego_, R *I, R *O)
Chris@42 74 {
Chris@42 75 const P *ego = (const P *) ego_;
Chris@42 76 plan_rdft2 *cld1;
Chris@42 77 plan_dft *cld2;
Chris@42 78 plan_rdft *cldt;
Chris@42 79
Chris@42 80 /* IDFT local dimensions */
Chris@42 81 cld2 = (plan_dft *) ego->cld2;
Chris@42 82 if (ego->preserve_input) {
Chris@42 83 cld2->apply(ego->cld2, I+1, I, O+1, O);
Chris@42 84 I = O;
Chris@42 85 }
Chris@42 86 else
Chris@42 87 cld2->apply(ego->cld2, I+1, I, I+1, I);
Chris@42 88
Chris@42 89 /* global transpose */
Chris@42 90 cldt = (plan_rdft *) ego->cldt;
Chris@42 91 cldt->apply(ego->cldt, I, O);
Chris@42 92
Chris@42 93 /* RDFT2 final local dimension */
Chris@42 94 cld1 = (plan_rdft2 *) ego->cld1;
Chris@42 95 cld1->apply(ego->cld1, O, O+ego->vn, O, O+1);
Chris@42 96 }
Chris@42 97
Chris@42 98 static int applicable(const S *ego, const problem *p_,
Chris@42 99 const planner *plnr)
Chris@42 100 {
Chris@42 101 const problem_mpi_rdft2 *p = (const problem_mpi_rdft2 *) p_;
Chris@42 102 return (1
Chris@42 103 && p->sz->rnk > 1
Chris@42 104 && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr)
Chris@42 105 && p->I != p->O))
Chris@42 106 && ((p->flags == TRANSPOSED_OUT && p->kind == R2HC
Chris@42 107 && XM(is_local_after)(1, p->sz, IB)
Chris@42 108 && XM(is_local_after)(2, p->sz, OB)
Chris@42 109 && XM(num_blocks)(p->sz->dims[0].n,
Chris@42 110 p->sz->dims[0].b[OB]) == 1)
Chris@42 111 ||
Chris@42 112 (p->flags == TRANSPOSED_IN && p->kind == HC2R
Chris@42 113 && XM(is_local_after)(1, p->sz, OB)
Chris@42 114 && XM(is_local_after)(2, p->sz, IB)
Chris@42 115 && XM(num_blocks)(p->sz->dims[0].n,
Chris@42 116 p->sz->dims[0].b[IB]) == 1))
Chris@42 117 && (!NO_SLOWP(plnr) /* slow if rdft2-serial is applicable */
Chris@42 118 || !XM(rdft2_serial_applicable)(p))
Chris@42 119 );
Chris@42 120 }
Chris@42 121
Chris@42 122 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@42 123 {
Chris@42 124 P *ego = (P *) ego_;
Chris@42 125 X(plan_awake)(ego->cld1, wakefulness);
Chris@42 126 X(plan_awake)(ego->cldt, wakefulness);
Chris@42 127 X(plan_awake)(ego->cld2, wakefulness);
Chris@42 128 }
Chris@42 129
Chris@42 130 static void destroy(plan *ego_)
Chris@42 131 {
Chris@42 132 P *ego = (P *) ego_;
Chris@42 133 X(plan_destroy_internal)(ego->cld2);
Chris@42 134 X(plan_destroy_internal)(ego->cldt);
Chris@42 135 X(plan_destroy_internal)(ego->cld1);
Chris@42 136 }
Chris@42 137
Chris@42 138 static void print(const plan *ego_, printer *p)
Chris@42 139 {
Chris@42 140 const P *ego = (const P *) ego_;
Chris@42 141 p->print(p, "(mpi-rdft2-rank-geq2-transposed%s%(%p%)%(%p%)%(%p%))",
Chris@42 142 ego->preserve_input==2 ?"/p":"",
Chris@42 143 ego->cld1, ego->cldt, ego->cld2);
Chris@42 144 }
Chris@42 145
Chris@42 146 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
Chris@42 147 {
Chris@42 148 const S *ego = (const S *) ego_;
Chris@42 149 const problem_mpi_rdft2 *p;
Chris@42 150 P *pln;
Chris@42 151 plan *cld1 = 0, *cldt = 0, *cld2 = 0;
Chris@42 152 R *r0, *r1, *cr, *ci, *ri, *ii, *ro, *io, *I, *O;
Chris@42 153 tensor *sz;
Chris@42 154 int i, my_pe, n_pes;
Chris@42 155 INT nrest, n1, b1;
Chris@42 156 static const plan_adt padt = {
Chris@42 157 XM(rdft2_solve), awake, print, destroy
Chris@42 158 };
Chris@42 159 block_kind k1, k2;
Chris@42 160
Chris@42 161 UNUSED(ego);
Chris@42 162
Chris@42 163 if (!applicable(ego, p_, plnr))
Chris@42 164 return (plan *) 0;
Chris@42 165
Chris@42 166 p = (const problem_mpi_rdft2 *) p_;
Chris@42 167
Chris@42 168 I = p->I; O = p->O;
Chris@42 169 if (p->kind == R2HC) {
Chris@42 170 k1 = IB; k2 = OB;
Chris@42 171 r1 = (r0 = I) + p->vn;
Chris@42 172 if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) {
Chris@42 173 ci = (cr = O) + 1;
Chris@42 174 I = O;
Chris@42 175 }
Chris@42 176 else
Chris@42 177 ci = (cr = I) + 1;
Chris@42 178 io = ii = (ro = ri = O) + 1;
Chris@42 179 }
Chris@42 180 else {
Chris@42 181 k1 = OB; k2 = IB;
Chris@42 182 r1 = (r0 = O) + p->vn;
Chris@42 183 ci = (cr = O) + 1;
Chris@42 184 if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) {
Chris@42 185 ri = (ii = I) + 1;
Chris@42 186 ro = (io = O) + 1;
Chris@42 187 I = O;
Chris@42 188 }
Chris@42 189 else
Chris@42 190 ro = ri = (io = ii = I) + 1;
Chris@42 191 }
Chris@42 192
Chris@42 193 MPI_Comm_rank(p->comm, &my_pe);
Chris@42 194 MPI_Comm_size(p->comm, &n_pes);
Chris@42 195
Chris@42 196 sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */
Chris@42 197 i = p->sz->rnk - 2; A(i >= 0);
Chris@42 198 sz->dims[i].n = p->sz->dims[i+1].n / 2 + 1;
Chris@42 199 sz->dims[i].is = sz->dims[i].os = 2 * p->vn;
Chris@42 200 for (--i; i >= 0; --i) {
Chris@42 201 sz->dims[i].n = p->sz->dims[i+1].n;
Chris@42 202 sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is;
Chris@42 203 }
Chris@42 204 nrest = 1; for (i = 1; i < sz->rnk; ++i) nrest *= sz->dims[i].n;
Chris@42 205 {
Chris@42 206 INT ivs = 1 + (p->kind == HC2R), ovs = 1 + (p->kind == R2HC);
Chris@42 207 INT is = sz->dims[0].n * sz->dims[0].is;
Chris@42 208 INT b = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[k1], my_pe);
Chris@42 209 sz->dims[p->sz->rnk - 2].n = p->sz->dims[p->sz->rnk - 1].n;
Chris@42 210 cld1 = X(mkplan_d)(plnr,
Chris@42 211 X(mkproblem_rdft2_d)(sz,
Chris@42 212 X(mktensor_2d)(b, is, is,
Chris@42 213 p->vn,ivs,ovs),
Chris@42 214 r0, r1, cr, ci, p->kind));
Chris@42 215 if (XM(any_true)(!cld1, p->comm)) goto nada;
Chris@42 216 }
Chris@42 217
Chris@42 218 nrest *= p->vn;
Chris@42 219 n1 = p->sz->dims[1].n;
Chris@42 220 b1 = p->sz->dims[1].b[k2];
Chris@42 221 if (p->sz->rnk == 2) { /* n1 dimension is cut in ~half */
Chris@42 222 n1 = n1 / 2 + 1;
Chris@42 223 b1 = b1 == p->sz->dims[1].n ? n1 : b1;
Chris@42 224 }
Chris@42 225
Chris@42 226 if (p->kind == R2HC)
Chris@42 227 cldt = X(mkplan_d)(plnr,
Chris@42 228 XM(mkproblem_transpose)(
Chris@42 229 p->sz->dims[0].n, n1, nrest * 2,
Chris@42 230 I, O,
Chris@42 231 p->sz->dims[0].b[IB], b1,
Chris@42 232 p->comm, 0));
Chris@42 233 else
Chris@42 234 cldt = X(mkplan_d)(plnr,
Chris@42 235 XM(mkproblem_transpose)(
Chris@42 236 n1, p->sz->dims[0].n, nrest * 2,
Chris@42 237 I, O,
Chris@42 238 b1, p->sz->dims[0].b[OB],
Chris@42 239 p->comm, 0));
Chris@42 240 if (XM(any_true)(!cldt, p->comm)) goto nada;
Chris@42 241
Chris@42 242 {
Chris@42 243 INT is = p->sz->dims[0].n * nrest * 2;
Chris@42 244 INT b = XM(block)(n1, b1, my_pe);
Chris@42 245 cld2 = X(mkplan_d)(plnr,
Chris@42 246 X(mkproblem_dft_d)(X(mktensor_1d)(
Chris@42 247 p->sz->dims[0].n,
Chris@42 248 nrest * 2, nrest * 2),
Chris@42 249 X(mktensor_2d)(b, is, is,
Chris@42 250 nrest, 2, 2),
Chris@42 251 ri, ii, ro, io));
Chris@42 252 if (XM(any_true)(!cld2, p->comm)) goto nada;
Chris@42 253 }
Chris@42 254
Chris@42 255 pln = MKPLAN_MPI_RDFT2(P, &padt, p->kind == R2HC ? apply_r2c : apply_c2r);
Chris@42 256 pln->cld1 = cld1;
Chris@42 257 pln->cldt = cldt;
Chris@42 258 pln->cld2 = cld2;
Chris@42 259 pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr);
Chris@42 260 pln->vn = p->vn;
Chris@42 261
Chris@42 262 X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
Chris@42 263 X(ops_add2)(&cldt->ops, &pln->super.super.ops);
Chris@42 264
Chris@42 265 return &(pln->super.super);
Chris@42 266
Chris@42 267 nada:
Chris@42 268 X(plan_destroy_internal)(cld2);
Chris@42 269 X(plan_destroy_internal)(cldt);
Chris@42 270 X(plan_destroy_internal)(cld1);
Chris@42 271 return (plan *) 0;
Chris@42 272 }
Chris@42 273
Chris@42 274 static solver *mksolver(int preserve_input)
Chris@42 275 {
Chris@42 276 static const solver_adt sadt = { PROBLEM_MPI_RDFT2, mkplan, 0 };
Chris@42 277 S *slv = MKSOLVER(S, &sadt);
Chris@42 278 slv->preserve_input = preserve_input;
Chris@42 279 return &(slv->super);
Chris@42 280 }
Chris@42 281
Chris@42 282 void XM(rdft2_rank_geq2_transposed_register)(planner *p)
Chris@42 283 {
Chris@42 284 int preserve_input;
Chris@42 285 for (preserve_input = 0; preserve_input <= 1; ++preserve_input)
Chris@42 286 REGISTER_SOLVER(p, mksolver(preserve_input));
Chris@42 287 }