annotate src/fftw-3.3.8/mpi/rdft-rank-geq2-transposed.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* Complex RDFTs of rank >= 2, for the case where we are distributed
Chris@82 22 across the first dimension only, and the output is transposed both
Chris@82 23 in data distribution and in ordering (for the first 2 dimensions).
Chris@82 24
Chris@82 25 (Note that we don't have to handle the case where the input is
Chris@82 26 transposed, since this is equivalent to transposed output with the
Chris@82 27 first two dimensions swapped, and is automatically canonicalized as
Chris@82 28 such by rdft-problem.c. */
Chris@82 29
Chris@82 30 #include "mpi-rdft.h"
Chris@82 31 #include "mpi-transpose.h"
Chris@82 32
Chris@82 33 typedef struct {
Chris@82 34 solver super;
Chris@82 35 int preserve_input; /* preserve input even if DESTROY_INPUT was passed */
Chris@82 36 } S;
Chris@82 37
Chris@82 38 typedef struct {
Chris@82 39 plan_mpi_rdft super;
Chris@82 40
Chris@82 41 plan *cld1, *cldt, *cld2;
Chris@82 42 INT roff, ioff;
Chris@82 43 int preserve_input;
Chris@82 44 } P;
Chris@82 45
Chris@82 46 static void apply(const plan *ego_, R *I, R *O)
Chris@82 47 {
Chris@82 48 const P *ego = (const P *) ego_;
Chris@82 49 plan_rdft *cld1, *cld2, *cldt;
Chris@82 50
Chris@82 51 /* RDFT local dimensions */
Chris@82 52 cld1 = (plan_rdft *) ego->cld1;
Chris@82 53 if (ego->preserve_input) {
Chris@82 54 cld1->apply(ego->cld1, I, O);
Chris@82 55 I = O;
Chris@82 56 }
Chris@82 57 else
Chris@82 58 cld1->apply(ego->cld1, I, I);
Chris@82 59
Chris@82 60 /* global transpose */
Chris@82 61 cldt = (plan_rdft *) ego->cldt;
Chris@82 62 cldt->apply(ego->cldt, I, O);
Chris@82 63
Chris@82 64 /* RDFT final local dimension */
Chris@82 65 cld2 = (plan_rdft *) ego->cld2;
Chris@82 66 cld2->apply(ego->cld2, O, O);
Chris@82 67 }
Chris@82 68
Chris@82 69 static int applicable(const S *ego, const problem *p_,
Chris@82 70 const planner *plnr)
Chris@82 71 {
Chris@82 72 const problem_mpi_rdft *p = (const problem_mpi_rdft *) p_;
Chris@82 73 return (1
Chris@82 74 && p->sz->rnk > 1
Chris@82 75 && p->flags == TRANSPOSED_OUT
Chris@82 76 && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr)
Chris@82 77 && p->I != p->O))
Chris@82 78 && XM(is_local_after)(1, p->sz, IB)
Chris@82 79 && XM(is_local_after)(2, p->sz, OB)
Chris@82 80 && XM(num_blocks)(p->sz->dims[0].n, p->sz->dims[0].b[OB]) == 1
Chris@82 81 && (!NO_SLOWP(plnr) /* slow if rdft-serial is applicable */
Chris@82 82 || !XM(rdft_serial_applicable)(p))
Chris@82 83 );
Chris@82 84 }
Chris@82 85
Chris@82 86 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@82 87 {
Chris@82 88 P *ego = (P *) ego_;
Chris@82 89 X(plan_awake)(ego->cld1, wakefulness);
Chris@82 90 X(plan_awake)(ego->cldt, wakefulness);
Chris@82 91 X(plan_awake)(ego->cld2, wakefulness);
Chris@82 92 }
Chris@82 93
Chris@82 94 static void destroy(plan *ego_)
Chris@82 95 {
Chris@82 96 P *ego = (P *) ego_;
Chris@82 97 X(plan_destroy_internal)(ego->cld2);
Chris@82 98 X(plan_destroy_internal)(ego->cldt);
Chris@82 99 X(plan_destroy_internal)(ego->cld1);
Chris@82 100 }
Chris@82 101
Chris@82 102 static void print(const plan *ego_, printer *p)
Chris@82 103 {
Chris@82 104 const P *ego = (const P *) ego_;
Chris@82 105 p->print(p, "(mpi-rdft-rank-geq2-transposed%s%(%p%)%(%p%)%(%p%))",
Chris@82 106 ego->preserve_input==2 ?"/p":"",
Chris@82 107 ego->cld1, ego->cldt, ego->cld2);
Chris@82 108 }
Chris@82 109
Chris@82 110 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
Chris@82 111 {
Chris@82 112 const S *ego = (const S *) ego_;
Chris@82 113 const problem_mpi_rdft *p;
Chris@82 114 P *pln;
Chris@82 115 plan *cld1 = 0, *cldt = 0, *cld2 = 0;
Chris@82 116 R *I, *O, *I2;
Chris@82 117 tensor *sz;
Chris@82 118 int i, my_pe, n_pes;
Chris@82 119 INT nrest;
Chris@82 120 static const plan_adt padt = {
Chris@82 121 XM(rdft_solve), awake, print, destroy
Chris@82 122 };
Chris@82 123
Chris@82 124 UNUSED(ego);
Chris@82 125
Chris@82 126 if (!applicable(ego, p_, plnr))
Chris@82 127 return (plan *) 0;
Chris@82 128
Chris@82 129 p = (const problem_mpi_rdft *) p_;
Chris@82 130
Chris@82 131 I2 = I = p->I;
Chris@82 132 O = p->O;
Chris@82 133 if (ego->preserve_input || NO_DESTROY_INPUTP(plnr))
Chris@82 134 I = O;
Chris@82 135 MPI_Comm_rank(p->comm, &my_pe);
Chris@82 136 MPI_Comm_size(p->comm, &n_pes);
Chris@82 137
Chris@82 138 sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */
Chris@82 139 i = p->sz->rnk - 2; A(i >= 0);
Chris@82 140 sz->dims[i].n = p->sz->dims[i+1].n;
Chris@82 141 sz->dims[i].is = sz->dims[i].os = p->vn;
Chris@82 142 for (--i; i >= 0; --i) {
Chris@82 143 sz->dims[i].n = p->sz->dims[i+1].n;
Chris@82 144 sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is;
Chris@82 145 }
Chris@82 146 nrest = 1; for (i = 1; i < sz->rnk; ++i) nrest *= sz->dims[i].n;
Chris@82 147 {
Chris@82 148 INT is = sz->dims[0].n * sz->dims[0].is;
Chris@82 149 INT b = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[IB], my_pe);
Chris@82 150 cld1 = X(mkplan_d)(plnr,
Chris@82 151 X(mkproblem_rdft_d)(sz,
Chris@82 152 X(mktensor_2d)(b, is, is,
Chris@82 153 p->vn, 1, 1),
Chris@82 154 I2, I, p->kind + 1));
Chris@82 155 if (XM(any_true)(!cld1, p->comm)) goto nada;
Chris@82 156 }
Chris@82 157
Chris@82 158 nrest *= p->vn;
Chris@82 159 cldt = X(mkplan_d)(plnr,
Chris@82 160 XM(mkproblem_transpose)(
Chris@82 161 p->sz->dims[0].n, p->sz->dims[1].n, nrest,
Chris@82 162 I, O,
Chris@82 163 p->sz->dims[0].b[IB], p->sz->dims[1].b[OB],
Chris@82 164 p->comm, 0));
Chris@82 165 if (XM(any_true)(!cldt, p->comm)) goto nada;
Chris@82 166
Chris@82 167 {
Chris@82 168 INT is = p->sz->dims[0].n * nrest;
Chris@82 169 INT b = XM(block)(p->sz->dims[1].n, p->sz->dims[1].b[OB], my_pe);
Chris@82 170 cld2 = X(mkplan_d)(plnr,
Chris@82 171 X(mkproblem_rdft_1_d)(X(mktensor_1d)(
Chris@82 172 p->sz->dims[0].n,
Chris@82 173 nrest, nrest),
Chris@82 174 X(mktensor_2d)(b, is, is,
Chris@82 175 nrest, 1, 1),
Chris@82 176 O, O, p->kind[0]));
Chris@82 177 if (XM(any_true)(!cld2, p->comm)) goto nada;
Chris@82 178 }
Chris@82 179
Chris@82 180 pln = MKPLAN_MPI_RDFT(P, &padt, apply);
Chris@82 181 pln->cld1 = cld1;
Chris@82 182 pln->cldt = cldt;
Chris@82 183 pln->cld2 = cld2;
Chris@82 184 pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr);
Chris@82 185
Chris@82 186 X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
Chris@82 187 X(ops_add2)(&cldt->ops, &pln->super.super.ops);
Chris@82 188
Chris@82 189 return &(pln->super.super);
Chris@82 190
Chris@82 191 nada:
Chris@82 192 X(plan_destroy_internal)(cld2);
Chris@82 193 X(plan_destroy_internal)(cldt);
Chris@82 194 X(plan_destroy_internal)(cld1);
Chris@82 195 return (plan *) 0;
Chris@82 196 }
Chris@82 197
Chris@82 198 static solver *mksolver(int preserve_input)
Chris@82 199 {
Chris@82 200 static const solver_adt sadt = { PROBLEM_MPI_RDFT, mkplan, 0 };
Chris@82 201 S *slv = MKSOLVER(S, &sadt);
Chris@82 202 slv->preserve_input = preserve_input;
Chris@82 203 return &(slv->super);
Chris@82 204 }
Chris@82 205
Chris@82 206 void XM(rdft_rank_geq2_transposed_register)(planner *p)
Chris@82 207 {
Chris@82 208 int preserve_input;
Chris@82 209 for (preserve_input = 0; preserve_input <= 1; ++preserve_input)
Chris@82 210 REGISTER_SOLVER(p, mksolver(preserve_input));
Chris@82 211 }