annotate src/fftw-3.3.5/mpi/dft-rank-geq2.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* Complex DFTs of rank >= 2, for the case where we are distributed
Chris@42 22 across the first dimension only, and the output is not transposed. */
Chris@42 23
Chris@42 24 #include "mpi-dft.h"
Chris@42 25 #include "dft.h"
Chris@42 26
Chris@42 27 typedef struct {
Chris@42 28 solver super;
Chris@42 29 int preserve_input; /* preserve input even if DESTROY_INPUT was passed */
Chris@42 30 } S;
Chris@42 31
Chris@42 32 typedef struct {
Chris@42 33 plan_mpi_dft super;
Chris@42 34
Chris@42 35 plan *cld1, *cld2;
Chris@42 36 INT roff, ioff;
Chris@42 37 int preserve_input;
Chris@42 38 } P;
Chris@42 39
Chris@42 40 static void apply(const plan *ego_, R *I, R *O)
Chris@42 41 {
Chris@42 42 const P *ego = (const P *) ego_;
Chris@42 43 plan_dft *cld1;
Chris@42 44 plan_rdft *cld2;
Chris@42 45 INT roff = ego->roff, ioff = ego->ioff;
Chris@42 46
Chris@42 47 /* DFT local dimensions */
Chris@42 48 cld1 = (plan_dft *) ego->cld1;
Chris@42 49 if (ego->preserve_input) {
Chris@42 50 cld1->apply(ego->cld1, I+roff, I+ioff, O+roff, O+ioff);
Chris@42 51 I = O;
Chris@42 52 }
Chris@42 53 else
Chris@42 54 cld1->apply(ego->cld1, I+roff, I+ioff, I+roff, I+ioff);
Chris@42 55
Chris@42 56 /* DFT non-local dimension (via dft-rank1-bigvec, usually): */
Chris@42 57 cld2 = (plan_rdft *) ego->cld2;
Chris@42 58 cld2->apply(ego->cld2, I, O);
Chris@42 59 }
Chris@42 60
Chris@42 61 static int applicable(const S *ego, const problem *p_,
Chris@42 62 const planner *plnr)
Chris@42 63 {
Chris@42 64 const problem_mpi_dft *p = (const problem_mpi_dft *) p_;
Chris@42 65 return (1
Chris@42 66 && p->sz->rnk > 1
Chris@42 67 && p->flags == 0 /* TRANSPOSED/SCRAMBLED_IN/OUT not supported */
Chris@42 68 && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr)
Chris@42 69 && p->I != p->O))
Chris@42 70 && XM(is_local_after)(1, p->sz, IB)
Chris@42 71 && XM(is_local_after)(1, p->sz, OB)
Chris@42 72 && (!NO_SLOWP(plnr) /* slow if dft-serial is applicable */
Chris@42 73 || !XM(dft_serial_applicable)(p))
Chris@42 74 );
Chris@42 75 }
Chris@42 76
Chris@42 77 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@42 78 {
Chris@42 79 P *ego = (P *) ego_;
Chris@42 80 X(plan_awake)(ego->cld1, wakefulness);
Chris@42 81 X(plan_awake)(ego->cld2, wakefulness);
Chris@42 82 }
Chris@42 83
Chris@42 84 static void destroy(plan *ego_)
Chris@42 85 {
Chris@42 86 P *ego = (P *) ego_;
Chris@42 87 X(plan_destroy_internal)(ego->cld2);
Chris@42 88 X(plan_destroy_internal)(ego->cld1);
Chris@42 89 }
Chris@42 90
Chris@42 91 static void print(const plan *ego_, printer *p)
Chris@42 92 {
Chris@42 93 const P *ego = (const P *) ego_;
Chris@42 94 p->print(p, "(mpi-dft-rank-geq2%s%(%p%)%(%p%))",
Chris@42 95 ego->preserve_input==2 ?"/p":"", ego->cld1, ego->cld2);
Chris@42 96 }
Chris@42 97
Chris@42 98 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
Chris@42 99 {
Chris@42 100 const S *ego = (const S *) ego_;
Chris@42 101 const problem_mpi_dft *p;
Chris@42 102 P *pln;
Chris@42 103 plan *cld1 = 0, *cld2 = 0;
Chris@42 104 R *ri, *ii, *ro, *io, *I, *O;
Chris@42 105 tensor *sz;
Chris@42 106 dtensor *sz2;
Chris@42 107 int i, my_pe, n_pes;
Chris@42 108 INT nrest;
Chris@42 109 static const plan_adt padt = {
Chris@42 110 XM(dft_solve), awake, print, destroy
Chris@42 111 };
Chris@42 112
Chris@42 113 UNUSED(ego);
Chris@42 114
Chris@42 115 if (!applicable(ego, p_, plnr))
Chris@42 116 return (plan *) 0;
Chris@42 117
Chris@42 118 p = (const problem_mpi_dft *) p_;
Chris@42 119
Chris@42 120 X(extract_reim)(p->sign, I = p->I, &ri, &ii);
Chris@42 121 X(extract_reim)(p->sign, O = p->O, &ro, &io);
Chris@42 122 if (ego->preserve_input || NO_DESTROY_INPUTP(plnr))
Chris@42 123 I = O;
Chris@42 124 else {
Chris@42 125 ro = ri;
Chris@42 126 io = ii;
Chris@42 127 }
Chris@42 128 MPI_Comm_rank(p->comm, &my_pe);
Chris@42 129 MPI_Comm_size(p->comm, &n_pes);
Chris@42 130
Chris@42 131 sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */
Chris@42 132 i = p->sz->rnk - 2; A(i >= 0);
Chris@42 133 sz->dims[i].n = p->sz->dims[i+1].n;
Chris@42 134 sz->dims[i].is = sz->dims[i].os = 2 * p->vn;
Chris@42 135 for (--i; i >= 0; --i) {
Chris@42 136 sz->dims[i].n = p->sz->dims[i+1].n;
Chris@42 137 sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is;
Chris@42 138 }
Chris@42 139 nrest = X(tensor_sz)(sz);
Chris@42 140 {
Chris@42 141 INT is = sz->dims[0].n * sz->dims[0].is;
Chris@42 142 INT b = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[IB], my_pe);
Chris@42 143 cld1 = X(mkplan_d)(plnr,
Chris@42 144 X(mkproblem_dft_d)(sz,
Chris@42 145 X(mktensor_2d)(b, is, is,
Chris@42 146 p->vn, 2, 2),
Chris@42 147 ri, ii, ro, io));
Chris@42 148 if (XM(any_true)(!cld1, p->comm)) goto nada;
Chris@42 149 }
Chris@42 150
Chris@42 151 sz2 = XM(mkdtensor)(1); /* tensor for first (distributed) dimension */
Chris@42 152 sz2->dims[0] = p->sz->dims[0];
Chris@42 153 cld2 = X(mkplan_d)(plnr, XM(mkproblem_dft_d)(sz2, nrest * p->vn,
Chris@42 154 I, O, p->comm, p->sign,
Chris@42 155 RANK1_BIGVEC_ONLY));
Chris@42 156 if (XM(any_true)(!cld2, p->comm)) goto nada;
Chris@42 157
Chris@42 158 pln = MKPLAN_MPI_DFT(P, &padt, apply);
Chris@42 159 pln->cld1 = cld1;
Chris@42 160 pln->cld2 = cld2;
Chris@42 161 pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr);
Chris@42 162 pln->roff = ri - p->I;
Chris@42 163 pln->ioff = ii - p->I;
Chris@42 164
Chris@42 165 X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
Chris@42 166
Chris@42 167 return &(pln->super.super);
Chris@42 168
Chris@42 169 nada:
Chris@42 170 X(plan_destroy_internal)(cld2);
Chris@42 171 X(plan_destroy_internal)(cld1);
Chris@42 172 return (plan *) 0;
Chris@42 173 }
Chris@42 174
Chris@42 175 static solver *mksolver(int preserve_input)
Chris@42 176 {
Chris@42 177 static const solver_adt sadt = { PROBLEM_MPI_DFT, mkplan, 0 };
Chris@42 178 S *slv = MKSOLVER(S, &sadt);
Chris@42 179 slv->preserve_input = preserve_input;
Chris@42 180 return &(slv->super);
Chris@42 181 }
Chris@42 182
Chris@42 183 void XM(dft_rank_geq2_register)(planner *p)
Chris@42 184 {
Chris@42 185 int preserve_input;
Chris@42 186 for (preserve_input = 0; preserve_input <= 1; ++preserve_input)
Chris@42 187 REGISTER_SOLVER(p, mksolver(preserve_input));
Chris@42 188 }