annotate src/fftw-3.3.3/mpi/dft-rank1-bigvec.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 /*
Chris@10 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
Chris@10 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
Chris@10 4 *
Chris@10 5 * This program is free software; you can redistribute it and/or modify
Chris@10 6 * it under the terms of the GNU General Public License as published by
Chris@10 7 * the Free Software Foundation; either version 2 of the License, or
Chris@10 8 * (at your option) any later version.
Chris@10 9 *
Chris@10 10 * This program is distributed in the hope that it will be useful,
Chris@10 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@10 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@10 13 * GNU General Public License for more details.
Chris@10 14 *
Chris@10 15 * You should have received a copy of the GNU General Public License
Chris@10 16 * along with this program; if not, write to the Free Software
Chris@10 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@10 18 *
Chris@10 19 */
Chris@10 20
Chris@10 21 /* Complex DFTs of rank == 1 when the vector length vn is >= # processes.
Chris@10 22 In this case, we don't need to use a six-step type algorithm, and can
Chris@10 23 instead transpose the DFT dimension with the vector dimension to
Chris@10 24 make the DFT local. */
Chris@10 25
Chris@10 26 #include "mpi-dft.h"
Chris@10 27 #include "mpi-transpose.h"
Chris@10 28 #include "dft.h"
Chris@10 29
/* Solver descriptor for this file.  XM(dft_rank1_bigvec_register) creates
   one instance for every (rearrange, preserve_input) combination. */
Chris@10 30 typedef struct {
Chris@10 31 solver super;
Chris@10 32 int preserve_input; /* preserve input even if DESTROY_INPUT was passed */
Chris@10 33 rearrangement rearrange; /* passed to XM(rearrange_applicable) and XM(rearrange_ny) */
Chris@10 34 } S;
Chris@10 35
/* Plan built by mkplan: a transpose child plan, a DFT child plan, and a
   second transpose child plan, executed in that order by apply. */
Chris@10 36 typedef struct {
Chris@10 37 plan_mpi_dft super;
Chris@10 38
Chris@10 39 plan *cldt_before, *cld, *cldt_after; /* transpose, DFT, transpose */
Chris@10 40 INT roff, ioff; /* offsets of ro and io relative to p->O, computed in mkplan */
Chris@10 41 int preserve_input; /* 2 if the solver asked for it, else NO_DESTROY_INPUTP(plnr) */
Chris@10 42 rearrangement rearrange; /* copied from the solver; indexes descrip[] in print */
Chris@10 43 } P;
Chris@10 44
/* Execute the plan: transpose I into O, run the child DFT plan, then
   transpose again into O.  When ego->preserve_input is non-zero, I is
   rebound to O after the first transpose so that the two remaining steps
   only touch O. */
Chris@10 45 static void apply(const plan *ego_, R *I, R *O)
Chris@10 46 {
Chris@10 47 const P *ego = (const P *) ego_;
Chris@10 48 plan_dft *cld;
Chris@10 49 plan_rdft *cldt_before, *cldt_after;
Chris@10 50 INT roff = ego->roff, ioff = ego->ioff;
Chris@10 51
Chris@10 52 /* global transpose */
Chris@10 53 cldt_before = (plan_rdft *) ego->cldt_before;
Chris@10 54 cldt_before->apply(ego->cldt_before, I, O);
Chris@10 55
/* from here on the caller's input array is no longer referenced */
Chris@10 56 if (ego->preserve_input) I = O;
Chris@10 57
Chris@10 58 /* 1d DFT(s) */
Chris@10 59 cld = (plan_dft *) ego->cld;
/* O-based pointers first, then I-based ones: the same order in which
   mkplan passes (ro, io, ri, ii) to X(mkproblem_dft_d).
   NOTE(review): presumably this means the DFT reads O and writes I --
   confirm against the plan_dft apply signature. */
Chris@10 60 cld->apply(ego->cld, O+roff, O+ioff, I+roff, I+ioff);
Chris@10 61
Chris@10 62 /* global transpose */
Chris@10 63 cldt_after = (plan_rdft *) ego->cldt_after;
Chris@10 64 cldt_after->apply(ego->cldt_after, I, O);
Chris@10 65 }
Chris@10 66
/* Return non-zero iff this solver variant may be used for problem p_.
   All of the following must hold:
     - the transform has rank 1;
     - no problem flag other than RANK1_BIGVEC_ONLY is set;
     - if this is the preserve_input variant, the planner does not already
       demand NO_DESTROY_INPUT and the problem is out-of-place (I != O);
     - vn is at least the number of processes in p->comm, or the
       RANK1_BIGVEC_ONLY flag is set;
     - XM(rearrange_applicable) accepts ego->rearrange for this size;
     - the planner is not in NO_SLOW mode, or XM(dft_serial_applicable)
       rejects the problem. */
Chris@10 67 static int applicable(const S *ego, const problem *p_,
Chris@10 68 const planner *plnr)
Chris@10 69 {
Chris@10 70 const problem_mpi_dft *p = (const problem_mpi_dft *) p_;
Chris@10 71 int n_pes;
Chris@10 72 MPI_Comm_size(p->comm, &n_pes);
Chris@10 73 return (1
Chris@10 74 && p->sz->rnk == 1
Chris@10 75 && !(p->flags & ~RANK1_BIGVEC_ONLY)
Chris@10 76 && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr)
Chris@10 77 && p->I != p->O))
Chris@10 78 && (p->vn >= n_pes /* TODO: relax this, using more memory? */
Chris@10 79 || (p->flags & RANK1_BIGVEC_ONLY))
Chris@10 80
Chris@10 81 && XM(rearrange_applicable)(ego->rearrange,
Chris@10 82 p->sz->dims[0], p->vn, n_pes)
Chris@10 83
Chris@10 84 && (!NO_SLOWP(plnr) /* slow if dft-serial is applicable */
Chris@10 85 || !XM(dft_serial_applicable)(p))
Chris@10 86 );
Chris@10 87 }
Chris@10 88
/* Forward the requested wakefulness state to each of the three child
   plans, in execution order. */
Chris@10 89 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@10 90 {
Chris@10 91 P *ego = (P *) ego_;
Chris@10 92 X(plan_awake)(ego->cldt_before, wakefulness);
Chris@10 93 X(plan_awake)(ego->cld, wakefulness);
Chris@10 94 X(plan_awake)(ego->cldt_after, wakefulness);
Chris@10 95 }
Chris@10 96
/* Destroy the three child plans, in the reverse of the order in which
   mkplan created them.  The P structure itself is not freed here. */
Chris@10 97 static void destroy(plan *ego_)
Chris@10 98 {
Chris@10 99 P *ego = (P *) ego_;
Chris@10 100 X(plan_destroy_internal)(ego->cldt_after);
Chris@10 101 X(plan_destroy_internal)(ego->cld);
Chris@10 102 X(plan_destroy_internal)(ego->cldt_before);
Chris@10 103 }
Chris@10 104
/* Print a description of the plan through p: the solver name, the name
   of the rearrangement, a "/p" suffix when the plan came from the
   preserve_input solver variant (preserve_input == 2), and then the
   three child plans. */
Chris@10 105 static void print(const plan *ego_, printer *p)
Chris@10 106 {
Chris@10 107 const P *ego = (const P *) ego_;
/* NOTE(review): indexed directly by ego->rearrange, so this assumes the
   rearrangement values are 0..4 in exactly this order -- confirm against
   the enum declaration. */
Chris@10 108 const char descrip[][16] = { "contig", "discontig", "square-after",
Chris@10 109 "square-middle", "square-before" };
Chris@10 110 p->print(p, "(mpi-dft-rank1-bigvec/%s%s %(%p%) %(%p%) %(%p%))",
Chris@10 111 descrip[ego->rearrange], ego->preserve_input==2 ?"/p":"",
Chris@10 112 ego->cldt_before, ego->cld, ego->cldt_after);
Chris@10 113 }
Chris@10 114
/* Try to build a plan for problem p_ with this solver variant.
   Returns a pointer to the new plan, or a null plan pointer when the
   solver is not applicable, when XM(rearrange_ny) yields 0, or when
   XM(any_true) reports a failed child plan.

   The plan consists of three children created in this order:
     1. a transpose from p->I to p->O (cldt_before),
     2. a DFT of length nx over the local block (cld),
     3. a transpose back into O (cldt_after). */
Chris@10 115 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
Chris@10 116 {
Chris@10 117 const S *ego = (const S *) ego_;
Chris@10 118 const problem_mpi_dft *p;
Chris@10 119 P *pln;
Chris@10 120 plan *cld = 0, *cldt_before = 0, *cldt_after = 0;
Chris@10 121 R *ri, *ii, *ro, *io, *I, *O;
Chris@10 122 INT yblock, yb, nx, ny, vn;
Chris@10 123 int my_pe, n_pes;
Chris@10 124 static const plan_adt padt = {
Chris@10 125 XM(dft_solve), awake, print, destroy
Chris@10 126 };
Chris@10 127
/* NOTE(review): ego is in fact used below, so this looks redundant. */
Chris@10 128 UNUSED(ego);
Chris@10 129
Chris@10 130 if (!applicable(ego, p_, plnr))
Chris@10 131 return (plan *) 0;
Chris@10 132
Chris@10 133 p = (const problem_mpi_dft *) p_;
Chris@10 134
Chris@10 135 MPI_Comm_rank(p->comm, &my_pe);
Chris@10 136 MPI_Comm_size(p->comm, &n_pes);
Chris@10 137
/* Split the vector length p->vn into ny * vn; ny becomes the second
   transpose dimension and the asserted product must be exact. */
Chris@10 138 nx = p->sz->dims[0].n;
Chris@10 139 if (!(ny = XM(rearrange_ny)(ego->rearrange, p->sz->dims[0],p->vn,n_pes)))
Chris@10 140 return (plan *) 0;
Chris@10 141 vn = p->vn / ny;
Chris@10 142 A(ny * vn == p->vn);
Chris@10 143
/* First transpose: nx by ny, from p->I to p->O.  I and O are assigned
   inside the argument list.  NOTE(review): vn*2 presumably counts the
   real and imaginary parts of vn complex values -- confirm. */
Chris@10 144 yblock = XM(default_block)(ny, n_pes);
Chris@10 145 cldt_before = X(mkplan_d)(plnr,
Chris@10 146 XM(mkproblem_transpose)(
Chris@10 147 nx, ny, vn*2,
Chris@10 148 I = p->I, O = p->O,
Chris@10 149 p->sz->dims[0].b[IB], yblock,
Chris@10 150 p->comm, 0));
/* NOTE(review): the check takes p->comm, so it is presumably a
   collective agreement across processes -- confirm in XM(any_true). */
Chris@10 151 if (XM(any_true)(!cldt_before, p->comm)) goto nada;
/* When the input must survive, plan the remaining steps on O only;
   apply performs the matching rebinding at run time. */
Chris@10 152 if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) { I = O; }
Chris@10 153
Chris@10 154 X(extract_reim)(p->sign, I, &ri, &ii);
Chris@10 155 X(extract_reim)(p->sign, O, &ro, &io);
Chris@10 156
/* Local DFT: yb is this process's block of the ny dimension.  The
   O-derived pointers (ro, io) are passed before the I-derived ones
   (ri, ii); apply uses the same order. */
Chris@10 157 yb = XM(block)(ny, yblock, my_pe);
Chris@10 158 cld = X(mkplan_d)(plnr,
Chris@10 159 X(mkproblem_dft_d)(X(mktensor_1d)(nx, vn*2, vn*2),
Chris@10 160 X(mktensor_2d)(yb, vn*2*nx, vn*2*nx,
Chris@10 161 vn, 2, 2),
Chris@10 162 ro, io, ri, ii));
Chris@10 163 if (XM(any_true)(!cld, p->comm)) goto nada;
Chris@10 164
/* Second transpose: ny by nx, from I (possibly rebound to O above) to O,
   using the output block size of the problem. */
Chris@10 165 cldt_after = X(mkplan_d)(plnr,
Chris@10 166 XM(mkproblem_transpose)(
Chris@10 167 ny, nx, vn*2,
Chris@10 168 I, O,
Chris@10 169 yblock, p->sz->dims[0].b[OB],
Chris@10 170 p->comm, 0));
Chris@10 171 if (XM(any_true)(!cldt_after, p->comm)) goto nada;
Chris@10 172
Chris@10 173 pln = MKPLAN_MPI_DFT(P, &padt, apply);
Chris@10 174
Chris@10 175 pln->cldt_before = cldt_before;
Chris@10 176 pln->cld = cld;
Chris@10 177 pln->cldt_after = cldt_after;
/* 2 marks the preserve_input solver variant (printed as "/p");
   otherwise the planner's NO_DESTROY_INPUT setting is recorded. */
Chris@10 178 pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr);
/* Stored as offsets from p->O so that apply can add them to whatever
   arrays it is called with. */
Chris@10 179 pln->roff = ro - p->O;
Chris@10 180 pln->ioff = io - p->O;
Chris@10 181 pln->rearrange = ego->rearrange;
Chris@10 182
/* Total operation count is the sum over the three children. */
Chris@10 183 X(ops_add)(&cldt_before->ops, &cld->ops, &pln->super.super.ops);
Chris@10 184 X(ops_add2)(&cldt_after->ops, &pln->super.super.ops);
Chris@10 185
Chris@10 186 return &(pln->super.super);
Chris@10 187
/* Failure path: release whichever children exist.  Children not yet
   created are still 0 here.  NOTE(review): this relies on
   X(plan_destroy_internal) accepting a null plan -- confirm. */
Chris@10 188 nada:
Chris@10 189 X(plan_destroy_internal)(cldt_after);
Chris@10 190 X(plan_destroy_internal)(cld);
Chris@10 191 X(plan_destroy_internal)(cldt_before);
Chris@10 192 return (plan *) 0;
Chris@10 193 }
Chris@10 194
/* Create a solver for PROBLEM_MPI_DFT problems whose plans are made by
   mkplan, recording the given rearrangement and preserve_input setting.
   Returns a pointer to the embedded base solver. */
Chris@10 195 static solver *mksolver(rearrangement rearrange, int preserve_input)
Chris@10 196 {
Chris@10 197 static const solver_adt sadt = { PROBLEM_MPI_DFT, mkplan, 0 };
Chris@10 198 S *slv = MKSOLVER(S, &sadt);
Chris@10 199 slv->rearrange = rearrange;
Chris@10 200 slv->preserve_input = preserve_input;
Chris@10 201 return &(slv->super);
Chris@10 202 }
Chris@10 203
/* Register with planner p one solver for every rearrangement visited by
   FORALL_REARRANGE, in both the preserve_input == 0 and
   preserve_input == 1 variants. */
Chris@10 204 void XM(dft_rank1_bigvec_register)(planner *p)
Chris@10 205 {
Chris@10 206 rearrangement rearrange;
Chris@10 207 int preserve_input;
Chris@10 208 FORALL_REARRANGE(rearrange)
Chris@10 209 for (preserve_input = 0; preserve_input <= 1; ++preserve_input)
Chris@10 210 REGISTER_SOLVER(p, mksolver(rearrange, preserve_input));
Chris@10 211 }