annotate src/fftw-3.3.8/mpi/rdft-rank-geq2.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* Complex RDFTs of rank >= 2, for the case where we are distributed
Chris@82 22 across the first dimension only, and the output is not transposed. */
Chris@82 23
Chris@82 24 #include "mpi-rdft.h"
Chris@82 25
Chris@82 26 typedef struct {
Chris@82 27 solver super;
Chris@82 28 int preserve_input; /* preserve input even if DESTROY_INPUT was passed */
Chris@82 29 } S;
Chris@82 30
Chris@82 31 typedef struct {
Chris@82 32 plan_mpi_rdft super;
Chris@82 33
Chris@82 34 plan *cld1, *cld2;
Chris@82 35 int preserve_input;
Chris@82 36 } P;
Chris@82 37
Chris@82 38 static void apply(const plan *ego_, R *I, R *O)
Chris@82 39 {
Chris@82 40 const P *ego = (const P *) ego_;
Chris@82 41 plan_rdft *cld1, *cld2;
Chris@82 42
Chris@82 43 /* RDFT local dimensions */
Chris@82 44 cld1 = (plan_rdft *) ego->cld1;
Chris@82 45 if (ego->preserve_input) {
Chris@82 46 cld1->apply(ego->cld1, I, O);
Chris@82 47 I = O;
Chris@82 48 }
Chris@82 49 else
Chris@82 50 cld1->apply(ego->cld1, I, I);
Chris@82 51
Chris@82 52 /* RDFT non-local dimension (via rdft-rank1-bigvec, usually): */
Chris@82 53 cld2 = (plan_rdft *) ego->cld2;
Chris@82 54 cld2->apply(ego->cld2, I, O);
Chris@82 55 }
Chris@82 56
Chris@82 57 static int applicable(const S *ego, const problem *p_,
Chris@82 58 const planner *plnr)
Chris@82 59 {
Chris@82 60 const problem_mpi_rdft *p = (const problem_mpi_rdft *) p_;
Chris@82 61 return (1
Chris@82 62 && p->sz->rnk > 1
Chris@82 63 && p->flags == 0 /* TRANSPOSED/SCRAMBLED_IN/OUT not supported */
Chris@82 64 && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr)
Chris@82 65 && p->I != p->O))
Chris@82 66 && XM(is_local_after)(1, p->sz, IB)
Chris@82 67 && XM(is_local_after)(1, p->sz, OB)
Chris@82 68 && (!NO_SLOWP(plnr) /* slow if rdft-serial is applicable */
Chris@82 69 || !XM(rdft_serial_applicable)(p))
Chris@82 70 );
Chris@82 71 }
Chris@82 72
Chris@82 73 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@82 74 {
Chris@82 75 P *ego = (P *) ego_;
Chris@82 76 X(plan_awake)(ego->cld1, wakefulness);
Chris@82 77 X(plan_awake)(ego->cld2, wakefulness);
Chris@82 78 }
Chris@82 79
Chris@82 80 static void destroy(plan *ego_)
Chris@82 81 {
Chris@82 82 P *ego = (P *) ego_;
Chris@82 83 X(plan_destroy_internal)(ego->cld2);
Chris@82 84 X(plan_destroy_internal)(ego->cld1);
Chris@82 85 }
Chris@82 86
Chris@82 87 static void print(const plan *ego_, printer *p)
Chris@82 88 {
Chris@82 89 const P *ego = (const P *) ego_;
Chris@82 90 p->print(p, "(mpi-rdft-rank-geq2%s%(%p%)%(%p%))",
Chris@82 91 ego->preserve_input==2 ?"/p":"", ego->cld1, ego->cld2);
Chris@82 92 }
Chris@82 93
Chris@82 94 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
Chris@82 95 {
Chris@82 96 const S *ego = (const S *) ego_;
Chris@82 97 const problem_mpi_rdft *p;
Chris@82 98 P *pln;
Chris@82 99 plan *cld1 = 0, *cld2 = 0;
Chris@82 100 R *I, *O, *I2;
Chris@82 101 tensor *sz;
Chris@82 102 dtensor *sz2;
Chris@82 103 int i, my_pe, n_pes;
Chris@82 104 INT nrest;
Chris@82 105 static const plan_adt padt = {
Chris@82 106 XM(rdft_solve), awake, print, destroy
Chris@82 107 };
Chris@82 108
Chris@82 109 UNUSED(ego);
Chris@82 110
Chris@82 111 if (!applicable(ego, p_, plnr))
Chris@82 112 return (plan *) 0;
Chris@82 113
Chris@82 114 p = (const problem_mpi_rdft *) p_;
Chris@82 115
Chris@82 116 I2 = I = p->I;
Chris@82 117 O = p->O;
Chris@82 118 if (ego->preserve_input || NO_DESTROY_INPUTP(plnr))
Chris@82 119 I = O;
Chris@82 120 MPI_Comm_rank(p->comm, &my_pe);
Chris@82 121 MPI_Comm_size(p->comm, &n_pes);
Chris@82 122
Chris@82 123 sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */
Chris@82 124 i = p->sz->rnk - 2; A(i >= 0);
Chris@82 125 sz->dims[i].n = p->sz->dims[i+1].n;
Chris@82 126 sz->dims[i].is = sz->dims[i].os = p->vn;
Chris@82 127 for (--i; i >= 0; --i) {
Chris@82 128 sz->dims[i].n = p->sz->dims[i+1].n;
Chris@82 129 sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is;
Chris@82 130 }
Chris@82 131 nrest = X(tensor_sz)(sz);
Chris@82 132 {
Chris@82 133 INT is = sz->dims[0].n * sz->dims[0].is;
Chris@82 134 INT b = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[IB], my_pe);
Chris@82 135 cld1 = X(mkplan_d)(plnr,
Chris@82 136 X(mkproblem_rdft_d)(sz,
Chris@82 137 X(mktensor_2d)(b, is, is,
Chris@82 138 p->vn, 1, 1),
Chris@82 139 I2, I, p->kind + 1));
Chris@82 140 if (XM(any_true)(!cld1, p->comm)) goto nada;
Chris@82 141 }
Chris@82 142
Chris@82 143 sz2 = XM(mkdtensor)(1); /* tensor for first (distributed) dimension */
Chris@82 144 sz2->dims[0] = p->sz->dims[0];
Chris@82 145 cld2 = X(mkplan_d)(plnr, XM(mkproblem_rdft_d)(sz2, nrest * p->vn,
Chris@82 146 I, O,
Chris@82 147 p->comm, p->kind,
Chris@82 148 RANK1_BIGVEC_ONLY));
Chris@82 149 if (XM(any_true)(!cld2, p->comm)) goto nada;
Chris@82 150
Chris@82 151 pln = MKPLAN_MPI_RDFT(P, &padt, apply);
Chris@82 152 pln->cld1 = cld1;
Chris@82 153 pln->cld2 = cld2;
Chris@82 154 pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr);
Chris@82 155
Chris@82 156 X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
Chris@82 157
Chris@82 158 return &(pln->super.super);
Chris@82 159
Chris@82 160 nada:
Chris@82 161 X(plan_destroy_internal)(cld2);
Chris@82 162 X(plan_destroy_internal)(cld1);
Chris@82 163 return (plan *) 0;
Chris@82 164 }
Chris@82 165
Chris@82 166 static solver *mksolver(int preserve_input)
Chris@82 167 {
Chris@82 168 static const solver_adt sadt = { PROBLEM_MPI_RDFT, mkplan, 0 };
Chris@82 169 S *slv = MKSOLVER(S, &sadt);
Chris@82 170 slv->preserve_input = preserve_input;
Chris@82 171 return &(slv->super);
Chris@82 172 }
Chris@82 173
Chris@82 174 void XM(rdft_rank_geq2_register)(planner *p)
Chris@82 175 {
Chris@82 176 int preserve_input;
Chris@82 177 for (preserve_input = 0; preserve_input <= 1; ++preserve_input)
Chris@82 178 REGISTER_SOLVER(p, mksolver(preserve_input));
Chris@82 179 }