annotate src/fftw-3.3.3/mpi/rdft2-rank-geq2.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 /*
Chris@10 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
Chris@10 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
Chris@10 4 *
Chris@10 5 * This program is free software; you can redistribute it and/or modify
Chris@10 6 * it under the terms of the GNU General Public License as published by
Chris@10 7 * the Free Software Foundation; either version 2 of the License, or
Chris@10 8 * (at your option) any later version.
Chris@10 9 *
Chris@10 10 * This program is distributed in the hope that it will be useful,
Chris@10 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@10 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@10 13 * GNU General Public License for more details.
Chris@10 14 *
Chris@10 15 * You should have received a copy of the GNU General Public License
Chris@10 16 * along with this program; if not, write to the Free Software
Chris@10 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@10 18 *
Chris@10 19 */
Chris@10 20
Chris@10 21 /* Complex RDFT2s of rank >= 2, for the case where we are distributed
Chris@10 22 across the first dimension only, and the output is not transposed. */
Chris@10 23
Chris@10 24 #include "mpi-dft.h"
Chris@10 25 #include "mpi-rdft2.h"
Chris@10 26 #include "rdft.h"
Chris@10 27
Chris@10 28 typedef struct {
Chris@10 29 solver super;
Chris@10 30 int preserve_input; /* preserve input even if DESTROY_INPUT was passed */
Chris@10 31 } S;
Chris@10 32
Chris@10 33 typedef struct {
Chris@10 34 plan_mpi_rdft2 super;
Chris@10 35
Chris@10 36 plan *cld1, *cld2;
Chris@10 37 INT vn;
Chris@10 38 int preserve_input;
Chris@10 39 } P;
Chris@10 40
Chris@10 41 static void apply_r2c(const plan *ego_, R *I, R *O)
Chris@10 42 {
Chris@10 43 const P *ego = (const P *) ego_;
Chris@10 44 plan_rdft2 *cld1;
Chris@10 45 plan_rdft *cld2;
Chris@10 46
Chris@10 47 /* RDFT2 local dimensions */
Chris@10 48 cld1 = (plan_rdft2 *) ego->cld1;
Chris@10 49 if (ego->preserve_input) {
Chris@10 50 cld1->apply(ego->cld1, I, I+ego->vn, O, O+1);
Chris@10 51 I = O;
Chris@10 52 }
Chris@10 53 else
Chris@10 54 cld1->apply(ego->cld1, I, I+ego->vn, I, I+1);
Chris@10 55
Chris@10 56 /* DFT non-local dimension (via dft-rank1-bigvec, usually): */
Chris@10 57 cld2 = (plan_rdft *) ego->cld2;
Chris@10 58 cld2->apply(ego->cld2, I, O);
Chris@10 59 }
Chris@10 60
Chris@10 61 static void apply_c2r(const plan *ego_, R *I, R *O)
Chris@10 62 {
Chris@10 63 const P *ego = (const P *) ego_;
Chris@10 64 plan_rdft2 *cld1;
Chris@10 65 plan_rdft *cld2;
Chris@10 66
Chris@10 67 /* DFT non-local dimension (via dft-rank1-bigvec, usually): */
Chris@10 68 cld2 = (plan_rdft *) ego->cld2;
Chris@10 69 cld2->apply(ego->cld2, I, O);
Chris@10 70
Chris@10 71 /* RDFT2 local dimensions */
Chris@10 72 cld1 = (plan_rdft2 *) ego->cld1;
Chris@10 73 cld1->apply(ego->cld1, O, O+ego->vn, O, O+1);
Chris@10 74
Chris@10 75 }
Chris@10 76
Chris@10 77 static int applicable(const S *ego, const problem *p_,
Chris@10 78 const planner *plnr)
Chris@10 79 {
Chris@10 80 const problem_mpi_rdft2 *p = (const problem_mpi_rdft2 *) p_;
Chris@10 81 return (1
Chris@10 82 && p->sz->rnk > 1
Chris@10 83 && p->flags == 0 /* TRANSPOSED/SCRAMBLED_IN/OUT not supported */
Chris@10 84 && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr)
Chris@10 85 && p->I != p->O
Chris@10 86 && p->kind == R2HC))
Chris@10 87 && XM(is_local_after)(1, p->sz, IB)
Chris@10 88 && XM(is_local_after)(1, p->sz, OB)
Chris@10 89 && (!NO_SLOWP(plnr) /* slow if rdft2-serial is applicable */
Chris@10 90 || !XM(rdft2_serial_applicable)(p))
Chris@10 91 );
Chris@10 92 }
Chris@10 93
Chris@10 94 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@10 95 {
Chris@10 96 P *ego = (P *) ego_;
Chris@10 97 X(plan_awake)(ego->cld1, wakefulness);
Chris@10 98 X(plan_awake)(ego->cld2, wakefulness);
Chris@10 99 }
Chris@10 100
Chris@10 101 static void destroy(plan *ego_)
Chris@10 102 {
Chris@10 103 P *ego = (P *) ego_;
Chris@10 104 X(plan_destroy_internal)(ego->cld2);
Chris@10 105 X(plan_destroy_internal)(ego->cld1);
Chris@10 106 }
Chris@10 107
Chris@10 108 static void print(const plan *ego_, printer *p)
Chris@10 109 {
Chris@10 110 const P *ego = (const P *) ego_;
Chris@10 111 p->print(p, "(mpi-rdft2-rank-geq2%s%(%p%)%(%p%))",
Chris@10 112 ego->preserve_input==2 ?"/p":"", ego->cld1, ego->cld2);
Chris@10 113 }
Chris@10 114
Chris@10 115 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
Chris@10 116 {
Chris@10 117 const S *ego = (const S *) ego_;
Chris@10 118 const problem_mpi_rdft2 *p;
Chris@10 119 P *pln;
Chris@10 120 plan *cld1 = 0, *cld2 = 0;
Chris@10 121 R *r0, *r1, *cr, *ci, *I, *O;
Chris@10 122 tensor *sz;
Chris@10 123 dtensor *sz2;
Chris@10 124 int i, my_pe, n_pes;
Chris@10 125 INT nrest;
Chris@10 126 static const plan_adt padt = {
Chris@10 127 XM(rdft2_solve), awake, print, destroy
Chris@10 128 };
Chris@10 129
Chris@10 130 UNUSED(ego);
Chris@10 131
Chris@10 132 if (!applicable(ego, p_, plnr))
Chris@10 133 return (plan *) 0;
Chris@10 134
Chris@10 135 p = (const problem_mpi_rdft2 *) p_;
Chris@10 136
Chris@10 137 I = p->I; O = p->O;
Chris@10 138 if (p->kind == R2HC) {
Chris@10 139 r1 = (r0 = p->I) + p->vn;
Chris@10 140 if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) {
Chris@10 141 ci = (cr = p->O) + 1;
Chris@10 142 I = O;
Chris@10 143 }
Chris@10 144 else
Chris@10 145 ci = (cr = p->I) + 1;
Chris@10 146 }
Chris@10 147 else {
Chris@10 148 r1 = (r0 = p->O) + p->vn;
Chris@10 149 ci = (cr = p->O) + 1;
Chris@10 150 }
Chris@10 151
Chris@10 152 MPI_Comm_rank(p->comm, &my_pe);
Chris@10 153 MPI_Comm_size(p->comm, &n_pes);
Chris@10 154
Chris@10 155 sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */
Chris@10 156 i = p->sz->rnk - 2; A(i >= 0);
Chris@10 157 sz->dims[i].is = sz->dims[i].os = 2 * p->vn;
Chris@10 158 sz->dims[i].n = p->sz->dims[i+1].n / 2 + 1;
Chris@10 159 for (--i; i >= 0; --i) {
Chris@10 160 sz->dims[i].n = p->sz->dims[i+1].n;
Chris@10 161 sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is;
Chris@10 162 }
Chris@10 163 nrest = X(tensor_sz)(sz);
Chris@10 164 {
Chris@10 165 INT ivs = 1 + (p->kind == HC2R), ovs = 1 + (p->kind == R2HC);
Chris@10 166 INT is = sz->dims[0].n * sz->dims[0].is;
Chris@10 167 INT b = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[IB], my_pe);
Chris@10 168 sz->dims[p->sz->rnk - 2].n = p->sz->dims[p->sz->rnk - 1].n;
Chris@10 169 cld1 = X(mkplan_d)(plnr,
Chris@10 170 X(mkproblem_rdft2_d)(sz,
Chris@10 171 X(mktensor_2d)(b, is, is,
Chris@10 172 p->vn,ivs,ovs),
Chris@10 173 r0, r1, cr, ci, p->kind));
Chris@10 174 if (XM(any_true)(!cld1, p->comm)) goto nada;
Chris@10 175 }
Chris@10 176
Chris@10 177 sz2 = XM(mkdtensor)(1); /* tensor for first (distributed) dimension */
Chris@10 178 sz2->dims[0] = p->sz->dims[0];
Chris@10 179 cld2 = X(mkplan_d)(plnr, XM(mkproblem_dft_d)(sz2, nrest * p->vn,
Chris@10 180 I, O, p->comm,
Chris@10 181 p->kind == R2HC ?
Chris@10 182 FFT_SIGN : -FFT_SIGN,
Chris@10 183 RANK1_BIGVEC_ONLY));
Chris@10 184 if (XM(any_true)(!cld2, p->comm)) goto nada;
Chris@10 185
Chris@10 186 pln = MKPLAN_MPI_RDFT2(P, &padt, p->kind == R2HC ? apply_r2c : apply_c2r);
Chris@10 187 pln->cld1 = cld1;
Chris@10 188 pln->cld2 = cld2;
Chris@10 189 pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr);
Chris@10 190 pln->vn = p->vn;
Chris@10 191
Chris@10 192 X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
Chris@10 193
Chris@10 194 return &(pln->super.super);
Chris@10 195
Chris@10 196 nada:
Chris@10 197 X(plan_destroy_internal)(cld2);
Chris@10 198 X(plan_destroy_internal)(cld1);
Chris@10 199 return (plan *) 0;
Chris@10 200 }
Chris@10 201
Chris@10 202 static solver *mksolver(int preserve_input)
Chris@10 203 {
Chris@10 204 static const solver_adt sadt = { PROBLEM_MPI_RDFT2, mkplan, 0 };
Chris@10 205 S *slv = MKSOLVER(S, &sadt);
Chris@10 206 slv->preserve_input = preserve_input;
Chris@10 207 return &(slv->super);
Chris@10 208 }
Chris@10 209
Chris@10 210 void XM(rdft2_rank_geq2_register)(planner *p)
Chris@10 211 {
Chris@10 212 int preserve_input;
Chris@10 213 for (preserve_input = 0; preserve_input <= 1; ++preserve_input)
Chris@10 214 REGISTER_SOLVER(p, mksolver(preserve_input));
Chris@10 215 }