annotate src/fftw-3.3.3/mpi/api.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
/*
 * Copyright (c) 2003, 2007-11 Matteo Frigo
 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */

#include "api.h"
#include "fftw3-mpi.h"
#include "ifftw-mpi.h"
#include "mpi-transpose.h"
#include "mpi-dft.h"
#include "mpi-rdft.h"
#include "mpi-rdft2.h"

/* Convert API flags to internal MPI flags. */
#define MPI_FLAGS(f) ((f) >> 27)

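/* Editorial note (illustrative, not part of the original source): the
   FFTW_MPI_* flags in fftw3-mpi.h are assumed to occupy the top bits of
   the 32-bit flags word, so the shift above maps them onto the low bits
   used internally.  For example, assuming FFTW_MPI_SCRAMBLED_IN is
   (1U << 27):

       MPI_FLAGS(FFTW_MPI_SCRAMBLED_IN) == 0x1
       MPI_FLAGS(FFTW_ESTIMATE)         == 0x0

   i.e. ordinary planner flags live in the low bits and are masked away
   here, while the MPI-specific flags survive the shift. */
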
/*************************************************************************/

static int mpi_inited = 0;

static MPI_Comm problem_comm(const problem *p) {
     switch (p->adt->problem_kind) {
          case PROBLEM_MPI_DFT:
               return ((const problem_mpi_dft *) p)->comm;
          case PROBLEM_MPI_RDFT:
               return ((const problem_mpi_rdft *) p)->comm;
          case PROBLEM_MPI_RDFT2:
               return ((const problem_mpi_rdft2 *) p)->comm;
          case PROBLEM_MPI_TRANSPOSE:
               return ((const problem_mpi_transpose *) p)->comm;
          default:
               return MPI_COMM_NULL;
     }
}

/* used to synchronize cost measurements (timing or estimation)
   across all processes for an MPI problem, which is critical to
   ensure that all processes decide to use the same MPI plans
   (whereas serial plans need not be synchronized). */
static double cost_hook(const problem *p, double t, cost_kind k)
{
     MPI_Comm comm = problem_comm(p);
     double tsum;
     if (comm == MPI_COMM_NULL) return t;
     MPI_Allreduce(&t, &tsum, 1, MPI_DOUBLE,
                   k == COST_SUM ? MPI_SUM : MPI_MAX, comm);
     return tsum;
}

/* Used to reject wisdom that is not in sync across all processes
   for an MPI problem, which is critical to ensure that all processes
   decide to use the same MPI plans.  (Even though costs are synchronized,
   above, out-of-sync wisdom may result from plans being produced
   by communicators that do not span all processes, either from a
   user-specified communicator or e.g. from transpose-recurse.) */
static int wisdom_ok_hook(const problem *p, flags_t flags)
{
     MPI_Comm comm = problem_comm(p);
     int eq_me, eq_all;
     /* unpack flags bitfield, since MPI communications may involve
        byte-order changes and MPI cannot do this for bit fields */
#if SIZEOF_UNSIGNED_INT >= 4 /* must be big enough to hold 20-bit fields */
     unsigned int f[5];
#else
     unsigned long f[5]; /* at least 32 bits as per C standard */
#endif

     if (comm == MPI_COMM_NULL) return 1; /* non-MPI wisdom is always ok */

     if (XM(any_true)(0, comm)) return 0; /* some process had nowisdom_hook */

     /* otherwise, check that the flags and solver index are identical
        on all processes in this problem's communicator.

        TO DO: possibly we can relax strict equality, but it is
        critical to ensure that any flags which affect what plan is
        created (and whether the solver is applicable) are the same,
        e.g. DESTROY_INPUT, NO_UGLY, etcetera.  (If the MPI algorithm
        differs between processes, deadlocks/crashes generally result.) */
     f[0] = flags.l;
     f[1] = flags.hash_info;
     f[2] = flags.timelimit_impatience;
     f[3] = flags.u;
     f[4] = flags.slvndx;
     MPI_Bcast(f, 5,
               SIZEOF_UNSIGNED_INT >= 4 ? MPI_UNSIGNED : MPI_UNSIGNED_LONG,
               0, comm);
     eq_me = f[0] == flags.l && f[1] == flags.hash_info
          && f[2] == flags.timelimit_impatience
          && f[3] == flags.u && f[4] == flags.slvndx;
     MPI_Allreduce(&eq_me, &eq_all, 1, MPI_INT, MPI_LAND, comm);
     return eq_all;
}

/* This hook is called when wisdom is not found.  The any_true here
   matches up with the any_true in wisdom_ok_hook, in order to handle
   the case where some processes had wisdom (and called wisdom_ok_hook)
   and some processes didn't have wisdom (and called nowisdom_hook). */
static void nowisdom_hook(const problem *p)
{
     MPI_Comm comm = problem_comm(p);
     if (comm == MPI_COMM_NULL) return; /* nothing to do for non-MPI p */
     XM(any_true)(1, comm); /* signal nowisdom to any wisdom_ok_hook */
}

/* needed to synchronize planner bogosity flag, in case non-MPI problems
   on a subset of processes encountered bogus wisdom */
static wisdom_state_t bogosity_hook(wisdom_state_t state, const problem *p)
{
     MPI_Comm comm = problem_comm(p);
     if (comm != MPI_COMM_NULL /* an MPI problem */
         && XM(any_true)(state == WISDOM_IS_BOGUS, comm)) /* bogus somewhere */
          return WISDOM_IS_BOGUS;
     return state;
}

void XM(init)(void)
{
     if (!mpi_inited) {
          planner *plnr = X(the_planner)();
          plnr->cost_hook = cost_hook;
          plnr->wisdom_ok_hook = wisdom_ok_hook;
          plnr->nowisdom_hook = nowisdom_hook;
          plnr->bogosity_hook = bogosity_hook;
          XM(conf_standard)(plnr);
          mpi_inited = 1;
     }
}

void XM(cleanup)(void)
{
     X(cleanup)();
     mpi_inited = 0;
}

/*************************************************************************/

static dtensor *mkdtensor_api(int rnk, const XM(ddim) *dims0)
{
     dtensor *x = XM(mkdtensor)(rnk);
     int i;
     for (i = 0; i < rnk; ++i) {
          x->dims[i].n = dims0[i].n;
          x->dims[i].b[IB] = dims0[i].ib;
          x->dims[i].b[OB] = dims0[i].ob;
     }
     return x;
}

static dtensor *default_sz(int rnk, const XM(ddim) *dims0, int n_pes,
                           int rdft2)
{
     dtensor *sz = XM(mkdtensor)(rnk);
     dtensor *sz0 = mkdtensor_api(rnk, dims0);
     block_kind k;
     int i;

     for (i = 0; i < rnk; ++i)
          sz->dims[i].n = dims0[i].n;

     if (rdft2) sz->dims[rnk-1].n = dims0[rnk-1].n / 2 + 1;

     for (i = 0; i < rnk; ++i) {
          sz->dims[i].b[IB] = dims0[i].ib ? dims0[i].ib : sz->dims[i].n;
          sz->dims[i].b[OB] = dims0[i].ob ? dims0[i].ob : sz->dims[i].n;
     }

     /* If we haven't used all of the processes yet, and some of the
        block sizes weren't specified (i.e. 0), then set the
        unspecified blocks so as to use as many processes as
        possible with as few distributed dimensions as possible. */
     FORALL_BLOCK_KIND(k) {
          INT nb = XM(num_blocks_total)(sz, k);
          INT np = n_pes / nb;
          for (i = 0; i < rnk && np > 1; ++i)
               if (!sz0->dims[i].b[k]) {
                    sz->dims[i].b[k] = XM(default_block)(sz->dims[i].n, np);
                    nb *= XM(num_blocks)(sz->dims[i].n, sz->dims[i].b[k]);
                    np = n_pes / nb;
               }
     }

     if (rdft2) sz->dims[rnk-1].n = dims0[rnk-1].n;

     /* punt for 1d prime */
     if (rnk == 1 && X(is_prime)(sz->dims[0].n))
          sz->dims[0].b[IB] = sz->dims[0].b[OB] = sz->dims[0].n;

     XM(dtensor_destroy)(sz0);
     sz0 = XM(dtensor_canonical)(sz, 0);
     XM(dtensor_destroy)(sz);
     return sz0;
}

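/* Illustrative example (an editorial sketch, not in the original source):
   assuming XM(default_block)(n, np) picks a block of roughly ceil(n / np),
   a dimension of n = 100 split over np = 8 processes gets a block size of
   13, i.e. 8 blocks of which the last holds only 9 slices.  default_sz()
   above applies this to each unspecified (zero) block size in turn until
   the available processes are used up, so as few dimensions as possible
   end up distributed. */
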
/* allocate simple local (serial) dims array corresponding to n[rnk] */
static XM(ddim) *simple_dims(int rnk, const ptrdiff_t *n)
{
     XM(ddim) *dims = (XM(ddim) *) MALLOC(sizeof(XM(ddim)) * rnk,
                                          TENSORS);
     int i;
     for (i = 0; i < rnk; ++i)
          dims[i].n = dims[i].ib = dims[i].ob = n[i];
     return dims;
}

/*************************************************************************/

static void local_size(int my_pe, const dtensor *sz, block_kind k,
                       ptrdiff_t *local_n, ptrdiff_t *local_start)
{
     int i;
     if (my_pe >= XM(num_blocks_total)(sz, k))
          for (i = 0; i < sz->rnk; ++i)
               local_n[i] = local_start[i] = 0;
     else {
          XM(block_coords)(sz, k, my_pe, local_start);
          for (i = 0; i < sz->rnk; ++i) {
               local_n[i] = XM(block)(sz->dims[i].n, sz->dims[i].b[k],
                                      local_start[i]);
               local_start[i] *= sz->dims[i].b[k];
          }
     }
}

static INT prod(int rnk, const ptrdiff_t *local_n)
{
     int i;
     INT N = 1;
     for (i = 0; i < rnk; ++i) N *= local_n[i];
     return N;
}

ptrdiff_t XM(local_size_guru)(int rnk, const XM(ddim) *dims0,
                              ptrdiff_t howmany, MPI_Comm comm,
                              ptrdiff_t *local_n_in,
                              ptrdiff_t *local_start_in,
                              ptrdiff_t *local_n_out,
                              ptrdiff_t *local_start_out,
                              int sign, unsigned flags)
{
     INT N;
     int my_pe, n_pes, i;
     dtensor *sz;

     if (rnk == 0)
          return howmany;

     MPI_Comm_rank(comm, &my_pe);
     MPI_Comm_size(comm, &n_pes);
     sz = default_sz(rnk, dims0, n_pes, 0);

     /* Now, we must figure out how much local space the user should
        allocate (or at least an upper bound).  This depends strongly
        on the exact algorithms we employ...ugh!  FIXME: get this info
        from the solvers somehow? */
     N = 1; /* never return zero allocation size */
     if (rnk > 1 && XM(is_block1d)(sz, IB) && XM(is_block1d)(sz, OB)) {
          INT Nafter;
          ddim odims[2];

          /* dft-rank-geq2-transposed */
          odims[0] = sz->dims[0]; odims[1] = sz->dims[1]; /* save */
          /* we may need extra space for transposed intermediate data */
          for (i = 0; i < 2; ++i)
               if (XM(num_blocks)(sz->dims[i].n, sz->dims[i].b[IB]) == 1 &&
                   XM(num_blocks)(sz->dims[i].n, sz->dims[i].b[OB]) == 1) {
                    sz->dims[i].b[IB]
                         = XM(default_block)(sz->dims[i].n, n_pes);
                    sz->dims[1-i].b[IB] = sz->dims[1-i].n;
                    local_size(my_pe, sz, IB, local_n_in, local_start_in);
                    N = X(imax)(N, prod(rnk, local_n_in));
                    sz->dims[i] = odims[i];
                    sz->dims[1-i] = odims[1-i];
                    break;
               }

          /* dft-rank-geq2 */
          Nafter = howmany;
          for (i = 1; i < sz->rnk; ++i) Nafter *= sz->dims[i].n;
          N = X(imax)(N, (sz->dims[0].n
                          * XM(block)(Nafter, XM(default_block)(Nafter, n_pes),
                                      my_pe) + howmany - 1) / howmany);

          /* dft-rank-geq2 with dimensions swapped */
          Nafter = howmany * sz->dims[0].n;
          for (i = 2; i < sz->rnk; ++i) Nafter *= sz->dims[i].n;
          N = X(imax)(N, (sz->dims[1].n
                          * XM(block)(Nafter, XM(default_block)(Nafter, n_pes),
                                      my_pe) + howmany - 1) / howmany);
     }
     else if (rnk == 1) {
          if (howmany >= n_pes && !MPI_FLAGS(flags)) { /* dft-rank1-bigvec */
               ptrdiff_t n[2], start[2];
               dtensor *sz2 = XM(mkdtensor)(2);
               sz2->dims[0] = sz->dims[0];
               sz2->dims[0].b[IB] = sz->dims[0].n;
               sz2->dims[1].n = sz2->dims[1].b[OB] = howmany;
               sz2->dims[1].b[IB] = XM(default_block)(howmany, n_pes);
               local_size(my_pe, sz2, IB, n, start);
               XM(dtensor_destroy)(sz2);
               N = X(imax)(N, (prod(2, n) + howmany - 1) / howmany);
          }
          else { /* dft-rank1 */
               INT r, m, rblock[2], mblock[2];

               /* Since the 1d transforms are so different, we require
                  the user to call local_size_1d for this case.  Ugh. */
               CK(sign == FFTW_FORWARD || sign == FFTW_BACKWARD);

               if ((r = XM(choose_radix)(sz->dims[0], n_pes, flags, sign,
                                         rblock, mblock))) {
                    m = sz->dims[0].n / r;
                    if (flags & FFTW_MPI_SCRAMBLED_IN)
                         sz->dims[0].b[IB] = rblock[IB] * m;
                    else { /* !SCRAMBLED_IN */
                         sz->dims[0].b[IB] = r * mblock[IB];
                         N = X(imax)(N, rblock[IB] * m);
                    }
                    if (flags & FFTW_MPI_SCRAMBLED_OUT)
                         sz->dims[0].b[OB] = r * mblock[OB];
                    else { /* !SCRAMBLED_OUT */
                         N = X(imax)(N, r * mblock[OB]);
                         sz->dims[0].b[OB] = rblock[OB] * m;
                    }
               }
          }
     }

     local_size(my_pe, sz, IB, local_n_in, local_start_in);
     local_size(my_pe, sz, OB, local_n_out, local_start_out);

     /* at least, make sure we have enough space to store input & output */
     N = X(imax)(N, X(imax)(prod(rnk, local_n_in), prod(rnk, local_n_out)));

     XM(dtensor_destroy)(sz);
     return N * howmany;
}

ptrdiff_t XM(local_size_many_transposed)(int rnk, const ptrdiff_t *n,
                                         ptrdiff_t howmany,
                                         ptrdiff_t xblock, ptrdiff_t yblock,
                                         MPI_Comm comm,
                                         ptrdiff_t *local_nx,
                                         ptrdiff_t *local_x_start,
                                         ptrdiff_t *local_ny,
                                         ptrdiff_t *local_y_start)
{
     ptrdiff_t N;
     XM(ddim) *dims;
     ptrdiff_t *local;

     if (rnk == 0) {
          *local_nx = *local_ny = 1;
          *local_x_start = *local_y_start = 0;
          return howmany;
     }

     dims = simple_dims(rnk, n);
     local = (ptrdiff_t *) MALLOC(sizeof(ptrdiff_t) * rnk * 4, TENSORS);

     /* default 1d block distribution, with transposed output
        if yblock < n[1] */
     dims[0].ib = xblock;
     if (rnk > 1) {
          if (yblock < n[1])
               dims[1].ob = yblock;
          else
               dims[0].ob = xblock;
     }
     else
          dims[0].ob = xblock; /* FIXME: 1d not really supported here
                                  since we don't have flags/sign */

     N = XM(local_size_guru)(rnk, dims, howmany, comm,
                             local, local + rnk,
                             local + 2*rnk, local + 3*rnk,
                             0, 0);
     *local_nx = local[0];
     *local_x_start = local[rnk];
     if (rnk > 1) {
          *local_ny = local[2*rnk + 1];
          *local_y_start = local[3*rnk + 1];
     }
     else {
          *local_ny = *local_nx;
          *local_y_start = *local_x_start;
     }
     X(ifree)(local);
     X(ifree)(dims);
     return N;
}

ptrdiff_t XM(local_size_many)(int rnk, const ptrdiff_t *n,
                              ptrdiff_t howmany,
                              ptrdiff_t xblock,
                              MPI_Comm comm,
                              ptrdiff_t *local_nx,
                              ptrdiff_t *local_x_start)
{
     ptrdiff_t local_ny, local_y_start;
     return XM(local_size_many_transposed)(rnk, n, howmany,
                                           xblock, rnk > 1
                                           ? n[1] : FFTW_MPI_DEFAULT_BLOCK,
                                           comm,
                                           local_nx, local_x_start,
                                           &local_ny, &local_y_start);
}


ptrdiff_t XM(local_size_transposed)(int rnk, const ptrdiff_t *n,
                                    MPI_Comm comm,
                                    ptrdiff_t *local_nx,
                                    ptrdiff_t *local_x_start,
                                    ptrdiff_t *local_ny,
                                    ptrdiff_t *local_y_start)
{
     return XM(local_size_many_transposed)(rnk, n, 1,
                                           FFTW_MPI_DEFAULT_BLOCK,
                                           FFTW_MPI_DEFAULT_BLOCK,
                                           comm,
                                           local_nx, local_x_start,
                                           local_ny, local_y_start);
}

ptrdiff_t XM(local_size)(int rnk, const ptrdiff_t *n,
                         MPI_Comm comm,
                         ptrdiff_t *local_nx,
                         ptrdiff_t *local_x_start)
{
     return XM(local_size_many)(rnk, n, 1, FFTW_MPI_DEFAULT_BLOCK, comm,
                                local_nx, local_x_start);
}

ptrdiff_t XM(local_size_many_1d)(ptrdiff_t nx, ptrdiff_t howmany,
                                 MPI_Comm comm, int sign, unsigned flags,
                                 ptrdiff_t *local_nx, ptrdiff_t *local_x_start,
                                 ptrdiff_t *local_ny, ptrdiff_t *local_y_start)
{
     XM(ddim) d;
     d.n = nx;
     d.ib = d.ob = FFTW_MPI_DEFAULT_BLOCK;
     return XM(local_size_guru)(1, &d, howmany, comm,
                                local_nx, local_x_start,
                                local_ny, local_y_start, sign, flags);
}

ptrdiff_t XM(local_size_1d)(ptrdiff_t nx,
                            MPI_Comm comm, int sign, unsigned flags,
                            ptrdiff_t *local_nx, ptrdiff_t *local_x_start,
                            ptrdiff_t *local_ny, ptrdiff_t *local_y_start)
{
     return XM(local_size_many_1d)(nx, 1, comm, sign, flags,
                                   local_nx, local_x_start,
                                   local_ny, local_y_start);
}

ptrdiff_t XM(local_size_2d_transposed)(ptrdiff_t nx, ptrdiff_t ny,
                                       MPI_Comm comm,
                                       ptrdiff_t *local_nx,
                                       ptrdiff_t *local_x_start,
                                       ptrdiff_t *local_ny,
                                       ptrdiff_t *local_y_start)
{
     ptrdiff_t n[2];
     n[0] = nx; n[1] = ny;
     return XM(local_size_transposed)(2, n, comm,
                                      local_nx, local_x_start,
                                      local_ny, local_y_start);
}

ptrdiff_t XM(local_size_2d)(ptrdiff_t nx, ptrdiff_t ny, MPI_Comm comm,
                            ptrdiff_t *local_nx, ptrdiff_t *local_x_start)
{
     ptrdiff_t n[2];
     n[0] = nx; n[1] = ny;
     return XM(local_size)(2, n, comm, local_nx, local_x_start);
}

ptrdiff_t XM(local_size_3d_transposed)(ptrdiff_t nx, ptrdiff_t ny,
                                       ptrdiff_t nz,
                                       MPI_Comm comm,
                                       ptrdiff_t *local_nx,
                                       ptrdiff_t *local_x_start,
                                       ptrdiff_t *local_ny,
                                       ptrdiff_t *local_y_start)
{
     ptrdiff_t n[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     return XM(local_size_transposed)(3, n, comm,
                                      local_nx, local_x_start,
                                      local_ny, local_y_start);
}

ptrdiff_t XM(local_size_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz,
                            MPI_Comm comm,
                            ptrdiff_t *local_nx, ptrdiff_t *local_x_start)
{
     ptrdiff_t n[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     return XM(local_size)(3, n, comm, local_nx, local_x_start);
}

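/* Illustrative usage sketch (editorial addition, not part of the original
   source), assuming the standard double-precision public names that X()
   and XM() expand to (fftw_ / fftw_mpi_):

       ptrdiff_t ln0, ls0;
       ptrdiff_t alloc = fftw_mpi_local_size_2d(N0, N1, MPI_COMM_WORLD,
                                                &ln0, &ls0);
       fftw_complex *data = fftw_alloc_complex(alloc);

   After this call the rank owns rows ls0 .. ls0 + ln0 - 1 of the N0 x N1
   array, and the return value is an element count that may exceed
   ln0 * N1 because it also covers intermediate (e.g. transposed) storage,
   as computed by XM(local_size_guru) above. */
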
/*************************************************************************/
/* Transpose API */

X(plan) XM(plan_many_transpose)(ptrdiff_t nx, ptrdiff_t ny,
                                ptrdiff_t howmany,
                                ptrdiff_t xblock, ptrdiff_t yblock,
                                R *in, R *out,
                                MPI_Comm comm, unsigned flags)
{
     int n_pes;
     XM(init)();

     if (howmany < 0 || xblock < 0 || yblock < 0 ||
         nx <= 0 || ny <= 0) return 0;

     MPI_Comm_size(comm, &n_pes);
     if (!xblock) xblock = XM(default_block)(nx, n_pes);
     if (!yblock) yblock = XM(default_block)(ny, n_pes);
     if (n_pes < XM(num_blocks)(nx, xblock)
         || n_pes < XM(num_blocks)(ny, yblock))
          return 0;

     return
          X(mkapiplan)(FFTW_FORWARD, flags,
                       XM(mkproblem_transpose)(nx, ny, howmany,
                                               in, out, xblock, yblock,
                                               comm, MPI_FLAGS(flags)));
}

X(plan) XM(plan_transpose)(ptrdiff_t nx, ptrdiff_t ny, R *in, R *out,
                           MPI_Comm comm, unsigned flags)

{
     return XM(plan_many_transpose)(nx, ny, 1,
                                    FFTW_MPI_DEFAULT_BLOCK,
                                    FFTW_MPI_DEFAULT_BLOCK,
                                    in, out, comm, flags);
}

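/* Illustrative sketch (editorial addition, not part of the original
   source), assuming the public double-precision names fftw_mpi_plan_transpose
   and fftw_execute: a global NX x NY transpose of real data distributed
   over the first dimension would look roughly like

       fftw_plan p = fftw_mpi_plan_transpose(NX, NY, in, out,
                                             MPI_COMM_WORLD, FFTW_ESTIMATE);
       if (p) { fftw_execute(p); fftw_destroy_plan(p); }

   with default block sizes (FFTW_MPI_DEFAULT_BLOCK, i.e. 0) filled in by
   plan_many_transpose above. */
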
/*************************************************************************/
/* Complex DFT API */

X(plan) XM(plan_guru_dft)(int rnk, const XM(ddim) *dims0,
                          ptrdiff_t howmany,
                          C *in, C *out,
                          MPI_Comm comm, int sign, unsigned flags)
{
     int n_pes, i;
     dtensor *sz;

     XM(init)();

     if (howmany < 0 || rnk < 1) return 0;
     for (i = 0; i < rnk; ++i)
          if (dims0[i].n < 1 || dims0[i].ib < 0 || dims0[i].ob < 0)
               return 0;

     MPI_Comm_size(comm, &n_pes);
     sz = default_sz(rnk, dims0, n_pes, 0);

     if (XM(num_blocks_total)(sz, IB) > n_pes
         || XM(num_blocks_total)(sz, OB) > n_pes) {
          XM(dtensor_destroy)(sz);
          return 0;
     }

     return
          X(mkapiplan)(sign, flags,
                       XM(mkproblem_dft_d)(sz, howmany,
                                           (R *) in, (R *) out,
                                           comm, sign,
                                           MPI_FLAGS(flags)));
}

X(plan) XM(plan_many_dft)(int rnk, const ptrdiff_t *n,
                          ptrdiff_t howmany,
                          ptrdiff_t iblock, ptrdiff_t oblock,
                          C *in, C *out,
                          MPI_Comm comm, int sign, unsigned flags)
{
     XM(ddim) *dims = simple_dims(rnk, n);
     X(plan) pln;

     if (rnk == 1) {
          dims[0].ib = iblock;
          dims[0].ob = oblock;
     }
     else if (rnk > 1) {
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_IN)].ib = iblock;
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_OUT)].ob = oblock;
     }

     pln = XM(plan_guru_dft)(rnk, dims, howmany, in, out, comm, sign, flags);
     X(ifree)(dims);
     return pln;
}

X(plan) XM(plan_dft)(int rnk, const ptrdiff_t *n, C *in, C *out,
                     MPI_Comm comm, int sign, unsigned flags)
{
     return XM(plan_many_dft)(rnk, n, 1,
                              FFTW_MPI_DEFAULT_BLOCK,
                              FFTW_MPI_DEFAULT_BLOCK,
                              in, out, comm, sign, flags);
}

X(plan) XM(plan_dft_1d)(ptrdiff_t nx, C *in, C *out,
                        MPI_Comm comm, int sign, unsigned flags)
{
     return XM(plan_dft)(1, &nx, in, out, comm, sign, flags);
}

X(plan) XM(plan_dft_2d)(ptrdiff_t nx, ptrdiff_t ny, C *in, C *out,
                        MPI_Comm comm, int sign, unsigned flags)
{
     ptrdiff_t n[2];
     n[0] = nx; n[1] = ny;
     return XM(plan_dft)(2, n, in, out, comm, sign, flags);
}

X(plan) XM(plan_dft_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz,
                        C *in, C *out,
                        MPI_Comm comm, int sign, unsigned flags)
{
     ptrdiff_t n[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     return XM(plan_dft)(3, n, in, out, comm, sign, flags);
}

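/* Illustrative sketch (editorial addition, not part of the original
   source), assuming the public double-precision names for X()/XM():
   a typical in-place 2d transform pairs the local_size and plan calls,

       ptrdiff_t ln0, ls0;
       ptrdiff_t alloc = fftw_mpi_local_size_2d(N0, N1, MPI_COMM_WORLD,
                                                &ln0, &ls0);
       fftw_complex *data = fftw_alloc_complex(alloc);
       fftw_plan p = fftw_mpi_plan_dft_2d(N0, N1, data, data,
                                          MPI_COMM_WORLD, FFTW_FORWARD,
                                          FFTW_ESTIMATE);
       fftw_execute(p);

   Note that planning may return 0 (NULL) when the requested block sizes
   cannot be distributed over the communicator, as checked above. */
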
/*************************************************************************/
/* R2R API */

X(plan) XM(plan_guru_r2r)(int rnk, const XM(ddim) *dims0,
                          ptrdiff_t howmany,
                          R *in, R *out,
                          MPI_Comm comm, const X(r2r_kind) *kind,
                          unsigned flags)
{
     int n_pes, i;
     dtensor *sz;
     rdft_kind *k;
     X(plan) pln;

     XM(init)();

     if (howmany < 0 || rnk < 1) return 0;
     for (i = 0; i < rnk; ++i)
          if (dims0[i].n < 1 || dims0[i].ib < 0 || dims0[i].ob < 0)
               return 0;

     k = X(map_r2r_kind)(rnk, kind);

     MPI_Comm_size(comm, &n_pes);
     sz = default_sz(rnk, dims0, n_pes, 0);

     if (XM(num_blocks_total)(sz, IB) > n_pes
         || XM(num_blocks_total)(sz, OB) > n_pes) {
          XM(dtensor_destroy)(sz);
          return 0;
     }

     pln = X(mkapiplan)(0, flags,
                        XM(mkproblem_rdft_d)(sz, howmany,
                                             in, out,
                                             comm, k, MPI_FLAGS(flags)));
     X(ifree0)(k);
     return pln;
}

X(plan) XM(plan_many_r2r)(int rnk, const ptrdiff_t *n,
                          ptrdiff_t howmany,
                          ptrdiff_t iblock, ptrdiff_t oblock,
                          R *in, R *out,
                          MPI_Comm comm, const X(r2r_kind) *kind,
                          unsigned flags)
{
     XM(ddim) *dims = simple_dims(rnk, n);
     X(plan) pln;

     if (rnk == 1) {
          dims[0].ib = iblock;
          dims[0].ob = oblock;
     }
     else if (rnk > 1) {
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_IN)].ib = iblock;
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_OUT)].ob = oblock;
     }

     pln = XM(plan_guru_r2r)(rnk, dims, howmany, in, out, comm, kind, flags);
     X(ifree)(dims);
     return pln;
}

X(plan) XM(plan_r2r)(int rnk, const ptrdiff_t *n, R *in, R *out,
                     MPI_Comm comm,
                     const X(r2r_kind) *kind,
                     unsigned flags)
{
     return XM(plan_many_r2r)(rnk, n, 1,
                              FFTW_MPI_DEFAULT_BLOCK,
                              FFTW_MPI_DEFAULT_BLOCK,
                              in, out, comm, kind, flags);
}

X(plan) XM(plan_r2r_2d)(ptrdiff_t nx, ptrdiff_t ny, R *in, R *out,
                        MPI_Comm comm,
                        X(r2r_kind) kindx, X(r2r_kind) kindy,
                        unsigned flags)
{
     ptrdiff_t n[2];
     X(r2r_kind) kind[2];
     n[0] = nx; n[1] = ny;
     kind[0] = kindx; kind[1] = kindy;
     return XM(plan_r2r)(2, n, in, out, comm, kind, flags);
}

X(plan) XM(plan_r2r_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz,
                        R *in, R *out,
                        MPI_Comm comm,
                        X(r2r_kind) kindx, X(r2r_kind) kindy,
                        X(r2r_kind) kindz,
                        unsigned flags)
{
     ptrdiff_t n[3];
     X(r2r_kind) kind[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     kind[0] = kindx; kind[1] = kindy; kind[2] = kindz;
     return XM(plan_r2r)(3, n, in, out, comm, kind, flags);
}

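/* Illustrative sketch (editorial addition, not part of the original
   source): the r2r planners take one transform kind per dimension, e.g.
   (assuming the public double-precision names)

       fftw_plan p = fftw_mpi_plan_r2r_2d(N0, N1, in, out, MPI_COMM_WORLD,
                                          FFTW_REDFT10, FFTW_REDFT10,
                                          FFTW_ESTIMATE);

   which the wrapper above packs into a kind[] array before calling
   XM(plan_r2r). */
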
/*************************************************************************/
/* R2C/C2R API */

static X(plan) plan_guru_rdft2(int rnk, const XM(ddim) *dims0,
                               ptrdiff_t howmany,
                               R *r, C *c,
                               MPI_Comm comm, rdft_kind kind, unsigned flags)
{
     int n_pes, i;
     dtensor *sz;
     R *cr = (R *) c;

     XM(init)();

     if (howmany < 0 || rnk < 2) return 0;
     for (i = 0; i < rnk; ++i)
          if (dims0[i].n < 1 || dims0[i].ib < 0 || dims0[i].ob < 0)
               return 0;

     MPI_Comm_size(comm, &n_pes);
     sz = default_sz(rnk, dims0, n_pes, 1);

     sz->dims[rnk-1].n = dims0[rnk-1].n / 2 + 1;
     if (XM(num_blocks_total)(sz, IB) > n_pes
         || XM(num_blocks_total)(sz, OB) > n_pes) {
          XM(dtensor_destroy)(sz);
          return 0;
     }
     sz->dims[rnk-1].n = dims0[rnk-1].n;

     if (kind == R2HC)
          return X(mkapiplan)(0, flags,
                              XM(mkproblem_rdft2_d)(sz, howmany,
                                                    r, cr, comm, R2HC,
                                                    MPI_FLAGS(flags)));
     else
          return X(mkapiplan)(0, flags,
                              XM(mkproblem_rdft2_d)(sz, howmany,
                                                    cr, r, comm, HC2R,
                                                    MPI_FLAGS(flags)));
}

X(plan) XM(plan_many_dft_r2c)(int rnk, const ptrdiff_t *n,
                              ptrdiff_t howmany,
                              ptrdiff_t iblock, ptrdiff_t oblock,
                              R *in, C *out,
                              MPI_Comm comm, unsigned flags)
{
     XM(ddim) *dims = simple_dims(rnk, n);
     X(plan) pln;

     if (rnk == 1) {
          dims[0].ib = iblock;
          dims[0].ob = oblock;
     }
     else if (rnk > 1) {
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_IN)].ib = iblock;
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_OUT)].ob = oblock;
     }

     pln = plan_guru_rdft2(rnk, dims, howmany, in, out, comm, R2HC, flags);
     X(ifree)(dims);
     return pln;
}

X(plan) XM(plan_many_dft_c2r)(int rnk, const ptrdiff_t *n,
                              ptrdiff_t howmany,
                              ptrdiff_t iblock, ptrdiff_t oblock,
                              C *in, R *out,
                              MPI_Comm comm, unsigned flags)
{
     XM(ddim) *dims = simple_dims(rnk, n);
     X(plan) pln;

     if (rnk == 1) {
          dims[0].ib = iblock;
          dims[0].ob = oblock;
     }
     else if (rnk > 1) {
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_IN)].ib = iblock;
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_OUT)].ob = oblock;
     }

     pln = plan_guru_rdft2(rnk, dims, howmany, out, in, comm, HC2R, flags);
     X(ifree)(dims);
     return pln;
}

X(plan) XM(plan_dft_r2c)(int rnk, const ptrdiff_t *n, R *in, C *out,
                         MPI_Comm comm, unsigned flags)
{
     return XM(plan_many_dft_r2c)(rnk, n, 1,
                                  FFTW_MPI_DEFAULT_BLOCK,
                                  FFTW_MPI_DEFAULT_BLOCK,
                                  in, out, comm, flags);
}

X(plan) XM(plan_dft_r2c_2d)(ptrdiff_t nx, ptrdiff_t ny, R *in, C *out,
                            MPI_Comm comm, unsigned flags)
{
     ptrdiff_t n[2];
     n[0] = nx; n[1] = ny;
     return XM(plan_dft_r2c)(2, n, in, out, comm, flags);
}

X(plan) XM(plan_dft_r2c_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz,
                            R *in, C *out, MPI_Comm comm, unsigned flags)
{
     ptrdiff_t n[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     return XM(plan_dft_r2c)(3, n, in, out, comm, flags);
}

X(plan) XM(plan_dft_c2r)(int rnk, const ptrdiff_t *n, C *in, R *out,
                         MPI_Comm comm, unsigned flags)
{
     return XM(plan_many_dft_c2r)(rnk, n, 1,
                                  FFTW_MPI_DEFAULT_BLOCK,
                                  FFTW_MPI_DEFAULT_BLOCK,
                                  in, out, comm, flags);
}

X(plan) XM(plan_dft_c2r_2d)(ptrdiff_t nx, ptrdiff_t ny, C *in, R *out,
                            MPI_Comm comm, unsigned flags)
{
     ptrdiff_t n[2];
     n[0] = nx; n[1] = ny;
     return XM(plan_dft_c2r)(2, n, in, out, comm, flags);
}

X(plan) XM(plan_dft_c2r_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz,
                            C *in, R *out, MPI_Comm comm, unsigned flags)
{
     ptrdiff_t n[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     return XM(plan_dft_c2r)(3, n, in, out, comm, flags);
}

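/* Illustrative note (editorial addition, not part of the original source):
   as in serial FFTW, the complex side of an r2c/c2r transform has
   n[rnk-1]/2 + 1 elements in the last dimension (the temporary adjustment
   of sz->dims[rnk-1].n in plan_guru_rdft2 above reflects this), so for
   in-place use the real array is conventionally padded in that dimension,
   e.g. (assuming the public double-precision names)

       ptrdiff_t ln0, ls0;
       ptrdiff_t alloc = fftw_mpi_local_size_2d(N0, N1/2 + 1, MPI_COMM_WORLD,
                                                &ln0, &ls0);
       double *real = fftw_alloc_real(2 * alloc);
       fftw_complex *cplx = (fftw_complex *) real;
       fftw_plan p = fftw_mpi_plan_dft_r2c_2d(N0, N1, real, cplx,
                                              MPI_COMM_WORLD, FFTW_ESTIMATE);
*/
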
/*************************************************************************/
/* New-array execute functions */

void XM(execute_dft)(const X(plan) p, C *in, C *out) {
     /* internally, MPI plans are just rdft plans */
     X(execute_r2r)(p, (R*) in, (R*) out);
}

void XM(execute_dft_r2c)(const X(plan) p, R *in, C *out) {
     /* internally, MPI plans are just rdft plans */
     X(execute_r2r)(p, in, (R*) out);
}

void XM(execute_dft_c2r)(const X(plan) p, C *in, R *out) {
     /* internally, MPI plans are just rdft plans */
     X(execute_r2r)(p, (R*) in, out);
}

void XM(execute_r2r)(const X(plan) p, R *in, R *out) {
     /* internally, MPI plans are just rdft plans */
     X(execute_r2r)(p, in, out);
}
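
/* Illustrative note (editorial addition, not part of the original source):
   these new-array execute functions apply an existing plan to different
   arrays with the same local sizes and alignment as the ones it was
   planned with, e.g. (assuming the public double-precision names)

       fftw_mpi_execute_dft(p, new_in, new_out);

   Since MPI plans are internally rdft plans, all four wrappers simply
   forward to X(execute_r2r) with the appropriate casts. */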