src/fftw-3.3.5/mpi/api.c
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */

#include "api.h"
#include "fftw3-mpi.h"
#include "ifftw-mpi.h"
#include "mpi-transpose.h"
#include "mpi-dft.h"
#include "mpi-rdft.h"
#include "mpi-rdft2.h"

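/* Naming note (not in the original source): X(name) and XM(name) are FFTW's
   precision name-mangling macros from the headers included above; in double
   precision, X(plan) expands to fftw_plan and XM(init) to the public
   fftw_mpi_init, and analogously for the fftwf_/fftwl_ precisions. */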
/* Convert API flags to internal MPI flags. */
#define MPI_FLAGS(f) ((f) >> 27)
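/* (Added note: the FFTW_MPI_SCRAMBLED_IN/OUT and FFTW_MPI_TRANSPOSED_IN/OUT
   flags are defined in fftw3-mpi.h as bits 27-30 of the flags word, so the
   right shift by 27 maps them onto the low-order bits of the internal MPI
   transform flags.) */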

/*************************************************************************/

static int mpi_inited = 0;

static MPI_Comm problem_comm(const problem *p) {
     switch (p->adt->problem_kind) {
         case PROBLEM_MPI_DFT:
              return ((const problem_mpi_dft *) p)->comm;
         case PROBLEM_MPI_RDFT:
              return ((const problem_mpi_rdft *) p)->comm;
         case PROBLEM_MPI_RDFT2:
              return ((const problem_mpi_rdft2 *) p)->comm;
         case PROBLEM_MPI_TRANSPOSE:
              return ((const problem_mpi_transpose *) p)->comm;
         default:
              return MPI_COMM_NULL;
     }
}

/* used to synchronize cost measurements (timing or estimation)
   across all processes for an MPI problem, which is critical to
   ensure that all processes decide to use the same MPI plans
   (whereas serial plans need not be synchronized). */
static double cost_hook(const problem *p, double t, cost_kind k)
{
     MPI_Comm comm = problem_comm(p);
     double tsum;
     if (comm == MPI_COMM_NULL) return t;
     MPI_Allreduce(&t, &tsum, 1, MPI_DOUBLE,
                   k == COST_SUM ? MPI_SUM : MPI_MAX, comm);
     return tsum;
}

/* Used to reject wisdom that is not in sync across all processes
   for an MPI problem, which is critical to ensure that all processes
   decide to use the same MPI plans.  (Even though costs are synchronized,
   above, out-of-sync wisdom may result from plans being produced
   by communicators that do not span all processes, either from a
   user-specified communicator or e.g. from transpose-recurse.) */
static int wisdom_ok_hook(const problem *p, flags_t flags)
{
     MPI_Comm comm = problem_comm(p);
     int eq_me, eq_all;
     /* unpack flags bitfield, since MPI communications may involve
        byte-order changes and MPI cannot do this for bit fields */
#if SIZEOF_UNSIGNED_INT >= 4 /* must be big enough to hold 20-bit fields */
     unsigned int f[5];
#else
     unsigned long f[5]; /* at least 32 bits as per C standard */
#endif

     if (comm == MPI_COMM_NULL) return 1; /* non-MPI wisdom is always ok */

     if (XM(any_true)(0, comm)) return 0; /* some process had nowisdom_hook */

     /* otherwise, check that the flags and solver index are identical
        on all processes in this problem's communicator.

        TO DO: possibly we can relax strict equality, but it is
        critical to ensure that any flags which affect what plan is
        created (and whether the solver is applicable) are the same,
        e.g. DESTROY_INPUT, NO_UGLY, etcetera.  (If the MPI algorithm
        differs between processes, deadlocks/crashes generally result.) */
     f[0] = flags.l;
     f[1] = flags.hash_info;
     f[2] = flags.timelimit_impatience;
     f[3] = flags.u;
     f[4] = flags.slvndx;
     MPI_Bcast(f, 5,
               SIZEOF_UNSIGNED_INT >= 4 ? MPI_UNSIGNED : MPI_UNSIGNED_LONG,
               0, comm);
     eq_me = f[0] == flags.l && f[1] == flags.hash_info
          && f[2] == flags.timelimit_impatience
          && f[3] == flags.u && f[4] == flags.slvndx;
     MPI_Allreduce(&eq_me, &eq_all, 1, MPI_INT, MPI_LAND, comm);
     return eq_all;
}

/* This hook is called when wisdom is not found.  The any_true here
   matches up with the any_true in wisdom_ok_hook, in order to handle
   the case where some processes had wisdom (and called wisdom_ok_hook)
   and some processes didn't have wisdom (and called nowisdom_hook). */
static void nowisdom_hook(const problem *p)
{
     MPI_Comm comm = problem_comm(p);
     if (comm == MPI_COMM_NULL) return; /* nothing to do for non-MPI p */
     XM(any_true)(1, comm); /* signal nowisdom to any wisdom_ok_hook */
}

/* needed to synchronize planner bogosity flag, in case non-MPI problems
   on a subset of processes encountered bogus wisdom */
static wisdom_state_t bogosity_hook(wisdom_state_t state, const problem *p)
{
     MPI_Comm comm = problem_comm(p);
     if (comm != MPI_COMM_NULL /* an MPI problem */
         && XM(any_true)(state == WISDOM_IS_BOGUS, comm)) /* bogus somewhere */
          return WISDOM_IS_BOGUS;
     return state;
}

void XM(init)(void)
{
     if (!mpi_inited) {
          planner *plnr = X(the_planner)();
          plnr->cost_hook = cost_hook;
          plnr->wisdom_ok_hook = wisdom_ok_hook;
          plnr->nowisdom_hook = nowisdom_hook;
          plnr->bogosity_hook = bogosity_hook;
          XM(conf_standard)(plnr);
          mpi_inited = 1;
     }
}

void XM(cleanup)(void)
{
     X(cleanup)();
     mpi_inited = 0;
}

/*************************************************************************/

static dtensor *mkdtensor_api(int rnk, const XM(ddim) *dims0)
{
     dtensor *x = XM(mkdtensor)(rnk);
     int i;
     for (i = 0; i < rnk; ++i) {
          x->dims[i].n = dims0[i].n;
          x->dims[i].b[IB] = dims0[i].ib;
          x->dims[i].b[OB] = dims0[i].ob;
     }
     return x;
}

static dtensor *default_sz(int rnk, const XM(ddim) *dims0, int n_pes,
                           int rdft2)
{
     dtensor *sz = XM(mkdtensor)(rnk);
     dtensor *sz0 = mkdtensor_api(rnk, dims0);
     block_kind k;
     int i;

     for (i = 0; i < rnk; ++i)
          sz->dims[i].n = dims0[i].n;

     if (rdft2) sz->dims[rnk-1].n = dims0[rnk-1].n / 2 + 1;

     for (i = 0; i < rnk; ++i) {
          sz->dims[i].b[IB] = dims0[i].ib ? dims0[i].ib : sz->dims[i].n;
          sz->dims[i].b[OB] = dims0[i].ob ? dims0[i].ob : sz->dims[i].n;
     }

     /* If we haven't used all of the processes yet, and some of the
        block sizes weren't specified (i.e. 0), then set the
        unspecified blocks so as to use as many processes as
        possible with as few distributed dimensions as possible. */
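     /* (Added example: with n = 100 along a dimension and np = 4 processes
        still unused, XM(default_block) gives a block of ceil(100/4) = 25,
        splitting the dimension into 4 blocks of 25 -- assuming the usual
        ceiling-division default block.) */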
     FORALL_BLOCK_KIND(k) {
          INT nb = XM(num_blocks_total)(sz, k);
          INT np = n_pes / nb;
          for (i = 0; i < rnk && np > 1; ++i)
               if (!sz0->dims[i].b[k]) {
                    sz->dims[i].b[k] = XM(default_block)(sz->dims[i].n, np);
                    nb *= XM(num_blocks)(sz->dims[i].n, sz->dims[i].b[k]);
                    np = n_pes / nb;
               }
     }

     if (rdft2) sz->dims[rnk-1].n = dims0[rnk-1].n;

     /* punt for 1d prime */
     if (rnk == 1 && X(is_prime)(sz->dims[0].n))
          sz->dims[0].b[IB] = sz->dims[0].b[OB] = sz->dims[0].n;

     XM(dtensor_destroy)(sz0);
     sz0 = XM(dtensor_canonical)(sz, 0);
     XM(dtensor_destroy)(sz);
     return sz0;
}

/* allocate simple local (serial) dims array corresponding to n[rnk] */
static XM(ddim) *simple_dims(int rnk, const ptrdiff_t *n)
{
     XM(ddim) *dims = (XM(ddim) *) MALLOC(sizeof(XM(ddim)) * rnk,
                                          TENSORS);
     int i;
     for (i = 0; i < rnk; ++i)
          dims[i].n = dims[i].ib = dims[i].ob = n[i];
     return dims;
}

/*************************************************************************/

static void local_size(int my_pe, const dtensor *sz, block_kind k,
                       ptrdiff_t *local_n, ptrdiff_t *local_start)
{
     int i;
     if (my_pe >= XM(num_blocks_total)(sz, k))
          for (i = 0; i < sz->rnk; ++i)
               local_n[i] = local_start[i] = 0;
     else {
          XM(block_coords)(sz, k, my_pe, local_start);
          for (i = 0; i < sz->rnk; ++i) {
               local_n[i] = XM(block)(sz->dims[i].n, sz->dims[i].b[k],
                                      local_start[i]);
               local_start[i] *= sz->dims[i].b[k];
          }
     }
}

static INT prod(int rnk, const ptrdiff_t *local_n)
{
     int i;
     INT N = 1;
     for (i = 0; i < rnk; ++i) N *= local_n[i];
     return N;
}

ptrdiff_t XM(local_size_guru)(int rnk, const XM(ddim) *dims0,
                              ptrdiff_t howmany, MPI_Comm comm,
                              ptrdiff_t *local_n_in,
                              ptrdiff_t *local_start_in,
                              ptrdiff_t *local_n_out,
                              ptrdiff_t *local_start_out,
                              int sign, unsigned flags)
{
     INT N;
     int my_pe, n_pes, i;
     dtensor *sz;

     if (rnk == 0)
          return howmany;

     MPI_Comm_rank(comm, &my_pe);
     MPI_Comm_size(comm, &n_pes);
     sz = default_sz(rnk, dims0, n_pes, 0);

     /* Now, we must figure out how much local space the user should
        allocate (or at least an upper bound).  This depends strongly
        on the exact algorithms we employ...ugh!  FIXME: get this info
        from the solvers somehow? */
     N = 1; /* never return zero allocation size */
     if (rnk > 1 && XM(is_block1d)(sz, IB) && XM(is_block1d)(sz, OB)) {
          INT Nafter;
          ddim odims[2];

          /* dft-rank-geq2-transposed */
          odims[0] = sz->dims[0]; odims[1] = sz->dims[1]; /* save */
          /* we may need extra space for transposed intermediate data */
          for (i = 0; i < 2; ++i)
               if (XM(num_blocks)(sz->dims[i].n, sz->dims[i].b[IB]) == 1 &&
                   XM(num_blocks)(sz->dims[i].n, sz->dims[i].b[OB]) == 1) {
                    sz->dims[i].b[IB]
                         = XM(default_block)(sz->dims[i].n, n_pes);
                    sz->dims[1-i].b[IB] = sz->dims[1-i].n;
                    local_size(my_pe, sz, IB, local_n_in, local_start_in);
                    N = X(imax)(N, prod(rnk, local_n_in));
                    sz->dims[i] = odims[i];
                    sz->dims[1-i] = odims[1-i];
                    break;
               }

          /* dft-rank-geq2 */
          Nafter = howmany;
          for (i = 1; i < sz->rnk; ++i) Nafter *= sz->dims[i].n;
          N = X(imax)(N, (sz->dims[0].n
                          * XM(block)(Nafter, XM(default_block)(Nafter, n_pes),
                                      my_pe) + howmany - 1) / howmany);

          /* dft-rank-geq2 with dimensions swapped */
          Nafter = howmany * sz->dims[0].n;
          for (i = 2; i < sz->rnk; ++i) Nafter *= sz->dims[i].n;
          N = X(imax)(N, (sz->dims[1].n
                          * XM(block)(Nafter, XM(default_block)(Nafter, n_pes),
                                      my_pe) + howmany - 1) / howmany);
     }
     else if (rnk == 1) {
          if (howmany >= n_pes && !MPI_FLAGS(flags)) { /* dft-rank1-bigvec */
               ptrdiff_t n[2], start[2];
               dtensor *sz2 = XM(mkdtensor)(2);
               sz2->dims[0] = sz->dims[0];
               sz2->dims[0].b[IB] = sz->dims[0].n;
               sz2->dims[1].n = sz2->dims[1].b[OB] = howmany;
               sz2->dims[1].b[IB] = XM(default_block)(howmany, n_pes);
               local_size(my_pe, sz2, IB, n, start);
               XM(dtensor_destroy)(sz2);
               N = X(imax)(N, (prod(2, n) + howmany - 1) / howmany);
          }
          else { /* dft-rank1 */
               INT r, m, rblock[2], mblock[2];

               /* Since the 1d transforms are so different, we require
                  the user to call local_size_1d for this case.  Ugh. */
               CK(sign == FFTW_FORWARD || sign == FFTW_BACKWARD);

               if ((r = XM(choose_radix)(sz->dims[0], n_pes, flags, sign,
                                         rblock, mblock))) {
                    m = sz->dims[0].n / r;
                    if (flags & FFTW_MPI_SCRAMBLED_IN)
                         sz->dims[0].b[IB] = rblock[IB] * m;
                    else { /* !SCRAMBLED_IN */
                         sz->dims[0].b[IB] = r * mblock[IB];
                         N = X(imax)(N, rblock[IB] * m);
                    }
                    if (flags & FFTW_MPI_SCRAMBLED_OUT)
                         sz->dims[0].b[OB] = r * mblock[OB];
                    else { /* !SCRAMBLED_OUT */
                         N = X(imax)(N, r * mblock[OB]);
                         sz->dims[0].b[OB] = rblock[OB] * m;
                    }
               }
          }
     }

     local_size(my_pe, sz, IB, local_n_in, local_start_in);
     local_size(my_pe, sz, OB, local_n_out, local_start_out);

     /* at least, make sure we have enough space to store input & output */
     N = X(imax)(N, X(imax)(prod(rnk, local_n_in), prod(rnk, local_n_out)));

     XM(dtensor_destroy)(sz);
     return N * howmany;
}

ptrdiff_t XM(local_size_many_transposed)(int rnk, const ptrdiff_t *n,
                                         ptrdiff_t howmany,
                                         ptrdiff_t xblock, ptrdiff_t yblock,
                                         MPI_Comm comm,
                                         ptrdiff_t *local_nx,
                                         ptrdiff_t *local_x_start,
                                         ptrdiff_t *local_ny,
                                         ptrdiff_t *local_y_start)
{
     ptrdiff_t N;
     XM(ddim) *dims;
     ptrdiff_t *local;

     if (rnk == 0) {
          *local_nx = *local_ny = 1;
          *local_x_start = *local_y_start = 0;
          return howmany;
     }

     dims = simple_dims(rnk, n);
     local = (ptrdiff_t *) MALLOC(sizeof(ptrdiff_t) * rnk * 4, TENSORS);

     /* default 1d block distribution, with transposed output
        if yblock < n[1] */
     dims[0].ib = xblock;
     if (rnk > 1) {
          if (yblock < n[1])
               dims[1].ob = yblock;
          else
               dims[0].ob = xblock;
     }
     else
          dims[0].ob = xblock; /* FIXME: 1d not really supported here
                                  since we don't have flags/sign */

     N = XM(local_size_guru)(rnk, dims, howmany, comm,
                             local, local + rnk,
                             local + 2*rnk, local + 3*rnk,
                             0, 0);
     *local_nx = local[0];
     *local_x_start = local[rnk];
     if (rnk > 1) {
          *local_ny = local[2*rnk + 1];
          *local_y_start = local[3*rnk + 1];
     }
     else {
          *local_ny = *local_nx;
          *local_y_start = *local_x_start;
     }
     X(ifree)(local);
     X(ifree)(dims);
     return N;
}

ptrdiff_t XM(local_size_many)(int rnk, const ptrdiff_t *n,
                              ptrdiff_t howmany,
                              ptrdiff_t xblock,
                              MPI_Comm comm,
                              ptrdiff_t *local_nx,
                              ptrdiff_t *local_x_start)
{
     ptrdiff_t local_ny, local_y_start;
     return XM(local_size_many_transposed)(rnk, n, howmany,
                                           xblock, rnk > 1
                                           ? n[1] : FFTW_MPI_DEFAULT_BLOCK,
                                           comm,
                                           local_nx, local_x_start,
                                           &local_ny, &local_y_start);
}


ptrdiff_t XM(local_size_transposed)(int rnk, const ptrdiff_t *n,
                                    MPI_Comm comm,
                                    ptrdiff_t *local_nx,
                                    ptrdiff_t *local_x_start,
                                    ptrdiff_t *local_ny,
                                    ptrdiff_t *local_y_start)
{
     return XM(local_size_many_transposed)(rnk, n, 1,
                                           FFTW_MPI_DEFAULT_BLOCK,
                                           FFTW_MPI_DEFAULT_BLOCK,
                                           comm,
                                           local_nx, local_x_start,
                                           local_ny, local_y_start);
}

ptrdiff_t XM(local_size)(int rnk, const ptrdiff_t *n,
                         MPI_Comm comm,
                         ptrdiff_t *local_nx,
                         ptrdiff_t *local_x_start)
{
     return XM(local_size_many)(rnk, n, 1, FFTW_MPI_DEFAULT_BLOCK, comm,
                                local_nx, local_x_start);
}

ptrdiff_t XM(local_size_many_1d)(ptrdiff_t nx, ptrdiff_t howmany,
                                 MPI_Comm comm, int sign, unsigned flags,
                                 ptrdiff_t *local_nx, ptrdiff_t *local_x_start,
                                 ptrdiff_t *local_ny, ptrdiff_t *local_y_start)
{
     XM(ddim) d;
     d.n = nx;
     d.ib = d.ob = FFTW_MPI_DEFAULT_BLOCK;
     return XM(local_size_guru)(1, &d, howmany, comm,
                                local_nx, local_x_start,
                                local_ny, local_y_start, sign, flags);
}

ptrdiff_t XM(local_size_1d)(ptrdiff_t nx,
                            MPI_Comm comm, int sign, unsigned flags,
                            ptrdiff_t *local_nx, ptrdiff_t *local_x_start,
                            ptrdiff_t *local_ny, ptrdiff_t *local_y_start)
{
     return XM(local_size_many_1d)(nx, 1, comm, sign, flags,
                                   local_nx, local_x_start,
                                   local_ny, local_y_start);
}

ptrdiff_t XM(local_size_2d_transposed)(ptrdiff_t nx, ptrdiff_t ny,
                                       MPI_Comm comm,
                                       ptrdiff_t *local_nx,
                                       ptrdiff_t *local_x_start,
                                       ptrdiff_t *local_ny,
                                       ptrdiff_t *local_y_start)
{
     ptrdiff_t n[2];
     n[0] = nx; n[1] = ny;
     return XM(local_size_transposed)(2, n, comm,
                                      local_nx, local_x_start,
                                      local_ny, local_y_start);
}

ptrdiff_t XM(local_size_2d)(ptrdiff_t nx, ptrdiff_t ny, MPI_Comm comm,
                            ptrdiff_t *local_nx, ptrdiff_t *local_x_start)
{
     ptrdiff_t n[2];
     n[0] = nx; n[1] = ny;
     return XM(local_size)(2, n, comm, local_nx, local_x_start);
}

ptrdiff_t XM(local_size_3d_transposed)(ptrdiff_t nx, ptrdiff_t ny,
                                       ptrdiff_t nz,
                                       MPI_Comm comm,
                                       ptrdiff_t *local_nx,
                                       ptrdiff_t *local_x_start,
                                       ptrdiff_t *local_ny,
                                       ptrdiff_t *local_y_start)
{
     ptrdiff_t n[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     return XM(local_size_transposed)(3, n, comm,
                                      local_nx, local_x_start,
                                      local_ny, local_y_start);
}

ptrdiff_t XM(local_size_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz,
                            MPI_Comm comm,
                            ptrdiff_t *local_nx, ptrdiff_t *local_x_start)
{
     ptrdiff_t n[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     return XM(local_size)(3, n, comm, local_nx, local_x_start);
}
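
/* Illustrative usage sketch (not part of the library): in double precision
   the public names of the functions above look like this; variable names
   here are hypothetical.

       ptrdiff_t local_nx, local_x_start;
       ptrdiff_t alloc = fftw_mpi_local_size_2d(nx, ny, MPI_COMM_WORLD,
                                                &local_nx, &local_x_start);
       fftw_complex *data = fftw_alloc_complex(alloc);
       ...
       fftw_free(data);
*/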

/*************************************************************************/
/* Transpose API */

X(plan) XM(plan_many_transpose)(ptrdiff_t nx, ptrdiff_t ny,
                                ptrdiff_t howmany,
                                ptrdiff_t xblock, ptrdiff_t yblock,
                                R *in, R *out,
                                MPI_Comm comm, unsigned flags)
{
     int n_pes;
     XM(init)();

     if (howmany < 0 || xblock < 0 || yblock < 0 ||
         nx <= 0 || ny <= 0) return 0;

     MPI_Comm_size(comm, &n_pes);
     if (!xblock) xblock = XM(default_block)(nx, n_pes);
     if (!yblock) yblock = XM(default_block)(ny, n_pes);
     if (n_pes < XM(num_blocks)(nx, xblock)
         || n_pes < XM(num_blocks)(ny, yblock))
          return 0;

     return
          X(mkapiplan)(FFTW_FORWARD, flags,
                       XM(mkproblem_transpose)(nx, ny, howmany,
                                               in, out, xblock, yblock,
                                               comm, MPI_FLAGS(flags)));
}

X(plan) XM(plan_transpose)(ptrdiff_t nx, ptrdiff_t ny, R *in, R *out,
                           MPI_Comm comm, unsigned flags)

{
     return XM(plan_many_transpose)(nx, ny, 1,
                                    FFTW_MPI_DEFAULT_BLOCK,
                                    FFTW_MPI_DEFAULT_BLOCK,
                                    in, out, comm, flags);
}

/*************************************************************************/
/* Complex DFT API */

X(plan) XM(plan_guru_dft)(int rnk, const XM(ddim) *dims0,
                          ptrdiff_t howmany,
                          C *in, C *out,
                          MPI_Comm comm, int sign, unsigned flags)
{
     int n_pes, i;
     dtensor *sz;

     XM(init)();

     if (howmany < 0 || rnk < 1) return 0;
     for (i = 0; i < rnk; ++i)
          if (dims0[i].n < 1 || dims0[i].ib < 0 || dims0[i].ob < 0)
               return 0;

     MPI_Comm_size(comm, &n_pes);
     sz = default_sz(rnk, dims0, n_pes, 0);

     if (XM(num_blocks_total)(sz, IB) > n_pes
         || XM(num_blocks_total)(sz, OB) > n_pes) {
          XM(dtensor_destroy)(sz);
          return 0;
     }

     return
          X(mkapiplan)(sign, flags,
                       XM(mkproblem_dft_d)(sz, howmany,
                                           (R *) in, (R *) out,
                                           comm, sign,
                                           MPI_FLAGS(flags)));
}

X(plan) XM(plan_many_dft)(int rnk, const ptrdiff_t *n,
                          ptrdiff_t howmany,
                          ptrdiff_t iblock, ptrdiff_t oblock,
                          C *in, C *out,
                          MPI_Comm comm, int sign, unsigned flags)
{
     XM(ddim) *dims = simple_dims(rnk, n);
     X(plan) pln;

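     /* With FFTW_MPI_TRANSPOSED_IN (resp. _OUT), the input (resp. output)
        is distributed along the second dimension n[1] instead of n[0], so
        the user-supplied block size attaches to dims[1]; the
        0 != (flags & ...) test below evaluates to 1 when the flag is set
        and 0 otherwise. */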
     if (rnk == 1) {
          dims[0].ib = iblock;
          dims[0].ob = oblock;
     }
     else if (rnk > 1) {
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_IN)].ib = iblock;
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_OUT)].ob = oblock;
     }

     pln = XM(plan_guru_dft)(rnk, dims, howmany, in, out, comm, sign, flags);
     X(ifree)(dims);
     return pln;
}

X(plan) XM(plan_dft)(int rnk, const ptrdiff_t *n, C *in, C *out,
                     MPI_Comm comm, int sign, unsigned flags)
{
     return XM(plan_many_dft)(rnk, n, 1,
                              FFTW_MPI_DEFAULT_BLOCK,
                              FFTW_MPI_DEFAULT_BLOCK,
                              in, out, comm, sign, flags);
}

X(plan) XM(plan_dft_1d)(ptrdiff_t nx, C *in, C *out,
                        MPI_Comm comm, int sign, unsigned flags)
{
     return XM(plan_dft)(1, &nx, in, out, comm, sign, flags);
}

X(plan) XM(plan_dft_2d)(ptrdiff_t nx, ptrdiff_t ny, C *in, C *out,
                        MPI_Comm comm, int sign, unsigned flags)
{
     ptrdiff_t n[2];
     n[0] = nx; n[1] = ny;
     return XM(plan_dft)(2, n, in, out, comm, sign, flags);
}

X(plan) XM(plan_dft_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz,
                        C *in, C *out,
                        MPI_Comm comm, int sign, unsigned flags)
{
     ptrdiff_t n[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     return XM(plan_dft)(3, n, in, out, comm, sign, flags);
}
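
/* Sketch of a typical call sequence for the DFT API (double-precision public
   names; buffer allocation as in the local-size sketch above):

       fftw_plan p = fftw_mpi_plan_dft_2d(nx, ny, data, data, MPI_COMM_WORLD,
                                          FFTW_FORWARD, FFTW_MEASURE);
       fftw_execute(p);
       fftw_destroy_plan(p);
*/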

/*************************************************************************/
/* R2R API */

X(plan) XM(plan_guru_r2r)(int rnk, const XM(ddim) *dims0,
                          ptrdiff_t howmany,
                          R *in, R *out,
                          MPI_Comm comm, const X(r2r_kind) *kind,
                          unsigned flags)
{
     int n_pes, i;
     dtensor *sz;
     rdft_kind *k;
     X(plan) pln;

     XM(init)();

     if (howmany < 0 || rnk < 1) return 0;
     for (i = 0; i < rnk; ++i)
          if (dims0[i].n < 1 || dims0[i].ib < 0 || dims0[i].ob < 0)
               return 0;

     k = X(map_r2r_kind)(rnk, kind);

     MPI_Comm_size(comm, &n_pes);
     sz = default_sz(rnk, dims0, n_pes, 0);

     if (XM(num_blocks_total)(sz, IB) > n_pes
         || XM(num_blocks_total)(sz, OB) > n_pes) {
          XM(dtensor_destroy)(sz);
          return 0;
     }

     pln = X(mkapiplan)(0, flags,
                        XM(mkproblem_rdft_d)(sz, howmany,
                                             in, out,
                                             comm, k, MPI_FLAGS(flags)));
     X(ifree0)(k);
     return pln;
}

X(plan) XM(plan_many_r2r)(int rnk, const ptrdiff_t *n,
                          ptrdiff_t howmany,
                          ptrdiff_t iblock, ptrdiff_t oblock,
                          R *in, R *out,
                          MPI_Comm comm, const X(r2r_kind) *kind,
                          unsigned flags)
{
     XM(ddim) *dims = simple_dims(rnk, n);
     X(plan) pln;

     if (rnk == 1) {
          dims[0].ib = iblock;
          dims[0].ob = oblock;
     }
     else if (rnk > 1) {
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_IN)].ib = iblock;
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_OUT)].ob = oblock;
     }

     pln = XM(plan_guru_r2r)(rnk, dims, howmany, in, out, comm, kind, flags);
     X(ifree)(dims);
     return pln;
}

X(plan) XM(plan_r2r)(int rnk, const ptrdiff_t *n, R *in, R *out,
                     MPI_Comm comm,
                     const X(r2r_kind) *kind,
                     unsigned flags)
{
     return XM(plan_many_r2r)(rnk, n, 1,
                              FFTW_MPI_DEFAULT_BLOCK,
                              FFTW_MPI_DEFAULT_BLOCK,
                              in, out, comm, kind, flags);
}

X(plan) XM(plan_r2r_2d)(ptrdiff_t nx, ptrdiff_t ny, R *in, R *out,
                        MPI_Comm comm,
                        X(r2r_kind) kindx, X(r2r_kind) kindy,
                        unsigned flags)
{
     ptrdiff_t n[2];
     X(r2r_kind) kind[2];
     n[0] = nx; n[1] = ny;
     kind[0] = kindx; kind[1] = kindy;
     return XM(plan_r2r)(2, n, in, out, comm, kind, flags);
}

X(plan) XM(plan_r2r_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz,
                        R *in, R *out,
                        MPI_Comm comm,
                        X(r2r_kind) kindx, X(r2r_kind) kindy,
                        X(r2r_kind) kindz,
                        unsigned flags)
{
     ptrdiff_t n[3];
     X(r2r_kind) kind[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     kind[0] = kindx; kind[1] = kindy; kind[2] = kindz;
     return XM(plan_r2r)(3, n, in, out, comm, kind, flags);
}

/*************************************************************************/
/* R2C/C2R API */

static X(plan) plan_guru_rdft2(int rnk, const XM(ddim) *dims0,
                               ptrdiff_t howmany,
                               R *r, C *c,
                               MPI_Comm comm, rdft_kind kind, unsigned flags)
{
     int n_pes, i;
     dtensor *sz;
     R *cr = (R *) c;

     XM(init)();

     if (howmany < 0 || rnk < 2) return 0;
     for (i = 0; i < rnk; ++i)
          if (dims0[i].n < 1 || dims0[i].ib < 0 || dims0[i].ob < 0)
               return 0;

     MPI_Comm_size(comm, &n_pes);
     sz = default_sz(rnk, dims0, n_pes, 1);

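     /* Check the block counts against the halfcomplex extent of the last
        dimension, n/2+1, then restore the logical size n below. */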
     sz->dims[rnk-1].n = dims0[rnk-1].n / 2 + 1;
     if (XM(num_blocks_total)(sz, IB) > n_pes
         || XM(num_blocks_total)(sz, OB) > n_pes) {
          XM(dtensor_destroy)(sz);
          return 0;
     }
     sz->dims[rnk-1].n = dims0[rnk-1].n;

     if (kind == R2HC)
          return X(mkapiplan)(0, flags,
                              XM(mkproblem_rdft2_d)(sz, howmany,
                                                    r, cr, comm, R2HC,
                                                    MPI_FLAGS(flags)));
     else
          return X(mkapiplan)(0, flags,
                              XM(mkproblem_rdft2_d)(sz, howmany,
                                                    cr, r, comm, HC2R,
                                                    MPI_FLAGS(flags)));
}

X(plan) XM(plan_many_dft_r2c)(int rnk, const ptrdiff_t *n,
                              ptrdiff_t howmany,
                              ptrdiff_t iblock, ptrdiff_t oblock,
                              R *in, C *out,
                              MPI_Comm comm, unsigned flags)
{
     XM(ddim) *dims = simple_dims(rnk, n);
     X(plan) pln;

     if (rnk == 1) {
          dims[0].ib = iblock;
          dims[0].ob = oblock;
     }
     else if (rnk > 1) {
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_IN)].ib = iblock;
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_OUT)].ob = oblock;
     }

     pln = plan_guru_rdft2(rnk, dims, howmany, in, out, comm, R2HC, flags);
     X(ifree)(dims);
     return pln;
}

X(plan) XM(plan_many_dft_c2r)(int rnk, const ptrdiff_t *n,
                              ptrdiff_t howmany,
                              ptrdiff_t iblock, ptrdiff_t oblock,
                              C *in, R *out,
                              MPI_Comm comm, unsigned flags)
{
     XM(ddim) *dims = simple_dims(rnk, n);
     X(plan) pln;

     if (rnk == 1) {
          dims[0].ib = iblock;
          dims[0].ob = oblock;
     }
     else if (rnk > 1) {
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_IN)].ib = iblock;
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_OUT)].ob = oblock;
     }

     pln = plan_guru_rdft2(rnk, dims, howmany, out, in, comm, HC2R, flags);
     X(ifree)(dims);
     return pln;
}

X(plan) XM(plan_dft_r2c)(int rnk, const ptrdiff_t *n, R *in, C *out,
                         MPI_Comm comm, unsigned flags)
{
     return XM(plan_many_dft_r2c)(rnk, n, 1,
                                  FFTW_MPI_DEFAULT_BLOCK,
                                  FFTW_MPI_DEFAULT_BLOCK,
                                  in, out, comm, flags);
}

X(plan) XM(plan_dft_r2c_2d)(ptrdiff_t nx, ptrdiff_t ny, R *in, C *out,
                            MPI_Comm comm, unsigned flags)
{
     ptrdiff_t n[2];
     n[0] = nx; n[1] = ny;
     return XM(plan_dft_r2c)(2, n, in, out, comm, flags);
}

X(plan) XM(plan_dft_r2c_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz,
                            R *in, C *out, MPI_Comm comm, unsigned flags)
{
     ptrdiff_t n[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     return XM(plan_dft_r2c)(3, n, in, out, comm, flags);
}

X(plan) XM(plan_dft_c2r)(int rnk, const ptrdiff_t *n, C *in, R *out,
                         MPI_Comm comm, unsigned flags)
{
     return XM(plan_many_dft_c2r)(rnk, n, 1,
                                  FFTW_MPI_DEFAULT_BLOCK,
                                  FFTW_MPI_DEFAULT_BLOCK,
                                  in, out, comm, flags);
}

X(plan) XM(plan_dft_c2r_2d)(ptrdiff_t nx, ptrdiff_t ny, C *in, R *out,
                            MPI_Comm comm, unsigned flags)
{
     ptrdiff_t n[2];
     n[0] = nx; n[1] = ny;
     return XM(plan_dft_c2r)(2, n, in, out, comm, flags);
}

X(plan) XM(plan_dft_c2r_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz,
                            C *in, R *out, MPI_Comm comm, unsigned flags)
{
     ptrdiff_t n[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     return XM(plan_dft_c2r)(3, n, in, out, comm, flags);
}

/*************************************************************************/
/* New-array execute functions */

void XM(execute_dft)(const X(plan) p, C *in, C *out) {
     /* internally, MPI plans are just rdft plans */
     X(execute_r2r)(p, (R *) in, (R *) out);
}

void XM(execute_dft_r2c)(const X(plan) p, R *in, C *out) {
     /* internally, MPI plans are just rdft plans */
     X(execute_r2r)(p, in, (R *) out);
}

void XM(execute_dft_c2r)(const X(plan) p, C *in, R *out) {
     /* internally, MPI plans are just rdft plans */
     X(execute_r2r)(p, (R *) in, out);
}

void XM(execute_r2r)(const X(plan) p, R *in, R *out) {
     /* internally, MPI plans are just rdft plans */
     X(execute_r2r)(p, in, out);
}