/* fft/fftw/fftw-3.3.4/mpi/api.c */
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */

#include "api.h"
#include "fftw3-mpi.h"
#include "ifftw-mpi.h"
#include "mpi-transpose.h"
#include "mpi-dft.h"
#include "mpi-rdft.h"
#include "mpi-rdft2.h"

/* Convert API flags to internal MPI flags. */
#define MPI_FLAGS(f) ((f) >> 27)
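
/* (Illustrative note, not part of the original source: the public
   FFTW_MPI_* flags declared in fftw3-mpi.h are taken here to occupy the
   high bits of the 32-bit flags word, starting at bit 27, so shifting
   right by 27 separates them from the ordinary planner flags understood
   by the serial API.  A hedged usage sketch:

       unsigned mpi_flags = MPI_FLAGS(flags);
       if (mpi_flags & (FFTW_MPI_TRANSPOSED_OUT >> 27)) {
            ... the caller asked for transposed output ...
       }
   ) */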

/*************************************************************************/

static int mpi_inited = 0;

static MPI_Comm problem_comm(const problem *p) {
     switch (p->adt->problem_kind) {
         case PROBLEM_MPI_DFT:
              return ((const problem_mpi_dft *) p)->comm;
         case PROBLEM_MPI_RDFT:
              return ((const problem_mpi_rdft *) p)->comm;
         case PROBLEM_MPI_RDFT2:
              return ((const problem_mpi_rdft2 *) p)->comm;
         case PROBLEM_MPI_TRANSPOSE:
              return ((const problem_mpi_transpose *) p)->comm;
         default:
              return MPI_COMM_NULL;
     }
}

/* used to synchronize cost measurements (timing or estimation)
   across all processes for an MPI problem, which is critical to
   ensure that all processes decide to use the same MPI plans
   (whereas serial plans need not be synchronized). */
static double cost_hook(const problem *p, double t, cost_kind k)
{
     MPI_Comm comm = problem_comm(p);
     double tsum;
     if (comm == MPI_COMM_NULL) return t;
     MPI_Allreduce(&t, &tsum, 1, MPI_DOUBLE,
                   k == COST_SUM ? MPI_SUM : MPI_MAX, comm);
     return tsum;
}
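
/* (Illustrative note, not in the original source: when k == COST_SUM
   every process ends up with the communicator-wide total of the
   measured costs, and otherwise with the communicator-wide maximum,
   so all ranks see identical numbers and therefore rank the candidate
   MPI plans identically.) */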

/* Used to reject wisdom that is not in sync across all processes
   for an MPI problem, which is critical to ensure that all processes
   decide to use the same MPI plans.  (Even though costs are synchronized,
   above, out-of-sync wisdom may result from plans being produced
   by communicators that do not span all processes, either from a
   user-specified communicator or e.g. from transpose-recurse.) */
static int wisdom_ok_hook(const problem *p, flags_t flags)
{
     MPI_Comm comm = problem_comm(p);
     int eq_me, eq_all;
     /* unpack flags bitfield, since MPI communications may involve
        byte-order changes and MPI cannot do this for bit fields */
#if SIZEOF_UNSIGNED_INT >= 4 /* must be big enough to hold 20-bit fields */
     unsigned int f[5];
#else
     unsigned long f[5]; /* at least 32 bits as per C standard */
#endif

     if (comm == MPI_COMM_NULL) return 1; /* non-MPI wisdom is always ok */

     if (XM(any_true)(0, comm)) return 0; /* some process had nowisdom_hook */

     /* otherwise, check that the flags and solver index are identical
        on all processes in this problem's communicator.

        TO DO: possibly we can relax strict equality, but it is
        critical to ensure that any flags which affect what plan is
        created (and whether the solver is applicable) are the same,
        e.g. DESTROY_INPUT, NO_UGLY, etcetera.  (If the MPI algorithm
        differs between processes, deadlocks/crashes generally result.) */
     f[0] = flags.l;
     f[1] = flags.hash_info;
     f[2] = flags.timelimit_impatience;
     f[3] = flags.u;
     f[4] = flags.slvndx;
     MPI_Bcast(f, 5,
               SIZEOF_UNSIGNED_INT >= 4 ? MPI_UNSIGNED : MPI_UNSIGNED_LONG,
               0, comm);
     eq_me = f[0] == flags.l && f[1] == flags.hash_info
          && f[2] == flags.timelimit_impatience
          && f[3] == flags.u && f[4] == flags.slvndx;
     MPI_Allreduce(&eq_me, &eq_all, 1, MPI_INT, MPI_LAND, comm);
     return eq_all;
}

/* This hook is called when wisdom is not found.  The any_true here
   matches up with the any_true in wisdom_ok_hook, in order to handle
   the case where some processes had wisdom (and called wisdom_ok_hook)
   and some processes didn't have wisdom (and called nowisdom_hook). */
static void nowisdom_hook(const problem *p)
{
     MPI_Comm comm = problem_comm(p);
     if (comm == MPI_COMM_NULL) return; /* nothing to do for non-MPI p */
     XM(any_true)(1, comm); /* signal nowisdom to any wisdom_ok_hook */
}

/* needed to synchronize planner bogosity flag, in case non-MPI problems
   on a subset of processes encountered bogus wisdom */
static wisdom_state_t bogosity_hook(wisdom_state_t state, const problem *p)
{
     MPI_Comm comm = problem_comm(p);
     if (comm != MPI_COMM_NULL /* an MPI problem */
         && XM(any_true)(state == WISDOM_IS_BOGUS, comm)) /* bogus somewhere */
          return WISDOM_IS_BOGUS;
     return state;
}

void XM(init)(void)
{
     if (!mpi_inited) {
          planner *plnr = X(the_planner)();
          plnr->cost_hook = cost_hook;
          plnr->wisdom_ok_hook = wisdom_ok_hook;
          plnr->nowisdom_hook = nowisdom_hook;
          plnr->bogosity_hook = bogosity_hook;
          XM(conf_standard)(plnr);
          mpi_inited = 1;
     }
}

void XM(cleanup)(void)
{
     X(cleanup)();
     mpi_inited = 0;
}
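
/* Illustrative sketch (not part of the original source, compiled out
   below): how a caller is expected to pair MPI initialization with
   XM(init) and XM(cleanup).  The lack of argc/argv is a placeholder
   simplification. */
#if 0
static void example_init_cleanup(void)
{
     MPI_Init(0, 0);          /* or MPI_Init(&argc, &argv) in main() */
     XM(init)();              /* installs the MPI planner hooks above */

     /* ... create and execute MPI plans here ... */

     XM(cleanup)();           /* also calls X(cleanup)() */
     MPI_Finalize();
}
#endif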

/*************************************************************************/

static dtensor *mkdtensor_api(int rnk, const XM(ddim) *dims0)
{
     dtensor *x = XM(mkdtensor)(rnk);
     int i;
     for (i = 0; i < rnk; ++i) {
          x->dims[i].n = dims0[i].n;
          x->dims[i].b[IB] = dims0[i].ib;
          x->dims[i].b[OB] = dims0[i].ob;
     }
     return x;
}

static dtensor *default_sz(int rnk, const XM(ddim) *dims0, int n_pes,
                           int rdft2)
{
     dtensor *sz = XM(mkdtensor)(rnk);
     dtensor *sz0 = mkdtensor_api(rnk, dims0);
     block_kind k;
     int i;

     for (i = 0; i < rnk; ++i)
          sz->dims[i].n = dims0[i].n;

     if (rdft2) sz->dims[rnk-1].n = dims0[rnk-1].n / 2 + 1;

     for (i = 0; i < rnk; ++i) {
          sz->dims[i].b[IB] = dims0[i].ib ? dims0[i].ib : sz->dims[i].n;
          sz->dims[i].b[OB] = dims0[i].ob ? dims0[i].ob : sz->dims[i].n;
     }

     /* If we haven't used all of the processes yet, and some of the
        block sizes weren't specified (i.e. 0), then set the
        unspecified blocks so as to use as many processes as
        possible with as few distributed dimensions as possible. */
     FORALL_BLOCK_KIND(k) {
          INT nb = XM(num_blocks_total)(sz, k);
          INT np = n_pes / nb;
          for (i = 0; i < rnk && np > 1; ++i)
               if (!sz0->dims[i].b[k]) {
                    sz->dims[i].b[k] = XM(default_block)(sz->dims[i].n, np);
                    nb *= XM(num_blocks)(sz->dims[i].n, sz->dims[i].b[k]);
                    np = n_pes / nb;
               }
     }

     if (rdft2) sz->dims[rnk-1].n = dims0[rnk-1].n;

     /* punt for 1d prime */
     if (rnk == 1 && X(is_prime)(sz->dims[0].n))
          sz->dims[0].b[IB] = sz->dims[0].b[OB] = sz->dims[0].n;

     XM(dtensor_destroy)(sz0);
     sz0 = XM(dtensor_canonical)(sz, 0);
     XM(dtensor_destroy)(sz);
     return sz0;
}
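
/* (Illustrative note, not in the original source: FFTW_MPI_DEFAULT_BLOCK
   is zero, so an unspecified block size falls through to
   XM(default_block), which divides a dimension as evenly as possible
   over the available processes.  For example, a 100 x 100 transform on
   4 processes with both blocks left unspecified would give each process
   a 25-row slab of the first dimension, and the second dimension would
   not be distributed at all.) */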

/* allocate simple local (serial) dims array corresponding to n[rnk] */
static XM(ddim) *simple_dims(int rnk, const ptrdiff_t *n)
{
     XM(ddim) *dims = (XM(ddim) *) MALLOC(sizeof(XM(ddim)) * rnk,
                                          TENSORS);
     int i;
     for (i = 0; i < rnk; ++i)
          dims[i].n = dims[i].ib = dims[i].ob = n[i];
     return dims;
}

/*************************************************************************/

static void local_size(int my_pe, const dtensor *sz, block_kind k,
                       ptrdiff_t *local_n, ptrdiff_t *local_start)
{
     int i;
     if (my_pe >= XM(num_blocks_total)(sz, k))
          for (i = 0; i < sz->rnk; ++i)
               local_n[i] = local_start[i] = 0;
     else {
          XM(block_coords)(sz, k, my_pe, local_start);
          for (i = 0; i < sz->rnk; ++i) {
               local_n[i] = XM(block)(sz->dims[i].n, sz->dims[i].b[k],
                                      local_start[i]);
               local_start[i] *= sz->dims[i].b[k];
          }
     }
}

static INT prod(int rnk, const ptrdiff_t *local_n)
{
     int i;
     INT N = 1;
     for (i = 0; i < rnk; ++i) N *= local_n[i];
     return N;
}

ptrdiff_t XM(local_size_guru)(int rnk, const XM(ddim) *dims0,
                              ptrdiff_t howmany, MPI_Comm comm,
                              ptrdiff_t *local_n_in,
                              ptrdiff_t *local_start_in,
                              ptrdiff_t *local_n_out,
                              ptrdiff_t *local_start_out,
                              int sign, unsigned flags)
{
     INT N;
     int my_pe, n_pes, i;
     dtensor *sz;

     if (rnk == 0)
          return howmany;

     MPI_Comm_rank(comm, &my_pe);
     MPI_Comm_size(comm, &n_pes);
     sz = default_sz(rnk, dims0, n_pes, 0);

     /* Now, we must figure out how much local space the user should
        allocate (or at least an upper bound).  This depends strongly
        on the exact algorithms we employ...ugh!  FIXME: get this info
        from the solvers somehow? */
     N = 1; /* never return zero allocation size */
     if (rnk > 1 && XM(is_block1d)(sz, IB) && XM(is_block1d)(sz, OB)) {
          INT Nafter;
          ddim odims[2];

          /* dft-rank-geq2-transposed */
          odims[0] = sz->dims[0]; odims[1] = sz->dims[1]; /* save */
          /* we may need extra space for transposed intermediate data */
          for (i = 0; i < 2; ++i)
               if (XM(num_blocks)(sz->dims[i].n, sz->dims[i].b[IB]) == 1 &&
                   XM(num_blocks)(sz->dims[i].n, sz->dims[i].b[OB]) == 1) {
                    sz->dims[i].b[IB]
                         = XM(default_block)(sz->dims[i].n, n_pes);
                    sz->dims[1-i].b[IB] = sz->dims[1-i].n;
                    local_size(my_pe, sz, IB, local_n_in, local_start_in);
                    N = X(imax)(N, prod(rnk, local_n_in));
                    sz->dims[i] = odims[i];
                    sz->dims[1-i] = odims[1-i];
                    break;
               }

          /* dft-rank-geq2 */
          Nafter = howmany;
          for (i = 1; i < sz->rnk; ++i) Nafter *= sz->dims[i].n;
          N = X(imax)(N, (sz->dims[0].n
                          * XM(block)(Nafter, XM(default_block)(Nafter, n_pes),
                                      my_pe) + howmany - 1) / howmany);

          /* dft-rank-geq2 with dimensions swapped */
          Nafter = howmany * sz->dims[0].n;
          for (i = 2; i < sz->rnk; ++i) Nafter *= sz->dims[i].n;
          N = X(imax)(N, (sz->dims[1].n
                          * XM(block)(Nafter, XM(default_block)(Nafter, n_pes),
                                      my_pe) + howmany - 1) / howmany);
     }
     else if (rnk == 1) {
          if (howmany >= n_pes && !MPI_FLAGS(flags)) { /* dft-rank1-bigvec */
               ptrdiff_t n[2], start[2];
               dtensor *sz2 = XM(mkdtensor)(2);
               sz2->dims[0] = sz->dims[0];
               sz2->dims[0].b[IB] = sz->dims[0].n;
               sz2->dims[1].n = sz2->dims[1].b[OB] = howmany;
               sz2->dims[1].b[IB] = XM(default_block)(howmany, n_pes);
               local_size(my_pe, sz2, IB, n, start);
               XM(dtensor_destroy)(sz2);
               N = X(imax)(N, (prod(2, n) + howmany - 1) / howmany);
          }
          else { /* dft-rank1 */
               INT r, m, rblock[2], mblock[2];

               /* Since the 1d transforms are so different, we require
                  the user to call local_size_1d for this case.  Ugh. */
               CK(sign == FFTW_FORWARD || sign == FFTW_BACKWARD);

               if ((r = XM(choose_radix)(sz->dims[0], n_pes, flags, sign,
                                         rblock, mblock))) {
                    m = sz->dims[0].n / r;
                    if (flags & FFTW_MPI_SCRAMBLED_IN)
                         sz->dims[0].b[IB] = rblock[IB] * m;
                    else { /* !SCRAMBLED_IN */
                         sz->dims[0].b[IB] = r * mblock[IB];
                         N = X(imax)(N, rblock[IB] * m);
                    }
                    if (flags & FFTW_MPI_SCRAMBLED_OUT)
                         sz->dims[0].b[OB] = r * mblock[OB];
                    else { /* !SCRAMBLED_OUT */
                         N = X(imax)(N, r * mblock[OB]);
                         sz->dims[0].b[OB] = rblock[OB] * m;
                    }
               }
          }
     }

     local_size(my_pe, sz, IB, local_n_in, local_start_in);
     local_size(my_pe, sz, OB, local_n_out, local_start_out);

     /* at least, make sure we have enough space to store input & output */
     N = X(imax)(N, X(imax)(prod(rnk, local_n_in), prod(rnk, local_n_out)));

     XM(dtensor_destroy)(sz);
     return N * howmany;
}

ptrdiff_t XM(local_size_many_transposed)(int rnk, const ptrdiff_t *n,
                                         ptrdiff_t howmany,
                                         ptrdiff_t xblock, ptrdiff_t yblock,
                                         MPI_Comm comm,
                                         ptrdiff_t *local_nx,
                                         ptrdiff_t *local_x_start,
                                         ptrdiff_t *local_ny,
                                         ptrdiff_t *local_y_start)
{
     ptrdiff_t N;
     XM(ddim) *dims;
     ptrdiff_t *local;

     if (rnk == 0) {
          *local_nx = *local_ny = 1;
          *local_x_start = *local_y_start = 0;
          return howmany;
     }

     dims = simple_dims(rnk, n);
     local = (ptrdiff_t *) MALLOC(sizeof(ptrdiff_t) * rnk * 4, TENSORS);

     /* default 1d block distribution, with transposed output
        if yblock < n[1] */
     dims[0].ib = xblock;
     if (rnk > 1) {
          if (yblock < n[1])
               dims[1].ob = yblock;
          else
               dims[0].ob = xblock;
     }
     else
          dims[0].ob = xblock; /* FIXME: 1d not really supported here
                                  since we don't have flags/sign */

     N = XM(local_size_guru)(rnk, dims, howmany, comm,
                             local, local + rnk,
                             local + 2*rnk, local + 3*rnk,
                             0, 0);
     *local_nx = local[0];
     *local_x_start = local[rnk];
     if (rnk > 1) {
          *local_ny = local[2*rnk + 1];
          *local_y_start = local[3*rnk + 1];
     }
     else {
          *local_ny = *local_nx;
          *local_y_start = *local_x_start;
     }
     X(ifree)(local);
     X(ifree)(dims);
     return N;
}

ptrdiff_t XM(local_size_many)(int rnk, const ptrdiff_t *n,
                              ptrdiff_t howmany,
                              ptrdiff_t xblock,
                              MPI_Comm comm,
                              ptrdiff_t *local_nx,
                              ptrdiff_t *local_x_start)
{
     ptrdiff_t local_ny, local_y_start;
     return XM(local_size_many_transposed)(rnk, n, howmany,
                                           xblock, rnk > 1
                                           ? n[1] : FFTW_MPI_DEFAULT_BLOCK,
                                           comm,
                                           local_nx, local_x_start,
                                           &local_ny, &local_y_start);
}


ptrdiff_t XM(local_size_transposed)(int rnk, const ptrdiff_t *n,
                                    MPI_Comm comm,
                                    ptrdiff_t *local_nx,
                                    ptrdiff_t *local_x_start,
                                    ptrdiff_t *local_ny,
                                    ptrdiff_t *local_y_start)
{
     return XM(local_size_many_transposed)(rnk, n, 1,
                                           FFTW_MPI_DEFAULT_BLOCK,
                                           FFTW_MPI_DEFAULT_BLOCK,
                                           comm,
                                           local_nx, local_x_start,
                                           local_ny, local_y_start);
}

ptrdiff_t XM(local_size)(int rnk, const ptrdiff_t *n,
                         MPI_Comm comm,
                         ptrdiff_t *local_nx,
                         ptrdiff_t *local_x_start)
{
     return XM(local_size_many)(rnk, n, 1, FFTW_MPI_DEFAULT_BLOCK, comm,
                                local_nx, local_x_start);
}

ptrdiff_t XM(local_size_many_1d)(ptrdiff_t nx, ptrdiff_t howmany,
                                 MPI_Comm comm, int sign, unsigned flags,
                                 ptrdiff_t *local_nx, ptrdiff_t *local_x_start,
                                 ptrdiff_t *local_ny, ptrdiff_t *local_y_start)
{
     XM(ddim) d;
     d.n = nx;
     d.ib = d.ob = FFTW_MPI_DEFAULT_BLOCK;
     return XM(local_size_guru)(1, &d, howmany, comm,
                                local_nx, local_x_start,
                                local_ny, local_y_start, sign, flags);
}

ptrdiff_t XM(local_size_1d)(ptrdiff_t nx,
                            MPI_Comm comm, int sign, unsigned flags,
                            ptrdiff_t *local_nx, ptrdiff_t *local_x_start,
                            ptrdiff_t *local_ny, ptrdiff_t *local_y_start)
{
     return XM(local_size_many_1d)(nx, 1, comm, sign, flags,
                                   local_nx, local_x_start,
                                   local_ny, local_y_start);
}

ptrdiff_t XM(local_size_2d_transposed)(ptrdiff_t nx, ptrdiff_t ny,
                                       MPI_Comm comm,
                                       ptrdiff_t *local_nx,
                                       ptrdiff_t *local_x_start,
                                       ptrdiff_t *local_ny,
                                       ptrdiff_t *local_y_start)
{
     ptrdiff_t n[2];
     n[0] = nx; n[1] = ny;
     return XM(local_size_transposed)(2, n, comm,
                                      local_nx, local_x_start,
                                      local_ny, local_y_start);
}

ptrdiff_t XM(local_size_2d)(ptrdiff_t nx, ptrdiff_t ny, MPI_Comm comm,
                            ptrdiff_t *local_nx, ptrdiff_t *local_x_start)
{
     ptrdiff_t n[2];
     n[0] = nx; n[1] = ny;
     return XM(local_size)(2, n, comm, local_nx, local_x_start);
}

ptrdiff_t XM(local_size_3d_transposed)(ptrdiff_t nx, ptrdiff_t ny,
                                       ptrdiff_t nz,
                                       MPI_Comm comm,
                                       ptrdiff_t *local_nx,
                                       ptrdiff_t *local_x_start,
                                       ptrdiff_t *local_ny,
                                       ptrdiff_t *local_y_start)
{
     ptrdiff_t n[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     return XM(local_size_transposed)(3, n, comm,
                                      local_nx, local_x_start,
                                      local_ny, local_y_start);
}

ptrdiff_t XM(local_size_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz,
                            MPI_Comm comm,
                            ptrdiff_t *local_nx, ptrdiff_t *local_x_start)
{
     ptrdiff_t n[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     return XM(local_size)(3, n, comm, local_nx, local_x_start);
}
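
/* Illustrative sketch (not part of the original source, compiled out
   below): typical use of the local_size family before allocating a
   distributed array.  The 256 x 256 problem size is an arbitrary
   placeholder. */
#if 0
static void example_local_size_2d(MPI_Comm comm)
{
     ptrdiff_t local_nx, local_x_start, alloc_local;
     C *data;

     /* how many elements (complex numbers, for a DFT) this process must
        allocate, and which contiguous chunk of the first dimension it
        owns */
     alloc_local = XM(local_size_2d)(256, 256, comm,
                                     &local_nx, &local_x_start);

     /* alloc_local may exceed local_nx * 256, since intermediate steps
        can need extra scratch, so always allocate the returned amount */
     data = (C *) X(malloc)(sizeof(C) * alloc_local);

     /* ... plan and execute using `data', then ... */
     X(free)(data);
}
#endif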

/*************************************************************************/
/* Transpose API */

X(plan) XM(plan_many_transpose)(ptrdiff_t nx, ptrdiff_t ny,
                                ptrdiff_t howmany,
                                ptrdiff_t xblock, ptrdiff_t yblock,
                                R *in, R *out,
                                MPI_Comm comm, unsigned flags)
{
     int n_pes;
     XM(init)();

     if (howmany < 0 || xblock < 0 || yblock < 0 ||
         nx <= 0 || ny <= 0) return 0;

     MPI_Comm_size(comm, &n_pes);
     if (!xblock) xblock = XM(default_block)(nx, n_pes);
     if (!yblock) yblock = XM(default_block)(ny, n_pes);
     if (n_pes < XM(num_blocks)(nx, xblock)
         || n_pes < XM(num_blocks)(ny, yblock))
          return 0;

     return
          X(mkapiplan)(FFTW_FORWARD, flags,
                       XM(mkproblem_transpose)(nx, ny, howmany,
                                               in, out, xblock, yblock,
                                               comm, MPI_FLAGS(flags)));
}

X(plan) XM(plan_transpose)(ptrdiff_t nx, ptrdiff_t ny, R *in, R *out,
                           MPI_Comm comm, unsigned flags)
{
     return XM(plan_many_transpose)(nx, ny, 1,
                                    FFTW_MPI_DEFAULT_BLOCK,
                                    FFTW_MPI_DEFAULT_BLOCK,
                                    in, out, comm, flags);
}
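
/* Illustrative sketch (not part of the original source, compiled out
   below): transposing a distributed nx x ny real matrix in place with
   the default block distribution on both sides. */
#if 0
static void example_transpose(MPI_Comm comm, ptrdiff_t nx, ptrdiff_t ny)
{
     ptrdiff_t local_nx, local_x_start, local_ny, local_y_start;
     ptrdiff_t alloc_local = XM(local_size_2d_transposed)(
          nx, ny, comm,
          &local_nx, &local_x_start, &local_ny, &local_y_start);
     R *a = (R *) X(malloc)(sizeof(R) * alloc_local);
     X(plan) p = XM(plan_transpose)(nx, ny, a, a, comm, FFTW_MEASURE);

     /* input: local_nx rows of length ny starting at row local_x_start;
        output: local_ny rows of length nx starting at row local_y_start */
     X(execute)(p);

     X(destroy_plan)(p);
     X(free)(a);
}
#endif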

/*************************************************************************/
/* Complex DFT API */

X(plan) XM(plan_guru_dft)(int rnk, const XM(ddim) *dims0,
                          ptrdiff_t howmany,
                          C *in, C *out,
                          MPI_Comm comm, int sign, unsigned flags)
{
     int n_pes, i;
     dtensor *sz;

     XM(init)();

     if (howmany < 0 || rnk < 1) return 0;
     for (i = 0; i < rnk; ++i)
          if (dims0[i].n < 1 || dims0[i].ib < 0 || dims0[i].ob < 0)
               return 0;

     MPI_Comm_size(comm, &n_pes);
     sz = default_sz(rnk, dims0, n_pes, 0);

     if (XM(num_blocks_total)(sz, IB) > n_pes
         || XM(num_blocks_total)(sz, OB) > n_pes) {
          XM(dtensor_destroy)(sz);
          return 0;
     }

     return
          X(mkapiplan)(sign, flags,
                       XM(mkproblem_dft_d)(sz, howmany,
                                           (R *) in, (R *) out,
                                           comm, sign,
                                           MPI_FLAGS(flags)));
}

X(plan) XM(plan_many_dft)(int rnk, const ptrdiff_t *n,
                          ptrdiff_t howmany,
                          ptrdiff_t iblock, ptrdiff_t oblock,
                          C *in, C *out,
                          MPI_Comm comm, int sign, unsigned flags)
{
     XM(ddim) *dims = simple_dims(rnk, n);
     X(plan) pln;

     if (rnk == 1) {
          dims[0].ib = iblock;
          dims[0].ob = oblock;
     }
     else if (rnk > 1) {
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_IN)].ib = iblock;
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_OUT)].ob = oblock;
     }

     pln = XM(plan_guru_dft)(rnk, dims, howmany, in, out, comm, sign, flags);
     X(ifree)(dims);
     return pln;
}

X(plan) XM(plan_dft)(int rnk, const ptrdiff_t *n, C *in, C *out,
                     MPI_Comm comm, int sign, unsigned flags)
{
     return XM(plan_many_dft)(rnk, n, 1,
                              FFTW_MPI_DEFAULT_BLOCK,
                              FFTW_MPI_DEFAULT_BLOCK,
                              in, out, comm, sign, flags);
}

X(plan) XM(plan_dft_1d)(ptrdiff_t nx, C *in, C *out,
                        MPI_Comm comm, int sign, unsigned flags)
{
     return XM(plan_dft)(1, &nx, in, out, comm, sign, flags);
}

X(plan) XM(plan_dft_2d)(ptrdiff_t nx, ptrdiff_t ny, C *in, C *out,
                        MPI_Comm comm, int sign, unsigned flags)
{
     ptrdiff_t n[2];
     n[0] = nx; n[1] = ny;
     return XM(plan_dft)(2, n, in, out, comm, sign, flags);
}

X(plan) XM(plan_dft_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz,
                        C *in, C *out,
                        MPI_Comm comm, int sign, unsigned flags)
{
     ptrdiff_t n[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     return XM(plan_dft)(3, n, in, out, comm, sign, flags);
}
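
/* Illustrative sketch (not part of the original source, compiled out
   below): a complete 2d complex DFT over a communicator, tying the
   local_size, plan and execute calls together.  The sizes, flags and
   initialization values are arbitrary placeholders. */
#if 0
static void example_dft_2d(MPI_Comm comm)
{
     const ptrdiff_t n0 = 128, n1 = 128;
     ptrdiff_t local_n0, local_0_start, i, j;
     ptrdiff_t alloc_local = XM(local_size_2d)(n0, n1, comm,
                                               &local_n0, &local_0_start);
     C *data = (C *) X(malloc)(sizeof(C) * alloc_local);
     X(plan) p = XM(plan_dft_2d)(n0, n1, data, data, comm,
                                 FFTW_FORWARD, FFTW_ESTIMATE);

     /* initialize this process's local_n0 rows, which correspond to
        global rows local_0_start .. local_0_start + local_n0 - 1 */
     for (i = 0; i < local_n0; ++i)
          for (j = 0; j < n1; ++j) {
               data[i * n1 + j][0] = (R) (local_0_start + i);
               data[i * n1 + j][1] = (R) j;
          }

     X(execute)(p);

     X(destroy_plan)(p);
     X(free)(data);
}
#endif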

/*************************************************************************/
/* R2R API */

X(plan) XM(plan_guru_r2r)(int rnk, const XM(ddim) *dims0,
                          ptrdiff_t howmany,
                          R *in, R *out,
                          MPI_Comm comm, const X(r2r_kind) *kind,
                          unsigned flags)
{
     int n_pes, i;
     dtensor *sz;
     rdft_kind *k;
     X(plan) pln;

     XM(init)();

     if (howmany < 0 || rnk < 1) return 0;
     for (i = 0; i < rnk; ++i)
          if (dims0[i].n < 1 || dims0[i].ib < 0 || dims0[i].ob < 0)
               return 0;

     k = X(map_r2r_kind)(rnk, kind);

     MPI_Comm_size(comm, &n_pes);
     sz = default_sz(rnk, dims0, n_pes, 0);

     if (XM(num_blocks_total)(sz, IB) > n_pes
         || XM(num_blocks_total)(sz, OB) > n_pes) {
          XM(dtensor_destroy)(sz);
          return 0;
     }

     pln = X(mkapiplan)(0, flags,
                        XM(mkproblem_rdft_d)(sz, howmany,
                                             in, out,
                                             comm, k, MPI_FLAGS(flags)));
     X(ifree0)(k);
     return pln;
}

X(plan) XM(plan_many_r2r)(int rnk, const ptrdiff_t *n,
                          ptrdiff_t howmany,
                          ptrdiff_t iblock, ptrdiff_t oblock,
                          R *in, R *out,
                          MPI_Comm comm, const X(r2r_kind) *kind,
                          unsigned flags)
{
     XM(ddim) *dims = simple_dims(rnk, n);
     X(plan) pln;

     if (rnk == 1) {
          dims[0].ib = iblock;
          dims[0].ob = oblock;
     }
     else if (rnk > 1) {
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_IN)].ib = iblock;
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_OUT)].ob = oblock;
     }

     pln = XM(plan_guru_r2r)(rnk, dims, howmany, in, out, comm, kind, flags);
     X(ifree)(dims);
     return pln;
}

X(plan) XM(plan_r2r)(int rnk, const ptrdiff_t *n, R *in, R *out,
                     MPI_Comm comm,
                     const X(r2r_kind) *kind,
                     unsigned flags)
{
     return XM(plan_many_r2r)(rnk, n, 1,
                              FFTW_MPI_DEFAULT_BLOCK,
                              FFTW_MPI_DEFAULT_BLOCK,
                              in, out, comm, kind, flags);
}

X(plan) XM(plan_r2r_2d)(ptrdiff_t nx, ptrdiff_t ny, R *in, R *out,
                        MPI_Comm comm,
                        X(r2r_kind) kindx, X(r2r_kind) kindy,
                        unsigned flags)
{
     ptrdiff_t n[2];
     X(r2r_kind) kind[2];
     n[0] = nx; n[1] = ny;
     kind[0] = kindx; kind[1] = kindy;
     return XM(plan_r2r)(2, n, in, out, comm, kind, flags);
}

X(plan) XM(plan_r2r_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz,
                        R *in, R *out,
                        MPI_Comm comm,
                        X(r2r_kind) kindx, X(r2r_kind) kindy,
                        X(r2r_kind) kindz,
                        unsigned flags)
{
     ptrdiff_t n[3];
     X(r2r_kind) kind[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     kind[0] = kindx; kind[1] = kindy; kind[2] = kindz;
     return XM(plan_r2r)(3, n, in, out, comm, kind, flags);
}
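
/* Illustrative sketch (not part of the original source, compiled out
   below): an in-place 2d DCT-II over a communicator via the r2r
   interface.  The sizes and kind choices are arbitrary placeholders. */
#if 0
static void example_r2r_2d(MPI_Comm comm)
{
     const ptrdiff_t n0 = 100, n1 = 200;
     ptrdiff_t local_n0, local_0_start;
     ptrdiff_t alloc_local = XM(local_size_2d)(n0, n1, comm,
                                               &local_n0, &local_0_start);
     R *in = (R *) X(malloc)(sizeof(R) * alloc_local);
     X(plan) p = XM(plan_r2r_2d)(n0, n1, in, in, comm,
                                 FFTW_REDFT10, FFTW_REDFT10,
                                 FFTW_ESTIMATE);
     /* ... fill this process's local_n0 x n1 slab of `in', then ... */
     X(execute)(p);
     X(destroy_plan)(p);
     X(free)(in);
}
#endif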

/*************************************************************************/
/* R2C/C2R API */

static X(plan) plan_guru_rdft2(int rnk, const XM(ddim) *dims0,
                               ptrdiff_t howmany,
                               R *r, C *c,
                               MPI_Comm comm, rdft_kind kind, unsigned flags)
{
     int n_pes, i;
     dtensor *sz;
     R *cr = (R *) c;

     XM(init)();

     if (howmany < 0 || rnk < 2) return 0;
     for (i = 0; i < rnk; ++i)
          if (dims0[i].n < 1 || dims0[i].ib < 0 || dims0[i].ob < 0)
               return 0;

     MPI_Comm_size(comm, &n_pes);
     sz = default_sz(rnk, dims0, n_pes, 1);

     sz->dims[rnk-1].n = dims0[rnk-1].n / 2 + 1;
     if (XM(num_blocks_total)(sz, IB) > n_pes
         || XM(num_blocks_total)(sz, OB) > n_pes) {
          XM(dtensor_destroy)(sz);
          return 0;
     }
     sz->dims[rnk-1].n = dims0[rnk-1].n;

     if (kind == R2HC)
          return X(mkapiplan)(0, flags,
                              XM(mkproblem_rdft2_d)(sz, howmany,
                                                    r, cr, comm, R2HC,
                                                    MPI_FLAGS(flags)));
     else
          return X(mkapiplan)(0, flags,
                              XM(mkproblem_rdft2_d)(sz, howmany,
                                                    cr, r, comm, HC2R,
                                                    MPI_FLAGS(flags)));
}

X(plan) XM(plan_many_dft_r2c)(int rnk, const ptrdiff_t *n,
                              ptrdiff_t howmany,
                              ptrdiff_t iblock, ptrdiff_t oblock,
                              R *in, C *out,
                              MPI_Comm comm, unsigned flags)
{
     XM(ddim) *dims = simple_dims(rnk, n);
     X(plan) pln;

     if (rnk == 1) {
          dims[0].ib = iblock;
          dims[0].ob = oblock;
     }
     else if (rnk > 1) {
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_IN)].ib = iblock;
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_OUT)].ob = oblock;
     }

     pln = plan_guru_rdft2(rnk, dims, howmany, in, out, comm, R2HC, flags);
     X(ifree)(dims);
     return pln;
}

X(plan) XM(plan_many_dft_c2r)(int rnk, const ptrdiff_t *n,
                              ptrdiff_t howmany,
                              ptrdiff_t iblock, ptrdiff_t oblock,
                              C *in, R *out,
                              MPI_Comm comm, unsigned flags)
{
     XM(ddim) *dims = simple_dims(rnk, n);
     X(plan) pln;

     if (rnk == 1) {
          dims[0].ib = iblock;
          dims[0].ob = oblock;
     }
     else if (rnk > 1) {
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_IN)].ib = iblock;
          dims[0 != (flags & FFTW_MPI_TRANSPOSED_OUT)].ob = oblock;
     }

     pln = plan_guru_rdft2(rnk, dims, howmany, out, in, comm, HC2R, flags);
     X(ifree)(dims);
     return pln;
}

X(plan) XM(plan_dft_r2c)(int rnk, const ptrdiff_t *n, R *in, C *out,
                         MPI_Comm comm, unsigned flags)
{
     return XM(plan_many_dft_r2c)(rnk, n, 1,
                                  FFTW_MPI_DEFAULT_BLOCK,
                                  FFTW_MPI_DEFAULT_BLOCK,
                                  in, out, comm, flags);
}

X(plan) XM(plan_dft_r2c_2d)(ptrdiff_t nx, ptrdiff_t ny, R *in, C *out,
                            MPI_Comm comm, unsigned flags)
{
     ptrdiff_t n[2];
     n[0] = nx; n[1] = ny;
     return XM(plan_dft_r2c)(2, n, in, out, comm, flags);
}

X(plan) XM(plan_dft_r2c_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz,
                            R *in, C *out, MPI_Comm comm, unsigned flags)
{
     ptrdiff_t n[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     return XM(plan_dft_r2c)(3, n, in, out, comm, flags);
}

X(plan) XM(plan_dft_c2r)(int rnk, const ptrdiff_t *n, C *in, R *out,
                         MPI_Comm comm, unsigned flags)
{
     return XM(plan_many_dft_c2r)(rnk, n, 1,
                                  FFTW_MPI_DEFAULT_BLOCK,
                                  FFTW_MPI_DEFAULT_BLOCK,
                                  in, out, comm, flags);
}

X(plan) XM(plan_dft_c2r_2d)(ptrdiff_t nx, ptrdiff_t ny, C *in, R *out,
                            MPI_Comm comm, unsigned flags)
{
     ptrdiff_t n[2];
     n[0] = nx; n[1] = ny;
     return XM(plan_dft_c2r)(2, n, in, out, comm, flags);
}

X(plan) XM(plan_dft_c2r_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz,
                            C *in, R *out, MPI_Comm comm, unsigned flags)
{
     ptrdiff_t n[3];
     n[0] = nx; n[1] = ny; n[2] = nz;
     return XM(plan_dft_c2r)(3, n, in, out, comm, flags);
}
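
/* Illustrative sketch (not part of the original source, compiled out
   below): an out-of-place 2d r2c transform.  As in the serial API the
   complex output has n1/2+1 columns, so the local sizes are queried for
   an n0 x (n1/2+1) complex array; the padded-row real layout follows the
   convention assumed for the MPI r2c interface, and the sizes and fill
   values are placeholders. */
#if 0
static void example_dft_r2c_2d(MPI_Comm comm)
{
     const ptrdiff_t n0 = 256, n1 = 256;
     ptrdiff_t local_n0, local_0_start, i, j;
     ptrdiff_t alloc_local = XM(local_size_2d)(n0, n1/2 + 1, comm,
                                               &local_n0, &local_0_start);
     R *in = (R *) X(malloc)(sizeof(R) * 2 * alloc_local);
     C *out = (C *) X(malloc)(sizeof(C) * alloc_local);
     X(plan) p = XM(plan_dft_r2c_2d)(n0, n1, in, out, comm, FFTW_ESTIMATE);

     /* fill this process's local_n0 real rows; each row is padded to
        2*(n1/2+1) reals, as in the in-place serial r2c format */
     for (i = 0; i < local_n0; ++i)
          for (j = 0; j < n1; ++j)
               in[i * 2*(n1/2 + 1) + j] = (R) ((local_0_start + i) + j);

     X(execute)(p);

     X(destroy_plan)(p);
     X(free)(in); X(free)(out);
}
#endif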

/*************************************************************************/
/* New-array execute functions */

void XM(execute_dft)(const X(plan) p, C *in, C *out) {
     /* internally, MPI plans are just rdft plans */
     X(execute_r2r)(p, (R *) in, (R *) out);
}

void XM(execute_dft_r2c)(const X(plan) p, R *in, C *out) {
     /* internally, MPI plans are just rdft plans */
     X(execute_r2r)(p, in, (R *) out);
}

void XM(execute_dft_c2r)(const X(plan) p, C *in, R *out) {
     /* internally, MPI plans are just rdft plans */
     X(execute_r2r)(p, (R *) in, out);
}

void XM(execute_r2r)(const X(plan) p, R *in, R *out) {
     /* internally, MPI plans are just rdft plans */
     X(execute_r2r)(p, in, out);
}
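
/* (Illustrative note, not in the original source: these new-array
   execute functions apply an existing MPI plan to different arrays,
   which are assumed to have the same size, alignment, in-place/
   out-of-place character and block distribution as the arrays the plan
   was created with, e.g.

       XM(execute_dft)(p, another_in, another_out);

   mirroring the serial new-array execute interface.) */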