annotate src/fftw-3.3.5/mpi/ifftw-mpi.h @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* FFTW-MPI internal header file */
Chris@42 22 #ifndef __IFFTW_MPI_H__
Chris@42 23 #define __IFFTW_MPI_H__
Chris@42 24
Chris@42 25 #include "ifftw.h"
Chris@42 26 #include "rdft.h"
Chris@42 27
Chris@42 28 #include <mpi.h>
Chris@42 29
Chris@42 30 /* mpi problem flags: problem-dependent meaning, but in general
Chris@42 31 SCRAMBLED means some reordering *within* the dimensions, while
Chris@42 32 TRANSPOSED means some reordering *of* the dimensions */
Chris@42 33 #define SCRAMBLED_IN (1 << 0) /* each flag in this group is a distinct single bit, so flags can be combined with | */
Chris@42 34 #define SCRAMBLED_OUT (1 << 1)
Chris@42 35 #define TRANSPOSED_IN (1 << 2)
Chris@42 36 #define TRANSPOSED_OUT (1 << 3)
Chris@42 37 #define RANK1_BIGVEC_ONLY (1 << 4) /* for rank=1, allow only bigvec solver */
Chris@42 38
Chris@42 39 #define ONLY_SCRAMBLEDP(flags) (!((flags) & ~(SCRAMBLED_IN|SCRAMBLED_OUT))) /* 1 iff flags has no bit set outside SCRAMBLED_IN|SCRAMBLED_OUT (so also 1 for flags == 0) */
Chris@42 40 #define ONLY_TRANSPOSEDP(flags) (!((flags) & ~(TRANSPOSED_IN|TRANSPOSED_OUT))) /* 1 iff flags has no bit set outside TRANSPOSED_IN|TRANSPOSED_OUT (so also 1 for flags == 0) */
Chris@42 41
Chris@42 42 #if defined(FFTW_SINGLE) /* choose FFTW_MPI_TYPE from the FFTW_SINGLE / FFTW_LDOUBLE / FFTW_QUAD macros */
Chris@42 43 # define FFTW_MPI_TYPE MPI_FLOAT
Chris@42 44 #elif defined(FFTW_LDOUBLE)
Chris@42 45 # define FFTW_MPI_TYPE MPI_LONG_DOUBLE
Chris@42 46 #elif defined(FFTW_QUAD) /* no MPI datatype is mapped for this case, so compilation is stopped */
Chris@42 47 # error MPI quad-precision type is unknown
Chris@42 48 #else /* none of the three macros is defined */
Chris@42 49 # define FFTW_MPI_TYPE MPI_DOUBLE
Chris@42 50 #endif
Chris@42 51
Chris@42 52 /* all fftw-mpi identifiers start with fftw_mpi (or fftwf_mpi etc.) */
Chris@42 53 #define XM(name) X(CONCAT(mpi_, name)) /* X and CONCAT are not defined in this header; presumably they come from ifftw.h -- confirm */
Chris@42 54
Chris@42 55 /***********************************************************************/
Chris@42 56 /* block distributions */
Chris@42 57
Chris@42 58 /* a distributed dimension of length n with input and output block
Chris@42 59 sizes ib and ob, respectively. */
Chris@42 60 typedef enum { IB = 0, OB } block_kind; /* IB = 0 and OB = 1 are the two indices of ddim.b[] */
Chris@42 61 typedef struct {
Chris@42 62 INT n; /* length of the distributed dimension */
Chris@42 63 INT b[2]; /* b[IB], b[OB]: input and output block sizes */
Chris@42 64 } ddim;
Chris@42 65
Chris@42 66 /* Loop over k in {IB, OB}. Note: need explicit casts for C++. */
Chris@42 67 #define FORALL_BLOCK_KIND(k) for (k = IB; k <= OB; k = (block_kind) (((int) k) + 1)) /* k must be an assignable block_kind variable (it is assigned and re-read each iteration); expands to a bare for-header, so the loop body follows the macro use */
Chris@42 68
Chris@42 69 /* unlike tensors in the serial FFTW, the ordering of the dtensor
Chris@42 70 dimensions matters - both the array and the block layout are
Chris@42 71 row-major order. */
Chris@42 72 typedef struct {
Chris@42 73 int rnk; /* rank; presumably the number of entries in dims -- confirm in dtensor.c */
Chris@42 74 #if defined(STRUCT_HACK_KR) /* storage of dims depends on the build: one-element trailing array, C99 flexible array member, or a separate pointer */
Chris@42 75 ddim dims[1];
Chris@42 76 #elif defined(STRUCT_HACK_C99)
Chris@42 77 ddim dims[];
Chris@42 78 #else
Chris@42 79 ddim *dims;
Chris@42 80 #endif
Chris@42 81 } dtensor;
Chris@42 82
Chris@42 83
Chris@42 84 /* dtensor.c: */
Chris@42 85 dtensor *XM(mkdtensor)(int rnk); /* NOTE(review): presumably creates a dtensor with rnk dimensions -- confirm in dtensor.c */
Chris@42 86 void XM(dtensor_destroy)(dtensor *sz);
Chris@42 87 dtensor *XM(dtensor_copy)(const dtensor *sz); /* sz is read-only; NOTE(review): ownership of the returned dtensor is not visible in this header */
Chris@42 88 dtensor *XM(dtensor_canonical)(const dtensor *sz, int compress);
Chris@42 89 int XM(dtensor_validp)(const dtensor *sz);
Chris@42 90 void XM(dtensor_md5)(md5 *p, const dtensor *t); /* t is read-only, p is writable */
Chris@42 91 void XM(dtensor_print)(const dtensor *t, printer *p); /* t is read-only, p is writable */
Chris@42 92
Chris@42 93 /* block.c: */
Chris@42 94
Chris@42 95 /* for a single distributed dimension: */
Chris@42 96 INT XM(num_blocks)(INT n, INT block); /* NOTE(review): n and block presumably correspond to ddim.n and one ddim.b[] entry -- confirm in block.c */
Chris@42 97 int XM(num_blocks_ok)(INT n, INT block, MPI_Comm comm);
Chris@42 98 INT XM(default_block)(INT n, int n_pes);
Chris@42 99 INT XM(block)(INT n, INT block, int which_block);
Chris@42 100
Chris@42 101 /* for multiple distributed dimensions: */
Chris@42 102 INT XM(num_blocks_total)(const dtensor *sz, block_kind k); /* NOTE(review): in this group k presumably selects the IB or OB block size of each dimension of sz -- confirm in block.c */
Chris@42 103 int XM(idle_process)(const dtensor *sz, block_kind k, int which_pe);
Chris@42 104 void XM(block_coords)(const dtensor *sz, block_kind k, int which_pe,
Chris@42 105 INT *coords); /* coords is the only writable argument; NOTE(review): its required length is not visible here (presumably sz->rnk) */
Chris@42 106 INT XM(total_block)(const dtensor *sz, block_kind k, int which_pe);
Chris@42 107 int XM(is_local_after)(int dim, const dtensor *sz, block_kind k);
Chris@42 108 int XM(is_local)(const dtensor *sz, block_kind k);
Chris@42 109 int XM(is_block1d)(const dtensor *sz, block_kind k);
Chris@42 110
Chris@42 111 /* choose-radix.c */
Chris@42 112 INT XM(choose_radix)(ddim d, int n_pes, unsigned flags, int sign, /* d is passed by value */
Chris@42 113 INT rblock[2], INT mblock[2]); /* array parameters decay to INT pointers; NOTE(review): presumably two entries each, indexed by IB/OB like ddim.b -- confirm in choose-radix.c */
Chris@42 114
Chris@42 115 /***********************************************************************/
Chris@42 116 /* any_true.c */
Chris@42 117 int XM(any_true)(int condition, MPI_Comm comm); /* NOTE(review): presumably nonzero if condition is nonzero on any process of comm -- confirm in any_true.c */
Chris@42 118 int XM(md5_equal)(md5 m, MPI_Comm comm); /* m is passed by value */
Chris@42 119
Chris@42 120 /* conf.c */
Chris@42 121 void XM(conf_standard)(planner *p); /* NOTE(review): presumably configures planner p with the standard MPI solvers -- confirm in conf.c */
Chris@42 122
Chris@42 123 /***********************************************************************/
Chris@42 124 /* rearrange.c */
Chris@42 125
Chris@42 126 /* Different ways to rearrange the vector dimension vn during transposition,
Chris@42 127 reflecting different tradeoffs between ease of transposition and
Chris@42 128 contiguity during the subsequent DFTs.
Chris@42 129
Chris@42 130 TODO: can we pare this down to CONTIG and DISCONTIG, at least
Chris@42 131 in MEASURE mode? SQUARE_MIDDLE is also used for 1d destroy-input DFTs. */
Chris@42 132 typedef enum { /* enumerator order matters: FORALL_REARRANGE steps from CONTIG to SQUARE_MIDDLE by incrementing the value */
Chris@42 133 CONTIG = 0, /* vn x 1: make subsequent DFTs contiguous */
Chris@42 134 DISCONTIG, /* P x (vn/P) for P processes */
Chris@42 135 SQUARE_BEFORE, /* try to get square transpose at beginning */
Chris@42 136 SQUARE_MIDDLE, /* try to get square transpose in the middle */
Chris@42 137 SQUARE_AFTER /* try to get square transpose at end */
Chris@42 138 } rearrangement;
Chris@42 139
Chris@42 140 /* skipping SQUARE_AFTER since it doesn't seem to offer any advantage
Chris@42 141 over SQUARE_BEFORE */
Chris@42 142 #define FORALL_REARRANGE(rearrange) for (rearrange = CONTIG; rearrange <= SQUARE_MIDDLE; rearrange = (rearrangement) (((int) rearrange) + 1)) /* rearrange must be an assignable rearrangement variable; visits CONTIG, DISCONTIG, SQUARE_BEFORE, SQUARE_MIDDLE in that order; expands to a bare for-header */
Chris@42 143
Chris@42 144 int XM(rearrange_applicable)(rearrangement rearrange,
Chris@42 145 ddim dim0, INT vn, int n_pes); /* dim0 is passed by value; vn is the vector dimension being rearranged; NOTE(review): n_pes is presumably the number of processes -- confirm in rearrange.c */
Chris@42 146 INT XM(rearrange_ny)(rearrangement rearrange, ddim dim0, INT vn, int n_pes); /* same parameter list as rearrange_applicable, but returns INT */
Chris@42 147
Chris@42 148 /***********************************************************************/
Chris@42 149
Chris@42 150 #endif /* __IFFTW_MPI_H__ */
Chris@42 151