annotate src/fftw-3.3.5/dft/rank-geq2.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21
Chris@42 22 /* plans for DFT of rank >= 2 (multidimensional) */
Chris@42 23
Chris@42 24 #include "dft.h"
Chris@42 25
Chris@42 26 typedef struct {
Chris@42 27 solver super;
Chris@42 28 int spltrnk;
Chris@42 29 const int *buddies;
Chris@42 30 size_t nbuddies;
Chris@42 31 } S;
Chris@42 32
Chris@42 33 typedef struct {
Chris@42 34 plan_dft super;
Chris@42 35
Chris@42 36 plan *cld1, *cld2;
Chris@42 37 const S *solver;
Chris@42 38 } P;
Chris@42 39
Chris@42 40 /* Compute multi-dimensional DFT by applying the two cld plans
Chris@42 41 (lower-rnk DFTs). */
Chris@42 42 static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
Chris@42 43 {
Chris@42 44 const P *ego = (const P *) ego_;
Chris@42 45 plan_dft *cld1, *cld2;
Chris@42 46
Chris@42 47 cld1 = (plan_dft *) ego->cld1;
Chris@42 48 cld1->apply(ego->cld1, ri, ii, ro, io);
Chris@42 49
Chris@42 50 cld2 = (plan_dft *) ego->cld2;
Chris@42 51 cld2->apply(ego->cld2, ro, io, ro, io);
Chris@42 52 }
Chris@42 53
Chris@42 54
Chris@42 55 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@42 56 {
Chris@42 57 P *ego = (P *) ego_;
Chris@42 58 X(plan_awake)(ego->cld1, wakefulness);
Chris@42 59 X(plan_awake)(ego->cld2, wakefulness);
Chris@42 60 }
Chris@42 61
Chris@42 62 static void destroy(plan *ego_)
Chris@42 63 {
Chris@42 64 P *ego = (P *) ego_;
Chris@42 65 X(plan_destroy_internal)(ego->cld2);
Chris@42 66 X(plan_destroy_internal)(ego->cld1);
Chris@42 67 }
Chris@42 68
Chris@42 69 static void print(const plan *ego_, printer *p)
Chris@42 70 {
Chris@42 71 const P *ego = (const P *) ego_;
Chris@42 72 const S *s = ego->solver;
Chris@42 73 p->print(p, "(dft-rank>=2/%d%(%p%)%(%p%))",
Chris@42 74 s->spltrnk, ego->cld1, ego->cld2);
Chris@42 75 }
Chris@42 76
Chris@42 77 static int picksplit(const S *ego, const tensor *sz, int *rp)
Chris@42 78 {
Chris@42 79 A(sz->rnk > 1); /* cannot split rnk <= 1 */
Chris@42 80 if (!X(pickdim)(ego->spltrnk, ego->buddies, ego->nbuddies, sz, 1, rp))
Chris@42 81 return 0;
Chris@42 82 *rp += 1; /* convert from dim. index to rank */
Chris@42 83 if (*rp >= sz->rnk) /* split must reduce rank */
Chris@42 84 return 0;
Chris@42 85 return 1;
Chris@42 86 }
Chris@42 87
Chris@42 88 static int applicable0(const solver *ego_, const problem *p_, int *rp)
Chris@42 89 {
Chris@42 90 const problem_dft *p = (const problem_dft *) p_;
Chris@42 91 const S *ego = (const S *)ego_;
Chris@42 92 return (1
Chris@42 93 && FINITE_RNK(p->sz->rnk) && FINITE_RNK(p->vecsz->rnk)
Chris@42 94 && p->sz->rnk >= 2
Chris@42 95 && picksplit(ego, p->sz, rp)
Chris@42 96 );
Chris@42 97 }
Chris@42 98
Chris@42 99 /* TODO: revise this. */
Chris@42 100 static int applicable(const solver *ego_, const problem *p_,
Chris@42 101 const planner *plnr, int *rp)
Chris@42 102 {
Chris@42 103 const S *ego = (const S *)ego_;
Chris@42 104 const problem_dft *p = (const problem_dft *) p_;
Chris@42 105
Chris@42 106 if (!applicable0(ego_, p_, rp)) return 0;
Chris@42 107
Chris@42 108 if (NO_RANK_SPLITSP(plnr) && (ego->spltrnk != ego->buddies[0])) return 0;
Chris@42 109
Chris@42 110 /* Heuristic: if the vector stride is greater than the transform
Chris@42 111 sz, don't use (prefer to do the vector loop first with a
Chris@42 112 vrank-geq1 plan). */
Chris@42 113 if (NO_UGLYP(plnr))
Chris@42 114 if (p->vecsz->rnk > 0 &&
Chris@42 115 X(tensor_min_stride)(p->vecsz) > X(tensor_max_index)(p->sz))
Chris@42 116 return 0;
Chris@42 117
Chris@42 118 return 1;
Chris@42 119 }
Chris@42 120
Chris@42 121 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
Chris@42 122 {
Chris@42 123 const S *ego = (const S *) ego_;
Chris@42 124 const problem_dft *p;
Chris@42 125 P *pln;
Chris@42 126 plan *cld1 = 0, *cld2 = 0;
Chris@42 127 tensor *sz1, *sz2, *vecszi, *sz2i;
Chris@42 128 int spltrnk;
Chris@42 129
Chris@42 130 static const plan_adt padt = {
Chris@42 131 X(dft_solve), awake, print, destroy
Chris@42 132 };
Chris@42 133
Chris@42 134 if (!applicable(ego_, p_, plnr, &spltrnk))
Chris@42 135 return (plan *) 0;
Chris@42 136
Chris@42 137 p = (const problem_dft *) p_;
Chris@42 138 X(tensor_split)(p->sz, &sz1, spltrnk, &sz2);
Chris@42 139 vecszi = X(tensor_copy_inplace)(p->vecsz, INPLACE_OS);
Chris@42 140 sz2i = X(tensor_copy_inplace)(sz2, INPLACE_OS);
Chris@42 141
Chris@42 142 cld1 = X(mkplan_d)(plnr,
Chris@42 143 X(mkproblem_dft_d)(X(tensor_copy)(sz2),
Chris@42 144 X(tensor_append)(p->vecsz, sz1),
Chris@42 145 p->ri, p->ii, p->ro, p->io));
Chris@42 146 if (!cld1) goto nada;
Chris@42 147
Chris@42 148 cld2 = X(mkplan_d)(plnr,
Chris@42 149 X(mkproblem_dft_d)(
Chris@42 150 X(tensor_copy_inplace)(sz1, INPLACE_OS),
Chris@42 151 X(tensor_append)(vecszi, sz2i),
Chris@42 152 p->ro, p->io, p->ro, p->io));
Chris@42 153 if (!cld2) goto nada;
Chris@42 154
Chris@42 155 pln = MKPLAN_DFT(P, &padt, apply);
Chris@42 156
Chris@42 157 pln->cld1 = cld1;
Chris@42 158 pln->cld2 = cld2;
Chris@42 159
Chris@42 160 pln->solver = ego;
Chris@42 161 X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
Chris@42 162
Chris@42 163 X(tensor_destroy4)(sz1, sz2, vecszi, sz2i);
Chris@42 164
Chris@42 165 return &(pln->super.super);
Chris@42 166
Chris@42 167 nada:
Chris@42 168 X(plan_destroy_internal)(cld2);
Chris@42 169 X(plan_destroy_internal)(cld1);
Chris@42 170 X(tensor_destroy4)(sz1, sz2, vecszi, sz2i);
Chris@42 171 return (plan *) 0;
Chris@42 172 }
Chris@42 173
Chris@42 174 static solver *mksolver(int spltrnk, const int *buddies, size_t nbuddies)
Chris@42 175 {
Chris@42 176 static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
Chris@42 177 S *slv = MKSOLVER(S, &sadt);
Chris@42 178 slv->spltrnk = spltrnk;
Chris@42 179 slv->buddies = buddies;
Chris@42 180 slv->nbuddies = nbuddies;
Chris@42 181 return &(slv->super);
Chris@42 182 }
Chris@42 183
Chris@42 184 void X(dft_rank_geq2_register)(planner *p)
Chris@42 185 {
Chris@42 186 static const int buddies[] = { 1, 0, -2 };
Chris@42 187 size_t i;
Chris@42 188
Chris@42 189 for (i = 0; i < NELEM(buddies); ++i)
Chris@42 190 REGISTER_SOLVER(p, mksolver(buddies[i], buddies, NELEM(buddies)));
Chris@42 191
Chris@42 192 /* FIXME:
Chris@42 193
Chris@42 194 Should we try more buddies?
Chris@42 195
Chris@42 196 Another possible variant is to swap cld1 and cld2 (or rather,
Chris@42 197 to swap their problems; they are not interchangeable because
Chris@42 198 cld2 must be in-place). In past versions of FFTW, however, I
Chris@42 199 seem to recall that such rearrangements have made little or no
Chris@42 200 difference.
Chris@42 201 */
Chris@42 202 }