annotate src/fftw-3.3.5/dft/buffered.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21
Chris@42 22 #include "dft.h"
Chris@42 23
Chris@42 24 typedef struct {
Chris@42 25 solver super;
Chris@42 26 size_t maxnbuf_ndx;
Chris@42 27 } S;
Chris@42 28
Chris@42 29 static const INT maxnbufs[] = { 8, 256 };
Chris@42 30
Chris@42 31 typedef struct {
Chris@42 32 plan_dft super;
Chris@42 33
Chris@42 34 plan *cld, *cldcpy, *cldrest;
Chris@42 35 INT n, vl, nbuf, bufdist;
Chris@42 36 INT ivs_by_nbuf, ovs_by_nbuf;
Chris@42 37 INT roffset, ioffset;
Chris@42 38 } P;
Chris@42 39
Chris@42 40 /* transform a vector input with the help of bufs */
Chris@42 41 static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
Chris@42 42 {
Chris@42 43 const P *ego = (const P *) ego_;
Chris@42 44 INT nbuf = ego->nbuf;
Chris@42 45 R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist * 2, BUFFERS);
Chris@42 46
Chris@42 47 plan_dft *cld = (plan_dft *) ego->cld;
Chris@42 48 plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
Chris@42 49 plan_dft *cldrest;
Chris@42 50 INT i, vl = ego->vl;
Chris@42 51 INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
Chris@42 52 INT roffset = ego->roffset, ioffset = ego->ioffset;
Chris@42 53
Chris@42 54 for (i = nbuf; i <= vl; i += nbuf) {
Chris@42 55 /* transform to bufs: */
Chris@42 56 cld->apply((plan *) cld, ri, ii, bufs + roffset, bufs + ioffset);
Chris@42 57 ri += ivs_by_nbuf; ii += ivs_by_nbuf;
Chris@42 58
Chris@42 59 /* copy back */
Chris@42 60 cldcpy->apply((plan *) cldcpy, bufs+roffset, bufs+ioffset, ro, io);
Chris@42 61 ro += ovs_by_nbuf; io += ovs_by_nbuf;
Chris@42 62 }
Chris@42 63
Chris@42 64 X(ifree)(bufs);
Chris@42 65
Chris@42 66 /* Do the remaining transforms, if any: */
Chris@42 67 cldrest = (plan_dft *) ego->cldrest;
Chris@42 68 cldrest->apply((plan *) cldrest, ri, ii, ro, io);
Chris@42 69 }
Chris@42 70
Chris@42 71
Chris@42 72 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@42 73 {
Chris@42 74 P *ego = (P *) ego_;
Chris@42 75
Chris@42 76 X(plan_awake)(ego->cld, wakefulness);
Chris@42 77 X(plan_awake)(ego->cldcpy, wakefulness);
Chris@42 78 X(plan_awake)(ego->cldrest, wakefulness);
Chris@42 79 }
Chris@42 80
Chris@42 81 static void destroy(plan *ego_)
Chris@42 82 {
Chris@42 83 P *ego = (P *) ego_;
Chris@42 84 X(plan_destroy_internal)(ego->cldrest);
Chris@42 85 X(plan_destroy_internal)(ego->cldcpy);
Chris@42 86 X(plan_destroy_internal)(ego->cld);
Chris@42 87 }
Chris@42 88
Chris@42 89 static void print(const plan *ego_, printer *p)
Chris@42 90 {
Chris@42 91 const P *ego = (const P *) ego_;
Chris@42 92 p->print(p, "(dft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
Chris@42 93 ego->n, ego->nbuf,
Chris@42 94 ego->vl, ego->bufdist % ego->n,
Chris@42 95 ego->cld, ego->cldcpy, ego->cldrest);
Chris@42 96 }
Chris@42 97
Chris@42 98 static int applicable0(const S *ego, const problem *p_, const planner *plnr)
Chris@42 99 {
Chris@42 100 const problem_dft *p = (const problem_dft *) p_;
Chris@42 101 const iodim *d = p->sz->dims;
Chris@42 102
Chris@42 103 if (1
Chris@42 104 && p->vecsz->rnk <= 1
Chris@42 105 && p->sz->rnk == 1
Chris@42 106 ) {
Chris@42 107 INT vl, ivs, ovs;
Chris@42 108 X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
Chris@42 109
Chris@42 110 if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
Chris@42 111 return 0;
Chris@42 112
Chris@42 113 /* if this solver is redundant, in the sense that a solver
Chris@42 114 of lower index generates the same plan, then prune this
Chris@42 115 solver */
Chris@42 116 if (X(nbuf_redundant)(d[0].n, vl,
Chris@42 117 ego->maxnbuf_ndx,
Chris@42 118 maxnbufs, NELEM(maxnbufs)))
Chris@42 119 return 0;
Chris@42 120
Chris@42 121 /*
Chris@42 122 In principle, the buffered transforms might be useful
Chris@42 123 when working out of place. However, in order to
Chris@42 124 prevent infinite loops in the planner, we require
Chris@42 125 that the output stride of the buffered transforms be
Chris@42 126 greater than 2.
Chris@42 127 */
Chris@42 128 if (p->ri != p->ro)
Chris@42 129 return (d[0].os > 2);
Chris@42 130
Chris@42 131 /*
Chris@42 132 * If the problem is in place, the input/output strides must
Chris@42 133 * be the same or the whole thing must fit in the buffer.
Chris@42 134 */
Chris@42 135 if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
Chris@42 136 return 1;
Chris@42 137
Chris@42 138 if (/* fits into buffer: */
Chris@42 139 ((p->vecsz->rnk == 0)
Chris@42 140 ||
Chris@42 141 (X(nbuf)(d[0].n, p->vecsz->dims[0].n,
Chris@42 142 maxnbufs[ego->maxnbuf_ndx])
Chris@42 143 == p->vecsz->dims[0].n)))
Chris@42 144 return 1;
Chris@42 145 }
Chris@42 146
Chris@42 147 return 0;
Chris@42 148 }
Chris@42 149
Chris@42 150 static int applicable(const S *ego, const problem *p_, const planner *plnr)
Chris@42 151 {
Chris@42 152 if (NO_BUFFERINGP(plnr)) return 0;
Chris@42 153 if (!applicable0(ego, p_, plnr)) return 0;
Chris@42 154
Chris@42 155 if (NO_UGLYP(plnr)) {
Chris@42 156 const problem_dft *p = (const problem_dft *) p_;
Chris@42 157 if (p->ri != p->ro) return 0;
Chris@42 158 if (X(toobig)(p->sz->dims[0].n)) return 0;
Chris@42 159 }
Chris@42 160 return 1;
Chris@42 161 }
Chris@42 162
Chris@42 163 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
Chris@42 164 {
Chris@42 165 P *pln;
Chris@42 166 const S *ego = (const S *)ego_;
Chris@42 167 plan *cld = (plan *) 0;
Chris@42 168 plan *cldcpy = (plan *) 0;
Chris@42 169 plan *cldrest = (plan *) 0;
Chris@42 170 const problem_dft *p = (const problem_dft *) p_;
Chris@42 171 R *bufs = (R *) 0;
Chris@42 172 INT nbuf = 0, bufdist, n, vl;
Chris@42 173 INT ivs, ovs, roffset, ioffset;
Chris@42 174
Chris@42 175 static const plan_adt padt = {
Chris@42 176 X(dft_solve), awake, print, destroy
Chris@42 177 };
Chris@42 178
Chris@42 179 if (!applicable(ego, p_, plnr))
Chris@42 180 goto nada;
Chris@42 181
Chris@42 182 n = X(tensor_sz)(p->sz);
Chris@42 183
Chris@42 184 X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
Chris@42 185
Chris@42 186 nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
Chris@42 187 bufdist = X(bufdist)(n, vl);
Chris@42 188 A(nbuf > 0);
Chris@42 189
Chris@42 190 /* attempt to keep real and imaginary part in the same order,
Chris@42 191 so as to allow optimizations in the the copy plan */
Chris@42 192 roffset = (p->ri - p->ii > 0) ? (INT)1 : (INT)0;
Chris@42 193 ioffset = 1 - roffset;
Chris@42 194
Chris@42 195 /* initial allocation for the purpose of planning */
Chris@42 196 bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist * 2, BUFFERS);
Chris@42 197
Chris@42 198 /* allow destruction of input if problem is in place */
Chris@42 199 cld = X(mkplan_f_d)(plnr,
Chris@42 200 X(mkproblem_dft_d)(
Chris@42 201 X(mktensor_1d)(n, p->sz->dims[0].is, 2),
Chris@42 202 X(mktensor_1d)(nbuf, ivs, bufdist * 2),
Chris@42 203 TAINT(p->ri, ivs * nbuf),
Chris@42 204 TAINT(p->ii, ivs * nbuf),
Chris@42 205 bufs + roffset,
Chris@42 206 bufs + ioffset),
Chris@42 207 0, 0, (p->ri == p->ro) ? NO_DESTROY_INPUT : 0);
Chris@42 208 if (!cld)
Chris@42 209 goto nada;
Chris@42 210
Chris@42 211 /* copying back from the buffer is a rank-0 transform: */
Chris@42 212 cldcpy = X(mkplan_d)(plnr,
Chris@42 213 X(mkproblem_dft_d)(
Chris@42 214 X(mktensor_0d)(),
Chris@42 215 X(mktensor_2d)(nbuf, bufdist * 2, ovs,
Chris@42 216 n, 2, p->sz->dims[0].os),
Chris@42 217 bufs + roffset,
Chris@42 218 bufs + ioffset,
Chris@42 219 TAINT(p->ro, ovs * nbuf),
Chris@42 220 TAINT(p->io, ovs * nbuf)));
Chris@42 221 if (!cldcpy)
Chris@42 222 goto nada;
Chris@42 223
Chris@42 224 /* deallocate buffers, let apply() allocate them for real */
Chris@42 225 X(ifree)(bufs);
Chris@42 226 bufs = 0;
Chris@42 227
Chris@42 228 /* plan the leftover transforms (cldrest): */
Chris@42 229 {
Chris@42 230 INT id = ivs * (nbuf * (vl / nbuf));
Chris@42 231 INT od = ovs * (nbuf * (vl / nbuf));
Chris@42 232 cldrest = X(mkplan_d)(plnr,
Chris@42 233 X(mkproblem_dft_d)(
Chris@42 234 X(tensor_copy)(p->sz),
Chris@42 235 X(mktensor_1d)(vl % nbuf, ivs, ovs),
Chris@42 236 p->ri+id, p->ii+id, p->ro+od, p->io+od));
Chris@42 237 }
Chris@42 238 if (!cldrest)
Chris@42 239 goto nada;
Chris@42 240
Chris@42 241 pln = MKPLAN_DFT(P, &padt, apply);
Chris@42 242 pln->cld = cld;
Chris@42 243 pln->cldcpy = cldcpy;
Chris@42 244 pln->cldrest = cldrest;
Chris@42 245 pln->n = n;
Chris@42 246 pln->vl = vl;
Chris@42 247 pln->ivs_by_nbuf = ivs * nbuf;
Chris@42 248 pln->ovs_by_nbuf = ovs * nbuf;
Chris@42 249 pln->roffset = roffset;
Chris@42 250 pln->ioffset = ioffset;
Chris@42 251
Chris@42 252 pln->nbuf = nbuf;
Chris@42 253 pln->bufdist = bufdist;
Chris@42 254
Chris@42 255 {
Chris@42 256 opcnt t;
Chris@42 257 X(ops_add)(&cld->ops, &cldcpy->ops, &t);
Chris@42 258 X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
Chris@42 259 }
Chris@42 260
Chris@42 261 return &(pln->super.super);
Chris@42 262
Chris@42 263 nada:
Chris@42 264 X(ifree0)(bufs);
Chris@42 265 X(plan_destroy_internal)(cldrest);
Chris@42 266 X(plan_destroy_internal)(cldcpy);
Chris@42 267 X(plan_destroy_internal)(cld);
Chris@42 268 return (plan *) 0;
Chris@42 269 }
Chris@42 270
Chris@42 271 static solver *mksolver(size_t maxnbuf_ndx)
Chris@42 272 {
Chris@42 273 static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
Chris@42 274 S *slv = MKSOLVER(S, &sadt);
Chris@42 275 slv->maxnbuf_ndx = maxnbuf_ndx;
Chris@42 276 return &(slv->super);
Chris@42 277 }
Chris@42 278
Chris@42 279 void X(dft_buffered_register)(planner *p)
Chris@42 280 {
Chris@42 281 size_t i;
Chris@42 282 for (i = 0; i < NELEM(maxnbufs); ++i)
Chris@42 283 REGISTER_SOLVER(p, mksolver(i));
Chris@42 284 }