annotate src/fftw-3.3.5/rdft/buffered.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21
Chris@42 22 #include "rdft.h"
Chris@42 23
Chris@42 24 typedef struct {
Chris@42 25 solver super;
Chris@42 26 size_t maxnbuf_ndx;
Chris@42 27 } S;
Chris@42 28
Chris@42 29 static const INT maxnbufs[] = { 8, 256 };
Chris@42 30
Chris@42 31 typedef struct {
Chris@42 32 plan_rdft super;
Chris@42 33
Chris@42 34 plan *cld, *cldcpy, *cldrest;
Chris@42 35 INT n, vl, nbuf, bufdist;
Chris@42 36 INT ivs_by_nbuf, ovs_by_nbuf;
Chris@42 37 } P;
Chris@42 38
Chris@42 39 /* transform a vector input with the help of bufs */
Chris@42 40 static void apply(const plan *ego_, R *I, R *O)
Chris@42 41 {
Chris@42 42 const P *ego = (const P *) ego_;
Chris@42 43 plan_rdft *cld = (plan_rdft *) ego->cld;
Chris@42 44 plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy;
Chris@42 45 plan_rdft *cldrest;
Chris@42 46 INT i, vl = ego->vl, nbuf = ego->nbuf;
Chris@42 47 INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
Chris@42 48 R *bufs;
Chris@42 49
Chris@42 50 bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS);
Chris@42 51
Chris@42 52 for (i = nbuf; i <= vl; i += nbuf) {
Chris@42 53 /* transform to bufs: */
Chris@42 54 cld->apply((plan *) cld, I, bufs);
Chris@42 55 I += ivs_by_nbuf;
Chris@42 56
Chris@42 57 /* copy back */
Chris@42 58 cldcpy->apply((plan *) cldcpy, bufs, O);
Chris@42 59 O += ovs_by_nbuf;
Chris@42 60 }
Chris@42 61
Chris@42 62 X(ifree)(bufs);
Chris@42 63
Chris@42 64 /* Do the remaining transforms, if any: */
Chris@42 65 cldrest = (plan_rdft *) ego->cldrest;
Chris@42 66 cldrest->apply((plan *) cldrest, I, O);
Chris@42 67 }
Chris@42 68
Chris@42 69 /* for hc2r problems, copy the input into buffer, and then
Chris@42 70 transform buffer->output, which allows for destruction of the
Chris@42 71 buffer */
Chris@42 72 static void apply_hc2r(const plan *ego_, R *I, R *O)
Chris@42 73 {
Chris@42 74 const P *ego = (const P *) ego_;
Chris@42 75 plan_rdft *cld = (plan_rdft *) ego->cld;
Chris@42 76 plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy;
Chris@42 77 plan_rdft *cldrest;
Chris@42 78 INT i, vl = ego->vl, nbuf = ego->nbuf;
Chris@42 79 INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
Chris@42 80 R *bufs;
Chris@42 81
Chris@42 82 bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS);
Chris@42 83
Chris@42 84 for (i = nbuf; i <= vl; i += nbuf) {
Chris@42 85 /* copy input into bufs: */
Chris@42 86 cldcpy->apply((plan *) cldcpy, I, bufs);
Chris@42 87 I += ivs_by_nbuf;
Chris@42 88
Chris@42 89 /* transform to output */
Chris@42 90 cld->apply((plan *) cld, bufs, O);
Chris@42 91 O += ovs_by_nbuf;
Chris@42 92 }
Chris@42 93
Chris@42 94 X(ifree)(bufs);
Chris@42 95
Chris@42 96 /* Do the remaining transforms, if any: */
Chris@42 97 cldrest = (plan_rdft *) ego->cldrest;
Chris@42 98 cldrest->apply((plan *) cldrest, I, O);
Chris@42 99 }
Chris@42 100
Chris@42 101
Chris@42 102 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@42 103 {
Chris@42 104 P *ego = (P *) ego_;
Chris@42 105
Chris@42 106 X(plan_awake)(ego->cld, wakefulness);
Chris@42 107 X(plan_awake)(ego->cldcpy, wakefulness);
Chris@42 108 X(plan_awake)(ego->cldrest, wakefulness);
Chris@42 109 }
Chris@42 110
Chris@42 111 static void destroy(plan *ego_)
Chris@42 112 {
Chris@42 113 P *ego = (P *) ego_;
Chris@42 114 X(plan_destroy_internal)(ego->cldrest);
Chris@42 115 X(plan_destroy_internal)(ego->cldcpy);
Chris@42 116 X(plan_destroy_internal)(ego->cld);
Chris@42 117 }
Chris@42 118
Chris@42 119 static void print(const plan *ego_, printer *p)
Chris@42 120 {
Chris@42 121 const P *ego = (const P *) ego_;
Chris@42 122 p->print(p, "(rdft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
Chris@42 123 ego->n, ego->nbuf,
Chris@42 124 ego->vl, ego->bufdist % ego->n,
Chris@42 125 ego->cld, ego->cldcpy, ego->cldrest);
Chris@42 126 }
Chris@42 127
Chris@42 128 static int applicable0(const S *ego, const problem *p_, const planner *plnr)
Chris@42 129 {
Chris@42 130 const problem_rdft *p = (const problem_rdft *) p_;
Chris@42 131 iodim *d = p->sz->dims;
Chris@42 132
Chris@42 133 if (1
Chris@42 134 && p->vecsz->rnk <= 1
Chris@42 135 && p->sz->rnk == 1
Chris@42 136 ) {
Chris@42 137 INT vl, ivs, ovs;
Chris@42 138 X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
Chris@42 139
Chris@42 140 if (X(toobig)(d[0].n) && CONSERVE_MEMORYP(plnr))
Chris@42 141 return 0;
Chris@42 142
Chris@42 143 /* if this solver is redundant, in the sense that a solver
Chris@42 144 of lower index generates the same plan, then prune this
Chris@42 145 solver */
Chris@42 146 if (X(nbuf_redundant)(d[0].n, vl,
Chris@42 147 ego->maxnbuf_ndx,
Chris@42 148 maxnbufs, NELEM(maxnbufs)))
Chris@42 149 return 0;
Chris@42 150
Chris@42 151 if (p->I != p->O) {
Chris@42 152 if (p->kind[0] == HC2R) {
Chris@42 153 /* Allow HC2R problems only if the input is to be
Chris@42 154 preserved. This solver sets NO_DESTROY_INPUT,
Chris@42 155 which prevents infinite loops */
Chris@42 156 return (NO_DESTROY_INPUTP(plnr));
Chris@42 157 } else {
Chris@42 158 /*
Chris@42 159 In principle, the buffered transforms might be useful
Chris@42 160 when working out of place. However, in order to
Chris@42 161 prevent infinite loops in the planner, we require
Chris@42 162 that the output stride of the buffered transforms be
Chris@42 163 greater than 1.
Chris@42 164 */
Chris@42 165 return (d[0].os > 1);
Chris@42 166 }
Chris@42 167 }
Chris@42 168
Chris@42 169 /*
Chris@42 170 * If the problem is in place, the input/output strides must
Chris@42 171 * be the same or the whole thing must fit in the buffer.
Chris@42 172 */
Chris@42 173 if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
Chris@42 174 return 1;
Chris@42 175
Chris@42 176 if (/* fits into buffer: */
Chris@42 177 ((p->vecsz->rnk == 0)
Chris@42 178 ||
Chris@42 179 (X(nbuf)(d[0].n, p->vecsz->dims[0].n,
Chris@42 180 maxnbufs[ego->maxnbuf_ndx])
Chris@42 181 == p->vecsz->dims[0].n)))
Chris@42 182 return 1;
Chris@42 183 }
Chris@42 184
Chris@42 185 return 0;
Chris@42 186 }
Chris@42 187
Chris@42 188 static int applicable(const S *ego, const problem *p_, const planner *plnr)
Chris@42 189 {
Chris@42 190 const problem_rdft *p;
Chris@42 191
Chris@42 192 if (NO_BUFFERINGP(plnr)) return 0;
Chris@42 193
Chris@42 194 if (!applicable0(ego, p_, plnr)) return 0;
Chris@42 195
Chris@42 196 p = (const problem_rdft *) p_;
Chris@42 197 if (p->kind[0] == HC2R) {
Chris@42 198 if (NO_UGLYP(plnr)) {
Chris@42 199 /* UGLY if in-place and too big, since the problem
Chris@42 200 could be solved via transpositions */
Chris@42 201 if (p->I == p->O && X(toobig)(p->sz->dims[0].n))
Chris@42 202 return 0;
Chris@42 203 }
Chris@42 204 } else {
Chris@42 205 if (NO_UGLYP(plnr)) {
Chris@42 206 if (p->I != p->O) return 0;
Chris@42 207 if (X(toobig)(p->sz->dims[0].n)) return 0;
Chris@42 208 }
Chris@42 209 }
Chris@42 210 return 1;
Chris@42 211 }
Chris@42 212
Chris@42 213 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
Chris@42 214 {
Chris@42 215 P *pln;
Chris@42 216 const S *ego = (const S *)ego_;
Chris@42 217 plan *cld = (plan *) 0;
Chris@42 218 plan *cldcpy = (plan *) 0;
Chris@42 219 plan *cldrest = (plan *) 0;
Chris@42 220 const problem_rdft *p = (const problem_rdft *) p_;
Chris@42 221 R *bufs = (R *) 0;
Chris@42 222 INT nbuf = 0, bufdist, n, vl;
Chris@42 223 INT ivs, ovs;
Chris@42 224 int hc2rp;
Chris@42 225
Chris@42 226 static const plan_adt padt = {
Chris@42 227 X(rdft_solve), awake, print, destroy
Chris@42 228 };
Chris@42 229
Chris@42 230 if (!applicable(ego, p_, plnr))
Chris@42 231 goto nada;
Chris@42 232
Chris@42 233 n = X(tensor_sz)(p->sz);
Chris@42 234 X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
Chris@42 235 hc2rp = (p->kind[0] == HC2R);
Chris@42 236
Chris@42 237 nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
Chris@42 238 bufdist = X(bufdist)(n, vl);
Chris@42 239 A(nbuf > 0);
Chris@42 240
Chris@42 241 /* initial allocation for the purpose of planning */
Chris@42 242 bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);
Chris@42 243
Chris@42 244 if (hc2rp) {
Chris@42 245 /* allow destruction of buffer */
Chris@42 246 cld = X(mkplan_f_d)(plnr,
Chris@42 247 X(mkproblem_rdft_d)(
Chris@42 248 X(mktensor_1d)(n, 1, p->sz->dims[0].os),
Chris@42 249 X(mktensor_1d)(nbuf, bufdist, ovs),
Chris@42 250 bufs, TAINT(p->O, ovs * nbuf), p->kind),
Chris@42 251 0, 0, NO_DESTROY_INPUT);
Chris@42 252 if (!cld) goto nada;
Chris@42 253
Chris@42 254 /* copying input into buffer buffer is a rank-0 transform: */
Chris@42 255 cldcpy = X(mkplan_d)(plnr,
Chris@42 256 X(mkproblem_rdft_0_d)(
Chris@42 257 X(mktensor_2d)(nbuf, ivs, bufdist,
Chris@42 258 n, p->sz->dims[0].is, 1),
Chris@42 259 TAINT(p->I, ivs * nbuf), bufs));
Chris@42 260 if (!cldcpy) goto nada;
Chris@42 261 } else {
Chris@42 262 /* allow destruction of input if problem is in place */
Chris@42 263 cld = X(mkplan_f_d)(plnr,
Chris@42 264 X(mkproblem_rdft_d)(
Chris@42 265 X(mktensor_1d)(n, p->sz->dims[0].is, 1),
Chris@42 266 X(mktensor_1d)(nbuf, ivs, bufdist),
Chris@42 267 TAINT(p->I, ivs * nbuf), bufs, p->kind),
Chris@42 268 0, 0, (p->I == p->O) ? NO_DESTROY_INPUT : 0);
Chris@42 269 if (!cld) goto nada;
Chris@42 270
Chris@42 271 /* copying back from the buffer is a rank-0 transform: */
Chris@42 272 cldcpy = X(mkplan_d)(plnr,
Chris@42 273 X(mkproblem_rdft_0_d)(
Chris@42 274 X(mktensor_2d)(nbuf, bufdist, ovs,
Chris@42 275 n, 1, p->sz->dims[0].os),
Chris@42 276 bufs, TAINT(p->O, ovs * nbuf)));
Chris@42 277 if (!cldcpy) goto nada;
Chris@42 278 }
Chris@42 279
Chris@42 280 /* deallocate buffers, let apply() allocate them for real */
Chris@42 281 X(ifree)(bufs);
Chris@42 282 bufs = 0;
Chris@42 283
Chris@42 284 /* plan the leftover transforms (cldrest): */
Chris@42 285 {
Chris@42 286 INT id = ivs * (nbuf * (vl / nbuf));
Chris@42 287 INT od = ovs * (nbuf * (vl / nbuf));
Chris@42 288 cldrest = X(mkplan_d)(plnr,
Chris@42 289 X(mkproblem_rdft_d)(
Chris@42 290 X(tensor_copy)(p->sz),
Chris@42 291 X(mktensor_1d)(vl % nbuf, ivs, ovs),
Chris@42 292 p->I + id, p->O + od, p->kind));
Chris@42 293 }
Chris@42 294 if (!cldrest) goto nada;
Chris@42 295
Chris@42 296 pln = MKPLAN_RDFT(P, &padt, hc2rp ? apply_hc2r : apply);
Chris@42 297 pln->cld = cld;
Chris@42 298 pln->cldcpy = cldcpy;
Chris@42 299 pln->cldrest = cldrest;
Chris@42 300 pln->n = n;
Chris@42 301 pln->vl = vl;
Chris@42 302 pln->ivs_by_nbuf = ivs * nbuf;
Chris@42 303 pln->ovs_by_nbuf = ovs * nbuf;
Chris@42 304
Chris@42 305 pln->nbuf = nbuf;
Chris@42 306 pln->bufdist = bufdist;
Chris@42 307
Chris@42 308 {
Chris@42 309 opcnt t;
Chris@42 310 X(ops_add)(&cld->ops, &cldcpy->ops, &t);
Chris@42 311 X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
Chris@42 312 }
Chris@42 313
Chris@42 314 return &(pln->super.super);
Chris@42 315
Chris@42 316 nada:
Chris@42 317 X(ifree0)(bufs);
Chris@42 318 X(plan_destroy_internal)(cldrest);
Chris@42 319 X(plan_destroy_internal)(cldcpy);
Chris@42 320 X(plan_destroy_internal)(cld);
Chris@42 321 return (plan *) 0;
Chris@42 322 }
Chris@42 323
Chris@42 324 static solver *mksolver(size_t maxnbuf_ndx)
Chris@42 325 {
Chris@42 326 static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
Chris@42 327 S *slv = MKSOLVER(S, &sadt);
Chris@42 328 slv->maxnbuf_ndx = maxnbuf_ndx;
Chris@42 329 return &(slv->super);
Chris@42 330 }
Chris@42 331
Chris@42 332 void X(rdft_buffered_register)(planner *p)
Chris@42 333 {
Chris@42 334 size_t i;
Chris@42 335 for (i = 0; i < NELEM(maxnbufs); ++i)
Chris@42 336 REGISTER_SOLVER(p, mksolver(i));
Chris@42 337 }