annotate src/fftw-3.3.8/rdft/buffered2.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21
Chris@82 22 /* buffering of rdft2. We always buffer the complex array */
Chris@82 23
Chris@82 24 #include "rdft/rdft.h"
Chris@82 25 #include "dft/dft.h"
Chris@82 26
Chris@82 27 typedef struct {
Chris@82 28 solver super;
Chris@82 29 size_t maxnbuf_ndx;
Chris@82 30 } S;
Chris@82 31
Chris@82 32 static const INT maxnbufs[] = { 8, 256 };
Chris@82 33
Chris@82 34 typedef struct {
Chris@82 35 plan_rdft2 super;
Chris@82 36
Chris@82 37 plan *cld, *cldcpy, *cldrest;
Chris@82 38 INT n, vl, nbuf, bufdist;
Chris@82 39 INT ivs_by_nbuf, ovs_by_nbuf;
Chris@82 40 INT ioffset, roffset;
Chris@82 41 } P;
Chris@82 42
Chris@82 43 /* transform a vector input with the help of bufs */
Chris@82 44 static void apply_r2hc(const plan *ego_, R *r0, R *r1, R *cr, R *ci)
Chris@82 45 {
Chris@82 46 const P *ego = (const P *) ego_;
Chris@82 47 plan_rdft2 *cld = (plan_rdft2 *) ego->cld;
Chris@82 48 plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
Chris@82 49 INT i, vl = ego->vl, nbuf = ego->nbuf;
Chris@82 50 INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
Chris@82 51 R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS);
Chris@82 52 R *bufr = bufs + ego->roffset;
Chris@82 53 R *bufi = bufs + ego->ioffset;
Chris@82 54 plan_rdft2 *cldrest;
Chris@82 55
Chris@82 56 for (i = nbuf; i <= vl; i += nbuf) {
Chris@82 57 /* transform to bufs: */
Chris@82 58 cld->apply((plan *) cld, r0, r1, bufr, bufi);
Chris@82 59 r0 += ivs_by_nbuf; r1 += ivs_by_nbuf;
Chris@82 60
Chris@82 61 /* copy back */
Chris@82 62 cldcpy->apply((plan *) cldcpy, bufr, bufi, cr, ci);
Chris@82 63 cr += ovs_by_nbuf; ci += ovs_by_nbuf;
Chris@82 64 }
Chris@82 65
Chris@82 66 X(ifree)(bufs);
Chris@82 67
Chris@82 68 /* Do the remaining transforms, if any: */
Chris@82 69 cldrest = (plan_rdft2 *) ego->cldrest;
Chris@82 70 cldrest->apply((plan *) cldrest, r0, r1, cr, ci);
Chris@82 71 }
Chris@82 72
Chris@82 73 /* for hc2r problems, copy the input into buffer, and then
Chris@82 74 transform buffer->output, which allows for destruction of the
Chris@82 75 buffer */
Chris@82 76 static void apply_hc2r(const plan *ego_, R *r0, R *r1, R *cr, R *ci)
Chris@82 77 {
Chris@82 78 const P *ego = (const P *) ego_;
Chris@82 79 plan_rdft2 *cld = (plan_rdft2 *) ego->cld;
Chris@82 80 plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
Chris@82 81 INT i, vl = ego->vl, nbuf = ego->nbuf;
Chris@82 82 INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
Chris@82 83 R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS);
Chris@82 84 R *bufr = bufs + ego->roffset;
Chris@82 85 R *bufi = bufs + ego->ioffset;
Chris@82 86 plan_rdft2 *cldrest;
Chris@82 87
Chris@82 88 for (i = nbuf; i <= vl; i += nbuf) {
Chris@82 89 /* copy input into bufs: */
Chris@82 90 cldcpy->apply((plan *) cldcpy, cr, ci, bufr, bufi);
Chris@82 91 cr += ivs_by_nbuf; ci += ivs_by_nbuf;
Chris@82 92
Chris@82 93 /* transform to output */
Chris@82 94 cld->apply((plan *) cld, r0, r1, bufr, bufi);
Chris@82 95 r0 += ovs_by_nbuf; r1 += ovs_by_nbuf;
Chris@82 96 }
Chris@82 97
Chris@82 98 X(ifree)(bufs);
Chris@82 99
Chris@82 100 /* Do the remaining transforms, if any: */
Chris@82 101 cldrest = (plan_rdft2 *) ego->cldrest;
Chris@82 102 cldrest->apply((plan *) cldrest, r0, r1, cr, ci);
Chris@82 103 }
Chris@82 104
Chris@82 105
Chris@82 106 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@82 107 {
Chris@82 108 P *ego = (P *) ego_;
Chris@82 109
Chris@82 110 X(plan_awake)(ego->cld, wakefulness);
Chris@82 111 X(plan_awake)(ego->cldcpy, wakefulness);
Chris@82 112 X(plan_awake)(ego->cldrest, wakefulness);
Chris@82 113 }
Chris@82 114
Chris@82 115 static void destroy(plan *ego_)
Chris@82 116 {
Chris@82 117 P *ego = (P *) ego_;
Chris@82 118 X(plan_destroy_internal)(ego->cldrest);
Chris@82 119 X(plan_destroy_internal)(ego->cldcpy);
Chris@82 120 X(plan_destroy_internal)(ego->cld);
Chris@82 121 }
Chris@82 122
Chris@82 123 static void print(const plan *ego_, printer *p)
Chris@82 124 {
Chris@82 125 const P *ego = (const P *) ego_;
Chris@82 126 p->print(p, "(rdft2-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
Chris@82 127 ego->n, ego->nbuf,
Chris@82 128 ego->vl, ego->bufdist % ego->n,
Chris@82 129 ego->cld, ego->cldcpy, ego->cldrest);
Chris@82 130 }
Chris@82 131
Chris@82 132 static int applicable0(const S *ego, const problem *p_, const planner *plnr)
Chris@82 133 {
Chris@82 134 const problem_rdft2 *p = (const problem_rdft2 *) p_;
Chris@82 135 iodim *d = p->sz->dims;
Chris@82 136
Chris@82 137 if (1
Chris@82 138 && p->vecsz->rnk <= 1
Chris@82 139 && p->sz->rnk == 1
Chris@82 140
Chris@82 141 /* we assume even n throughout */
Chris@82 142 && (d[0].n % 2) == 0
Chris@82 143
Chris@82 144 /* and we only consider these two cases */
Chris@82 145 && (p->kind == R2HC || p->kind == HC2R)
Chris@82 146
Chris@82 147 ) {
Chris@82 148 INT vl, ivs, ovs;
Chris@82 149 X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
Chris@82 150
Chris@82 151 if (X(toobig)(d[0].n) && CONSERVE_MEMORYP(plnr))
Chris@82 152 return 0;
Chris@82 153
Chris@82 154 /* if this solver is redundant, in the sense that a solver
Chris@82 155 of lower index generates the same plan, then prune this
Chris@82 156 solver */
Chris@82 157 if (X(nbuf_redundant)(d[0].n, vl,
Chris@82 158 ego->maxnbuf_ndx,
Chris@82 159 maxnbufs, NELEM(maxnbufs)))
Chris@82 160 return 0;
Chris@82 161
Chris@82 162 if (p->r0 != p->cr) {
Chris@82 163 if (p->kind == HC2R) {
Chris@82 164 /* Allow HC2R problems only if the input is to be
Chris@82 165 preserved. This solver sets NO_DESTROY_INPUT,
Chris@82 166 which prevents infinite loops */
Chris@82 167 return (NO_DESTROY_INPUTP(plnr));
Chris@82 168 } else {
Chris@82 169 /*
Chris@82 170 In principle, the buffered transforms might be useful
Chris@82 171 when working out of place. However, in order to
Chris@82 172 prevent infinite loops in the planner, we require
Chris@82 173 that the output stride of the buffered transforms be
Chris@82 174 greater than 2.
Chris@82 175 */
Chris@82 176 return (d[0].os > 2);
Chris@82 177 }
Chris@82 178 }
Chris@82 179
Chris@82 180 /*
Chris@82 181 * If the problem is in place, the input/output strides must
Chris@82 182 * be the same or the whole thing must fit in the buffer.
Chris@82 183 */
Chris@82 184 if (X(rdft2_inplace_strides(p, RNK_MINFTY)))
Chris@82 185 return 1;
Chris@82 186
Chris@82 187 if (/* fits into buffer: */
Chris@82 188 ((p->vecsz->rnk == 0)
Chris@82 189 ||
Chris@82 190 (X(nbuf)(d[0].n, p->vecsz->dims[0].n,
Chris@82 191 maxnbufs[ego->maxnbuf_ndx])
Chris@82 192 == p->vecsz->dims[0].n)))
Chris@82 193 return 1;
Chris@82 194 }
Chris@82 195
Chris@82 196 return 0;
Chris@82 197 }
Chris@82 198
Chris@82 199 static int applicable(const S *ego, const problem *p_, const planner *plnr)
Chris@82 200 {
Chris@82 201 const problem_rdft2 *p;
Chris@82 202
Chris@82 203 if (NO_BUFFERINGP(plnr)) return 0;
Chris@82 204
Chris@82 205 if (!applicable0(ego, p_, plnr)) return 0;
Chris@82 206
Chris@82 207 p = (const problem_rdft2 *) p_;
Chris@82 208 if (p->kind == HC2R) {
Chris@82 209 if (NO_UGLYP(plnr)) {
Chris@82 210 /* UGLY if in-place and too big, since the problem
Chris@82 211 could be solved via transpositions */
Chris@82 212 if (p->r0 == p->cr && X(toobig)(p->sz->dims[0].n))
Chris@82 213 return 0;
Chris@82 214 }
Chris@82 215 } else {
Chris@82 216 if (NO_UGLYP(plnr)) {
Chris@82 217 if (p->r0 != p->cr || X(toobig)(p->sz->dims[0].n))
Chris@82 218 return 0;
Chris@82 219 }
Chris@82 220 }
Chris@82 221 return 1;
Chris@82 222 }
Chris@82 223
Chris@82 224 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
Chris@82 225 {
Chris@82 226 P *pln;
Chris@82 227 const S *ego = (const S *)ego_;
Chris@82 228 plan *cld = (plan *) 0;
Chris@82 229 plan *cldcpy = (plan *) 0;
Chris@82 230 plan *cldrest = (plan *) 0;
Chris@82 231 const problem_rdft2 *p = (const problem_rdft2 *) p_;
Chris@82 232 R *bufs = (R *) 0;
Chris@82 233 INT nbuf = 0, bufdist, n, vl;
Chris@82 234 INT ivs, ovs, ioffset, roffset, id, od;
Chris@82 235
Chris@82 236 static const plan_adt padt = {
Chris@82 237 X(rdft2_solve), awake, print, destroy
Chris@82 238 };
Chris@82 239
Chris@82 240 if (!applicable(ego, p_, plnr))
Chris@82 241 goto nada;
Chris@82 242
Chris@82 243 n = X(tensor_sz)(p->sz);
Chris@82 244 X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
Chris@82 245
Chris@82 246 nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
Chris@82 247 bufdist = X(bufdist)(n + 2, vl); /* complex-side rdft2 stores N+2
Chris@82 248 real numbers */
Chris@82 249 A(nbuf > 0);
Chris@82 250
Chris@82 251 /* attempt to keep real and imaginary part in the same order,
Chris@82 252 so as to allow optimizations in the the copy plan */
Chris@82 253 roffset = (p->cr - p->ci > 0) ? (INT)1 : (INT)0;
Chris@82 254 ioffset = 1 - roffset;
Chris@82 255
Chris@82 256 /* initial allocation for the purpose of planning */
Chris@82 257 bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);
Chris@82 258
Chris@82 259 id = ivs * (nbuf * (vl / nbuf));
Chris@82 260 od = ovs * (nbuf * (vl / nbuf));
Chris@82 261
Chris@82 262 if (p->kind == R2HC) {
Chris@82 263 /* allow destruction of input if problem is in place */
Chris@82 264 cld = X(mkplan_f_d)(
Chris@82 265 plnr,
Chris@82 266 X(mkproblem_rdft2_d)(
Chris@82 267 X(mktensor_1d)(n, p->sz->dims[0].is, 2),
Chris@82 268 X(mktensor_1d)(nbuf, ivs, bufdist),
Chris@82 269 TAINT(p->r0, ivs * nbuf), TAINT(p->r1, ivs * nbuf),
Chris@82 270 bufs + roffset, bufs + ioffset, p->kind),
Chris@82 271 0, 0, (p->r0 == p->cr) ? NO_DESTROY_INPUT : 0);
Chris@82 272 if (!cld) goto nada;
Chris@82 273
Chris@82 274 /* copying back from the buffer is a rank-0 DFT: */
Chris@82 275 cldcpy = X(mkplan_d)(
Chris@82 276 plnr,
Chris@82 277 X(mkproblem_dft_d)(
Chris@82 278 X(mktensor_0d)(),
Chris@82 279 X(mktensor_2d)(nbuf, bufdist, ovs,
Chris@82 280 n/2+1, 2, p->sz->dims[0].os),
Chris@82 281 bufs + roffset, bufs + ioffset,
Chris@82 282 TAINT(p->cr, ovs * nbuf), TAINT(p->ci, ovs * nbuf) ));
Chris@82 283 if (!cldcpy) goto nada;
Chris@82 284
Chris@82 285 X(ifree)(bufs); bufs = 0;
Chris@82 286
Chris@82 287 cldrest = X(mkplan_d)(plnr,
Chris@82 288 X(mkproblem_rdft2_d)(
Chris@82 289 X(tensor_copy)(p->sz),
Chris@82 290 X(mktensor_1d)(vl % nbuf, ivs, ovs),
Chris@82 291 p->r0 + id, p->r1 + id,
Chris@82 292 p->cr + od, p->ci + od,
Chris@82 293 p->kind));
Chris@82 294 if (!cldrest) goto nada;
Chris@82 295 pln = MKPLAN_RDFT2(P, &padt, apply_r2hc);
Chris@82 296 } else {
Chris@82 297 /* allow destruction of buffer */
Chris@82 298 cld = X(mkplan_f_d)(
Chris@82 299 plnr,
Chris@82 300 X(mkproblem_rdft2_d)(
Chris@82 301 X(mktensor_1d)(n, 2, p->sz->dims[0].os),
Chris@82 302 X(mktensor_1d)(nbuf, bufdist, ovs),
Chris@82 303 TAINT(p->r0, ovs * nbuf), TAINT(p->r1, ovs * nbuf),
Chris@82 304 bufs + roffset, bufs + ioffset, p->kind),
Chris@82 305 0, 0, NO_DESTROY_INPUT);
Chris@82 306 if (!cld) goto nada;
Chris@82 307
Chris@82 308 /* copying input into buffer is a rank-0 DFT: */
Chris@82 309 cldcpy = X(mkplan_d)(
Chris@82 310 plnr,
Chris@82 311 X(mkproblem_dft_d)(
Chris@82 312 X(mktensor_0d)(),
Chris@82 313 X(mktensor_2d)(nbuf, ivs, bufdist,
Chris@82 314 n/2+1, p->sz->dims[0].is, 2),
Chris@82 315 TAINT(p->cr, ivs * nbuf), TAINT(p->ci, ivs * nbuf),
Chris@82 316 bufs + roffset, bufs + ioffset));
Chris@82 317 if (!cldcpy) goto nada;
Chris@82 318
Chris@82 319 X(ifree)(bufs); bufs = 0;
Chris@82 320
Chris@82 321 cldrest = X(mkplan_d)(plnr,
Chris@82 322 X(mkproblem_rdft2_d)(
Chris@82 323 X(tensor_copy)(p->sz),
Chris@82 324 X(mktensor_1d)(vl % nbuf, ivs, ovs),
Chris@82 325 p->r0 + od, p->r1 + od,
Chris@82 326 p->cr + id, p->ci + id,
Chris@82 327 p->kind));
Chris@82 328 if (!cldrest) goto nada;
Chris@82 329
Chris@82 330 pln = MKPLAN_RDFT2(P, &padt, apply_hc2r);
Chris@82 331 }
Chris@82 332
Chris@82 333 pln->cld = cld;
Chris@82 334 pln->cldcpy = cldcpy;
Chris@82 335 pln->cldrest = cldrest;
Chris@82 336 pln->n = n;
Chris@82 337 pln->vl = vl;
Chris@82 338 pln->ivs_by_nbuf = ivs * nbuf;
Chris@82 339 pln->ovs_by_nbuf = ovs * nbuf;
Chris@82 340 pln->roffset = roffset;
Chris@82 341 pln->ioffset = ioffset;
Chris@82 342
Chris@82 343 pln->nbuf = nbuf;
Chris@82 344 pln->bufdist = bufdist;
Chris@82 345
Chris@82 346 {
Chris@82 347 opcnt t;
Chris@82 348 X(ops_add)(&cld->ops, &cldcpy->ops, &t);
Chris@82 349 X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
Chris@82 350 }
Chris@82 351
Chris@82 352 return &(pln->super.super);
Chris@82 353
Chris@82 354 nada:
Chris@82 355 X(ifree0)(bufs);
Chris@82 356 X(plan_destroy_internal)(cldrest);
Chris@82 357 X(plan_destroy_internal)(cldcpy);
Chris@82 358 X(plan_destroy_internal)(cld);
Chris@82 359 return (plan *) 0;
Chris@82 360 }
Chris@82 361
Chris@82 362 static solver *mksolver(size_t maxnbuf_ndx)
Chris@82 363 {
Chris@82 364 static const solver_adt sadt = { PROBLEM_RDFT2, mkplan, 0 };
Chris@82 365 S *slv = MKSOLVER(S, &sadt);
Chris@82 366 slv->maxnbuf_ndx = maxnbuf_ndx;
Chris@82 367 return &(slv->super);
Chris@82 368 }
Chris@82 369
Chris@82 370 void X(rdft2_buffered_register)(planner *p)
Chris@82 371 {
Chris@82 372 size_t i;
Chris@82 373 for (i = 0; i < NELEM(maxnbufs); ++i)
Chris@82 374 REGISTER_SOLVER(p, mksolver(i));
Chris@82 375 }