annotate src/fftw-3.3.5/dft/dftw-genericbuf.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* express a twiddle problem in terms of dft + multiplication by
Chris@42 22 twiddle factors */
Chris@42 23
Chris@42 24 #include "ct.h"
Chris@42 25
Chris@42 26 typedef struct {
Chris@42 27 ct_solver super;
Chris@42 28 INT batchsz;
Chris@42 29 } S;
Chris@42 30
Chris@42 31 typedef struct {
Chris@42 32 plan_dftw super;
Chris@42 33
Chris@42 34 INT r, rs, m, ms, v, vs, mb, me;
Chris@42 35 INT batchsz;
Chris@42 36 plan *cld;
Chris@42 37
Chris@42 38 triggen *t;
Chris@42 39 const S *slv;
Chris@42 40 } P;
Chris@42 41
Chris@42 42
/* Distance, in complex elements, between consecutive batch entries in
   the scratch buffer: the radix plus 16 elements of padding.
   NOTE(review): the padding is presumably there to avoid power-of-two
   strides in the buffer -- confirm against the other buffered solvers. */
#define BATCHDIST(r) ((r) + 16)
Chris@42 44
Chris@42 45 /**************************************************************/
Chris@42 46 static void bytwiddle(const P *ego, INT mb, INT me, R *buf, R *rio, R *iio)
Chris@42 47 {
Chris@42 48 INT j, k;
Chris@42 49 INT r = ego->r, rs = ego->rs, ms = ego->ms;
Chris@42 50 triggen *t = ego->t;
Chris@42 51 for (j = 0; j < r; ++j) {
Chris@42 52 for (k = mb; k < me; ++k)
Chris@42 53 t->rotate(t, j * k,
Chris@42 54 rio[j * rs + k * ms],
Chris@42 55 iio[j * rs + k * ms],
Chris@42 56 &buf[j * 2 + 2 * BATCHDIST(r) * (k - mb) + 0]);
Chris@42 57 }
Chris@42 58 }
Chris@42 59
Chris@42 60 static int applicable0(const S *ego,
Chris@42 61 INT r, INT irs, INT ors,
Chris@42 62 INT m, INT v,
Chris@42 63 INT mcount)
Chris@42 64 {
Chris@42 65 return (1
Chris@42 66 && v == 1
Chris@42 67 && irs == ors
Chris@42 68 && mcount >= ego->batchsz
Chris@42 69 && mcount % ego->batchsz == 0
Chris@42 70 && r >= 64
Chris@42 71 && m >= r
Chris@42 72 );
Chris@42 73 }
Chris@42 74
Chris@42 75 static int applicable(const S *ego,
Chris@42 76 INT r, INT irs, INT ors,
Chris@42 77 INT m, INT v,
Chris@42 78 INT mcount,
Chris@42 79 const planner *plnr)
Chris@42 80 {
Chris@42 81 if (!applicable0(ego, r, irs, ors, m, v, mcount))
Chris@42 82 return 0;
Chris@42 83 if (NO_UGLYP(plnr) && m * r < 65536)
Chris@42 84 return 0;
Chris@42 85
Chris@42 86 return 1;
Chris@42 87 }
Chris@42 88
Chris@42 89 static void dobatch(const P *ego, INT mb, INT me, R *buf, R *rio, R *iio)
Chris@42 90 {
Chris@42 91 plan_dft *cld;
Chris@42 92 INT ms = ego->ms;
Chris@42 93
Chris@42 94 bytwiddle(ego, mb, me, buf, rio, iio);
Chris@42 95
Chris@42 96 cld = (plan_dft *) ego->cld;
Chris@42 97 cld->apply(ego->cld, buf, buf + 1, buf, buf + 1);
Chris@42 98 X(cpy2d_pair_co)(buf, buf + 1,
Chris@42 99 rio + ms * mb, iio + ms * mb,
Chris@42 100 me-mb, 2 * BATCHDIST(ego->r), ms,
Chris@42 101 ego->r, 2, ego->rs);
Chris@42 102 }
Chris@42 103
Chris@42 104 static void apply(const plan *ego_, R *rio, R *iio)
Chris@42 105 {
Chris@42 106 const P *ego = (const P *) ego_;
Chris@42 107 R *buf = (R *) MALLOC(sizeof(R) * 2 * BATCHDIST(ego->r) * ego->batchsz,
Chris@42 108 BUFFERS);
Chris@42 109 INT m;
Chris@42 110
Chris@42 111 for (m = ego->mb; m < ego->me; m += ego->batchsz)
Chris@42 112 dobatch(ego, m, m + ego->batchsz, buf, rio, iio);
Chris@42 113
Chris@42 114 A(m == ego->me);
Chris@42 115
Chris@42 116 X(ifree)(buf);
Chris@42 117 }
Chris@42 118
Chris@42 119 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@42 120 {
Chris@42 121 P *ego = (P *) ego_;
Chris@42 122 X(plan_awake)(ego->cld, wakefulness);
Chris@42 123
Chris@42 124 switch (wakefulness) {
Chris@42 125 case SLEEPY:
Chris@42 126 X(triggen_destroy)(ego->t); ego->t = 0;
Chris@42 127 break;
Chris@42 128 default:
Chris@42 129 ego->t = X(mktriggen)(AWAKE_SQRTN_TABLE, ego->r * ego->m);
Chris@42 130 break;
Chris@42 131 }
Chris@42 132 }
Chris@42 133
Chris@42 134 static void destroy(plan *ego_)
Chris@42 135 {
Chris@42 136 P *ego = (P *) ego_;
Chris@42 137 X(plan_destroy_internal)(ego->cld);
Chris@42 138 }
Chris@42 139
Chris@42 140 static void print(const plan *ego_, printer *p)
Chris@42 141 {
Chris@42 142 const P *ego = (const P *) ego_;
Chris@42 143 p->print(p, "(dftw-genericbuf/%D-%D-%D%(%p%))",
Chris@42 144 ego->batchsz, ego->r, ego->m, ego->cld);
Chris@42 145 }
Chris@42 146
Chris@42 147 static plan *mkcldw(const ct_solver *ego_,
Chris@42 148 INT r, INT irs, INT ors,
Chris@42 149 INT m, INT ms,
Chris@42 150 INT v, INT ivs, INT ovs,
Chris@42 151 INT mstart, INT mcount,
Chris@42 152 R *rio, R *iio,
Chris@42 153 planner *plnr)
Chris@42 154 {
Chris@42 155 const S *ego = (const S *)ego_;
Chris@42 156 P *pln;
Chris@42 157 plan *cld = 0;
Chris@42 158 R *buf;
Chris@42 159
Chris@42 160 static const plan_adt padt = {
Chris@42 161 0, awake, print, destroy
Chris@42 162 };
Chris@42 163
Chris@42 164 UNUSED(ivs); UNUSED(ovs); UNUSED(rio); UNUSED(iio);
Chris@42 165
Chris@42 166 A(mstart >= 0 && mstart + mcount <= m);
Chris@42 167 if (!applicable(ego, r, irs, ors, m, v, mcount, plnr))
Chris@42 168 return (plan *)0;
Chris@42 169
Chris@42 170 buf = (R *) MALLOC(sizeof(R) * 2 * BATCHDIST(r) * ego->batchsz, BUFFERS);
Chris@42 171 cld = X(mkplan_d)(plnr,
Chris@42 172 X(mkproblem_dft_d)(
Chris@42 173 X(mktensor_1d)(r, 2, 2),
Chris@42 174 X(mktensor_1d)(ego->batchsz,
Chris@42 175 2 * BATCHDIST(r),
Chris@42 176 2 * BATCHDIST(r)),
Chris@42 177 buf, buf + 1, buf, buf + 1
Chris@42 178 )
Chris@42 179 );
Chris@42 180 X(ifree)(buf);
Chris@42 181 if (!cld) goto nada;
Chris@42 182
Chris@42 183 pln = MKPLAN_DFTW(P, &padt, apply);
Chris@42 184 pln->slv = ego;
Chris@42 185 pln->cld = cld;
Chris@42 186 pln->r = r;
Chris@42 187 pln->m = m;
Chris@42 188 pln->ms = ms;
Chris@42 189 pln->rs = irs;
Chris@42 190 pln->batchsz = ego->batchsz;
Chris@42 191 pln->mb = mstart;
Chris@42 192 pln->me = mstart + mcount;
Chris@42 193
Chris@42 194 {
Chris@42 195 double n0 = (r - 1) * (mcount - 1);
Chris@42 196 pln->super.super.ops = cld->ops;
Chris@42 197 pln->super.super.ops.mul += 8 * n0;
Chris@42 198 pln->super.super.ops.add += 4 * n0;
Chris@42 199 pln->super.super.ops.other += 8 * n0;
Chris@42 200 }
Chris@42 201 return &(pln->super.super);
Chris@42 202
Chris@42 203 nada:
Chris@42 204 X(plan_destroy_internal)(cld);
Chris@42 205 return (plan *) 0;
Chris@42 206 }
Chris@42 207
Chris@42 208 static void regsolver(planner *plnr, INT r, INT batchsz)
Chris@42 209 {
Chris@42 210 S *slv = (S *)X(mksolver_ct)(sizeof(S), r, DECDIT, mkcldw, 0);
Chris@42 211 slv->batchsz = batchsz;
Chris@42 212 REGISTER_SOLVER(plnr, &(slv->super.super));
Chris@42 213
Chris@42 214 if (X(mksolver_ct_hook)) {
Chris@42 215 slv = (S *)X(mksolver_ct_hook)(sizeof(S), r, DECDIT, mkcldw, 0);
Chris@42 216 slv->batchsz = batchsz;
Chris@42 217 REGISTER_SOLVER(plnr, &(slv->super.super));
Chris@42 218 }
Chris@42 219
Chris@42 220 }
Chris@42 221
Chris@42 222 void X(ct_genericbuf_register)(planner *p)
Chris@42 223 {
Chris@42 224 static const INT radices[] = { -1, -2, -4, -8, -16, -32, -64 };
Chris@42 225 static const INT batchsizes[] = { 4, 8, 16, 32, 64 };
Chris@42 226 unsigned i, j;
Chris@42 227
Chris@42 228 for (i = 0; i < sizeof(radices) / sizeof(radices[0]); ++i)
Chris@42 229 for (j = 0; j < sizeof(batchsizes) / sizeof(batchsizes[0]); ++j)
Chris@42 230 regsolver(p, radices[i], batchsizes[j]);
Chris@42 231 }