annotate src/fftw-3.3.5/threads/ct.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21
Chris@42 22 #include "threads.h"
Chris@42 23
Chris@42 24 typedef struct {
Chris@42 25 plan_dft super;
Chris@42 26 plan *cld;
Chris@42 27 plan **cldws;
Chris@42 28 int nthr;
Chris@42 29 INT r;
Chris@42 30 } P;
Chris@42 31
Chris@42 32 typedef struct {
Chris@42 33 plan **cldws;
Chris@42 34 R *r, *i;
Chris@42 35 } PD;
Chris@42 36
Chris@42 37 static void *spawn_apply(spawn_data *d)
Chris@42 38 {
Chris@42 39 PD *ego = (PD *) d->data;
Chris@42 40 INT thr_num = d->thr_num;
Chris@42 41
Chris@42 42 plan_dftw *cldw = (plan_dftw *) (ego->cldws[thr_num]);
Chris@42 43 cldw->apply((plan *) cldw, ego->r, ego->i);
Chris@42 44 return 0;
Chris@42 45 }
Chris@42 46
Chris@42 47 static void apply_dit(const plan *ego_, R *ri, R *ii, R *ro, R *io)
Chris@42 48 {
Chris@42 49 const P *ego = (const P *) ego_;
Chris@42 50 plan_dft *cld;
Chris@42 51
Chris@42 52 cld = (plan_dft *) ego->cld;
Chris@42 53 cld->apply(ego->cld, ri, ii, ro, io);
Chris@42 54
Chris@42 55 {
Chris@42 56 PD d;
Chris@42 57
Chris@42 58 d.r = ro; d.i = io;
Chris@42 59 d.cldws = ego->cldws;
Chris@42 60
Chris@42 61 X(spawn_loop)(ego->nthr, ego->nthr, spawn_apply, (void*)&d);
Chris@42 62 }
Chris@42 63 }
Chris@42 64
Chris@42 65 static void apply_dif(const plan *ego_, R *ri, R *ii, R *ro, R *io)
Chris@42 66 {
Chris@42 67 const P *ego = (const P *) ego_;
Chris@42 68 plan_dft *cld;
Chris@42 69
Chris@42 70 {
Chris@42 71 PD d;
Chris@42 72
Chris@42 73 d.r = ri; d.i = ii;
Chris@42 74 d.cldws = ego->cldws;
Chris@42 75
Chris@42 76 X(spawn_loop)(ego->nthr, ego->nthr, spawn_apply, (void*)&d);
Chris@42 77 }
Chris@42 78
Chris@42 79 cld = (plan_dft *) ego->cld;
Chris@42 80 cld->apply(ego->cld, ri, ii, ro, io);
Chris@42 81 }
Chris@42 82
Chris@42 83 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@42 84 {
Chris@42 85 P *ego = (P *) ego_;
Chris@42 86 int i;
Chris@42 87 X(plan_awake)(ego->cld, wakefulness);
Chris@42 88 for (i = 0; i < ego->nthr; ++i)
Chris@42 89 X(plan_awake)(ego->cldws[i], wakefulness);
Chris@42 90 }
Chris@42 91
Chris@42 92 static void destroy(plan *ego_)
Chris@42 93 {
Chris@42 94 P *ego = (P *) ego_;
Chris@42 95 int i;
Chris@42 96 X(plan_destroy_internal)(ego->cld);
Chris@42 97 for (i = 0; i < ego->nthr; ++i)
Chris@42 98 X(plan_destroy_internal)(ego->cldws[i]);
Chris@42 99 X(ifree)(ego->cldws);
Chris@42 100 }
Chris@42 101
Chris@42 102 static void print(const plan *ego_, printer *p)
Chris@42 103 {
Chris@42 104 const P *ego = (const P *) ego_;
Chris@42 105 int i;
Chris@42 106 p->print(p, "(dft-thr-ct-%s-x%d/%D",
Chris@42 107 ego->super.apply == apply_dit ? "dit" : "dif",
Chris@42 108 ego->nthr, ego->r);
Chris@42 109 for (i = 0; i < ego->nthr; ++i)
Chris@42 110 if (i == 0 || (ego->cldws[i] != ego->cldws[i-1] &&
Chris@42 111 (i <= 1 || ego->cldws[i] != ego->cldws[i-2])))
Chris@42 112 p->print(p, "%(%p%)", ego->cldws[i]);
Chris@42 113 p->print(p, "%(%p%))", ego->cld);
Chris@42 114 }
Chris@42 115
Chris@42 116 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
Chris@42 117 {
Chris@42 118 const ct_solver *ego = (const ct_solver *) ego_;
Chris@42 119 const problem_dft *p;
Chris@42 120 P *pln = 0;
Chris@42 121 plan *cld = 0, **cldws = 0;
Chris@42 122 INT n, r, m, v, ivs, ovs;
Chris@42 123 INT block_size;
Chris@42 124 int i, nthr, plnr_nthr_save;
Chris@42 125 iodim *d;
Chris@42 126
Chris@42 127 static const plan_adt padt = {
Chris@42 128 X(dft_solve), awake, print, destroy
Chris@42 129 };
Chris@42 130
Chris@42 131 if (plnr->nthr <= 1 || !X(ct_applicable)(ego, p_, plnr))
Chris@42 132 return (plan *) 0;
Chris@42 133
Chris@42 134 p = (const problem_dft *) p_;
Chris@42 135 d = p->sz->dims;
Chris@42 136 n = d[0].n;
Chris@42 137 r = X(choose_radix)(ego->r, n);
Chris@42 138 m = n / r;
Chris@42 139
Chris@42 140 X(tensor_tornk1)(p->vecsz, &v, &ivs, &ovs);
Chris@42 141
Chris@42 142 block_size = (m + plnr->nthr - 1) / plnr->nthr;
Chris@42 143 nthr = (int)((m + block_size - 1) / block_size);
Chris@42 144 plnr_nthr_save = plnr->nthr;
Chris@42 145 plnr->nthr = (plnr->nthr + nthr - 1) / nthr;
Chris@42 146
Chris@42 147 cldws = (plan **) MALLOC(sizeof(plan *) * nthr, PLANS);
Chris@42 148 for (i = 0; i < nthr; ++i) cldws[i] = (plan *) 0;
Chris@42 149
Chris@42 150 switch (ego->dec) {
Chris@42 151 case DECDIT:
Chris@42 152 {
Chris@42 153 for (i = 0; i < nthr; ++i) {
Chris@42 154 cldws[i] = ego->mkcldw(ego,
Chris@42 155 r, m * d[0].os, m * d[0].os,
Chris@42 156 m, d[0].os,
Chris@42 157 v, ovs, ovs,
Chris@42 158 i*block_size,
Chris@42 159 (i == nthr - 1) ?
Chris@42 160 (m - i*block_size) : block_size,
Chris@42 161 p->ro, p->io, plnr);
Chris@42 162 if (!cldws[i]) goto nada;
Chris@42 163 }
Chris@42 164
Chris@42 165 plnr->nthr = plnr_nthr_save;
Chris@42 166
Chris@42 167 cld = X(mkplan_d)(plnr,
Chris@42 168 X(mkproblem_dft_d)(
Chris@42 169 X(mktensor_1d)(m, r * d[0].is, d[0].os),
Chris@42 170 X(mktensor_2d)(r, d[0].is, m * d[0].os,
Chris@42 171 v, ivs, ovs),
Chris@42 172 p->ri, p->ii, p->ro, p->io)
Chris@42 173 );
Chris@42 174 if (!cld) goto nada;
Chris@42 175
Chris@42 176 pln = MKPLAN_DFT(P, &padt, apply_dit);
Chris@42 177 break;
Chris@42 178 }
Chris@42 179 case DECDIF:
Chris@42 180 case DECDIF+TRANSPOSE:
Chris@42 181 {
Chris@42 182 INT cors, covs; /* cldw ors, ovs */
Chris@42 183 if (ego->dec == DECDIF+TRANSPOSE) {
Chris@42 184 cors = ivs;
Chris@42 185 covs = m * d[0].is;
Chris@42 186 /* ensure that we generate well-formed dftw subproblems */
Chris@42 187 /* FIXME: too conservative */
Chris@42 188 if (!(1
Chris@42 189 && r == v
Chris@42 190 && d[0].is == r * cors))
Chris@42 191 goto nada;
Chris@42 192
Chris@42 193 /* FIXME: allow in-place only for now, like in
Chris@42 194 fftw-3.[01] */
Chris@42 195 if (!(1
Chris@42 196 && p->ri == p->ro
Chris@42 197 && d[0].is == r * d[0].os
Chris@42 198 && cors == d[0].os
Chris@42 199 && covs == ovs
Chris@42 200 ))
Chris@42 201 goto nada;
Chris@42 202 } else {
Chris@42 203 cors = m * d[0].is;
Chris@42 204 covs = ivs;
Chris@42 205 }
Chris@42 206
Chris@42 207 for (i = 0; i < nthr; ++i) {
Chris@42 208 cldws[i] = ego->mkcldw(ego,
Chris@42 209 r, m * d[0].is, cors,
Chris@42 210 m, d[0].is,
Chris@42 211 v, ivs, covs,
Chris@42 212 i*block_size,
Chris@42 213 (i == nthr - 1) ?
Chris@42 214 (m - i*block_size) : block_size,
Chris@42 215 p->ri, p->ii, plnr);
Chris@42 216 if (!cldws[i]) goto nada;
Chris@42 217 }
Chris@42 218
Chris@42 219 plnr->nthr = plnr_nthr_save;
Chris@42 220
Chris@42 221 cld = X(mkplan_d)(plnr,
Chris@42 222 X(mkproblem_dft_d)(
Chris@42 223 X(mktensor_1d)(m, d[0].is, r * d[0].os),
Chris@42 224 X(mktensor_2d)(r, cors, d[0].os,
Chris@42 225 v, covs, ovs),
Chris@42 226 p->ri, p->ii, p->ro, p->io)
Chris@42 227 );
Chris@42 228 if (!cld) goto nada;
Chris@42 229
Chris@42 230 pln = MKPLAN_DFT(P, &padt, apply_dif);
Chris@42 231 break;
Chris@42 232 }
Chris@42 233
Chris@42 234 default: A(0);
Chris@42 235
Chris@42 236 }
Chris@42 237
Chris@42 238 pln->cld = cld;
Chris@42 239 pln->cldws = cldws;
Chris@42 240 pln->nthr = nthr;
Chris@42 241 pln->r = r;
Chris@42 242 X(ops_zero)(&pln->super.super.ops);
Chris@42 243 for (i = 0; i < nthr; ++i) {
Chris@42 244 X(ops_add2)(&cldws[i]->ops, &pln->super.super.ops);
Chris@42 245 pln->super.super.could_prune_now_p |= cldws[i]->could_prune_now_p;
Chris@42 246 }
Chris@42 247 X(ops_add2)(&cld->ops, &pln->super.super.ops);
Chris@42 248 return &(pln->super.super);
Chris@42 249
Chris@42 250 nada:
Chris@42 251 if (cldws) {
Chris@42 252 for (i = 0; i < nthr; ++i)
Chris@42 253 X(plan_destroy_internal)(cldws[i]);
Chris@42 254 X(ifree)(cldws);
Chris@42 255 }
Chris@42 256 X(plan_destroy_internal)(cld);
Chris@42 257 return (plan *) 0;
Chris@42 258 }
Chris@42 259
Chris@42 260 ct_solver *X(mksolver_ct_threads)(size_t size, INT r, int dec,
Chris@42 261 ct_mkinferior mkcldw,
Chris@42 262 ct_force_vrecursion force_vrecursionp)
Chris@42 263 {
Chris@42 264 static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
Chris@42 265 ct_solver *slv = (ct_solver *) X(mksolver)(size, &sadt);
Chris@42 266 slv->r = r;
Chris@42 267 slv->dec = dec;
Chris@42 268 slv->mkcldw = mkcldw;
Chris@42 269 slv->force_vrecursionp = force_vrecursionp;
Chris@42 270 return slv;
Chris@42 271 }