annotate src/fftw-3.3.5/threads/hc2hc.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 #include "threads.h"
Chris@42 22
Chris@42 23 typedef struct {
Chris@42 24 plan_rdft super;
Chris@42 25 plan *cld;
Chris@42 26 plan **cldws;
Chris@42 27 int nthr;
Chris@42 28 INT r;
Chris@42 29 } P;
Chris@42 30
Chris@42 31 typedef struct {
Chris@42 32 plan **cldws;
Chris@42 33 R *IO;
Chris@42 34 } PD;
Chris@42 35
Chris@42 36 static void *spawn_apply(spawn_data *d)
Chris@42 37 {
Chris@42 38 PD *ego = (PD *) d->data;
Chris@42 39
Chris@42 40 plan_hc2hc *cldw = (plan_hc2hc *) (ego->cldws[d->thr_num]);
Chris@42 41 cldw->apply((plan *) cldw, ego->IO);
Chris@42 42 return 0;
Chris@42 43 }
Chris@42 44
Chris@42 45 static void apply_dit(const plan *ego_, R *I, R *O)
Chris@42 46 {
Chris@42 47 const P *ego = (const P *) ego_;
Chris@42 48 plan_rdft *cld;
Chris@42 49
Chris@42 50 cld = (plan_rdft *) ego->cld;
Chris@42 51 cld->apply((plan *) cld, I, O);
Chris@42 52
Chris@42 53 {
Chris@42 54 PD d;
Chris@42 55
Chris@42 56 d.IO = O;
Chris@42 57 d.cldws = ego->cldws;
Chris@42 58
Chris@42 59 X(spawn_loop)(ego->nthr, ego->nthr, spawn_apply, (void*)&d);
Chris@42 60 }
Chris@42 61 }
Chris@42 62
Chris@42 63 static void apply_dif(const plan *ego_, R *I, R *O)
Chris@42 64 {
Chris@42 65 const P *ego = (const P *) ego_;
Chris@42 66 plan_rdft *cld;
Chris@42 67
Chris@42 68 {
Chris@42 69 PD d;
Chris@42 70
Chris@42 71 d.IO = I;
Chris@42 72 d.cldws = ego->cldws;
Chris@42 73
Chris@42 74 X(spawn_loop)(ego->nthr, ego->nthr, spawn_apply, (void*)&d);
Chris@42 75 }
Chris@42 76
Chris@42 77 cld = (plan_rdft *) ego->cld;
Chris@42 78 cld->apply((plan *) cld, I, O);
Chris@42 79 }
Chris@42 80
Chris@42 81 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@42 82 {
Chris@42 83 P *ego = (P *) ego_;
Chris@42 84 int i;
Chris@42 85 X(plan_awake)(ego->cld, wakefulness);
Chris@42 86 for (i = 0; i < ego->nthr; ++i)
Chris@42 87 X(plan_awake)(ego->cldws[i], wakefulness);
Chris@42 88 }
Chris@42 89
Chris@42 90 static void destroy(plan *ego_)
Chris@42 91 {
Chris@42 92 P *ego = (P *) ego_;
Chris@42 93 int i;
Chris@42 94 X(plan_destroy_internal)(ego->cld);
Chris@42 95 for (i = 0; i < ego->nthr; ++i)
Chris@42 96 X(plan_destroy_internal)(ego->cldws[i]);
Chris@42 97 X(ifree)(ego->cldws);
Chris@42 98 }
Chris@42 99
Chris@42 100 static void print(const plan *ego_, printer *p)
Chris@42 101 {
Chris@42 102 const P *ego = (const P *) ego_;
Chris@42 103 int i;
Chris@42 104 p->print(p, "(rdft-thr-ct-%s-x%d/%D",
Chris@42 105 ego->super.apply == apply_dit ? "dit" : "dif",
Chris@42 106 ego->nthr, ego->r);
Chris@42 107 for (i = 0; i < ego->nthr; ++i)
Chris@42 108 if (i == 0 || (ego->cldws[i] != ego->cldws[i-1] &&
Chris@42 109 (i <= 1 || ego->cldws[i] != ego->cldws[i-2])))
Chris@42 110 p->print(p, "%(%p%)", ego->cldws[i]);
Chris@42 111 p->print(p, "%(%p%))", ego->cld);
Chris@42 112 }
Chris@42 113
Chris@42 114 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
Chris@42 115 {
Chris@42 116 const hc2hc_solver *ego = (const hc2hc_solver *) ego_;
Chris@42 117 const problem_rdft *p;
Chris@42 118 P *pln = 0;
Chris@42 119 plan *cld = 0, **cldws = 0;
Chris@42 120 INT n, r, m, v, ivs, ovs, mcount;
Chris@42 121 int i, nthr, plnr_nthr_save;
Chris@42 122 INT block_size;
Chris@42 123 iodim *d;
Chris@42 124
Chris@42 125 static const plan_adt padt = {
Chris@42 126 X(rdft_solve), awake, print, destroy
Chris@42 127 };
Chris@42 128
Chris@42 129 if (plnr->nthr <= 1 || !X(hc2hc_applicable)(ego, p_, plnr))
Chris@42 130 return (plan *) 0;
Chris@42 131
Chris@42 132 p = (const problem_rdft *) p_;
Chris@42 133 d = p->sz->dims;
Chris@42 134 n = d[0].n;
Chris@42 135 r = X(choose_radix)(ego->r, n);
Chris@42 136 m = n / r;
Chris@42 137 mcount = (m + 2) / 2;
Chris@42 138
Chris@42 139 X(tensor_tornk1)(p->vecsz, &v, &ivs, &ovs);
Chris@42 140
Chris@42 141 block_size = (mcount + plnr->nthr - 1) / plnr->nthr;
Chris@42 142 nthr = (int)((mcount + block_size - 1) / block_size);
Chris@42 143 plnr_nthr_save = plnr->nthr;
Chris@42 144 plnr->nthr = (plnr->nthr + nthr - 1) / nthr;
Chris@42 145
Chris@42 146 cldws = (plan **) MALLOC(sizeof(plan *) * nthr, PLANS);
Chris@42 147 for (i = 0; i < nthr; ++i) cldws[i] = (plan *) 0;
Chris@42 148
Chris@42 149 switch (p->kind[0]) {
Chris@42 150 case R2HC:
Chris@42 151 for (i = 0; i < nthr; ++i) {
Chris@42 152 cldws[i] = ego->mkcldw(ego,
Chris@42 153 R2HC, r, m, d[0].os, v, ovs,
Chris@42 154 i*block_size,
Chris@42 155 (i == nthr - 1) ?
Chris@42 156 (mcount - i*block_size) : block_size,
Chris@42 157 p->O, plnr);
Chris@42 158 if (!cldws[i]) goto nada;
Chris@42 159 }
Chris@42 160
Chris@42 161 plnr->nthr = plnr_nthr_save;
Chris@42 162
Chris@42 163 cld = X(mkplan_d)(plnr,
Chris@42 164 X(mkproblem_rdft_d)(
Chris@42 165 X(mktensor_1d)(m, r * d[0].is, d[0].os),
Chris@42 166 X(mktensor_2d)(r, d[0].is, m * d[0].os,
Chris@42 167 v, ivs, ovs),
Chris@42 168 p->I, p->O, p->kind)
Chris@42 169 );
Chris@42 170 if (!cld) goto nada;
Chris@42 171
Chris@42 172 pln = MKPLAN_RDFT(P, &padt, apply_dit);
Chris@42 173 break;
Chris@42 174
Chris@42 175 case HC2R:
Chris@42 176 for (i = 0; i < nthr; ++i) {
Chris@42 177 cldws[i] = ego->mkcldw(ego,
Chris@42 178 HC2R, r, m, d[0].is, v, ivs,
Chris@42 179 i*block_size,
Chris@42 180 (i == nthr - 1) ?
Chris@42 181 (mcount - i*block_size) : block_size,
Chris@42 182 p->I, plnr);
Chris@42 183 if (!cldws[i]) goto nada;
Chris@42 184 }
Chris@42 185
Chris@42 186 plnr->nthr = plnr_nthr_save;
Chris@42 187
Chris@42 188 cld = X(mkplan_d)(plnr,
Chris@42 189 X(mkproblem_rdft_d)(
Chris@42 190 X(mktensor_1d)(m, d[0].is, r * d[0].os),
Chris@42 191 X(mktensor_2d)(r, m * d[0].is, d[0].os,
Chris@42 192 v, ivs, ovs),
Chris@42 193 p->I, p->O, p->kind)
Chris@42 194 );
Chris@42 195 if (!cld) goto nada;
Chris@42 196
Chris@42 197 pln = MKPLAN_RDFT(P, &padt, apply_dif);
Chris@42 198 break;
Chris@42 199
Chris@42 200 default:
Chris@42 201 A(0);
Chris@42 202 }
Chris@42 203
Chris@42 204 pln->cld = cld;
Chris@42 205 pln->cldws = cldws;
Chris@42 206 pln->nthr = nthr;
Chris@42 207 pln->r = r;
Chris@42 208 X(ops_zero)(&pln->super.super.ops);
Chris@42 209 for (i = 0; i < nthr; ++i) {
Chris@42 210 X(ops_add2)(&cldws[i]->ops, &pln->super.super.ops);
Chris@42 211 pln->super.super.could_prune_now_p |= cldws[i]->could_prune_now_p;
Chris@42 212 }
Chris@42 213 X(ops_add2)(&cld->ops, &pln->super.super.ops);
Chris@42 214 return &(pln->super.super);
Chris@42 215
Chris@42 216 nada:
Chris@42 217 if (cldws) {
Chris@42 218 for (i = 0; i < nthr; ++i)
Chris@42 219 X(plan_destroy_internal)(cldws[i]);
Chris@42 220 X(ifree)(cldws);
Chris@42 221 }
Chris@42 222 X(plan_destroy_internal)(cld);
Chris@42 223 return (plan *) 0;
Chris@42 224 }
Chris@42 225
Chris@42 226 hc2hc_solver *X(mksolver_hc2hc_threads)(size_t size, INT r,
Chris@42 227 hc2hc_mkinferior mkcldw)
Chris@42 228 {
Chris@42 229 static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
Chris@42 230 hc2hc_solver *slv = (hc2hc_solver *)X(mksolver)(size, &sadt);
Chris@42 231 slv->r = r;
Chris@42 232 slv->mkcldw = mkcldw;
Chris@42 233 return slv;
Chris@42 234 }