/*
 * Copyright (c) 2007 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */

/* direct DFT solver via cell library */

#include "dft.h"
#include "ct.h"

#if HAVE_CELL

#include "simd.h"
#include "fftw-cell.h"

typedef struct {
     solver super;
     int cutdim;
} S;

typedef struct {
     plan_dft super;
     struct spu_radices radices;
     /* strides expressed in reals */
     INT n, is, os;
     struct cell_iodim v[2];
     int cutdim;
     int sign;
     int Wsz;
     R *W;

     /* optional twiddle factors for dftw: */
     INT rw, mw;  /* rw == 0 indicates no twiddle factors */
     twid *td;
} P;


/* op counts of SPU codelets */
static const opcnt n_ops[33] = {
     [2] = {2, 0, 0, 0},
     [3] = {3, 1, 3, 0},
     [4] = {6, 0, 2, 0},
     [5] = {7, 2, 9, 0},
     [6] = {12, 2, 6, 0},
     [7] = {9, 3, 21, 0},
     [8] = {16, 0, 10, 0},
     [9] = {12, 4, 34, 0},
     [10] = {24, 4, 18, 0},
     [11] = {15, 5, 55, 0},
     [12] = {30, 2, 18, 0},
     [13] = {31, 6, 57, 0},
     [14] = {32, 6, 42, 0},
     [15] = {36, 7, 42, 0},
     [16] = {38, 0, 34, 0},
     [32] = {88, 0, 98, 0},
};

static const opcnt t_ops[33] = {
     [2] = {3, 2, 0, 0},
     [3] = {5, 5, 3, 0},
     [4] = {9, 6, 2, 0},
     [5] = {11, 10, 9, 0},
     [6] = {17, 12, 6, 0},
     [7] = {15, 15, 21, 0},
     [8] = {23, 14, 10, 0},
     [9] = {20, 20, 34, 0},
     [10] = {33, 22, 18, 0},
     [12] = {41, 24, 18, 0},
     [15] = {50, 35, 42, 0},
     [16] = {53, 30, 34, 0},
     [32] = {119, 62, 98, 0},
};

static void compute_opcnt(const struct spu_radices *p,
                          INT n, INT v, opcnt *ops)
{
     INT r;
     signed char *q;

     X(ops_zero)(ops);

     /* all but the last radix in the list use twiddle codelets ... */
     for (q = p->r; (r = *q) > 0; ++q)
          X(ops_madd2)(v * (n / r) / VL, &t_ops[r], ops);

     /* ... and the last radix, stored negated as a terminator, uses a
        non-twiddle codelet */
     X(ops_madd2)(v * (n / (-r)) / VL, &n_ops[-r], ops);
}

static INT extent(struct cell_iodim *d)
{
     return d->n1 - d->n0;
}

/* FIXME: this is totally broken */
static void cost_model(const P *pln, opcnt *ops)
{
     INT r = pln->n;
     INT v0 = extent(pln->v + 0);
     INT v1 = extent(pln->v + 1);

     compute_opcnt(&pln->radices, r, v0 * v1, ops);

     /* penalize cuts across short dimensions */
     if (extent(pln->v + pln->cutdim) < extent(pln->v + 1 - pln->cutdim))
          ops->other += 3.14159;
}

/* expressed in real numbers */
static INT compute_twiddle_size(const struct spu_radices *p, INT n)
{
     INT sz = 0;
     INT r;
     signed char *q;

     for (q = p->r; (r = *q) > 0; ++q) {
          n /= r;
          sz += 2 * (r - 1) * n;
     }

     return sz;
}
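
/* Twiddle-table layout, as implied by the loop structure below and by
   compute_twiddle_size() above: for each radix-r pass over a transform
   of current length n, and for each block of VL consecutive transform
   indices j, the factors for butterfly index i = 1..r-1 and v = 0..VL-1
   (twiddle index i*(j+v)) are stored consecutively as interleaved
   real/imaginary pairs, for a total of 2*(r-1)*(n/r) reals per pass. */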
/* FIXME: find a way to use the normal FFTW twiddle mechanisms for this */
static void fill_twiddles(enum wakefulness wakefulness,
                          R *W, const signed char *q, INT n)
{
     INT r;

     for ( ; (r = *q) > 0; ++q) {
          triggen *t = X(mktriggen)(wakefulness, n);
          INT i, j, v, m = n / r;

          for (j = 0; j < m; j += VL) {
               for (i = 1; i < r; ++i) {
                    for (v = 0; v < VL; ++v) {
                         t->cexp(t, i * (j + v), W);
                         W += 2;
                    }
               }
          }
          X(triggen_destroy)(t);
          n = m;
     }
}

static R *make_twiddles(enum wakefulness wakefulness,
                        const struct spu_radices *p, INT n, int *Wsz)
{
     INT sz = compute_twiddle_size(p, n);
     R *W = X(cell_aligned_malloc)(sz * sizeof(R));
     A(FITS_IN_INT(sz));
     *Wsz = sz;
     fill_twiddles(wakefulness, W, p->r, n);
     return W;
}

static int fits_in_local_store(INT n, INT v)
{
     /* the SPU has space for 3 * MAX_N complex numbers.  We need
        n*(v+1) for data plus n for twiddle factors. */
     return n * (v+2) <= 3 * MAX_N;
}

static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
{
     const P *ego = (const P *) ego_;
     R *xi, *xo;
     int i, v;
     int nspe = X(cell_nspe)();
     int cutdim = ego->cutdim;
     int contiguous_r = ((ego->is == 2) && (ego->os == 2));

     /* find pointer to beginning of data, depending on sign */
     if (ego->sign == FFT_SIGN) {
          xi = ri; xo = ro;
     } else {
          xi = ii; xo = io;
     }

     /* fill contexts */
     v = ego->v[cutdim].n1;

     for (i = 0; i < nspe; ++i) {
          int chunk;
          struct spu_context *ctx = X(cell_get_ctx)(i);
          struct dft_context *dft = &ctx->u.dft;

          ctx->op = FFTW_SPE_DFT;

          dft->r = ego->radices;
          dft->n = ego->n;
          dft->is_bytes = ego->is * sizeof(R);
          dft->os_bytes = ego->os * sizeof(R);
          dft->v[0] = ego->v[0];
          dft->v[1] = ego->v[1];
          dft->sign = ego->sign;
          A(FITS_IN_INT(ego->Wsz * sizeof(R)));
          dft->Wsz_bytes = ego->Wsz * sizeof(R);
          dft->W = (uintptr_t)ego->W;
          dft->xi = (uintptr_t)xi;
          dft->xo = (uintptr_t)xo;

          /* partition v into pieces of equal size, subject to alignment
             constraints */
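          /* The remaining extent of the cut dimension is divided by the
             number of SPEs still to be assigned, so the chunks come out
             as equal as possible; when the cut is along dimension 0 and
             R is noncontiguous (so the SPU uses transposed DMA, see
             below), each chunk is rounded down to a multiple of VL. */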
          if (cutdim == 0 && !contiguous_r) {
               /* CUTDIM = 0 and the SPU uses transposed DMA.  We must
                  preserve the alignment of dimension 0 in the cut */
               chunk = VL * ((v - ego->v[cutdim].n0) / (VL * (nspe - i)));
          } else {
               chunk = (v - ego->v[cutdim].n0) / (nspe - i);
          }

          dft->v[cutdim].n1 = v;
          v -= chunk;
          dft->v[cutdim].n0 = v;

          /* optional dftw twiddles */
          if (ego->rw)
               dft->Ww = (uintptr_t)ego->td->W;
     }

     A(v == ego->v[cutdim].n0);

     /* activate SPEs */
     X(cell_spe_awake_all)();

     /* wait for completion */
     X(cell_spe_wait_all)();
}

static void print(const plan *ego_, printer *p)
{
     const P *ego = (const P *) ego_;
     int i;
     p->print(p, "(dft-direct-cell-%D/%d", ego->n, ego->cutdim);
     for (i = 0; i < 2; ++i)
          p->print(p, "%v", (INT)ego->v[i].n1);
     p->print(p, ")");
}

static void awake(plan *ego_, enum wakefulness wakefulness)
{
     P *ego = (P *) ego_;

     /* awake the optional dftw twiddles */
     if (ego->rw) {
          static const tw_instr tw[] = {
               { TW_CEXP, 0, 0 },
               { TW_FULL, 0, 0 },
               { TW_NEXT, 1, 0 }
          };
          X(twiddle_awake)(wakefulness, &ego->td, tw,
                           ego->rw * ego->mw, ego->rw, ego->mw);
     }

     /* awake the twiddles for the dft part */
     switch (wakefulness) {
         case SLEEPY:
              free(ego->W);
              ego->W = 0;
              break;
         default:
              ego->W = make_twiddles(wakefulness, &ego->radices,
                                     ego->n, &ego->Wsz);
              break;
     }
}

static int contiguous_or_aligned_p(INT s_bytes)
{
     return (s_bytes == 2 * sizeof(R)) || ((s_bytes % ALIGNMENTA) == 0);
}
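
/* Decide whether the SPU DMA engine can handle a length-R transform with
   two vector dimensions M and V (strides given in reals), and if so fill
   in the two cell_iodim's VD[] in the order expected by the SPU kernel.
   DM carries the dftw decimation direction for the M dimension (0 for the
   plain DFT solver).  Returns 0 if the layout is unsupported.  (This
   summary is inferred from the checks below and from the call sites.) */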
static int build_vdim(int inplacep,
                      INT r, INT irs, INT ors,
                      INT m, INT ims, INT oms, int dm,
                      INT v, INT ivs, INT ovs,
                      struct cell_iodim vd[2], int cutdim)
{
     int vm, vv;
     int contiguous_r = ((irs == 2) && (ors == 2));

     /* 32-bit overflow? */
     if (!(1
           && FITS_IN_INT(r)
           && FITS_IN_INT(irs * sizeof(R))
           && FITS_IN_INT(ors * sizeof(R))
           && FITS_IN_INT(m)
           && FITS_IN_INT(ims * sizeof(R))
           && FITS_IN_INT(oms * sizeof(R))
           && FITS_IN_INT(v)
           && FITS_IN_INT(ivs * sizeof(R))
           && FITS_IN_INT(ovs * sizeof(R))))
          return 0;

     /* R dimension must be aligned in all cases */
     if (!(1
           && r % VL == 0 /* REDUNDANT */
           && contiguous_or_aligned_p(irs * sizeof(R))
           && contiguous_or_aligned_p(ors * sizeof(R))))
          return 0;

     if ((irs == 2 || ims == 2) && (ors == 2 || oms == 2)) {
          /* Case 1: in SPU, let N=R, V0=M, V1=V */
          vm = 0;
          vv = 1;
     } else if ((irs == 2 || ivs == 2) && (ors == 2 || ovs == 2)) {
          /* Case 2: in SPU, let N=R, V0=V, V1=M */
          vm = 1;
          vv = 0;
     } else {
          /* can't do it */
          return 0;
     }

     vd[vm].n0 = 0; vd[vm].n1 = m;
     vd[vm].is_bytes = ims * sizeof(R); vd[vm].os_bytes = oms * sizeof(R);
     vd[vm].dm = dm;

     vd[vv].n0 = 0; vd[vv].n1 = v;
     vd[vv].is_bytes = ivs * sizeof(R); vd[vv].os_bytes = ovs * sizeof(R);
     vd[vv].dm = 0;

     /* Restrictions on the size of the SPU local store: */
     if (!(0
           /* for contiguous I/O, one array of size R must fit into
              local store.  (The fits_in_local_store() check is
              redundant because R <= MAX_N holds, but we check anyway
              for clarity.) */
           || (contiguous_r && fits_in_local_store(r, 1))

           /* for noncontiguous I/O, VL arrays of size R must fit into
              local store because of transposed DMA */
           || fits_in_local_store(r, VL)))
          return 0;

     /* SPU DMA restrictions: */
     if (!(1
           /* If R is noncontiguous, then the SPU uses transposed DMA
              and therefore dimension 0 must be aligned */
           && (contiguous_r || vd[0].n1 % VL == 0)

           /* dimension 1 is arbitrary */

           /* dimension-0 strides must be either contiguous or aligned */
           && contiguous_or_aligned_p((INT)vd[0].is_bytes)
           && contiguous_or_aligned_p((INT)vd[0].os_bytes)

           /* dimension-1 strides must be aligned */
           && ((vd[1].is_bytes % ALIGNMENTA) == 0)
           && ((vd[1].os_bytes % ALIGNMENTA) == 0)
          ))
          return 0;

     /* see if we can do it without overwriting the input with itself */
     if (!(0
           /* can operate out-of-place */
           || !inplacep

           /* all strides are in-place */
           || (1
               && irs == ors
               && ims == oms
               && ivs == ovs)

           /* we cut across in-place dimension 1, and dimension 0 fits
              into local store */
           || (1
               && cutdim == 1
               && vd[cutdim].is_bytes == vd[cutdim].os_bytes
               && fits_in_local_store(r, extent(vd + 0)))
          ))
          return 0;

     return 1;
}

static
const struct spu_radices *find_radices(R *ri, R *ii, R *ro, R *io,
                                       INT n, int *sign)
{
     const struct spu_radices *p;
     R *xi, *xo;

     /* 32-bit overflow? */
     if (!FITS_IN_INT(n))
          return 0;

     /* valid n? */
     if (n <= 0 || n > MAX_N || ((n % REQUIRE_N_MULTIPLE_OF) != 0))
          return 0;

     /* see if we have a plan for this N */
     p = X(spu_radices) + n / REQUIRE_N_MULTIPLE_OF;
     if (!p->r[0])
          return 0;

     /* check whether the data format is supported */
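     /* The SPU kernels operate on interleaved complex data only.  An
        I R I R ... array is handled by swapping the roles of the real
        and imaginary pointers, which amounts to conjugating input and
        output and therefore flips the transform sign; hence *sign is
        chosen together with the base pointers below. */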
     if (ii == ri + 1 && io == ro + 1) {         /* R I R I ... format */
          *sign = FFT_SIGN;
          xi = ri; xo = ro;
     } else if (ri == ii + 1 && ro == io + 1) {  /* I R I R ... format */
          *sign = -FFT_SIGN;
          xi = ii; xo = io;
     } else
          return 0; /* can't do it */

     if (!ALIGNEDA(xi) || !ALIGNEDA(xo))
          return 0;

     return p;
}

static const plan_adt padt = {
     X(dft_solve), awake, print, X(plan_null_destroy)
};

static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     const S *ego = (const S *)ego_;
     const problem_dft *p = (const problem_dft *) p_;
     int sign;
     const struct spu_radices *radices;
     struct cell_iodim vd[2];
     INT m, ims, oms, v, ivs, ovs;

     /* basic conditions */
     if (!(1
           && X(cell_nspe)() > 0
           && p->sz->rnk == 1
           && p->vecsz->rnk <= 2
           && !NO_SIMDP(plnr)
          ))
          return 0;

     /* see if SPU supports N */
     {
          iodim *d = p->sz->dims;
          radices = find_radices(p->ri, p->ii, p->ro, p->io, d[0].n, &sign);
          if (!radices)
               return 0;
     }

     /* canonicalize to vrank 2 */
     if (p->vecsz->rnk >= 1) {
          iodim *d = p->vecsz->dims + 0;
          m = d->n; ims = d->is; oms = d->os;
     } else {
          m = 1; ims = oms = 0;
     }

     if (p->vecsz->rnk >= 2) {
          iodim *d = p->vecsz->dims + 1;
          v = d->n; ivs = d->is; ovs = d->os;
     } else {
          v = 1; ivs = ovs = 0;
     }

     /* see if strides are supported by the SPU DMA routine */
     {
          iodim *d = p->sz->dims + 0;
          if (!build_vdim(p->ri == p->ro,
                          d->n, d->is, d->os,
                          m, ims, oms, 0,
                          v, ivs, ovs,
                          vd, ego->cutdim))
               return 0;
     }

     pln = MKPLAN_DFT(P, &padt, apply);

     pln->radices = *radices;
     {
          iodim *d = p->sz->dims + 0;
          pln->n = d[0].n;
          pln->is = d[0].is;
          pln->os = d[0].os;
     }
     pln->sign = sign;
     pln->v[0] = vd[0];
     pln->v[1] = vd[1];
     pln->cutdim = ego->cutdim;
     pln->W = 0;

     pln->rw = 0;

     cost_model(pln, &pln->super.super.ops);

     return &(pln->super.super);
}

static void solver_destroy(solver *ego)
{
     UNUSED(ego);
     X(cell_deactivate_spes)();
}

static solver *mksolver(int cutdim)
{
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, solver_destroy };
     S *slv = MKSOLVER(S, &sadt);
     slv->cutdim = cutdim;
     X(cell_activate_spes)();
     return &(slv->super);
}

void X(dft_direct_cell_register)(planner *p)
{
     REGISTER_SOLVER(p, mksolver(0));
     REGISTER_SOLVER(p, mksolver(1));
}

/**************************************************************/
/* solvers with twiddle factors: */

typedef struct {
     plan_dftw super;
     plan *cld;
} Pw;

typedef struct {
     ct_solver super;
     int cutdim;
} Sw;

static void destroyw(plan *ego_)
{
     Pw *ego = (Pw *) ego_;
     X(plan_destroy_internal)(ego->cld);
}

static void printw(const plan *ego_, printer *p)
{
     const Pw *ego = (const Pw *) ego_;
     const P *cld = (const P *) ego->cld;
     p->print(p, "(dftw-direct-cell-%D-%D%v%(%p%))",
              cld->rw, cld->mw, cld->v[1].n1,
              ego->cld);
}

static void awakew(plan *ego_, enum wakefulness wakefulness)
{
     Pw *ego = (Pw *) ego_;
     X(plan_awake)(ego->cld, wakefulness);
}

static void applyw(const plan *ego_, R *rio, R *iio)
{
     const Pw *ego = (const Pw *) ego_;
     dftapply cldapply = ((plan_dft *) ego->cld)->apply;
     cldapply(ego->cld, rio, iio, rio, iio);
}
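
/* Plan factory for the twiddle (dftw) version.  The child is the same
   direct-cell DFT plan as above, with rw/mw set nonzero so that apply()
   also hands the DIT/DIF twiddle table to the SPEs (see the ego->rw
   tests in apply() and awake()). */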
static plan *mkcldw(const ct_solver *ego_,
                    INT r, INT irs, INT ors,
                    INT m, INT ms,
                    INT v, INT ivs, INT ovs,
                    INT mstart, INT mcount,
                    R *rio, R *iio,
                    planner *plnr)
{
     const Sw *ego = (const Sw *)ego_;
     const struct spu_radices *radices;
     int sign;
     Pw *pln;
     P *cld;
     struct cell_iodim vd[2];
     int dm = 0;

     static const plan_adt padtw = {
          0, awakew, printw, destroyw
     };

     /* use only if cell is enabled */
     if (NO_SIMDP(plnr) || X(cell_nspe)() <= 0)
          return 0;

     /* no way in hell this SPU stuff is going to work with pthreads */
     if (mstart != 0 || mcount != m)
          return 0;

     /* don't bother for small N */
     if (r * m * v <= MAX_N / 16 /* ARBITRARY */)
          return 0;

     /* check whether the R dimension is supported */
     radices = find_radices(rio, iio, rio, iio, r, &sign);

     if (!radices)
          return 0;

     /* encode decimation in DM */
     switch (ego->super.dec) {
         case DECDIT:
         case DECDIT+TRANSPOSE:
              dm = 1;
              break;
         case DECDIF:
         case DECDIF+TRANSPOSE:
              dm = -1;
              break;
     }

     if (!build_vdim(1,
                     r, irs, ors,
                     m, ms, ms, dm,
                     v, ivs, ovs,
                     vd, ego->cutdim))
          return 0;

     cld = MKPLAN_DFT(P, &padt, apply);

     cld->radices = *radices;
     cld->n = r;
     cld->is = irs;
     cld->os = ors;
     cld->sign = sign;
     cld->W = 0;

     cld->rw = r; cld->mw = m; cld->td = 0;

     cld->v[0] = vd[0];
     cld->v[1] = vd[1];
     cld->cutdim = ego->cutdim;

     pln = MKPLAN_DFTW(Pw, &padtw, applyw);
     pln->cld = &cld->super.super;

     cost_model(cld, &pln->super.super.ops);

     /* for twiddle factors: one mul and one fma per complex point */
     pln->super.super.ops.fma += (r * m * v) / VL;
     pln->super.super.ops.mul += (r * m * v) / VL;

     /* FIXME: heuristics */
     /* pay penalty for large radices: */
     if (r > MAX_N / 16)
          pln->super.super.ops.other += ((r - (MAX_N / 16)) * m * v);

     return &(pln->super.super);
}

/* heuristic to enable vector recursion */
static int force_vrecur(const ct_solver *ego, const problem_dft *p)
{
     iodim *d;
     INT n, r, m;
     INT cutoff = 128;

     A(p->vecsz->rnk == 1);
     A(p->sz->rnk == 1);

     n = p->sz->dims[0].n;
     r = X(choose_radix)(ego->r, n);
     m = n / r;

     d = p->vecsz->dims + 0;
     return (1
             /* some vector dimension is contiguous */
             && (d->is == 2 || d->os == 2)

             /* vector is sufficiently long */
             && d->n >= cutoff

             /* transform is sufficiently long */
             && m >= cutoff
             && r >= cutoff);
}

static void regsolverw(planner *plnr, INT r, int dec, int cutdim)
{
     Sw *slv = (Sw *)X(mksolver_ct)(sizeof(Sw), r, dec, mkcldw, force_vrecur);
     slv->cutdim = cutdim;
     REGISTER_SOLVER(plnr, &(slv->super.super));
}

void X(ct_cell_direct_register)(planner *p)
{
     INT n;

     for (n = 0; n <= MAX_N; n += REQUIRE_N_MULTIPLE_OF) {
          const struct spu_radices *r =
               X(spu_radices) + n / REQUIRE_N_MULTIPLE_OF;
          if (r->r[0]) {
               regsolverw(p, n, DECDIT, 0);
               regsolverw(p, n, DECDIT, 1);
               regsolverw(p, n, DECDIF+TRANSPOSE, 0);
               regsolverw(p, n, DECDIF+TRANSPOSE, 1);
          }
     }
}


#endif /* HAVE_CELL */