annotate src/fftw-3.3.8/dft/generic.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 #include "dft/dft.h"
Chris@82 22
Chris@82 23 typedef struct {
Chris@82 24 solver super;
Chris@82 25 } S;
Chris@82 26
Chris@82 27 typedef struct {
Chris@82 28 plan_dft super;
Chris@82 29 twid *td;
Chris@82 30 INT n, is, os;
Chris@82 31 } P;
Chris@82 32
Chris@82 33
Chris@82 34 static void cdot(INT n, const E *x, const R *w,
Chris@82 35 R *or0, R *oi0, R *or1, R *oi1)
Chris@82 36 {
Chris@82 37 INT i;
Chris@82 38
Chris@82 39 E rr = x[0], ri = 0, ir = x[1], ii = 0;
Chris@82 40 x += 2;
Chris@82 41 for (i = 1; i + i < n; ++i) {
Chris@82 42 rr += x[0] * w[0];
Chris@82 43 ir += x[1] * w[0];
Chris@82 44 ri += x[2] * w[1];
Chris@82 45 ii += x[3] * w[1];
Chris@82 46 x += 4; w += 2;
Chris@82 47 }
Chris@82 48 *or0 = rr + ii;
Chris@82 49 *oi0 = ir - ri;
Chris@82 50 *or1 = rr - ii;
Chris@82 51 *oi1 = ir + ri;
Chris@82 52 }
Chris@82 53
Chris@82 54 static void hartley(INT n, const R *xr, const R *xi, INT xs, E *o,
Chris@82 55 R *pr, R *pi)
Chris@82 56 {
Chris@82 57 INT i;
Chris@82 58 E sr, si;
Chris@82 59 o[0] = sr = xr[0]; o[1] = si = xi[0]; o += 2;
Chris@82 60 for (i = 1; i + i < n; ++i) {
Chris@82 61 sr += (o[0] = xr[i * xs] + xr[(n - i) * xs]);
Chris@82 62 si += (o[1] = xi[i * xs] + xi[(n - i) * xs]);
Chris@82 63 o[2] = xr[i * xs] - xr[(n - i) * xs];
Chris@82 64 o[3] = xi[i * xs] - xi[(n - i) * xs];
Chris@82 65 o += 4;
Chris@82 66 }
Chris@82 67 *pr = sr;
Chris@82 68 *pi = si;
Chris@82 69 }
Chris@82 70
Chris@82 71 static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
Chris@82 72 {
Chris@82 73 const P *ego = (const P *) ego_;
Chris@82 74 INT i;
Chris@82 75 INT n = ego->n, is = ego->is, os = ego->os;
Chris@82 76 const R *W = ego->td->W;
Chris@82 77 E *buf;
Chris@82 78 size_t bufsz = n * 2 * sizeof(E);
Chris@82 79
Chris@82 80 BUF_ALLOC(E *, buf, bufsz);
Chris@82 81 hartley(n, ri, ii, is, buf, ro, io);
Chris@82 82
Chris@82 83 for (i = 1; i + i < n; ++i) {
Chris@82 84 cdot(n, buf, W,
Chris@82 85 ro + i * os, io + i * os,
Chris@82 86 ro + (n - i) * os, io + (n - i) * os);
Chris@82 87 W += n - 1;
Chris@82 88 }
Chris@82 89
Chris@82 90 BUF_FREE(buf, bufsz);
Chris@82 91 }
Chris@82 92
Chris@82 93 static void awake(plan *ego_, enum wakefulness wakefulness)
Chris@82 94 {
Chris@82 95 P *ego = (P *) ego_;
Chris@82 96 static const tw_instr half_tw[] = {
Chris@82 97 { TW_HALF, 1, 0 },
Chris@82 98 { TW_NEXT, 1, 0 }
Chris@82 99 };
Chris@82 100
Chris@82 101 X(twiddle_awake)(wakefulness, &ego->td, half_tw, ego->n, ego->n,
Chris@82 102 (ego->n - 1) / 2);
Chris@82 103 }
Chris@82 104
Chris@82 105 static void print(const plan *ego_, printer *p)
Chris@82 106 {
Chris@82 107 const P *ego = (const P *) ego_;
Chris@82 108
Chris@82 109 p->print(p, "(dft-generic-%D)", ego->n);
Chris@82 110 }
Chris@82 111
Chris@82 112 static int applicable(const solver *ego, const problem *p_,
Chris@82 113 const planner *plnr)
Chris@82 114 {
Chris@82 115 const problem_dft *p = (const problem_dft *) p_;
Chris@82 116 UNUSED(ego);
Chris@82 117
Chris@82 118 return (1
Chris@82 119 && p->sz->rnk == 1
Chris@82 120 && p->vecsz->rnk == 0
Chris@82 121 && (p->sz->dims[0].n % 2) == 1
Chris@82 122 && CIMPLIES(NO_LARGE_GENERICP(plnr), p->sz->dims[0].n < GENERIC_MIN_BAD)
Chris@82 123 && CIMPLIES(NO_SLOWP(plnr), p->sz->dims[0].n > GENERIC_MAX_SLOW)
Chris@82 124 && X(is_prime)(p->sz->dims[0].n)
Chris@82 125 );
Chris@82 126 }
Chris@82 127
Chris@82 128 static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
Chris@82 129 {
Chris@82 130 const problem_dft *p;
Chris@82 131 P *pln;
Chris@82 132 INT n;
Chris@82 133
Chris@82 134 static const plan_adt padt = {
Chris@82 135 X(dft_solve), awake, print, X(plan_null_destroy)
Chris@82 136 };
Chris@82 137
Chris@82 138 if (!applicable(ego, p_, plnr))
Chris@82 139 return (plan *)0;
Chris@82 140
Chris@82 141 pln = MKPLAN_DFT(P, &padt, apply);
Chris@82 142
Chris@82 143 p = (const problem_dft *) p_;
Chris@82 144 pln->n = n = p->sz->dims[0].n;
Chris@82 145 pln->is = p->sz->dims[0].is;
Chris@82 146 pln->os = p->sz->dims[0].os;
Chris@82 147 pln->td = 0;
Chris@82 148
Chris@82 149 pln->super.super.ops.add = (n-1) * 5;
Chris@82 150 pln->super.super.ops.mul = 0;
Chris@82 151 pln->super.super.ops.fma = (n-1) * (n-1) ;
Chris@82 152 #if 0 /* these are nice pipelined sequential loads and should cost nothing */
Chris@82 153 pln->super.super.ops.other = (n-1)*(4 + 1 + 2 * (n-1)); /* approximate */
Chris@82 154 #endif
Chris@82 155
Chris@82 156 return &(pln->super.super);
Chris@82 157 }
Chris@82 158
Chris@82 159 static solver *mksolver(void)
Chris@82 160 {
Chris@82 161 static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
Chris@82 162 S *slv = MKSOLVER(S, &sadt);
Chris@82 163 return &(slv->super);
Chris@82 164 }
Chris@82 165
Chris@82 166 void X(dft_generic_register)(planner *p)
Chris@82 167 {
Chris@82 168 REGISTER_SOLVER(p, mksolver());
Chris@82 169 }