Chris@42
|
1 /*
|
Chris@42
|
2 * Copyright (c) 2003, 2007-14 Matteo Frigo
|
Chris@42
|
3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
Chris@42
|
4 *
|
Chris@42
|
5 * This program is free software; you can redistribute it and/or modify
|
Chris@42
|
6 * it under the terms of the GNU General Public License as published by
|
Chris@42
|
7 * the Free Software Foundation; either version 2 of the License, or
|
Chris@42
|
8 * (at your option) any later version.
|
Chris@42
|
9 *
|
Chris@42
|
10 * This program is distributed in the hope that it will be useful,
|
Chris@42
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Chris@42
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Chris@42
|
13 * GNU General Public License for more details.
|
Chris@42
|
14 *
|
Chris@42
|
15 * You should have received a copy of the GNU General Public License
|
Chris@42
|
16 * along with this program; if not, write to the Free Software
|
Chris@42
|
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Chris@42
|
18 *
|
Chris@42
|
19 */
|
Chris@42
|
20
|
Chris@42
|
21 /* express a hc2hc problem in terms of rdft + multiplication by
|
Chris@42
|
22 twiddle factors */
|
Chris@42
|
23
|
Chris@42
|
24 #include "hc2hc.h"
|
Chris@42
|
25
|
Chris@42
|
26 typedef hc2hc_solver S;
|
Chris@42
|
27
|
Chris@42
|
28 typedef struct {
|
Chris@42
|
29 plan_hc2hc super;
|
Chris@42
|
30
|
Chris@42
|
31 INT r, m, s, vl, vs, mstart1, mcount1;
|
Chris@42
|
32 plan *cld0;
|
Chris@42
|
33 plan *cld;
|
Chris@42
|
34 twid *td;
|
Chris@42
|
35 } P;
|
Chris@42
|
36
|
Chris@42
|
37
|
Chris@42
|
38 /**************************************************************/
|
Chris@42
|
39 static void mktwiddle(P *ego, enum wakefulness wakefulness)
|
Chris@42
|
40 {
|
Chris@42
|
41 static const tw_instr tw[] = { { TW_HALF, 0, 0 }, { TW_NEXT, 1, 0 } };
|
Chris@42
|
42
|
Chris@42
|
43 /* note that R and M are swapped, to allow for sequential
|
Chris@42
|
44 access both to data and twiddles */
|
Chris@42
|
45 X(twiddle_awake)(wakefulness, &ego->td, tw,
|
Chris@42
|
46 ego->r * ego->m, ego->m, ego->r);
|
Chris@42
|
47 }
|
Chris@42
|
48
|
Chris@42
|
49 static void bytwiddle(const P *ego, R *IO, R sign)
|
Chris@42
|
50 {
|
Chris@42
|
51 INT i, j, k;
|
Chris@42
|
52 INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
|
Chris@42
|
53 INT ms = m * s;
|
Chris@42
|
54 INT mstart1 = ego->mstart1, mcount1 = ego->mcount1;
|
Chris@42
|
55 INT wrem = 2 * ((m-1)/2 - mcount1);
|
Chris@42
|
56
|
Chris@42
|
57 for (i = 0; i < vl; ++i, IO += vs) {
|
Chris@42
|
58 const R *W = ego->td->W;
|
Chris@42
|
59
|
Chris@42
|
60 A(m % 2 == 1);
|
Chris@42
|
61 for (k = 1, W += (m - 1) + 2*(mstart1-1); k < r; ++k) {
|
Chris@42
|
62 /* pr := IO + (j + mstart1) * s + k * ms */
|
Chris@42
|
63 R *pr = IO + mstart1 * s + k * ms;
|
Chris@42
|
64
|
Chris@42
|
65 /* pi := IO + (m - j - mstart1) * s + k * ms */
|
Chris@42
|
66 R *pi = IO - mstart1 * s + (k + 1) * ms;
|
Chris@42
|
67
|
Chris@42
|
68 for (j = 0; j < mcount1; ++j, pr += s, pi -= s) {
|
Chris@42
|
69 E xr = *pr;
|
Chris@42
|
70 E xi = *pi;
|
Chris@42
|
71 E wr = W[0];
|
Chris@42
|
72 E wi = sign * W[1];
|
Chris@42
|
73 *pr = xr * wr - xi * wi;
|
Chris@42
|
74 *pi = xi * wr + xr * wi;
|
Chris@42
|
75 W += 2;
|
Chris@42
|
76 }
|
Chris@42
|
77 W += wrem;
|
Chris@42
|
78 }
|
Chris@42
|
79 }
|
Chris@42
|
80 }
|
Chris@42
|
81
|
Chris@42
|
82 static void swapri(R *IO, INT r, INT m, INT s, INT jstart, INT jend)
|
Chris@42
|
83 {
|
Chris@42
|
84 INT k;
|
Chris@42
|
85 INT ms = m * s;
|
Chris@42
|
86 INT js = jstart * s;
|
Chris@42
|
87 for (k = 0; k + k < r; ++k) {
|
Chris@42
|
88 /* pr := IO + (m - j) * s + k * ms */
|
Chris@42
|
89 R *pr = IO + (k + 1) * ms - js;
|
Chris@42
|
90 /* pi := IO + (m - j) * s + (r - 1 - k) * ms */
|
Chris@42
|
91 R *pi = IO + (r - k) * ms - js;
|
Chris@42
|
92 INT j;
|
Chris@42
|
93 for (j = jstart; j < jend; j += 1, pr -= s, pi -= s) {
|
Chris@42
|
94 R t = *pr;
|
Chris@42
|
95 *pr = *pi;
|
Chris@42
|
96 *pi = t;
|
Chris@42
|
97 }
|
Chris@42
|
98 }
|
Chris@42
|
99 }
|
Chris@42
|
100
|
Chris@42
|
101 static void reorder_dit(const P *ego, R *IO)
|
Chris@42
|
102 {
|
Chris@42
|
103 INT i, k;
|
Chris@42
|
104 INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
|
Chris@42
|
105 INT ms = m * s;
|
Chris@42
|
106 INT mstart1 = ego->mstart1, mend1 = mstart1 + ego->mcount1;
|
Chris@42
|
107
|
Chris@42
|
108 for (i = 0; i < vl; ++i, IO += vs) {
|
Chris@42
|
109 for (k = 1; k + k < r; ++k) {
|
Chris@42
|
110 R *p0 = IO + k * ms;
|
Chris@42
|
111 R *p1 = IO + (r - k) * ms;
|
Chris@42
|
112 INT j;
|
Chris@42
|
113
|
Chris@42
|
114 for (j = mstart1; j < mend1; ++j) {
|
Chris@42
|
115 E rp, ip, im, rm;
|
Chris@42
|
116 rp = p0[j * s];
|
Chris@42
|
117 im = p1[ms - j * s];
|
Chris@42
|
118 rm = p1[j * s];
|
Chris@42
|
119 ip = p0[ms - j * s];
|
Chris@42
|
120 p0[j * s] = rp - im;
|
Chris@42
|
121 p1[ms - j * s] = rp + im;
|
Chris@42
|
122 p1[j * s] = rm - ip;
|
Chris@42
|
123 p0[ms - j * s] = ip + rm;
|
Chris@42
|
124 }
|
Chris@42
|
125 }
|
Chris@42
|
126
|
Chris@42
|
127 swapri(IO, r, m, s, mstart1, mend1);
|
Chris@42
|
128 }
|
Chris@42
|
129 }
|
Chris@42
|
130
|
Chris@42
|
131 static void reorder_dif(const P *ego, R *IO)
|
Chris@42
|
132 {
|
Chris@42
|
133 INT i, k;
|
Chris@42
|
134 INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
|
Chris@42
|
135 INT ms = m * s;
|
Chris@42
|
136 INT mstart1 = ego->mstart1, mend1 = mstart1 + ego->mcount1;
|
Chris@42
|
137
|
Chris@42
|
138 for (i = 0; i < vl; ++i, IO += vs) {
|
Chris@42
|
139 swapri(IO, r, m, s, mstart1, mend1);
|
Chris@42
|
140
|
Chris@42
|
141 for (k = 1; k + k < r; ++k) {
|
Chris@42
|
142 R *p0 = IO + k * ms;
|
Chris@42
|
143 R *p1 = IO + (r - k) * ms;
|
Chris@42
|
144 const R half = K(0.5);
|
Chris@42
|
145 INT j;
|
Chris@42
|
146
|
Chris@42
|
147 for (j = mstart1; j < mend1; ++j) {
|
Chris@42
|
148 E rp, ip, im, rm;
|
Chris@42
|
149 rp = half * p0[j * s];
|
Chris@42
|
150 im = half * p1[ms - j * s];
|
Chris@42
|
151 rm = half * p1[j * s];
|
Chris@42
|
152 ip = half * p0[ms - j * s];
|
Chris@42
|
153 p0[j * s] = rp + im;
|
Chris@42
|
154 p1[ms - j * s] = im - rp;
|
Chris@42
|
155 p1[j * s] = rm + ip;
|
Chris@42
|
156 p0[ms - j * s] = ip - rm;
|
Chris@42
|
157 }
|
Chris@42
|
158 }
|
Chris@42
|
159 }
|
Chris@42
|
160 }
|
Chris@42
|
161
|
Chris@42
|
162 static int applicable(rdft_kind kind, INT r, INT m, const planner *plnr)
|
Chris@42
|
163 {
|
Chris@42
|
164 return (1
|
Chris@42
|
165 && (kind == R2HC || kind == HC2R)
|
Chris@42
|
166 && (m % 2)
|
Chris@42
|
167 && (r % 2)
|
Chris@42
|
168 && !NO_SLOWP(plnr)
|
Chris@42
|
169 );
|
Chris@42
|
170 }
|
Chris@42
|
171
|
Chris@42
|
172 /**************************************************************/
|
Chris@42
|
173
|
Chris@42
|
174 static void apply_dit(const plan *ego_, R *IO)
|
Chris@42
|
175 {
|
Chris@42
|
176 const P *ego = (const P *) ego_;
|
Chris@42
|
177 INT start;
|
Chris@42
|
178 plan_rdft *cld, *cld0;
|
Chris@42
|
179
|
Chris@42
|
180 bytwiddle(ego, IO, K(-1.0));
|
Chris@42
|
181
|
Chris@42
|
182 cld0 = (plan_rdft *) ego->cld0;
|
Chris@42
|
183 cld0->apply(ego->cld0, IO, IO);
|
Chris@42
|
184
|
Chris@42
|
185 start = ego->mstart1 * ego->s;
|
Chris@42
|
186 cld = (plan_rdft *) ego->cld;
|
Chris@42
|
187 cld->apply(ego->cld, IO + start, IO + start);
|
Chris@42
|
188
|
Chris@42
|
189 reorder_dit(ego, IO);
|
Chris@42
|
190 }
|
Chris@42
|
191
|
Chris@42
|
192 static void apply_dif(const plan *ego_, R *IO)
|
Chris@42
|
193 {
|
Chris@42
|
194 const P *ego = (const P *) ego_;
|
Chris@42
|
195 INT start;
|
Chris@42
|
196 plan_rdft *cld, *cld0;
|
Chris@42
|
197
|
Chris@42
|
198 reorder_dif(ego, IO);
|
Chris@42
|
199
|
Chris@42
|
200 cld0 = (plan_rdft *) ego->cld0;
|
Chris@42
|
201 cld0->apply(ego->cld0, IO, IO);
|
Chris@42
|
202
|
Chris@42
|
203 start = ego->mstart1 * ego->s;
|
Chris@42
|
204 cld = (plan_rdft *) ego->cld;
|
Chris@42
|
205 cld->apply(ego->cld, IO + start, IO + start);
|
Chris@42
|
206
|
Chris@42
|
207 bytwiddle(ego, IO, K(1.0));
|
Chris@42
|
208 }
|
Chris@42
|
209
|
Chris@42
|
210
|
Chris@42
|
211 static void awake(plan *ego_, enum wakefulness wakefulness)
|
Chris@42
|
212 {
|
Chris@42
|
213 P *ego = (P *) ego_;
|
Chris@42
|
214 X(plan_awake)(ego->cld0, wakefulness);
|
Chris@42
|
215 X(plan_awake)(ego->cld, wakefulness);
|
Chris@42
|
216 mktwiddle(ego, wakefulness);
|
Chris@42
|
217 }
|
Chris@42
|
218
|
Chris@42
|
219 static void destroy(plan *ego_)
|
Chris@42
|
220 {
|
Chris@42
|
221 P *ego = (P *) ego_;
|
Chris@42
|
222 X(plan_destroy_internal)(ego->cld);
|
Chris@42
|
223 X(plan_destroy_internal)(ego->cld0);
|
Chris@42
|
224 }
|
Chris@42
|
225
|
Chris@42
|
226 static void print(const plan *ego_, printer *p)
|
Chris@42
|
227 {
|
Chris@42
|
228 const P *ego = (const P *) ego_;
|
Chris@42
|
229 p->print(p, "(hc2hc-generic-%s-%D-%D%v%(%p%)%(%p%))",
|
Chris@42
|
230 ego->super.apply == apply_dit ? "dit" : "dif",
|
Chris@42
|
231 ego->r, ego->m, ego->vl, ego->cld0, ego->cld);
|
Chris@42
|
232 }
|
Chris@42
|
233
|
Chris@42
|
234 static plan *mkcldw(const hc2hc_solver *ego_,
|
Chris@42
|
235 rdft_kind kind, INT r, INT m, INT s, INT vl, INT vs,
|
Chris@42
|
236 INT mstart, INT mcount,
|
Chris@42
|
237 R *IO, planner *plnr)
|
Chris@42
|
238 {
|
Chris@42
|
239 P *pln;
|
Chris@42
|
240 plan *cld0 = 0, *cld = 0;
|
Chris@42
|
241 INT mstart1, mcount1, mstride;
|
Chris@42
|
242
|
Chris@42
|
243 static const plan_adt padt = {
|
Chris@42
|
244 0, awake, print, destroy
|
Chris@42
|
245 };
|
Chris@42
|
246
|
Chris@42
|
247 UNUSED(ego_);
|
Chris@42
|
248
|
Chris@42
|
249 A(mstart >= 0 && mcount > 0 && mstart + mcount <= (m+2)/2);
|
Chris@42
|
250
|
Chris@42
|
251 if (!applicable(kind, r, m, plnr))
|
Chris@42
|
252 return (plan *)0;
|
Chris@42
|
253
|
Chris@42
|
254 A(m % 2);
|
Chris@42
|
255 mstart1 = mstart + (mstart == 0);
|
Chris@42
|
256 mcount1 = mcount - (mstart == 0);
|
Chris@42
|
257 mstride = m - (mstart + mcount - 1) - mstart1;
|
Chris@42
|
258
|
Chris@42
|
259 /* 0th (DC) transform (vl of these), if mstart == 0 */
|
Chris@42
|
260 cld0 = X(mkplan_d)(plnr,
|
Chris@42
|
261 X(mkproblem_rdft_1_d)(
|
Chris@42
|
262 mstart == 0 ? X(mktensor_1d)(r, m * s, m * s)
|
Chris@42
|
263 : X(mktensor_0d)(),
|
Chris@42
|
264 X(mktensor_1d)(vl, vs, vs),
|
Chris@42
|
265 IO, IO, kind)
|
Chris@42
|
266 );
|
Chris@42
|
267 if (!cld0) goto nada;
|
Chris@42
|
268
|
Chris@42
|
269 /* twiddle transforms: there are 2 x mcount1 x vl of these
|
Chris@42
|
270 (where 2 corresponds to the real and imaginary parts) ...
|
Chris@42
|
271 the 2 x mcount1 loops are combined if mstart=0 and mcount=(m+2)/2. */
|
Chris@42
|
272 cld = X(mkplan_d)(plnr,
|
Chris@42
|
273 X(mkproblem_rdft_1_d)(
|
Chris@42
|
274 X(mktensor_1d)(r, m * s, m * s),
|
Chris@42
|
275 X(mktensor_3d)(2, mstride * s, mstride * s,
|
Chris@42
|
276 mcount1, s, s,
|
Chris@42
|
277 vl, vs, vs),
|
Chris@42
|
278 IO + s * mstart1, IO + s * mstart1, kind)
|
Chris@42
|
279 );
|
Chris@42
|
280 if (!cld) goto nada;
|
Chris@42
|
281
|
Chris@42
|
282 pln = MKPLAN_HC2HC(P, &padt, (kind == R2HC) ? apply_dit : apply_dif);
|
Chris@42
|
283 pln->cld = cld;
|
Chris@42
|
284 pln->cld0 = cld0;
|
Chris@42
|
285 pln->r = r;
|
Chris@42
|
286 pln->m = m;
|
Chris@42
|
287 pln->s = s;
|
Chris@42
|
288 pln->vl = vl;
|
Chris@42
|
289 pln->vs = vs;
|
Chris@42
|
290 pln->td = 0;
|
Chris@42
|
291 pln->mstart1 = mstart1;
|
Chris@42
|
292 pln->mcount1 = mcount1;
|
Chris@42
|
293
|
Chris@42
|
294 {
|
Chris@42
|
295 double n0 = 0.5 * (r - 1) * (2 * mcount1) * vl;
|
Chris@42
|
296 pln->super.super.ops = cld->ops;
|
Chris@42
|
297 pln->super.super.ops.mul += (kind == R2HC ? 5.0 : 7.0) * n0;
|
Chris@42
|
298 pln->super.super.ops.add += 4.0 * n0;
|
Chris@42
|
299 pln->super.super.ops.other += 11.0 * n0;
|
Chris@42
|
300 }
|
Chris@42
|
301 return &(pln->super.super);
|
Chris@42
|
302
|
Chris@42
|
303 nada:
|
Chris@42
|
304 X(plan_destroy_internal)(cld);
|
Chris@42
|
305 X(plan_destroy_internal)(cld0);
|
Chris@42
|
306 return (plan *) 0;
|
Chris@42
|
307 }
|
Chris@42
|
308
|
Chris@42
|
309 static void regsolver(planner *plnr, INT r)
|
Chris@42
|
310 {
|
Chris@42
|
311 S *slv = (S *)X(mksolver_hc2hc)(sizeof(S), r, mkcldw);
|
Chris@42
|
312 REGISTER_SOLVER(plnr, &(slv->super));
|
Chris@42
|
313 if (X(mksolver_hc2hc_hook)) {
|
Chris@42
|
314 slv = (S *)X(mksolver_hc2hc_hook)(sizeof(S), r, mkcldw);
|
Chris@42
|
315 REGISTER_SOLVER(plnr, &(slv->super));
|
Chris@42
|
316 }
|
Chris@42
|
317 }
|
Chris@42
|
318
|
Chris@42
|
319 void X(hc2hc_generic_register)(planner *p)
|
Chris@42
|
320 {
|
Chris@42
|
321 regsolver(p, 0);
|
Chris@42
|
322 }
|