Chris@10
|
1 /*
|
Chris@10
|
2 * Copyright (c) 2003, 2007-11 Matteo Frigo
|
Chris@10
|
3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
|
Chris@10
|
4 *
|
Chris@10
|
5 * This program is free software; you can redistribute it and/or modify
|
Chris@10
|
6 * it under the terms of the GNU General Public License as published by
|
Chris@10
|
7 * the Free Software Foundation; either version 2 of the License, or
|
Chris@10
|
8 * (at your option) any later version.
|
Chris@10
|
9 *
|
Chris@10
|
10 * This program is distributed in the hope that it will be useful,
|
Chris@10
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Chris@10
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Chris@10
|
13 * GNU General Public License for more details.
|
Chris@10
|
14 *
|
Chris@10
|
15 * You should have received a copy of the GNU General Public License
|
Chris@10
|
16 * along with this program; if not, write to the Free Software
|
Chris@10
|
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Chris@10
|
18 *
|
Chris@10
|
19 */
|
Chris@10
|
20
|
Chris@10
|
21 /* express a hc2hc problem in terms of rdft + multiplication by
|
Chris@10
|
22 twiddle factors */
|
Chris@10
|
23
|
Chris@10
|
24 #include "hc2hc.h"
|
Chris@10
|
25
|
Chris@10
|
26 typedef hc2hc_solver S;
|
Chris@10
|
27
|
Chris@10
|
28 typedef struct {
|
Chris@10
|
29 plan_hc2hc super;
|
Chris@10
|
30
|
Chris@10
|
31 INT r, m, s, vl, vs, mstart1, mcount1;
|
Chris@10
|
32 plan *cld0;
|
Chris@10
|
33 plan *cld;
|
Chris@10
|
34 twid *td;
|
Chris@10
|
35 } P;
|
Chris@10
|
36
|
Chris@10
|
37
|
Chris@10
|
38 /**************************************************************/
|
Chris@10
|
39 static void mktwiddle(P *ego, enum wakefulness wakefulness)
|
Chris@10
|
40 {
|
Chris@10
|
41 static const tw_instr tw[] = { { TW_HALF, 0, 0 }, { TW_NEXT, 1, 0 } };
|
Chris@10
|
42
|
Chris@10
|
43 /* note that R and M are swapped, to allow for sequential
|
Chris@10
|
44 access both to data and twiddles */
|
Chris@10
|
45 X(twiddle_awake)(wakefulness, &ego->td, tw,
|
Chris@10
|
46 ego->r * ego->m, ego->m, ego->r);
|
Chris@10
|
47 }
|
Chris@10
|
48
|
Chris@10
|
49 static void bytwiddle(const P *ego, R *IO, R sign)
|
Chris@10
|
50 {
|
Chris@10
|
51 INT i, j, k;
|
Chris@10
|
52 INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
|
Chris@10
|
53 INT ms = m * s;
|
Chris@10
|
54 INT mstart1 = ego->mstart1, mcount1 = ego->mcount1;
|
Chris@10
|
55 INT wrem = 2 * ((m-1)/2 - mcount1);
|
Chris@10
|
56
|
Chris@10
|
57 for (i = 0; i < vl; ++i, IO += vs) {
|
Chris@10
|
58 const R *W = ego->td->W;
|
Chris@10
|
59
|
Chris@10
|
60 A(m % 2 == 1);
|
Chris@10
|
61 for (k = 1, W += (m - 1) + 2*(mstart1-1); k < r; ++k) {
|
Chris@10
|
62 /* pr := IO + (j + mstart1) * s + k * ms */
|
Chris@10
|
63 R *pr = IO + mstart1 * s + k * ms;
|
Chris@10
|
64
|
Chris@10
|
65 /* pi := IO + (m - j - mstart1) * s + k * ms */
|
Chris@10
|
66 R *pi = IO - mstart1 * s + (k + 1) * ms;
|
Chris@10
|
67
|
Chris@10
|
68 for (j = 0; j < mcount1; ++j, pr += s, pi -= s) {
|
Chris@10
|
69 E xr = *pr;
|
Chris@10
|
70 E xi = *pi;
|
Chris@10
|
71 E wr = W[0];
|
Chris@10
|
72 E wi = sign * W[1];
|
Chris@10
|
73 *pr = xr * wr - xi * wi;
|
Chris@10
|
74 *pi = xi * wr + xr * wi;
|
Chris@10
|
75 W += 2;
|
Chris@10
|
76 }
|
Chris@10
|
77 W += wrem;
|
Chris@10
|
78 }
|
Chris@10
|
79 }
|
Chris@10
|
80 }
|
Chris@10
|
81
|
Chris@10
|
82 static void swapri(R *IO, INT r, INT m, INT s, INT jstart, INT jend)
|
Chris@10
|
83 {
|
Chris@10
|
84 INT k;
|
Chris@10
|
85 INT ms = m * s;
|
Chris@10
|
86 INT js = jstart * s;
|
Chris@10
|
87 for (k = 0; k + k < r; ++k) {
|
Chris@10
|
88 /* pr := IO + (m - j) * s + k * ms */
|
Chris@10
|
89 R *pr = IO + (k + 1) * ms - js;
|
Chris@10
|
90 /* pi := IO + (m - j) * s + (r - 1 - k) * ms */
|
Chris@10
|
91 R *pi = IO + (r - k) * ms - js;
|
Chris@10
|
92 INT j;
|
Chris@10
|
93 for (j = jstart; j < jend; j += 1, pr -= s, pi -= s) {
|
Chris@10
|
94 R t = *pr;
|
Chris@10
|
95 *pr = *pi;
|
Chris@10
|
96 *pi = t;
|
Chris@10
|
97 }
|
Chris@10
|
98 }
|
Chris@10
|
99 }
|
Chris@10
|
100
|
Chris@10
|
101 static void reorder_dit(const P *ego, R *IO)
|
Chris@10
|
102 {
|
Chris@10
|
103 INT i, k;
|
Chris@10
|
104 INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
|
Chris@10
|
105 INT ms = m * s;
|
Chris@10
|
106 INT mstart1 = ego->mstart1, mend1 = mstart1 + ego->mcount1;
|
Chris@10
|
107
|
Chris@10
|
108 for (i = 0; i < vl; ++i, IO += vs) {
|
Chris@10
|
109 for (k = 1; k + k < r; ++k) {
|
Chris@10
|
110 R *p0 = IO + k * ms;
|
Chris@10
|
111 R *p1 = IO + (r - k) * ms;
|
Chris@10
|
112 INT j;
|
Chris@10
|
113
|
Chris@10
|
114 for (j = mstart1; j < mend1; ++j) {
|
Chris@10
|
115 E rp, ip, im, rm;
|
Chris@10
|
116 rp = p0[j * s];
|
Chris@10
|
117 im = p1[ms - j * s];
|
Chris@10
|
118 rm = p1[j * s];
|
Chris@10
|
119 ip = p0[ms - j * s];
|
Chris@10
|
120 p0[j * s] = rp - im;
|
Chris@10
|
121 p1[ms - j * s] = rp + im;
|
Chris@10
|
122 p1[j * s] = rm - ip;
|
Chris@10
|
123 p0[ms - j * s] = ip + rm;
|
Chris@10
|
124 }
|
Chris@10
|
125 }
|
Chris@10
|
126
|
Chris@10
|
127 swapri(IO, r, m, s, mstart1, mend1);
|
Chris@10
|
128 }
|
Chris@10
|
129 }
|
Chris@10
|
130
|
Chris@10
|
131 static void reorder_dif(const P *ego, R *IO)
|
Chris@10
|
132 {
|
Chris@10
|
133 INT i, k;
|
Chris@10
|
134 INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
|
Chris@10
|
135 INT ms = m * s;
|
Chris@10
|
136 INT mstart1 = ego->mstart1, mend1 = mstart1 + ego->mcount1;
|
Chris@10
|
137
|
Chris@10
|
138 for (i = 0; i < vl; ++i, IO += vs) {
|
Chris@10
|
139 swapri(IO, r, m, s, mstart1, mend1);
|
Chris@10
|
140
|
Chris@10
|
141 for (k = 1; k + k < r; ++k) {
|
Chris@10
|
142 R *p0 = IO + k * ms;
|
Chris@10
|
143 R *p1 = IO + (r - k) * ms;
|
Chris@10
|
144 const R half = K(0.5);
|
Chris@10
|
145 INT j;
|
Chris@10
|
146
|
Chris@10
|
147 for (j = mstart1; j < mend1; ++j) {
|
Chris@10
|
148 E rp, ip, im, rm;
|
Chris@10
|
149 rp = half * p0[j * s];
|
Chris@10
|
150 im = half * p1[ms - j * s];
|
Chris@10
|
151 rm = half * p1[j * s];
|
Chris@10
|
152 ip = half * p0[ms - j * s];
|
Chris@10
|
153 p0[j * s] = rp + im;
|
Chris@10
|
154 p1[ms - j * s] = im - rp;
|
Chris@10
|
155 p1[j * s] = rm + ip;
|
Chris@10
|
156 p0[ms - j * s] = ip - rm;
|
Chris@10
|
157 }
|
Chris@10
|
158 }
|
Chris@10
|
159 }
|
Chris@10
|
160 }
|
Chris@10
|
161
|
Chris@10
|
162 static int applicable(rdft_kind kind, INT r, INT m, const planner *plnr)
|
Chris@10
|
163 {
|
Chris@10
|
164 return (1
|
Chris@10
|
165 && (kind == R2HC || kind == HC2R)
|
Chris@10
|
166 && (m % 2)
|
Chris@10
|
167 && (r % 2)
|
Chris@10
|
168 && !NO_SLOWP(plnr)
|
Chris@10
|
169 );
|
Chris@10
|
170 }
|
Chris@10
|
171
|
Chris@10
|
172 /**************************************************************/
|
Chris@10
|
173
|
Chris@10
|
174 static void apply_dit(const plan *ego_, R *IO)
|
Chris@10
|
175 {
|
Chris@10
|
176 const P *ego = (const P *) ego_;
|
Chris@10
|
177 INT start;
|
Chris@10
|
178 plan_rdft *cld, *cld0;
|
Chris@10
|
179
|
Chris@10
|
180 bytwiddle(ego, IO, K(-1.0));
|
Chris@10
|
181
|
Chris@10
|
182 cld0 = (plan_rdft *) ego->cld0;
|
Chris@10
|
183 cld0->apply(ego->cld0, IO, IO);
|
Chris@10
|
184
|
Chris@10
|
185 start = ego->mstart1 * ego->s;
|
Chris@10
|
186 cld = (plan_rdft *) ego->cld;
|
Chris@10
|
187 cld->apply(ego->cld, IO + start, IO + start);
|
Chris@10
|
188
|
Chris@10
|
189 reorder_dit(ego, IO);
|
Chris@10
|
190 }
|
Chris@10
|
191
|
Chris@10
|
192 static void apply_dif(const plan *ego_, R *IO)
|
Chris@10
|
193 {
|
Chris@10
|
194 const P *ego = (const P *) ego_;
|
Chris@10
|
195 INT start;
|
Chris@10
|
196 plan_rdft *cld, *cld0;
|
Chris@10
|
197
|
Chris@10
|
198 reorder_dif(ego, IO);
|
Chris@10
|
199
|
Chris@10
|
200 cld0 = (plan_rdft *) ego->cld0;
|
Chris@10
|
201 cld0->apply(ego->cld0, IO, IO);
|
Chris@10
|
202
|
Chris@10
|
203 start = ego->mstart1 * ego->s;
|
Chris@10
|
204 cld = (plan_rdft *) ego->cld;
|
Chris@10
|
205 cld->apply(ego->cld, IO + start, IO + start);
|
Chris@10
|
206
|
Chris@10
|
207 bytwiddle(ego, IO, K(1.0));
|
Chris@10
|
208 }
|
Chris@10
|
209
|
Chris@10
|
210
|
Chris@10
|
211 static void awake(plan *ego_, enum wakefulness wakefulness)
|
Chris@10
|
212 {
|
Chris@10
|
213 P *ego = (P *) ego_;
|
Chris@10
|
214 X(plan_awake)(ego->cld0, wakefulness);
|
Chris@10
|
215 X(plan_awake)(ego->cld, wakefulness);
|
Chris@10
|
216 mktwiddle(ego, wakefulness);
|
Chris@10
|
217 }
|
Chris@10
|
218
|
Chris@10
|
219 static void destroy(plan *ego_)
|
Chris@10
|
220 {
|
Chris@10
|
221 P *ego = (P *) ego_;
|
Chris@10
|
222 X(plan_destroy_internal)(ego->cld);
|
Chris@10
|
223 X(plan_destroy_internal)(ego->cld0);
|
Chris@10
|
224 }
|
Chris@10
|
225
|
Chris@10
|
226 static void print(const plan *ego_, printer *p)
|
Chris@10
|
227 {
|
Chris@10
|
228 const P *ego = (const P *) ego_;
|
Chris@10
|
229 p->print(p, "(hc2hc-generic-%s-%D-%D%v%(%p%)%(%p%))",
|
Chris@10
|
230 ego->super.apply == apply_dit ? "dit" : "dif",
|
Chris@10
|
231 ego->r, ego->m, ego->vl, ego->cld0, ego->cld);
|
Chris@10
|
232 }
|
Chris@10
|
233
|
Chris@10
|
234 static plan *mkcldw(const hc2hc_solver *ego_,
|
Chris@10
|
235 rdft_kind kind, INT r, INT m, INT s, INT vl, INT vs,
|
Chris@10
|
236 INT mstart, INT mcount,
|
Chris@10
|
237 R *IO, planner *plnr)
|
Chris@10
|
238 {
|
Chris@10
|
239 P *pln;
|
Chris@10
|
240 plan *cld0 = 0, *cld = 0;
|
Chris@10
|
241 INT mstart1, mcount1, mstride;
|
Chris@10
|
242
|
Chris@10
|
243 static const plan_adt padt = {
|
Chris@10
|
244 0, awake, print, destroy
|
Chris@10
|
245 };
|
Chris@10
|
246
|
Chris@10
|
247 UNUSED(ego_);
|
Chris@10
|
248
|
Chris@10
|
249 A(mstart >= 0 && mcount > 0 && mstart + mcount <= (m+2)/2);
|
Chris@10
|
250
|
Chris@10
|
251 if (!applicable(kind, r, m, plnr))
|
Chris@10
|
252 return (plan *)0;
|
Chris@10
|
253
|
Chris@10
|
254 A(m % 2);
|
Chris@10
|
255 mstart1 = mstart + (mstart == 0);
|
Chris@10
|
256 mcount1 = mcount - (mstart == 0);
|
Chris@10
|
257 mstride = m - (mstart + mcount - 1) - mstart1;
|
Chris@10
|
258
|
Chris@10
|
259 /* 0th (DC) transform (vl of these), if mstart == 0 */
|
Chris@10
|
260 cld0 = X(mkplan_d)(plnr,
|
Chris@10
|
261 X(mkproblem_rdft_1_d)(
|
Chris@10
|
262 mstart == 0 ? X(mktensor_1d)(r, m * s, m * s)
|
Chris@10
|
263 : X(mktensor_0d)(),
|
Chris@10
|
264 X(mktensor_1d)(vl, vs, vs),
|
Chris@10
|
265 IO, IO, kind)
|
Chris@10
|
266 );
|
Chris@10
|
267 if (!cld0) goto nada;
|
Chris@10
|
268
|
Chris@10
|
269 /* twiddle transforms: there are 2 x mcount1 x vl of these
|
Chris@10
|
270 (where 2 corresponds to the real and imaginary parts) ...
|
Chris@10
|
271 the 2 x mcount1 loops are combined if mstart=0 and mcount=(m+2)/2. */
|
Chris@10
|
272 cld = X(mkplan_d)(plnr,
|
Chris@10
|
273 X(mkproblem_rdft_1_d)(
|
Chris@10
|
274 X(mktensor_1d)(r, m * s, m * s),
|
Chris@10
|
275 X(mktensor_3d)(2, mstride * s, mstride * s,
|
Chris@10
|
276 mcount1, s, s,
|
Chris@10
|
277 vl, vs, vs),
|
Chris@10
|
278 IO + s * mstart1, IO + s * mstart1, kind)
|
Chris@10
|
279 );
|
Chris@10
|
280 if (!cld) goto nada;
|
Chris@10
|
281
|
Chris@10
|
282 pln = MKPLAN_HC2HC(P, &padt, (kind == R2HC) ? apply_dit : apply_dif);
|
Chris@10
|
283 pln->cld = cld;
|
Chris@10
|
284 pln->cld0 = cld0;
|
Chris@10
|
285 pln->r = r;
|
Chris@10
|
286 pln->m = m;
|
Chris@10
|
287 pln->s = s;
|
Chris@10
|
288 pln->vl = vl;
|
Chris@10
|
289 pln->vs = vs;
|
Chris@10
|
290 pln->td = 0;
|
Chris@10
|
291 pln->mstart1 = mstart1;
|
Chris@10
|
292 pln->mcount1 = mcount1;
|
Chris@10
|
293
|
Chris@10
|
294 {
|
Chris@10
|
295 double n0 = 0.5 * (r - 1) * (2 * mcount1) * vl;
|
Chris@10
|
296 pln->super.super.ops = cld->ops;
|
Chris@10
|
297 pln->super.super.ops.mul += (kind == R2HC ? 5.0 : 7.0) * n0;
|
Chris@10
|
298 pln->super.super.ops.add += 4.0 * n0;
|
Chris@10
|
299 pln->super.super.ops.other += 11.0 * n0;
|
Chris@10
|
300 }
|
Chris@10
|
301 return &(pln->super.super);
|
Chris@10
|
302
|
Chris@10
|
303 nada:
|
Chris@10
|
304 X(plan_destroy_internal)(cld);
|
Chris@10
|
305 X(plan_destroy_internal)(cld0);
|
Chris@10
|
306 return (plan *) 0;
|
Chris@10
|
307 }
|
Chris@10
|
308
|
Chris@10
|
309 static void regsolver(planner *plnr, INT r)
|
Chris@10
|
310 {
|
Chris@10
|
311 S *slv = (S *)X(mksolver_hc2hc)(sizeof(S), r, mkcldw);
|
Chris@10
|
312 REGISTER_SOLVER(plnr, &(slv->super));
|
Chris@10
|
313 if (X(mksolver_hc2hc_hook)) {
|
Chris@10
|
314 slv = (S *)X(mksolver_hc2hc_hook)(sizeof(S), r, mkcldw);
|
Chris@10
|
315 REGISTER_SOLVER(plnr, &(slv->super));
|
Chris@10
|
316 }
|
Chris@10
|
317 }
|
Chris@10
|
318
|
Chris@10
|
319 void X(hc2hc_generic_register)(planner *p)
|
Chris@10
|
320 {
|
Chris@10
|
321 regsolver(p, 0);
|
Chris@10
|
322 }
|