comparison src/fftw-3.3.3/rdft/hc2hc-generic.c @ 10:37bf6b4a2645

Add FFTW3
author Chris Cannam
date Wed, 20 Mar 2013 15:35:50 +0000
parents
children
comparison
equal deleted inserted replaced
9:c0fb53affa76 10:37bf6b4a2645
1 /*
2 * Copyright (c) 2003, 2007-11 Matteo Frigo
3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 */
20
21 /* express a hc2hc problem in terms of rdft + multiplication by
22 twiddle factors */
23
24 #include "hc2hc.h"
25
26 typedef hc2hc_solver S;
27
28 typedef struct {
29 plan_hc2hc super;
30
31 INT r, m, s, vl, vs, mstart1, mcount1;
32 plan *cld0;
33 plan *cld;
34 twid *td;
35 } P;
36
37
38 /**************************************************************/
39 static void mktwiddle(P *ego, enum wakefulness wakefulness)
40 {
41 static const tw_instr tw[] = { { TW_HALF, 0, 0 }, { TW_NEXT, 1, 0 } };
42
43 /* note that R and M are swapped, to allow for sequential
44 access both to data and twiddles */
45 X(twiddle_awake)(wakefulness, &ego->td, tw,
46 ego->r * ego->m, ego->m, ego->r);
47 }
48
49 static void bytwiddle(const P *ego, R *IO, R sign)
50 {
51 INT i, j, k;
52 INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
53 INT ms = m * s;
54 INT mstart1 = ego->mstart1, mcount1 = ego->mcount1;
55 INT wrem = 2 * ((m-1)/2 - mcount1);
56
57 for (i = 0; i < vl; ++i, IO += vs) {
58 const R *W = ego->td->W;
59
60 A(m % 2 == 1);
61 for (k = 1, W += (m - 1) + 2*(mstart1-1); k < r; ++k) {
62 /* pr := IO + (j + mstart1) * s + k * ms */
63 R *pr = IO + mstart1 * s + k * ms;
64
65 /* pi := IO + (m - j - mstart1) * s + k * ms */
66 R *pi = IO - mstart1 * s + (k + 1) * ms;
67
68 for (j = 0; j < mcount1; ++j, pr += s, pi -= s) {
69 E xr = *pr;
70 E xi = *pi;
71 E wr = W[0];
72 E wi = sign * W[1];
73 *pr = xr * wr - xi * wi;
74 *pi = xi * wr + xr * wi;
75 W += 2;
76 }
77 W += wrem;
78 }
79 }
80 }
81
82 static void swapri(R *IO, INT r, INT m, INT s, INT jstart, INT jend)
83 {
84 INT k;
85 INT ms = m * s;
86 INT js = jstart * s;
87 for (k = 0; k + k < r; ++k) {
88 /* pr := IO + (m - j) * s + k * ms */
89 R *pr = IO + (k + 1) * ms - js;
90 /* pi := IO + (m - j) * s + (r - 1 - k) * ms */
91 R *pi = IO + (r - k) * ms - js;
92 INT j;
93 for (j = jstart; j < jend; j += 1, pr -= s, pi -= s) {
94 R t = *pr;
95 *pr = *pi;
96 *pi = t;
97 }
98 }
99 }
100
101 static void reorder_dit(const P *ego, R *IO)
102 {
103 INT i, k;
104 INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
105 INT ms = m * s;
106 INT mstart1 = ego->mstart1, mend1 = mstart1 + ego->mcount1;
107
108 for (i = 0; i < vl; ++i, IO += vs) {
109 for (k = 1; k + k < r; ++k) {
110 R *p0 = IO + k * ms;
111 R *p1 = IO + (r - k) * ms;
112 INT j;
113
114 for (j = mstart1; j < mend1; ++j) {
115 E rp, ip, im, rm;
116 rp = p0[j * s];
117 im = p1[ms - j * s];
118 rm = p1[j * s];
119 ip = p0[ms - j * s];
120 p0[j * s] = rp - im;
121 p1[ms - j * s] = rp + im;
122 p1[j * s] = rm - ip;
123 p0[ms - j * s] = ip + rm;
124 }
125 }
126
127 swapri(IO, r, m, s, mstart1, mend1);
128 }
129 }
130
131 static void reorder_dif(const P *ego, R *IO)
132 {
133 INT i, k;
134 INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
135 INT ms = m * s;
136 INT mstart1 = ego->mstart1, mend1 = mstart1 + ego->mcount1;
137
138 for (i = 0; i < vl; ++i, IO += vs) {
139 swapri(IO, r, m, s, mstart1, mend1);
140
141 for (k = 1; k + k < r; ++k) {
142 R *p0 = IO + k * ms;
143 R *p1 = IO + (r - k) * ms;
144 const R half = K(0.5);
145 INT j;
146
147 for (j = mstart1; j < mend1; ++j) {
148 E rp, ip, im, rm;
149 rp = half * p0[j * s];
150 im = half * p1[ms - j * s];
151 rm = half * p1[j * s];
152 ip = half * p0[ms - j * s];
153 p0[j * s] = rp + im;
154 p1[ms - j * s] = im - rp;
155 p1[j * s] = rm + ip;
156 p0[ms - j * s] = ip - rm;
157 }
158 }
159 }
160 }
161
162 static int applicable(rdft_kind kind, INT r, INT m, const planner *plnr)
163 {
164 return (1
165 && (kind == R2HC || kind == HC2R)
166 && (m % 2)
167 && (r % 2)
168 && !NO_SLOWP(plnr)
169 );
170 }
171
172 /**************************************************************/
173
174 static void apply_dit(const plan *ego_, R *IO)
175 {
176 const P *ego = (const P *) ego_;
177 INT start;
178 plan_rdft *cld, *cld0;
179
180 bytwiddle(ego, IO, K(-1.0));
181
182 cld0 = (plan_rdft *) ego->cld0;
183 cld0->apply(ego->cld0, IO, IO);
184
185 start = ego->mstart1 * ego->s;
186 cld = (plan_rdft *) ego->cld;
187 cld->apply(ego->cld, IO + start, IO + start);
188
189 reorder_dit(ego, IO);
190 }
191
192 static void apply_dif(const plan *ego_, R *IO)
193 {
194 const P *ego = (const P *) ego_;
195 INT start;
196 plan_rdft *cld, *cld0;
197
198 reorder_dif(ego, IO);
199
200 cld0 = (plan_rdft *) ego->cld0;
201 cld0->apply(ego->cld0, IO, IO);
202
203 start = ego->mstart1 * ego->s;
204 cld = (plan_rdft *) ego->cld;
205 cld->apply(ego->cld, IO + start, IO + start);
206
207 bytwiddle(ego, IO, K(1.0));
208 }
209
210
211 static void awake(plan *ego_, enum wakefulness wakefulness)
212 {
213 P *ego = (P *) ego_;
214 X(plan_awake)(ego->cld0, wakefulness);
215 X(plan_awake)(ego->cld, wakefulness);
216 mktwiddle(ego, wakefulness);
217 }
218
219 static void destroy(plan *ego_)
220 {
221 P *ego = (P *) ego_;
222 X(plan_destroy_internal)(ego->cld);
223 X(plan_destroy_internal)(ego->cld0);
224 }
225
226 static void print(const plan *ego_, printer *p)
227 {
228 const P *ego = (const P *) ego_;
229 p->print(p, "(hc2hc-generic-%s-%D-%D%v%(%p%)%(%p%))",
230 ego->super.apply == apply_dit ? "dit" : "dif",
231 ego->r, ego->m, ego->vl, ego->cld0, ego->cld);
232 }
233
234 static plan *mkcldw(const hc2hc_solver *ego_,
235 rdft_kind kind, INT r, INT m, INT s, INT vl, INT vs,
236 INT mstart, INT mcount,
237 R *IO, planner *plnr)
238 {
239 P *pln;
240 plan *cld0 = 0, *cld = 0;
241 INT mstart1, mcount1, mstride;
242
243 static const plan_adt padt = {
244 0, awake, print, destroy
245 };
246
247 UNUSED(ego_);
248
249 A(mstart >= 0 && mcount > 0 && mstart + mcount <= (m+2)/2);
250
251 if (!applicable(kind, r, m, plnr))
252 return (plan *)0;
253
254 A(m % 2);
255 mstart1 = mstart + (mstart == 0);
256 mcount1 = mcount - (mstart == 0);
257 mstride = m - (mstart + mcount - 1) - mstart1;
258
259 /* 0th (DC) transform (vl of these), if mstart == 0 */
260 cld0 = X(mkplan_d)(plnr,
261 X(mkproblem_rdft_1_d)(
262 mstart == 0 ? X(mktensor_1d)(r, m * s, m * s)
263 : X(mktensor_0d)(),
264 X(mktensor_1d)(vl, vs, vs),
265 IO, IO, kind)
266 );
267 if (!cld0) goto nada;
268
269 /* twiddle transforms: there are 2 x mcount1 x vl of these
270 (where 2 corresponds to the real and imaginary parts) ...
271 the 2 x mcount1 loops are combined if mstart=0 and mcount=(m+2)/2. */
272 cld = X(mkplan_d)(plnr,
273 X(mkproblem_rdft_1_d)(
274 X(mktensor_1d)(r, m * s, m * s),
275 X(mktensor_3d)(2, mstride * s, mstride * s,
276 mcount1, s, s,
277 vl, vs, vs),
278 IO + s * mstart1, IO + s * mstart1, kind)
279 );
280 if (!cld) goto nada;
281
282 pln = MKPLAN_HC2HC(P, &padt, (kind == R2HC) ? apply_dit : apply_dif);
283 pln->cld = cld;
284 pln->cld0 = cld0;
285 pln->r = r;
286 pln->m = m;
287 pln->s = s;
288 pln->vl = vl;
289 pln->vs = vs;
290 pln->td = 0;
291 pln->mstart1 = mstart1;
292 pln->mcount1 = mcount1;
293
294 {
295 double n0 = 0.5 * (r - 1) * (2 * mcount1) * vl;
296 pln->super.super.ops = cld->ops;
297 pln->super.super.ops.mul += (kind == R2HC ? 5.0 : 7.0) * n0;
298 pln->super.super.ops.add += 4.0 * n0;
299 pln->super.super.ops.other += 11.0 * n0;
300 }
301 return &(pln->super.super);
302
303 nada:
304 X(plan_destroy_internal)(cld);
305 X(plan_destroy_internal)(cld0);
306 return (plan *) 0;
307 }
308
309 static void regsolver(planner *plnr, INT r)
310 {
311 S *slv = (S *)X(mksolver_hc2hc)(sizeof(S), r, mkcldw);
312 REGISTER_SOLVER(plnr, &(slv->super));
313 if (X(mksolver_hc2hc_hook)) {
314 slv = (S *)X(mksolver_hc2hc_hook)(sizeof(S), r, mkcldw);
315 REGISTER_SOLVER(plnr, &(slv->super));
316 }
317 }
318
319 void X(hc2hc_generic_register)(planner *p)
320 {
321 regsolver(p, 0);
322 }