Mercurial > hg > sv-dependency-builds
comparison src/fftw-3.3.3/rdft/hc2hc-generic.c @ 10:37bf6b4a2645
Add FFTW3
author | Chris Cannam |
---|---|
date | Wed, 20 Mar 2013 15:35:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:c0fb53affa76 | 10:37bf6b4a2645 |
---|---|
1 /* | |
2 * Copyright (c) 2003, 2007-11 Matteo Frigo | |
3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License as published by | |
7 * the Free Software Foundation; either version 2 of the License, or | |
8 * (at your option) any later version. | |
9 * | |
10 * This program is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 * GNU General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU General Public License | |
16 * along with this program; if not, write to the Free Software | |
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 * | |
19 */ | |
20 | |
21 /* express a hc2hc problem in terms of rdft + multiplication by | |
22 twiddle factors */ | |
23 | |
24 #include "hc2hc.h" | |
25 | |
26 typedef hc2hc_solver S; | |
27 | |
28 typedef struct { | |
29 plan_hc2hc super; | |
30 | |
31 INT r, m, s, vl, vs, mstart1, mcount1; | |
32 plan *cld0; | |
33 plan *cld; | |
34 twid *td; | |
35 } P; | |
36 | |
37 | |
38 /**************************************************************/ | |
39 static void mktwiddle(P *ego, enum wakefulness wakefulness) | |
40 { | |
41 static const tw_instr tw[] = { { TW_HALF, 0, 0 }, { TW_NEXT, 1, 0 } }; | |
42 | |
43 /* note that R and M are swapped, to allow for sequential | |
44 access both to data and twiddles */ | |
45 X(twiddle_awake)(wakefulness, &ego->td, tw, | |
46 ego->r * ego->m, ego->m, ego->r); | |
47 } | |
48 | |
49 static void bytwiddle(const P *ego, R *IO, R sign) | |
50 { | |
51 INT i, j, k; | |
52 INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs; | |
53 INT ms = m * s; | |
54 INT mstart1 = ego->mstart1, mcount1 = ego->mcount1; | |
55 INT wrem = 2 * ((m-1)/2 - mcount1); | |
56 | |
57 for (i = 0; i < vl; ++i, IO += vs) { | |
58 const R *W = ego->td->W; | |
59 | |
60 A(m % 2 == 1); | |
61 for (k = 1, W += (m - 1) + 2*(mstart1-1); k < r; ++k) { | |
62 /* pr := IO + (j + mstart1) * s + k * ms */ | |
63 R *pr = IO + mstart1 * s + k * ms; | |
64 | |
65 /* pi := IO + (m - j - mstart1) * s + k * ms */ | |
66 R *pi = IO - mstart1 * s + (k + 1) * ms; | |
67 | |
68 for (j = 0; j < mcount1; ++j, pr += s, pi -= s) { | |
69 E xr = *pr; | |
70 E xi = *pi; | |
71 E wr = W[0]; | |
72 E wi = sign * W[1]; | |
73 *pr = xr * wr - xi * wi; | |
74 *pi = xi * wr + xr * wi; | |
75 W += 2; | |
76 } | |
77 W += wrem; | |
78 } | |
79 } | |
80 } | |
81 | |
82 static void swapri(R *IO, INT r, INT m, INT s, INT jstart, INT jend) | |
83 { | |
84 INT k; | |
85 INT ms = m * s; | |
86 INT js = jstart * s; | |
87 for (k = 0; k + k < r; ++k) { | |
88 /* pr := IO + (m - j) * s + k * ms */ | |
89 R *pr = IO + (k + 1) * ms - js; | |
90 /* pi := IO + (m - j) * s + (r - 1 - k) * ms */ | |
91 R *pi = IO + (r - k) * ms - js; | |
92 INT j; | |
93 for (j = jstart; j < jend; j += 1, pr -= s, pi -= s) { | |
94 R t = *pr; | |
95 *pr = *pi; | |
96 *pi = t; | |
97 } | |
98 } | |
99 } | |
100 | |
101 static void reorder_dit(const P *ego, R *IO) | |
102 { | |
103 INT i, k; | |
104 INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs; | |
105 INT ms = m * s; | |
106 INT mstart1 = ego->mstart1, mend1 = mstart1 + ego->mcount1; | |
107 | |
108 for (i = 0; i < vl; ++i, IO += vs) { | |
109 for (k = 1; k + k < r; ++k) { | |
110 R *p0 = IO + k * ms; | |
111 R *p1 = IO + (r - k) * ms; | |
112 INT j; | |
113 | |
114 for (j = mstart1; j < mend1; ++j) { | |
115 E rp, ip, im, rm; | |
116 rp = p0[j * s]; | |
117 im = p1[ms - j * s]; | |
118 rm = p1[j * s]; | |
119 ip = p0[ms - j * s]; | |
120 p0[j * s] = rp - im; | |
121 p1[ms - j * s] = rp + im; | |
122 p1[j * s] = rm - ip; | |
123 p0[ms - j * s] = ip + rm; | |
124 } | |
125 } | |
126 | |
127 swapri(IO, r, m, s, mstart1, mend1); | |
128 } | |
129 } | |
130 | |
131 static void reorder_dif(const P *ego, R *IO) | |
132 { | |
133 INT i, k; | |
134 INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs; | |
135 INT ms = m * s; | |
136 INT mstart1 = ego->mstart1, mend1 = mstart1 + ego->mcount1; | |
137 | |
138 for (i = 0; i < vl; ++i, IO += vs) { | |
139 swapri(IO, r, m, s, mstart1, mend1); | |
140 | |
141 for (k = 1; k + k < r; ++k) { | |
142 R *p0 = IO + k * ms; | |
143 R *p1 = IO + (r - k) * ms; | |
144 const R half = K(0.5); | |
145 INT j; | |
146 | |
147 for (j = mstart1; j < mend1; ++j) { | |
148 E rp, ip, im, rm; | |
149 rp = half * p0[j * s]; | |
150 im = half * p1[ms - j * s]; | |
151 rm = half * p1[j * s]; | |
152 ip = half * p0[ms - j * s]; | |
153 p0[j * s] = rp + im; | |
154 p1[ms - j * s] = im - rp; | |
155 p1[j * s] = rm + ip; | |
156 p0[ms - j * s] = ip - rm; | |
157 } | |
158 } | |
159 } | |
160 } | |
161 | |
162 static int applicable(rdft_kind kind, INT r, INT m, const planner *plnr) | |
163 { | |
164 return (1 | |
165 && (kind == R2HC || kind == HC2R) | |
166 && (m % 2) | |
167 && (r % 2) | |
168 && !NO_SLOWP(plnr) | |
169 ); | |
170 } | |
171 | |
172 /**************************************************************/ | |
173 | |
174 static void apply_dit(const plan *ego_, R *IO) | |
175 { | |
176 const P *ego = (const P *) ego_; | |
177 INT start; | |
178 plan_rdft *cld, *cld0; | |
179 | |
180 bytwiddle(ego, IO, K(-1.0)); | |
181 | |
182 cld0 = (plan_rdft *) ego->cld0; | |
183 cld0->apply(ego->cld0, IO, IO); | |
184 | |
185 start = ego->mstart1 * ego->s; | |
186 cld = (plan_rdft *) ego->cld; | |
187 cld->apply(ego->cld, IO + start, IO + start); | |
188 | |
189 reorder_dit(ego, IO); | |
190 } | |
191 | |
192 static void apply_dif(const plan *ego_, R *IO) | |
193 { | |
194 const P *ego = (const P *) ego_; | |
195 INT start; | |
196 plan_rdft *cld, *cld0; | |
197 | |
198 reorder_dif(ego, IO); | |
199 | |
200 cld0 = (plan_rdft *) ego->cld0; | |
201 cld0->apply(ego->cld0, IO, IO); | |
202 | |
203 start = ego->mstart1 * ego->s; | |
204 cld = (plan_rdft *) ego->cld; | |
205 cld->apply(ego->cld, IO + start, IO + start); | |
206 | |
207 bytwiddle(ego, IO, K(1.0)); | |
208 } | |
209 | |
210 | |
211 static void awake(plan *ego_, enum wakefulness wakefulness) | |
212 { | |
213 P *ego = (P *) ego_; | |
214 X(plan_awake)(ego->cld0, wakefulness); | |
215 X(plan_awake)(ego->cld, wakefulness); | |
216 mktwiddle(ego, wakefulness); | |
217 } | |
218 | |
219 static void destroy(plan *ego_) | |
220 { | |
221 P *ego = (P *) ego_; | |
222 X(plan_destroy_internal)(ego->cld); | |
223 X(plan_destroy_internal)(ego->cld0); | |
224 } | |
225 | |
226 static void print(const plan *ego_, printer *p) | |
227 { | |
228 const P *ego = (const P *) ego_; | |
229 p->print(p, "(hc2hc-generic-%s-%D-%D%v%(%p%)%(%p%))", | |
230 ego->super.apply == apply_dit ? "dit" : "dif", | |
231 ego->r, ego->m, ego->vl, ego->cld0, ego->cld); | |
232 } | |
233 | |
234 static plan *mkcldw(const hc2hc_solver *ego_, | |
235 rdft_kind kind, INT r, INT m, INT s, INT vl, INT vs, | |
236 INT mstart, INT mcount, | |
237 R *IO, planner *plnr) | |
238 { | |
239 P *pln; | |
240 plan *cld0 = 0, *cld = 0; | |
241 INT mstart1, mcount1, mstride; | |
242 | |
243 static const plan_adt padt = { | |
244 0, awake, print, destroy | |
245 }; | |
246 | |
247 UNUSED(ego_); | |
248 | |
249 A(mstart >= 0 && mcount > 0 && mstart + mcount <= (m+2)/2); | |
250 | |
251 if (!applicable(kind, r, m, plnr)) | |
252 return (plan *)0; | |
253 | |
254 A(m % 2); | |
255 mstart1 = mstart + (mstart == 0); | |
256 mcount1 = mcount - (mstart == 0); | |
257 mstride = m - (mstart + mcount - 1) - mstart1; | |
258 | |
259 /* 0th (DC) transform (vl of these), if mstart == 0 */ | |
260 cld0 = X(mkplan_d)(plnr, | |
261 X(mkproblem_rdft_1_d)( | |
262 mstart == 0 ? X(mktensor_1d)(r, m * s, m * s) | |
263 : X(mktensor_0d)(), | |
264 X(mktensor_1d)(vl, vs, vs), | |
265 IO, IO, kind) | |
266 ); | |
267 if (!cld0) goto nada; | |
268 | |
269 /* twiddle transforms: there are 2 x mcount1 x vl of these | |
270 (where 2 corresponds to the real and imaginary parts) ... | |
271 the 2 x mcount1 loops are combined if mstart=0 and mcount=(m+2)/2. */ | |
272 cld = X(mkplan_d)(plnr, | |
273 X(mkproblem_rdft_1_d)( | |
274 X(mktensor_1d)(r, m * s, m * s), | |
275 X(mktensor_3d)(2, mstride * s, mstride * s, | |
276 mcount1, s, s, | |
277 vl, vs, vs), | |
278 IO + s * mstart1, IO + s * mstart1, kind) | |
279 ); | |
280 if (!cld) goto nada; | |
281 | |
282 pln = MKPLAN_HC2HC(P, &padt, (kind == R2HC) ? apply_dit : apply_dif); | |
283 pln->cld = cld; | |
284 pln->cld0 = cld0; | |
285 pln->r = r; | |
286 pln->m = m; | |
287 pln->s = s; | |
288 pln->vl = vl; | |
289 pln->vs = vs; | |
290 pln->td = 0; | |
291 pln->mstart1 = mstart1; | |
292 pln->mcount1 = mcount1; | |
293 | |
294 { | |
295 double n0 = 0.5 * (r - 1) * (2 * mcount1) * vl; | |
296 pln->super.super.ops = cld->ops; | |
297 pln->super.super.ops.mul += (kind == R2HC ? 5.0 : 7.0) * n0; | |
298 pln->super.super.ops.add += 4.0 * n0; | |
299 pln->super.super.ops.other += 11.0 * n0; | |
300 } | |
301 return &(pln->super.super); | |
302 | |
303 nada: | |
304 X(plan_destroy_internal)(cld); | |
305 X(plan_destroy_internal)(cld0); | |
306 return (plan *) 0; | |
307 } | |
308 | |
309 static void regsolver(planner *plnr, INT r) | |
310 { | |
311 S *slv = (S *)X(mksolver_hc2hc)(sizeof(S), r, mkcldw); | |
312 REGISTER_SOLVER(plnr, &(slv->super)); | |
313 if (X(mksolver_hc2hc_hook)) { | |
314 slv = (S *)X(mksolver_hc2hc_hook)(sizeof(S), r, mkcldw); | |
315 REGISTER_SOLVER(plnr, &(slv->super)); | |
316 } | |
317 } | |
318 | |
319 void X(hc2hc_generic_register)(planner *p) | |
320 { | |
321 regsolver(p, 0); | |
322 } |