Mercurial > hg > sv-dependency-builds
comparison src/fftw-3.3.3/dft/rader.c @ 10:37bf6b4a2645
Add FFTW3
author | Chris Cannam |
---|---|
date | Wed, 20 Mar 2013 15:35:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:c0fb53affa76 | 10:37bf6b4a2645 |
---|---|
1 /* | |
2 * Copyright (c) 2003, 2007-11 Matteo Frigo | |
3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License as published by | |
7 * the Free Software Foundation; either version 2 of the License, or | |
8 * (at your option) any later version. | |
9 * | |
10 * This program is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 * GNU General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU General Public License | |
16 * along with this program; if not, write to the Free Software | |
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 * | |
19 */ | |
20 | |
21 #include "dft.h" | |
22 | |
23 /* | |
24 * Compute transforms of prime sizes using Rader's trick: turn them | |
25 * into convolutions of size n - 1, which you then perform via a pair | |
26 * of FFTs. | |
27 */ | |
28 | |
29 typedef struct { | |
30 solver super; | |
31 } S; | |
32 | |
33 typedef struct { | |
34 plan_dft super; | |
35 | |
36 plan *cld1, *cld2; | |
37 R *omega; | |
38 INT n, g, ginv; | |
39 INT is, os; | |
40 plan *cld_omega; | |
41 } P; | |
42 | |
43 static rader_tl *omegas = 0; | |
44 | |
45 static R *mkomega(enum wakefulness wakefulness, plan *p_, INT n, INT ginv) | |
46 { | |
47 plan_dft *p = (plan_dft *) p_; | |
48 R *omega; | |
49 INT i, gpower; | |
50 trigreal scale; | |
51 triggen *t; | |
52 | |
53 if ((omega = X(rader_tl_find)(n, n, ginv, omegas))) | |
54 return omega; | |
55 | |
56 omega = (R *)MALLOC(sizeof(R) * (n - 1) * 2, TWIDDLES); | |
57 | |
58 scale = n - 1.0; /* normalization for convolution */ | |
59 | |
60 t = X(mktriggen)(wakefulness, n); | |
61 for (i = 0, gpower = 1; i < n-1; ++i, gpower = MULMOD(gpower, ginv, n)) { | |
62 trigreal w[2]; | |
63 t->cexpl(t, gpower, w); | |
64 omega[2*i] = w[0] / scale; | |
65 omega[2*i+1] = FFT_SIGN * w[1] / scale; | |
66 } | |
67 X(triggen_destroy)(t); | |
68 A(gpower == 1); | |
69 | |
70 p->apply(p_, omega, omega + 1, omega, omega + 1); | |
71 | |
72 X(rader_tl_insert)(n, n, ginv, omega, &omegas); | |
73 return omega; | |
74 } | |
75 | |
76 static void free_omega(R *omega) | |
77 { | |
78 X(rader_tl_delete)(omega, &omegas); | |
79 } | |
80 | |
81 | |
82 /***************************************************************************/ | |
83 | |
84 /* Below, we extensively use the identity that fft(x*)* = ifft(x) in | |
85 order to share data between forward and backward transforms and to | |
86 obviate the necessity of having separate forward and backward | |
87 plans. (Although we often compute separate plans these days anyway | |
88 due to the differing strides, etcetera.) | |
89 | |
90 Of course, since the new FFTW gives us separate pointers to | |
91 the real and imaginary parts, we could have instead used the | |
92 fft(r,i) = ifft(i,r) form of this identity, but it was easier to | |
93 reuse the code from our old version. */ | |
94 | |
95 static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io) | |
96 { | |
97 const P *ego = (const P *) ego_; | |
98 INT is, os; | |
99 INT k, gpower, g, r; | |
100 R *buf; | |
101 R r0 = ri[0], i0 = ii[0]; | |
102 | |
103 r = ego->n; is = ego->is; os = ego->os; g = ego->g; | |
104 buf = (R *) MALLOC(sizeof(R) * (r - 1) * 2, BUFFERS); | |
105 | |
106 /* First, permute the input, storing in buf: */ | |
107 for (gpower = 1, k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) { | |
108 R rA, iA; | |
109 rA = ri[gpower * is]; | |
110 iA = ii[gpower * is]; | |
111 buf[2*k] = rA; buf[2*k + 1] = iA; | |
112 } | |
113 /* gpower == g^(r-1) mod r == 1 */; | |
114 | |
115 | |
116 /* compute DFT of buf, storing in output (except DC): */ | |
117 { | |
118 plan_dft *cld = (plan_dft *) ego->cld1; | |
119 cld->apply(ego->cld1, buf, buf+1, ro+os, io+os); | |
120 } | |
121 | |
122 /* set output DC component: */ | |
123 { | |
124 ro[0] = r0 + ro[os]; | |
125 io[0] = i0 + io[os]; | |
126 } | |
127 | |
128 /* now, multiply by omega: */ | |
129 { | |
130 const R *omega = ego->omega; | |
131 for (k = 0; k < r - 1; ++k) { | |
132 E rB, iB, rW, iW; | |
133 rW = omega[2*k]; | |
134 iW = omega[2*k+1]; | |
135 rB = ro[(k+1)*os]; | |
136 iB = io[(k+1)*os]; | |
137 ro[(k+1)*os] = rW * rB - iW * iB; | |
138 io[(k+1)*os] = -(rW * iB + iW * rB); | |
139 } | |
140 } | |
141 | |
142 /* this will add input[0] to all of the outputs after the ifft */ | |
143 ro[os] += r0; | |
144 io[os] -= i0; | |
145 | |
146 /* inverse FFT: */ | |
147 { | |
148 plan_dft *cld = (plan_dft *) ego->cld2; | |
149 cld->apply(ego->cld2, ro+os, io+os, buf, buf+1); | |
150 } | |
151 | |
152 /* finally, do inverse permutation to unshuffle the output: */ | |
153 { | |
154 INT ginv = ego->ginv; | |
155 gpower = 1; | |
156 for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) { | |
157 ro[gpower * os] = buf[2*k]; | |
158 io[gpower * os] = -buf[2*k+1]; | |
159 } | |
160 A(gpower == 1); | |
161 } | |
162 | |
163 | |
164 X(ifree)(buf); | |
165 } | |
166 | |
167 /***************************************************************************/ | |
168 | |
169 static void awake(plan *ego_, enum wakefulness wakefulness) | |
170 { | |
171 P *ego = (P *) ego_; | |
172 | |
173 X(plan_awake)(ego->cld1, wakefulness); | |
174 X(plan_awake)(ego->cld2, wakefulness); | |
175 X(plan_awake)(ego->cld_omega, wakefulness); | |
176 | |
177 switch (wakefulness) { | |
178 case SLEEPY: | |
179 free_omega(ego->omega); | |
180 ego->omega = 0; | |
181 break; | |
182 default: | |
183 ego->g = X(find_generator)(ego->n); | |
184 ego->ginv = X(power_mod)(ego->g, ego->n - 2, ego->n); | |
185 A(MULMOD(ego->g, ego->ginv, ego->n) == 1); | |
186 | |
187 ego->omega = mkomega(wakefulness, | |
188 ego->cld_omega, ego->n, ego->ginv); | |
189 break; | |
190 } | |
191 } | |
192 | |
193 static void destroy(plan *ego_) | |
194 { | |
195 P *ego = (P *) ego_; | |
196 X(plan_destroy_internal)(ego->cld_omega); | |
197 X(plan_destroy_internal)(ego->cld2); | |
198 X(plan_destroy_internal)(ego->cld1); | |
199 } | |
200 | |
201 static void print(const plan *ego_, printer *p) | |
202 { | |
203 const P *ego = (const P *)ego_; | |
204 p->print(p, "(dft-rader-%D%ois=%oos=%(%p%)", | |
205 ego->n, ego->is, ego->os, ego->cld1); | |
206 if (ego->cld2 != ego->cld1) | |
207 p->print(p, "%(%p%)", ego->cld2); | |
208 if (ego->cld_omega != ego->cld1 && ego->cld_omega != ego->cld2) | |
209 p->print(p, "%(%p%)", ego->cld_omega); | |
210 p->putchr(p, ')'); | |
211 } | |
212 | |
213 static int applicable(const solver *ego_, const problem *p_, | |
214 const planner *plnr) | |
215 { | |
216 const problem_dft *p = (const problem_dft *) p_; | |
217 UNUSED(ego_); | |
218 return (1 | |
219 && p->sz->rnk == 1 | |
220 && p->vecsz->rnk == 0 | |
221 && CIMPLIES(NO_SLOWP(plnr), p->sz->dims[0].n > RADER_MAX_SLOW) | |
222 && X(is_prime)(p->sz->dims[0].n) | |
223 | |
224 /* proclaim the solver SLOW if p-1 is not easily factorizable. | |
225 Bluestein should take care of this case. */ | |
226 && CIMPLIES(NO_SLOWP(plnr), X(factors_into_small_primes)(p->sz->dims[0].n - 1)) | |
227 ); | |
228 } | |
229 | |
230 static int mkP(P *pln, INT n, INT is, INT os, R *ro, R *io, | |
231 planner *plnr) | |
232 { | |
233 plan *cld1 = (plan *) 0; | |
234 plan *cld2 = (plan *) 0; | |
235 plan *cld_omega = (plan *) 0; | |
236 R *buf = (R *) 0; | |
237 | |
238 /* initial allocation for the purpose of planning */ | |
239 buf = (R *) MALLOC(sizeof(R) * (n - 1) * 2, BUFFERS); | |
240 | |
241 cld1 = X(mkplan_f_d)(plnr, | |
242 X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, os), | |
243 X(mktensor_1d)(1, 0, 0), | |
244 buf, buf + 1, ro + os, io + os), | |
245 NO_SLOW, 0, 0); | |
246 if (!cld1) goto nada; | |
247 | |
248 cld2 = X(mkplan_f_d)(plnr, | |
249 X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, os, 2), | |
250 X(mktensor_1d)(1, 0, 0), | |
251 ro + os, io + os, buf, buf + 1), | |
252 NO_SLOW, 0, 0); | |
253 | |
254 if (!cld2) goto nada; | |
255 | |
256 /* plan for omega array */ | |
257 cld_omega = X(mkplan_f_d)(plnr, | |
258 X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, 2), | |
259 X(mktensor_1d)(1, 0, 0), | |
260 buf, buf + 1, buf, buf + 1), | |
261 NO_SLOW, ESTIMATE, 0); | |
262 if (!cld_omega) goto nada; | |
263 | |
264 /* deallocate buffers; let awake() or apply() allocate them for real */ | |
265 X(ifree)(buf); | |
266 buf = 0; | |
267 | |
268 pln->cld1 = cld1; | |
269 pln->cld2 = cld2; | |
270 pln->cld_omega = cld_omega; | |
271 pln->omega = 0; | |
272 pln->n = n; | |
273 pln->is = is; | |
274 pln->os = os; | |
275 | |
276 X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops); | |
277 pln->super.super.ops.other += (n - 1) * (4 * 2 + 6) + 6; | |
278 pln->super.super.ops.add += (n - 1) * 2 + 4; | |
279 pln->super.super.ops.mul += (n - 1) * 4; | |
280 | |
281 return 1; | |
282 | |
283 nada: | |
284 X(ifree0)(buf); | |
285 X(plan_destroy_internal)(cld_omega); | |
286 X(plan_destroy_internal)(cld2); | |
287 X(plan_destroy_internal)(cld1); | |
288 return 0; | |
289 } | |
290 | |
291 static plan *mkplan(const solver *ego, const problem *p_, planner *plnr) | |
292 { | |
293 const problem_dft *p = (const problem_dft *) p_; | |
294 P *pln; | |
295 INT n; | |
296 INT is, os; | |
297 | |
298 static const plan_adt padt = { | |
299 X(dft_solve), awake, print, destroy | |
300 }; | |
301 | |
302 if (!applicable(ego, p_, plnr)) | |
303 return (plan *) 0; | |
304 | |
305 n = p->sz->dims[0].n; | |
306 is = p->sz->dims[0].is; | |
307 os = p->sz->dims[0].os; | |
308 | |
309 pln = MKPLAN_DFT(P, &padt, apply); | |
310 if (!mkP(pln, n, is, os, p->ro, p->io, plnr)) { | |
311 X(ifree)(pln); | |
312 return (plan *) 0; | |
313 } | |
314 return &(pln->super.super); | |
315 } | |
316 | |
317 static solver *mksolver(void) | |
318 { | |
319 static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; | |
320 S *slv = MKSOLVER(S, &sadt); | |
321 return &(slv->super); | |
322 } | |
323 | |
324 void X(dft_rader_register)(planner *p) | |
325 { | |
326 REGISTER_SOLVER(p, mksolver()); | |
327 } |