To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / rader.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (8.68 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
#include "dft/dft.h"
22

    
23
/*
24
 * Compute transforms of prime sizes using Rader's trick: turn them
25
 * into convolutions of size n - 1, which you then perform via a pair
26
 * of FFTs.
27
 */
28

    
29
typedef struct {
30
     solver super;
31
} S;
32

    
33
typedef struct {
34
     plan_dft super;
35

    
36
     plan *cld1, *cld2;
37
     R *omega;
38
     INT n, g, ginv;
39
     INT is, os;
40
     plan *cld_omega;
41
} P;
42

    
43
static rader_tl *omegas = 0;
44

    
45
static R *mkomega(enum wakefulness wakefulness, plan *p_, INT n, INT ginv)
46
{
47
     plan_dft *p = (plan_dft *) p_;
48
     R *omega;
49
     INT i, gpower;
50
     trigreal scale;
51
     triggen *t;
52

    
53
     if ((omega = X(rader_tl_find)(n, n, ginv, omegas)))
54
          return omega;
55

    
56
     omega = (R *)MALLOC(sizeof(R) * (n - 1) * 2, TWIDDLES);
57

    
58
     scale = n - 1.0; /* normalization for convolution */
59

    
60
     t = X(mktriggen)(wakefulness, n);
61
     for (i = 0, gpower = 1; i < n-1; ++i, gpower = MULMOD(gpower, ginv, n)) {
62
          trigreal w[2];
63
          t->cexpl(t, gpower, w);
64
          omega[2*i] = w[0] / scale;
65
          omega[2*i+1] = FFT_SIGN * w[1] / scale;
66
     }
67
     X(triggen_destroy)(t);
68
     A(gpower == 1);
69

    
70
     p->apply(p_, omega, omega + 1, omega, omega + 1);
71

    
72
     X(rader_tl_insert)(n, n, ginv, omega, &omegas);
73
     return omega;
74
}
75

    
76
static void free_omega(R *omega)
77
{
78
     X(rader_tl_delete)(omega, &omegas);
79
}
80

    
81

    
82
/***************************************************************************/
83

    
84
/* Below, we extensively use the identity that fft(x*)* = ifft(x) in
85
   order to share data between forward and backward transforms and to
86
   obviate the necessity of having separate forward and backward
87
   plans.  (Although we often compute separate plans these days anyway
88
   due to the differing strides, etcetera.)
89

90
   Of course, since the new FFTW gives us separate pointers to
91
   the real and imaginary parts, we could have instead used the
92
   fft(r,i) = ifft(i,r) form of this identity, but it was easier to
93
   reuse the code from our old version. */
94

    
95
static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
96
{
97
     const P *ego = (const P *) ego_;
98
     INT is, os;
99
     INT k, gpower, g, r;
100
     R *buf;
101
     R r0 = ri[0], i0 = ii[0];
102

    
103
     r = ego->n; is = ego->is; os = ego->os; g = ego->g; 
104
     buf = (R *) MALLOC(sizeof(R) * (r - 1) * 2, BUFFERS);
105

    
106
     /* First, permute the input, storing in buf: */
107
     for (gpower = 1, k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
108
          R rA, iA;
109
          rA = ri[gpower * is];
110
          iA = ii[gpower * is];
111
          buf[2*k] = rA; buf[2*k + 1] = iA;
112
     }
113
     /* gpower == g^(r-1) mod r == 1 */;
114

    
115

    
116
     /* compute DFT of buf, storing in output (except DC): */
117
     {
118
            plan_dft *cld = (plan_dft *) ego->cld1;
119
            cld->apply(ego->cld1, buf, buf+1, ro+os, io+os);
120
     }
121

    
122
     /* set output DC component: */
123
     {
124
          ro[0] = r0 + ro[os];
125
          io[0] = i0 + io[os];
126
     }
127

    
128
     /* now, multiply by omega: */
129
     {
130
          const R *omega = ego->omega;
131
          for (k = 0; k < r - 1; ++k) {
132
               E rB, iB, rW, iW;
133
               rW = omega[2*k];
134
               iW = omega[2*k+1];
135
               rB = ro[(k+1)*os];
136
               iB = io[(k+1)*os];
137
               ro[(k+1)*os] = rW * rB - iW * iB;
138
               io[(k+1)*os] = -(rW * iB + iW * rB);
139
          }
140
     }
141
     
142
     /* this will add input[0] to all of the outputs after the ifft */
143
     ro[os] += r0;
144
     io[os] -= i0;
145

    
146
     /* inverse FFT: */
147
     {
148
            plan_dft *cld = (plan_dft *) ego->cld2;
149
            cld->apply(ego->cld2, ro+os, io+os, buf, buf+1);
150
     }
151
     
152
     /* finally, do inverse permutation to unshuffle the output: */
153
     {
154
          INT ginv = ego->ginv;
155
          gpower = 1;
156
          for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
157
               ro[gpower * os] = buf[2*k];
158
               io[gpower * os] = -buf[2*k+1];
159
          }
160
          A(gpower == 1);
161
     }
162

    
163

    
164
     X(ifree)(buf);
165
}
166

    
167
/***************************************************************************/
168

    
169
static void awake(plan *ego_, enum wakefulness wakefulness)
170
{
171
     P *ego = (P *) ego_;
172

    
173
     X(plan_awake)(ego->cld1, wakefulness);
174
     X(plan_awake)(ego->cld2, wakefulness);
175
     X(plan_awake)(ego->cld_omega, wakefulness);
176

    
177
     switch (wakefulness) {
178
         case SLEEPY:
179
              free_omega(ego->omega);
180
              ego->omega = 0;
181
              break;
182
         default:
183
              ego->g = X(find_generator)(ego->n);
184
              ego->ginv = X(power_mod)(ego->g, ego->n - 2, ego->n);
185
              A(MULMOD(ego->g, ego->ginv, ego->n) == 1);
186

    
187
              ego->omega = mkomega(wakefulness,
188
                                   ego->cld_omega, ego->n, ego->ginv);
189
              break;
190
     }
191
}
192

    
193
static void destroy(plan *ego_)
194
{
195
     P *ego = (P *) ego_;
196
     X(plan_destroy_internal)(ego->cld_omega);
197
     X(plan_destroy_internal)(ego->cld2);
198
     X(plan_destroy_internal)(ego->cld1);
199
}
200

    
201
static void print(const plan *ego_, printer *p)
202
{
203
     const P *ego = (const P *)ego_;
204
     p->print(p, "(dft-rader-%D%ois=%oos=%(%p%)",
205
              ego->n, ego->is, ego->os, ego->cld1);
206
     if (ego->cld2 != ego->cld1)
207
          p->print(p, "%(%p%)", ego->cld2);
208
     if (ego->cld_omega != ego->cld1 && ego->cld_omega != ego->cld2)
209
          p->print(p, "%(%p%)", ego->cld_omega);
210
     p->putchr(p, ')');
211
}
212

    
213
static int applicable(const solver *ego_, const problem *p_,
214
                      const planner *plnr)
215
{
216
     const problem_dft *p = (const problem_dft *) p_;
217
     UNUSED(ego_);
218
     return (1
219
             && p->sz->rnk == 1
220
             && p->vecsz->rnk == 0
221
             && CIMPLIES(NO_SLOWP(plnr), p->sz->dims[0].n > RADER_MAX_SLOW)
222
             && X(is_prime)(p->sz->dims[0].n)
223

    
224
             /* proclaim the solver SLOW if p-1 is not easily factorizable.
225
                Bluestein should take care of this case. */
226
             && CIMPLIES(NO_SLOWP(plnr), X(factors_into_small_primes)(p->sz->dims[0].n - 1))
227
          );
228
}
229

    
230
static int mkP(P *pln, INT n, INT is, INT os, R *ro, R *io,
231
               planner *plnr)
232
{
233
     plan *cld1 = (plan *) 0;
234
     plan *cld2 = (plan *) 0;
235
     plan *cld_omega = (plan *) 0;
236
     R *buf = (R *) 0;
237

    
238
     /* initial allocation for the purpose of planning */
239
     buf = (R *) MALLOC(sizeof(R) * (n - 1) * 2, BUFFERS);
240

    
241
     cld1 = X(mkplan_f_d)(plnr, 
242
                          X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, os),
243
                                             X(mktensor_1d)(1, 0, 0),
244
                                             buf, buf + 1, ro + os, io + os),
245
                          NO_SLOW, 0, 0);
246
     if (!cld1) goto nada;
247

    
248
     cld2 = X(mkplan_f_d)(plnr, 
249
                          X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, os, 2),
250
                                             X(mktensor_1d)(1, 0, 0),
251
                                             ro + os, io + os, buf, buf + 1),
252
                          NO_SLOW, 0, 0);
253

    
254
     if (!cld2) goto nada;
255

    
256
     /* plan for omega array */
257
     cld_omega = X(mkplan_f_d)(plnr, 
258
                               X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, 2),
259
                                                  X(mktensor_1d)(1, 0, 0),
260
                                                  buf, buf + 1, buf, buf + 1),
261
                               NO_SLOW, ESTIMATE, 0);
262
     if (!cld_omega) goto nada;
263

    
264
     /* deallocate buffers; let awake() or apply() allocate them for real */
265
     X(ifree)(buf);
266
     buf = 0;
267

    
268
     pln->cld1 = cld1;
269
     pln->cld2 = cld2;
270
     pln->cld_omega = cld_omega;
271
     pln->omega = 0;
272
     pln->n = n;
273
     pln->is = is;
274
     pln->os = os;
275

    
276
     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
277
     pln->super.super.ops.other += (n - 1) * (4 * 2 + 6) + 6;
278
     pln->super.super.ops.add += (n - 1) * 2 + 4;
279
     pln->super.super.ops.mul += (n - 1) * 4;
280

    
281
     return 1;
282

    
283
 nada:
284
     X(ifree0)(buf);
285
     X(plan_destroy_internal)(cld_omega);
286
     X(plan_destroy_internal)(cld2);
287
     X(plan_destroy_internal)(cld1);
288
     return 0;
289
}
290

    
291
static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
292
{
293
     const problem_dft *p = (const problem_dft *) p_;
294
     P *pln;
295
     INT n;
296
     INT is, os;
297

    
298
     static const plan_adt padt = {
299
          X(dft_solve), awake, print, destroy
300
     };
301

    
302
     if (!applicable(ego, p_, plnr))
303
          return (plan *) 0;
304

    
305
     n = p->sz->dims[0].n;
306
     is = p->sz->dims[0].is;
307
     os = p->sz->dims[0].os;
308

    
309
     pln = MKPLAN_DFT(P, &padt, apply);
310
     if (!mkP(pln, n, is, os, p->ro, p->io, plnr)) {
311
          X(ifree)(pln);
312
          return (plan *) 0;
313
     }
314
     return &(pln->super.super);
315
}
316

    
317
static solver *mksolver(void)
318
{
319
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
320
     S *slv = MKSOLVER(S, &sadt);
321
     return &(slv->super);
322
}
323

    
324
void X(dft_rader_register)(planner *p)
325
{
326
     REGISTER_SOLVER(p, mksolver());
327
}