To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / direct.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (7.81 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21

    
22
/* direct DFT solver, if we have a codelet */
23

    
24
#include "dft/dft.h"
25

    
26
typedef struct {
27
     solver super;
28
     const kdft_desc *desc;
29
     kdft k;
30
     int bufferedp;
31
} S;
32

    
33
typedef struct {
34
     plan_dft super;
35

    
36
     stride is, os, bufstride;
37
     INT n, vl, ivs, ovs;
38
     kdft k;
39
     const S *slv;
40
} P;
41

    
42
static void dobatch(const P *ego, R *ri, R *ii, R *ro, R *io, 
43
                    R *buf, INT batchsz)
44
{
45
     X(cpy2d_pair_ci)(ri, ii, buf, buf+1,
46
                      ego->n, WS(ego->is, 1), WS(ego->bufstride, 1),
47
                      batchsz, ego->ivs, 2);
48
     
49
     if (IABS(WS(ego->os, 1)) < IABS(ego->ovs)) {
50
          /* transform directly to output */
51
          ego->k(buf, buf+1, ro, io, 
52
                 ego->bufstride, ego->os, batchsz, 2, ego->ovs);
53
     } else {
54
          /* transform to buffer and copy back */
55
          ego->k(buf, buf+1, buf, buf+1, 
56
                 ego->bufstride, ego->bufstride, batchsz, 2, 2);
57
          X(cpy2d_pair_co)(buf, buf+1, ro, io,
58
                           ego->n, WS(ego->bufstride, 1), WS(ego->os, 1), 
59
                           batchsz, 2, ego->ovs);
60
     }
61
}
62

    
63
static INT compute_batchsize(INT n)
64
{
65
     /* round up to multiple of 4 */
66
     n += 3;
67
     n &= -4;
68

    
69
     return (n + 2);
70
}
71

    
72
static void apply_buf(const plan *ego_, R *ri, R *ii, R *ro, R *io)
73
{
74
     const P *ego = (const P *) ego_;
75
     R *buf;
76
     INT vl = ego->vl, n = ego->n, batchsz = compute_batchsize(n);
77
     INT i;
78
     size_t bufsz = n * batchsz * 2 * sizeof(R);
79

    
80
     BUF_ALLOC(R *, buf, bufsz);
81

    
82
     for (i = 0; i < vl - batchsz; i += batchsz) {
83
          dobatch(ego, ri, ii, ro, io, buf, batchsz);
84
          ri += batchsz * ego->ivs; ii += batchsz * ego->ivs;
85
          ro += batchsz * ego->ovs; io += batchsz * ego->ovs;
86
     }
87
     dobatch(ego, ri, ii, ro, io, buf, vl - i);
88

    
89
     BUF_FREE(buf, bufsz);
90
}
91

    
92
static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
93
{
94
     const P *ego = (const P *) ego_;
95
     ASSERT_ALIGNED_DOUBLE;
96
     ego->k(ri, ii, ro, io, ego->is, ego->os, ego->vl, ego->ivs, ego->ovs);
97
}
98

    
99
static void apply_extra_iter(const plan *ego_, R *ri, R *ii, R *ro, R *io)
100
{
101
     const P *ego = (const P *) ego_;
102
     INT vl = ego->vl;
103

    
104
     ASSERT_ALIGNED_DOUBLE;
105

    
106
     /* for 4-way SIMD when VL is odd: iterate over an
107
        even vector length VL, and then execute the last
108
        iteration as a 2-vector with vector stride 0. */
109
     ego->k(ri, ii, ro, io, ego->is, ego->os, vl - 1, ego->ivs, ego->ovs);
110

    
111
     ego->k(ri + (vl - 1) * ego->ivs, ii + (vl - 1) * ego->ivs,
112
            ro + (vl - 1) * ego->ovs, io + (vl - 1) * ego->ovs,
113
            ego->is, ego->os, 1, 0, 0);
114
}
115

    
116
static void destroy(plan *ego_)
117
{
118
     P *ego = (P *) ego_;
119
     X(stride_destroy)(ego->is);
120
     X(stride_destroy)(ego->os);
121
     X(stride_destroy)(ego->bufstride);
122
}
123

    
124
static void print(const plan *ego_, printer *p)
125
{
126
     const P *ego = (const P *) ego_;
127
     const S *s = ego->slv;
128
     const kdft_desc *d = s->desc;
129

    
130
     if (ego->slv->bufferedp)
131
          p->print(p, "(dft-directbuf/%D-%D%v \"%s\")", 
132
                   compute_batchsize(d->sz), d->sz, ego->vl, d->nam);
133
     else
134
          p->print(p, "(dft-direct-%D%v \"%s\")", d->sz, ego->vl, d->nam);
135
}
136

    
137
static int applicable_buf(const solver *ego_, const problem *p_,
138
                          const planner *plnr)
139
{
140
     const S *ego = (const S *) ego_;
141
     const problem_dft *p = (const problem_dft *) p_;
142
     const kdft_desc *d = ego->desc;
143
     INT vl;
144
     INT ivs, ovs;
145
     INT batchsz;
146

    
147
     return (
148
          1
149
          && p->sz->rnk == 1
150
          && p->vecsz->rnk == 1
151
          && p->sz->dims[0].n == d->sz
152

    
153
          /* check strides etc */
154
          && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs)
155

    
156
          /* UGLY if IS <= IVS */
157
          && !(NO_UGLYP(plnr) &&
158
               X(iabs)(p->sz->dims[0].is) <= X(iabs)(ivs))
159

    
160
          && (batchsz = compute_batchsize(d->sz), 1)
161
          && (d->genus->okp(d, 0, ((const R *)0) + 1, p->ro, p->io,
162
                            2 * batchsz, p->sz->dims[0].os,
163
                            batchsz, 2, ovs, plnr))
164
          && (d->genus->okp(d, 0, ((const R *)0) + 1, p->ro, p->io,
165
                            2 * batchsz, p->sz->dims[0].os,
166
                            vl % batchsz, 2, ovs, plnr))
167

    
168

    
169
          && (0
170
              /* can operate out-of-place */
171
              || p->ri != p->ro
172

    
173
              /* can operate in-place as long as strides are the same */
174
              || X(tensor_inplace_strides2)(p->sz, p->vecsz)
175

    
176
              /* can do it if the problem fits in the buffer, no matter
177
                 what the strides are */
178
              || vl <= batchsz
179
               )
180
          );
181
}
182

    
183
static int applicable(const solver *ego_, const problem *p_,
184
                      const planner *plnr, int *extra_iterp)
185
{
186
     const S *ego = (const S *) ego_;
187
     const problem_dft *p = (const problem_dft *) p_;
188
     const kdft_desc *d = ego->desc;
189
     INT vl;
190
     INT ivs, ovs;
191

    
192
     return (
193
          1
194
          && p->sz->rnk == 1
195
          && p->vecsz->rnk <= 1
196
          && p->sz->dims[0].n == d->sz
197

    
198
          /* check strides etc */
199
          && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs)
200

    
201
          && ((*extra_iterp = 0,
202
               (d->genus->okp(d, p->ri, p->ii, p->ro, p->io,
203
                              p->sz->dims[0].is, p->sz->dims[0].os,
204
                              vl, ivs, ovs, plnr)))
205
              ||
206
              (*extra_iterp = 1,
207
               ((d->genus->okp(d, p->ri, p->ii, p->ro, p->io,
208
                               p->sz->dims[0].is, p->sz->dims[0].os,
209
                               vl - 1, ivs, ovs, plnr))
210
                &&
211
                (d->genus->okp(d, p->ri, p->ii, p->ro, p->io,
212
                               p->sz->dims[0].is, p->sz->dims[0].os,
213
                               2, 0, 0, plnr)))))
214

    
215
          && (0
216
              /* can operate out-of-place */
217
              || p->ri != p->ro
218

    
219
              /* can always compute one transform */
220
              || vl == 1
221

    
222
              /* can operate in-place as long as strides are the same */
223
              || X(tensor_inplace_strides2)(p->sz, p->vecsz)
224
               )
225
          );
226
}
227

    
228

    
229
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
230
{
231
     const S *ego = (const S *) ego_;
232
     P *pln;
233
     const problem_dft *p;
234
     iodim *d;
235
     const kdft_desc *e = ego->desc;
236

    
237
     static const plan_adt padt = {
238
          X(dft_solve), X(null_awake), print, destroy
239
     };
240

    
241
     UNUSED(plnr);
242

    
243
     if (ego->bufferedp) {
244
          if (!applicable_buf(ego_, p_, plnr))
245
               return (plan *)0;
246
          pln = MKPLAN_DFT(P, &padt, apply_buf);
247
     } else {
248
          int extra_iterp = 0;
249
          if (!applicable(ego_, p_, plnr, &extra_iterp))
250
               return (plan *)0;
251
          pln = MKPLAN_DFT(P, &padt, extra_iterp ? apply_extra_iter : apply);
252
     }
253

    
254
     p = (const problem_dft *) p_;
255
     d = p->sz->dims;
256
     pln->k = ego->k;
257
     pln->n = d[0].n;
258
     pln->is = X(mkstride)(pln->n, d[0].is);
259
     pln->os = X(mkstride)(pln->n, d[0].os);
260
     pln->bufstride = X(mkstride)(pln->n, 2 * compute_batchsize(pln->n));
261

    
262
     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
263
     pln->slv = ego;
264

    
265
     X(ops_zero)(&pln->super.super.ops);
266
     X(ops_madd2)(pln->vl / e->genus->vl, &e->ops, &pln->super.super.ops);
267

    
268
     if (ego->bufferedp) 
269
          pln->super.super.ops.other += 4 * pln->n * pln->vl;
270

    
271
     pln->super.super.could_prune_now_p = !ego->bufferedp;
272
     return &(pln->super.super);
273
}
274

    
275
static solver *mksolver(kdft k, const kdft_desc *desc, int bufferedp)
276
{
277
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
278
     S *slv = MKSOLVER(S, &sadt);
279
     slv->k = k;
280
     slv->desc = desc;
281
     slv->bufferedp = bufferedp;
282
     return &(slv->super);
283
}
284

    
285
solver *X(mksolver_dft_direct)(kdft k, const kdft_desc *desc)
286
{
287
     return mksolver(k, desc, 0);
288
}
289

    
290
solver *X(mksolver_dft_directbuf)(kdft k, const kdft_desc *desc)
291
{
292
     return mksolver(k, desc, 1);
293
}