To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / buffered.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (7.81 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21

    
22
#include "dft/dft.h"
23

    
24
typedef struct {
25
     solver super;
26
     size_t maxnbuf_ndx;
27
} S;
28

    
29
static const INT maxnbufs[] = { 8, 256 };
30

    
31
typedef struct {
32
     plan_dft super;
33

    
34
     plan *cld, *cldcpy, *cldrest;
35
     INT n, vl, nbuf, bufdist;
36
     INT ivs_by_nbuf, ovs_by_nbuf;
37
     INT roffset, ioffset;
38
} P;
39

    
40
/* transform a vector input with the help of bufs */
41
static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
42
{
43
     const P *ego = (const P *) ego_;
44
     INT nbuf = ego->nbuf;
45
     R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist * 2, BUFFERS);
46

    
47
     plan_dft *cld = (plan_dft *) ego->cld;
48
     plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
49
     plan_dft *cldrest;
50
     INT i, vl = ego->vl;
51
     INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
52
     INT roffset = ego->roffset, ioffset = ego->ioffset;
53

    
54
     for (i = nbuf; i <= vl; i += nbuf) {
55
          /* transform to bufs: */
56
          cld->apply((plan *) cld, ri, ii, bufs + roffset, bufs + ioffset);
57
          ri += ivs_by_nbuf; ii += ivs_by_nbuf;
58

    
59
          /* copy back */
60
          cldcpy->apply((plan *) cldcpy, bufs+roffset, bufs+ioffset, ro, io);
61
          ro += ovs_by_nbuf; io += ovs_by_nbuf;
62
     }
63

    
64
     X(ifree)(bufs);
65

    
66
     /* Do the remaining transforms, if any: */
67
     cldrest = (plan_dft *) ego->cldrest;
68
     cldrest->apply((plan *) cldrest, ri, ii, ro, io);
69
}
70

    
71

    
72
static void awake(plan *ego_, enum wakefulness wakefulness)
73
{
74
     P *ego = (P *) ego_;
75

    
76
     X(plan_awake)(ego->cld, wakefulness);
77
     X(plan_awake)(ego->cldcpy, wakefulness);
78
     X(plan_awake)(ego->cldrest, wakefulness);
79
}
80

    
81
static void destroy(plan *ego_)
82
{
83
     P *ego = (P *) ego_;
84
     X(plan_destroy_internal)(ego->cldrest);
85
     X(plan_destroy_internal)(ego->cldcpy);
86
     X(plan_destroy_internal)(ego->cld);
87
}
88

    
89
static void print(const plan *ego_, printer *p)
90
{
91
     const P *ego = (const P *) ego_;
92
     p->print(p, "(dft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
93
              ego->n, ego->nbuf,
94
              ego->vl, ego->bufdist % ego->n,
95
              ego->cld, ego->cldcpy, ego->cldrest);
96
}
97

    
98
static int applicable0(const S *ego, const problem *p_, const planner *plnr)
99
{
100
     const problem_dft *p = (const problem_dft *) p_;
101
     const iodim *d = p->sz->dims;
102

    
103
     if (1
104
         && p->vecsz->rnk <= 1
105
         && p->sz->rnk == 1
106
          ) {
107
          INT vl, ivs, ovs;
108
          X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
109

    
110
          if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
111
               return 0;
112

    
113
          /* if this solver is redundant, in the sense that a solver
114
             of lower index generates the same plan, then prune this
115
             solver */
116
          if (X(nbuf_redundant)(d[0].n, vl, 
117
                                ego->maxnbuf_ndx,
118
                                maxnbufs, NELEM(maxnbufs)))
119
               return 0;
120

    
121
          /*
122
            In principle, the buffered transforms might be useful
123
            when working out of place.  However, in order to
124
            prevent infinite loops in the planner, we require
125
            that the output stride of the buffered transforms be
126
            greater than 2.
127
          */
128
          if (p->ri != p->ro)
129
               return (d[0].os > 2);
130

    
131
          /*
132
           * If the problem is in place, the input/output strides must
133
           * be the same or the whole thing must fit in the buffer.
134
           */
135
          if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
136
               return 1;
137

    
138
          if (/* fits into buffer: */
139
               ((p->vecsz->rnk == 0)
140
                ||
141
                (X(nbuf)(d[0].n, p->vecsz->dims[0].n, 
142
                         maxnbufs[ego->maxnbuf_ndx]) 
143
                 == p->vecsz->dims[0].n)))
144
               return 1;
145
     }
146

    
147
     return 0;
148
}
149

    
150
static int applicable(const S *ego, const problem *p_, const planner *plnr)
151
{
152
     if (NO_BUFFERINGP(plnr)) return 0;
153
     if (!applicable0(ego, p_, plnr)) return 0;
154

    
155
     if (NO_UGLYP(plnr)) {
156
          const problem_dft *p = (const problem_dft *) p_;
157
          if (p->ri != p->ro) return 0;
158
          if (X(toobig)(p->sz->dims[0].n)) return 0;
159
     }
160
     return 1;
161
}
162

    
163
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
164
{
165
     P *pln;
166
     const S *ego = (const S *)ego_;
167
     plan *cld = (plan *) 0;
168
     plan *cldcpy = (plan *) 0;
169
     plan *cldrest = (plan *) 0;
170
     const problem_dft *p = (const problem_dft *) p_;
171
     R *bufs = (R *) 0;
172
     INT nbuf = 0, bufdist, n, vl;
173
     INT ivs, ovs, roffset, ioffset;
174

    
175
     static const plan_adt padt = {
176
          X(dft_solve), awake, print, destroy
177
     };
178

    
179
     if (!applicable(ego, p_, plnr))
180
          goto nada;
181

    
182
     n = X(tensor_sz)(p->sz);
183

    
184
     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
185

    
186
     nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
187
     bufdist = X(bufdist)(n, vl);
188
     A(nbuf > 0);
189

    
190
     /* attempt to keep real and imaginary part in the same order,
191
        so as to allow optimizations in the the copy plan */
192
     roffset = (p->ri - p->ii > 0) ? (INT)1 : (INT)0;
193
     ioffset = 1 - roffset;
194

    
195
     /* initial allocation for the purpose of planning */
196
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist * 2, BUFFERS);
197

    
198
     /* allow destruction of input if problem is in place */
199
     cld = X(mkplan_f_d)(plnr,
200
                         X(mkproblem_dft_d)(
201
                              X(mktensor_1d)(n, p->sz->dims[0].is, 2),
202
                              X(mktensor_1d)(nbuf, ivs, bufdist * 2),
203
                              TAINT(p->ri, ivs * nbuf),
204
                              TAINT(p->ii, ivs * nbuf),
205
                              bufs + roffset, 
206
                              bufs + ioffset),
207
                         0, 0, (p->ri == p->ro) ? NO_DESTROY_INPUT : 0);
208
     if (!cld)
209
          goto nada;
210

    
211
     /* copying back from the buffer is a rank-0 transform: */
212
     cldcpy = X(mkplan_d)(plnr,
213
                          X(mkproblem_dft_d)(
214
                               X(mktensor_0d)(),
215
                               X(mktensor_2d)(nbuf, bufdist * 2, ovs,
216
                                              n, 2, p->sz->dims[0].os),
217
                               bufs + roffset, 
218
                               bufs + ioffset, 
219
                               TAINT(p->ro, ovs * nbuf), 
220
                               TAINT(p->io, ovs * nbuf)));
221
     if (!cldcpy)
222
          goto nada;
223

    
224
     /* deallocate buffers, let apply() allocate them for real */
225
     X(ifree)(bufs);
226
     bufs = 0;
227

    
228
     /* plan the leftover transforms (cldrest): */
229
     {
230
          INT id = ivs * (nbuf * (vl / nbuf));
231
          INT od = ovs * (nbuf * (vl / nbuf));
232
          cldrest = X(mkplan_d)(plnr, 
233
                                X(mkproblem_dft_d)(
234
                                     X(tensor_copy)(p->sz),
235
                                     X(mktensor_1d)(vl % nbuf, ivs, ovs),
236
                                     p->ri+id, p->ii+id, p->ro+od, p->io+od));
237
     }
238
     if (!cldrest)
239
          goto nada;
240

    
241
     pln = MKPLAN_DFT(P, &padt, apply);
242
     pln->cld = cld;
243
     pln->cldcpy = cldcpy;
244
     pln->cldrest = cldrest;
245
     pln->n = n;
246
     pln->vl = vl;
247
     pln->ivs_by_nbuf = ivs * nbuf;
248
     pln->ovs_by_nbuf = ovs * nbuf;
249
     pln->roffset = roffset;
250
     pln->ioffset = ioffset;
251

    
252
     pln->nbuf = nbuf;
253
     pln->bufdist = bufdist;
254

    
255
     {
256
          opcnt t;
257
          X(ops_add)(&cld->ops, &cldcpy->ops, &t);
258
          X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
259
     }
260

    
261
     return &(pln->super.super);
262

    
263
 nada:
264
     X(ifree0)(bufs);
265
     X(plan_destroy_internal)(cldrest);
266
     X(plan_destroy_internal)(cldcpy);
267
     X(plan_destroy_internal)(cld);
268
     return (plan *) 0;
269
}
270

    
271
static solver *mksolver(size_t maxnbuf_ndx)
272
{
273
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
274
     S *slv = MKSOLVER(S, &sadt);
275
     slv->maxnbuf_ndx = maxnbuf_ndx;
276
     return &(slv->super);
277
}
278

    
279
void X(dft_buffered_register)(planner *p)
280
{
281
     size_t i;
282
     for (i = 0; i < NELEM(maxnbufs); ++i)
283
          REGISTER_SOLVER(p, mksolver(i));
284
}