To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / indirect-transpose.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (7.21 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* solvers/plans for vectors of DFTs corresponding to the columns
22
   of a matrix: first transpose the matrix so that the DFTs are
23
   contiguous, then do DFTs with transposed output.   In particular,
24
   we restrict ourselves to the case of a square transpose (or a
25
   sequence thereof). */
26

    
27
#include "dft/dft.h"
28

    
29
typedef solver S;
30

    
31
typedef struct {
32
     plan_dft super;
33
     INT vl, ivs, ovs;
34
     plan *cldtrans, *cld, *cldrest;
35
} P;
36

    
37
/* initial transpose is out-of-place from input to output */
38
static void apply_op(const plan *ego_, R *ri, R *ii, R *ro, R *io)
39
{
40
     const P *ego = (const P *) ego_;
41
     INT vl = ego->vl, ivs = ego->ivs, ovs = ego->ovs, i;
42

    
43
     for (i = 0; i < vl; ++i) {
44
          {
45
               plan_dft *cldtrans = (plan_dft *) ego->cldtrans;
46
               cldtrans->apply(ego->cldtrans, ri, ii, ro, io);
47
          }
48
          {
49
               plan_dft *cld = (plan_dft *) ego->cld;
50
               cld->apply(ego->cld, ro, io, ro, io);
51
          }
52
          ri += ivs; ii += ivs;
53
          ro += ovs; io += ovs;
54
     }
55
     {
56
          plan_dft *cldrest = (plan_dft *) ego->cldrest;
57
          cldrest->apply(ego->cldrest, ri, ii, ro, io);
58
     }
59
}
60

    
61
static void destroy(plan *ego_)
62
{
63
     P *ego = (P *) ego_;
64
     X(plan_destroy_internal)(ego->cldrest);
65
     X(plan_destroy_internal)(ego->cld);
66
     X(plan_destroy_internal)(ego->cldtrans);
67
}
68

    
69
static void awake(plan *ego_, enum wakefulness wakefulness)
70
{
71
     P *ego = (P *) ego_;
72
     X(plan_awake)(ego->cldtrans, wakefulness);
73
     X(plan_awake)(ego->cld, wakefulness);
74
     X(plan_awake)(ego->cldrest, wakefulness);
75
}
76

    
77
static void print(const plan *ego_, printer *p)
78
{
79
     const P *ego = (const P *) ego_;
80
     p->print(p, "(indirect-transpose%v%(%p%)%(%p%)%(%p%))", 
81
              ego->vl, ego->cldtrans, ego->cld, ego->cldrest);
82
}
83

    
84
static int pickdim(const tensor *vs, const tensor *s, int *pdim0, int *pdim1)
85
{
86
     int dim0, dim1;
87
     *pdim0 = *pdim1 = -1;
88
     for (dim0 = 0; dim0 < vs->rnk; ++dim0)
89
          for (dim1 = 0; dim1 < s->rnk; ++dim1) 
90
               if (vs->dims[dim0].n * X(iabs)(vs->dims[dim0].is) <= X(iabs)(s->dims[dim1].is)
91
                   && vs->dims[dim0].n >= s->dims[dim1].n
92
                   && (*pdim0 == -1 
93
                       || (X(iabs)(vs->dims[dim0].is) <= X(iabs)(vs->dims[*pdim0].is)
94
                           && X(iabs)(s->dims[dim1].is) >= X(iabs)(s->dims[*pdim1].is)))) {
95
                    *pdim0 = dim0;
96
                    *pdim1 = dim1;
97
               }
98
     return (*pdim0 != -1 && *pdim1 != -1);
99
}
100

    
101
static int applicable0(const solver *ego_, const problem *p_,
102
                       const planner *plnr,
103
                       int *pdim0, int *pdim1)
104
{
105
     const problem_dft *p = (const problem_dft *) p_;
106
     UNUSED(ego_); UNUSED(plnr);
107

    
108
     return (1
109
             && FINITE_RNK(p->vecsz->rnk) && FINITE_RNK(p->sz->rnk)
110

    
111
             /* FIXME: can/should we relax this constraint? */
112
             && X(tensor_inplace_strides2)(p->vecsz, p->sz)
113

    
114
             && pickdim(p->vecsz, p->sz, pdim0, pdim1)
115

    
116
             /* output should not *already* include the transpose
117
                (in which case we duplicate the regular indirect.c) */
118
             && (p->sz->dims[*pdim1].os != p->vecsz->dims[*pdim0].is)
119
          );
120
}
121

    
122
static int applicable(const solver *ego_, const problem *p_,
123
                      const planner *plnr,
124
                      int *pdim0, int *pdim1)
125
{
126
     if (!applicable0(ego_, p_, plnr, pdim0, pdim1)) return 0;
127
     {
128
          const problem_dft *p = (const problem_dft *) p_;
129
          INT u = p->ri == p->ii + 1 || p->ii == p->ri + 1 ? (INT)2 : (INT)1;
130

    
131
          /* UGLY if does not result in contiguous transforms or
132
             transforms of contiguous vectors (since the latter at
133
             least have efficient transpositions) */
134
          if (NO_UGLYP(plnr)
135
              && p->vecsz->dims[*pdim0].is != u
136
              && !(p->vecsz->rnk == 2
137
                   && p->vecsz->dims[1-*pdim0].is == u
138
                   && p->vecsz->dims[*pdim0].is
139
                      == u * p->vecsz->dims[1-*pdim0].n))
140
               return 0;
141

    
142
          if (NO_INDIRECT_OP_P(plnr) && p->ri != p->ro) return 0;
143
     }
144
     return 1;
145
}
146

    
147
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
148
{
149
     const problem_dft *p = (const problem_dft *) p_;
150
     P *pln;
151
     plan *cld = 0, *cldtrans = 0, *cldrest = 0;
152
     int pdim0, pdim1;
153
     tensor *ts, *tv;
154
     INT vl, ivs, ovs;
155
     R *rit, *iit, *rot, *iot;
156

    
157
     static const plan_adt padt = {
158
          X(dft_solve), awake, print, destroy
159
     };
160

    
161
     if (!applicable(ego_, p_, plnr, &pdim0, &pdim1))
162
          return (plan *) 0;
163

    
164
     vl = p->vecsz->dims[pdim0].n / p->sz->dims[pdim1].n;
165
     A(vl >= 1);
166
     ivs = p->sz->dims[pdim1].n * p->vecsz->dims[pdim0].is;
167
     ovs = p->sz->dims[pdim1].n * p->vecsz->dims[pdim0].os;
168
     rit = TAINT(p->ri, vl == 1 ? 0 : ivs);
169
     iit = TAINT(p->ii, vl == 1 ? 0 : ivs);
170
     rot = TAINT(p->ro, vl == 1 ? 0 : ovs);
171
     iot = TAINT(p->io, vl == 1 ? 0 : ovs);
172

    
173
     ts = X(tensor_copy_inplace)(p->sz, INPLACE_IS);
174
     ts->dims[pdim1].os = p->vecsz->dims[pdim0].is;
175
     tv = X(tensor_copy_inplace)(p->vecsz, INPLACE_IS);
176
     tv->dims[pdim0].os = p->sz->dims[pdim1].is;
177
     tv->dims[pdim0].n = p->sz->dims[pdim1].n;
178
     cldtrans = X(mkplan_d)(plnr, 
179
                            X(mkproblem_dft_d)(X(mktensor_0d)(),
180
                                               X(tensor_append)(tv, ts),
181
                                               rit, iit, 
182
                                               rot, iot));
183
     X(tensor_destroy2)(ts, tv);
184
     if (!cldtrans) goto nada;
185

    
186
     ts = X(tensor_copy)(p->sz);
187
     ts->dims[pdim1].is = p->vecsz->dims[pdim0].is;
188
     tv = X(tensor_copy)(p->vecsz);
189
     tv->dims[pdim0].is = p->sz->dims[pdim1].is;
190
     tv->dims[pdim0].n = p->sz->dims[pdim1].n;
191
     cld = X(mkplan_d)(plnr, X(mkproblem_dft_d)(ts, tv,
192
                                                rot, iot,
193
                                                rot, iot));
194
     if (!cld) goto nada;
195

    
196
     tv = X(tensor_copy)(p->vecsz);
197
     tv->dims[pdim0].n -= vl * p->sz->dims[pdim1].n;
198
     cldrest = X(mkplan_d)(plnr, X(mkproblem_dft_d)(X(tensor_copy)(p->sz), tv,
199
                                                    p->ri + ivs * vl,
200
                                                    p->ii + ivs * vl,
201
                                                    p->ro + ovs * vl,
202
                                                    p->io + ovs * vl));
203
     if (!cldrest) goto nada;
204

    
205
     pln = MKPLAN_DFT(P, &padt, apply_op);
206
     pln->cldtrans = cldtrans;
207
     pln->cld = cld;
208
     pln->cldrest = cldrest;
209
     pln->vl = vl;
210
     pln->ivs = ivs;
211
     pln->ovs = ovs;
212
     X(ops_cpy)(&cldrest->ops, &pln->super.super.ops);
213
     X(ops_madd2)(vl, &cld->ops, &pln->super.super.ops);
214
     X(ops_madd2)(vl, &cldtrans->ops, &pln->super.super.ops);
215
     return &(pln->super.super);
216

    
217
 nada:
218
     X(plan_destroy_internal)(cldrest);
219
     X(plan_destroy_internal)(cld);
220
     X(plan_destroy_internal)(cldtrans);
221
     return (plan *)0;
222
}
223

    
224
static solver *mksolver(void)
225
{
226
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
227
     S *slv = MKSOLVER(S, &sadt);
228
     return slv;
229
}
230

    
231
void X(dft_indirect_transpose_register)(planner *p)
232
{
233
     REGISTER_SOLVER(p, mksolver());
234
}