cannam@127
|
1 /*
|
cannam@127
|
2 * Copyright (c) 2003, 2007-14 Matteo Frigo
|
cannam@127
|
3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
cannam@127
|
4 *
|
cannam@127
|
5 * This program is free software; you can redistribute it and/or modify
|
cannam@127
|
6 * it under the terms of the GNU General Public License as published by
|
cannam@127
|
7 * the Free Software Foundation; either version 2 of the License, or
|
cannam@127
|
8 * (at your option) any later version.
|
cannam@127
|
9 *
|
cannam@127
|
10 * This program is distributed in the hope that it will be useful,
|
cannam@127
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
cannam@127
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
cannam@127
|
13 * GNU General Public License for more details.
|
cannam@127
|
14 *
|
cannam@127
|
15 * You should have received a copy of the GNU General Public License
|
cannam@127
|
16 * along with this program; if not, write to the Free Software
|
cannam@127
|
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
cannam@127
|
18 *
|
cannam@127
|
19 */
|
cannam@127
|
20
|
cannam@127
|
21
|
cannam@127
|
22 /* direct RDFT2 R2HC/HC2R solver, if we have a codelet */
|
cannam@127
|
23
|
cannam@127
|
24 #include "rdft.h"
|
cannam@127
|
25
|
cannam@127
|
26 typedef struct {
|
cannam@127
|
27 solver super;
|
cannam@127
|
28 const kr2c_desc *desc;
|
cannam@127
|
29 kr2c k;
|
cannam@127
|
30 } S;
|
cannam@127
|
31
|
cannam@127
|
32 typedef struct {
|
cannam@127
|
33 plan_rdft2 super;
|
cannam@127
|
34
|
cannam@127
|
35 stride rs, cs;
|
cannam@127
|
36 INT vl;
|
cannam@127
|
37 INT ivs, ovs;
|
cannam@127
|
38 kr2c k;
|
cannam@127
|
39 const S *slv;
|
cannam@127
|
40 INT ilast;
|
cannam@127
|
41 } P;
|
cannam@127
|
42
|
cannam@127
|
43 static void apply(const plan *ego_, R *r0, R *r1, R *cr, R *ci)
|
cannam@127
|
44 {
|
cannam@127
|
45 const P *ego = (const P *) ego_;
|
cannam@127
|
46 ASSERT_ALIGNED_DOUBLE;
|
cannam@127
|
47 ego->k(r0, r1, cr, ci,
|
cannam@127
|
48 ego->rs, ego->cs, ego->cs,
|
cannam@127
|
49 ego->vl, ego->ivs, ego->ovs);
|
cannam@127
|
50 }
|
cannam@127
|
51
|
cannam@127
|
52 static void apply_r2hc(const plan *ego_, R *r0, R *r1, R *cr, R *ci)
|
cannam@127
|
53 {
|
cannam@127
|
54 const P *ego = (const P *) ego_;
|
cannam@127
|
55 INT i, vl = ego->vl, ovs = ego->ovs;
|
cannam@127
|
56 ASSERT_ALIGNED_DOUBLE;
|
cannam@127
|
57 ego->k(r0, r1, cr, ci,
|
cannam@127
|
58 ego->rs, ego->cs, ego->cs,
|
cannam@127
|
59 vl, ego->ivs, ovs);
|
cannam@127
|
60 for (i = 0; i < vl; ++i, ci += ovs)
|
cannam@127
|
61 ci[0] = ci[ego->ilast] = 0;
|
cannam@127
|
62 }
|
cannam@127
|
63
|
cannam@127
|
64 static void destroy(plan *ego_)
|
cannam@127
|
65 {
|
cannam@127
|
66 P *ego = (P *) ego_;
|
cannam@127
|
67 X(stride_destroy)(ego->rs);
|
cannam@127
|
68 X(stride_destroy)(ego->cs);
|
cannam@127
|
69 }
|
cannam@127
|
70
|
cannam@127
|
71 static void print(const plan *ego_, printer *p)
|
cannam@127
|
72 {
|
cannam@127
|
73 const P *ego = (const P *) ego_;
|
cannam@127
|
74 const S *s = ego->slv;
|
cannam@127
|
75
|
cannam@127
|
76 p->print(p, "(rdft2-%s-direct-%D%v \"%s\")",
|
cannam@127
|
77 X(rdft_kind_str)(s->desc->genus->kind), s->desc->n,
|
cannam@127
|
78 ego->vl, s->desc->nam);
|
cannam@127
|
79 }
|
cannam@127
|
80
|
cannam@127
|
81 static int applicable(const solver *ego_, const problem *p_)
|
cannam@127
|
82 {
|
cannam@127
|
83 const S *ego = (const S *) ego_;
|
cannam@127
|
84 const kr2c_desc *desc = ego->desc;
|
cannam@127
|
85 const problem_rdft2 *p = (const problem_rdft2 *) p_;
|
cannam@127
|
86 INT vl;
|
cannam@127
|
87 INT ivs, ovs;
|
cannam@127
|
88
|
cannam@127
|
89 return (
|
cannam@127
|
90 1
|
cannam@127
|
91 && p->sz->rnk == 1
|
cannam@127
|
92 && p->vecsz->rnk <= 1
|
cannam@127
|
93 && p->sz->dims[0].n == desc->n
|
cannam@127
|
94 && p->kind == desc->genus->kind
|
cannam@127
|
95
|
cannam@127
|
96 /* check strides etc */
|
cannam@127
|
97 && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs)
|
cannam@127
|
98
|
cannam@127
|
99 && (0
|
cannam@127
|
100 /* can operate out-of-place */
|
cannam@127
|
101 || p->r0 != p->cr
|
cannam@127
|
102
|
cannam@127
|
103 /*
|
cannam@127
|
104 * can compute one transform in-place, no matter
|
cannam@127
|
105 * what the strides are.
|
cannam@127
|
106 */
|
cannam@127
|
107 || p->vecsz->rnk == 0
|
cannam@127
|
108
|
cannam@127
|
109 /* can operate in-place as long as strides are the same */
|
cannam@127
|
110 || X(rdft2_inplace_strides)(p, RNK_MINFTY)
|
cannam@127
|
111 )
|
cannam@127
|
112 );
|
cannam@127
|
113 }
|
cannam@127
|
114
|
cannam@127
|
115 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
|
cannam@127
|
116 {
|
cannam@127
|
117 const S *ego = (const S *) ego_;
|
cannam@127
|
118 P *pln;
|
cannam@127
|
119 const problem_rdft2 *p;
|
cannam@127
|
120 iodim *d;
|
cannam@127
|
121 int r2hc_kindp;
|
cannam@127
|
122
|
cannam@127
|
123 static const plan_adt padt = {
|
cannam@127
|
124 X(rdft2_solve), X(null_awake), print, destroy
|
cannam@127
|
125 };
|
cannam@127
|
126
|
cannam@127
|
127 UNUSED(plnr);
|
cannam@127
|
128
|
cannam@127
|
129 if (!applicable(ego_, p_))
|
cannam@127
|
130 return (plan *)0;
|
cannam@127
|
131
|
cannam@127
|
132 p = (const problem_rdft2 *) p_;
|
cannam@127
|
133
|
cannam@127
|
134 r2hc_kindp = R2HC_KINDP(p->kind);
|
cannam@127
|
135 A(r2hc_kindp || HC2R_KINDP(p->kind));
|
cannam@127
|
136
|
cannam@127
|
137 pln = MKPLAN_RDFT2(P, &padt, p->kind == R2HC ? apply_r2hc : apply);
|
cannam@127
|
138
|
cannam@127
|
139 d = p->sz->dims;
|
cannam@127
|
140
|
cannam@127
|
141 pln->k = ego->k;
|
cannam@127
|
142
|
cannam@127
|
143 pln->rs = X(mkstride)(d->n, r2hc_kindp ? d->is : d->os);
|
cannam@127
|
144 pln->cs = X(mkstride)(d->n, r2hc_kindp ? d->os : d->is);
|
cannam@127
|
145
|
cannam@127
|
146 X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
|
cannam@127
|
147
|
cannam@127
|
148 /* Nyquist freq., if any */
|
cannam@127
|
149 pln->ilast = (d->n % 2) ? 0 : (d->n/2) * d->os;
|
cannam@127
|
150
|
cannam@127
|
151 pln->slv = ego;
|
cannam@127
|
152 X(ops_zero)(&pln->super.super.ops);
|
cannam@127
|
153 X(ops_madd2)(pln->vl / ego->desc->genus->vl,
|
cannam@127
|
154 &ego->desc->ops,
|
cannam@127
|
155 &pln->super.super.ops);
|
cannam@127
|
156 if (p->kind == R2HC)
|
cannam@127
|
157 pln->super.super.ops.other += 2 * pln->vl; /* + 2 stores */
|
cannam@127
|
158
|
cannam@127
|
159 pln->super.super.could_prune_now_p = 1;
|
cannam@127
|
160 return &(pln->super.super);
|
cannam@127
|
161 }
|
cannam@127
|
162
|
cannam@127
|
163 /* constructor */
|
cannam@127
|
164 solver *X(mksolver_rdft2_direct)(kr2c k, const kr2c_desc *desc)
|
cannam@127
|
165 {
|
cannam@127
|
166 static const solver_adt sadt = { PROBLEM_RDFT2, mkplan, 0 };
|
cannam@127
|
167 S *slv = MKSOLVER(S, &sadt);
|
cannam@127
|
168 slv->k = k;
|
cannam@127
|
169 slv->desc = desc;
|
cannam@127
|
170 return &(slv->super);
|
cannam@127
|
171 }
|