comparison src/fftw-3.3.3/dft/simd/common/genus.c @ 10:37bf6b4a2645

Add FFTW3
author Chris Cannam
date Wed, 20 Mar 2013 15:35:50 +0000
parents
children
comparison
equal deleted inserted replaced
9:c0fb53affa76 10:37bf6b4a2645
1 /*
2 * Copyright (c) 2003, 2007-11 Matteo Frigo
3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 */
20
21 #include "codelet-dft.h"
22 #include SIMD_HEADER
23
/*
 * EXTERN_CONST(t, x) expands to an extern declaration of x with type
 * `const t`, immediately followed by the opening of x's definition;
 * the use site appends `= { ... };` to complete it.
 * NOTE(review): presumably the explicit extern declaration is there so
 * the const object keeps external linkage under a C++ compiler --
 * confirm.
 */
#define EXTERN_CONST(t, x) \
     extern const t x;     \
     const t x
25
26 static int n1b_okp(const kdft_desc *d,
27 const R *ri, const R *ii, const R *ro, const R *io,
28 INT is, INT os, INT vl, INT ivs, INT ovs,
29 const planner *plnr)
30 {
31 return (1
32 && ALIGNED(ii)
33 && ALIGNED(io)
34 && !NO_SIMDP(plnr)
35 && SIMD_STRIDE_OK(is)
36 && SIMD_STRIDE_OK(os)
37 && SIMD_VSTRIDE_OK(ivs)
38 && SIMD_VSTRIDE_OK(ovs)
39 && ri == ii + 1
40 && ro == io + 1
41 && (vl % VL) == 0
42 && (!d->is || (d->is == is))
43 && (!d->os || (d->os == os))
44 && (!d->ivs || (d->ivs == ivs))
45 && (!d->ovs || (d->ovs == ovs))
46 );
47 }
48
49 EXTERN_CONST(kdft_genus, XSIMD(dft_n1bsimd_genus)) = { n1b_okp, VL };
50
51 static int n1f_okp(const kdft_desc *d,
52 const R *ri, const R *ii, const R *ro, const R *io,
53 INT is, INT os, INT vl, INT ivs, INT ovs,
54 const planner *plnr)
55 {
56 return (1
57 && ALIGNED(ri)
58 && ALIGNED(ro)
59 && !NO_SIMDP(plnr)
60 && SIMD_STRIDE_OK(is)
61 && SIMD_STRIDE_OK(os)
62 && SIMD_VSTRIDE_OK(ivs)
63 && SIMD_VSTRIDE_OK(ovs)
64 && ii == ri + 1
65 && io == ro + 1
66 && (vl % VL) == 0
67 && (!d->is || (d->is == is))
68 && (!d->os || (d->os == os))
69 && (!d->ivs || (d->ivs == ivs))
70 && (!d->ovs || (d->ovs == ovs))
71 );
72 }
73
74 EXTERN_CONST(kdft_genus, XSIMD(dft_n1fsimd_genus)) = { n1f_okp, VL };
75
76 static int n2b_okp(const kdft_desc *d,
77 const R *ri, const R *ii, const R *ro, const R *io,
78 INT is, INT os, INT vl, INT ivs, INT ovs,
79 const planner *plnr)
80 {
81 return (1
82 && ALIGNEDA(ii)
83 && ALIGNEDA(io)
84 && !NO_SIMDP(plnr)
85 && SIMD_STRIDE_OKA(is)
86 && SIMD_VSTRIDE_OKA(ivs)
87 && SIMD_VSTRIDE_OKA(os) /* os == 2 enforced by codelet */
88 && SIMD_STRIDE_OKPAIR(ovs)
89 && ri == ii + 1
90 && ro == io + 1
91 && (vl % VL) == 0
92 && (!d->is || (d->is == is))
93 && (!d->os || (d->os == os))
94 && (!d->ivs || (d->ivs == ivs))
95 && (!d->ovs || (d->ovs == ovs))
96 );
97 }
98
99 EXTERN_CONST(kdft_genus, XSIMD(dft_n2bsimd_genus)) = { n2b_okp, VL };
100
101 static int n2f_okp(const kdft_desc *d,
102 const R *ri, const R *ii, const R *ro, const R *io,
103 INT is, INT os, INT vl, INT ivs, INT ovs,
104 const planner *plnr)
105 {
106 return (1
107 && ALIGNEDA(ri)
108 && ALIGNEDA(ro)
109 && !NO_SIMDP(plnr)
110 && SIMD_STRIDE_OKA(is)
111 && SIMD_VSTRIDE_OKA(ivs)
112 && SIMD_VSTRIDE_OKA(os) /* os == 2 enforced by codelet */
113 && SIMD_STRIDE_OKPAIR(ovs)
114 && ii == ri + 1
115 && io == ro + 1
116 && (vl % VL) == 0
117 && (!d->is || (d->is == is))
118 && (!d->os || (d->os == os))
119 && (!d->ivs || (d->ivs == ivs))
120 && (!d->ovs || (d->ovs == ovs))
121 );
122 }
123
124 EXTERN_CONST(kdft_genus, XSIMD(dft_n2fsimd_genus)) = { n2f_okp, VL };
125
126 static int n2s_okp(const kdft_desc *d,
127 const R *ri, const R *ii, const R *ro, const R *io,
128 INT is, INT os, INT vl, INT ivs, INT ovs,
129 const planner *plnr)
130 {
131 return (1
132 && !NO_SIMDP(plnr)
133 && ALIGNEDA(ri)
134 && ALIGNEDA(ii)
135 && ALIGNEDA(ro)
136 && ALIGNEDA(io)
137 && SIMD_STRIDE_OKA(is)
138 && ivs == 1
139 && os == 1
140 && SIMD_STRIDE_OKA(ovs)
141 && (vl % (2 * VL)) == 0
142 && (!d->is || (d->is == is))
143 && (!d->os || (d->os == os))
144 && (!d->ivs || (d->ivs == ivs))
145 && (!d->ovs || (d->ovs == ovs))
146 );
147 }
148
149 EXTERN_CONST(kdft_genus, XSIMD(dft_n2ssimd_genus)) = { n2s_okp, 2 * VL };
150
151 static int q1b_okp(const ct_desc *d,
152 const R *rio, const R *iio,
153 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
154 const planner *plnr)
155 {
156 return (1
157 && ALIGNED(iio)
158 && !NO_SIMDP(plnr)
159 && SIMD_STRIDE_OK(rs)
160 && SIMD_STRIDE_OK(vs)
161 && SIMD_VSTRIDE_OK(ms)
162 && rio == iio + 1
163 && (m % VL) == 0
164 && (mb % VL) == 0
165 && (me % VL) == 0
166 && (!d->rs || (d->rs == rs))
167 && (!d->vs || (d->vs == vs))
168 && (!d->ms || (d->ms == ms))
169 );
170 }
171 EXTERN_CONST(ct_genus, XSIMD(dft_q1bsimd_genus)) = { q1b_okp, VL };
172
173 static int q1f_okp(const ct_desc *d,
174 const R *rio, const R *iio,
175 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
176 const planner *plnr)
177 {
178 return (1
179 && ALIGNED(rio)
180 && !NO_SIMDP(plnr)
181 && SIMD_STRIDE_OK(rs)
182 && SIMD_STRIDE_OK(vs)
183 && SIMD_VSTRIDE_OK(ms)
184 && iio == rio + 1
185 && (m % VL) == 0
186 && (mb % VL) == 0
187 && (me % VL) == 0
188 && (!d->rs || (d->rs == rs))
189 && (!d->vs || (d->vs == vs))
190 && (!d->ms || (d->ms == ms))
191 );
192 }
193 EXTERN_CONST(ct_genus, XSIMD(dft_q1fsimd_genus)) = { q1f_okp, VL };
194
195 static int t_okp_common(const ct_desc *d,
196 const R *rio, const R *iio,
197 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
198 const planner *plnr)
199 {
200 UNUSED(rio); UNUSED(iio);
201 return (1
202 && !NO_SIMDP(plnr)
203 && SIMD_STRIDE_OKA(rs)
204 && SIMD_VSTRIDE_OKA(ms)
205 && (m % VL) == 0
206 && (mb % VL) == 0
207 && (me % VL) == 0
208 && (!d->rs || (d->rs == rs))
209 && (!d->vs || (d->vs == vs))
210 && (!d->ms || (d->ms == ms))
211 );
212 }
213
214 static int t_okp_commonu(const ct_desc *d,
215 const R *rio, const R *iio,
216 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
217 const planner *plnr)
218 {
219 UNUSED(rio); UNUSED(iio); UNUSED(m);
220 return (1
221 && !NO_SIMDP(plnr)
222 && SIMD_STRIDE_OK(rs)
223 && SIMD_VSTRIDE_OK(ms)
224 && (mb % VL) == 0
225 && (me % VL) == 0
226 && (!d->rs || (d->rs == rs))
227 && (!d->vs || (d->vs == vs))
228 && (!d->ms || (d->ms == ms))
229 );
230 }
231
232 static int t_okp_t1f(const ct_desc *d,
233 const R *rio, const R *iio,
234 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
235 const planner *plnr)
236 {
237 return t_okp_common(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
238 && iio == rio + 1
239 && ALIGNEDA(rio);
240 }
241
242 EXTERN_CONST(ct_genus, XSIMD(dft_t1fsimd_genus)) = { t_okp_t1f, VL };
243
244 static int t_okp_t1fu(const ct_desc *d,
245 const R *rio, const R *iio,
246 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
247 const planner *plnr)
248 {
249 return t_okp_commonu(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
250 && iio == rio + 1
251 && ALIGNED(rio);
252 }
253
254 EXTERN_CONST(ct_genus, XSIMD(dft_t1fusimd_genus)) = { t_okp_t1fu, VL };
255
256 static int t_okp_t1b(const ct_desc *d,
257 const R *rio, const R *iio,
258 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
259 const planner *plnr)
260 {
261 return t_okp_common(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
262 && rio == iio + 1
263 && ALIGNEDA(iio);
264 }
265
266 EXTERN_CONST(ct_genus, XSIMD(dft_t1bsimd_genus)) = { t_okp_t1b, VL };
267
268 static int t_okp_t1bu(const ct_desc *d,
269 const R *rio, const R *iio,
270 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
271 const planner *plnr)
272 {
273 return t_okp_commonu(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
274 && rio == iio + 1
275 && ALIGNED(iio);
276 }
277
278 EXTERN_CONST(ct_genus, XSIMD(dft_t1busimd_genus)) = { t_okp_t1bu, VL };
279
280 /* use t2* codelets only when n = m*radix is small, because
281 t2* codelets use ~2n twiddle factors (instead of ~n) */
282 static int small_enough(const ct_desc *d, INT m)
283 {
284 return m * d->radix <= 16384;
285 }
286
287 static int t_okp_t2f(const ct_desc *d,
288 const R *rio, const R *iio,
289 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
290 const planner *plnr)
291 {
292 return t_okp_t1f(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
293 && small_enough(d, m);
294 }
295
296 EXTERN_CONST(ct_genus, XSIMD(dft_t2fsimd_genus)) = { t_okp_t2f, VL };
297
298 static int t_okp_t2b(const ct_desc *d,
299 const R *rio, const R *iio,
300 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
301 const planner *plnr)
302 {
303 return t_okp_t1b(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
304 && small_enough(d, m);
305 }
306
307 EXTERN_CONST(ct_genus, XSIMD(dft_t2bsimd_genus)) = { t_okp_t2b, VL };
308
309 static int ts_okp(const ct_desc *d,
310 const R *rio, const R *iio,
311 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
312 const planner *plnr)
313 {
314 UNUSED(rio);
315 UNUSED(iio);
316 return (1
317 && !NO_SIMDP(plnr)
318 && ALIGNEDA(rio)
319 && ALIGNEDA(iio)
320 && SIMD_STRIDE_OKA(rs)
321 && ms == 1
322 && (m % (2 * VL)) == 0
323 && (mb % (2 * VL)) == 0
324 && (me % (2 * VL)) == 0
325 && (!d->rs || (d->rs == rs))
326 && (!d->vs || (d->vs == vs))
327 && (!d->ms || (d->ms == ms))
328 );
329 }
330
331 EXTERN_CONST(ct_genus, XSIMD(dft_tssimd_genus)) = { ts_okp, 2 * VL };