Mercurial > hg > sv-dependency-builds
comparison src/fftw-3.3.3/dft/simd/common/genus.c @ 10:37bf6b4a2645
Add FFTW3
author | Chris Cannam |
---|---|
date | Wed, 20 Mar 2013 15:35:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:c0fb53affa76 | 10:37bf6b4a2645 |
---|---|
1 /* | |
2 * Copyright (c) 2003, 2007-11 Matteo Frigo | |
3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License as published by | |
7 * the Free Software Foundation; either version 2 of the License, or | |
8 * (at your option) any later version. | |
9 * | |
10 * This program is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 * GNU General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU General Public License | |
16 * along with this program; if not, write to the Free Software | |
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 * | |
19 */ | |
20 | |
21 #include "codelet-dft.h" | |
22 #include SIMD_HEADER | |
23 | |
/* EXTERN_CONST(t, x): expands to an extern declaration of the const object
   x of type t, immediately followed by the start of its definition; the
   use site appends the "= { ... };" initializer.
   NOTE(review): presumably the separate extern declaration is there so the
   const object keeps external linkage on every compiler -- confirm. */
24 #define EXTERN_CONST(t, x) extern const t x; const t x | |
25 | |
26 static int n1b_okp(const kdft_desc *d, | |
27 const R *ri, const R *ii, const R *ro, const R *io, | |
28 INT is, INT os, INT vl, INT ivs, INT ovs, | |
29 const planner *plnr) | |
30 { | |
31 return (1 | |
32 && ALIGNED(ii) | |
33 && ALIGNED(io) | |
34 && !NO_SIMDP(plnr) | |
35 && SIMD_STRIDE_OK(is) | |
36 && SIMD_STRIDE_OK(os) | |
37 && SIMD_VSTRIDE_OK(ivs) | |
38 && SIMD_VSTRIDE_OK(ovs) | |
39 && ri == ii + 1 | |
40 && ro == io + 1 | |
41 && (vl % VL) == 0 | |
42 && (!d->is || (d->is == is)) | |
43 && (!d->os || (d->os == os)) | |
44 && (!d->ivs || (d->ivs == ivs)) | |
45 && (!d->ovs || (d->ovs == ovs)) | |
46 ); | |
47 } | |
48 | |
49 EXTERN_CONST(kdft_genus, XSIMD(dft_n1bsimd_genus)) = { n1b_okp, VL }; | |
50 | |
51 static int n1f_okp(const kdft_desc *d, | |
52 const R *ri, const R *ii, const R *ro, const R *io, | |
53 INT is, INT os, INT vl, INT ivs, INT ovs, | |
54 const planner *plnr) | |
55 { | |
56 return (1 | |
57 && ALIGNED(ri) | |
58 && ALIGNED(ro) | |
59 && !NO_SIMDP(plnr) | |
60 && SIMD_STRIDE_OK(is) | |
61 && SIMD_STRIDE_OK(os) | |
62 && SIMD_VSTRIDE_OK(ivs) | |
63 && SIMD_VSTRIDE_OK(ovs) | |
64 && ii == ri + 1 | |
65 && io == ro + 1 | |
66 && (vl % VL) == 0 | |
67 && (!d->is || (d->is == is)) | |
68 && (!d->os || (d->os == os)) | |
69 && (!d->ivs || (d->ivs == ivs)) | |
70 && (!d->ovs || (d->ovs == ovs)) | |
71 ); | |
72 } | |
73 | |
74 EXTERN_CONST(kdft_genus, XSIMD(dft_n1fsimd_genus)) = { n1f_okp, VL }; | |
75 | |
76 static int n2b_okp(const kdft_desc *d, | |
77 const R *ri, const R *ii, const R *ro, const R *io, | |
78 INT is, INT os, INT vl, INT ivs, INT ovs, | |
79 const planner *plnr) | |
80 { | |
81 return (1 | |
82 && ALIGNEDA(ii) | |
83 && ALIGNEDA(io) | |
84 && !NO_SIMDP(plnr) | |
85 && SIMD_STRIDE_OKA(is) | |
86 && SIMD_VSTRIDE_OKA(ivs) | |
87 && SIMD_VSTRIDE_OKA(os) /* os == 2 enforced by codelet */ | |
88 && SIMD_STRIDE_OKPAIR(ovs) | |
89 && ri == ii + 1 | |
90 && ro == io + 1 | |
91 && (vl % VL) == 0 | |
92 && (!d->is || (d->is == is)) | |
93 && (!d->os || (d->os == os)) | |
94 && (!d->ivs || (d->ivs == ivs)) | |
95 && (!d->ovs || (d->ovs == ovs)) | |
96 ); | |
97 } | |
98 | |
99 EXTERN_CONST(kdft_genus, XSIMD(dft_n2bsimd_genus)) = { n2b_okp, VL }; | |
100 | |
101 static int n2f_okp(const kdft_desc *d, | |
102 const R *ri, const R *ii, const R *ro, const R *io, | |
103 INT is, INT os, INT vl, INT ivs, INT ovs, | |
104 const planner *plnr) | |
105 { | |
106 return (1 | |
107 && ALIGNEDA(ri) | |
108 && ALIGNEDA(ro) | |
109 && !NO_SIMDP(plnr) | |
110 && SIMD_STRIDE_OKA(is) | |
111 && SIMD_VSTRIDE_OKA(ivs) | |
112 && SIMD_VSTRIDE_OKA(os) /* os == 2 enforced by codelet */ | |
113 && SIMD_STRIDE_OKPAIR(ovs) | |
114 && ii == ri + 1 | |
115 && io == ro + 1 | |
116 && (vl % VL) == 0 | |
117 && (!d->is || (d->is == is)) | |
118 && (!d->os || (d->os == os)) | |
119 && (!d->ivs || (d->ivs == ivs)) | |
120 && (!d->ovs || (d->ovs == ovs)) | |
121 ); | |
122 } | |
123 | |
124 EXTERN_CONST(kdft_genus, XSIMD(dft_n2fsimd_genus)) = { n2f_okp, VL }; | |
125 | |
126 static int n2s_okp(const kdft_desc *d, | |
127 const R *ri, const R *ii, const R *ro, const R *io, | |
128 INT is, INT os, INT vl, INT ivs, INT ovs, | |
129 const planner *plnr) | |
130 { | |
131 return (1 | |
132 && !NO_SIMDP(plnr) | |
133 && ALIGNEDA(ri) | |
134 && ALIGNEDA(ii) | |
135 && ALIGNEDA(ro) | |
136 && ALIGNEDA(io) | |
137 && SIMD_STRIDE_OKA(is) | |
138 && ivs == 1 | |
139 && os == 1 | |
140 && SIMD_STRIDE_OKA(ovs) | |
141 && (vl % (2 * VL)) == 0 | |
142 && (!d->is || (d->is == is)) | |
143 && (!d->os || (d->os == os)) | |
144 && (!d->ivs || (d->ivs == ivs)) | |
145 && (!d->ovs || (d->ovs == ovs)) | |
146 ); | |
147 } | |
148 | |
149 EXTERN_CONST(kdft_genus, XSIMD(dft_n2ssimd_genus)) = { n2s_okp, 2 * VL }; | |
150 | |
151 static int q1b_okp(const ct_desc *d, | |
152 const R *rio, const R *iio, | |
153 INT rs, INT vs, INT m, INT mb, INT me, INT ms, | |
154 const planner *plnr) | |
155 { | |
156 return (1 | |
157 && ALIGNED(iio) | |
158 && !NO_SIMDP(plnr) | |
159 && SIMD_STRIDE_OK(rs) | |
160 && SIMD_STRIDE_OK(vs) | |
161 && SIMD_VSTRIDE_OK(ms) | |
162 && rio == iio + 1 | |
163 && (m % VL) == 0 | |
164 && (mb % VL) == 0 | |
165 && (me % VL) == 0 | |
166 && (!d->rs || (d->rs == rs)) | |
167 && (!d->vs || (d->vs == vs)) | |
168 && (!d->ms || (d->ms == ms)) | |
169 ); | |
170 } | |
171 EXTERN_CONST(ct_genus, XSIMD(dft_q1bsimd_genus)) = { q1b_okp, VL }; | |
172 | |
173 static int q1f_okp(const ct_desc *d, | |
174 const R *rio, const R *iio, | |
175 INT rs, INT vs, INT m, INT mb, INT me, INT ms, | |
176 const planner *plnr) | |
177 { | |
178 return (1 | |
179 && ALIGNED(rio) | |
180 && !NO_SIMDP(plnr) | |
181 && SIMD_STRIDE_OK(rs) | |
182 && SIMD_STRIDE_OK(vs) | |
183 && SIMD_VSTRIDE_OK(ms) | |
184 && iio == rio + 1 | |
185 && (m % VL) == 0 | |
186 && (mb % VL) == 0 | |
187 && (me % VL) == 0 | |
188 && (!d->rs || (d->rs == rs)) | |
189 && (!d->vs || (d->vs == vs)) | |
190 && (!d->ms || (d->ms == ms)) | |
191 ); | |
192 } | |
193 EXTERN_CONST(ct_genus, XSIMD(dft_q1fsimd_genus)) = { q1f_okp, VL }; | |
194 | |
195 static int t_okp_common(const ct_desc *d, | |
196 const R *rio, const R *iio, | |
197 INT rs, INT vs, INT m, INT mb, INT me, INT ms, | |
198 const planner *plnr) | |
199 { | |
200 UNUSED(rio); UNUSED(iio); | |
201 return (1 | |
202 && !NO_SIMDP(plnr) | |
203 && SIMD_STRIDE_OKA(rs) | |
204 && SIMD_VSTRIDE_OKA(ms) | |
205 && (m % VL) == 0 | |
206 && (mb % VL) == 0 | |
207 && (me % VL) == 0 | |
208 && (!d->rs || (d->rs == rs)) | |
209 && (!d->vs || (d->vs == vs)) | |
210 && (!d->ms || (d->ms == ms)) | |
211 ); | |
212 } | |
213 | |
214 static int t_okp_commonu(const ct_desc *d, | |
215 const R *rio, const R *iio, | |
216 INT rs, INT vs, INT m, INT mb, INT me, INT ms, | |
217 const planner *plnr) | |
218 { | |
219 UNUSED(rio); UNUSED(iio); UNUSED(m); | |
220 return (1 | |
221 && !NO_SIMDP(plnr) | |
222 && SIMD_STRIDE_OK(rs) | |
223 && SIMD_VSTRIDE_OK(ms) | |
224 && (mb % VL) == 0 | |
225 && (me % VL) == 0 | |
226 && (!d->rs || (d->rs == rs)) | |
227 && (!d->vs || (d->vs == vs)) | |
228 && (!d->ms || (d->ms == ms)) | |
229 ); | |
230 } | |
231 | |
232 static int t_okp_t1f(const ct_desc *d, | |
233 const R *rio, const R *iio, | |
234 INT rs, INT vs, INT m, INT mb, INT me, INT ms, | |
235 const planner *plnr) | |
236 { | |
237 return t_okp_common(d, rio, iio, rs, vs, m, mb, me, ms, plnr) | |
238 && iio == rio + 1 | |
239 && ALIGNEDA(rio); | |
240 } | |
241 | |
242 EXTERN_CONST(ct_genus, XSIMD(dft_t1fsimd_genus)) = { t_okp_t1f, VL }; | |
243 | |
244 static int t_okp_t1fu(const ct_desc *d, | |
245 const R *rio, const R *iio, | |
246 INT rs, INT vs, INT m, INT mb, INT me, INT ms, | |
247 const planner *plnr) | |
248 { | |
249 return t_okp_commonu(d, rio, iio, rs, vs, m, mb, me, ms, plnr) | |
250 && iio == rio + 1 | |
251 && ALIGNED(rio); | |
252 } | |
253 | |
254 EXTERN_CONST(ct_genus, XSIMD(dft_t1fusimd_genus)) = { t_okp_t1fu, VL }; | |
255 | |
256 static int t_okp_t1b(const ct_desc *d, | |
257 const R *rio, const R *iio, | |
258 INT rs, INT vs, INT m, INT mb, INT me, INT ms, | |
259 const planner *plnr) | |
260 { | |
261 return t_okp_common(d, rio, iio, rs, vs, m, mb, me, ms, plnr) | |
262 && rio == iio + 1 | |
263 && ALIGNEDA(iio); | |
264 } | |
265 | |
266 EXTERN_CONST(ct_genus, XSIMD(dft_t1bsimd_genus)) = { t_okp_t1b, VL }; | |
267 | |
268 static int t_okp_t1bu(const ct_desc *d, | |
269 const R *rio, const R *iio, | |
270 INT rs, INT vs, INT m, INT mb, INT me, INT ms, | |
271 const planner *plnr) | |
272 { | |
273 return t_okp_commonu(d, rio, iio, rs, vs, m, mb, me, ms, plnr) | |
274 && rio == iio + 1 | |
275 && ALIGNED(iio); | |
276 } | |
277 | |
278 EXTERN_CONST(ct_genus, XSIMD(dft_t1busimd_genus)) = { t_okp_t1bu, VL }; | |
279 | |
280 /* use t2* codelets only when n = m*radix is small, because | |
281 t2* codelets use ~2n twiddle factors (instead of ~n) */ | |
282 static int small_enough(const ct_desc *d, INT m) | |
283 { | |
284 return m * d->radix <= 16384; | |
285 } | |
286 | |
287 static int t_okp_t2f(const ct_desc *d, | |
288 const R *rio, const R *iio, | |
289 INT rs, INT vs, INT m, INT mb, INT me, INT ms, | |
290 const planner *plnr) | |
291 { | |
292 return t_okp_t1f(d, rio, iio, rs, vs, m, mb, me, ms, plnr) | |
293 && small_enough(d, m); | |
294 } | |
295 | |
296 EXTERN_CONST(ct_genus, XSIMD(dft_t2fsimd_genus)) = { t_okp_t2f, VL }; | |
297 | |
298 static int t_okp_t2b(const ct_desc *d, | |
299 const R *rio, const R *iio, | |
300 INT rs, INT vs, INT m, INT mb, INT me, INT ms, | |
301 const planner *plnr) | |
302 { | |
303 return t_okp_t1b(d, rio, iio, rs, vs, m, mb, me, ms, plnr) | |
304 && small_enough(d, m); | |
305 } | |
306 | |
307 EXTERN_CONST(ct_genus, XSIMD(dft_t2bsimd_genus)) = { t_okp_t2b, VL }; | |
308 | |
309 static int ts_okp(const ct_desc *d, | |
310 const R *rio, const R *iio, | |
311 INT rs, INT vs, INT m, INT mb, INT me, INT ms, | |
312 const planner *plnr) | |
313 { | |
314 UNUSED(rio); | |
315 UNUSED(iio); | |
316 return (1 | |
317 && !NO_SIMDP(plnr) | |
318 && ALIGNEDA(rio) | |
319 && ALIGNEDA(iio) | |
320 && SIMD_STRIDE_OKA(rs) | |
321 && ms == 1 | |
322 && (m % (2 * VL)) == 0 | |
323 && (mb % (2 * VL)) == 0 | |
324 && (me % (2 * VL)) == 0 | |
325 && (!d->rs || (d->rs == rs)) | |
326 && (!d->vs || (d->vs == vs)) | |
327 && (!d->ms || (d->ms == ms)) | |
328 ); | |
329 } | |
330 | |
331 EXTERN_CONST(ct_genus, XSIMD(dft_tssimd_genus)) = { ts_okp, 2 * VL }; |