annotate src/fftw-3.3.3/dft/simd/common/genus.c @ 95:89f5e221ed7b

Add FFTW3
author Chris Cannam <cannam@all-day-breakfast.com>
date Wed, 20 Mar 2013 15:35:50 +0000
parents
children
rev   line source
cannam@95 1 /*
cannam@95 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
cannam@95 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
cannam@95 4 *
cannam@95 5 * This program is free software; you can redistribute it and/or modify
cannam@95 6 * it under the terms of the GNU General Public License as published by
cannam@95 7 * the Free Software Foundation; either version 2 of the License, or
cannam@95 8 * (at your option) any later version.
cannam@95 9 *
cannam@95 10 * This program is distributed in the hope that it will be useful,
cannam@95 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
cannam@95 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
cannam@95 13 * GNU General Public License for more details.
cannam@95 14 *
cannam@95 15 * You should have received a copy of the GNU General Public License
cannam@95 16 * along with this program; if not, write to the Free Software
cannam@95 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
cannam@95 18 *
cannam@95 19 */
cannam@95 20
cannam@95 21 #include "codelet-dft.h"
cannam@95 22 #include SIMD_HEADER
cannam@95 23
/*
 * EXTERN_CONST(t, x) expands to an `extern const t x;` declaration
 * immediately followed by the opening of the definition `const t x`,
 * so the initializer written after the macro invocation defines an
 * object that has already been declared extern.
 * NOTE(review): presumably this keeps the const object externally
 * visible and avoids missing-declaration warnings -- confirm.
 */
#define EXTERN_CONST(t, x) \
     extern const t x;     \
     const t x
cannam@95 25
cannam@95 26 static int n1b_okp(const kdft_desc *d,
cannam@95 27 const R *ri, const R *ii, const R *ro, const R *io,
cannam@95 28 INT is, INT os, INT vl, INT ivs, INT ovs,
cannam@95 29 const planner *plnr)
cannam@95 30 {
cannam@95 31 return (1
cannam@95 32 && ALIGNED(ii)
cannam@95 33 && ALIGNED(io)
cannam@95 34 && !NO_SIMDP(plnr)
cannam@95 35 && SIMD_STRIDE_OK(is)
cannam@95 36 && SIMD_STRIDE_OK(os)
cannam@95 37 && SIMD_VSTRIDE_OK(ivs)
cannam@95 38 && SIMD_VSTRIDE_OK(ovs)
cannam@95 39 && ri == ii + 1
cannam@95 40 && ro == io + 1
cannam@95 41 && (vl % VL) == 0
cannam@95 42 && (!d->is || (d->is == is))
cannam@95 43 && (!d->os || (d->os == os))
cannam@95 44 && (!d->ivs || (d->ivs == ivs))
cannam@95 45 && (!d->ovs || (d->ovs == ovs))
cannam@95 46 );
cannam@95 47 }
cannam@95 48
cannam@95 49 EXTERN_CONST(kdft_genus, XSIMD(dft_n1bsimd_genus)) = { n1b_okp, VL };
cannam@95 50
cannam@95 51 static int n1f_okp(const kdft_desc *d,
cannam@95 52 const R *ri, const R *ii, const R *ro, const R *io,
cannam@95 53 INT is, INT os, INT vl, INT ivs, INT ovs,
cannam@95 54 const planner *plnr)
cannam@95 55 {
cannam@95 56 return (1
cannam@95 57 && ALIGNED(ri)
cannam@95 58 && ALIGNED(ro)
cannam@95 59 && !NO_SIMDP(plnr)
cannam@95 60 && SIMD_STRIDE_OK(is)
cannam@95 61 && SIMD_STRIDE_OK(os)
cannam@95 62 && SIMD_VSTRIDE_OK(ivs)
cannam@95 63 && SIMD_VSTRIDE_OK(ovs)
cannam@95 64 && ii == ri + 1
cannam@95 65 && io == ro + 1
cannam@95 66 && (vl % VL) == 0
cannam@95 67 && (!d->is || (d->is == is))
cannam@95 68 && (!d->os || (d->os == os))
cannam@95 69 && (!d->ivs || (d->ivs == ivs))
cannam@95 70 && (!d->ovs || (d->ovs == ovs))
cannam@95 71 );
cannam@95 72 }
cannam@95 73
cannam@95 74 EXTERN_CONST(kdft_genus, XSIMD(dft_n1fsimd_genus)) = { n1f_okp, VL };
cannam@95 75
cannam@95 76 static int n2b_okp(const kdft_desc *d,
cannam@95 77 const R *ri, const R *ii, const R *ro, const R *io,
cannam@95 78 INT is, INT os, INT vl, INT ivs, INT ovs,
cannam@95 79 const planner *plnr)
cannam@95 80 {
cannam@95 81 return (1
cannam@95 82 && ALIGNEDA(ii)
cannam@95 83 && ALIGNEDA(io)
cannam@95 84 && !NO_SIMDP(plnr)
cannam@95 85 && SIMD_STRIDE_OKA(is)
cannam@95 86 && SIMD_VSTRIDE_OKA(ivs)
cannam@95 87 && SIMD_VSTRIDE_OKA(os) /* os == 2 enforced by codelet */
cannam@95 88 && SIMD_STRIDE_OKPAIR(ovs)
cannam@95 89 && ri == ii + 1
cannam@95 90 && ro == io + 1
cannam@95 91 && (vl % VL) == 0
cannam@95 92 && (!d->is || (d->is == is))
cannam@95 93 && (!d->os || (d->os == os))
cannam@95 94 && (!d->ivs || (d->ivs == ivs))
cannam@95 95 && (!d->ovs || (d->ovs == ovs))
cannam@95 96 );
cannam@95 97 }
cannam@95 98
cannam@95 99 EXTERN_CONST(kdft_genus, XSIMD(dft_n2bsimd_genus)) = { n2b_okp, VL };
cannam@95 100
cannam@95 101 static int n2f_okp(const kdft_desc *d,
cannam@95 102 const R *ri, const R *ii, const R *ro, const R *io,
cannam@95 103 INT is, INT os, INT vl, INT ivs, INT ovs,
cannam@95 104 const planner *plnr)
cannam@95 105 {
cannam@95 106 return (1
cannam@95 107 && ALIGNEDA(ri)
cannam@95 108 && ALIGNEDA(ro)
cannam@95 109 && !NO_SIMDP(plnr)
cannam@95 110 && SIMD_STRIDE_OKA(is)
cannam@95 111 && SIMD_VSTRIDE_OKA(ivs)
cannam@95 112 && SIMD_VSTRIDE_OKA(os) /* os == 2 enforced by codelet */
cannam@95 113 && SIMD_STRIDE_OKPAIR(ovs)
cannam@95 114 && ii == ri + 1
cannam@95 115 && io == ro + 1
cannam@95 116 && (vl % VL) == 0
cannam@95 117 && (!d->is || (d->is == is))
cannam@95 118 && (!d->os || (d->os == os))
cannam@95 119 && (!d->ivs || (d->ivs == ivs))
cannam@95 120 && (!d->ovs || (d->ovs == ovs))
cannam@95 121 );
cannam@95 122 }
cannam@95 123
cannam@95 124 EXTERN_CONST(kdft_genus, XSIMD(dft_n2fsimd_genus)) = { n2f_okp, VL };
cannam@95 125
cannam@95 126 static int n2s_okp(const kdft_desc *d,
cannam@95 127 const R *ri, const R *ii, const R *ro, const R *io,
cannam@95 128 INT is, INT os, INT vl, INT ivs, INT ovs,
cannam@95 129 const planner *plnr)
cannam@95 130 {
cannam@95 131 return (1
cannam@95 132 && !NO_SIMDP(plnr)
cannam@95 133 && ALIGNEDA(ri)
cannam@95 134 && ALIGNEDA(ii)
cannam@95 135 && ALIGNEDA(ro)
cannam@95 136 && ALIGNEDA(io)
cannam@95 137 && SIMD_STRIDE_OKA(is)
cannam@95 138 && ivs == 1
cannam@95 139 && os == 1
cannam@95 140 && SIMD_STRIDE_OKA(ovs)
cannam@95 141 && (vl % (2 * VL)) == 0
cannam@95 142 && (!d->is || (d->is == is))
cannam@95 143 && (!d->os || (d->os == os))
cannam@95 144 && (!d->ivs || (d->ivs == ivs))
cannam@95 145 && (!d->ovs || (d->ovs == ovs))
cannam@95 146 );
cannam@95 147 }
cannam@95 148
cannam@95 149 EXTERN_CONST(kdft_genus, XSIMD(dft_n2ssimd_genus)) = { n2s_okp, 2 * VL };
cannam@95 150
cannam@95 151 static int q1b_okp(const ct_desc *d,
cannam@95 152 const R *rio, const R *iio,
cannam@95 153 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
cannam@95 154 const planner *plnr)
cannam@95 155 {
cannam@95 156 return (1
cannam@95 157 && ALIGNED(iio)
cannam@95 158 && !NO_SIMDP(plnr)
cannam@95 159 && SIMD_STRIDE_OK(rs)
cannam@95 160 && SIMD_STRIDE_OK(vs)
cannam@95 161 && SIMD_VSTRIDE_OK(ms)
cannam@95 162 && rio == iio + 1
cannam@95 163 && (m % VL) == 0
cannam@95 164 && (mb % VL) == 0
cannam@95 165 && (me % VL) == 0
cannam@95 166 && (!d->rs || (d->rs == rs))
cannam@95 167 && (!d->vs || (d->vs == vs))
cannam@95 168 && (!d->ms || (d->ms == ms))
cannam@95 169 );
cannam@95 170 }
cannam@95 171 EXTERN_CONST(ct_genus, XSIMD(dft_q1bsimd_genus)) = { q1b_okp, VL };
cannam@95 172
cannam@95 173 static int q1f_okp(const ct_desc *d,
cannam@95 174 const R *rio, const R *iio,
cannam@95 175 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
cannam@95 176 const planner *plnr)
cannam@95 177 {
cannam@95 178 return (1
cannam@95 179 && ALIGNED(rio)
cannam@95 180 && !NO_SIMDP(plnr)
cannam@95 181 && SIMD_STRIDE_OK(rs)
cannam@95 182 && SIMD_STRIDE_OK(vs)
cannam@95 183 && SIMD_VSTRIDE_OK(ms)
cannam@95 184 && iio == rio + 1
cannam@95 185 && (m % VL) == 0
cannam@95 186 && (mb % VL) == 0
cannam@95 187 && (me % VL) == 0
cannam@95 188 && (!d->rs || (d->rs == rs))
cannam@95 189 && (!d->vs || (d->vs == vs))
cannam@95 190 && (!d->ms || (d->ms == ms))
cannam@95 191 );
cannam@95 192 }
cannam@95 193 EXTERN_CONST(ct_genus, XSIMD(dft_q1fsimd_genus)) = { q1f_okp, VL };
cannam@95 194
cannam@95 195 static int t_okp_common(const ct_desc *d,
cannam@95 196 const R *rio, const R *iio,
cannam@95 197 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
cannam@95 198 const planner *plnr)
cannam@95 199 {
cannam@95 200 UNUSED(rio); UNUSED(iio);
cannam@95 201 return (1
cannam@95 202 && !NO_SIMDP(plnr)
cannam@95 203 && SIMD_STRIDE_OKA(rs)
cannam@95 204 && SIMD_VSTRIDE_OKA(ms)
cannam@95 205 && (m % VL) == 0
cannam@95 206 && (mb % VL) == 0
cannam@95 207 && (me % VL) == 0
cannam@95 208 && (!d->rs || (d->rs == rs))
cannam@95 209 && (!d->vs || (d->vs == vs))
cannam@95 210 && (!d->ms || (d->ms == ms))
cannam@95 211 );
cannam@95 212 }
cannam@95 213
cannam@95 214 static int t_okp_commonu(const ct_desc *d,
cannam@95 215 const R *rio, const R *iio,
cannam@95 216 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
cannam@95 217 const planner *plnr)
cannam@95 218 {
cannam@95 219 UNUSED(rio); UNUSED(iio); UNUSED(m);
cannam@95 220 return (1
cannam@95 221 && !NO_SIMDP(plnr)
cannam@95 222 && SIMD_STRIDE_OK(rs)
cannam@95 223 && SIMD_VSTRIDE_OK(ms)
cannam@95 224 && (mb % VL) == 0
cannam@95 225 && (me % VL) == 0
cannam@95 226 && (!d->rs || (d->rs == rs))
cannam@95 227 && (!d->vs || (d->vs == vs))
cannam@95 228 && (!d->ms || (d->ms == ms))
cannam@95 229 );
cannam@95 230 }
cannam@95 231
cannam@95 232 static int t_okp_t1f(const ct_desc *d,
cannam@95 233 const R *rio, const R *iio,
cannam@95 234 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
cannam@95 235 const planner *plnr)
cannam@95 236 {
cannam@95 237 return t_okp_common(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
cannam@95 238 && iio == rio + 1
cannam@95 239 && ALIGNEDA(rio);
cannam@95 240 }
cannam@95 241
cannam@95 242 EXTERN_CONST(ct_genus, XSIMD(dft_t1fsimd_genus)) = { t_okp_t1f, VL };
cannam@95 243
cannam@95 244 static int t_okp_t1fu(const ct_desc *d,
cannam@95 245 const R *rio, const R *iio,
cannam@95 246 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
cannam@95 247 const planner *plnr)
cannam@95 248 {
cannam@95 249 return t_okp_commonu(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
cannam@95 250 && iio == rio + 1
cannam@95 251 && ALIGNED(rio);
cannam@95 252 }
cannam@95 253
cannam@95 254 EXTERN_CONST(ct_genus, XSIMD(dft_t1fusimd_genus)) = { t_okp_t1fu, VL };
cannam@95 255
cannam@95 256 static int t_okp_t1b(const ct_desc *d,
cannam@95 257 const R *rio, const R *iio,
cannam@95 258 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
cannam@95 259 const planner *plnr)
cannam@95 260 {
cannam@95 261 return t_okp_common(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
cannam@95 262 && rio == iio + 1
cannam@95 263 && ALIGNEDA(iio);
cannam@95 264 }
cannam@95 265
cannam@95 266 EXTERN_CONST(ct_genus, XSIMD(dft_t1bsimd_genus)) = { t_okp_t1b, VL };
cannam@95 267
cannam@95 268 static int t_okp_t1bu(const ct_desc *d,
cannam@95 269 const R *rio, const R *iio,
cannam@95 270 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
cannam@95 271 const planner *plnr)
cannam@95 272 {
cannam@95 273 return t_okp_commonu(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
cannam@95 274 && rio == iio + 1
cannam@95 275 && ALIGNED(iio);
cannam@95 276 }
cannam@95 277
cannam@95 278 EXTERN_CONST(ct_genus, XSIMD(dft_t1busimd_genus)) = { t_okp_t1bu, VL };
cannam@95 279
cannam@95 280 /* use t2* codelets only when n = m*radix is small, because
cannam@95 281 t2* codelets use ~2n twiddle factors (instead of ~n) */
cannam@95 282 static int small_enough(const ct_desc *d, INT m)
cannam@95 283 {
cannam@95 284 return m * d->radix <= 16384;
cannam@95 285 }
cannam@95 286
cannam@95 287 static int t_okp_t2f(const ct_desc *d,
cannam@95 288 const R *rio, const R *iio,
cannam@95 289 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
cannam@95 290 const planner *plnr)
cannam@95 291 {
cannam@95 292 return t_okp_t1f(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
cannam@95 293 && small_enough(d, m);
cannam@95 294 }
cannam@95 295
cannam@95 296 EXTERN_CONST(ct_genus, XSIMD(dft_t2fsimd_genus)) = { t_okp_t2f, VL };
cannam@95 297
cannam@95 298 static int t_okp_t2b(const ct_desc *d,
cannam@95 299 const R *rio, const R *iio,
cannam@95 300 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
cannam@95 301 const planner *plnr)
cannam@95 302 {
cannam@95 303 return t_okp_t1b(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
cannam@95 304 && small_enough(d, m);
cannam@95 305 }
cannam@95 306
cannam@95 307 EXTERN_CONST(ct_genus, XSIMD(dft_t2bsimd_genus)) = { t_okp_t2b, VL };
cannam@95 308
cannam@95 309 static int ts_okp(const ct_desc *d,
cannam@95 310 const R *rio, const R *iio,
cannam@95 311 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
cannam@95 312 const planner *plnr)
cannam@95 313 {
cannam@95 314 UNUSED(rio);
cannam@95 315 UNUSED(iio);
cannam@95 316 return (1
cannam@95 317 && !NO_SIMDP(plnr)
cannam@95 318 && ALIGNEDA(rio)
cannam@95 319 && ALIGNEDA(iio)
cannam@95 320 && SIMD_STRIDE_OKA(rs)
cannam@95 321 && ms == 1
cannam@95 322 && (m % (2 * VL)) == 0
cannam@95 323 && (mb % (2 * VL)) == 0
cannam@95 324 && (me % (2 * VL)) == 0
cannam@95 325 && (!d->rs || (d->rs == rs))
cannam@95 326 && (!d->vs || (d->vs == vs))
cannam@95 327 && (!d->ms || (d->ms == ms))
cannam@95 328 );
cannam@95 329 }
cannam@95 330
cannam@95 331 EXTERN_CONST(ct_genus, XSIMD(dft_tssimd_genus)) = { ts_okp, 2 * VL };