annotate src/fftw-3.3.5/dft/simd/common/genus.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 #include "codelet-dft.h"
Chris@42 22 #include SIMD_HEADER
Chris@42 23
/*
 * EXTERN_CONST(t, x) expands to an `extern const t x;` declaration
 * immediately followed by the start of the definition `const t x`, so
 * that the use site can append `= { ... };` as the initializer.
 *
 * NOTE(review): presumably the leading extern declaration is there so
 * the object gets external linkage when built as C++ and so that
 * missing-declaration warnings stay quiet -- confirm.
 */
#define EXTERN_CONST(t, x) \
     extern const t x;     \
     const t x
Chris@42 25
Chris@42 26 static int n1b_okp(const kdft_desc *d,
Chris@42 27 const R *ri, const R *ii, const R *ro, const R *io,
Chris@42 28 INT is, INT os, INT vl, INT ivs, INT ovs,
Chris@42 29 const planner *plnr)
Chris@42 30 {
Chris@42 31 return (1
Chris@42 32 && ALIGNED(ii)
Chris@42 33 && ALIGNED(io)
Chris@42 34 && !NO_SIMDP(plnr)
Chris@42 35 && SIMD_STRIDE_OK(is)
Chris@42 36 && SIMD_STRIDE_OK(os)
Chris@42 37 && SIMD_VSTRIDE_OK(ivs)
Chris@42 38 && SIMD_VSTRIDE_OK(ovs)
Chris@42 39 && ri == ii + 1
Chris@42 40 && ro == io + 1
Chris@42 41 && (vl % VL) == 0
Chris@42 42 && (!d->is || (d->is == is))
Chris@42 43 && (!d->os || (d->os == os))
Chris@42 44 && (!d->ivs || (d->ivs == ivs))
Chris@42 45 && (!d->ovs || (d->ovs == ovs))
Chris@42 46 );
Chris@42 47 }
Chris@42 48
Chris@42 49 EXTERN_CONST(kdft_genus, XSIMD(dft_n1bsimd_genus)) = { n1b_okp, VL };
Chris@42 50
Chris@42 51 static int n1f_okp(const kdft_desc *d,
Chris@42 52 const R *ri, const R *ii, const R *ro, const R *io,
Chris@42 53 INT is, INT os, INT vl, INT ivs, INT ovs,
Chris@42 54 const planner *plnr)
Chris@42 55 {
Chris@42 56 return (1
Chris@42 57 && ALIGNED(ri)
Chris@42 58 && ALIGNED(ro)
Chris@42 59 && !NO_SIMDP(plnr)
Chris@42 60 && SIMD_STRIDE_OK(is)
Chris@42 61 && SIMD_STRIDE_OK(os)
Chris@42 62 && SIMD_VSTRIDE_OK(ivs)
Chris@42 63 && SIMD_VSTRIDE_OK(ovs)
Chris@42 64 && ii == ri + 1
Chris@42 65 && io == ro + 1
Chris@42 66 && (vl % VL) == 0
Chris@42 67 && (!d->is || (d->is == is))
Chris@42 68 && (!d->os || (d->os == os))
Chris@42 69 && (!d->ivs || (d->ivs == ivs))
Chris@42 70 && (!d->ovs || (d->ovs == ovs))
Chris@42 71 );
Chris@42 72 }
Chris@42 73
Chris@42 74 EXTERN_CONST(kdft_genus, XSIMD(dft_n1fsimd_genus)) = { n1f_okp, VL };
Chris@42 75
Chris@42 76 static int n2b_okp(const kdft_desc *d,
Chris@42 77 const R *ri, const R *ii, const R *ro, const R *io,
Chris@42 78 INT is, INT os, INT vl, INT ivs, INT ovs,
Chris@42 79 const planner *plnr)
Chris@42 80 {
Chris@42 81 return (1
Chris@42 82 && ALIGNEDA(ii)
Chris@42 83 && ALIGNEDA(io)
Chris@42 84 && !NO_SIMDP(plnr)
Chris@42 85 && SIMD_STRIDE_OKA(is)
Chris@42 86 && SIMD_VSTRIDE_OKA(ivs)
Chris@42 87 && SIMD_VSTRIDE_OKA(os) /* os == 2 enforced by codelet */
Chris@42 88 && SIMD_STRIDE_OKPAIR(ovs)
Chris@42 89 && ri == ii + 1
Chris@42 90 && ro == io + 1
Chris@42 91 && (vl % VL) == 0
Chris@42 92 && (!d->is || (d->is == is))
Chris@42 93 && (!d->os || (d->os == os))
Chris@42 94 && (!d->ivs || (d->ivs == ivs))
Chris@42 95 && (!d->ovs || (d->ovs == ovs))
Chris@42 96 );
Chris@42 97 }
Chris@42 98
Chris@42 99 EXTERN_CONST(kdft_genus, XSIMD(dft_n2bsimd_genus)) = { n2b_okp, VL };
Chris@42 100
Chris@42 101 static int n2f_okp(const kdft_desc *d,
Chris@42 102 const R *ri, const R *ii, const R *ro, const R *io,
Chris@42 103 INT is, INT os, INT vl, INT ivs, INT ovs,
Chris@42 104 const planner *plnr)
Chris@42 105 {
Chris@42 106 return (1
Chris@42 107 && ALIGNEDA(ri)
Chris@42 108 && ALIGNEDA(ro)
Chris@42 109 && !NO_SIMDP(plnr)
Chris@42 110 && SIMD_STRIDE_OKA(is)
Chris@42 111 && SIMD_VSTRIDE_OKA(ivs)
Chris@42 112 && SIMD_VSTRIDE_OKA(os) /* os == 2 enforced by codelet */
Chris@42 113 && SIMD_STRIDE_OKPAIR(ovs)
Chris@42 114 && ii == ri + 1
Chris@42 115 && io == ro + 1
Chris@42 116 && (vl % VL) == 0
Chris@42 117 && (!d->is || (d->is == is))
Chris@42 118 && (!d->os || (d->os == os))
Chris@42 119 && (!d->ivs || (d->ivs == ivs))
Chris@42 120 && (!d->ovs || (d->ovs == ovs))
Chris@42 121 );
Chris@42 122 }
Chris@42 123
Chris@42 124 EXTERN_CONST(kdft_genus, XSIMD(dft_n2fsimd_genus)) = { n2f_okp, VL };
Chris@42 125
Chris@42 126 static int n2s_okp(const kdft_desc *d,
Chris@42 127 const R *ri, const R *ii, const R *ro, const R *io,
Chris@42 128 INT is, INT os, INT vl, INT ivs, INT ovs,
Chris@42 129 const planner *plnr)
Chris@42 130 {
Chris@42 131 return (1
Chris@42 132 && !NO_SIMDP(plnr)
Chris@42 133 && ALIGNEDA(ri)
Chris@42 134 && ALIGNEDA(ii)
Chris@42 135 && ALIGNEDA(ro)
Chris@42 136 && ALIGNEDA(io)
Chris@42 137 && SIMD_STRIDE_OKA(is)
Chris@42 138 && ivs == 1
Chris@42 139 && os == 1
Chris@42 140 && SIMD_STRIDE_OKA(ovs)
Chris@42 141 && (vl % (2 * VL)) == 0
Chris@42 142 && (!d->is || (d->is == is))
Chris@42 143 && (!d->os || (d->os == os))
Chris@42 144 && (!d->ivs || (d->ivs == ivs))
Chris@42 145 && (!d->ovs || (d->ovs == ovs))
Chris@42 146 );
Chris@42 147 }
Chris@42 148
Chris@42 149 EXTERN_CONST(kdft_genus, XSIMD(dft_n2ssimd_genus)) = { n2s_okp, 2 * VL };
Chris@42 150
Chris@42 151 static int q1b_okp(const ct_desc *d,
Chris@42 152 const R *rio, const R *iio,
Chris@42 153 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
Chris@42 154 const planner *plnr)
Chris@42 155 {
Chris@42 156 return (1
Chris@42 157 && ALIGNED(iio)
Chris@42 158 && !NO_SIMDP(plnr)
Chris@42 159 && SIMD_STRIDE_OK(rs)
Chris@42 160 && SIMD_STRIDE_OK(vs)
Chris@42 161 && SIMD_VSTRIDE_OK(ms)
Chris@42 162 && rio == iio + 1
Chris@42 163 && (m % VL) == 0
Chris@42 164 && (mb % VL) == 0
Chris@42 165 && (me % VL) == 0
Chris@42 166 && (!d->rs || (d->rs == rs))
Chris@42 167 && (!d->vs || (d->vs == vs))
Chris@42 168 && (!d->ms || (d->ms == ms))
Chris@42 169 );
Chris@42 170 }
Chris@42 171 EXTERN_CONST(ct_genus, XSIMD(dft_q1bsimd_genus)) = { q1b_okp, VL };
Chris@42 172
Chris@42 173 static int q1f_okp(const ct_desc *d,
Chris@42 174 const R *rio, const R *iio,
Chris@42 175 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
Chris@42 176 const planner *plnr)
Chris@42 177 {
Chris@42 178 return (1
Chris@42 179 && ALIGNED(rio)
Chris@42 180 && !NO_SIMDP(plnr)
Chris@42 181 && SIMD_STRIDE_OK(rs)
Chris@42 182 && SIMD_STRIDE_OK(vs)
Chris@42 183 && SIMD_VSTRIDE_OK(ms)
Chris@42 184 && iio == rio + 1
Chris@42 185 && (m % VL) == 0
Chris@42 186 && (mb % VL) == 0
Chris@42 187 && (me % VL) == 0
Chris@42 188 && (!d->rs || (d->rs == rs))
Chris@42 189 && (!d->vs || (d->vs == vs))
Chris@42 190 && (!d->ms || (d->ms == ms))
Chris@42 191 );
Chris@42 192 }
Chris@42 193 EXTERN_CONST(ct_genus, XSIMD(dft_q1fsimd_genus)) = { q1f_okp, VL };
Chris@42 194
Chris@42 195 static int t_okp_common(const ct_desc *d,
Chris@42 196 const R *rio, const R *iio,
Chris@42 197 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
Chris@42 198 const planner *plnr)
Chris@42 199 {
Chris@42 200 UNUSED(rio); UNUSED(iio);
Chris@42 201 return (1
Chris@42 202 && !NO_SIMDP(plnr)
Chris@42 203 && SIMD_STRIDE_OKA(rs)
Chris@42 204 && SIMD_VSTRIDE_OKA(ms)
Chris@42 205 && (m % VL) == 0
Chris@42 206 && (mb % VL) == 0
Chris@42 207 && (me % VL) == 0
Chris@42 208 && (!d->rs || (d->rs == rs))
Chris@42 209 && (!d->vs || (d->vs == vs))
Chris@42 210 && (!d->ms || (d->ms == ms))
Chris@42 211 );
Chris@42 212 }
Chris@42 213
Chris@42 214 static int t_okp_commonu(const ct_desc *d,
Chris@42 215 const R *rio, const R *iio,
Chris@42 216 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
Chris@42 217 const planner *plnr)
Chris@42 218 {
Chris@42 219 UNUSED(rio); UNUSED(iio); UNUSED(m);
Chris@42 220 return (1
Chris@42 221 && !NO_SIMDP(plnr)
Chris@42 222 && SIMD_STRIDE_OK(rs)
Chris@42 223 && SIMD_VSTRIDE_OK(ms)
Chris@42 224 && (mb % VL) == 0
Chris@42 225 && (me % VL) == 0
Chris@42 226 && (!d->rs || (d->rs == rs))
Chris@42 227 && (!d->vs || (d->vs == vs))
Chris@42 228 && (!d->ms || (d->ms == ms))
Chris@42 229 );
Chris@42 230 }
Chris@42 231
Chris@42 232 static int t_okp_t1f(const ct_desc *d,
Chris@42 233 const R *rio, const R *iio,
Chris@42 234 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
Chris@42 235 const planner *plnr)
Chris@42 236 {
Chris@42 237 return t_okp_common(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
Chris@42 238 && iio == rio + 1
Chris@42 239 && ALIGNEDA(rio);
Chris@42 240 }
Chris@42 241
Chris@42 242 EXTERN_CONST(ct_genus, XSIMD(dft_t1fsimd_genus)) = { t_okp_t1f, VL };
Chris@42 243
Chris@42 244 static int t_okp_t1fu(const ct_desc *d,
Chris@42 245 const R *rio, const R *iio,
Chris@42 246 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
Chris@42 247 const planner *plnr)
Chris@42 248 {
Chris@42 249 return t_okp_commonu(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
Chris@42 250 && iio == rio + 1
Chris@42 251 && ALIGNED(rio);
Chris@42 252 }
Chris@42 253
Chris@42 254 EXTERN_CONST(ct_genus, XSIMD(dft_t1fusimd_genus)) = { t_okp_t1fu, VL };
Chris@42 255
Chris@42 256 static int t_okp_t1b(const ct_desc *d,
Chris@42 257 const R *rio, const R *iio,
Chris@42 258 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
Chris@42 259 const planner *plnr)
Chris@42 260 {
Chris@42 261 return t_okp_common(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
Chris@42 262 && rio == iio + 1
Chris@42 263 && ALIGNEDA(iio);
Chris@42 264 }
Chris@42 265
Chris@42 266 EXTERN_CONST(ct_genus, XSIMD(dft_t1bsimd_genus)) = { t_okp_t1b, VL };
Chris@42 267
Chris@42 268 static int t_okp_t1bu(const ct_desc *d,
Chris@42 269 const R *rio, const R *iio,
Chris@42 270 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
Chris@42 271 const planner *plnr)
Chris@42 272 {
Chris@42 273 return t_okp_commonu(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
Chris@42 274 && rio == iio + 1
Chris@42 275 && ALIGNED(iio);
Chris@42 276 }
Chris@42 277
Chris@42 278 EXTERN_CONST(ct_genus, XSIMD(dft_t1busimd_genus)) = { t_okp_t1bu, VL };
Chris@42 279
Chris@42 280 /* use t2* codelets only when n = m*radix is small, because
Chris@42 281 t2* codelets use ~2n twiddle factors (instead of ~n) */
Chris@42 282 static int small_enough(const ct_desc *d, INT m)
Chris@42 283 {
Chris@42 284 return m * d->radix <= 16384;
Chris@42 285 }
Chris@42 286
Chris@42 287 static int t_okp_t2f(const ct_desc *d,
Chris@42 288 const R *rio, const R *iio,
Chris@42 289 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
Chris@42 290 const planner *plnr)
Chris@42 291 {
Chris@42 292 return t_okp_t1f(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
Chris@42 293 && small_enough(d, m);
Chris@42 294 }
Chris@42 295
Chris@42 296 EXTERN_CONST(ct_genus, XSIMD(dft_t2fsimd_genus)) = { t_okp_t2f, VL };
Chris@42 297
Chris@42 298 static int t_okp_t2b(const ct_desc *d,
Chris@42 299 const R *rio, const R *iio,
Chris@42 300 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
Chris@42 301 const planner *plnr)
Chris@42 302 {
Chris@42 303 return t_okp_t1b(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
Chris@42 304 && small_enough(d, m);
Chris@42 305 }
Chris@42 306
Chris@42 307 EXTERN_CONST(ct_genus, XSIMD(dft_t2bsimd_genus)) = { t_okp_t2b, VL };
Chris@42 308
Chris@42 309 static int ts_okp(const ct_desc *d,
Chris@42 310 const R *rio, const R *iio,
Chris@42 311 INT rs, INT vs, INT m, INT mb, INT me, INT ms,
Chris@42 312 const planner *plnr)
Chris@42 313 {
Chris@42 314 UNUSED(rio);
Chris@42 315 UNUSED(iio);
Chris@42 316 return (1
Chris@42 317 && !NO_SIMDP(plnr)
Chris@42 318 && ALIGNEDA(rio)
Chris@42 319 && ALIGNEDA(iio)
Chris@42 320 && SIMD_STRIDE_OKA(rs)
Chris@42 321 && ms == 1
Chris@42 322 && (m % (2 * VL)) == 0
Chris@42 323 && (mb % (2 * VL)) == 0
Chris@42 324 && (me % (2 * VL)) == 0
Chris@42 325 && (!d->rs || (d->rs == rs))
Chris@42 326 && (!d->vs || (d->vs == vs))
Chris@42 327 && (!d->ms || (d->ms == ms))
Chris@42 328 );
Chris@42 329 }
Chris@42 330
Chris@42 331 EXTERN_CONST(ct_genus, XSIMD(dft_tssimd_genus)) = { ts_okp, 2 * VL };