cannam@95: /* cannam@95: * Copyright (c) 2003, 2007-11 Matteo Frigo cannam@95: * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology cannam@95: * cannam@95: * This program is free software; you can redistribute it and/or modify cannam@95: * it under the terms of the GNU General Public License as published by cannam@95: * the Free Software Foundation; either version 2 of the License, or cannam@95: * (at your option) any later version. cannam@95: * cannam@95: * This program is distributed in the hope that it will be useful, cannam@95: * but WITHOUT ANY WARRANTY; without even the implied warranty of cannam@95: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the cannam@95: * GNU General Public License for more details. cannam@95: * cannam@95: * You should have received a copy of the GNU General Public License cannam@95: * along with this program; if not, write to the Free Software cannam@95: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA cannam@95: * cannam@95: */ cannam@95: cannam@95: #include "codelet-dft.h" cannam@95: #include SIMD_HEADER cannam@95: cannam@95: #define EXTERN_CONST(t, x) extern const t x; const t x cannam@95: cannam@95: static int n1b_okp(const kdft_desc *d, cannam@95: const R *ri, const R *ii, const R *ro, const R *io, cannam@95: INT is, INT os, INT vl, INT ivs, INT ovs, cannam@95: const planner *plnr) cannam@95: { cannam@95: return (1 cannam@95: && ALIGNED(ii) cannam@95: && ALIGNED(io) cannam@95: && !NO_SIMDP(plnr) cannam@95: && SIMD_STRIDE_OK(is) cannam@95: && SIMD_STRIDE_OK(os) cannam@95: && SIMD_VSTRIDE_OK(ivs) cannam@95: && SIMD_VSTRIDE_OK(ovs) cannam@95: && ri == ii + 1 cannam@95: && ro == io + 1 cannam@95: && (vl % VL) == 0 cannam@95: && (!d->is || (d->is == is)) cannam@95: && (!d->os || (d->os == os)) cannam@95: && (!d->ivs || (d->ivs == ivs)) cannam@95: && (!d->ovs || (d->ovs == ovs)) cannam@95: ); cannam@95: } cannam@95: cannam@95: EXTERN_CONST(kdft_genus, XSIMD(dft_n1bsimd_genus)) = { n1b_okp, VL }; cannam@95: cannam@95: static int n1f_okp(const kdft_desc *d, cannam@95: const R *ri, const R *ii, const R *ro, const R *io, cannam@95: INT is, INT os, INT vl, INT ivs, INT ovs, cannam@95: const planner *plnr) cannam@95: { cannam@95: return (1 cannam@95: && ALIGNED(ri) cannam@95: && ALIGNED(ro) cannam@95: && !NO_SIMDP(plnr) cannam@95: && SIMD_STRIDE_OK(is) cannam@95: && SIMD_STRIDE_OK(os) cannam@95: && SIMD_VSTRIDE_OK(ivs) cannam@95: && SIMD_VSTRIDE_OK(ovs) cannam@95: && ii == ri + 1 cannam@95: && io == ro + 1 cannam@95: && (vl % VL) == 0 cannam@95: && (!d->is || (d->is == is)) cannam@95: && (!d->os || (d->os == os)) cannam@95: && (!d->ivs || (d->ivs == ivs)) cannam@95: && (!d->ovs || (d->ovs == ovs)) cannam@95: ); cannam@95: } cannam@95: cannam@95: EXTERN_CONST(kdft_genus, XSIMD(dft_n1fsimd_genus)) = { n1f_okp, VL }; cannam@95: cannam@95: static int n2b_okp(const kdft_desc *d, cannam@95: const R *ri, const R *ii, const R *ro, const R *io, cannam@95: INT is, INT os, INT vl, INT ivs, INT ovs, cannam@95: const planner *plnr) cannam@95: { cannam@95: return (1 cannam@95: && ALIGNEDA(ii) cannam@95: && ALIGNEDA(io) cannam@95: && !NO_SIMDP(plnr) cannam@95: && SIMD_STRIDE_OKA(is) cannam@95: && SIMD_VSTRIDE_OKA(ivs) cannam@95: && SIMD_VSTRIDE_OKA(os) /* os == 2 enforced by codelet */ cannam@95: && SIMD_STRIDE_OKPAIR(ovs) cannam@95: && ri == ii + 1 cannam@95: && ro == io + 1 cannam@95: && (vl % VL) == 0 cannam@95: && (!d->is || (d->is == is)) cannam@95: && (!d->os || (d->os == os)) cannam@95: && (!d->ivs || (d->ivs == ivs)) cannam@95: && (!d->ovs || (d->ovs == ovs)) cannam@95: ); cannam@95: } cannam@95: cannam@95: EXTERN_CONST(kdft_genus, XSIMD(dft_n2bsimd_genus)) = { n2b_okp, VL }; cannam@95: cannam@95: static int n2f_okp(const kdft_desc *d, cannam@95: const R *ri, const R *ii, const R *ro, const R *io, cannam@95: INT is, INT os, INT vl, INT ivs, INT ovs, cannam@95: const planner *plnr) cannam@95: { cannam@95: return (1 cannam@95: && ALIGNEDA(ri) cannam@95: && ALIGNEDA(ro) cannam@95: && !NO_SIMDP(plnr) cannam@95: && SIMD_STRIDE_OKA(is) cannam@95: && SIMD_VSTRIDE_OKA(ivs) cannam@95: && SIMD_VSTRIDE_OKA(os) /* os == 2 enforced by codelet */ cannam@95: && SIMD_STRIDE_OKPAIR(ovs) cannam@95: && ii == ri + 1 cannam@95: && io == ro + 1 cannam@95: && (vl % VL) == 0 cannam@95: && (!d->is || (d->is == is)) cannam@95: && (!d->os || (d->os == os)) cannam@95: && (!d->ivs || (d->ivs == ivs)) cannam@95: && (!d->ovs || (d->ovs == ovs)) cannam@95: ); cannam@95: } cannam@95: cannam@95: EXTERN_CONST(kdft_genus, XSIMD(dft_n2fsimd_genus)) = { n2f_okp, VL }; cannam@95: cannam@95: static int n2s_okp(const kdft_desc *d, cannam@95: const R *ri, const R *ii, const R *ro, const R *io, cannam@95: INT is, INT os, INT vl, INT ivs, INT ovs, cannam@95: const planner *plnr) cannam@95: { cannam@95: return (1 cannam@95: && !NO_SIMDP(plnr) cannam@95: && ALIGNEDA(ri) cannam@95: && ALIGNEDA(ii) cannam@95: && ALIGNEDA(ro) cannam@95: && ALIGNEDA(io) cannam@95: && SIMD_STRIDE_OKA(is) cannam@95: && ivs == 1 cannam@95: && os == 1 cannam@95: && SIMD_STRIDE_OKA(ovs) cannam@95: && (vl % (2 * VL)) == 0 cannam@95: && (!d->is || (d->is == is)) cannam@95: && (!d->os || (d->os == os)) cannam@95: && (!d->ivs || (d->ivs == ivs)) cannam@95: && (!d->ovs || (d->ovs == ovs)) cannam@95: ); cannam@95: } cannam@95: cannam@95: EXTERN_CONST(kdft_genus, XSIMD(dft_n2ssimd_genus)) = { n2s_okp, 2 * VL }; cannam@95: cannam@95: static int q1b_okp(const ct_desc *d, cannam@95: const R *rio, const R *iio, cannam@95: INT rs, INT vs, INT m, INT mb, INT me, INT ms, cannam@95: const planner *plnr) cannam@95: { cannam@95: return (1 cannam@95: && ALIGNED(iio) cannam@95: && !NO_SIMDP(plnr) cannam@95: && SIMD_STRIDE_OK(rs) cannam@95: && SIMD_STRIDE_OK(vs) cannam@95: && SIMD_VSTRIDE_OK(ms) cannam@95: && rio == iio + 1 cannam@95: && (m % VL) == 0 cannam@95: && (mb % VL) == 0 cannam@95: && (me % VL) == 0 cannam@95: && (!d->rs || (d->rs == rs)) cannam@95: && (!d->vs || (d->vs == vs)) cannam@95: && (!d->ms || (d->ms == ms)) cannam@95: ); cannam@95: } cannam@95: EXTERN_CONST(ct_genus, XSIMD(dft_q1bsimd_genus)) = { q1b_okp, VL }; cannam@95: cannam@95: static int q1f_okp(const ct_desc *d, cannam@95: const R *rio, const R *iio, cannam@95: INT rs, INT vs, INT m, INT mb, INT me, INT ms, cannam@95: const planner *plnr) cannam@95: { cannam@95: return (1 cannam@95: && ALIGNED(rio) cannam@95: && !NO_SIMDP(plnr) cannam@95: && SIMD_STRIDE_OK(rs) cannam@95: && SIMD_STRIDE_OK(vs) cannam@95: && SIMD_VSTRIDE_OK(ms) cannam@95: && iio == rio + 1 cannam@95: && (m % VL) == 0 cannam@95: && (mb % VL) == 0 cannam@95: && (me % VL) == 0 cannam@95: && (!d->rs || (d->rs == rs)) cannam@95: && (!d->vs || (d->vs == vs)) cannam@95: && (!d->ms || (d->ms == ms)) cannam@95: ); cannam@95: } cannam@95: EXTERN_CONST(ct_genus, XSIMD(dft_q1fsimd_genus)) = { q1f_okp, VL }; cannam@95: cannam@95: static int t_okp_common(const ct_desc *d, cannam@95: const R *rio, const R *iio, cannam@95: INT rs, INT vs, INT m, INT mb, INT me, INT ms, cannam@95: const planner *plnr) cannam@95: { cannam@95: UNUSED(rio); UNUSED(iio); cannam@95: return (1 cannam@95: && !NO_SIMDP(plnr) cannam@95: && SIMD_STRIDE_OKA(rs) cannam@95: && SIMD_VSTRIDE_OKA(ms) cannam@95: && (m % VL) == 0 cannam@95: && (mb % VL) == 0 cannam@95: && (me % VL) == 0 cannam@95: && (!d->rs || (d->rs == rs)) cannam@95: && (!d->vs || (d->vs == vs)) cannam@95: && (!d->ms || (d->ms == ms)) cannam@95: ); cannam@95: } cannam@95: cannam@95: static int t_okp_commonu(const ct_desc *d, cannam@95: const R *rio, const R *iio, cannam@95: INT rs, INT vs, INT m, INT mb, INT me, INT ms, cannam@95: const planner *plnr) cannam@95: { cannam@95: UNUSED(rio); UNUSED(iio); UNUSED(m); cannam@95: return (1 cannam@95: && !NO_SIMDP(plnr) cannam@95: && SIMD_STRIDE_OK(rs) cannam@95: && SIMD_VSTRIDE_OK(ms) cannam@95: && (mb % VL) == 0 cannam@95: && (me % VL) == 0 cannam@95: && (!d->rs || (d->rs == rs)) cannam@95: && (!d->vs || (d->vs == vs)) cannam@95: && (!d->ms || (d->ms == ms)) cannam@95: ); cannam@95: } cannam@95: cannam@95: static int t_okp_t1f(const ct_desc *d, cannam@95: const R *rio, const R *iio, cannam@95: INT rs, INT vs, INT m, INT mb, INT me, INT ms, cannam@95: const planner *plnr) cannam@95: { cannam@95: return t_okp_common(d, rio, iio, rs, vs, m, mb, me, ms, plnr) cannam@95: && iio == rio + 1 cannam@95: && ALIGNEDA(rio); cannam@95: } cannam@95: cannam@95: EXTERN_CONST(ct_genus, XSIMD(dft_t1fsimd_genus)) = { t_okp_t1f, VL }; cannam@95: cannam@95: static int t_okp_t1fu(const ct_desc *d, cannam@95: const R *rio, const R *iio, cannam@95: INT rs, INT vs, INT m, INT mb, INT me, INT ms, cannam@95: const planner *plnr) cannam@95: { cannam@95: return t_okp_commonu(d, rio, iio, rs, vs, m, mb, me, ms, plnr) cannam@95: && iio == rio + 1 cannam@95: && ALIGNED(rio); cannam@95: } cannam@95: cannam@95: EXTERN_CONST(ct_genus, XSIMD(dft_t1fusimd_genus)) = { t_okp_t1fu, VL }; cannam@95: cannam@95: static int t_okp_t1b(const ct_desc *d, cannam@95: const R *rio, const R *iio, cannam@95: INT rs, INT vs, INT m, INT mb, INT me, INT ms, cannam@95: const planner *plnr) cannam@95: { cannam@95: return t_okp_common(d, rio, iio, rs, vs, m, mb, me, ms, plnr) cannam@95: && rio == iio + 1 cannam@95: && ALIGNEDA(iio); cannam@95: } cannam@95: cannam@95: EXTERN_CONST(ct_genus, XSIMD(dft_t1bsimd_genus)) = { t_okp_t1b, VL }; cannam@95: cannam@95: static int t_okp_t1bu(const ct_desc *d, cannam@95: const R *rio, const R *iio, cannam@95: INT rs, INT vs, INT m, INT mb, INT me, INT ms, cannam@95: const planner *plnr) cannam@95: { cannam@95: return t_okp_commonu(d, rio, iio, rs, vs, m, mb, me, ms, plnr) cannam@95: && rio == iio + 1 cannam@95: && ALIGNED(iio); cannam@95: } cannam@95: cannam@95: EXTERN_CONST(ct_genus, XSIMD(dft_t1busimd_genus)) = { t_okp_t1bu, VL }; cannam@95: cannam@95: /* use t2* codelets only when n = m*radix is small, because cannam@95: t2* codelets use ~2n twiddle factors (instead of ~n) */ cannam@95: static int small_enough(const ct_desc *d, INT m) cannam@95: { cannam@95: return m * d->radix <= 16384; cannam@95: } cannam@95: cannam@95: static int t_okp_t2f(const ct_desc *d, cannam@95: const R *rio, const R *iio, cannam@95: INT rs, INT vs, INT m, INT mb, INT me, INT ms, cannam@95: const planner *plnr) cannam@95: { cannam@95: return t_okp_t1f(d, rio, iio, rs, vs, m, mb, me, ms, plnr) cannam@95: && small_enough(d, m); cannam@95: } cannam@95: cannam@95: EXTERN_CONST(ct_genus, XSIMD(dft_t2fsimd_genus)) = { t_okp_t2f, VL }; cannam@95: cannam@95: static int t_okp_t2b(const ct_desc *d, cannam@95: const R *rio, const R *iio, cannam@95: INT rs, INT vs, INT m, INT mb, INT me, INT ms, cannam@95: const planner *plnr) cannam@95: { cannam@95: return t_okp_t1b(d, rio, iio, rs, vs, m, mb, me, ms, plnr) cannam@95: && small_enough(d, m); cannam@95: } cannam@95: cannam@95: EXTERN_CONST(ct_genus, XSIMD(dft_t2bsimd_genus)) = { t_okp_t2b, VL }; cannam@95: cannam@95: static int ts_okp(const ct_desc *d, cannam@95: const R *rio, const R *iio, cannam@95: INT rs, INT vs, INT m, INT mb, INT me, INT ms, cannam@95: const planner *plnr) cannam@95: { cannam@95: UNUSED(rio); cannam@95: UNUSED(iio); cannam@95: return (1 cannam@95: && !NO_SIMDP(plnr) cannam@95: && ALIGNEDA(rio) cannam@95: && ALIGNEDA(iio) cannam@95: && SIMD_STRIDE_OKA(rs) cannam@95: && ms == 1 cannam@95: && (m % (2 * VL)) == 0 cannam@95: && (mb % (2 * VL)) == 0 cannam@95: && (me % (2 * VL)) == 0 cannam@95: && (!d->rs || (d->rs == rs)) cannam@95: && (!d->vs || (d->vs == vs)) cannam@95: && (!d->ms || (d->ms == ms)) cannam@95: ); cannam@95: } cannam@95: cannam@95: EXTERN_CONST(ct_genus, XSIMD(dft_tssimd_genus)) = { ts_okp, 2 * VL };