d@0: /* d@0: * Copyright (c) 2003, 2007-8 Matteo Frigo d@0: * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology d@0: * d@0: * This program is free software; you can redistribute it and/or modify d@0: * it under the terms of the GNU General Public License as published by d@0: * the Free Software Foundation; either version 2 of the License, or d@0: * (at your option) any later version. d@0: * d@0: * This program is distributed in the hope that it will be useful, d@0: * but WITHOUT ANY WARRANTY; without even the implied warranty of d@0: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the d@0: * GNU General Public License for more details. d@0: * d@0: * You should have received a copy of the GNU General Public License d@0: * along with this program; if not, write to the Free Software d@0: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA d@0: * d@0: */ d@0: /* Generated by: ../../genfft/gen_notw_c -standalone -fma -reorder-insns -simd -compact -variables 100000 -with-ostride 2 -include fftw-spu.h -store-multiple 2 -n 10 -name X(spu_n2fv_10) */ d@0: d@0: /* d@0: * This function contains 42 FP additions, 22 FP multiplications, d@0: * (or, 24 additions, 4 multiplications, 18 fused multiply/add), d@0: * 59 stack variables, 4 constants, and 25 memory accesses d@0: */ d@0: #include "fftw-spu.h" d@0: d@0: void X(spu_n2fv_10) (const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) { d@0: DVK(KP559016994, +0.559016994374947424102293417182819058860154590); d@0: DVK(KP250000000, +0.250000000000000000000000000000000000000000000); d@0: DVK(KP618033988, +0.618033988749894848204586834365638117720309180); d@0: DVK(KP951056516, +0.951056516295153572116439333379382143405698634); d@0: INT i; d@0: const R *xi; d@0: R *xo; d@0: xi = ri; d@0: xo = ro; d@0: for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(os)) { d@0: V Tr, T3, Tm, Tn, TD, TC, TA, Ty, Ti, Tk, T1, T2, Ts, T6, Tw; d@0: V Tg, Tt, T9, Tv, Td, T4, T5, Te, Tf, T7, T8, Tb, Tc, Ta, Th; d@0: V Tu, Tx, TH, TI, TK, TL, TM, Tq, To, Tp, Tl, Tj, TJ, TG, TE; d@0: V TF, TB, Tz, TN, TO, TP, TQ; d@0: T1 = LD(&(xi[0]), ivs, &(xi[0])); d@0: T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); d@0: Tr = VADD(T1, T2); d@0: T3 = VSUB(T1, T2); d@0: T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); d@0: T5 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); d@0: Ts = VADD(T4, T5); d@0: T6 = VSUB(T4, T5); d@0: Te = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); d@0: Tf = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); d@0: Tw = VADD(Te, Tf); d@0: Tg = VSUB(Te, Tf); d@0: T7 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); d@0: T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); d@0: Tt = VADD(T7, T8); d@0: T9 = VSUB(T7, T8); d@0: Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); d@0: Tc = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); d@0: Tv = VADD(Tb, Tc); d@0: Td = VSUB(Tb, Tc); d@0: Ta = VADD(T6, T9); d@0: Tm = VSUB(T6, T9); d@0: Tn = VSUB(Td, Tg); d@0: Th = VADD(Td, Tg); d@0: Tu = VADD(Ts, Tt); d@0: TD = VSUB(Ts, Tt); d@0: TC = VSUB(Tv, Tw); d@0: Tx = VADD(Tv, Tw); d@0: TA = VSUB(Tu, Tx); d@0: Ty = VADD(Tu, Tx); d@0: Ti = VADD(Ta, Th); d@0: Tk = VSUB(Ta, Th); d@0: TH = VADD(T3, Ti); d@0: STM2(&(xo[10]), TH, ovs, &(xo[2])); d@0: TI = VADD(Tr, Ty); d@0: STM2(&(xo[0]), TI, ovs, &(xo[0])); d@0: Tq = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tm, Tn)); d@0: To = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tn, Tm)); d@0: Tj = VFNMS(LDK(KP250000000), Ti, T3); d@0: Tp = VFNMS(LDK(KP559016994), Tk, Tj); d@0: Tl = VFMA(LDK(KP559016994), Tk, Tj); d@0: TJ = VFNMSI(To, Tl); d@0: STM2(&(xo[2]), TJ, ovs, &(xo[2])); d@0: STN2(&(xo[0]), TI, TJ, ovs); d@0: TK = VFMAI(Tq, Tp); d@0: STM2(&(xo[14]), TK, ovs, &(xo[2])); d@0: TL = VFMAI(To, Tl); d@0: STM2(&(xo[18]), TL, ovs, &(xo[2])); d@0: TM = VFNMSI(Tq, Tp); d@0: STM2(&(xo[6]), TM, ovs, &(xo[2])); d@0: TG = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TC, TD)); d@0: TE = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TD, TC)); d@0: Tz = VFNMS(LDK(KP250000000), Ty, Tr); d@0: TF = VFMA(LDK(KP559016994), TA, Tz); d@0: TB = VFNMS(LDK(KP559016994), TA, Tz); d@0: TN = VFMAI(TE, TB); d@0: STM2(&(xo[4]), TN, ovs, &(xo[0])); d@0: STN2(&(xo[4]), TN, TM, ovs); d@0: TO = VFNMSI(TG, TF); d@0: STM2(&(xo[12]), TO, ovs, &(xo[0])); d@0: STN2(&(xo[12]), TO, TK, ovs); d@0: TP = VFNMSI(TE, TB); d@0: STM2(&(xo[16]), TP, ovs, &(xo[0])); d@0: STN2(&(xo[16]), TP, TL, ovs); d@0: TQ = VFMAI(TG, TF); d@0: STM2(&(xo[8]), TQ, ovs, &(xo[0])); d@0: STN2(&(xo[8]), TQ, TH, ovs); d@0: } d@0: }