| Chris@82 | 1 /* | 
| Chris@82 | 2  * Copyright (c) 2003, 2007-14 Matteo Frigo | 
| Chris@82 | 3  * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology | 
| Chris@82 | 4  * | 
| Chris@82 | 5  * This program is free software; you can redistribute it and/or modify | 
| Chris@82 | 6  * it under the terms of the GNU General Public License as published by | 
| Chris@82 | 7  * the Free Software Foundation; either version 2 of the License, or | 
| Chris@82 | 8  * (at your option) any later version. | 
| Chris@82 | 9  * | 
| Chris@82 | 10  * This program is distributed in the hope that it will be useful, | 
| Chris@82 | 11  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| Chris@82 | 12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| Chris@82 | 13  * GNU General Public License for more details. | 
| Chris@82 | 14  * | 
| Chris@82 | 15  * You should have received a copy of the GNU General Public License | 
| Chris@82 | 16  * along with this program; if not, write to the Free Software | 
| Chris@82 | 17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA | 
| Chris@82 | 18  * | 
| Chris@82 | 19  */ | 
| Chris@82 | 20 | 
| Chris@82 | 21 /* This file was automatically generated --- DO NOT EDIT */ | 
| Chris@82 | 22 /* Generated on Thu May 24 08:06:14 EDT 2018 */ | 
| Chris@82 | 23 | 
| Chris@82 | 24 #include "dft/codelet-dft.h" | 
| Chris@82 | 25 | 
| Chris@82 | 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA) | 
| Chris@82 | 27 | 
| Chris@82 | 28 /* Generated by: ../../../genfft/gen_twidsq_c.native -fma -simd -compact -variables 4 -pipeline-latency 8 -n 2 -dif -name q1bv_2 -include dft/simd/q1b.h -sign 1 */ | 
| Chris@82 | 29 | 
| Chris@82 | 30 /* | 
| Chris@82 | 31  * This function contains 6 FP additions, 4 FP multiplications, | 
| Chris@82 | 32  * (or, 6 additions, 4 multiplications, 0 fused multiply/add), | 
| Chris@82 | 33  * 8 stack variables, 0 constants, and 8 memory accesses | 
| Chris@82 | 34  */ | 
| Chris@82 | 35 #include "dft/simd/q1b.h" | 
| Chris@82 | 36 | 
/*
 * q1bv_2: machine-generated SIMD codelet (generator command quoted above:
 * gen_twidsq_c ... -n 2 -dif -sign 1).  This is the copy compiled when
 * ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Parameters, as used by this body:
 *   ri     - not referenced in this body.
 *   ii     - base pointer; every load and store goes through x, which
 *            starts at ii.
 *   W      - twiddle table; moved forward by mb * ((TWVL / VL) * 2) before
 *            the loop and by TWVL * 2 after each iteration.
 *   rs, vs - strides combined through WS() to address the four locations
 *            x[0], x[WS(rs, 1)], x[WS(vs, 1)], x[WS(vs, 1) + WS(rs, 1)].
 *   mb, me - iteration range [mb, me), walked in steps of VL.
 *   ms     - x advances by VL * ms per iteration; also handed to LD/ST.
 *
 * The routine works in place: the four results are written back to the same
 * four locations that were read, with the rs and vs offsets exchanged for
 * the two "mixed" outputs.
 */
| Chris@82 | 37 static void q1bv_2(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) | 
| Chris@82 | 38 { | 
| Chris@82 | 39      { | 
| Chris@82 | 40 	  INT m; | 
| Chris@82 | 41 	  R *x; | 
| Chris@82 | 42 	  x = ii; | 
/* NOTE(review): MAKE_VOLATILE_STRIDE is defined outside this file;
   presumably a hint that keeps the compiler from specializing on the
   stride values -- confirm in the SIMD headers. */
| Chris@82 | 43 	  for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(4, rs), MAKE_VOLATILE_STRIDE(4, vs)) { | 
| Chris@82 | 44 	       V T1, T2, T3, T4, T5, T6; | 
/* Pair at vs offset 0: T3 is the difference T1 - T2 passed through BYTW
   with W[0] (presumably a twiddle multiplication -- confirm in q1b.h). */
| Chris@82 | 45 	       T1 = LD(&(x[0]), ms, &(x[0])); | 
| Chris@82 | 46 	       T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); | 
| Chris@82 | 47 	       T3 = BYTW(&(W[0]), VSUB(T1, T2)); | 
/* Pair at vs offset 1: same computation, same W[0]. */
| Chris@82 | 48 	       T4 = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); | 
| Chris@82 | 49 	       T5 = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); | 
| Chris@82 | 50 	       T6 = BYTW(&(W[0]), VSUB(T4, T5)); | 
/* All four loads come before the first store because the stores overwrite
   the locations just read.  The first pair's difference goes to x[vs] and
   the second pair's sum goes to x[rs], i.e. the rs/vs roles are swapped
   relative to where the inputs were read. */
| Chris@82 | 51 	       ST(&(x[WS(vs, 1)]), T3, ms, &(x[WS(vs, 1)])); | 
| Chris@82 | 52 	       ST(&(x[WS(vs, 1) + WS(rs, 1)]), T6, ms, &(x[WS(vs, 1) + WS(rs, 1)])); | 
| Chris@82 | 53 	       ST(&(x[0]), VADD(T1, T2), ms, &(x[0])); | 
| Chris@82 | 54 	       ST(&(x[WS(rs, 1)]), VADD(T4, T5), ms, &(x[WS(rs, 1)])); | 
| Chris@82 | 55 	  } | 
| Chris@82 | 56      } | 
/* NOTE(review): VLEAVE comes from the SIMD headers; presumably an
   end-of-vector-code hook -- confirm. */
| Chris@82 | 57      VLEAVE(); | 
| Chris@82 | 58 } | 
| Chris@82 | 59 | 
/*
 * Twiddle instruction table for q1bv_2 (FMA build); referenced from desc.
 * It holds the entries produced by VTW(0, 1) followed by a {TW_NEXT, VL, 0}
 * entry.  NOTE(review): VTW and TW_NEXT are defined outside this file;
 * presumably TW_NEXT closes the list -- confirm against the tw_instr docs.
 */
| Chris@82 | 60 static const tw_instr twinstr[] = { | 
| Chris@82 | 61      VTW(0, 1), | 
| Chris@82 | 62      {TW_NEXT, VL, 0} | 
| Chris@82 | 63 }; | 
| Chris@82 | 64 | 
/*
 * Descriptor handed to the planner for q1bv_2 (FMA build).  Fields, in
 * order: 2 (matches the generator's "-n 2"; presumably the transform
 * size), the codelet name string, the twiddle instruction table, &GENUS,
 * and {6, 4, 0, 0}, whose first three values match the operation counts in
 * the generated comment above (6 additions, 4 multiplications, 0 fused
 * multiply/add).  NOTE(review): the meaning of the remaining zero fields is
 * not visible from this file -- see the ct_desc definition.
 */
| Chris@82 | 65 static const ct_desc desc = { 2, XSIMD_STRING("q1bv_2"), twinstr, &GENUS, {6, 4, 0, 0}, 0, 0, 0 }; | 
| Chris@82 | 66 | 
/*
 * Registration entry point (FMA build): passes the q1bv_2 kernel and its
 * descriptor to X(kdft_difsq_register) for planner p.  The exported symbol
 * name is produced by the XSIMD() macro.
 */
| Chris@82 | 67 void XSIMD(codelet_q1bv_2) (planner *p) { | 
| Chris@82 | 68      X(kdft_difsq_register) (p, q1bv_2, &desc); | 
| Chris@82 | 69 } | 
| Chris@82 | 70 #else | 
| Chris@82 | 71 | 
| Chris@82 | 72 /* Generated by: ../../../genfft/gen_twidsq_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -dif -name q1bv_2 -include dft/simd/q1b.h -sign 1 */ | 
| Chris@82 | 73 | 
| Chris@82 | 74 /* | 
| Chris@82 | 75  * This function contains 6 FP additions, 4 FP multiplications, | 
| Chris@82 | 76  * (or, 6 additions, 4 multiplications, 0 fused multiply/add), | 
| Chris@82 | 77  * 8 stack variables, 0 constants, and 8 memory accesses | 
| Chris@82 | 78  */ | 
| Chris@82 | 79 #include "dft/simd/q1b.h" | 
| Chris@82 | 80 | 
/*
 * q1bv_2: machine-generated SIMD codelet (generator command quoted above:
 * gen_twidsq_c ... -n 2 -dif -sign 1).  This is the copy compiled when
 * neither ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Parameters, as used by this body:
 *   ri     - not referenced in this body.
 *   ii     - base pointer; every load and store goes through x, which
 *            starts at ii.
 *   W      - twiddle table; moved forward by mb * ((TWVL / VL) * 2) before
 *            the loop and by TWVL * 2 after each iteration.
 *   rs, vs - strides combined through WS() to address the four locations
 *            x[0], x[WS(rs, 1)], x[WS(vs, 1)], x[WS(vs, 1) + WS(rs, 1)].
 *   mb, me - iteration range [mb, me), walked in steps of VL.
 *   ms     - x advances by VL * ms per iteration; also handed to LD/ST.
 *
 * The routine works in place: the four results are written back to the same
 * four locations that were read, with the rs and vs offsets exchanged for
 * the two "mixed" outputs.
 */
| Chris@82 | 81 static void q1bv_2(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) | 
| Chris@82 | 82 { | 
| Chris@82 | 83      { | 
| Chris@82 | 84 	  INT m; | 
| Chris@82 | 85 	  R *x; | 
| Chris@82 | 86 	  x = ii; | 
/* NOTE(review): MAKE_VOLATILE_STRIDE is defined outside this file;
   presumably a hint that keeps the compiler from specializing on the
   stride values -- confirm in the SIMD headers. */
| Chris@82 | 87 	  for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(4, rs), MAKE_VOLATILE_STRIDE(4, vs)) { | 
| Chris@82 | 88 	       V T1, T2, T3, T4, T5, T6; | 
/* Pair at vs offset 0: T3 is the difference T1 - T2 passed through BYTW
   with W[0] (presumably a twiddle multiplication -- confirm in q1b.h). */
| Chris@82 | 89 	       T1 = LD(&(x[0]), ms, &(x[0])); | 
| Chris@82 | 90 	       T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); | 
| Chris@82 | 91 	       T3 = BYTW(&(W[0]), VSUB(T1, T2)); | 
/* Pair at vs offset 1: same computation, same W[0]. */
| Chris@82 | 92 	       T4 = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); | 
| Chris@82 | 93 	       T5 = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); | 
| Chris@82 | 94 	       T6 = BYTW(&(W[0]), VSUB(T4, T5)); | 
/* All four loads come before the first store because the stores overwrite
   the locations just read.  The first pair's difference goes to x[vs] and
   the second pair's sum goes to x[rs], i.e. the rs/vs roles are swapped
   relative to where the inputs were read. */
| Chris@82 | 95 	       ST(&(x[WS(vs, 1)]), T3, ms, &(x[WS(vs, 1)])); | 
| Chris@82 | 96 	       ST(&(x[WS(vs, 1) + WS(rs, 1)]), T6, ms, &(x[WS(vs, 1) + WS(rs, 1)])); | 
| Chris@82 | 97 	       ST(&(x[0]), VADD(T1, T2), ms, &(x[0])); | 
| Chris@82 | 98 	       ST(&(x[WS(rs, 1)]), VADD(T4, T5), ms, &(x[WS(rs, 1)])); | 
| Chris@82 | 99 	  } | 
| Chris@82 | 100      } | 
/* NOTE(review): VLEAVE comes from the SIMD headers; presumably an
   end-of-vector-code hook -- confirm. */
| Chris@82 | 101      VLEAVE(); | 
| Chris@82 | 102 } | 
| Chris@82 | 103 | 
/*
 * Twiddle instruction table for q1bv_2 (non-FMA build); referenced from
 * desc.  It holds the entries produced by VTW(0, 1) followed by a
 * {TW_NEXT, VL, 0} entry.  NOTE(review): VTW and TW_NEXT are defined
 * outside this file; presumably TW_NEXT closes the list -- confirm against
 * the tw_instr docs.
 */
| Chris@82 | 104 static const tw_instr twinstr[] = { | 
| Chris@82 | 105      VTW(0, 1), | 
| Chris@82 | 106      {TW_NEXT, VL, 0} | 
| Chris@82 | 107 }; | 
| Chris@82 | 108 | 
/*
 * Descriptor handed to the planner for q1bv_2 (non-FMA build).  Fields, in
 * order: 2 (matches the generator's "-n 2"; presumably the transform
 * size), the codelet name string, the twiddle instruction table, &GENUS,
 * and {6, 4, 0, 0}, whose first three values match the operation counts in
 * the generated comment above (6 additions, 4 multiplications, 0 fused
 * multiply/add).  NOTE(review): the meaning of the remaining zero fields is
 * not visible from this file -- see the ct_desc definition.
 */
| Chris@82 | 109 static const ct_desc desc = { 2, XSIMD_STRING("q1bv_2"), twinstr, &GENUS, {6, 4, 0, 0}, 0, 0, 0 }; | 
| Chris@82 | 110 | 
/*
 * Registration entry point (non-FMA build): passes the q1bv_2 kernel and
 * its descriptor to X(kdft_difsq_register) for planner p.  The exported
 * symbol name is produced by the XSIMD() macro.
 */
| Chris@82 | 111 void XSIMD(codelet_q1bv_2) (planner *p) { | 
| Chris@82 | 112      X(kdft_difsq_register) (p, q1bv_2, &desc); | 
| Chris@82 | 113 } | 
| Chris@82 | 114 #endif |