annotate src/fftw-3.3.5/dft/simd/common/n2bv_10.c @ 84:08ae793730bd

Add null config files
author Chris Cannam
date Mon, 02 Mar 2020 14:03:47 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:40:34 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-dft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 10 -name n2bv_10 -with-ostride 2 -include n2b.h -store-multiple 2 */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 42 FP additions, 22 FP multiplications,
Chris@42 32 * (or, 24 additions, 4 multiplications, 18 fused multiply/add),
Chris@42 33 * 53 stack variables, 4 constants, and 25 memory accesses
Chris@42 34 */
Chris@42 35 #include "n2b.h"
Chris@42 36
/*
 * n2bv_10, HAVE_FMA variant: generated SIMD codelet operating on ten inputs
 * (generator command line: -n 10 -sign 1 -with-ostride 2 -store-multiple 2).
 *
 * Parameters as used by this body:
 *   ri, ro   - not referenced anywhere in the body.
 *   ii, io   - base pointers; copied to xi / xo and walked by the loop.
 *   is       - input stride, applied through WS(is, k) for k = 0..9.
 *   os       - only passed to MAKE_VOLATILE_STRIDE; output offsets are the
 *              literal indices xo[0], xo[2], ..., xo[18].
 *   v        - loop count; the loop subtracts VL per pass and stops at i <= 0.
 *   ivs, ovs - per-pass advance of xi / xo (each multiplied by VL).
 *
 * NOTE(review): LD, STM2, STN2, VFMA, VFNMS, VFMAI, VFNMSI, DVK, LDK, WS and
 * VLEAVE are macros defined outside this file. Presumably VFMA(a,b,c) = a*b+c,
 * VFNMS(a,b,c) = c-a*b, VFMAI(b,c) = c+i*b, VFNMSI(b,c) = c-i*b -- confirm
 * against the SIMD support headers before relying on the notes below.
 */
Chris@42 37 static void n2bv_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@42 38 {
Chris@42 39 DVK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
Chris@42 40 DVK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
Chris@42 41 DVK(KP618033988, +0.618033988749894848204586834365638117720309180); /* numerically (sqrt(5)-1)/2 */
Chris@42 42 DVK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
Chris@42 43 {
Chris@42 44 INT i;
Chris@42 45 const R *xi;
Chris@42 46 R *xo;
Chris@42 47 xi = ii; /* only the ii/io pair is walked; ri/ro stay untouched */
Chris@42 48 xo = io;
Chris@42 49 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(20, is), MAKE_VOLATILE_STRIDE(20, os)) { /* one pass consumes VL of the v count */
Chris@42 50 V Tb, Tr, T3, Ts, T6, Tw, Tg, Tt, T9, Tc, T1, T2;
Chris@42 51 T1 = LD(&(xi[0]), ivs, &(xi[0])); /* input 0 */
Chris@42 52 T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); /* input 5 */
Chris@42 53 {
Chris@42 54 V T4, T5, Te, Tf, T7, T8;
Chris@42 55 T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); /* input 2 */
Chris@42 56 T5 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); /* input 7 */
Chris@42 57 Te = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); /* input 6 */
Chris@42 58 Tf = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); /* input 1 */
Chris@42 59 T7 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); /* input 8 */
Chris@42 60 T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); /* input 3 */
Chris@42 61 Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); /* input 4 */
Chris@42 62 Tr = VADD(T1, T2); /* in0 + in5 */
Chris@42 63 T3 = VSUB(T1, T2); /* in0 - in5 */
Chris@42 64 Ts = VADD(T4, T5); /* in2 + in7 */
Chris@42 65 T6 = VSUB(T4, T5); /* in2 - in7 */
Chris@42 66 Tw = VADD(Te, Tf); /* in6 + in1 */
Chris@42 67 Tg = VSUB(Te, Tf); /* in6 - in1 */
Chris@42 68 Tt = VADD(T7, T8); /* in8 + in3 */
Chris@42 69 T9 = VSUB(T7, T8); /* in8 - in3 */
Chris@42 70 Tc = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); /* input 9, paired with input 4 (Tb) below */
Chris@42 71 }
Chris@42 72 {
Chris@42 73 V TD, Tu, Tm, Ta, Td, Tv;
Chris@42 74 TD = VSUB(Ts, Tt);
Chris@42 75 Tu = VADD(Ts, Tt);
Chris@42 76 Tm = VSUB(T6, T9);
Chris@42 77 Ta = VADD(T6, T9);
Chris@42 78 Td = VSUB(Tb, Tc); /* in4 - in9 */
Chris@42 79 Tv = VADD(Tb, Tc); /* in4 + in9 */
Chris@42 80 {
Chris@42 81 V TC, Tx, Tn, Th;
Chris@42 82 TC = VSUB(Tv, Tw);
Chris@42 83 Tx = VADD(Tv, Tw);
Chris@42 84 Tn = VSUB(Td, Tg);
Chris@42 85 Th = VADD(Td, Tg);
Chris@42 86 {
Chris@42 87 V Ty, TA, TE, TG, Ti, Tk, To, Tq;
Chris@42 88 Ty = VADD(Tu, Tx); /* sum of the four pair-sums other than Tr */
Chris@42 89 TA = VSUB(Tu, Tx);
Chris@42 90 TE = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TD, TC)); /* TE, TG: terms built from the pair-sum differences */
Chris@42 91 TG = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TC, TD));
Chris@42 92 Ti = VADD(Ta, Th); /* sum of the four pair-differences other than T3 */
Chris@42 93 Tk = VSUB(Ta, Th);
Chris@42 94 To = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tn, Tm)); /* To, Tq: same pattern on the pair-difference side */
Chris@42 95 Tq = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tm, Tn));
Chris@42 96 {
Chris@42 97 V Tz, TH, Tj, TI;
Chris@42 98 Tz = VFNMS(LDK(KP250000000), Ty, Tr);
Chris@42 99 TH = VADD(Tr, Ty); /* sum of all ten loaded inputs */
Chris@42 100 STM2(&(xo[0]), TH, ovs, &(xo[0]));
Chris@42 101 Tj = VFNMS(LDK(KP250000000), Ti, T3);
Chris@42 102 TI = VADD(T3, Ti); /* even-indexed inputs minus odd-indexed inputs */
Chris@42 103 STM2(&(xo[10]), TI, ovs, &(xo[2]));
Chris@42 104 {
Chris@42 105 V TB, TF, Tl, Tp;
Chris@42 106 TB = VFNMS(LDK(KP559016994), TA, Tz);
Chris@42 107 TF = VFMA(LDK(KP559016994), TA, Tz);
Chris@42 108 Tl = VFMA(LDK(KP559016994), Tk, Tj);
Chris@42 109 Tp = VFNMS(LDK(KP559016994), Tk, Tj);
Chris@42 110 {
Chris@42 111 V TJ, TK, TL, TM;
Chris@42 112 TJ = VFNMSI(TG, TF);
Chris@42 113 STM2(&(xo[8]), TJ, ovs, &(xo[0]));
Chris@42 114 STN2(&(xo[8]), TJ, TI, ovs); /* pair (xo[8], xo[10]) is complete: TI was produced above */
Chris@42 115 TK = VFMAI(TG, TF);
Chris@42 116 STM2(&(xo[12]), TK, ovs, &(xo[0]));
Chris@42 117 TL = VFMAI(TE, TB);
Chris@42 118 STM2(&(xo[16]), TL, ovs, &(xo[0]));
Chris@42 119 TM = VFNMSI(TE, TB);
Chris@42 120 STM2(&(xo[4]), TM, ovs, &(xo[0]));
Chris@42 121 {
Chris@42 122 V TN, TO, TP, TQ;
Chris@42 123 TN = VFMAI(Tq, Tp);
Chris@42 124 STM2(&(xo[6]), TN, ovs, &(xo[2]));
Chris@42 125 STN2(&(xo[4]), TM, TN, ovs); /* pair (xo[4], xo[6]) */
Chris@42 126 TO = VFNMSI(Tq, Tp);
Chris@42 127 STM2(&(xo[14]), TO, ovs, &(xo[2]));
Chris@42 128 STN2(&(xo[12]), TK, TO, ovs); /* pair (xo[12], xo[14]) */
Chris@42 129 TP = VFNMSI(To, Tl);
Chris@42 130 STM2(&(xo[18]), TP, ovs, &(xo[2]));
Chris@42 131 STN2(&(xo[16]), TL, TP, ovs); /* pair (xo[16], xo[18]) */
Chris@42 132 TQ = VFMAI(To, Tl);
Chris@42 133 STM2(&(xo[2]), TQ, ovs, &(xo[2]));
Chris@42 134 STN2(&(xo[0]), TH, TQ, ovs); /* pair (xo[0], xo[2]); last of the five STN2 pairs */
Chris@42 135 }
Chris@42 136 }
Chris@42 137 }
Chris@42 138 }
Chris@42 139 }
Chris@42 140 }
Chris@42 141 }
Chris@42 142 }
Chris@42 143 }
Chris@42 144 VLEAVE();
Chris@42 145 }
Chris@42 146
/*
 * Descriptor passed to X(kdft_register) for the HAVE_FMA variant.
 * 10 matches the generator's -n 10; {24, 4, 18, 0} repeats the
 * add / mul / fused-multiply-add counts stated in this variant's generated
 * header comment.
 * NOTE(review): the trailing 0, 2, 0, 0 are presumably stride constraints
 * (the 2 lining up with -with-ostride 2) -- confirm against the kdft_desc
 * declaration.
 */
Chris@42 147 static const kdft_desc desc = { 10, XSIMD_STRING("n2bv_10"), {24, 4, 18, 0}, &GENUS, 0, 2, 0, 0 };
Chris@42 148
/* Registration entry point (HAVE_FMA build): hands n2bv_10 and its descriptor to planner p. */
Chris@42 149 void XSIMD(codelet_n2bv_10) (planner *p) {
Chris@42 150 X(kdft_register) (p, n2bv_10, &desc);
Chris@42 151 }
Chris@42 152
Chris@42 153 #else /* HAVE_FMA */
Chris@42 154
Chris@42 155 /* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 10 -name n2bv_10 -with-ostride 2 -include n2b.h -store-multiple 2 */
Chris@42 156
Chris@42 157 /*
Chris@42 158 * This function contains 42 FP additions, 12 FP multiplications,
Chris@42 159 * (or, 36 additions, 6 multiplications, 6 fused multiply/add),
Chris@42 160 * 36 stack variables, 4 constants, and 25 memory accesses
Chris@42 161 */
Chris@42 162 #include "n2b.h"
Chris@42 163
/*
 * n2bv_10, non-FMA variant: generated SIMD codelet operating on ten inputs
 * (generator command line: -n 10 -sign 1 -with-ostride 2 -store-multiple 2,
 * without -fma).
 *
 * Parameters as used by this body:
 *   ri, ro   - not referenced anywhere in the body.
 *   ii, io   - base pointers; copied to xi / xo and walked by the loop.
 *   is       - input stride, applied through WS(is, k) for k = 0..9.
 *   os       - only passed to MAKE_VOLATILE_STRIDE; output offsets are the
 *              literal indices xo[0], xo[2], ..., xo[18].
 *   v        - loop count; the loop subtracts VL per pass and stops at i <= 0.
 *   ivs, ovs - per-pass advance of xi / xo (each multiplied by VL).
 *
 * NOTE(review): LD, STM2, STN2, VFMA, VFNMS, VBYI, DVK, LDK, WS and VLEAVE are
 * macros defined outside this file. Presumably VFMA(a,b,c) = a*b+c,
 * VFNMS(a,b,c) = c-a*b and VBYI(x) = i*x -- confirm against the SIMD support
 * headers before relying on the notes below.
 */
Chris@42 164 static void n2bv_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@42 165 {
Chris@42 166 DVK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
Chris@42 167 DVK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
Chris@42 168 DVK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
Chris@42 169 DVK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
Chris@42 170 {
Chris@42 171 INT i;
Chris@42 172 const R *xi;
Chris@42 173 R *xo;
Chris@42 174 xi = ii; /* only the ii/io pair is walked; ri/ro stay untouched */
Chris@42 175 xo = io;
Chris@42 176 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(20, is), MAKE_VOLATILE_STRIDE(20, os)) { /* one pass consumes VL of the v count */
Chris@42 177 V Tl, Ty, T7, Te, Tw, Tt, Tz, TA, TB, Tg, Th, Tm, Tj, Tk;
Chris@42 178 Tj = LD(&(xi[0]), ivs, &(xi[0])); /* input 0 */
Chris@42 179 Tk = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); /* input 5 */
Chris@42 180 Tl = VSUB(Tj, Tk); /* in0 - in5 */
Chris@42 181 Ty = VADD(Tj, Tk); /* in0 + in5 */
Chris@42 182 {
Chris@42 183 V T3, Tr, Td, Tv, T6, Ts, Ta, Tu;
Chris@42 184 {
Chris@42 185 V T1, T2, Tb, Tc;
Chris@42 186 T1 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); /* input 2 */
Chris@42 187 T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); /* input 7 */
Chris@42 188 T3 = VSUB(T1, T2); /* in2 - in7 */
Chris@42 189 Tr = VADD(T1, T2); /* in2 + in7 */
Chris@42 190 Tb = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); /* input 6 */
Chris@42 191 Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); /* input 1 */
Chris@42 192 Td = VSUB(Tb, Tc); /* in6 - in1 */
Chris@42 193 Tv = VADD(Tb, Tc); /* in6 + in1 */
Chris@42 194 }
Chris@42 195 {
Chris@42 196 V T4, T5, T8, T9;
Chris@42 197 T4 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); /* input 8 */
Chris@42 198 T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); /* input 3 */
Chris@42 199 T6 = VSUB(T4, T5); /* in8 - in3 */
Chris@42 200 Ts = VADD(T4, T5); /* in8 + in3 */
Chris@42 201 T8 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); /* input 4 */
Chris@42 202 T9 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); /* input 9 */
Chris@42 203 Ta = VSUB(T8, T9); /* in4 - in9 */
Chris@42 204 Tu = VADD(T8, T9); /* in4 + in9 */
Chris@42 205 }
Chris@42 206 T7 = VSUB(T3, T6);
Chris@42 207 Te = VSUB(Ta, Td);
Chris@42 208 Tw = VSUB(Tu, Tv);
Chris@42 209 Tt = VSUB(Tr, Ts);
Chris@42 210 Tz = VADD(Tr, Ts);
Chris@42 211 TA = VADD(Tu, Tv);
Chris@42 212 TB = VADD(Tz, TA); /* sum of the four pair-sums other than Ty */
Chris@42 213 Tg = VADD(T3, T6);
Chris@42 214 Th = VADD(Ta, Td);
Chris@42 215 Tm = VADD(Tg, Th); /* sum of the four pair-differences other than Tl */
Chris@42 216 }
Chris@42 217 {
Chris@42 218 V TH, TI, TK, TL, TM; /* kept in this outer scope because the STN2 calls in the last block reuse them */
Chris@42 219 TH = VADD(Tl, Tm); /* even-indexed inputs minus odd-indexed inputs */
Chris@42 220 STM2(&(xo[10]), TH, ovs, &(xo[2]));
Chris@42 221 TI = VADD(Ty, TB); /* sum of all ten loaded inputs */
Chris@42 222 STM2(&(xo[0]), TI, ovs, &(xo[0]));
Chris@42 223 {
Chris@42 224 V Tf, Tq, To, Tp, Ti, Tn, TJ;
Chris@42 225 Tf = VBYI(VFMA(LDK(KP951056516), T7, VMUL(LDK(KP587785252), Te)));
Chris@42 226 Tq = VBYI(VFNMS(LDK(KP951056516), Te, VMUL(LDK(KP587785252), T7)));
Chris@42 227 Ti = VMUL(LDK(KP559016994), VSUB(Tg, Th));
Chris@42 228 Tn = VFNMS(LDK(KP250000000), Tm, Tl);
Chris@42 229 To = VADD(Ti, Tn);
Chris@42 230 Tp = VSUB(Tn, Ti);
Chris@42 231 TJ = VADD(Tf, To);
Chris@42 232 STM2(&(xo[2]), TJ, ovs, &(xo[2]));
Chris@42 233 STN2(&(xo[0]), TI, TJ, ovs); /* pair (xo[0], xo[2]) */
Chris@42 234 TK = VADD(Tq, Tp);
Chris@42 235 STM2(&(xo[14]), TK, ovs, &(xo[2])); /* partner xo[12] is produced in the next block */
Chris@42 236 TL = VSUB(To, Tf);
Chris@42 237 STM2(&(xo[18]), TL, ovs, &(xo[2])); /* partner xo[16] is produced in the next block */
Chris@42 238 TM = VSUB(Tp, Tq);
Chris@42 239 STM2(&(xo[6]), TM, ovs, &(xo[2])); /* partner xo[4] is produced in the next block */
Chris@42 240 }
Chris@42 241 {
Chris@42 242 V Tx, TG, TE, TF, TC, TD;
Chris@42 243 Tx = VBYI(VFNMS(LDK(KP951056516), Tw, VMUL(LDK(KP587785252), Tt)));
Chris@42 244 TG = VBYI(VFMA(LDK(KP951056516), Tt, VMUL(LDK(KP587785252), Tw)));
Chris@42 245 TC = VFNMS(LDK(KP250000000), TB, Ty);
Chris@42 246 TD = VMUL(LDK(KP559016994), VSUB(Tz, TA));
Chris@42 247 TE = VSUB(TC, TD);
Chris@42 248 TF = VADD(TD, TC);
Chris@42 249 {
Chris@42 250 V TN, TO, TP, TQ;
Chris@42 251 TN = VADD(Tx, TE);
Chris@42 252 STM2(&(xo[4]), TN, ovs, &(xo[0]));
Chris@42 253 STN2(&(xo[4]), TN, TM, ovs); /* pair (xo[4], xo[6]) */
Chris@42 254 TO = VADD(TG, TF);
Chris@42 255 STM2(&(xo[12]), TO, ovs, &(xo[0]));
Chris@42 256 STN2(&(xo[12]), TO, TK, ovs); /* pair (xo[12], xo[14]) */
Chris@42 257 TP = VSUB(TE, Tx);
Chris@42 258 STM2(&(xo[16]), TP, ovs, &(xo[0]));
Chris@42 259 STN2(&(xo[16]), TP, TL, ovs); /* pair (xo[16], xo[18]) */
Chris@42 260 TQ = VSUB(TF, TG);
Chris@42 261 STM2(&(xo[8]), TQ, ovs, &(xo[0]));
Chris@42 262 STN2(&(xo[8]), TQ, TH, ovs); /* pair (xo[8], xo[10]); TH was produced first */
Chris@42 263 }
Chris@42 264 }
Chris@42 265 }
Chris@42 266 }
Chris@42 267 }
Chris@42 268 VLEAVE();
Chris@42 269 }
Chris@42 270
/*
 * Descriptor passed to X(kdft_register) for the non-FMA variant.
 * 10 matches the generator's -n 10; {36, 6, 6, 0} repeats the
 * add / mul / fused-multiply-add counts stated in this variant's generated
 * header comment.
 * NOTE(review): the trailing 0, 2, 0, 0 are presumably stride constraints
 * (the 2 lining up with -with-ostride 2) -- confirm against the kdft_desc
 * declaration.
 */
Chris@42 271 static const kdft_desc desc = { 10, XSIMD_STRING("n2bv_10"), {36, 6, 6, 0}, &GENUS, 0, 2, 0, 0 };
Chris@42 272
/* Registration entry point (non-FMA build): hands n2bv_10 and its descriptor to planner p. */
Chris@42 273 void XSIMD(codelet_n2bv_10) (planner *p) {
Chris@42 274 X(kdft_register) (p, n2bv_10, &desc);
Chris@42 275 }
Chris@42 276
Chris@42 277 #endif /* HAVE_FMA */