annotate src/fftw-3.3.8/dft/simd/common/t1bv_15.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:05:59 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_twiddle_c.native -fma -simd -compact -variables 4 -pipeline-latency 8 -n 15 -name t1bv_15 -include dft/simd/t1b.h -sign 1 */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 92 FP additions, 77 FP multiplications,
Chris@82 32 * (or, 50 additions, 35 multiplications, 42 fused multiply/add),
Chris@82 33 * 50 stack variables, 8 constants, and 30 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/simd/t1b.h"
Chris@82 36
/*
 * t1bv_15 -- size-15 twiddle codelet; this is the variant compiled when
 * ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Parameters, as they are used in this body:
 *   ri     - not referenced here; every load and store goes through x,
 *            which is initialised from ii.
 *   ii     - base pointer of the data that is read and then overwritten.
 *   W      - twiddle table; offset once by mb * ((TWVL / VL) * 28) and then
 *            advanced by TWVL * 28 on every loop iteration.
 *   rs     - stride between the 15 points, applied through WS(rs, k).
 *   mb, me - the loop counter m runs over [mb, me) in steps of VL.
 *   ms     - x advances by VL * ms per iteration; also forwarded to LD/ST.
 *
 * Each iteration loads x[WS(rs, k)] for k = 0..14, passes inputs 1..14
 * through BYTW with the twiddle at W[TWVL * 2 * (k - 1)] (input 0 is used
 * as loaded), and stores 15 results back to the same 15 locations.
 *
 * NOTE(review): LD, ST, BYTW, VFMA, VFNMS, VFMAI, VFNMSI, DVK, LDK and
 * VLEAVE are macros defined outside this file; their exact semantics are
 * not visible here.
 */
Chris@82 37 static void t1bv_15(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* Constants declared with DVK and read below through LDK. */
Chris@82 39 DVK(KP910592997, +0.910592997310029334643087372129977886038870291);
Chris@82 40 DVK(KP823639103, +0.823639103546331925877420039278190003029660514);
Chris@82 41 DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 42 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 43 DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 44 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 45 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 46 DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 47 {
Chris@82 48 INT m;
Chris@82 49 R *x;
Chris@82 50 x = ii; /* all data accesses below use x */
/* One pass per VL-wide step of m; x and W are advanced in the loop header. */
Chris@82 51 for (m = mb, W = W + (mb * ((TWVL / VL) * 28)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 28), MAKE_VOLATILE_STRIDE(15, rs)) {
/* Values that outlive the load phase and feed the two store phases. */
Chris@82 52 V TV, T7, T1f, TM, TP, Tf, Tn, To, T1j, T1k, T1l, TW, TX, TY, Tw;
Chris@82 53 V TE, TF, T1g, T1h, T1i;
Chris@82 54 { /* inputs 0, 5 and 10; 5 and 10 are twiddled, 0 is not */
Chris@82 55 V T1, T5, T3, T4, T2, T6;
Chris@82 56 T1 = LD(&(x[0]), ms, &(x[0]));
Chris@82 57 T4 = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
Chris@82 58 T5 = BYTW(&(W[TWVL * 18]), T4);
Chris@82 59 T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
Chris@82 60 T3 = BYTW(&(W[TWVL * 8]), T2);
Chris@82 61 TV = VSUB(T3, T5);
Chris@82 62 T6 = VADD(T3, T5);
Chris@82 63 T7 = VFNMS(LDK(KP500000000), T6, T1);
Chris@82 64 T1f = VADD(T1, T6);
Chris@82 65 }
Chris@82 66 { /* remaining twelve inputs, loaded and twiddled in groups */
Chris@82 67 V T9, Tq, Ty, Th, Te, TK, Tv, TN, TD, TO, Tm, TL;
Chris@82 68 { /* inputs 3, 6, 9 and 12 */
Chris@82 69 V T8, Tp, Tx, Tg;
Chris@82 70 T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
Chris@82 71 T9 = BYTW(&(W[TWVL * 4]), T8);
Chris@82 72 Tp = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
Chris@82 73 Tq = BYTW(&(W[TWVL * 10]), Tp);
Chris@82 74 Tx = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
Chris@82 75 Ty = BYTW(&(W[TWVL * 16]), Tx);
Chris@82 76 Tg = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
Chris@82 77 Th = BYTW(&(W[TWVL * 22]), Tg);
Chris@82 78 }
Chris@82 79 { /* inputs 8 and 13 -> Te (VADD) and TK (VSUB) */
Chris@82 80 V Tb, Td, Ta, Tc;
Chris@82 81 Ta = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
Chris@82 82 Tb = BYTW(&(W[TWVL * 14]), Ta);
Chris@82 83 Tc = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
Chris@82 84 Td = BYTW(&(W[TWVL * 24]), Tc);
Chris@82 85 Te = VADD(Tb, Td);
Chris@82 86 TK = VSUB(Tb, Td);
Chris@82 87 }
Chris@82 88 { /* inputs 11 and 1 -> Tv (VADD) and TN (VSUB) */
Chris@82 89 V Ts, Tu, Tr, Tt;
Chris@82 90 Tr = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
Chris@82 91 Ts = BYTW(&(W[TWVL * 20]), Tr);
Chris@82 92 Tt = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
Chris@82 93 Tu = BYTW(&(W[0]), Tt);
Chris@82 94 Tv = VADD(Ts, Tu);
Chris@82 95 TN = VSUB(Ts, Tu);
Chris@82 96 }
Chris@82 97 { /* inputs 14 and 4 -> TD (VADD) and TO (VSUB) */
Chris@82 98 V TA, TC, Tz, TB;
Chris@82 99 Tz = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
Chris@82 100 TA = BYTW(&(W[TWVL * 26]), Tz);
Chris@82 101 TB = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
Chris@82 102 TC = BYTW(&(W[TWVL * 6]), TB);
Chris@82 103 TD = VADD(TA, TC);
Chris@82 104 TO = VSUB(TA, TC);
Chris@82 105 }
Chris@82 106 { /* inputs 2 and 7 -> Tm (VADD) and TL (VSUB) */
Chris@82 107 V Tj, Tl, Ti, Tk;
Chris@82 108 Ti = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
Chris@82 109 Tj = BYTW(&(W[TWVL * 2]), Ti);
Chris@82 110 Tk = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
Chris@82 111 Tl = BYTW(&(W[TWVL * 12]), Tk);
Chris@82 112 Tm = VADD(Tj, Tl);
Chris@82 113 TL = VSUB(Tj, Tl);
Chris@82 114 }
/* Combine the per-group results into the values declared at loop scope. */
Chris@82 115 TM = VSUB(TK, TL);
Chris@82 116 TP = VSUB(TN, TO);
Chris@82 117 Tf = VFNMS(LDK(KP500000000), Te, T9);
Chris@82 118 Tn = VFNMS(LDK(KP500000000), Tm, Th);
Chris@82 119 To = VADD(Tf, Tn);
Chris@82 120 T1j = VADD(Tq, Tv);
Chris@82 121 T1k = VADD(Ty, TD);
Chris@82 122 T1l = VADD(T1j, T1k);
Chris@82 123 TW = VADD(TK, TL);
Chris@82 124 TX = VADD(TN, TO);
Chris@82 125 TY = VADD(TW, TX);
Chris@82 126 Tw = VFNMS(LDK(KP500000000), Tv, Tq);
Chris@82 127 TE = VFNMS(LDK(KP500000000), TD, Ty);
Chris@82 128 TF = VADD(Tw, TE);
Chris@82 129 T1g = VADD(T9, Te);
Chris@82 130 T1h = VADD(Th, Tm);
Chris@82 131 T1i = VADD(T1g, T1h);
Chris@82 132 }
Chris@82 133 { /* first store phase: x[WS(rs, k)] for k = 0, 3, 6, 9, 12 */
Chris@82 134 V T1o, T1m, T1n, T1s, T1u, T1q, T1r, T1t, T1p;
Chris@82 135 T1o = VSUB(T1i, T1l);
Chris@82 136 T1m = VADD(T1i, T1l);
Chris@82 137 T1n = VFNMS(LDK(KP250000000), T1m, T1f);
Chris@82 138 T1q = VSUB(T1j, T1k);
Chris@82 139 T1r = VSUB(T1g, T1h);
Chris@82 140 T1s = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1r, T1q));
Chris@82 141 T1u = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1q, T1r));
Chris@82 142 ST(&(x[0]), VADD(T1f, T1m), ms, &(x[0]));
Chris@82 143 T1t = VFMA(LDK(KP559016994), T1o, T1n);
Chris@82 144 ST(&(x[WS(rs, 6)]), VFMAI(T1u, T1t), ms, &(x[0]));
Chris@82 145 ST(&(x[WS(rs, 9)]), VFNMSI(T1u, T1t), ms, &(x[WS(rs, 1)]));
Chris@82 146 T1p = VFNMS(LDK(KP559016994), T1o, T1n);
Chris@82 147 ST(&(x[WS(rs, 3)]), VFMAI(T1s, T1p), ms, &(x[WS(rs, 1)]));
Chris@82 148 ST(&(x[WS(rs, 12)]), VFNMSI(T1s, T1p), ms, &(x[0]));
Chris@82 149 }
Chris@82 150 { /* second store phase: the remaining ten outputs */
Chris@82 151 V TQ, T16, T1e, T11, T19, TU, T18, TJ, T1d, T15, TZ, T10;
Chris@82 152 TQ = VFMA(LDK(KP618033988), TP, TM);
Chris@82 153 T16 = VFNMS(LDK(KP618033988), TM, TP);
Chris@82 154 T1e = VMUL(LDK(KP866025403), VADD(TV, TY));
Chris@82 155 TZ = VFNMS(LDK(KP250000000), TY, TV);
Chris@82 156 T10 = VSUB(TW, TX);
Chris@82 157 T11 = VFMA(LDK(KP559016994), T10, TZ);
Chris@82 158 T19 = VFNMS(LDK(KP559016994), T10, TZ);
Chris@82 159 {
Chris@82 160 V TS, TT, TI, TG, TH;
Chris@82 161 TS = VSUB(Tf, Tn);
Chris@82 162 TT = VSUB(Tw, TE);
Chris@82 163 TU = VFMA(LDK(KP618033988), TT, TS);
Chris@82 164 T18 = VFNMS(LDK(KP618033988), TS, TT);
Chris@82 165 TI = VSUB(To, TF);
Chris@82 166 TG = VADD(To, TF);
Chris@82 167 TH = VFNMS(LDK(KP250000000), TG, T7);
Chris@82 168 TJ = VFMA(LDK(KP559016994), TI, TH);
Chris@82 169 T1d = VADD(T7, TG);
Chris@82 170 T15 = VFNMS(LDK(KP559016994), TI, TH);
Chris@82 171 }
Chris@82 172 { /* stores to k = 5, 10, then 1, 14, then 7, 8 */
Chris@82 173 V TR, T12, T1b, T1c;
Chris@82 174 ST(&(x[WS(rs, 5)]), VFNMSI(T1e, T1d), ms, &(x[WS(rs, 1)]));
Chris@82 175 ST(&(x[WS(rs, 10)]), VFMAI(T1e, T1d), ms, &(x[0]));
Chris@82 176 TR = VFNMS(LDK(KP823639103), TQ, TJ);
Chris@82 177 T12 = VMUL(LDK(KP951056516), VFMA(LDK(KP910592997), T11, TU));
Chris@82 178 ST(&(x[WS(rs, 1)]), VFMAI(T12, TR), ms, &(x[WS(rs, 1)]));
Chris@82 179 ST(&(x[WS(rs, 14)]), VFNMSI(T12, TR), ms, &(x[0]));
Chris@82 180 T1b = VFMA(LDK(KP823639103), T16, T15);
Chris@82 181 T1c = VMUL(LDK(KP951056516), VFNMS(LDK(KP910592997), T19, T18));
Chris@82 182 ST(&(x[WS(rs, 7)]), VFNMSI(T1c, T1b), ms, &(x[WS(rs, 1)]));
Chris@82 183 ST(&(x[WS(rs, 8)]), VFMAI(T1c, T1b), ms, &(x[0]));
Chris@82 184 { /* stores to k = 2, 13, then 4, 11 */
Chris@82 185 V T17, T1a, T13, T14;
Chris@82 186 T17 = VFNMS(LDK(KP823639103), T16, T15);
Chris@82 187 T1a = VMUL(LDK(KP951056516), VFMA(LDK(KP910592997), T19, T18));
Chris@82 188 ST(&(x[WS(rs, 2)]), VFNMSI(T1a, T17), ms, &(x[0]));
Chris@82 189 ST(&(x[WS(rs, 13)]), VFMAI(T1a, T17), ms, &(x[WS(rs, 1)]));
Chris@82 190 T13 = VFMA(LDK(KP823639103), TQ, TJ);
Chris@82 191 T14 = VMUL(LDK(KP951056516), VFNMS(LDK(KP910592997), T11, TU));
Chris@82 192 ST(&(x[WS(rs, 4)]), VFNMSI(T14, T13), ms, &(x[0]));
Chris@82 193 ST(&(x[WS(rs, 11)]), VFMAI(T14, T13), ms, &(x[WS(rs, 1)]));
Chris@82 194 }
Chris@82 195 }
Chris@82 196 }
Chris@82 197 }
Chris@82 198 }
Chris@82 199 VLEAVE(); /* invoked once, after the loop has finished */
Chris@82 200 }
Chris@82 201
/*
 * Twiddle instruction table referenced by desc below: one VTW(0, k) entry
 * for each k = 1..14, followed by a closing {TW_NEXT, VL, 0} entry.
 * NOTE(review): VTW and TW_NEXT are defined outside this file.
 */
Chris@82 202 static const tw_instr twinstr[] = {
Chris@82 203 VTW(0, 1),
Chris@82 204 VTW(0, 2),
Chris@82 205 VTW(0, 3),
Chris@82 206 VTW(0, 4),
Chris@82 207 VTW(0, 5),
Chris@82 208 VTW(0, 6),
Chris@82 209 VTW(0, 7),
Chris@82 210 VTW(0, 8),
Chris@82 211 VTW(0, 9),
Chris@82 212 VTW(0, 10),
Chris@82 213 VTW(0, 11),
Chris@82 214 VTW(0, 12),
Chris@82 215 VTW(0, 13),
Chris@82 216 VTW(0, 14),
Chris@82 217 {TW_NEXT, VL, 0}
Chris@82 218 };
Chris@82 219
/*
 * Descriptor handed to the registration call below. It bundles the value 15
 * (presumably the transform size -- confirm against the ct_desc definition),
 * the codelet name string, the twinstr table and &GENUS. The triple
 * {50, 35, 42} matches the additions / multiplications / fused multiply-add
 * counts quoted in the generator comment for this FMA variant.
 */
Chris@82 220 static const ct_desc desc = { 15, XSIMD_STRING("t1bv_15"), twinstr, &GENUS, {50, 35, 42, 0}, 0, 0, 0 };
Chris@82 221
/*
 * Externally visible entry point (name formed through XSIMD): passes the
 * static t1bv_15 routine and its descriptor to X(kdft_dit_register) for the
 * given planner p.
 */
Chris@82 222 void XSIMD(codelet_t1bv_15) (planner *p) {
Chris@82 223 X(kdft_dit_register) (p, t1bv_15, &desc);
Chris@82 224 }
Chris@82 225 #else
Chris@82 226
Chris@82 227 /* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 15 -name t1bv_15 -include dft/simd/t1b.h -sign 1 */
Chris@82 228
Chris@82 229 /*
Chris@82 230 * This function contains 92 FP additions, 53 FP multiplications,
Chris@82 231 * (or, 78 additions, 39 multiplications, 14 fused multiply/add),
Chris@82 232 * 52 stack variables, 10 constants, and 30 memory accesses
Chris@82 233 */
Chris@82 234 #include "dft/simd/t1b.h"
Chris@82 235
/*
 * t1bv_15 -- size-15 twiddle codelet; this is the variant compiled when
 * neither ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Parameters, as they are used in this body:
 *   ri     - not referenced here; every load and store goes through x,
 *            which is initialised from ii.
 *   ii     - base pointer of the data that is read and then overwritten.
 *   W      - twiddle table; offset once by mb * ((TWVL / VL) * 28) and then
 *            advanced by TWVL * 28 on every loop iteration.
 *   rs     - stride between the 15 points, applied through WS(rs, k).
 *   mb, me - the loop counter m runs over [mb, me) in steps of VL.
 *   ms     - x advances by VL * ms per iteration; also forwarded to LD/ST.
 *
 * The load/twiddle pattern and the set of output locations are the same as
 * in the FMA variant: inputs k = 1..14 go through BYTW with the twiddle at
 * W[TWVL * 2 * (k - 1)], input 0 is used as loaded, and all 15 locations
 * x[WS(rs, k)] are overwritten. The arithmetic in between is expressed with
 * VADD/VSUB/VMUL/VBYI plus a smaller number of VFMA/VFNMS calls.
 *
 * NOTE(review): LD, ST, BYTW, VFMA, VFNMS, VBYI, DVK, LDK and VLEAVE are
 * macros defined outside this file; their exact semantics are not visible
 * here.
 */
Chris@82 236 static void t1bv_15(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 237 {
/* Constants declared with DVK and read below through LDK. */
Chris@82 238 DVK(KP216506350, +0.216506350946109661690930792688234045867850657);
Chris@82 239 DVK(KP484122918, +0.484122918275927110647408174972799951354115213);
Chris@82 240 DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 241 DVK(KP509036960, +0.509036960455127183450980863393907648510733164);
Chris@82 242 DVK(KP823639103, +0.823639103546331925877420039278190003029660514);
Chris@82 243 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 244 DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 245 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 246 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 247 DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 248 {
Chris@82 249 INT m;
Chris@82 250 R *x;
Chris@82 251 x = ii; /* all data accesses below use x */
/* One pass per VL-wide step of m; x and W are advanced in the loop header. */
Chris@82 252 for (m = mb, W = W + (mb * ((TWVL / VL) * 28)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 28), MAKE_VOLATILE_STRIDE(15, rs)) {
/* Values that outlive the load phase and feed the two store phases. */
Chris@82 253 V Ts, TV, T1f, TZ, T10, Tb, Tm, Tt, T1j, T1k, T1l, TI, TM, TR, Tz;
Chris@82 254 V TD, TQ, T1g, T1h, T1i;
Chris@82 255 { /* inputs 0, 5 and 10; 5 and 10 are twiddled, 0 is not */
Chris@82 256 V TT, Tr, Tp, Tq, To, TU;
Chris@82 257 TT = LD(&(x[0]), ms, &(x[0]));
Chris@82 258 Tq = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
Chris@82 259 Tr = BYTW(&(W[TWVL * 18]), Tq);
Chris@82 260 To = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
Chris@82 261 Tp = BYTW(&(W[TWVL * 8]), To);
Chris@82 262 Ts = VSUB(Tp, Tr);
Chris@82 263 TU = VADD(Tp, Tr);
Chris@82 264 TV = VFNMS(LDK(KP500000000), TU, TT);
Chris@82 265 T1f = VADD(TT, TU);
Chris@82 266 }
Chris@82 267 { /* remaining twelve inputs, loaded and twiddled in groups */
Chris@82 268 V Tx, TG, TK, TB, T5, Ty, Tg, TH, Tl, TL, Ta, TC;
Chris@82 269 { /* inputs 3, 6, 9 and 12 */
Chris@82 270 V Tw, TF, TJ, TA;
Chris@82 271 Tw = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
Chris@82 272 Tx = BYTW(&(W[TWVL * 4]), Tw);
Chris@82 273 TF = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
Chris@82 274 TG = BYTW(&(W[TWVL * 10]), TF);
Chris@82 275 TJ = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
Chris@82 276 TK = BYTW(&(W[TWVL * 16]), TJ);
Chris@82 277 TA = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
Chris@82 278 TB = BYTW(&(W[TWVL * 22]), TA);
Chris@82 279 }
Chris@82 280 { /* inputs 8 and 13 -> T5 (VSUB) and Ty (VADD) */
Chris@82 281 V T2, T4, T1, T3;
Chris@82 282 T1 = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
Chris@82 283 T2 = BYTW(&(W[TWVL * 14]), T1);
Chris@82 284 T3 = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
Chris@82 285 T4 = BYTW(&(W[TWVL * 24]), T3);
Chris@82 286 T5 = VSUB(T2, T4);
Chris@82 287 Ty = VADD(T2, T4);
Chris@82 288 }
Chris@82 289 { /* inputs 11 and 1 -> Tg (VSUB) and TH (VADD) */
Chris@82 290 V Td, Tf, Tc, Te;
Chris@82 291 Tc = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
Chris@82 292 Td = BYTW(&(W[TWVL * 20]), Tc);
Chris@82 293 Te = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
Chris@82 294 Tf = BYTW(&(W[0]), Te);
Chris@82 295 Tg = VSUB(Td, Tf);
Chris@82 296 TH = VADD(Td, Tf);
Chris@82 297 }
Chris@82 298 { /* inputs 14 and 4 -> Tl (VSUB) and TL (VADD) */
Chris@82 299 V Ti, Tk, Th, Tj;
Chris@82 300 Th = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
Chris@82 301 Ti = BYTW(&(W[TWVL * 26]), Th);
Chris@82 302 Tj = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
Chris@82 303 Tk = BYTW(&(W[TWVL * 6]), Tj);
Chris@82 304 Tl = VSUB(Ti, Tk);
Chris@82 305 TL = VADD(Ti, Tk);
Chris@82 306 }
Chris@82 307 { /* inputs 2 and 7 -> Ta (VSUB) and TC (VADD) */
Chris@82 308 V T7, T9, T6, T8;
Chris@82 309 T6 = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
Chris@82 310 T7 = BYTW(&(W[TWVL * 2]), T6);
Chris@82 311 T8 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
Chris@82 312 T9 = BYTW(&(W[TWVL * 12]), T8);
Chris@82 313 Ta = VSUB(T7, T9);
Chris@82 314 TC = VADD(T7, T9);
Chris@82 315 }
/* Combine the per-group results into the values declared at loop scope. */
Chris@82 316 TZ = VSUB(T5, Ta);
Chris@82 317 T10 = VSUB(Tg, Tl);
Chris@82 318 Tb = VADD(T5, Ta);
Chris@82 319 Tm = VADD(Tg, Tl);
Chris@82 320 Tt = VADD(Tb, Tm);
Chris@82 321 T1j = VADD(TG, TH);
Chris@82 322 T1k = VADD(TK, TL);
Chris@82 323 T1l = VADD(T1j, T1k);
Chris@82 324 TI = VFNMS(LDK(KP500000000), TH, TG);
Chris@82 325 TM = VFNMS(LDK(KP500000000), TL, TK);
Chris@82 326 TR = VADD(TI, TM);
Chris@82 327 Tz = VFNMS(LDK(KP500000000), Ty, Tx);
Chris@82 328 TD = VFNMS(LDK(KP500000000), TC, TB);
Chris@82 329 TQ = VADD(Tz, TD);
Chris@82 330 T1g = VADD(Tx, Ty);
Chris@82 331 T1h = VADD(TB, TC);
Chris@82 332 T1i = VADD(T1g, T1h);
Chris@82 333 }
Chris@82 334 { /* first store phase: x[WS(rs, k)] for k = 0, 3, 6, 9, 12 */
Chris@82 335 V T1o, T1m, T1n, T1s, T1t, T1q, T1r, T1u, T1p;
Chris@82 336 T1o = VMUL(LDK(KP559016994), VSUB(T1i, T1l));
Chris@82 337 T1m = VADD(T1i, T1l);
Chris@82 338 T1n = VFNMS(LDK(KP250000000), T1m, T1f);
Chris@82 339 T1q = VSUB(T1g, T1h);
Chris@82 340 T1r = VSUB(T1j, T1k);
Chris@82 341 T1s = VBYI(VFNMS(LDK(KP951056516), T1r, VMUL(LDK(KP587785252), T1q)));
Chris@82 342 T1t = VBYI(VFMA(LDK(KP951056516), T1q, VMUL(LDK(KP587785252), T1r)));
Chris@82 343 ST(&(x[0]), VADD(T1f, T1m), ms, &(x[0]));
Chris@82 344 T1u = VADD(T1o, T1n);
Chris@82 345 ST(&(x[WS(rs, 6)]), VADD(T1t, T1u), ms, &(x[0]));
Chris@82 346 ST(&(x[WS(rs, 9)]), VSUB(T1u, T1t), ms, &(x[WS(rs, 1)]));
Chris@82 347 T1p = VSUB(T1n, T1o);
Chris@82 348 ST(&(x[WS(rs, 3)]), VSUB(T1p, T1s), ms, &(x[WS(rs, 1)]));
Chris@82 349 ST(&(x[WS(rs, 12)]), VADD(T1s, T1p), ms, &(x[0]));
Chris@82 350 }
Chris@82 351 { /* second store phase: the remaining ten outputs */
Chris@82 352 V T11, T18, T1e, TO, T16, Tv, T15, TY, T1d, T19, TE, TN;
Chris@82 353 T11 = VFMA(LDK(KP823639103), TZ, VMUL(LDK(KP509036960), T10));
Chris@82 354 T18 = VFNMS(LDK(KP823639103), T10, VMUL(LDK(KP509036960), TZ));
Chris@82 355 T1e = VBYI(VMUL(LDK(KP866025403), VADD(Ts, Tt)));
Chris@82 356 TE = VSUB(Tz, TD);
Chris@82 357 TN = VSUB(TI, TM);
Chris@82 358 TO = VFMA(LDK(KP951056516), TE, VMUL(LDK(KP587785252), TN));
Chris@82 359 T16 = VFNMS(LDK(KP951056516), TN, VMUL(LDK(KP587785252), TE));
Chris@82 360 {
Chris@82 361 V Tn, Tu, TS, TW, TX;
Chris@82 362 Tn = VMUL(LDK(KP484122918), VSUB(Tb, Tm));
Chris@82 363 Tu = VFNMS(LDK(KP216506350), Tt, VMUL(LDK(KP866025403), Ts));
Chris@82 364 Tv = VADD(Tn, Tu);
Chris@82 365 T15 = VSUB(Tn, Tu);
Chris@82 366 TS = VMUL(LDK(KP559016994), VSUB(TQ, TR));
Chris@82 367 TW = VADD(TQ, TR);
Chris@82 368 TX = VFNMS(LDK(KP250000000), TW, TV);
Chris@82 369 TY = VADD(TS, TX);
Chris@82 370 T1d = VADD(TV, TW);
Chris@82 371 T19 = VSUB(TX, TS);
Chris@82 372 }
Chris@82 373 { /* stores to k = 5, 10, then 1, 14, then 7, 8 */
Chris@82 374 V TP, T12, T1b, T1c;
Chris@82 375 ST(&(x[WS(rs, 5)]), VSUB(T1d, T1e), ms, &(x[WS(rs, 1)]));
Chris@82 376 ST(&(x[WS(rs, 10)]), VADD(T1e, T1d), ms, &(x[0]));
Chris@82 377 TP = VBYI(VADD(Tv, TO));
Chris@82 378 T12 = VSUB(TY, T11);
Chris@82 379 ST(&(x[WS(rs, 1)]), VADD(TP, T12), ms, &(x[WS(rs, 1)]));
Chris@82 380 ST(&(x[WS(rs, 14)]), VSUB(T12, TP), ms, &(x[0]));
Chris@82 381 T1b = VBYI(VSUB(T16, T15));
Chris@82 382 T1c = VSUB(T19, T18);
Chris@82 383 ST(&(x[WS(rs, 7)]), VADD(T1b, T1c), ms, &(x[WS(rs, 1)]));
Chris@82 384 ST(&(x[WS(rs, 8)]), VSUB(T1c, T1b), ms, &(x[0]));
Chris@82 385 { /* stores to k = 2, 13, then 4, 11 */
Chris@82 386 V T17, T1a, T13, T14;
Chris@82 387 T17 = VBYI(VADD(T15, T16));
Chris@82 388 T1a = VADD(T18, T19);
Chris@82 389 ST(&(x[WS(rs, 2)]), VADD(T17, T1a), ms, &(x[0]));
Chris@82 390 ST(&(x[WS(rs, 13)]), VSUB(T1a, T17), ms, &(x[WS(rs, 1)]));
Chris@82 391 T13 = VBYI(VSUB(Tv, TO));
Chris@82 392 T14 = VADD(T11, TY);
Chris@82 393 ST(&(x[WS(rs, 4)]), VADD(T13, T14), ms, &(x[0]));
Chris@82 394 ST(&(x[WS(rs, 11)]), VSUB(T14, T13), ms, &(x[WS(rs, 1)]));
Chris@82 395 }
Chris@82 396 }
Chris@82 397 }
Chris@82 398 }
Chris@82 399 }
Chris@82 400 VLEAVE(); /* invoked once, after the loop has finished */
Chris@82 401 }
Chris@82 402
/*
 * Twiddle instruction table referenced by desc below: one VTW(0, k) entry
 * for each k = 1..14, followed by a closing {TW_NEXT, VL, 0} entry.
 * NOTE(review): VTW and TW_NEXT are defined outside this file.
 */
Chris@82 403 static const tw_instr twinstr[] = {
Chris@82 404 VTW(0, 1),
Chris@82 405 VTW(0, 2),
Chris@82 406 VTW(0, 3),
Chris@82 407 VTW(0, 4),
Chris@82 408 VTW(0, 5),
Chris@82 409 VTW(0, 6),
Chris@82 410 VTW(0, 7),
Chris@82 411 VTW(0, 8),
Chris@82 412 VTW(0, 9),
Chris@82 413 VTW(0, 10),
Chris@82 414 VTW(0, 11),
Chris@82 415 VTW(0, 12),
Chris@82 416 VTW(0, 13),
Chris@82 417 VTW(0, 14),
Chris@82 418 {TW_NEXT, VL, 0}
Chris@82 419 };
Chris@82 420
/*
 * Descriptor handed to the registration call below. It bundles the value 15
 * (presumably the transform size -- confirm against the ct_desc definition),
 * the codelet name string, the twinstr table and &GENUS. The triple
 * {78, 39, 14} matches the additions / multiplications / fused multiply-add
 * counts quoted in the generator comment for this non-FMA variant.
 */
Chris@82 421 static const ct_desc desc = { 15, XSIMD_STRING("t1bv_15"), twinstr, &GENUS, {78, 39, 14, 0}, 0, 0, 0 };
Chris@82 422
/*
 * Externally visible entry point (name formed through XSIMD): passes the
 * static t1bv_15 routine and its descriptor to X(kdft_dit_register) for the
 * given planner p.
 */
Chris@82 423 void XSIMD(codelet_t1bv_15) (planner *p) {
Chris@82 424 X(kdft_dit_register) (p, t1bv_15, &desc);
Chris@82 425 }
Chris@82 426 #endif