annotate src/fftw-3.3.5/dft/simd/common/n2bv_14.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:40:36 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-dft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 14 -name n2bv_14 -with-ostride 2 -include n2b.h -store-multiple 2 */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 74 FP additions, 48 FP multiplications,
Chris@42 32 * (or, 32 additions, 6 multiplications, 42 fused multiply/add),
Chris@42 33 * 65 stack variables, 6 constants, and 35 memory accesses
Chris@42 34 */
Chris@42 35 #include "n2b.h"
Chris@42 36
/*
 * n2bv_14 (HAVE_FMA variant): size-14 SIMD DFT codelet emitted by genfft's
 * gen_notw_c with "-fma -sign 1 -n 14 -with-ostride 2 -store-multiple 2".
 *
 * Parameters:
 *   ri, ro   - not referenced anywhere in this body.
 *   ii       - input base pointer; every load goes through xi = ii.
 *   io       - output base pointer; every store goes through xo = io.
 *   is       - input stride, applied through WS(is, k) for k = 0..13.
 *   os       - only passed to MAKE_VOLATILE_STRIDE; the outputs are written
 *              at the fixed even offsets xo[0], xo[2], ..., xo[26].
 *   v        - loop count; the loop runs while i > 0, decrementing by VL.
 *   ivs, ovs - per-iteration pointer advance is VL * ivs for the input and
 *              VL * ovs for the output; both are also handed to LD / STM2 /
 *              STN2.
 *
 * V, LD, STM2, STN2, VADD, VSUB, VMUL, VFMA, VFNMS, VFMAI, VFNMSI, LDK, DVK
 * and VLEAVE are macros/types defined outside this file; their exact
 * semantics are not visible here.
 */
Chris@42 37 static void n2bv_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@42 38 {
/*
 * Six vector constants. KP900968867 and KP974927912 are numerically
 * cos(pi/7) and sin(3*pi/7); the other four are coefficients used by the
 * FMA chains below (presumably produced by the generator's -fma factoring;
 * the derivation is not shown in this file).
 */
Chris@42 39 DVK(KP900968867, +0.900968867902419126236102319507445051165919162);
Chris@42 40 DVK(KP801937735, +0.801937735804838252472204639014890102331838324);
Chris@42 41 DVK(KP974927912, +0.974927912181823607018131682993931217232785801);
Chris@42 42 DVK(KP692021471, +0.692021471630095869627814897002069140197260599);
Chris@42 43 DVK(KP554958132, +0.554958132087371191422194871006410481067288862);
Chris@42 44 DVK(KP356895867, +0.356895867892209443894399510021300583399127187);
Chris@42 45 {
Chris@42 46 INT i;
Chris@42 47 const R *xi;
Chris@42 48 R *xo;
/* Only the ii / io pointers are used; ri and ro are left untouched. */
Chris@42 49 xi = ii;
Chris@42 50 xo = io;
/* One transform group per iteration: i counts down from v in steps of VL. */
Chris@42 51 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) {
Chris@42 52 V TH, T3, TP, Tn, Ta, Tu, TU, TK, TO, Tk, TM, Tg, TL, Td, T1;
Chris@42 53 V T2;
/*
 * First stage: load the 14 inputs xi[WS(is, 0..13)]. Inputs k and k + 7 are
 * paired, (0,7) (2,9) (12,5) (8,1) (6,13) (10,3) (4,11), and each pair
 * yields one VADD (sum) and one VSUB (difference).
 */
Chris@42 54 T1 = LD(&(xi[0]), ivs, &(xi[0]));
Chris@42 55 T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@42 56 {
Chris@42 57 V Ti, TI, T6, TJ, T9, Tj, Te, Tf, Tb, Tc;
Chris@42 58 {
Chris@42 59 V T4, T5, T7, T8, Tl, Tm;
Chris@42 60 T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@42 61 T5 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@42 62 T7 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@42 63 T8 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@42 64 Tl = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@42 65 Tm = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@42 66 Ti = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@42 67 TH = VADD(T1, T2);
Chris@42 68 T3 = VSUB(T1, T2);
Chris@42 69 TI = VADD(T4, T5);
Chris@42 70 T6 = VSUB(T4, T5);
Chris@42 71 TJ = VADD(T7, T8);
Chris@42 72 T9 = VSUB(T7, T8);
Chris@42 73 TP = VADD(Tl, Tm);
Chris@42 74 Tn = VSUB(Tl, Tm);
Chris@42 75 Tj = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
Chris@42 76 Te = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@42 77 Tf = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@42 78 Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@42 79 Tc = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@42 80 }
Chris@42 81 Ta = VADD(T6, T9);
Chris@42 82 Tu = VSUB(T6, T9);
Chris@42 83 TU = VSUB(TI, TJ);
Chris@42 84 TK = VADD(TI, TJ);
Chris@42 85 TO = VADD(Ti, Tj);
Chris@42 86 Tk = VSUB(Ti, Tj);
Chris@42 87 TM = VADD(Te, Tf);
Chris@42 88 Tg = VSUB(Te, Tf);
Chris@42 89 TL = VADD(Tb, Tc);
Chris@42 90 Td = VSUB(Tb, Tc);
Chris@42 91 }
/*
 * Second stage: further sums/differences of the first-stage results, then
 * chains of VFMA / VFNMS with the constants above. xo[0] and xo[14] are
 * written inside this block; the remaining intermediates are combined and
 * stored in the block that follows.
 */
Chris@42 92 {
Chris@42 93 V T19, T1a, T13, TG, TY, T18, TB, Tw, TT, Tz, T11, T16, TE, Tr, TV;
Chris@42 94 V TQ;
Chris@42 95 TV = VSUB(TP, TO);
Chris@42 96 TQ = VADD(TO, TP);
Chris@42 97 {
Chris@42 98 V Ts, To, TW, TN;
Chris@42 99 Ts = VSUB(Tk, Tn);
Chris@42 100 To = VADD(Tk, Tn);
Chris@42 101 TW = VSUB(TM, TL);
Chris@42 102 TN = VADD(TL, TM);
Chris@42 103 {
Chris@42 104 V Tt, Th, TR, T12;
Chris@42 105 Tt = VSUB(Td, Tg);
Chris@42 106 Th = VADD(Td, Tg);
Chris@42 107 TR = VFNMS(LDK(KP356895867), TK, TQ);
Chris@42 108 T12 = VFNMS(LDK(KP554958132), TV, TU);
Chris@42 109 {
Chris@42 110 V Tx, TF, TZ, T14;
Chris@42 111 Tx = VFNMS(LDK(KP356895867), Ta, To);
Chris@42 112 TF = VFMA(LDK(KP554958132), Ts, Tu);
/* T19 adds up all seven pair sums (TH, TI+TJ, TL+TM, TO+TP); goes to xo[0]. */
Chris@42 113 T19 = VADD(TH, VADD(TK, VADD(TN, TQ)));
Chris@42 114 STM2(&(xo[0]), T19, ovs, &(xo[0]));
Chris@42 115 TZ = VFNMS(LDK(KP356895867), TN, TK);
Chris@42 116 T14 = VFNMS(LDK(KP356895867), TQ, TN);
Chris@42 117 {
Chris@42 118 V TX, T17, TC, Tp;
Chris@42 119 TX = VFMA(LDK(KP554958132), TW, TV);
Chris@42 120 T17 = VFMA(LDK(KP554958132), TU, TW);
/* T1a adds up all seven pair differences (T3, T6+T9, Td+Tg, Tk+Tn); goes to xo[14]. */
Chris@42 121 T1a = VADD(T3, VADD(Ta, VADD(Th, To)));
Chris@42 122 STM2(&(xo[14]), T1a, ovs, &(xo[2]));
Chris@42 123 TC = VFNMS(LDK(KP356895867), Th, Ta);
Chris@42 124 Tp = VFNMS(LDK(KP356895867), To, Th);
Chris@42 125 {
Chris@42 126 V TA, Tv, TS, Ty;
Chris@42 127 TA = VFMA(LDK(KP554958132), Tt, Ts);
Chris@42 128 Tv = VFNMS(LDK(KP554958132), Tu, Tt);
Chris@42 129 TS = VFNMS(LDK(KP692021471), TR, TN);
Chris@42 130 T13 = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), T12, TW));
Chris@42 131 Ty = VFNMS(LDK(KP692021471), Tx, Th);
Chris@42 132 TG = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), TF, Tt));
Chris@42 133 {
Chris@42 134 V T10, T15, TD, Tq;
Chris@42 135 T10 = VFNMS(LDK(KP692021471), TZ, TQ);
Chris@42 136 T15 = VFNMS(LDK(KP692021471), T14, TK);
Chris@42 137 TY = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), TX, TU));
Chris@42 138 T18 = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), T17, TV));
Chris@42 139 TD = VFNMS(LDK(KP692021471), TC, To);
Chris@42 140 Tq = VFNMS(LDK(KP692021471), Tp, Ta);
Chris@42 141 TB = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), TA, Tu));
Chris@42 142 Tw = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tv, Ts));
Chris@42 143 TT = VFNMS(LDK(KP900968867), TS, TH);
Chris@42 144 Tz = VFNMS(LDK(KP900968867), Ty, T3);
Chris@42 145 T11 = VFNMS(LDK(KP900968867), T10, TH);
Chris@42 146 T16 = VFNMS(LDK(KP900968867), T15, TH);
Chris@42 147 TE = VFNMS(LDK(KP900968867), TD, T3);
Chris@42 148 Tr = VFNMS(LDK(KP900968867), Tq, T3);
Chris@42 149 }
Chris@42 150 }
Chris@42 151 }
Chris@42 152 }
Chris@42 153 }
Chris@42 154 }
/*
 * Output stage: each VFMAI / VFNMSI pair takes the same two intermediates
 * and yields two outputs (e.g. TY and TT give xo[4] and xo[24]). Every
 * output gets one STM2 call; once both values for offsets 4m and 4m + 2
 * exist, a single STN2 call is issued for that pair (7 STN2 calls in all).
 * NOTE(review): which of STM2 / STN2 performs the real memory write is
 * decided by the SIMD headers, not by this file - confirm there.
 */
Chris@42 155 {
Chris@42 156 V T1b, T1c, T1d, T1e;
Chris@42 157 T1b = VFMAI(TY, TT);
Chris@42 158 STM2(&(xo[4]), T1b, ovs, &(xo[0]));
Chris@42 159 T1c = VFNMSI(TY, TT);
Chris@42 160 STM2(&(xo[24]), T1c, ovs, &(xo[0]));
Chris@42 161 T1d = VFMAI(TB, Tz);
Chris@42 162 STM2(&(xo[18]), T1d, ovs, &(xo[2]));
Chris@42 163 T1e = VFNMSI(TB, Tz);
Chris@42 164 STM2(&(xo[10]), T1e, ovs, &(xo[2]));
Chris@42 165 {
Chris@42 166 V T1f, T1g, T1h, T1i;
Chris@42 167 T1f = VFMAI(T13, T11);
Chris@42 168 STM2(&(xo[12]), T1f, ovs, &(xo[0]));
Chris@42 169 STN2(&(xo[12]), T1f, T1a, ovs);
Chris@42 170 T1g = VFNMSI(T13, T11);
Chris@42 171 STM2(&(xo[16]), T1g, ovs, &(xo[0]));
Chris@42 172 STN2(&(xo[16]), T1g, T1d, ovs);
Chris@42 173 T1h = VFMAI(T18, T16);
Chris@42 174 STM2(&(xo[8]), T1h, ovs, &(xo[0]));
Chris@42 175 STN2(&(xo[8]), T1h, T1e, ovs);
Chris@42 176 T1i = VFNMSI(T18, T16);
Chris@42 177 STM2(&(xo[20]), T1i, ovs, &(xo[0]));
Chris@42 178 {
Chris@42 179 V T1j, T1k, T1l, T1m;
Chris@42 180 T1j = VFNMSI(TG, TE);
Chris@42 181 STM2(&(xo[26]), T1j, ovs, &(xo[2]));
Chris@42 182 STN2(&(xo[24]), T1c, T1j, ovs);
Chris@42 183 T1k = VFMAI(TG, TE);
Chris@42 184 STM2(&(xo[2]), T1k, ovs, &(xo[2]));
Chris@42 185 STN2(&(xo[0]), T19, T1k, ovs);
Chris@42 186 T1l = VFNMSI(Tw, Tr);
Chris@42 187 STM2(&(xo[22]), T1l, ovs, &(xo[2]));
Chris@42 188 STN2(&(xo[20]), T1i, T1l, ovs);
Chris@42 189 T1m = VFMAI(Tw, Tr);
Chris@42 190 STM2(&(xo[6]), T1m, ovs, &(xo[2]));
Chris@42 191 STN2(&(xo[4]), T1b, T1m, ovs);
Chris@42 192 }
Chris@42 193 }
Chris@42 194 }
Chris@42 195 }
Chris@42 196 }
Chris@42 197 }
/* VLEAVE() comes from the SIMD headers; presumably end-of-codelet cleanup - TODO confirm. */
Chris@42 198 VLEAVE();
Chris@42 199 }
Chris@42 200
/*
 * Descriptor handed to kdft_register together with n2bv_14 (HAVE_FMA build).
 * It carries the size 14, the codelet name string, and {32, 6, 42, 0}, whose
 * first three entries match the add / mul / fused multiply-add counts in the
 * generator comment for this variant. The trailing fields (0, 2, 0, 0) are
 * defined by kdft_desc, which is declared outside this file; the 2 presumably
 * mirrors the generator's "-with-ostride 2" - confirm against the struct.
 */
Chris@42 201 static const kdft_desc desc = { 14, XSIMD_STRING("n2bv_14"), {32, 6, 42, 0}, &GENUS, 0, 2, 0, 0 };
Chris@42 202
/*
 * Registration entry point (HAVE_FMA build): passes the n2bv_14 function and
 * its descriptor to X(kdft_register) for planner p. XSIMD() and X() are
 * macros defined elsewhere (presumably symbol-name decoration - confirm).
 */
Chris@42 203 void XSIMD(codelet_n2bv_14) (planner *p) {
Chris@42 204 X(kdft_register) (p, n2bv_14, &desc);
Chris@42 205 }
Chris@42 206
Chris@42 207 #else /* HAVE_FMA */
Chris@42 208
Chris@42 209 /* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 14 -name n2bv_14 -with-ostride 2 -include n2b.h -store-multiple 2 */
Chris@42 210
Chris@42 211 /*
Chris@42 212 * This function contains 74 FP additions, 36 FP multiplications,
Chris@42 213 * (or, 50 additions, 12 multiplications, 24 fused multiply/add),
Chris@42 214 * 41 stack variables, 6 constants, and 35 memory accesses
Chris@42 215 */
Chris@42 216 #include "n2b.h"
Chris@42 217
/*
 * n2bv_14 (variant compiled when HAVE_FMA is not defined): size-14 SIMD DFT
 * codelet emitted by genfft's gen_notw_c with "-sign 1 -n 14
 * -with-ostride 2 -store-multiple 2".
 *
 * Parameters:
 *   ri, ro   - not referenced anywhere in this body.
 *   ii       - input base pointer; every load goes through xi = ii.
 *   io       - output base pointer; every store goes through xo = io.
 *   is       - input stride, applied through WS(is, k) for k = 0..13.
 *   os       - only passed to MAKE_VOLATILE_STRIDE; the outputs are written
 *              at the fixed even offsets xo[0], xo[2], ..., xo[26].
 *   v        - loop count; the loop runs while i > 0, decrementing by VL.
 *   ivs, ovs - per-iteration pointer advance is VL * ivs for the input and
 *              VL * ovs for the output; both are also handed to LD / STM2 /
 *              STN2.
 *
 * V, LD, STM2, STN2, VADD, VSUB, VMUL, VFMA, VFNMS, VBYI, LDK, DVK and
 * VLEAVE are macros/types defined outside this file; their exact semantics
 * are not visible here.
 */
Chris@42 218 static void n2bv_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@42 219 {
/*
 * Six vector constants; numerically they are cos(k*pi/7) and sin(k*pi/7)
 * for k = 1, 2, 3:
 *   KP900968867 = cos(pi/7),   KP623489801 = cos(2*pi/7),
 *   KP222520933 = cos(3*pi/7), KP433883739 = sin(pi/7),
 *   KP781831482 = sin(2*pi/7), KP974927912 = sin(3*pi/7).
 */
Chris@42 220 DVK(KP900968867, +0.900968867902419126236102319507445051165919162);
Chris@42 221 DVK(KP222520933, +0.222520933956314404288902564496794759466355569);
Chris@42 222 DVK(KP623489801, +0.623489801858733530525004884004239810632274731);
Chris@42 223 DVK(KP781831482, +0.781831482468029808708444526674057750232334519);
Chris@42 224 DVK(KP974927912, +0.974927912181823607018131682993931217232785801);
Chris@42 225 DVK(KP433883739, +0.433883739117558120475768332848358754609990728);
Chris@42 226 {
Chris@42 227 INT i;
Chris@42 228 const R *xi;
Chris@42 229 R *xo;
/* Only the ii / io pointers are used; ri and ro are left untouched. */
Chris@42 230 xi = ii;
Chris@42 231 xo = io;
/* One transform group per iteration: i counts down from v in steps of VL. */
Chris@42 232 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) {
Chris@42 233 V Tp, Ty, Tl, TL, Tq, TE, T7, TJ, Ts, TB, Te, TK, Tr, TH, Tn;
Chris@42 234 V To;
/*
 * First stage: inputs k and k + 7 are loaded in pairs and each pair is
 * turned into a difference (VSUB) and a sum (VADD). Pair (0,7) gives Tp / Ty.
 */
Chris@42 235 Tn = LD(&(xi[0]), ivs, &(xi[0]));
Chris@42 236 To = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@42 237 Tp = VSUB(Tn, To);
Chris@42 238 Ty = VADD(Tn, To);
/* Pairs (4,11) and (10,3): results combined into Tl, TL, Tq, TE. */
Chris@42 239 {
Chris@42 240 V Th, TC, Tk, TD;
Chris@42 241 {
Chris@42 242 V Tf, Tg, Ti, Tj;
Chris@42 243 Tf = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@42 244 Tg = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@42 245 Th = VSUB(Tf, Tg);
Chris@42 246 TC = VADD(Tf, Tg);
Chris@42 247 Ti = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@42 248 Tj = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@42 249 Tk = VSUB(Ti, Tj);
Chris@42 250 TD = VADD(Ti, Tj);
Chris@42 251 }
Chris@42 252 Tl = VSUB(Th, Tk);
Chris@42 253 TL = VSUB(TD, TC);
Chris@42 254 Tq = VADD(Th, Tk);
Chris@42 255 TE = VADD(TC, TD);
Chris@42 256 }
/* Pairs (2,9) and (12,5): results combined into T7, TJ, Ts, TB. */
Chris@42 257 {
Chris@42 258 V T3, Tz, T6, TA;
Chris@42 259 {
Chris@42 260 V T1, T2, T4, T5;
Chris@42 261 T1 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@42 262 T2 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@42 263 T3 = VSUB(T1, T2);
Chris@42 264 Tz = VADD(T1, T2);
Chris@42 265 T4 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@42 266 T5 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@42 267 T6 = VSUB(T4, T5);
Chris@42 268 TA = VADD(T4, T5);
Chris@42 269 }
Chris@42 270 T7 = VSUB(T3, T6);
Chris@42 271 TJ = VSUB(Tz, TA);
Chris@42 272 Ts = VADD(T3, T6);
Chris@42 273 TB = VADD(Tz, TA);
Chris@42 274 }
/* Pairs (6,13) and (8,1): results combined into Te, TK, Tr, TH. */
Chris@42 275 {
Chris@42 276 V Ta, TF, Td, TG;
Chris@42 277 {
Chris@42 278 V T8, T9, Tb, Tc;
Chris@42 279 T8 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@42 280 T9 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
Chris@42 281 Ta = VSUB(T8, T9);
Chris@42 282 TF = VADD(T8, T9);
Chris@42 283 Tb = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@42 284 Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@42 285 Td = VSUB(Tb, Tc);
Chris@42 286 TG = VADD(Tb, Tc);
Chris@42 287 }
Chris@42 288 Te = VSUB(Ta, Td);
Chris@42 289 TK = VSUB(TG, TF);
Chris@42 290 Tr = VADD(Ta, Td);
Chris@42 291 TH = VADD(TF, TG);
Chris@42 292 }
/*
 * Output stage. TR adds up all the pair differences and goes to xo[14];
 * TS adds up all the pair sums and goes to xo[0]. Every other output pair
 * is formed as A + B and A - B, where B is a VBYI(...) term built from the
 * sin constants and A is built from the cos constants. Every output gets
 * one STM2 call; once both values for offsets 4m and 4m + 2 exist, a single
 * STN2 call is issued for that pair (7 STN2 calls in all).
 * NOTE(review): which of STM2 / STN2 performs the real memory write is
 * decided by the SIMD headers, not by this file - confirm there.
 */
Chris@42 293 {
Chris@42 294 V TR, TS, TU, TV;
Chris@42 295 TR = VADD(Tp, VADD(Ts, VADD(Tq, Tr)));
Chris@42 296 STM2(&(xo[14]), TR, ovs, &(xo[2]));
Chris@42 297 TS = VADD(Ty, VADD(TB, VADD(TE, TH)));
Chris@42 298 STM2(&(xo[0]), TS, ovs, &(xo[0]));
Chris@42 299 {
Chris@42 300 V TT, Tm, Tt, TQ, TP, TW;
Chris@42 301 Tm = VBYI(VFMA(LDK(KP433883739), T7, VFNMS(LDK(KP781831482), Tl, VMUL(LDK(KP974927912), Te))));
Chris@42 302 Tt = VFMA(LDK(KP623489801), Tq, VFNMS(LDK(KP222520933), Tr, VFNMS(LDK(KP900968867), Ts, Tp)));
Chris@42 303 TT = VADD(Tm, Tt);
Chris@42 304 STM2(&(xo[6]), TT, ovs, &(xo[2]));
Chris@42 305 TU = VSUB(Tt, Tm);
Chris@42 306 STM2(&(xo[22]), TU, ovs, &(xo[2]));
Chris@42 307 TQ = VBYI(VFMA(LDK(KP974927912), TJ, VFMA(LDK(KP433883739), TL, VMUL(LDK(KP781831482), TK))));
Chris@42 308 TP = VFMA(LDK(KP623489801), TH, VFNMS(LDK(KP900968867), TE, VFNMS(LDK(KP222520933), TB, Ty)));
Chris@42 309 TV = VSUB(TP, TQ);
Chris@42 310 STM2(&(xo[24]), TV, ovs, &(xo[0]));
Chris@42 311 TW = VADD(TP, TQ);
Chris@42 312 STM2(&(xo[4]), TW, ovs, &(xo[0]));
Chris@42 313 STN2(&(xo[4]), TW, TT, ovs);
Chris@42 314 }
Chris@42 315 {
Chris@42 316 V T10, TM, TI, TZ;
Chris@42 317 {
Chris@42 318 V Tu, Tv, TX, TY;
Chris@42 319 Tu = VBYI(VFMA(LDK(KP781831482), T7, VFMA(LDK(KP974927912), Tl, VMUL(LDK(KP433883739), Te))));
Chris@42 320 Tv = VFMA(LDK(KP623489801), Ts, VFNMS(LDK(KP900968867), Tr, VFNMS(LDK(KP222520933), Tq, Tp)));
Chris@42 321 TX = VADD(Tu, Tv);
Chris@42 322 STM2(&(xo[2]), TX, ovs, &(xo[2]));
Chris@42 323 STN2(&(xo[0]), TS, TX, ovs);
Chris@42 324 TY = VSUB(Tv, Tu);
Chris@42 325 STM2(&(xo[26]), TY, ovs, &(xo[2]));
Chris@42 326 STN2(&(xo[24]), TV, TY, ovs);
Chris@42 327 }
Chris@42 328 TM = VBYI(VFNMS(LDK(KP433883739), TK, VFNMS(LDK(KP974927912), TL, VMUL(LDK(KP781831482), TJ))));
Chris@42 329 TI = VFMA(LDK(KP623489801), TB, VFNMS(LDK(KP900968867), TH, VFNMS(LDK(KP222520933), TE, Ty)));
Chris@42 330 TZ = VSUB(TI, TM);
Chris@42 331 STM2(&(xo[12]), TZ, ovs, &(xo[0]));
Chris@42 332 STN2(&(xo[12]), TZ, TR, ovs);
Chris@42 333 T10 = VADD(TI, TM);
Chris@42 334 STM2(&(xo[16]), T10, ovs, &(xo[0]));
Chris@42 335 {
Chris@42 336 V T11, TO, TN, T12;
Chris@42 337 TO = VBYI(VFMA(LDK(KP433883739), TJ, VFNMS(LDK(KP974927912), TK, VMUL(LDK(KP781831482), TL))));
Chris@42 338 TN = VFMA(LDK(KP623489801), TE, VFNMS(LDK(KP222520933), TH, VFNMS(LDK(KP900968867), TB, Ty)));
Chris@42 339 T11 = VSUB(TN, TO);
Chris@42 340 STM2(&(xo[8]), T11, ovs, &(xo[0]));
Chris@42 341 T12 = VADD(TN, TO);
Chris@42 342 STM2(&(xo[20]), T12, ovs, &(xo[0]));
Chris@42 343 STN2(&(xo[20]), T12, TU, ovs);
Chris@42 344 {
Chris@42 345 V Tx, Tw, T13, T14;
Chris@42 346 Tx = VBYI(VFNMS(LDK(KP781831482), Te, VFNMS(LDK(KP433883739), Tl, VMUL(LDK(KP974927912), T7))));
Chris@42 347 Tw = VFMA(LDK(KP623489801), Tr, VFNMS(LDK(KP900968867), Tq, VFNMS(LDK(KP222520933), Ts, Tp)));
Chris@42 348 T13 = VSUB(Tw, Tx);
Chris@42 349 STM2(&(xo[10]), T13, ovs, &(xo[2]));
Chris@42 350 STN2(&(xo[8]), T11, T13, ovs);
Chris@42 351 T14 = VADD(Tx, Tw);
Chris@42 352 STM2(&(xo[18]), T14, ovs, &(xo[2]));
Chris@42 353 STN2(&(xo[16]), T10, T14, ovs);
Chris@42 354 }
Chris@42 355 }
Chris@42 356 }
Chris@42 357 }
Chris@42 358 }
Chris@42 359 }
/* VLEAVE() comes from the SIMD headers; presumably end-of-codelet cleanup - TODO confirm. */
Chris@42 360 VLEAVE();
Chris@42 361 }
Chris@42 362
/*
 * Descriptor handed to kdft_register together with n2bv_14 (non-FMA build).
 * It carries the size 14, the codelet name string, and {50, 12, 24, 0},
 * whose first three entries match the add / mul / fused multiply-add counts
 * in the generator comment for this variant. The trailing fields
 * (0, 2, 0, 0) are defined by kdft_desc, which is declared outside this
 * file; the 2 presumably mirrors the generator's "-with-ostride 2" -
 * confirm against the struct.
 */
Chris@42 363 static const kdft_desc desc = { 14, XSIMD_STRING("n2bv_14"), {50, 12, 24, 0}, &GENUS, 0, 2, 0, 0 };
Chris@42 364
/*
 * Registration entry point (non-FMA build): passes the n2bv_14 function and
 * its descriptor to X(kdft_register) for planner p. XSIMD() and X() are
 * macros defined elsewhere (presumably symbol-name decoration - confirm).
 */
Chris@42 365 void XSIMD(codelet_n2bv_14) (planner *p) {
Chris@42 366 X(kdft_register) (p, n2bv_14, &desc);
Chris@42 367 }
Chris@42 368
Chris@42 369 #endif /* HAVE_FMA */