annotate src/fftw-3.3.8/dft/simd/common/n2bv_14.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */

/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Thu May 24 08:05:11 EDT 2018 */

#include "dft/codelet-dft.h"

#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)

/* Generated by: ../../../genfft/gen_notw_c.native -fma -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 14 -name n2bv_14 -with-ostride 2 -include dft/simd/n2b.h -store-multiple 2 */
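/* Roughly, the generator options above mean: -n 14 is the transform size;
   -sign 1 selects the +1 exponent sign (FFTW's "backward" direction, the "b"
   in n2bv_14); -fma and -simd request fused multiply/add and SIMD vector
   (V/LD/STM2 macro) code; -with-ostride 2 and -store-multiple 2 fix the
   interleaved output stride and the paired STM2/STN2 stores; the remaining
   flags are code-generation tuning knobs (variable count, pipeline latency,
   compact output). */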

/*
 * This function contains 74 FP additions, 48 FP multiplications,
 * (or, 32 additions, 6 multiplications, 42 fused multiply/add),
 * 51 stack variables, 6 constants, and 35 memory accesses
 */
#include "dft/simd/n2b.h"

static void n2bv_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     DVK(KP801937735, +0.801937735804838252472204639014890102331838324);
     DVK(KP974927912, +0.974927912181823607018131682993931217232785801);
     DVK(KP554958132, +0.554958132087371191422194871006410481067288862);
     DVK(KP900968867, +0.900968867902419126236102319507445051165919162);
     DVK(KP692021471, +0.692021471630095869627814897002069140197260599);
     DVK(KP356895867, +0.356895867892209443894399510021300583399127187);
     {
          INT i;
          const R *xi;
          R *xo;
          xi = ii;
          xo = io;
          for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) {
               V T3, TH, Ts, TV, TW, Tt, Tu, TU, Ta, To, Th, Tp, TC, Tx, TK;
               V TQ, TN, TR, T14, TZ, T1, T2;
               T1 = LD(&(xi[0]), ivs, &(xi[0]));
               T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
               T3 = VSUB(T1, T2);
               TH = VADD(T1, T2);
               {
                    V T6, TI, T9, TJ, Tn, TP, Tk, TO, Tg, TM, Td, TL;
                    {
                         V T4, T5, Ti, Tj;
                         T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
                         T5 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
                         T6 = VSUB(T4, T5);
                         TI = VADD(T4, T5);
                         {
                              V T7, T8, Tl, Tm;
                              T7 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
                              T8 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
                              T9 = VSUB(T7, T8);
                              TJ = VADD(T7, T8);
                              Tl = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
                              Tm = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
                              Tn = VSUB(Tl, Tm);
                              TP = VADD(Tl, Tm);
                         }
                         Ti = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
                         Tj = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
                         Tk = VSUB(Ti, Tj);
                         TO = VADD(Ti, Tj);
                         {
                              V Te, Tf, Tb, Tc;
                              Te = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
                              Tf = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
                              Tg = VSUB(Te, Tf);
                              TM = VADD(Te, Tf);
                              Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
                              Tc = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
                              Td = VSUB(Tb, Tc);
                              TL = VADD(Tb, Tc);
                         }
                    }
                    Ts = VSUB(Tk, Tn);
                    TV = VSUB(TP, TO);
                    TW = VSUB(TM, TL);
                    Tt = VSUB(Td, Tg);
                    Tu = VSUB(T6, T9);
                    TU = VSUB(TI, TJ);
                    Ta = VADD(T6, T9);
                    To = VADD(Tk, Tn);
                    Th = VADD(Td, Tg);
                    Tp = VFNMS(LDK(KP356895867), To, Th);
                    TC = VFNMS(LDK(KP356895867), Th, Ta);
                    Tx = VFNMS(LDK(KP356895867), Ta, To);
                    TK = VADD(TI, TJ);
                    TQ = VADD(TO, TP);
                    TN = VADD(TL, TM);
                    TR = VFNMS(LDK(KP356895867), TK, TQ);
                    T14 = VFNMS(LDK(KP356895867), TQ, TN);
                    TZ = VFNMS(LDK(KP356895867), TN, TK);
               }
               {
                    V T19, T1a, T1b, T1e, T1c, T1g, T1h;
                    T19 = VADD(T3, VADD(Ta, VADD(Th, To)));
                    STM2(&(xo[14]), T19, ovs, &(xo[2]));
                    T1a = VADD(TH, VADD(TK, VADD(TN, TQ)));
                    STM2(&(xo[0]), T1a, ovs, &(xo[0]));
                    {
                         V Tr, Tw, Tq, Tv;
                         Tq = VFNMS(LDK(KP692021471), Tp, Ta);
                         Tr = VFNMS(LDK(KP900968867), Tq, T3);
                         Tv = VFNMS(LDK(KP554958132), Tu, Tt);
                         Tw = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tv, Ts));
                         T1b = VFMAI(Tw, Tr);
                         STM2(&(xo[6]), T1b, ovs, &(xo[2]));
                         T1c = VFNMSI(Tw, Tr);
                         STM2(&(xo[22]), T1c, ovs, &(xo[2]));
                    }
                    {
                         V T16, T18, T15, T17, T1d;
                         T15 = VFNMS(LDK(KP692021471), T14, TK);
                         T16 = VFNMS(LDK(KP900968867), T15, TH);
                         T17 = VFMA(LDK(KP554958132), TU, TW);
                         T18 = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), T17, TV));
                         T1d = VFNMSI(T18, T16);
                         STM2(&(xo[20]), T1d, ovs, &(xo[0]));
                         STN2(&(xo[20]), T1d, T1c, ovs);
                         T1e = VFMAI(T18, T16);
                         STM2(&(xo[8]), T1e, ovs, &(xo[0]));
                    }
                    {
                         V Tz, TB, Ty, TA, T1f;
                         Ty = VFNMS(LDK(KP692021471), Tx, Th);
                         Tz = VFNMS(LDK(KP900968867), Ty, T3);
                         TA = VFMA(LDK(KP554958132), Tt, Ts);
                         TB = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), TA, Tu));
                         T1f = VFNMSI(TB, Tz);
                         STM2(&(xo[10]), T1f, ovs, &(xo[2]));
                         STN2(&(xo[8]), T1e, T1f, ovs);
                         T1g = VFMAI(TB, Tz);
                         STM2(&(xo[18]), T1g, ovs, &(xo[2]));
                    }
                    {
                         V TT, TY, TS, TX, T1i;
                         TS = VFNMS(LDK(KP692021471), TR, TN);
                         TT = VFNMS(LDK(KP900968867), TS, TH);
                         TX = VFMA(LDK(KP554958132), TW, TV);
                         TY = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), TX, TU));
                         T1h = VFNMSI(TY, TT);
                         STM2(&(xo[24]), T1h, ovs, &(xo[0]));
                         T1i = VFMAI(TY, TT);
                         STM2(&(xo[4]), T1i, ovs, &(xo[0]));
                         STN2(&(xo[4]), T1i, T1b, ovs);
                    }
                    {
                         V T11, T13, T10, T12, T1j, T1k;
                         T10 = VFNMS(LDK(KP692021471), TZ, TQ);
                         T11 = VFNMS(LDK(KP900968867), T10, TH);
                         T12 = VFNMS(LDK(KP554958132), TV, TU);
                         T13 = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), T12, TW));
                         T1j = VFNMSI(T13, T11);
                         STM2(&(xo[16]), T1j, ovs, &(xo[0]));
                         STN2(&(xo[16]), T1j, T1g, ovs);
                         T1k = VFMAI(T13, T11);
                         STM2(&(xo[12]), T1k, ovs, &(xo[0]));
                         STN2(&(xo[12]), T1k, T19, ovs);
                    }
                    {
                         V TE, TG, TD, TF, T1l, T1m;
                         TD = VFNMS(LDK(KP692021471), TC, To);
                         TE = VFNMS(LDK(KP900968867), TD, T3);
                         TF = VFMA(LDK(KP554958132), Ts, Tu);
                         TG = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), TF, Tt));
                         T1l = VFMAI(TG, TE);
                         STM2(&(xo[2]), T1l, ovs, &(xo[2]));
                         STN2(&(xo[0]), T1a, T1l, ovs);
                         T1m = VFNMSI(TG, TE);
                         STM2(&(xo[26]), T1m, ovs, &(xo[2]));
                         STN2(&(xo[24]), T1h, T1m, ovs);
                    }
               }
          }
     }
     VLEAVE();
}

static const kdft_desc desc = { 14, XSIMD_STRING("n2bv_14"), {32, 6, 42, 0}, &GENUS, 0, 2, 0, 0 };

void XSIMD(codelet_n2bv_14) (planner *p) {
     X(kdft_register) (p, n2bv_14, &desc);
}

#else

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 14 -name n2bv_14 -with-ostride 2 -include dft/simd/n2b.h -store-multiple 2 */

/*
 * This function contains 74 FP additions, 36 FP multiplications,
 * (or, 50 additions, 12 multiplications, 24 fused multiply/add),
 * 41 stack variables, 6 constants, and 35 memory accesses
 */
#include "dft/simd/n2b.h"

static void n2bv_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     DVK(KP900968867, +0.900968867902419126236102319507445051165919162);
     DVK(KP222520933, +0.222520933956314404288902564496794759466355569);
     DVK(KP623489801, +0.623489801858733530525004884004239810632274731);
     DVK(KP781831482, +0.781831482468029808708444526674057750232334519);
     DVK(KP974927912, +0.974927912181823607018131682993931217232785801);
     DVK(KP433883739, +0.433883739117558120475768332848358754609990728);
     {
          INT i;
          const R *xi;
          R *xo;
          xi = ii;
          xo = io;
          for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) {
               V Tp, Ty, Tl, TL, Tq, TE, T7, TJ, Ts, TB, Te, TK, Tr, TH, Tn;
               V To;
               Tn = LD(&(xi[0]), ivs, &(xi[0]));
               To = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
               Tp = VSUB(Tn, To);
               Ty = VADD(Tn, To);
               {
                    V Th, TC, Tk, TD;
                    {
                         V Tf, Tg, Ti, Tj;
                         Tf = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
                         Tg = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
                         Th = VSUB(Tf, Tg);
                         TC = VADD(Tf, Tg);
                         Ti = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
                         Tj = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
                         Tk = VSUB(Ti, Tj);
                         TD = VADD(Ti, Tj);
                    }
                    Tl = VSUB(Th, Tk);
                    TL = VSUB(TD, TC);
                    Tq = VADD(Th, Tk);
                    TE = VADD(TC, TD);
               }
               {
                    V T3, Tz, T6, TA;
                    {
                         V T1, T2, T4, T5;
                         T1 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
                         T2 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
                         T3 = VSUB(T1, T2);
                         Tz = VADD(T1, T2);
                         T4 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
                         T5 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
                         T6 = VSUB(T4, T5);
                         TA = VADD(T4, T5);
                    }
                    T7 = VSUB(T3, T6);
                    TJ = VSUB(Tz, TA);
                    Ts = VADD(T3, T6);
                    TB = VADD(Tz, TA);
               }
               {
                    V Ta, TF, Td, TG;
                    {
                         V T8, T9, Tb, Tc;
                         T8 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
                         T9 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
                         Ta = VSUB(T8, T9);
                         TF = VADD(T8, T9);
                         Tb = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
                         Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
                         Td = VSUB(Tb, Tc);
                         TG = VADD(Tb, Tc);
                    }
                    Te = VSUB(Ta, Td);
                    TK = VSUB(TG, TF);
                    Tr = VADD(Ta, Td);
                    TH = VADD(TF, TG);
               }
               {
                    V TR, TS, TU, TV;
                    TR = VADD(Tp, VADD(Ts, VADD(Tq, Tr)));
                    STM2(&(xo[14]), TR, ovs, &(xo[2]));
                    TS = VADD(Ty, VADD(TB, VADD(TE, TH)));
                    STM2(&(xo[0]), TS, ovs, &(xo[0]));
                    {
                         V TT, Tm, Tt, TQ, TP, TW;
                         Tm = VBYI(VFMA(LDK(KP433883739), T7, VFNMS(LDK(KP781831482), Tl, VMUL(LDK(KP974927912), Te))));
                         Tt = VFMA(LDK(KP623489801), Tq, VFNMS(LDK(KP222520933), Tr, VFNMS(LDK(KP900968867), Ts, Tp)));
                         TT = VADD(Tm, Tt);
                         STM2(&(xo[6]), TT, ovs, &(xo[2]));
                         TU = VSUB(Tt, Tm);
                         STM2(&(xo[22]), TU, ovs, &(xo[2]));
                         TQ = VBYI(VFMA(LDK(KP974927912), TJ, VFMA(LDK(KP433883739), TL, VMUL(LDK(KP781831482), TK))));
                         TP = VFMA(LDK(KP623489801), TH, VFNMS(LDK(KP900968867), TE, VFNMS(LDK(KP222520933), TB, Ty)));
                         TV = VSUB(TP, TQ);
                         STM2(&(xo[24]), TV, ovs, &(xo[0]));
                         TW = VADD(TP, TQ);
                         STM2(&(xo[4]), TW, ovs, &(xo[0]));
                         STN2(&(xo[4]), TW, TT, ovs);
                    }
                    {
                         V T10, TM, TI, TZ;
                         {
                              V Tu, Tv, TX, TY;
                              Tu = VBYI(VFMA(LDK(KP781831482), T7, VFMA(LDK(KP974927912), Tl, VMUL(LDK(KP433883739), Te))));
                              Tv = VFMA(LDK(KP623489801), Ts, VFNMS(LDK(KP900968867), Tr, VFNMS(LDK(KP222520933), Tq, Tp)));
                              TX = VADD(Tu, Tv);
                              STM2(&(xo[2]), TX, ovs, &(xo[2]));
                              STN2(&(xo[0]), TS, TX, ovs);
                              TY = VSUB(Tv, Tu);
                              STM2(&(xo[26]), TY, ovs, &(xo[2]));
                              STN2(&(xo[24]), TV, TY, ovs);
                         }
                         TM = VBYI(VFNMS(LDK(KP433883739), TK, VFNMS(LDK(KP974927912), TL, VMUL(LDK(KP781831482), TJ))));
                         TI = VFMA(LDK(KP623489801), TB, VFNMS(LDK(KP900968867), TH, VFNMS(LDK(KP222520933), TE, Ty)));
                         TZ = VSUB(TI, TM);
                         STM2(&(xo[12]), TZ, ovs, &(xo[0]));
                         STN2(&(xo[12]), TZ, TR, ovs);
                         T10 = VADD(TI, TM);
                         STM2(&(xo[16]), T10, ovs, &(xo[0]));
                         {
                              V T11, TO, TN, T12;
                              TO = VBYI(VFMA(LDK(KP433883739), TJ, VFNMS(LDK(KP974927912), TK, VMUL(LDK(KP781831482), TL))));
                              TN = VFMA(LDK(KP623489801), TE, VFNMS(LDK(KP222520933), TH, VFNMS(LDK(KP900968867), TB, Ty)));
                              T11 = VSUB(TN, TO);
                              STM2(&(xo[8]), T11, ovs, &(xo[0]));
                              T12 = VADD(TN, TO);
                              STM2(&(xo[20]), T12, ovs, &(xo[0]));
                              STN2(&(xo[20]), T12, TU, ovs);
                              {
                                   V Tx, Tw, T13, T14;
                                   Tx = VBYI(VFNMS(LDK(KP781831482), Te, VFNMS(LDK(KP433883739), Tl, VMUL(LDK(KP974927912), T7))));
                                   Tw = VFMA(LDK(KP623489801), Tr, VFNMS(LDK(KP900968867), Tq, VFNMS(LDK(KP222520933), Ts, Tp)));
                                   T13 = VSUB(Tw, Tx);
                                   STM2(&(xo[10]), T13, ovs, &(xo[2]));
                                   STN2(&(xo[8]), T11, T13, ovs);
                                   T14 = VADD(Tx, Tw);
                                   STM2(&(xo[18]), T14, ovs, &(xo[2]));
                                   STN2(&(xo[16]), T10, T14, ovs);
                              }
                         }
                    }
               }
          }
     }
     VLEAVE();
}

static const kdft_desc desc = { 14, XSIMD_STRING("n2bv_14"), {50, 12, 24, 0}, &GENUS, 0, 2, 0, 0 };

void XSIMD(codelet_n2bv_14) (planner *p) {
     X(kdft_register) (p, n2bv_14, &desc);
}

#endif
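
For context, a minimal sketch (not part of the FFTW sources) of how an application might exercise a size-14 backward transform through FFTW's public API. The planner chooses codelets internally, so dispatch to n2bv_14 is possible but not guaranteed and depends on which SIMD variants were built.

#include <fftw3.h>
#include <stdio.h>

int main(void)
{
     /* FFTW-aligned buffers for a length-14 complex transform. */
     fftw_complex *in = fftw_malloc(sizeof(fftw_complex) * 14);
     fftw_complex *out = fftw_malloc(sizeof(fftw_complex) * 14);

     /* FFTW_BACKWARD matches the +1 exponent sign of this codelet family
        (the "b" in n2bv); FFTW_ESTIMATE skips plan-time measurement. */
     fftw_plan p = fftw_plan_dft_1d(14, in, out, FFTW_BACKWARD, FFTW_ESTIMATE);

     for (int k = 0; k < 14; ++k) {
          in[k][0] = (double) k; /* real part */
          in[k][1] = 0.0;        /* imaginary part */
     }
     fftw_execute(p);
     printf("out[0] = %g + %gi\n", out[0][0], out[0][1]);

     fftw_destroy_plan(p);
     fftw_free(in);
     fftw_free(out);
     return 0;
}

Build against the installed library, e.g. cc example.c -lfftw3 -lm.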