annotate src/fftw-3.3.3/dft/simd/common/n1fv_13.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 /*
Chris@10 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
Chris@10 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
Chris@10 4 *
Chris@10 5 * This program is free software; you can redistribute it and/or modify
Chris@10 6 * it under the terms of the GNU General Public License as published by
Chris@10 7 * the Free Software Foundation; either version 2 of the License, or
Chris@10 8 * (at your option) any later version.
Chris@10 9 *
Chris@10 10 * This program is distributed in the hope that it will be useful,
Chris@10 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@10 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@10 13 * GNU General Public License for more details.
Chris@10 14 *
Chris@10 15 * You should have received a copy of the GNU General Public License
Chris@10 16 * along with this program; if not, write to the Free Software
Chris@10 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@10 18 *
Chris@10 19 */
Chris@10 20
Chris@10 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@10 22 /* Generated on Sun Nov 25 07:36:52 EST 2012 */
Chris@10 23
Chris@10 24 #include "codelet-dft.h"
Chris@10 25
Chris@10 26 #ifdef HAVE_FMA
Chris@10 27
Chris@10 28 /* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 13 -name n1fv_13 -include n1f.h */
Chris@10 29
Chris@10 30 /*
Chris@10 31 * This function contains 88 FP additions, 63 FP multiplications,
Chris@10 32 * (or, 31 additions, 6 multiplications, 57 fused multiply/add),
Chris@10 33 * 96 stack variables, 23 constants, and 26 memory accesses
Chris@10 34 */
Chris@10 35 #include "n1f.h"
Chris@10 36
/*
 * n1fv_13 (HAVE_FMA variant): generated straight-line SIMD kernel for a
 * size-13 DFT codelet (see the "Generated by" comment preceding this function).
 *
 * Each loop iteration loads the 13 inputs xi[WS(is, 0)] .. xi[WS(is, 12)]
 * and stores the 13 outputs xo[WS(os, 0)] .. xo[WS(os, 12)]. The loop
 * counts i down from v in steps of VL, advancing xi by VL * ivs and xo by
 * VL * ovs after every iteration.
 *
 * Parameters:
 *   ri, ro   - input / output base pointers (copied into xi / xo).
 *   ii, io   - not referenced anywhere in this body.
 *   is, os   - strides used to index the 13 inputs / outputs via WS().
 *   v        - loop count; the loop runs while i > 0.
 *   ivs, ovs - per-iteration pointer increments (scaled by VL); also passed
 *              through to LD() / ST().
 *
 * NOTE(review): presumably each V value carries both real and imaginary
 * parts, which would explain why ii / io are unused -- confirm against the
 * SIMD headers pulled in by n1f.h.
 */
Chris@10 37 static void n1fv_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@10 38 {
Chris@10 39 DVK(KP904176221, +0.904176221990848204433795481776887926501523162); /* constants below are referenced through LDK(...) in the loop body */
Chris@10 40 DVK(KP575140729, +0.575140729474003121368385547455453388461001608);
Chris@10 41 DVK(KP300462606, +0.300462606288665774426601772289207995520941381);
Chris@10 42 DVK(KP516520780, +0.516520780623489722840901288569017135705033622);
Chris@10 43 DVK(KP522026385, +0.522026385161275033714027226654165028300441940);
Chris@10 44 DVK(KP957805992, +0.957805992594665126462521754605754580515587217);
Chris@10 45 DVK(KP600477271, +0.600477271932665282925769253334763009352012849);
Chris@10 46 DVK(KP251768516, +0.251768516431883313623436926934233488546674281);
Chris@10 47 DVK(KP503537032, +0.503537032863766627246873853868466977093348562);
Chris@10 48 DVK(KP769338817, +0.769338817572980603471413688209101117038278899);
Chris@10 49 DVK(KP859542535, +0.859542535098774820163672132761689612766401925);
Chris@10 50 DVK(KP581704778, +0.581704778510515730456870384989698884939833902);
Chris@10 51 DVK(KP853480001, +0.853480001859823990758994934970528322872359049);
Chris@10 52 DVK(KP083333333, +0.083333333333333333333333333333333333333333333);
Chris@10 53 DVK(KP226109445, +0.226109445035782405468510155372505010481906348);
Chris@10 54 DVK(KP301479260, +0.301479260047709873958013540496673347309208464);
Chris@10 55 DVK(KP686558370, +0.686558370781754340655719594850823015421401653);
Chris@10 56 DVK(KP514918778, +0.514918778086315755491789696138117261566051239);
Chris@10 57 DVK(KP038632954, +0.038632954644348171955506895830342264440241080);
Chris@10 58 DVK(KP612264650, +0.612264650376756543746494474777125408779395514);
Chris@10 59 DVK(KP302775637, +0.302775637731994646559610633735247973125648287);
Chris@10 60 DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@10 61 DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@10 62 {
Chris@10 63 INT i;
Chris@10 64 const R *xi;
Chris@10 65 R *xo;
Chris@10 66 xi = ri; /* only ri / ro are used; ii / io are never read in this body */
Chris@10 67 xo = ro;
Chris@10 68 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(26, is), MAKE_VOLATILE_STRIDE(26, os)) {
Chris@10 69 V T1, T7, T2, Tg, Tf, TN, Th, Tq, Ta, Tj, T5, Tr, Tk;
Chris@10 70 T1 = LD(&(xi[0]), ivs, &(xi[0])); /* input 0 */
Chris@10 71 {
Chris@10 72 V Td, Te, T8, T9, T3, T4;
Chris@10 73 Td = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); /* inputs 1..12 are loaded in this block, interleaved with the first sums/differences */
Chris@10 74 Te = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@10 75 T7 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@10 76 T8 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@10 77 T9 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@10 78 T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@10 79 T3 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@10 80 T4 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@10 81 Tg = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@10 82 Tf = VADD(Td, Te);
Chris@10 83 TN = VSUB(Td, Te);
Chris@10 84 Th = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@10 85 Tq = VSUB(T8, T9);
Chris@10 86 Ta = VADD(T8, T9);
Chris@10 87 Tj = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@10 88 T5 = VADD(T3, T4);
Chris@10 89 Tr = VSUB(T4, T3);
Chris@10 90 Tk = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@10 91 }
Chris@10 92 {
Chris@10 93 V Tt, Ti, Ty, Tb, Ts, TQ, Tx, T6, Tu, Tl;
Chris@10 94 Tt = VSUB(Tg, Th);
Chris@10 95 Ti = VADD(Tg, Th);
Chris@10 96 Ty = VFMS(LDK(KP500000000), Ta, T7);
Chris@10 97 Tb = VADD(T7, Ta);
Chris@10 98 Ts = VSUB(Tq, Tr);
Chris@10 99 TQ = VADD(Tr, Tq);
Chris@10 100 Tx = VFNMS(LDK(KP500000000), T5, T2);
Chris@10 101 T6 = VADD(T2, T5);
Chris@10 102 Tu = VSUB(Tj, Tk);
Chris@10 103 Tl = VADD(Tj, Tk);
Chris@10 104 {
Chris@10 105 V TK, Tz, Tc, TX, Tv, TO, TL, Tm;
Chris@10 106 TK = VADD(Tx, Ty);
Chris@10 107 Tz = VSUB(Tx, Ty);
Chris@10 108 Tc = VADD(T6, Tb);
Chris@10 109 TX = VSUB(T6, Tb);
Chris@10 110 Tv = VSUB(Tt, Tu);
Chris@10 111 TO = VADD(Tt, Tu);
Chris@10 112 TL = VSUB(Ti, Tl);
Chris@10 113 Tm = VADD(Ti, Tl);
Chris@10 114 {
Chris@10 115 V TF, Tw, TP, TY, TT, TM, TA, Tn;
Chris@10 116 TF = VSUB(Ts, Tv);
Chris@10 117 Tw = VADD(Ts, Tv);
Chris@10 118 TP = VFNMS(LDK(KP500000000), TO, TN);
Chris@10 119 TY = VADD(TN, TO);
Chris@10 120 TT = VFNMS(LDK(KP866025403), TL, TK);
Chris@10 121 TM = VFMA(LDK(KP866025403), TL, TK);
Chris@10 122 TA = VFNMS(LDK(KP500000000), Tm, Tf);
Chris@10 123 Tn = VADD(Tf, Tm);
Chris@10 124 {
Chris@10 125 V T1f, T1n, TI, T18, T1k, T1c, TD, T17, T10, T1m, T16, T1e, TU, TR;
Chris@10 126 TU = VFNMS(LDK(KP866025403), TQ, TP);
Chris@10 127 TR = VFMA(LDK(KP866025403), TQ, TP);
Chris@10 128 {
Chris@10 129 V TZ, T15, TE, TB;
Chris@10 130 TZ = VFMA(LDK(KP302775637), TY, TX);
Chris@10 131 T15 = VFNMS(LDK(KP302775637), TX, TY);
Chris@10 132 TE = VSUB(Tz, TA);
Chris@10 133 TB = VADD(Tz, TA);
Chris@10 134 {
Chris@10 135 V TH, To, TV, T13;
Chris@10 136 TH = VSUB(Tc, Tn);
Chris@10 137 To = VADD(Tc, Tn); /* To = Tc + Tn accumulates inputs 1..12 */
Chris@10 138 TV = VFNMS(LDK(KP612264650), TU, TT);
Chris@10 139 T13 = VFMA(LDK(KP612264650), TT, TU);
Chris@10 140 {
Chris@10 141 V TS, T12, TG, T1b;
Chris@10 142 TS = VFNMS(LDK(KP038632954), TR, TM);
Chris@10 143 T12 = VFMA(LDK(KP038632954), TM, TR);
Chris@10 144 TG = VFNMS(LDK(KP514918778), TF, TE);
Chris@10 145 T1b = VFMA(LDK(KP686558370), TE, TF);
Chris@10 146 {
Chris@10 147 V TC, T1a, Tp, TW, T14;
Chris@10 148 TC = VFMA(LDK(KP301479260), TB, Tw);
Chris@10 149 T1a = VFNMS(LDK(KP226109445), Tw, TB);
Chris@10 150 Tp = VFNMS(LDK(KP083333333), To, T1);
Chris@10 151 ST(&(xo[0]), VADD(T1, To), ovs, &(xo[0])); /* output 0 = input 0 + To, i.e. the sum of all 13 inputs */
Chris@10 152 T1f = VFMA(LDK(KP853480001), TV, TS);
Chris@10 153 TW = VFNMS(LDK(KP853480001), TV, TS);
Chris@10 154 T1n = VFMA(LDK(KP853480001), T13, T12);
Chris@10 155 T14 = VFNMS(LDK(KP853480001), T13, T12);
Chris@10 156 TI = VFMA(LDK(KP581704778), TH, TG);
Chris@10 157 T18 = VFNMS(LDK(KP859542535), TG, TH);
Chris@10 158 T1k = VFMA(LDK(KP769338817), T1b, T1a);
Chris@10 159 T1c = VFNMS(LDK(KP769338817), T1b, T1a);
Chris@10 160 TD = VFMA(LDK(KP503537032), TC, Tp);
Chris@10 161 T17 = VFNMS(LDK(KP251768516), TC, Tp);
Chris@10 162 T10 = VMUL(LDK(KP600477271), VFMA(LDK(KP957805992), TZ, TW));
Chris@10 163 T1m = VFNMS(LDK(KP522026385), TW, TZ);
Chris@10 164 T16 = VMUL(LDK(KP600477271), VFMA(LDK(KP957805992), T15, T14));
Chris@10 165 T1e = VFNMS(LDK(KP522026385), T14, T15);
Chris@10 166 }
Chris@10 167 }
Chris@10 168 }
Chris@10 169 }
Chris@10 170 {
Chris@10 171 V T1o, T1q, T1g, T1i, T1d, T1h, T1l, T1p;
Chris@10 172 {
Chris@10 173 V T11, TJ, T19, T1j;
Chris@10 174 T11 = VFMA(LDK(KP516520780), TI, TD);
Chris@10 175 TJ = VFNMS(LDK(KP516520780), TI, TD);
Chris@10 176 T19 = VFMA(LDK(KP300462606), T18, T17);
Chris@10 177 T1j = VFNMS(LDK(KP300462606), T18, T17);
Chris@10 178 T1o = VMUL(LDK(KP575140729), VFNMS(LDK(KP904176221), T1n, T1m));
Chris@10 179 T1q = VMUL(LDK(KP575140729), VFMA(LDK(KP904176221), T1n, T1m));
Chris@10 180 T1g = VMUL(LDK(KP575140729), VFMA(LDK(KP904176221), T1f, T1e));
Chris@10 181 T1i = VMUL(LDK(KP575140729), VFNMS(LDK(KP904176221), T1f, T1e));
Chris@10 182 ST(&(xo[WS(os, 12)]), VFNMSI(T16, T11), ovs, &(xo[0])); /* outputs 1..12 are stored in pairs k / 13-k built from the same two operands via VFMAI / VFNMSI */
Chris@10 183 ST(&(xo[WS(os, 1)]), VFMAI(T16, T11), ovs, &(xo[WS(os, 1)]));
Chris@10 184 ST(&(xo[WS(os, 8)]), VFMAI(T10, TJ), ovs, &(xo[0]));
Chris@10 185 ST(&(xo[WS(os, 5)]), VFNMSI(T10, TJ), ovs, &(xo[WS(os, 1)]));
Chris@10 186 T1d = VFNMS(LDK(KP503537032), T1c, T19);
Chris@10 187 T1h = VFMA(LDK(KP503537032), T1c, T19);
Chris@10 188 T1l = VFNMS(LDK(KP503537032), T1k, T1j);
Chris@10 189 T1p = VFMA(LDK(KP503537032), T1k, T1j);
Chris@10 190 }
Chris@10 191 ST(&(xo[WS(os, 9)]), VFMAI(T1g, T1d), ovs, &(xo[WS(os, 1)]));
Chris@10 192 ST(&(xo[WS(os, 4)]), VFNMSI(T1g, T1d), ovs, &(xo[0]));
Chris@10 193 ST(&(xo[WS(os, 10)]), VFNMSI(T1i, T1h), ovs, &(xo[0]));
Chris@10 194 ST(&(xo[WS(os, 3)]), VFMAI(T1i, T1h), ovs, &(xo[WS(os, 1)]));
Chris@10 195 ST(&(xo[WS(os, 7)]), VFMAI(T1o, T1l), ovs, &(xo[WS(os, 1)]));
Chris@10 196 ST(&(xo[WS(os, 6)]), VFNMSI(T1o, T1l), ovs, &(xo[0]));
Chris@10 197 ST(&(xo[WS(os, 11)]), VFMAI(T1q, T1p), ovs, &(xo[WS(os, 1)]));
Chris@10 198 ST(&(xo[WS(os, 2)]), VFNMSI(T1q, T1p), ovs, &(xo[0]));
Chris@10 199 }
Chris@10 200 }
Chris@10 201 }
Chris@10 202 }
Chris@10 203 }
Chris@10 204 }
Chris@10 205 }
Chris@10 206 VLEAVE();
Chris@10 207 }
Chris@10 208
/*
 * Descriptor handed to kdft_register for the HAVE_FMA build of n1fv_13:
 * size 13, the codelet name string, and an op-count initializer whose first
 * three entries {31, 6, 57} match the "31 additions, 6 multiplications,
 * 57 fused multiply/add" figures in the generator comment for this variant.
 * NOTE(review): the meaning of the fourth count and of the four trailing
 * zeros is defined by kdft_desc in the project headers -- not visible here.
 */
Chris@10 209 static const kdft_desc desc = { 13, XSIMD_STRING("n1fv_13"), {31, 6, 57, 0}, &GENUS, 0, 0, 0, 0 };
Chris@10 210
/*
 * Registration entry point (HAVE_FMA build): hands the n1fv_13 kernel and
 * its descriptor `desc` to kdft_register for the given planner p.
 */
Chris@10 211 void XSIMD(codelet_n1fv_13) (planner *p) {
Chris@10 212 X(kdft_register) (p, n1fv_13, &desc);
Chris@10 213 }
Chris@10 214
Chris@10 215 #else /* HAVE_FMA */
Chris@10 216
Chris@10 217 /* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 13 -name n1fv_13 -include n1f.h */
Chris@10 218
Chris@10 219 /*
Chris@10 220 * This function contains 88 FP additions, 34 FP multiplications,
Chris@10 221 * (or, 69 additions, 15 multiplications, 19 fused multiply/add),
Chris@10 222 * 60 stack variables, 20 constants, and 26 memory accesses
Chris@10 223 */
Chris@10 224 #include "n1f.h"
Chris@10 225
/*
 * n1fv_13 (non-FMA variant, compiled when HAVE_FMA is not defined):
 * generated straight-line SIMD kernel for a size-13 DFT codelet (see the
 * "Generated by" comment preceding this function).
 *
 * Each loop iteration loads the 13 inputs xi[WS(is, 0)] .. xi[WS(is, 12)]
 * and stores the 13 outputs xo[WS(os, 0)] .. xo[WS(os, 12)]. The loop
 * counts i down from v in steps of VL, advancing xi by VL * ivs and xo by
 * VL * ovs after every iteration.
 *
 * Parameters:
 *   ri, ro   - input / output base pointers (copied into xi / xo).
 *   ii, io   - not referenced anywhere in this body.
 *   is, os   - strides used to index the 13 inputs / outputs via WS().
 *   v        - loop count; the loop runs while i > 0.
 *   ivs, ovs - per-iteration pointer increments (scaled by VL); also passed
 *              through to LD() / ST().
 *
 * NOTE(review): presumably each V value carries both real and imaginary
 * parts, which would explain why ii / io are unused -- confirm against the
 * SIMD headers pulled in by n1f.h.
 */
Chris@10 226 static void n1fv_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@10 227 {
Chris@10 228 DVK(KP2_000000000, +2.000000000000000000000000000000000000000000000); /* constants below are referenced through LDK(...) in the loop body */
Chris@10 229 DVK(KP083333333, +0.083333333333333333333333333333333333333333333);
Chris@10 230 DVK(KP075902986, +0.075902986037193865983102897245103540356428373);
Chris@10 231 DVK(KP251768516, +0.251768516431883313623436926934233488546674281);
Chris@10 232 DVK(KP132983124, +0.132983124607418643793760531921092974399165133);
Chris@10 233 DVK(KP258260390, +0.258260390311744861420450644284508567852516811);
Chris@10 234 DVK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
Chris@10 235 DVK(KP300238635, +0.300238635966332641462884626667381504676006424);
Chris@10 236 DVK(KP011599105, +0.011599105605768290721655456654083252189827041);
Chris@10 237 DVK(KP156891391, +0.156891391051584611046832726756003269660212636);
Chris@10 238 DVK(KP256247671, +0.256247671582936600958684654061725059144125175);
Chris@10 239 DVK(KP174138601, +0.174138601152135905005660794929264742616964676);
Chris@10 240 DVK(KP575140729, +0.575140729474003121368385547455453388461001608);
Chris@10 241 DVK(KP503537032, +0.503537032863766627246873853868466977093348562);
Chris@10 242 DVK(KP113854479, +0.113854479055790798974654345867655310534642560);
Chris@10 243 DVK(KP265966249, +0.265966249214837287587521063842185948798330267);
Chris@10 244 DVK(KP387390585, +0.387390585467617292130675966426762851778775217);
Chris@10 245 DVK(KP300462606, +0.300462606288665774426601772289207995520941381);
Chris@10 246 DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@10 247 DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@10 248 {
Chris@10 249 INT i;
Chris@10 250 const R *xi;
Chris@10 251 R *xo;
Chris@10 252 xi = ri; /* only ri / ro are used; ii / io are never read in this body */
Chris@10 253 xo = ro;
Chris@10 254 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(26, is), MAKE_VOLATILE_STRIDE(26, os)) {
Chris@10 255 V TW, Tb, Tm, Tu, TC, TR, TX, TK, TU, Tz, TB, TN, TT;
Chris@10 256 TW = LD(&(xi[0]), ivs, &(xi[0])); /* input 0 */
Chris@10 257 {
Chris@10 258 V T3, TH, Tl, Tw, Tp, Tg, Tv, To, T6, Tr, T9, Ts, Ta, TI, T1;
Chris@10 259 V T2, Tq, Tt;
Chris@10 260 T1 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); /* inputs 1..12 are loaded in this block, grouped as {8,5}, {12,10,4}, {1,3,9}, {11,6,7,2} */
Chris@10 261 T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@10 262 T3 = VSUB(T1, T2);
Chris@10 263 TH = VADD(T1, T2);
Chris@10 264 {
Chris@10 265 V Th, Ti, Tj, Tk;
Chris@10 266 Th = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@10 267 Ti = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@10 268 Tj = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@10 269 Tk = VADD(Ti, Tj);
Chris@10 270 Tl = VADD(Th, Tk);
Chris@10 271 Tw = VSUB(Ti, Tj);
Chris@10 272 Tp = VFNMS(LDK(KP500000000), Tk, Th);
Chris@10 273 }
Chris@10 274 {
Chris@10 275 V Tc, Td, Te, Tf;
Chris@10 276 Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@10 277 Td = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@10 278 Te = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@10 279 Tf = VADD(Td, Te);
Chris@10 280 Tg = VADD(Tc, Tf);
Chris@10 281 Tv = VSUB(Td, Te);
Chris@10 282 To = VFNMS(LDK(KP500000000), Tf, Tc);
Chris@10 283 }
Chris@10 284 {
Chris@10 285 V T4, T5, T7, T8;
Chris@10 286 T4 = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@10 287 T5 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@10 288 T6 = VSUB(T4, T5);
Chris@10 289 Tr = VADD(T4, T5);
Chris@10 290 T7 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@10 291 T8 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@10 292 T9 = VSUB(T7, T8);
Chris@10 293 Ts = VADD(T7, T8);
Chris@10 294 }
Chris@10 295 Ta = VADD(T6, T9);
Chris@10 296 TI = VADD(Tr, Ts);
Chris@10 297 Tb = VADD(T3, Ta);
Chris@10 298 Tm = VSUB(Tg, Tl);
Chris@10 299 Tq = VSUB(To, Tp);
Chris@10 300 Tt = VMUL(LDK(KP866025403), VSUB(Tr, Ts));
Chris@10 301 Tu = VADD(Tq, Tt);
Chris@10 302 TC = VSUB(Tq, Tt);
Chris@10 303 {
Chris@10 304 V TP, TQ, TG, TJ;
Chris@10 305 TP = VADD(Tg, Tl);
Chris@10 306 TQ = VADD(TH, TI);
Chris@10 307 TR = VMUL(LDK(KP300462606), VSUB(TP, TQ));
Chris@10 308 TX = VADD(TP, TQ); /* TX = TP + TQ accumulates inputs 1..12 */
Chris@10 309 TG = VADD(To, Tp);
Chris@10 310 TJ = VFNMS(LDK(KP500000000), TI, TH);
Chris@10 311 TK = VSUB(TG, TJ);
Chris@10 312 TU = VADD(TG, TJ);
Chris@10 313 }
Chris@10 314 {
Chris@10 315 V Tx, Ty, TL, TM;
Chris@10 316 Tx = VMUL(LDK(KP866025403), VSUB(Tv, Tw));
Chris@10 317 Ty = VFNMS(LDK(KP500000000), Ta, T3);
Chris@10 318 Tz = VSUB(Tx, Ty);
Chris@10 319 TB = VADD(Tx, Ty);
Chris@10 320 TL = VADD(Tv, Tw);
Chris@10 321 TM = VSUB(T6, T9);
Chris@10 322 TN = VSUB(TL, TM);
Chris@10 323 TT = VADD(TL, TM);
Chris@10 324 }
Chris@10 325 }
Chris@10 326 ST(&(xo[0]), VADD(TW, TX), ovs, &(xo[0])); /* output 0 = input 0 + TX, i.e. the sum of all 13 inputs */
Chris@10 327 {
Chris@10 328 V T19, T1n, T14, T13, T1f, T1k, Tn, TE, T1e, T1j, TS, T1m, TZ, T1c, TA;
Chris@10 329 V TD;
Chris@10 330 {
Chris@10 331 V T17, T18, T11, T12;
Chris@10 332 T17 = VFMA(LDK(KP387390585), TN, VMUL(LDK(KP265966249), TK));
Chris@10 333 T18 = VFNMS(LDK(KP503537032), TU, VMUL(LDK(KP113854479), TT));
Chris@10 334 T19 = VSUB(T17, T18);
Chris@10 335 T1n = VADD(T17, T18);
Chris@10 336 T14 = VFMA(LDK(KP575140729), Tm, VMUL(LDK(KP174138601), Tb));
Chris@10 337 T11 = VFNMS(LDK(KP156891391), TB, VMUL(LDK(KP256247671), TC));
Chris@10 338 T12 = VFMA(LDK(KP011599105), Tz, VMUL(LDK(KP300238635), Tu));
Chris@10 339 T13 = VSUB(T11, T12);
Chris@10 340 T1f = VADD(T14, T13);
Chris@10 341 T1k = VMUL(LDK(KP1_732050807), VADD(T11, T12));
Chris@10 342 }
Chris@10 343 Tn = VFNMS(LDK(KP174138601), Tm, VMUL(LDK(KP575140729), Tb));
Chris@10 344 TA = VFNMS(LDK(KP300238635), Tz, VMUL(LDK(KP011599105), Tu));
Chris@10 345 TD = VFMA(LDK(KP256247671), TB, VMUL(LDK(KP156891391), TC));
Chris@10 346 TE = VSUB(TA, TD);
Chris@10 347 T1e = VMUL(LDK(KP1_732050807), VADD(TD, TA));
Chris@10 348 T1j = VSUB(Tn, TE);
Chris@10 349 {
Chris@10 350 V TO, T1b, TV, TY, T1a;
Chris@10 351 TO = VFNMS(LDK(KP132983124), TN, VMUL(LDK(KP258260390), TK));
Chris@10 352 T1b = VSUB(TR, TO);
Chris@10 353 TV = VFMA(LDK(KP251768516), TT, VMUL(LDK(KP075902986), TU));
Chris@10 354 TY = VFNMS(LDK(KP083333333), TX, TW);
Chris@10 355 T1a = VSUB(TY, TV);
Chris@10 356 TS = VFMA(LDK(KP2_000000000), TO, TR);
Chris@10 357 T1m = VADD(T1b, T1a);
Chris@10 358 TZ = VFMA(LDK(KP2_000000000), TV, TY);
Chris@10 359 T1c = VSUB(T1a, T1b);
Chris@10 360 }
Chris@10 361 {
Chris@10 362 V TF, T10, T1l, T1o;
Chris@10 363 TF = VBYI(VFMA(LDK(KP2_000000000), TE, Tn));
Chris@10 364 T10 = VADD(TS, TZ);
Chris@10 365 ST(&(xo[WS(os, 1)]), VADD(TF, T10), ovs, &(xo[WS(os, 1)])); /* outputs 1..12 are stored in pairs k / 13-k as sum and difference of a VBYI(...) term and a plain term */
Chris@10 366 ST(&(xo[WS(os, 12)]), VSUB(T10, TF), ovs, &(xo[0]));
Chris@10 367 {
Chris@10 368 V T15, T16, T1p, T1q;
Chris@10 369 T15 = VBYI(VFMS(LDK(KP2_000000000), T13, T14));
Chris@10 370 T16 = VSUB(TZ, TS);
Chris@10 371 ST(&(xo[WS(os, 5)]), VADD(T15, T16), ovs, &(xo[WS(os, 1)]));
Chris@10 372 ST(&(xo[WS(os, 8)]), VSUB(T16, T15), ovs, &(xo[0]));
Chris@10 373 T1p = VADD(T1n, T1m);
Chris@10 374 T1q = VBYI(VADD(T1j, T1k));
Chris@10 375 ST(&(xo[WS(os, 4)]), VSUB(T1p, T1q), ovs, &(xo[0]));
Chris@10 376 ST(&(xo[WS(os, 9)]), VADD(T1q, T1p), ovs, &(xo[WS(os, 1)]));
Chris@10 377 }
Chris@10 378 T1l = VBYI(VSUB(T1j, T1k));
Chris@10 379 T1o = VSUB(T1m, T1n);
Chris@10 380 ST(&(xo[WS(os, 3)]), VADD(T1l, T1o), ovs, &(xo[WS(os, 1)]));
Chris@10 381 ST(&(xo[WS(os, 10)]), VSUB(T1o, T1l), ovs, &(xo[0]));
Chris@10 382 {
Chris@10 383 V T1h, T1i, T1d, T1g;
Chris@10 384 T1h = VBYI(VSUB(T1e, T1f));
Chris@10 385 T1i = VSUB(T1c, T19);
Chris@10 386 ST(&(xo[WS(os, 6)]), VADD(T1h, T1i), ovs, &(xo[0]));
Chris@10 387 ST(&(xo[WS(os, 7)]), VSUB(T1i, T1h), ovs, &(xo[WS(os, 1)]));
Chris@10 388 T1d = VADD(T19, T1c);
Chris@10 389 T1g = VBYI(VADD(T1e, T1f));
Chris@10 390 ST(&(xo[WS(os, 2)]), VSUB(T1d, T1g), ovs, &(xo[0]));
Chris@10 391 ST(&(xo[WS(os, 11)]), VADD(T1g, T1d), ovs, &(xo[WS(os, 1)]));
Chris@10 392 }
Chris@10 393 }
Chris@10 394 }
Chris@10 395 }
Chris@10 396 }
Chris@10 397 VLEAVE();
Chris@10 398 }
Chris@10 399
/*
 * Descriptor handed to kdft_register for the non-FMA build of n1fv_13:
 * size 13, the codelet name string, and an op-count initializer whose first
 * three entries {69, 15, 19} match the "69 additions, 15 multiplications,
 * 19 fused multiply/add" figures in the generator comment for this variant.
 * NOTE(review): the meaning of the fourth count and of the four trailing
 * zeros is defined by kdft_desc in the project headers -- not visible here.
 */
Chris@10 400 static const kdft_desc desc = { 13, XSIMD_STRING("n1fv_13"), {69, 15, 19, 0}, &GENUS, 0, 0, 0, 0 };
Chris@10 401
/*
 * Registration entry point (non-FMA build): hands the n1fv_13 kernel and
 * its descriptor `desc` to kdft_register for the given planner p.
 */
Chris@10 402 void XSIMD(codelet_n1fv_13) (planner *p) {
Chris@10 403 X(kdft_register) (p, n1fv_13, &desc);
Chris@10 404 }
Chris@10 405
Chris@10 406 #endif /* HAVE_FMA */