annotate src/fftw-3.3.5/dft/simd/common/n1bv_13.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:39:14 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-dft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 13 -name n1bv_13 -include n1b.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 88 FP additions, 63 FP multiplications,
Chris@42 32 * (or, 31 additions, 6 multiplications, 57 fused multiply/add),
Chris@42 33 * 96 stack variables, 23 constants, and 26 memory accesses
Chris@42 34 */
Chris@42 35 #include "n1b.h"
Chris@42 36
/*
 * n1bv_13 (HAVE_FMA build): machine-generated SIMD codelet for a 13-point
 * transform, as named by "-n 13 -sign 1 -name n1bv_13" on the genfft command
 * line recorded in the comment preceding this function.
 *
 * Parameters, as used by the body below:
 *   ri, ro   - not referenced; every load goes through ii and every store
 *              through io (via the local cursors xi and xo).
 *   ii, io   - base pointers of the input and output data.
 *   is, os   - strides handed to WS(is, k) and WS(os, k) to address input
 *              and output element k, for k = 0..12.
 *   v        - loop count; the loop runs while i > 0 and subtracts VL per pass.
 *   ivs, ovs - xi advances by VL * ivs and xo by VL * ovs per pass; the same
 *              two values are also forwarded to every LD and ST.
 *
 * LD, ST, LDK, DVK, WS, VL, MAKE_VOLATILE_STRIDE, VLEAVE, the V type and the
 * V-prefixed arithmetic macros are defined outside this file section; their
 * exact semantics are not restated here.
 */
Chris@42 37 static void n1bv_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@42 38 {
/* Named constants; each one is consumed below through LDK(name). */
Chris@42 39 DVK(KP904176221, +0.904176221990848204433795481776887926501523162);
Chris@42 40 DVK(KP575140729, +0.575140729474003121368385547455453388461001608);
Chris@42 41 DVK(KP300462606, +0.300462606288665774426601772289207995520941381);
Chris@42 42 DVK(KP516520780, +0.516520780623489722840901288569017135705033622);
Chris@42 43 DVK(KP522026385, +0.522026385161275033714027226654165028300441940);
Chris@42 44 DVK(KP957805992, +0.957805992594665126462521754605754580515587217);
Chris@42 45 DVK(KP600477271, +0.600477271932665282925769253334763009352012849);
Chris@42 46 DVK(KP251768516, +0.251768516431883313623436926934233488546674281);
Chris@42 47 DVK(KP503537032, +0.503537032863766627246873853868466977093348562);
Chris@42 48 DVK(KP769338817, +0.769338817572980603471413688209101117038278899);
Chris@42 49 DVK(KP859542535, +0.859542535098774820163672132761689612766401925);
Chris@42 50 DVK(KP581704778, +0.581704778510515730456870384989698884939833902);
Chris@42 51 DVK(KP853480001, +0.853480001859823990758994934970528322872359049);
Chris@42 52 DVK(KP083333333, +0.083333333333333333333333333333333333333333333);
Chris@42 53 DVK(KP226109445, +0.226109445035782405468510155372505010481906348);
Chris@42 54 DVK(KP301479260, +0.301479260047709873958013540496673347309208464);
Chris@42 55 DVK(KP686558370, +0.686558370781754340655719594850823015421401653);
Chris@42 56 DVK(KP514918778, +0.514918778086315755491789696138117261566051239);
Chris@42 57 DVK(KP038632954, +0.038632954644348171955506895830342264440241080);
Chris@42 58 DVK(KP612264650, +0.612264650376756543746494474777125408779395514);
Chris@42 59 DVK(KP302775637, +0.302775637731994646559610633735247973125648287);
Chris@42 60 DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 61 DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 62 {
Chris@42 63 INT i;
Chris@42 64 const R *xi;
Chris@42 65 R *xo;
/* Only the ii and io pointers are used; ri and ro are never read. */
Chris@42 66 xi = ii;
Chris@42 67 xo = io;
/* Each pass takes VL off the remaining count and moves xi and xo on by VL * ivs and VL * ovs. */
Chris@42 68 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(26, is), MAKE_VOLATILE_STRIDE(26, os)) {
Chris@42 69 V T1, T7, T2, Tg, Tf, TN, Th, Tq, Ta, Tj, T5, Tr, Tk;
/* Gather the 13 inputs xi[WS(is, 0)] .. xi[WS(is, 12)]; the first pairwise sums and differences are interleaved with the loads. */
Chris@42 70 T1 = LD(&(xi[0]), ivs, &(xi[0]));
Chris@42 71 {
Chris@42 72 V Td, Te, T8, T9, T3, T4;
Chris@42 73 Td = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@42 74 Te = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@42 75 T7 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@42 76 T8 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@42 77 T9 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@42 78 T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@42 79 T3 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@42 80 T4 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@42 81 Tg = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@42 82 Tf = VADD(Td, Te);
Chris@42 83 TN = VSUB(Td, Te);
Chris@42 84 Th = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@42 85 Tq = VSUB(T8, T9);
Chris@42 86 Ta = VADD(T8, T9);
Chris@42 87 Tj = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@42 88 T5 = VADD(T3, T4);
Chris@42 89 Tr = VSUB(T4, T3);
Chris@42 90 Tk = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@42 91 }
Chris@42 92 {
Chris@42 93 V Tt, Ti, Ty, Tb, Ts, TQ, Tx, T6, Tu, Tl;
Chris@42 94 Tt = VSUB(Tg, Th);
Chris@42 95 Ti = VADD(Tg, Th);
/* NOTE(review): Ty is built with VFMS whereas its counterpart Tx (below) uses VFNMS. The asymmetry looks deliberate in this generated code - confirm against the upstream file before "fixing" it. */
Chris@42 96 Ty = VFMS(LDK(KP500000000), Ta, T7);
Chris@42 97 Tb = VADD(T7, Ta);
Chris@42 98 Ts = VSUB(Tq, Tr);
Chris@42 99 TQ = VADD(Tr, Tq);
Chris@42 100 Tx = VFNMS(LDK(KP500000000), T5, T2);
Chris@42 101 T6 = VADD(T2, T5);
Chris@42 102 Tu = VSUB(Tj, Tk);
Chris@42 103 Tl = VADD(Tj, Tk);
Chris@42 104 {
Chris@42 105 V TK, Tz, Tc, TX, Tv, TO, TL, Tm;
Chris@42 106 TK = VADD(Tx, Ty);
Chris@42 107 Tz = VSUB(Tx, Ty);
Chris@42 108 Tc = VADD(T6, Tb);
Chris@42 109 TX = VSUB(T6, Tb);
Chris@42 110 Tv = VSUB(Tt, Tu);
Chris@42 111 TO = VADD(Tt, Tu);
Chris@42 112 TL = VSUB(Ti, Tl);
Chris@42 113 Tm = VADD(Ti, Tl);
Chris@42 114 {
Chris@42 115 V TF, Tw, TP, TY, TT, TM, TA, Tn;
Chris@42 116 TF = VSUB(Ts, Tv);
Chris@42 117 Tw = VADD(Ts, Tv);
Chris@42 118 TP = VFNMS(LDK(KP500000000), TO, TN);
Chris@42 119 TY = VADD(TN, TO);
Chris@42 120 TT = VFNMS(LDK(KP866025403), TL, TK);
Chris@42 121 TM = VFMA(LDK(KP866025403), TL, TK);
Chris@42 122 TA = VFNMS(LDK(KP500000000), Tm, Tf);
Chris@42 123 Tn = VADD(Tf, Tm);
Chris@42 124 {
Chris@42 125 V T1f, T1n, TI, T18, T1k, T1c, TD, T17, T10, T1m, T16, T1e, TU, TR;
Chris@42 126 TU = VFNMS(LDK(KP866025403), TQ, TP);
Chris@42 127 TR = VFMA(LDK(KP866025403), TQ, TP);
Chris@42 128 {
Chris@42 129 V TZ, T15, TE, TB;
Chris@42 130 TZ = VFMA(LDK(KP302775637), TY, TX);
Chris@42 131 T15 = VFNMS(LDK(KP302775637), TX, TY);
Chris@42 132 TE = VSUB(Tz, TA);
Chris@42 133 TB = VADD(Tz, TA);
Chris@42 134 {
Chris@42 135 V TH, To, TV, T13;
Chris@42 136 TH = VSUB(Tc, Tn);
Chris@42 137 To = VADD(Tc, Tn);
Chris@42 138 TV = VFNMS(LDK(KP612264650), TU, TT);
Chris@42 139 T13 = VFMA(LDK(KP612264650), TT, TU);
Chris@42 140 {
Chris@42 141 V TS, T12, TG, T1b;
Chris@42 142 TS = VFNMS(LDK(KP038632954), TR, TM);
Chris@42 143 T12 = VFMA(LDK(KP038632954), TM, TR);
Chris@42 144 TG = VFNMS(LDK(KP514918778), TF, TE);
Chris@42 145 T1b = VFMA(LDK(KP686558370), TE, TF);
Chris@42 146 {
Chris@42 147 V TC, T1a, Tp, TW, T14;
Chris@42 148 TC = VFMA(LDK(KP301479260), TB, Tw);
Chris@42 149 T1a = VFNMS(LDK(KP226109445), Tw, TB);
Chris@42 150 Tp = VFNMS(LDK(KP083333333), To, T1);
/* Output 0: T1 plus To, where To = Tc + Tn accumulates the other twelve inputs. */
Chris@42 151 ST(&(xo[0]), VADD(T1, To), ovs, &(xo[0]));
Chris@42 152 T1f = VFMA(LDK(KP853480001), TV, TS);
Chris@42 153 TW = VFNMS(LDK(KP853480001), TV, TS);
Chris@42 154 T1n = VFMA(LDK(KP853480001), T13, T12);
Chris@42 155 T14 = VFNMS(LDK(KP853480001), T13, T12);
Chris@42 156 TI = VFMA(LDK(KP581704778), TH, TG);
Chris@42 157 T18 = VFNMS(LDK(KP859542535), TG, TH);
Chris@42 158 T1k = VFMA(LDK(KP769338817), T1b, T1a);
Chris@42 159 T1c = VFNMS(LDK(KP769338817), T1b, T1a);
Chris@42 160 TD = VFMA(LDK(KP503537032), TC, Tp);
Chris@42 161 T17 = VFNMS(LDK(KP251768516), TC, Tp);
Chris@42 162 T10 = VMUL(LDK(KP600477271), VFMA(LDK(KP957805992), TZ, TW));
Chris@42 163 T1m = VFNMS(LDK(KP522026385), TW, TZ);
Chris@42 164 T16 = VMUL(LDK(KP600477271), VFMA(LDK(KP957805992), T15, T14));
Chris@42 165 T1e = VFNMS(LDK(KP522026385), T14, T15);
Chris@42 166 }
Chris@42 167 }
Chris@42 168 }
Chris@42 169 }
Chris@42 170 {
Chris@42 171 V T1o, T1q, T1g, T1i, T1d, T1h, T1l, T1p;
Chris@42 172 {
Chris@42 173 V T11, TJ, T19, T1j;
Chris@42 174 T11 = VFMA(LDK(KP516520780), TI, TD);
Chris@42 175 TJ = VFNMS(LDK(KP516520780), TI, TD);
Chris@42 176 T19 = VFMA(LDK(KP300462606), T18, T17);
Chris@42 177 T1j = VFNMS(LDK(KP300462606), T18, T17);
Chris@42 178 T1o = VMUL(LDK(KP575140729), VFNMS(LDK(KP904176221), T1n, T1m));
Chris@42 179 T1q = VMUL(LDK(KP575140729), VFMA(LDK(KP904176221), T1n, T1m));
Chris@42 180 T1g = VMUL(LDK(KP575140729), VFMA(LDK(KP904176221), T1f, T1e));
Chris@42 181 T1i = VMUL(LDK(KP575140729), VFNMS(LDK(KP904176221), T1f, T1e));
/* The remaining outputs are written as pairs (k, 13 - k) that share the same two operands: one is stored via VFMAI, the other via VFNMSI. */
Chris@42 182 ST(&(xo[WS(os, 12)]), VFMAI(T16, T11), ovs, &(xo[0]));
Chris@42 183 ST(&(xo[WS(os, 1)]), VFNMSI(T16, T11), ovs, &(xo[WS(os, 1)]));
Chris@42 184 ST(&(xo[WS(os, 8)]), VFNMSI(T10, TJ), ovs, &(xo[0]));
Chris@42 185 ST(&(xo[WS(os, 5)]), VFMAI(T10, TJ), ovs, &(xo[WS(os, 1)]));
Chris@42 186 T1d = VFNMS(LDK(KP503537032), T1c, T19);
Chris@42 187 T1h = VFMA(LDK(KP503537032), T1c, T19);
Chris@42 188 T1l = VFNMS(LDK(KP503537032), T1k, T1j);
Chris@42 189 T1p = VFMA(LDK(KP503537032), T1k, T1j);
Chris@42 190 }
Chris@42 191 ST(&(xo[WS(os, 9)]), VFNMSI(T1g, T1d), ovs, &(xo[WS(os, 1)]));
Chris@42 192 ST(&(xo[WS(os, 4)]), VFMAI(T1g, T1d), ovs, &(xo[0]));
Chris@42 193 ST(&(xo[WS(os, 10)]), VFMAI(T1i, T1h), ovs, &(xo[0]));
Chris@42 194 ST(&(xo[WS(os, 3)]), VFNMSI(T1i, T1h), ovs, &(xo[WS(os, 1)]));
Chris@42 195 ST(&(xo[WS(os, 7)]), VFNMSI(T1o, T1l), ovs, &(xo[WS(os, 1)]));
Chris@42 196 ST(&(xo[WS(os, 6)]), VFMAI(T1o, T1l), ovs, &(xo[0]));
Chris@42 197 ST(&(xo[WS(os, 11)]), VFNMSI(T1q, T1p), ovs, &(xo[WS(os, 1)]));
Chris@42 198 ST(&(xo[WS(os, 2)]), VFMAI(T1q, T1p), ovs, &(xo[0]));
Chris@42 199 }
Chris@42 200 }
Chris@42 201 }
Chris@42 202 }
Chris@42 203 }
Chris@42 204 }
Chris@42 205 }
/* Invoked once, after the last pass of the loop. */
Chris@42 206 VLEAVE();
Chris@42 207 }
Chris@42 208
/*
 * Descriptor registered together with n1bv_13 in the HAVE_FMA build.
 * 13 and "n1bv_13" match the -n and -name generator options, and {31, 6, 57, 0}
 * begins with the "31 additions, 6 multiplications, 57 fused multiply/add"
 * counts quoted in the generator comment for this variant. The kdft_desc
 * field layout is declared elsewhere - TODO confirm what the trailing zeros
 * populate.
 */
Chris@42 209 static const kdft_desc desc = { 13, XSIMD_STRING("n1bv_13"), {31, 6, 57, 0}, &GENUS, 0, 0, 0, 0 };
Chris@42 210
/*
 * Registration entry point for the HAVE_FMA build: hands the planner p, the
 * n1bv_13 codelet and its descriptor to X(kdft_register). X() and XSIMD()
 * are macros defined elsewhere (presumably symbol-name wrappers - verify).
 */
Chris@42 211 void XSIMD(codelet_n1bv_13) (planner *p) {
Chris@42 212 X(kdft_register) (p, n1bv_13, &desc);
Chris@42 213 }
Chris@42 214
Chris@42 215 #else /* HAVE_FMA */
Chris@42 216
Chris@42 217 /* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 13 -name n1bv_13 -include n1b.h */
Chris@42 218
Chris@42 219 /*
Chris@42 220 * This function contains 88 FP additions, 34 FP multiplications,
Chris@42 221 * (or, 69 additions, 15 multiplications, 19 fused multiply/add),
Chris@42 222 * 60 stack variables, 20 constants, and 26 memory accesses
Chris@42 223 */
Chris@42 224 #include "n1b.h"
Chris@42 225
/*
 * n1bv_13 (build without HAVE_FMA): machine-generated SIMD codelet for a
 * 13-point transform, as named by "-n 13 -sign 1 -name n1bv_13" on the genfft
 * command line recorded in the comment preceding this function.
 *
 * Parameters, as used by the body below:
 *   ri, ro   - not referenced; every load goes through ii and every store
 *              through io (via the local cursors xi and xo).
 *   ii, io   - base pointers of the input and output data.
 *   is, os   - strides handed to WS(is, k) and WS(os, k) to address input
 *              and output element k, for k = 0..12.
 *   v        - loop count; the loop runs while i > 0 and subtracts VL per pass.
 *   ivs, ovs - xi advances by VL * ivs and xo by VL * ovs per pass; the same
 *              two values are also forwarded to every LD and ST.
 *
 * LD, ST, LDK, DVK, WS, VL, MAKE_VOLATILE_STRIDE, VLEAVE, the V type and the
 * V-prefixed arithmetic macros are defined outside this file section; their
 * exact semantics are not restated here.
 */
Chris@42 226 static void n1bv_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@42 227 {
/* Named constants; each one is consumed below through LDK(name). */
Chris@42 228 DVK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@42 229 DVK(KP083333333, +0.083333333333333333333333333333333333333333333);
Chris@42 230 DVK(KP075902986, +0.075902986037193865983102897245103540356428373);
Chris@42 231 DVK(KP251768516, +0.251768516431883313623436926934233488546674281);
Chris@42 232 DVK(KP132983124, +0.132983124607418643793760531921092974399165133);
Chris@42 233 DVK(KP258260390, +0.258260390311744861420450644284508567852516811);
Chris@42 234 DVK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
Chris@42 235 DVK(KP300238635, +0.300238635966332641462884626667381504676006424);
Chris@42 236 DVK(KP011599105, +0.011599105605768290721655456654083252189827041);
Chris@42 237 DVK(KP256247671, +0.256247671582936600958684654061725059144125175);
Chris@42 238 DVK(KP156891391, +0.156891391051584611046832726756003269660212636);
Chris@42 239 DVK(KP174138601, +0.174138601152135905005660794929264742616964676);
Chris@42 240 DVK(KP575140729, +0.575140729474003121368385547455453388461001608);
Chris@42 241 DVK(KP503537032, +0.503537032863766627246873853868466977093348562);
Chris@42 242 DVK(KP113854479, +0.113854479055790798974654345867655310534642560);
Chris@42 243 DVK(KP265966249, +0.265966249214837287587521063842185948798330267);
Chris@42 244 DVK(KP387390585, +0.387390585467617292130675966426762851778775217);
Chris@42 245 DVK(KP300462606, +0.300462606288665774426601772289207995520941381);
Chris@42 246 DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 247 DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 248 {
Chris@42 249 INT i;
Chris@42 250 const R *xi;
Chris@42 251 R *xo;
/* Only the ii and io pointers are used; ri and ro are never read. */
Chris@42 252 xi = ii;
Chris@42 253 xo = io;
/* Each pass takes VL off the remaining count and moves xi and xo on by VL * ivs and VL * ovs. */
Chris@42 254 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(26, is), MAKE_VOLATILE_STRIDE(26, os)) {
Chris@42 255 V TW, Tb, Tm, Ts, TB, TR, TX, TK, TU, Tz, TC, TN, TT;
/* Gather the 13 inputs xi[WS(is, 0)] .. xi[WS(is, 12)]; the loads are grouped with the first sums and differences that consume them. */
Chris@42 256 TW = LD(&(xi[0]), ivs, &(xi[0]));
Chris@42 257 {
Chris@42 258 V Te, TH, Ta, Tu, Tp, T5, Tt, To, Th, Tw, Tk, Tx, Tl, TI, Tc;
Chris@42 259 V Td, Tq, Tr;
Chris@42 260 Tc = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@42 261 Td = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@42 262 Te = VSUB(Tc, Td);
Chris@42 263 TH = VADD(Tc, Td);
Chris@42 264 {
Chris@42 265 V T6, T7, T8, T9;
Chris@42 266 T6 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@42 267 T7 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@42 268 T8 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@42 269 T9 = VADD(T7, T8);
Chris@42 270 Ta = VADD(T6, T9);
Chris@42 271 Tu = VFNMS(LDK(KP500000000), T9, T6);
Chris@42 272 Tp = VSUB(T7, T8);
Chris@42 273 }
Chris@42 274 {
Chris@42 275 V T1, T2, T3, T4;
Chris@42 276 T1 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@42 277 T2 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@42 278 T3 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@42 279 T4 = VADD(T2, T3);
Chris@42 280 T5 = VADD(T1, T4);
Chris@42 281 Tt = VFNMS(LDK(KP500000000), T4, T1);
Chris@42 282 To = VSUB(T2, T3);
Chris@42 283 }
Chris@42 284 {
Chris@42 285 V Tf, Tg, Ti, Tj;
Chris@42 286 Tf = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@42 287 Tg = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@42 288 Th = VSUB(Tf, Tg);
Chris@42 289 Tw = VADD(Tf, Tg);
Chris@42 290 Ti = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@42 291 Tj = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@42 292 Tk = VSUB(Ti, Tj);
Chris@42 293 Tx = VADD(Ti, Tj);
Chris@42 294 }
Chris@42 295 Tl = VADD(Th, Tk);
Chris@42 296 TI = VADD(Tw, Tx);
Chris@42 297 Tb = VSUB(T5, Ta);
Chris@42 298 Tm = VADD(Te, Tl);
Chris@42 299 Tq = VMUL(LDK(KP866025403), VSUB(To, Tp));
Chris@42 300 Tr = VFNMS(LDK(KP500000000), Tl, Te);
Chris@42 301 Ts = VADD(Tq, Tr);
Chris@42 302 TB = VSUB(Tq, Tr);
Chris@42 303 {
Chris@42 304 V TP, TQ, TG, TJ;
Chris@42 305 TP = VADD(T5, Ta);
Chris@42 306 TQ = VADD(TH, TI);
Chris@42 307 TR = VMUL(LDK(KP300462606), VSUB(TP, TQ));
Chris@42 308 TX = VADD(TP, TQ);
Chris@42 309 TG = VADD(Tt, Tu);
Chris@42 310 TJ = VFNMS(LDK(KP500000000), TI, TH);
Chris@42 311 TK = VSUB(TG, TJ);
Chris@42 312 TU = VADD(TG, TJ);
Chris@42 313 }
Chris@42 314 {
Chris@42 315 V Tv, Ty, TL, TM;
Chris@42 316 Tv = VSUB(Tt, Tu);
Chris@42 317 Ty = VMUL(LDK(KP866025403), VSUB(Tw, Tx));
Chris@42 318 Tz = VSUB(Tv, Ty);
Chris@42 319 TC = VADD(Tv, Ty);
Chris@42 320 TL = VADD(To, Tp);
Chris@42 321 TM = VSUB(Th, Tk);
Chris@42 322 TN = VSUB(TL, TM);
Chris@42 323 TT = VADD(TL, TM);
Chris@42 324 }
Chris@42 325 }
/* Output 0: TW plus TX, where TX = TP + TQ accumulates the other twelve inputs. */
Chris@42 326 ST(&(xo[0]), VADD(TW, TX), ovs, &(xo[0]));
Chris@42 327 {
Chris@42 328 V T1c, T1n, T11, T14, T17, T1k, Tn, TE, T18, T1j, TS, T1m, TZ, T1f, TA;
Chris@42 329 V TD;
Chris@42 330 {
Chris@42 331 V T1a, T1b, T12, T13;
Chris@42 332 T1a = VFMA(LDK(KP387390585), TN, VMUL(LDK(KP265966249), TK));
Chris@42 333 T1b = VFNMS(LDK(KP503537032), TU, VMUL(LDK(KP113854479), TT));
Chris@42 334 T1c = VSUB(T1a, T1b);
Chris@42 335 T1n = VADD(T1a, T1b);
Chris@42 336 T11 = VFMA(LDK(KP575140729), Tb, VMUL(LDK(KP174138601), Tm));
Chris@42 337 T12 = VFNMS(LDK(KP256247671), Tz, VMUL(LDK(KP156891391), Ts));
Chris@42 338 T13 = VFMA(LDK(KP011599105), TB, VMUL(LDK(KP300238635), TC));
Chris@42 339 T14 = VADD(T12, T13);
Chris@42 340 T17 = VSUB(T11, T14);
Chris@42 341 T1k = VMUL(LDK(KP1_732050807), VSUB(T12, T13));
Chris@42 342 }
Chris@42 343 Tn = VFNMS(LDK(KP575140729), Tm, VMUL(LDK(KP174138601), Tb));
Chris@42 344 TA = VFMA(LDK(KP256247671), Ts, VMUL(LDK(KP156891391), Tz));
Chris@42 345 TD = VFNMS(LDK(KP011599105), TC, VMUL(LDK(KP300238635), TB));
Chris@42 346 TE = VADD(TA, TD);
Chris@42 347 T18 = VMUL(LDK(KP1_732050807), VSUB(TD, TA));
Chris@42 348 T1j = VSUB(Tn, TE);
Chris@42 349 {
Chris@42 350 V TO, T1e, TV, TY, T1d;
Chris@42 351 TO = VFNMS(LDK(KP132983124), TN, VMUL(LDK(KP258260390), TK));
Chris@42 352 T1e = VSUB(TR, TO);
Chris@42 353 TV = VFMA(LDK(KP251768516), TT, VMUL(LDK(KP075902986), TU));
Chris@42 354 TY = VFNMS(LDK(KP083333333), TX, TW);
Chris@42 355 T1d = VSUB(TY, TV);
Chris@42 356 TS = VFMA(LDK(KP2_000000000), TO, TR);
Chris@42 357 T1m = VADD(T1e, T1d);
Chris@42 358 TZ = VFMA(LDK(KP2_000000000), TV, TY);
Chris@42 359 T1f = VSUB(T1d, T1e);
Chris@42 360 }
Chris@42 361 {
Chris@42 362 V TF, T10, T1l, T1o;
/* The remaining outputs are written as pairs (k, 13 - k): a VBYI(...) term is added to a shared term for one output and subtracted from it for the other. */
Chris@42 363 TF = VBYI(VFMA(LDK(KP2_000000000), TE, Tn));
Chris@42 364 T10 = VADD(TS, TZ);
Chris@42 365 ST(&(xo[WS(os, 1)]), VADD(TF, T10), ovs, &(xo[WS(os, 1)]));
Chris@42 366 ST(&(xo[WS(os, 12)]), VSUB(T10, TF), ovs, &(xo[0]));
Chris@42 367 {
Chris@42 368 V T15, T16, T1p, T1q;
Chris@42 369 T15 = VBYI(VFMA(LDK(KP2_000000000), T14, T11));
Chris@42 370 T16 = VSUB(TZ, TS);
Chris@42 371 ST(&(xo[WS(os, 5)]), VADD(T15, T16), ovs, &(xo[WS(os, 1)]));
Chris@42 372 ST(&(xo[WS(os, 8)]), VSUB(T16, T15), ovs, &(xo[0]));
Chris@42 373 T1p = VADD(T1n, T1m);
Chris@42 374 T1q = VBYI(VADD(T1j, T1k));
Chris@42 375 ST(&(xo[WS(os, 4)]), VSUB(T1p, T1q), ovs, &(xo[0]));
Chris@42 376 ST(&(xo[WS(os, 9)]), VADD(T1q, T1p), ovs, &(xo[WS(os, 1)]));
Chris@42 377 }
Chris@42 378 T1l = VBYI(VSUB(T1j, T1k));
Chris@42 379 T1o = VSUB(T1m, T1n);
Chris@42 380 ST(&(xo[WS(os, 3)]), VADD(T1l, T1o), ovs, &(xo[WS(os, 1)]));
Chris@42 381 ST(&(xo[WS(os, 10)]), VSUB(T1o, T1l), ovs, &(xo[0]));
Chris@42 382 {
Chris@42 383 V T1h, T1i, T19, T1g;
Chris@42 384 T1h = VBYI(VADD(T18, T17));
Chris@42 385 T1i = VSUB(T1f, T1c);
Chris@42 386 ST(&(xo[WS(os, 6)]), VADD(T1h, T1i), ovs, &(xo[0]));
Chris@42 387 ST(&(xo[WS(os, 7)]), VSUB(T1i, T1h), ovs, &(xo[WS(os, 1)]));
Chris@42 388 T19 = VBYI(VSUB(T17, T18));
Chris@42 389 T1g = VADD(T1c, T1f);
Chris@42 390 ST(&(xo[WS(os, 2)]), VADD(T19, T1g), ovs, &(xo[0]));
Chris@42 391 ST(&(xo[WS(os, 11)]), VSUB(T1g, T19), ovs, &(xo[WS(os, 1)]));
Chris@42 392 }
Chris@42 393 }
Chris@42 394 }
Chris@42 395 }
Chris@42 396 }
/* Invoked once, after the last pass of the loop. */
Chris@42 397 VLEAVE();
Chris@42 398 }
Chris@42 399
/*
 * Descriptor registered together with n1bv_13 in the build without HAVE_FMA.
 * 13 and "n1bv_13" match the -n and -name generator options, and
 * {69, 15, 19, 0} begins with the "69 additions, 15 multiplications, 19 fused
 * multiply/add" counts quoted in the generator comment for this variant. The
 * kdft_desc field layout is declared elsewhere - TODO confirm what the
 * trailing zeros populate.
 */
Chris@42 400 static const kdft_desc desc = { 13, XSIMD_STRING("n1bv_13"), {69, 15, 19, 0}, &GENUS, 0, 0, 0, 0 };
Chris@42 401
/*
 * Registration entry point for the build without HAVE_FMA: hands the planner
 * p, the n1bv_13 codelet and its descriptor to X(kdft_register). X() and
 * XSIMD() are macros defined elsewhere (presumably symbol-name wrappers -
 * verify).
 */
Chris@42 402 void XSIMD(codelet_n1bv_13) (planner *p) {
Chris@42 403 X(kdft_register) (p, n1bv_13, &desc);
Chris@42 404 }
Chris@42 405
Chris@42 406 #endif /* HAVE_FMA */