annotate src/fftw-3.3.8/dft/simd/common/n1bv_13.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:57 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_notw_c.native -fma -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 13 -name n1bv_13 -include dft/simd/n1b.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 88 FP additions, 63 FP multiplications,
Chris@82 32 * (or, 31 additions, 6 multiplications, 57 fused multiply/add),
Chris@82 33 * 63 stack variables, 23 constants, and 26 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/simd/n1b.h"
Chris@82 36
/*
 * n1bv_13 -- FMA-oriented variant (the branch taken when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Size-13 SIMD codelet emitted by genfft (see the "Generated by" line:
 * -sign 1 -n 13).  Each loop pass loads 13 vectors from xi (xi[0] and
 * xi[WS(is, k)] for k = 1..12), combines them with vector add/sub/FMA
 * macros, and stores 13 vectors to xo (xo[0] and xo[WS(os, k)]).
 *
 * Parameters as used by this body:
 *   ri, ro   - never referenced here.
 *   ii, io   - input and output base pointers (copied to xi / xo).
 *   is, os   - strides passed to WS() to address input/output element k.
 *   v        - loop count; i starts at v and decreases by VL per pass.
 *   ivs, ovs - per-pass pointer advance is VL * ivs / VL * ovs; also passed
 *              to LD / ST.
 *
 * NOTE(review): V, LD, ST, LDK, DVK, VADD, VSUB, VMUL, VFMA, VFMS, VFNMS,
 * VFMAI, VFNMSI, WS, VL, MAKE_VOLATILE_STRIDE and VLEAVE come from the
 * included headers and are not visible in this file; their exact semantics
 * should be confirmed there.
 */
Chris@82 37 static void n1bv_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 38 {
/* Named vector constants; e.g. 0.5, 0.8660... (= sqrt(3)/2), 0.08333... (= 1/12). */
Chris@82 39 DVK(KP904176221, +0.904176221990848204433795481776887926501523162);
Chris@82 40 DVK(KP575140729, +0.575140729474003121368385547455453388461001608);
Chris@82 41 DVK(KP957805992, +0.957805992594665126462521754605754580515587217);
Chris@82 42 DVK(KP600477271, +0.600477271932665282925769253334763009352012849);
Chris@82 43 DVK(KP516520780, +0.516520780623489722840901288569017135705033622);
Chris@82 44 DVK(KP581704778, +0.581704778510515730456870384989698884939833902);
Chris@82 45 DVK(KP300462606, +0.300462606288665774426601772289207995520941381);
Chris@82 46 DVK(KP503537032, +0.503537032863766627246873853868466977093348562);
Chris@82 47 DVK(KP251768516, +0.251768516431883313623436926934233488546674281);
Chris@82 48 DVK(KP301479260, +0.301479260047709873958013540496673347309208464);
Chris@82 49 DVK(KP083333333, +0.083333333333333333333333333333333333333333333);
Chris@82 50 DVK(KP859542535, +0.859542535098774820163672132761689612766401925);
Chris@82 51 DVK(KP514918778, +0.514918778086315755491789696138117261566051239);
Chris@82 52 DVK(KP522026385, +0.522026385161275033714027226654165028300441940);
Chris@82 53 DVK(KP853480001, +0.853480001859823990758994934970528322872359049);
Chris@82 54 DVK(KP612264650, +0.612264650376756543746494474777125408779395514);
Chris@82 55 DVK(KP038632954, +0.038632954644348171955506895830342264440241080);
Chris@82 56 DVK(KP302775637, +0.302775637731994646559610633735247973125648287);
Chris@82 57 DVK(KP769338817, +0.769338817572980603471413688209101117038278899);
Chris@82 58 DVK(KP686558370, +0.686558370781754340655719594850823015421401653);
Chris@82 59 DVK(KP226109445, +0.226109445035782405468510155372505010481906348);
Chris@82 60 DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 61 DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 62 {
Chris@82 63 INT i;
Chris@82 64 const R *xi;
Chris@82 65 R *xo;
/* Only ii / io are used as data pointers; ri and ro are not touched. */
Chris@82 66 xi = ii;
Chris@82 67 xo = io;
/* i counts down from v in steps of VL while xi / xo advance by VL*ivs / VL*ovs. */
Chris@82 68 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(26, is), MAKE_VOLATILE_STRIDE(26, os)) {
Chris@82 69 V T1, TX, TY, To, TH, TR, TU, TB, TE, Tw, TF, TM, TT;
/* Input 0 is kept in T1; inputs 1..12 are loaded and combined in the block below. */
Chris@82 70 T1 = LD(&(xi[0]), ivs, &(xi[0]));
Chris@82 71 {
Chris@82 72 V Tf, TN, Tb, Ty, Tq, T6, Tx, Tr, Ti, Tt, Tl, Tu, Tm, TO, Td;
Chris@82 73 V Te, Tc, Tn;
/* Inputs 8 and 5: sum and difference. */
Chris@82 74 Td = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@82 75 Te = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@82 76 Tf = VADD(Td, Te);
Chris@82 77 TN = VSUB(Td, Te);
Chris@82 78 {
/* Inputs 12, 10, 4. */
Chris@82 79 V T7, T8, T9, Ta;
Chris@82 80 T7 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@82 81 T8 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@82 82 T9 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@82 83 Ta = VADD(T8, T9);
Chris@82 84 Tb = VADD(T7, Ta);
Chris@82 85 Ty = VFMS(LDK(KP500000000), Ta, T7);
Chris@82 86 Tq = VSUB(T8, T9);
Chris@82 87 }
Chris@82 88 {
/* Inputs 1, 3, 9. */
Chris@82 89 V T2, T3, T4, T5;
Chris@82 90 T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@82 91 T3 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@82 92 T4 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@82 93 T5 = VADD(T3, T4);
Chris@82 94 T6 = VADD(T2, T5);
Chris@82 95 Tx = VFNMS(LDK(KP500000000), T5, T2);
Chris@82 96 Tr = VSUB(T4, T3);
Chris@82 97 }
Chris@82 98 {
/* Inputs 11, 6 and 7, 2: sums and differences of each pair. */
Chris@82 99 V Tg, Th, Tj, Tk;
Chris@82 100 Tg = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@82 101 Th = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@82 102 Ti = VADD(Tg, Th);
Chris@82 103 Tt = VSUB(Tg, Th);
Chris@82 104 Tj = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@82 105 Tk = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@82 106 Tl = VADD(Tj, Tk);
Chris@82 107 Tu = VSUB(Tj, Tk);
Chris@82 108 }
Chris@82 109 Tm = VADD(Ti, Tl);
Chris@82 110 TO = VADD(Tt, Tu);
Chris@82 111 TX = VSUB(T6, Tb);
Chris@82 112 TY = VADD(TN, TO);
/* To accumulates the sum of inputs 1..12 (Tc covers 1,3,9,12,10,4; Tn covers 8,5,11,6,7,2). */
Chris@82 113 Tc = VADD(T6, Tb);
Chris@82 114 Tn = VADD(Tf, Tm);
Chris@82 115 To = VADD(Tc, Tn);
Chris@82 116 TH = VSUB(Tc, Tn);
Chris@82 117 {
Chris@82 118 V TP, TQ, Tz, TA;
Chris@82 119 TP = VFNMS(LDK(KP500000000), TO, TN);
Chris@82 120 TQ = VADD(Tr, Tq);
Chris@82 121 TR = VFMA(LDK(KP866025403), TQ, TP);
Chris@82 122 TU = VFNMS(LDK(KP866025403), TQ, TP);
Chris@82 123 Tz = VSUB(Tx, Ty);
Chris@82 124 TA = VFNMS(LDK(KP500000000), Tm, Tf);
Chris@82 125 TB = VADD(Tz, TA);
Chris@82 126 TE = VSUB(Tz, TA);
Chris@82 127 }
Chris@82 128 {
Chris@82 129 V Ts, Tv, TK, TL;
Chris@82 130 Ts = VSUB(Tq, Tr);
Chris@82 131 Tv = VSUB(Tt, Tu);
Chris@82 132 Tw = VADD(Ts, Tv);
Chris@82 133 TF = VSUB(Ts, Tv);
Chris@82 134 TK = VADD(Tx, Ty);
Chris@82 135 TL = VSUB(Ti, Tl);
Chris@82 136 TM = VFMA(LDK(KP866025403), TL, TK);
Chris@82 137 TT = VFNMS(LDK(KP866025403), TL, TK);
Chris@82 138 }
Chris@82 139 }
/* Output 0 = input 0 + sum of inputs 1..12. */
Chris@82 140 ST(&(xo[0]), VADD(T1, To), ovs, &(xo[0]));
Chris@82 141 {
Chris@82 142 V T1c, T1k, T15, T14, T1e, T1n, TZ, TW, T1f, T1m, TD, T1j, TI, T19, TS;
Chris@82 143 V TV;
Chris@82 144 {
Chris@82 145 V T1a, T1b, T12, T13;
Chris@82 146 T1a = VFNMS(LDK(KP226109445), Tw, TB);
Chris@82 147 T1b = VFMA(LDK(KP686558370), TE, TF);
Chris@82 148 T1c = VFNMS(LDK(KP769338817), T1b, T1a);
Chris@82 149 T1k = VFMA(LDK(KP769338817), T1b, T1a);
Chris@82 150 T15 = VFNMS(LDK(KP302775637), TX, TY);
Chris@82 151 T12 = VFMA(LDK(KP038632954), TM, TR);
Chris@82 152 T13 = VFMA(LDK(KP612264650), TT, TU);
Chris@82 153 T14 = VFNMS(LDK(KP853480001), T13, T12);
Chris@82 154 T1e = VFNMS(LDK(KP522026385), T14, T15);
Chris@82 155 T1n = VFMA(LDK(KP853480001), T13, T12);
Chris@82 156 }
Chris@82 157 TZ = VFMA(LDK(KP302775637), TY, TX);
Chris@82 158 TS = VFNMS(LDK(KP038632954), TR, TM);
Chris@82 159 TV = VFNMS(LDK(KP612264650), TU, TT);
Chris@82 160 TW = VFNMS(LDK(KP853480001), TV, TS);
Chris@82 161 T1f = VFMA(LDK(KP853480001), TV, TS);
Chris@82 162 T1m = VFNMS(LDK(KP522026385), TW, TZ);
Chris@82 163 {
Chris@82 164 V TG, T18, Tp, TC, T17;
Chris@82 165 TG = VFNMS(LDK(KP514918778), TF, TE);
Chris@82 166 T18 = VFNMS(LDK(KP859542535), TG, TH);
Chris@82 167 Tp = VFNMS(LDK(KP083333333), To, T1);
Chris@82 168 TC = VFMA(LDK(KP301479260), TB, Tw);
Chris@82 169 T17 = VFNMS(LDK(KP251768516), TC, Tp);
Chris@82 170 TD = VFMA(LDK(KP503537032), TC, Tp);
Chris@82 171 T1j = VFNMS(LDK(KP300462606), T18, T17);
Chris@82 172 TI = VFMA(LDK(KP581704778), TH, TG);
Chris@82 173 T19 = VFMA(LDK(KP300462606), T18, T17);
Chris@82 174 }
Chris@82 175 {
/*
 * Outputs 1..12 are written in index pairs (k, 13-k): each pair is built
 * from the same two operands, one store using VFMAI and the other VFNMSI.
 */
Chris@82 176 V TJ, T10, T1l, T1o;
Chris@82 177 TJ = VFNMS(LDK(KP516520780), TI, TD);
Chris@82 178 T10 = VMUL(LDK(KP600477271), VFMA(LDK(KP957805992), TZ, TW));
Chris@82 179 ST(&(xo[WS(os, 5)]), VFMAI(T10, TJ), ovs, &(xo[WS(os, 1)]));
Chris@82 180 ST(&(xo[WS(os, 8)]), VFNMSI(T10, TJ), ovs, &(xo[0]));
Chris@82 181 {
Chris@82 182 V T11, T16, T1p, T1q;
Chris@82 183 T11 = VFMA(LDK(KP516520780), TI, TD);
Chris@82 184 T16 = VMUL(LDK(KP600477271), VFMA(LDK(KP957805992), T15, T14));
Chris@82 185 ST(&(xo[WS(os, 1)]), VFNMSI(T16, T11), ovs, &(xo[WS(os, 1)]));
Chris@82 186 ST(&(xo[WS(os, 12)]), VFMAI(T16, T11), ovs, &(xo[0]));
Chris@82 187 T1p = VFMA(LDK(KP503537032), T1k, T1j);
Chris@82 188 T1q = VMUL(LDK(KP575140729), VFMA(LDK(KP904176221), T1n, T1m));
Chris@82 189 ST(&(xo[WS(os, 2)]), VFMAI(T1q, T1p), ovs, &(xo[0]));
Chris@82 190 ST(&(xo[WS(os, 11)]), VFNMSI(T1q, T1p), ovs, &(xo[WS(os, 1)]));
Chris@82 191 }
Chris@82 192 T1l = VFNMS(LDK(KP503537032), T1k, T1j);
Chris@82 193 T1o = VMUL(LDK(KP575140729), VFNMS(LDK(KP904176221), T1n, T1m));
Chris@82 194 ST(&(xo[WS(os, 6)]), VFMAI(T1o, T1l), ovs, &(xo[0]));
Chris@82 195 ST(&(xo[WS(os, 7)]), VFNMSI(T1o, T1l), ovs, &(xo[WS(os, 1)]));
Chris@82 196 {
Chris@82 197 V T1h, T1i, T1d, T1g;
Chris@82 198 T1h = VFMA(LDK(KP503537032), T1c, T19);
Chris@82 199 T1i = VMUL(LDK(KP575140729), VFNMS(LDK(KP904176221), T1f, T1e));
Chris@82 200 ST(&(xo[WS(os, 3)]), VFNMSI(T1i, T1h), ovs, &(xo[WS(os, 1)]));
Chris@82 201 ST(&(xo[WS(os, 10)]), VFMAI(T1i, T1h), ovs, &(xo[0]));
Chris@82 202 T1d = VFNMS(LDK(KP503537032), T1c, T19);
Chris@82 203 T1g = VMUL(LDK(KP575140729), VFMA(LDK(KP904176221), T1f, T1e));
Chris@82 204 ST(&(xo[WS(os, 4)]), VFMAI(T1g, T1d), ovs, &(xo[0]));
Chris@82 205 ST(&(xo[WS(os, 9)]), VFNMSI(T1g, T1d), ovs, &(xo[WS(os, 1)]));
Chris@82 206 }
Chris@82 207 }
Chris@82 208 }
Chris@82 209 }
Chris@82 210 }
/* NOTE(review): VLEAVE() is defined in the SIMD support headers; its effect is not visible here. */
Chris@82 211 VLEAVE();
Chris@82 212 }
Chris@82 213
/*
 * Descriptor handed to X(kdft_register) for the FMA variant: size 13, the
 * codelet name string, and the counts {31, 6, 57, 0}, whose first three
 * entries equal the additions / multiplications / fused multiply-adds quoted
 * in the generator comment for this variant.
 * NOTE(review): the meaning of the fourth count, GENUS and the trailing
 * zero fields is defined by kdft_desc elsewhere -- confirm there.
 */
Chris@82 214 static const kdft_desc desc = { 13, XSIMD_STRING("n1bv_13"), {31, 6, 57, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 215
/*
 * Registration entry point (FMA variant): passes the n1bv_13 function and
 * its descriptor to X(kdft_register) for planner p.
 */
Chris@82 216 void XSIMD(codelet_n1bv_13) (planner *p) {
Chris@82 217 X(kdft_register) (p, n1bv_13, &desc);
Chris@82 218 }
Chris@82 219
Chris@82 220 #else
Chris@82 221
Chris@82 222 /* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 13 -name n1bv_13 -include dft/simd/n1b.h */
Chris@82 223
Chris@82 224 /*
Chris@82 225 * This function contains 88 FP additions, 34 FP multiplications,
Chris@82 226 * (or, 69 additions, 15 multiplications, 19 fused multiply/add),
Chris@82 227 * 60 stack variables, 20 constants, and 26 memory accesses
Chris@82 228 */
Chris@82 229 #include "dft/simd/n1b.h"
Chris@82 230
/*
 * n1bv_13 -- non-FMA variant (the #else branch, used when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Size-13 SIMD codelet emitted by genfft (see the "Generated by" line:
 * -sign 1 -n 13, without -fma).  Each loop pass loads 13 vectors from xi
 * (xi[0] and xi[WS(is, k)] for k = 1..12), combines them with vector
 * add/sub/mul/FMA macros, and stores 13 vectors to xo.
 *
 * Parameters as used by this body:
 *   ri, ro   - never referenced here.
 *   ii, io   - input and output base pointers (copied to xi / xo).
 *   is, os   - strides passed to WS() to address input/output element k.
 *   v        - loop count; i starts at v and decreases by VL per pass.
 *   ivs, ovs - per-pass pointer advance is VL * ivs / VL * ovs; also passed
 *              to LD / ST.
 *
 * NOTE(review): V, LD, ST, LDK, DVK, VADD, VSUB, VMUL, VFMA, VFNMS, VBYI,
 * WS, VL, MAKE_VOLATILE_STRIDE and VLEAVE come from the included headers and
 * are not visible in this file; their exact semantics should be confirmed
 * there.
 */
Chris@82 231 static void n1bv_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 232 {
/* Named vector constants; e.g. 2, 1.7320... (= sqrt(3)), 0.8660... (= sqrt(3)/2), 0.5, 0.08333... (= 1/12). */
Chris@82 233 DVK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 234 DVK(KP083333333, +0.083333333333333333333333333333333333333333333);
Chris@82 235 DVK(KP075902986, +0.075902986037193865983102897245103540356428373);
Chris@82 236 DVK(KP251768516, +0.251768516431883313623436926934233488546674281);
Chris@82 237 DVK(KP132983124, +0.132983124607418643793760531921092974399165133);
Chris@82 238 DVK(KP258260390, +0.258260390311744861420450644284508567852516811);
Chris@82 239 DVK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
Chris@82 240 DVK(KP300238635, +0.300238635966332641462884626667381504676006424);
Chris@82 241 DVK(KP011599105, +0.011599105605768290721655456654083252189827041);
Chris@82 242 DVK(KP256247671, +0.256247671582936600958684654061725059144125175);
Chris@82 243 DVK(KP156891391, +0.156891391051584611046832726756003269660212636);
Chris@82 244 DVK(KP174138601, +0.174138601152135905005660794929264742616964676);
Chris@82 245 DVK(KP575140729, +0.575140729474003121368385547455453388461001608);
Chris@82 246 DVK(KP503537032, +0.503537032863766627246873853868466977093348562);
Chris@82 247 DVK(KP113854479, +0.113854479055790798974654345867655310534642560);
Chris@82 248 DVK(KP265966249, +0.265966249214837287587521063842185948798330267);
Chris@82 249 DVK(KP387390585, +0.387390585467617292130675966426762851778775217);
Chris@82 250 DVK(KP300462606, +0.300462606288665774426601772289207995520941381);
Chris@82 251 DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 252 DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 253 {
Chris@82 254 INT i;
Chris@82 255 const R *xi;
Chris@82 256 R *xo;
/* Only ii / io are used as data pointers; ri and ro are not touched. */
Chris@82 257 xi = ii;
Chris@82 258 xo = io;
/* i counts down from v in steps of VL while xi / xo advance by VL*ivs / VL*ovs. */
Chris@82 259 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(26, is), MAKE_VOLATILE_STRIDE(26, os)) {
Chris@82 260 V TW, Tb, Tm, Ts, TB, TR, TX, TK, TU, Tz, TC, TN, TT;
/* Input 0 is kept in TW; inputs 1..12 are loaded and combined in the block below. */
Chris@82 261 TW = LD(&(xi[0]), ivs, &(xi[0]));
Chris@82 262 {
Chris@82 263 V Te, TH, Ta, Tu, Tp, T5, Tt, To, Th, Tw, Tk, Tx, Tl, TI, Tc;
Chris@82 264 V Td, Tq, Tr;
/* Inputs 8 and 5: difference and sum. */
Chris@82 265 Tc = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@82 266 Td = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@82 267 Te = VSUB(Tc, Td);
Chris@82 268 TH = VADD(Tc, Td);
Chris@82 269 {
/* Inputs 12, 10, 4. */
Chris@82 270 V T6, T7, T8, T9;
Chris@82 271 T6 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@82 272 T7 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@82 273 T8 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@82 274 T9 = VADD(T7, T8);
Chris@82 275 Ta = VADD(T6, T9);
Chris@82 276 Tu = VFNMS(LDK(KP500000000), T9, T6);
Chris@82 277 Tp = VSUB(T7, T8);
Chris@82 278 }
Chris@82 279 {
/* Inputs 1, 3, 9. */
Chris@82 280 V T1, T2, T3, T4;
Chris@82 281 T1 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@82 282 T2 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@82 283 T3 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@82 284 T4 = VADD(T2, T3);
Chris@82 285 T5 = VADD(T1, T4);
Chris@82 286 Tt = VFNMS(LDK(KP500000000), T4, T1);
Chris@82 287 To = VSUB(T2, T3);
Chris@82 288 }
Chris@82 289 {
/* Inputs 11, 6 and 7, 2: differences and sums of each pair. */
Chris@82 290 V Tf, Tg, Ti, Tj;
Chris@82 291 Tf = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@82 292 Tg = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@82 293 Th = VSUB(Tf, Tg);
Chris@82 294 Tw = VADD(Tf, Tg);
Chris@82 295 Ti = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@82 296 Tj = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@82 297 Tk = VSUB(Ti, Tj);
Chris@82 298 Tx = VADD(Ti, Tj);
Chris@82 299 }
Chris@82 300 Tl = VADD(Th, Tk);
Chris@82 301 TI = VADD(Tw, Tx);
Chris@82 302 Tb = VSUB(T5, Ta);
Chris@82 303 Tm = VADD(Te, Tl);
Chris@82 304 Tq = VMUL(LDK(KP866025403), VSUB(To, Tp));
Chris@82 305 Tr = VFNMS(LDK(KP500000000), Tl, Te);
Chris@82 306 Ts = VADD(Tq, Tr);
Chris@82 307 TB = VSUB(Tq, Tr);
Chris@82 308 {
/* TX accumulates the sum of inputs 1..12 (TP covers 1,3,9,12,10,4; TQ covers 8,5,11,6,7,2). */
Chris@82 309 V TP, TQ, TG, TJ;
Chris@82 310 TP = VADD(T5, Ta);
Chris@82 311 TQ = VADD(TH, TI);
Chris@82 312 TR = VMUL(LDK(KP300462606), VSUB(TP, TQ));
Chris@82 313 TX = VADD(TP, TQ);
Chris@82 314 TG = VADD(Tt, Tu);
Chris@82 315 TJ = VFNMS(LDK(KP500000000), TI, TH);
Chris@82 316 TK = VSUB(TG, TJ);
Chris@82 317 TU = VADD(TG, TJ);
Chris@82 318 }
Chris@82 319 {
Chris@82 320 V Tv, Ty, TL, TM;
Chris@82 321 Tv = VSUB(Tt, Tu);
Chris@82 322 Ty = VMUL(LDK(KP866025403), VSUB(Tw, Tx));
Chris@82 323 Tz = VSUB(Tv, Ty);
Chris@82 324 TC = VADD(Tv, Ty);
Chris@82 325 TL = VADD(To, Tp);
Chris@82 326 TM = VSUB(Th, Tk);
Chris@82 327 TN = VSUB(TL, TM);
Chris@82 328 TT = VADD(TL, TM);
Chris@82 329 }
Chris@82 330 }
/* Output 0 = input 0 + sum of inputs 1..12. */
Chris@82 331 ST(&(xo[0]), VADD(TW, TX), ovs, &(xo[0]));
Chris@82 332 {
Chris@82 333 V T1c, T1n, T11, T14, T17, T1k, Tn, TE, T18, T1j, TS, T1m, TZ, T1f, TA;
Chris@82 334 V TD;
Chris@82 335 {
Chris@82 336 V T1a, T1b, T12, T13;
Chris@82 337 T1a = VFMA(LDK(KP387390585), TN, VMUL(LDK(KP265966249), TK));
Chris@82 338 T1b = VFNMS(LDK(KP503537032), TU, VMUL(LDK(KP113854479), TT));
Chris@82 339 T1c = VSUB(T1a, T1b);
Chris@82 340 T1n = VADD(T1a, T1b);
Chris@82 341 T11 = VFMA(LDK(KP575140729), Tb, VMUL(LDK(KP174138601), Tm));
Chris@82 342 T12 = VFNMS(LDK(KP256247671), Tz, VMUL(LDK(KP156891391), Ts));
Chris@82 343 T13 = VFMA(LDK(KP011599105), TB, VMUL(LDK(KP300238635), TC));
Chris@82 344 T14 = VADD(T12, T13);
Chris@82 345 T17 = VSUB(T11, T14);
Chris@82 346 T1k = VMUL(LDK(KP1_732050807), VSUB(T12, T13));
Chris@82 347 }
Chris@82 348 Tn = VFNMS(LDK(KP575140729), Tm, VMUL(LDK(KP174138601), Tb));
Chris@82 349 TA = VFMA(LDK(KP256247671), Ts, VMUL(LDK(KP156891391), Tz));
Chris@82 350 TD = VFNMS(LDK(KP011599105), TC, VMUL(LDK(KP300238635), TB));
Chris@82 351 TE = VADD(TA, TD);
Chris@82 352 T18 = VMUL(LDK(KP1_732050807), VSUB(TD, TA));
Chris@82 353 T1j = VSUB(Tn, TE);
Chris@82 354 {
Chris@82 355 V TO, T1e, TV, TY, T1d;
Chris@82 356 TO = VFNMS(LDK(KP132983124), TN, VMUL(LDK(KP258260390), TK));
Chris@82 357 T1e = VSUB(TR, TO);
Chris@82 358 TV = VFMA(LDK(KP251768516), TT, VMUL(LDK(KP075902986), TU));
Chris@82 359 TY = VFNMS(LDK(KP083333333), TX, TW);
Chris@82 360 T1d = VSUB(TY, TV);
Chris@82 361 TS = VFMA(LDK(KP2_000000000), TO, TR);
Chris@82 362 T1m = VADD(T1e, T1d);
Chris@82 363 TZ = VFMA(LDK(KP2_000000000), TV, TY);
Chris@82 364 T1f = VSUB(T1d, T1e);
Chris@82 365 }
Chris@82 366 {
/*
 * Outputs 1..12 are written in index pairs (k, 13-k): each pair is the
 * VADD and VSUB of the same two operands, one of which is a VBYI(...) term.
 */
Chris@82 367 V TF, T10, T1l, T1o;
Chris@82 368 TF = VBYI(VFMA(LDK(KP2_000000000), TE, Tn));
Chris@82 369 T10 = VADD(TS, TZ);
Chris@82 370 ST(&(xo[WS(os, 1)]), VADD(TF, T10), ovs, &(xo[WS(os, 1)]));
Chris@82 371 ST(&(xo[WS(os, 12)]), VSUB(T10, TF), ovs, &(xo[0]));
Chris@82 372 {
Chris@82 373 V T15, T16, T1p, T1q;
Chris@82 374 T15 = VBYI(VFMA(LDK(KP2_000000000), T14, T11));
Chris@82 375 T16 = VSUB(TZ, TS);
Chris@82 376 ST(&(xo[WS(os, 5)]), VADD(T15, T16), ovs, &(xo[WS(os, 1)]));
Chris@82 377 ST(&(xo[WS(os, 8)]), VSUB(T16, T15), ovs, &(xo[0]));
Chris@82 378 T1p = VADD(T1n, T1m);
Chris@82 379 T1q = VBYI(VADD(T1j, T1k));
Chris@82 380 ST(&(xo[WS(os, 4)]), VSUB(T1p, T1q), ovs, &(xo[0]));
Chris@82 381 ST(&(xo[WS(os, 9)]), VADD(T1q, T1p), ovs, &(xo[WS(os, 1)]));
Chris@82 382 }
Chris@82 383 T1l = VBYI(VSUB(T1j, T1k));
Chris@82 384 T1o = VSUB(T1m, T1n);
Chris@82 385 ST(&(xo[WS(os, 3)]), VADD(T1l, T1o), ovs, &(xo[WS(os, 1)]));
Chris@82 386 ST(&(xo[WS(os, 10)]), VSUB(T1o, T1l), ovs, &(xo[0]));
Chris@82 387 {
Chris@82 388 V T1h, T1i, T19, T1g;
Chris@82 389 T1h = VBYI(VADD(T18, T17));
Chris@82 390 T1i = VSUB(T1f, T1c);
Chris@82 391 ST(&(xo[WS(os, 6)]), VADD(T1h, T1i), ovs, &(xo[0]));
Chris@82 392 ST(&(xo[WS(os, 7)]), VSUB(T1i, T1h), ovs, &(xo[WS(os, 1)]));
Chris@82 393 T19 = VBYI(VSUB(T17, T18));
Chris@82 394 T1g = VADD(T1c, T1f);
Chris@82 395 ST(&(xo[WS(os, 2)]), VADD(T19, T1g), ovs, &(xo[0]));
Chris@82 396 ST(&(xo[WS(os, 11)]), VSUB(T1g, T19), ovs, &(xo[WS(os, 1)]));
Chris@82 397 }
Chris@82 398 }
Chris@82 399 }
Chris@82 400 }
Chris@82 401 }
/* NOTE(review): VLEAVE() is defined in the SIMD support headers; its effect is not visible here. */
Chris@82 402 VLEAVE();
Chris@82 403 }
Chris@82 404
/*
 * Descriptor handed to X(kdft_register) for the non-FMA variant: size 13,
 * the codelet name string, and the counts {69, 15, 19, 0}, whose first three
 * entries equal the additions / multiplications / fused multiply-adds quoted
 * in the generator comment for this variant.
 * NOTE(review): the meaning of the fourth count, GENUS and the trailing
 * zero fields is defined by kdft_desc elsewhere -- confirm there.
 */
Chris@82 405 static const kdft_desc desc = { 13, XSIMD_STRING("n1bv_13"), {69, 15, 19, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 406
/*
 * Registration entry point (non-FMA variant): passes the n1bv_13 function
 * and its descriptor to X(kdft_register) for planner p.
 */
Chris@82 407 void XSIMD(codelet_n1bv_13) (planner *p) {
Chris@82 408 X(kdft_register) (p, n1bv_13, &desc);
Chris@82 409 }
Chris@82 410
Chris@82 411 #endif