Mercurial > hg > sv-dependency-builds
diff src/fftw-3.3.3/rdft/scalar/r2cb/hb_64.c @ 10:37bf6b4a2645
Add FFTW3
author | Chris Cannam |
---|---|
date | Wed, 20 Mar 2013 15:35:50 +0000 |
parents | |
children | |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/fftw-3.3.3/rdft/scalar/r2cb/hb_64.c Wed Mar 20 15:35:50 2013 +0000 @@ -0,0 +1,3959 @@ +/* + * Copyright (c) 2003, 2007-11 Matteo Frigo + * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Sun Nov 25 07:41:15 EST 2012 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -dif -name hb_64 -include hb.h */ + +/* + * This function contains 1038 FP additions, 644 FP multiplications, + * (or, 520 additions, 126 multiplications, 518 fused multiply/add), + * 231 stack variables, 15 constants, and 256 memory accesses + */ +#include "hb.h" + +static void hb_64(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP534511135, +0.534511135950791641089685961295362908582039528); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP303346683, +0.303346683607342391675883946941299872384187453); + DK(KP995184726, 
+0.995184726672196886244836953109479921575474869); + DK(KP098491403, +0.098491403357164253077197521291327432293052451); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP820678790, +0.820678790828660330972281985331011598767386482); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 126); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 126, MAKE_VOLATILE_STRIDE(128, rs)) { + E Tcx, Tcw, Tcv; + { + E Thy, Tv, T7n, T5B, TfP, Tey, Tkl, TjB, T6U, T2k, T7o, T2H, TiH, Tia, Tk8; + E Tj8, T6V, T5E, Tbz, T9N, Tb7, T9Q, Tgh, Tev, Tb6, T8G, TbA, T8N, TfO, TcU; + E Tgi, Td5, Ti3, T10, TjC, Tje, TiI, ThF, TeA, Tds, TjD, Tjb, TeB, Tdh, Tgl; + E TfT, Tgk, TfW, T6Z, T7r, T5H, T39, Tbb, TbC, T9S, T8V, T72, T7q, T5G, T3A; + E Tbe, TbD, T9T, T92, ThH, T1w, Tke, Tjq, Tkf, Tjt, TiK, ThO, Tgb, TgT, Tfc; + E Tec, Tg8, TgU, Tfd, Tel, T77, T83, T6i, T5a, T7a, T82, T6j, T5n, Tbj, Tcc; + E Tas, T9f, Tbm, Tcb, Tar, T9m, ThQ, T21, Tkb, Tjj, Tkc, Tjm, TiL, ThX, Tg4; + E TgW, Tf9, TdL, Tg1, TgX, Tfa, TdU, T7e, T80, T6f, T4h, T9q, Tbr, T7h, T7Z; + E T6g, T4u, T9D, T9C, Tbo, T9B, Tbp, T9x; + { + E T3v, T8Z, T8W, T90, T8X, T3y, T3q, T70; + { + E TcQ, TcT, Td4, TcZ; + { + E T24, T5t, T7, T27, T5w, Ti4, Tet, T2i, T5z, Te, Teu, Ti5, T5y, T2d, T8H; + E T2u, Td0, Tm, Ti7, Td3, T8I, T2p, Tq, T2w, Tp, TcV, T2E, Tr, T2x, T2y; + E Tes, Ter; + { + E T1, T2, T4, T5, T5u, T5v; + T1 = cr[0]; + T2 = ci[WS(rs, 31)]; + T4 = cr[WS(rs, 16)]; + T5 = ci[WS(rs, 15)]; + { + E T25, T3, T6, T26; + T25 = ci[WS(rs, 47)]; 
+ T24 = T1 - T2; + T3 = T1 + T2; + T5t = T4 - T5; + T6 = T4 + T5; + T26 = cr[WS(rs, 48)]; + T5u = ci[WS(rs, 63)]; + T5v = cr[WS(rs, 32)]; + TcQ = T3 - T6; + T7 = T3 + T6; + Tes = T25 - T26; + T27 = T25 + T26; + } + Ter = T5u - T5v; + T5w = T5u + T5v; + } + { + E Ta, T29, Tb, TcR, T2h, Tc, T2a, T2b; + { + E T2f, T2g, T8, T9; + T8 = cr[WS(rs, 8)]; + T9 = ci[WS(rs, 23)]; + Ti4 = Ter + Tes; + Tet = Ter - Tes; + T2f = ci[WS(rs, 39)]; + T2g = cr[WS(rs, 56)]; + Ta = T8 + T9; + T29 = T8 - T9; + Tb = ci[WS(rs, 7)]; + TcR = T2f - T2g; + T2h = T2f + T2g; + Tc = cr[WS(rs, 24)]; + T2a = ci[WS(rs, 55)]; + T2b = cr[WS(rs, 40)]; + } + { + E Tj, T2l, Ti, Td1, T2t, Tk, T2m, T2n; + { + E Tg, Th, T2r, T2s; + Tg = cr[WS(rs, 4)]; + { + E T2e, Td, TcS, T2c; + T2e = Tb - Tc; + Td = Tb + Tc; + TcS = T2a - T2b; + T2c = T2a + T2b; + T2i = T2e - T2h; + T5z = T2e + T2h; + Te = Ta + Td; + Teu = Ta - Td; + TcT = TcR - TcS; + Ti5 = TcS + TcR; + T5y = T29 + T2c; + T2d = T29 - T2c; + Th = ci[WS(rs, 27)]; + } + T2r = ci[WS(rs, 59)]; + T2s = cr[WS(rs, 36)]; + Tj = cr[WS(rs, 20)]; + T2l = Tg - Th; + Ti = Tg + Th; + Td1 = T2r - T2s; + T2t = T2r + T2s; + Tk = ci[WS(rs, 11)]; + T2m = ci[WS(rs, 43)]; + T2n = cr[WS(rs, 52)]; + } + { + E Tn, To, T2C, T2D; + Tn = ci[WS(rs, 3)]; + { + E T2q, Tl, Td2, T2o; + T2q = Tj - Tk; + Tl = Tj + Tk; + Td2 = T2m - T2n; + T2o = T2m + T2n; + T8H = T2t - T2q; + T2u = T2q + T2t; + Td0 = Ti - Tl; + Tm = Ti + Tl; + Ti7 = Td1 + Td2; + Td3 = Td1 - Td2; + T8I = T2l + T2o; + T2p = T2l - T2o; + To = cr[WS(rs, 28)]; + } + T2C = ci[WS(rs, 35)]; + T2D = cr[WS(rs, 60)]; + Tq = cr[WS(rs, 12)]; + T2w = Tn - To; + Tp = Tn + To; + TcV = T2C - T2D; + T2E = T2C + T2D; + Tr = ci[WS(rs, 19)]; + T2x = ci[WS(rs, 51)]; + T2y = cr[WS(rs, 44)]; + } + } + } + { + E Tj6, T8K, T8L, T9L, T8F, Ti6, T8E, T9M, T5C, T5D, Ti9, Tj7; + { + E T2F, Ti8, T2A, TjA, Tew, Tex, Tjz; + { + E Tf, TcY, TcX, Tu, T5x, T5A; + Tj6 = T7 - Te; + Tf = T7 + Te; + { + E T2B, Ts, TcW, T2z, Tt; + T2B = Tq - Tr; + Ts = Tq + Tr; + 
TcW = T2x - T2y; + T2z = T2x + T2y; + T8K = T2B + T2E; + T2F = T2B - T2E; + TcY = Tp - Ts; + Tt = Tp + Ts; + TcX = TcV - TcW; + Ti8 = TcV + TcW; + T8L = T2w + T2z; + T2A = T2w - T2z; + Tu = Tm + Tt; + TjA = Tm - Tt; + } + T9L = T5w - T5t; + T5x = T5t + T5w; + T5A = T5y - T5z; + T8F = T5y + T5z; + Td4 = Td0 + Td3; + Tew = Td0 - Td3; + Thy = Tf - Tu; + Tv = Tf + Tu; + T7n = FNMS(KP707106781, T5A, T5x); + T5B = FMA(KP707106781, T5A, T5x); + Tex = TcY + TcX; + TcZ = TcX - TcY; + Ti6 = Ti4 + Ti5; + Tjz = Ti4 - Ti5; + } + { + E T28, T2j, T2v, T2G; + T8E = T24 + T27; + T28 = T24 - T27; + TfP = Tew + Tex; + Tey = Tew - Tex; + Tkl = TjA + Tjz; + TjB = Tjz - TjA; + T2j = T2d + T2i; + T9M = T2d - T2i; + T5C = FMA(KP414213562, T2p, T2u); + T2v = FNMS(KP414213562, T2u, T2p); + T2G = FMA(KP414213562, T2F, T2A); + T5D = FNMS(KP414213562, T2A, T2F); + T6U = FNMS(KP707106781, T2j, T28); + T2k = FMA(KP707106781, T2j, T28); + T7o = T2v - T2G; + T2H = T2v + T2G; + Ti9 = Ti7 + Ti8; + Tj7 = Ti8 - Ti7; + } + } + { + E T8J, T9O, T9P, T8M; + TiH = Ti6 + Ti9; + Tia = Ti6 - Ti9; + Tk8 = Tj6 + Tj7; + Tj8 = Tj6 - Tj7; + T8J = FNMS(KP414213562, T8I, T8H); + T9O = FMA(KP414213562, T8H, T8I); + T6V = T5D - T5C; + T5E = T5C + T5D; + Tbz = FNMS(KP707106781, T9M, T9L); + T9N = FMA(KP707106781, T9M, T9L); + T9P = FMA(KP414213562, T8K, T8L); + T8M = FNMS(KP414213562, T8L, T8K); + Tb7 = T9O + T9P; + T9Q = T9O - T9P; + Tgh = Teu + Tet; + Tev = Tet - Teu; + Tb6 = FMA(KP707106781, T8F, T8E); + T8G = FNMS(KP707106781, T8F, T8E); + TbA = T8M - T8J; + T8N = T8J + T8M; + } + } + } + { + E T8S, TC, Tdn, Tdk, ThC, T3e, T8P, T36, T2X, Tda, TY, ThA, Tdf, T35, T2S; + E T3x, T3o, Tdl, TJ, ThD, Tdq, T3w, T3j, T34, TR, Tdc, Td9, Thz, T2N; + { + E TV, T2O, TU, Tdd, T2W, TW, T2P, T2Q; + { + E Tz, T3r, Ty, Tdj, T3u, TA, T3b, T3c; + { + E Tw, Tx, T3s, T3t; + Tw = cr[WS(rs, 2)]; + TfO = TcQ + TcT; + TcU = TcQ - TcT; + Tgi = Td4 + TcZ; + Td5 = TcZ - Td4; + Tx = ci[WS(rs, 29)]; + T3s = ci[WS(rs, 45)]; + T3t = cr[WS(rs, 
50)]; + Tz = cr[WS(rs, 18)]; + T3r = Tw - Tx; + Ty = Tw + Tx; + Tdj = T3s - T3t; + T3u = T3s + T3t; + TA = ci[WS(rs, 13)]; + T3b = ci[WS(rs, 61)]; + T3c = cr[WS(rs, 34)]; + } + { + E T3a, TB, Tdi, T3d; + T8S = T3r + T3u; + T3v = T3r - T3u; + T3a = Tz - TA; + TB = Tz + TA; + Tdi = T3b - T3c; + T3d = T3b + T3c; + TC = Ty + TB; + Tdn = Ty - TB; + Tdk = Tdi - Tdj; + ThC = Tdi + Tdj; + T3e = T3a + T3d; + T8P = T3d - T3a; + } + } + { + E TS, TT, T2U, T2V; + TS = cr[WS(rs, 6)]; + TT = ci[WS(rs, 25)]; + T2U = ci[WS(rs, 41)]; + T2V = cr[WS(rs, 54)]; + TV = ci[WS(rs, 9)]; + T2O = TS - TT; + TU = TS + TT; + Tdd = T2U - T2V; + T2W = T2U + T2V; + TW = cr[WS(rs, 22)]; + T2P = ci[WS(rs, 57)]; + T2Q = cr[WS(rs, 38)]; + } + { + E TG, T3f, TF, Tdo, T3n, TH, T3g, T3h; + { + E TD, TE, T3l, T3m; + TD = cr[WS(rs, 10)]; + { + E T2T, TX, Tde, T2R; + T2T = TV - TW; + TX = TV + TW; + Tde = T2P - T2Q; + T2R = T2P + T2Q; + T36 = T2T - T2W; + T2X = T2T + T2W; + Tda = TU - TX; + TY = TU + TX; + ThA = Tde + Tdd; + Tdf = Tdd - Tde; + T35 = T2O - T2R; + T2S = T2O + T2R; + TE = ci[WS(rs, 21)]; + } + T3l = ci[WS(rs, 37)]; + T3m = cr[WS(rs, 58)]; + TG = ci[WS(rs, 5)]; + T3f = TD - TE; + TF = TD + TE; + Tdo = T3l - T3m; + T3n = T3l + T3m; + TH = cr[WS(rs, 26)]; + T3g = ci[WS(rs, 53)]; + T3h = cr[WS(rs, 42)]; + } + { + E TO, T30, TN, Td8, T33, TP, T2K, T2L; + { + E TL, TM, T31, T32; + TL = ci[WS(rs, 1)]; + { + E T3k, TI, Tdp, T3i; + T3k = TG - TH; + TI = TG + TH; + Tdp = T3g - T3h; + T3i = T3g + T3h; + T3x = T3k - T3n; + T3o = T3k + T3n; + Tdl = TF - TI; + TJ = TF + TI; + ThD = Tdp + Tdo; + Tdq = Tdo - Tdp; + T3w = T3f - T3i; + T3j = T3f + T3i; + TM = cr[WS(rs, 30)]; + } + T31 = ci[WS(rs, 49)]; + T32 = cr[WS(rs, 46)]; + TO = cr[WS(rs, 14)]; + T30 = TL - TM; + TN = TL + TM; + Td8 = T31 - T32; + T33 = T31 + T32; + TP = ci[WS(rs, 17)]; + T2K = ci[WS(rs, 33)]; + T2L = cr[WS(rs, 62)]; + } + { + E T2J, TQ, Td7, T2M; + T8Z = T30 + T33; + T34 = T30 - T33; + T2J = TO - TP; + TQ = TO + TP; + Td7 = T2K - T2L; + 
T2M = T2K + T2L; + TR = TN + TQ; + Tdc = TN - TQ; + Td9 = Td7 - Td8; + Thz = Td7 + Td8; + T2N = T2J - T2M; + T8W = T2J + T2M; + } + } + } + } + { + E Tja, Tj9, TfU, TfV, TfR, Tdb, Tdg, TfS; + { + E ThE, ThB, Tdm, Tdr; + { + E Tjc, TK, TZ, Tjd; + Tjc = TC - TJ; + TK = TC + TJ; + TZ = TR + TY; + Tja = TR - TY; + Tjd = ThC - ThD; + ThE = ThC + ThD; + Tj9 = Thz - ThA; + ThB = Thz + ThA; + Ti3 = TK - TZ; + T10 = TK + TZ; + TjC = Tjc - Tjd; + Tje = Tjc + Tjd; + } + TfU = Tdl + Tdk; + Tdm = Tdk - Tdl; + Tdr = Tdn - Tdq; + TfV = Tdn + Tdq; + TiI = ThE + ThB; + ThF = ThB - ThE; + TeA = FMA(KP414213562, Tdm, Tdr); + Tds = FNMS(KP414213562, Tdr, Tdm); + TfR = Tda + Td9; + Tdb = Td9 - Tda; + Tdg = Tdc - Tdf; + TfS = Tdc + Tdf; + } + { + E T2Z, T6X, T37, T2Y; + TjD = Tja + Tj9; + Tjb = Tj9 - Tja; + TeB = FNMS(KP414213562, Tdb, Tdg); + Tdh = FMA(KP414213562, Tdg, Tdb); + T90 = T2S + T2X; + T2Y = T2S - T2X; + Tgl = FMA(KP414213562, TfR, TfS); + TfT = FNMS(KP414213562, TfS, TfR); + Tgk = FNMS(KP414213562, TfU, TfV); + TfW = FMA(KP414213562, TfV, TfU); + T2Z = FMA(KP707106781, T2Y, T2N); + T6X = FNMS(KP707106781, T2Y, T2N); + T37 = T35 + T36; + T8X = T35 - T36; + { + E T8Q, T8T, T3p, T6Y, T38; + T3y = T3w + T3x; + T8Q = T3x - T3w; + T8T = T3j + T3o; + T3p = T3j - T3o; + T6Y = FNMS(KP707106781, T37, T34); + T38 = FMA(KP707106781, T37, T34); + { + E Tb9, T8R, Tba, T8U; + Tb9 = FMA(KP707106781, T8Q, T8P); + T8R = FNMS(KP707106781, T8Q, T8P); + Tba = FMA(KP707106781, T8T, T8S); + T8U = FNMS(KP707106781, T8T, T8S); + T6Z = FMA(KP668178637, T6Y, T6X); + T7r = FNMS(KP668178637, T6X, T6Y); + T5H = FMA(KP198912367, T2Z, T38); + T39 = FNMS(KP198912367, T38, T2Z); + Tbb = FNMS(KP198912367, Tba, Tb9); + TbC = FMA(KP198912367, Tb9, Tba); + T9S = FNMS(KP668178637, T8R, T8U); + T8V = FMA(KP668178637, T8U, T8R); + T3q = FMA(KP707106781, T3p, T3e); + T70 = FNMS(KP707106781, T3p, T3e); + } + } + } + } + } + } + { + E T97, Tbk, T9j, T9k, Tbh, T9i, Tbi, T9e; + { + E T9g, T5f, T18, Ted, TdY, ThI, T4A, 
T95, T9b, T57, T1u, Te1, Te4, ThM, T52; + E T9c, T5h, T4K, TdZ, T1f, ThJ, Teg, T5g, T4F, T1j, Te8, T98, T4W, T4N, T1m; + E Te7, T4Q, T1n, Te6; + { + E T1q, Te3, T4Y, T1t, Te2, T51; + { + E T15, T5b, T14, TdX, T5e, T16, T4x, T4y; + { + E T12, T13, T5c, T5d, T71, T3z; + T12 = cr[WS(rs, 1)]; + T71 = FNMS(KP707106781, T3y, T3v); + T3z = FMA(KP707106781, T3y, T3v); + { + E Tbc, T8Y, Tbd, T91; + Tbc = FMA(KP707106781, T8X, T8W); + T8Y = FNMS(KP707106781, T8X, T8W); + Tbd = FMA(KP707106781, T90, T8Z); + T91 = FNMS(KP707106781, T90, T8Z); + T72 = FNMS(KP668178637, T71, T70); + T7q = FMA(KP668178637, T70, T71); + T5G = FNMS(KP198912367, T3q, T3z); + T3A = FMA(KP198912367, T3z, T3q); + Tbe = FNMS(KP198912367, Tbd, Tbc); + TbD = FMA(KP198912367, Tbc, Tbd); + T9T = FNMS(KP668178637, T8Y, T91); + T92 = FMA(KP668178637, T91, T8Y); + T13 = ci[WS(rs, 30)]; + } + T5c = ci[WS(rs, 46)]; + T5d = cr[WS(rs, 49)]; + T15 = cr[WS(rs, 17)]; + T5b = T12 - T13; + T14 = T12 + T13; + TdX = T5c - T5d; + T5e = T5c + T5d; + T16 = ci[WS(rs, 14)]; + T4x = ci[WS(rs, 62)]; + T4y = cr[WS(rs, 33)]; + } + { + E T4w, T17, TdW, T4z; + T9g = T5b + T5e; + T5f = T5b - T5e; + T4w = T15 - T16; + T17 = T15 + T16; + TdW = T4x - T4y; + T4z = T4x + T4y; + T18 = T14 + T17; + Ted = T14 - T17; + TdY = TdW - TdX; + ThI = TdW + TdX; + T4A = T4w + T4z; + T95 = T4z - T4w; + } + } + { + E T1r, T53, T56, T1s, T4Z, T50; + { + E T1o, T1p, T54, T55; + T1o = ci[WS(rs, 2)]; + T1p = cr[WS(rs, 29)]; + T54 = ci[WS(rs, 50)]; + T55 = cr[WS(rs, 45)]; + T1r = cr[WS(rs, 13)]; + T53 = T1o - T1p; + T1q = T1o + T1p; + Te3 = T54 - T55; + T56 = T54 + T55; + T1s = ci[WS(rs, 18)]; + T4Z = ci[WS(rs, 34)]; + T50 = cr[WS(rs, 61)]; + } + T9b = T53 + T56; + T57 = T53 - T56; + T4Y = T1r - T1s; + T1t = T1r + T1s; + Te2 = T4Z - T50; + T51 = T4Z + T50; + } + T1u = T1q + T1t; + Te1 = T1q - T1t; + Te4 = Te2 - Te3; + ThM = Te2 + Te3; + T52 = T4Y - T51; + T9c = T4Y + T51; + { + E T1c, T4B, T1b, Tee, T4J, T1d, T4C, T4D; + { + E T19, T1a, T4H, T4I; + T19 = 
cr[WS(rs, 9)]; + T1a = ci[WS(rs, 22)]; + T4H = ci[WS(rs, 38)]; + T4I = cr[WS(rs, 57)]; + T1c = ci[WS(rs, 6)]; + T4B = T19 - T1a; + T1b = T19 + T1a; + Tee = T4H - T4I; + T4J = T4H + T4I; + T1d = cr[WS(rs, 25)]; + T4C = ci[WS(rs, 54)]; + T4D = cr[WS(rs, 41)]; + } + { + E T1k, T4S, T4V, T1l, T4O, T4P; + { + E T1h, T1i, T4T, T4U; + T1h = cr[WS(rs, 5)]; + { + E T4G, T1e, Tef, T4E; + T4G = T1c - T1d; + T1e = T1c + T1d; + Tef = T4C - T4D; + T4E = T4C + T4D; + T5h = T4G - T4J; + T4K = T4G + T4J; + TdZ = T1b - T1e; + T1f = T1b + T1e; + ThJ = Tef + Tee; + Teg = Tee - Tef; + T5g = T4B - T4E; + T4F = T4B + T4E; + T1i = ci[WS(rs, 26)]; + } + T4T = ci[WS(rs, 42)]; + T4U = cr[WS(rs, 53)]; + T1k = cr[WS(rs, 21)]; + T4S = T1h - T1i; + T1j = T1h + T1i; + Te8 = T4T - T4U; + T4V = T4T + T4U; + T1l = ci[WS(rs, 10)]; + T4O = ci[WS(rs, 58)]; + T4P = cr[WS(rs, 37)]; + } + T98 = T4S + T4V; + T4W = T4S - T4V; + T4N = T1k - T1l; + T1m = T1k + T1l; + Te7 = T4O - T4P; + T4Q = T4O + T4P; + } + } + } + T1n = T1j + T1m; + Te6 = T1j - T1m; + { + E Te9, ThL, T4R, T99; + Te9 = Te7 - Te8; + ThL = Te7 + Te8; + T4R = T4N + T4Q; + T99 = T4Q - T4N; + { + E Tjr, ThK, Tjs, ThN; + { + E T1g, T1v, Tjp, Tjo; + Tjr = T18 - T1f; + T1g = T18 + T1f; + T1v = T1n + T1u; + Tjp = T1n - T1u; + ThK = ThI + ThJ; + Tjo = ThI - ThJ; + ThH = T1g - T1v; + T1w = T1g + T1v; + Tke = Tjp + Tjo; + Tjq = Tjo - Tjp; + Tjs = ThM - ThL; + ThN = ThL + ThM; + } + { + E Tg6, Te0, Tg9, Teh, Tej, Tei, Tga, Teb, Te5, Tea; + Tg6 = TdZ + TdY; + Te0 = TdY - TdZ; + Tkf = Tjr + Tjs; + Tjt = Tjr - Tjs; + TiK = ThK + ThN; + ThO = ThK - ThN; + Tg9 = Ted + Teg; + Teh = Ted - Teg; + Tej = Te4 - Te1; + Te5 = Te1 + Te4; + Tea = Te6 - Te9; + Tei = Te6 + Te9; + Tga = Tea + Te5; + Teb = Te5 - Tea; + { + E T9h, T4M, T78, T96, T5k, T5l, T75, T5j, T76, T59; + { + E T5i, Tg7, Tek, T4L, T4X, T58; + T9h = T4F + T4K; + T4L = T4F - T4K; + Tgb = FNMS(KP707106781, Tga, Tg9); + TgT = FMA(KP707106781, Tga, Tg9); + Tfc = FMA(KP707106781, Teb, Te0); + Tec = 
FNMS(KP707106781, Teb, Te0); + Tg7 = Tei + Tej; + Tek = Tei - Tej; + T4M = FMA(KP707106781, T4L, T4A); + T78 = FNMS(KP707106781, T4L, T4A); + Tg8 = FNMS(KP707106781, Tg7, Tg6); + TgU = FMA(KP707106781, Tg7, Tg6); + Tfd = FMA(KP707106781, Tek, Teh); + Tel = FNMS(KP707106781, Tek, Teh); + T5i = T5g + T5h; + T96 = T5h - T5g; + T5k = FNMS(KP414213562, T4R, T4W); + T4X = FMA(KP414213562, T4W, T4R); + T58 = FNMS(KP414213562, T57, T52); + T5l = FMA(KP414213562, T52, T57); + T75 = FNMS(KP707106781, T5i, T5f); + T5j = FMA(KP707106781, T5i, T5f); + T76 = T4X - T58; + T59 = T4X + T58; + } + { + E T79, T5m, T9a, T9d; + T77 = FNMS(KP923879532, T76, T75); + T83 = FMA(KP923879532, T76, T75); + T6i = FMA(KP923879532, T59, T4M); + T5a = FNMS(KP923879532, T59, T4M); + T79 = T5l - T5k; + T5m = T5k + T5l; + T97 = FNMS(KP707106781, T96, T95); + Tbk = FMA(KP707106781, T96, T95); + T7a = FNMS(KP923879532, T79, T78); + T82 = FMA(KP923879532, T79, T78); + T6j = FMA(KP923879532, T5m, T5j); + T5n = FNMS(KP923879532, T5m, T5j); + T9j = FNMS(KP414213562, T98, T99); + T9a = FMA(KP414213562, T99, T98); + T9d = FMA(KP414213562, T9c, T9b); + T9k = FNMS(KP414213562, T9b, T9c); + Tbh = FMA(KP707106781, T9h, T9g); + T9i = FNMS(KP707106781, T9h, T9g); + Tbi = T9a + T9d; + T9e = T9a - T9d; + } + } + } + } + } + } + { + E T9z, T4m, T1D, TdM, ThR, Tdx, T3H, T9o, T9r, T4e, T1Z, TdA, TdD, ThV, T49; + E T9s, T4o, T3R, Tdy, T1K, ThS, TdP, T4n, T3M, T1O, T3V, TdH, T3U, T1R, T3W; + E T9u, T43; + { + E T1V, T46, TdC, T45, T1Y, T47, T48, TdB; + { + E Tdw, T3D, T3G, Tdv, T4a, T4d; + { + E T4i, T1z, T3E, T4l, T1C, T3F; + { + E T4j, T4k, T1A, T1B; + { + E T1x, Tbl, T9l, T1y; + T1x = ci[0]; + Tbj = FNMS(KP923879532, Tbi, Tbh); + Tcc = FMA(KP923879532, Tbi, Tbh); + Tas = FMA(KP923879532, T9e, T97); + T9f = FNMS(KP923879532, T9e, T97); + Tbl = T9j - T9k; + T9l = T9j + T9k; + T1y = cr[WS(rs, 31)]; + T4j = ci[WS(rs, 48)]; + Tbm = FNMS(KP923879532, Tbl, Tbk); + Tcb = FMA(KP923879532, Tbl, Tbk); + Tar = FNMS(KP923879532, 
T9l, T9i); + T9m = FMA(KP923879532, T9l, T9i); + T4i = T1x - T1y; + T1z = T1x + T1y; + T4k = cr[WS(rs, 47)]; + } + T1A = cr[WS(rs, 15)]; + T1B = ci[WS(rs, 16)]; + T3E = ci[WS(rs, 32)]; + Tdw = T4j - T4k; + T4l = T4j + T4k; + T3D = T1A - T1B; + T1C = T1A + T1B; + T3F = cr[WS(rs, 63)]; + } + T9z = T4i + T4l; + T4m = T4i - T4l; + T1D = T1z + T1C; + TdM = T1z - T1C; + T3G = T3E + T3F; + Tdv = T3E - T3F; + } + { + E T4b, T4c, T1T, T1U, T1W, T1X; + T1T = ci[WS(rs, 4)]; + T1U = cr[WS(rs, 27)]; + ThR = Tdv + Tdw; + Tdx = Tdv - Tdw; + T3H = T3D - T3G; + T9o = T3D + T3G; + T4a = T1T - T1U; + T1V = T1T + T1U; + T4b = ci[WS(rs, 52)]; + T4c = cr[WS(rs, 43)]; + T1W = cr[WS(rs, 11)]; + T1X = ci[WS(rs, 20)]; + T46 = ci[WS(rs, 36)]; + TdC = T4b - T4c; + T4d = T4b + T4c; + T45 = T1W - T1X; + T1Y = T1W + T1X; + T47 = cr[WS(rs, 59)]; + } + T9r = T4a + T4d; + T4e = T4a - T4d; + } + T1Z = T1V + T1Y; + TdA = T1V - T1Y; + T48 = T46 + T47; + TdB = T46 - T47; + { + E T3I, T1G, T3J, TdN, T3Q, T3N, T1J, T3K, T3Z, T42; + { + E T3O, T3P, T1E, T1F, T1H, T1I; + T1E = cr[WS(rs, 7)]; + T1F = ci[WS(rs, 24)]; + TdD = TdB - TdC; + ThV = TdB + TdC; + T49 = T45 - T48; + T9s = T45 + T48; + T3I = T1E - T1F; + T1G = T1E + T1F; + T3O = ci[WS(rs, 40)]; + T3P = cr[WS(rs, 55)]; + T1H = ci[WS(rs, 8)]; + T1I = cr[WS(rs, 23)]; + T3J = ci[WS(rs, 56)]; + TdN = T3O - T3P; + T3Q = T3O + T3P; + T3N = T1H - T1I; + T1J = T1H + T1I; + T3K = cr[WS(rs, 39)]; + } + { + E T40, T41, T1P, T1Q; + { + E T1M, TdO, T3L, T1N; + T1M = cr[WS(rs, 3)]; + T4o = T3N - T3Q; + T3R = T3N + T3Q; + Tdy = T1G - T1J; + T1K = T1G + T1J; + TdO = T3J - T3K; + T3L = T3J + T3K; + T1N = ci[WS(rs, 28)]; + T40 = ci[WS(rs, 44)]; + ThS = TdO + TdN; + TdP = TdN - TdO; + T4n = T3I - T3L; + T3M = T3I + T3L; + T3Z = T1M - T1N; + T1O = T1M + T1N; + T41 = cr[WS(rs, 51)]; + } + T1P = cr[WS(rs, 19)]; + T1Q = ci[WS(rs, 12)]; + T3V = ci[WS(rs, 60)]; + TdH = T40 - T41; + T42 = T40 + T41; + T3U = T1P - T1Q; + T1R = T1P + T1Q; + T3W = cr[WS(rs, 35)]; + } + T9u = T3Z 
+ T42; + T43 = T3Z - T42; + } + } + { + E T1S, TdF, T3X, TdG; + T1S = T1O + T1R; + TdF = T1O - T1R; + T3X = T3V + T3W; + TdG = T3V - T3W; + { + E TdI, T3Y, T9v, ThT, ThW; + { + E Tjk, Tji, ThU, Tjh, T1L, T20, Tjl; + Tjk = T1D - T1K; + T1L = T1D + T1K; + T20 = T1S + T1Z; + Tji = T1S - T1Z; + TdI = TdG - TdH; + ThU = TdG + TdH; + T3Y = T3U + T3X; + T9v = T3U - T3X; + ThQ = T1L - T20; + T21 = T1L + T20; + ThT = ThR + ThS; + Tjh = ThR - ThS; + Tjl = ThV - ThU; + ThW = ThU + ThV; + Tkb = Tji + Tjh; + Tjj = Tjh - Tji; + Tkc = Tjk + Tjl; + Tjm = Tjk - Tjl; + } + { + E TfZ, Tdz, Tg2, TdQ, TdS, TdR, Tg3, TdK, TdE, TdJ; + TfZ = Tdy + Tdx; + Tdz = Tdx - Tdy; + Tg2 = TdM + TdP; + TdQ = TdM - TdP; + TdS = TdD - TdA; + TdE = TdA + TdD; + TiL = ThT + ThW; + ThX = ThT - ThW; + TdJ = TdF - TdI; + TdR = TdF + TdI; + Tg3 = TdJ + TdE; + TdK = TdE - TdJ; + { + E T9A, T3T, T7f, T9p, T4r, T4s, T7c, T4q, T7d, T4g; + { + E T4p, Tg0, TdT, T3S, T44, T4f; + T9A = T3M + T3R; + T3S = T3M - T3R; + Tg4 = FNMS(KP707106781, Tg3, Tg2); + TgW = FMA(KP707106781, Tg3, Tg2); + Tf9 = FMA(KP707106781, TdK, Tdz); + TdL = FNMS(KP707106781, TdK, Tdz); + Tg0 = TdR + TdS; + TdT = TdR - TdS; + T3T = FMA(KP707106781, T3S, T3H); + T7f = FNMS(KP707106781, T3S, T3H); + Tg1 = FNMS(KP707106781, Tg0, TfZ); + TgX = FMA(KP707106781, Tg0, TfZ); + Tfa = FMA(KP707106781, TdT, TdQ); + TdU = FNMS(KP707106781, TdT, TdQ); + T4p = T4n + T4o; + T9p = T4n - T4o; + T4r = FNMS(KP414213562, T3Y, T43); + T44 = FMA(KP414213562, T43, T3Y); + T4f = FNMS(KP414213562, T4e, T49); + T4s = FMA(KP414213562, T49, T4e); + T7c = FNMS(KP707106781, T4p, T4m); + T4q = FMA(KP707106781, T4p, T4m); + T7d = T44 - T4f; + T4g = T44 + T4f; + } + { + E T7g, T4t, T9t, T9w; + T7e = FNMS(KP923879532, T7d, T7c); + T80 = FMA(KP923879532, T7d, T7c); + T6f = FMA(KP923879532, T4g, T3T); + T4h = FNMS(KP923879532, T4g, T3T); + T7g = T4s - T4r; + T4t = T4r + T4s; + T9q = FNMS(KP707106781, T9p, T9o); + Tbr = FMA(KP707106781, T9p, T9o); + T7h = FNMS(KP923879532, T7g, 
T7f); + T7Z = FMA(KP923879532, T7g, T7f); + T6g = FMA(KP923879532, T4t, T4q); + T4u = FNMS(KP923879532, T4t, T4q); + T9D = FNMS(KP414213562, T9r, T9s); + T9t = FMA(KP414213562, T9s, T9r); + T9w = FNMS(KP414213562, T9v, T9u); + T9C = FMA(KP414213562, T9u, T9v); + Tbo = FMA(KP707106781, T9A, T9z); + T9B = FNMS(KP707106781, T9A, T9z); + Tbp = T9w + T9t; + T9x = T9t - T9w; + } + } + } + } + } + } + } + } + { + E Tbq, Tcf, Tav, T9y, Tbt, Tce, Tau, T9F, T6p, T6d, T6c, T6q, Thf, The, Thd; + { + E Tk9, Tkm, TjP, TjO, TjN; + { + E Tj0, TiS, TiU, Tj3, Tj1, Tj4, TiY, Tj2; + { + E TiQ, TiW, TiV, TiR, TiD, TiG, TiN, TiF, TiO; + { + E T11, T22, TiJ, TiE, TiM, Tbs, T9E; + TiQ = Tv - T10; + T11 = Tv + T10; + Tbq = FNMS(KP923879532, Tbp, Tbo); + Tcf = FMA(KP923879532, Tbp, Tbo); + Tav = FMA(KP923879532, T9x, T9q); + T9y = FNMS(KP923879532, T9x, T9q); + Tbs = T9C + T9D; + T9E = T9C - T9D; + T22 = T1w + T21; + TiW = T1w - T21; + TiV = TiH - TiI; + TiJ = TiH + TiI; + Tbt = FNMS(KP923879532, Tbs, Tbr); + Tce = FMA(KP923879532, Tbs, Tbr); + Tau = FMA(KP923879532, T9E, T9B); + T9F = FNMS(KP923879532, T9E, T9B); + TiE = T11 - T22; + TiR = TiL - TiK; + TiM = TiK + TiL; + cr[0] = T11 + T22; + TiD = W[62]; + TiG = W[63]; + ci[0] = TiJ + TiM; + TiN = TiJ - TiM; + TiF = TiD * TiE; + TiO = TiG * TiE; + } + cr[WS(rs, 32)] = FNMS(TiG, TiN, TiF); + ci[WS(rs, 32)] = FMA(TiD, TiN, TiO); + Tj0 = TiQ + TiR; + TiS = TiQ - TiR; + { + E TiP, TiX, TiT, TiZ; + TiP = W[94]; + TiU = W[95]; + TiZ = W[30]; + Tj3 = TiW + TiV; + TiX = TiV - TiW; + TiT = TiP * TiS; + Tj1 = TiZ * Tj0; + Tj4 = TiZ * Tj3; + TiY = TiP * TiX; + cr[WS(rs, 48)] = FNMS(TiU, TiX, TiT); + Tj2 = W[31]; + } + } + { + E Tii, Til, Tik, Tih, Tim; + { + E Tib, Tit, Tio, ThG, ThP, ThY, Tie, Tip, Tic, Tid; + Tib = Ti3 + Tia; + Tit = Tia - Ti3; + ci[WS(rs, 48)] = FMA(TiU, TiS, TiY); + Tio = Thy - ThF; + ThG = Thy + ThF; + ci[WS(rs, 16)] = FMA(Tj2, Tj0, Tj4); + cr[WS(rs, 16)] = FNMS(Tj2, Tj3, Tj1); + ThP = ThH - ThO; + Tic = ThH + ThO; + Tid = ThX - 
ThQ; + ThY = ThQ + ThX; + Tie = Tic + Tid; + Tip = Tid - Tic; + { + E Tiy, TiB, Ti0, Tiz, TiC, TiA; + { + E Tin, Tis, Tiq, ThZ, Tiu, Tir, Tiw, Tix, Tiv; + Tin = W[110]; + Tis = W[111]; + Tiy = FMA(KP707106781, Tip, Tio); + Tiq = FNMS(KP707106781, Tip, Tio); + ThZ = ThP + ThY; + Tiu = ThP - ThY; + Tir = Tin * Tiq; + Tix = W[46]; + TiB = FMA(KP707106781, Tiu, Tit); + Tiv = FNMS(KP707106781, Tiu, Tit); + Ti0 = FNMS(KP707106781, ThZ, ThG); + Tii = FMA(KP707106781, ThZ, ThG); + cr[WS(rs, 56)] = FNMS(Tis, Tiv, Tir); + Tiw = Tin * Tiv; + Tiz = Tix * Tiy; + TiC = Tix * TiB; + TiA = W[47]; + ci[WS(rs, 56)] = FMA(Tis, Tiq, Tiw); + } + { + E Tif, Ti2, Thx, Tig, Ti1; + Til = FMA(KP707106781, Tie, Tib); + Tif = FNMS(KP707106781, Tie, Tib); + Ti2 = W[79]; + ci[WS(rs, 24)] = FMA(TiA, Tiy, TiC); + cr[WS(rs, 24)] = FNMS(TiA, TiB, Tiz); + Thx = W[78]; + Tig = Ti2 * Ti0; + Tik = W[15]; + Ti1 = Thx * Ti0; + ci[WS(rs, 40)] = FMA(Thx, Tif, Tig); + Tih = W[14]; + Tim = Tik * Tii; + cr[WS(rs, 40)] = FNMS(Ti2, Tif, Ti1); + } + } + } + { + E TjF, TjI, TjU, Tk2, TjZ, Tk5, Tjw, TjM; + { + E TjX, TjG, Tju, Tjg, TjS, Tjn, TjH, Tjf, TjE, Tij, TjT, Tjv, TjY; + TjE = TjC - TjD; + Tk9 = TjC + TjD; + Tij = Tih * Tii; + ci[WS(rs, 8)] = FMA(Tih, Til, Tim); + Tkm = Tje + Tjb; + Tjf = Tjb - Tje; + TjX = FNMS(KP707106781, TjE, TjB); + TjF = FMA(KP707106781, TjE, TjB); + cr[WS(rs, 8)] = FNMS(Tik, Til, Tij); + TjG = FMA(KP414213562, Tjq, Tjt); + Tju = FNMS(KP414213562, Tjt, Tjq); + Tjg = FMA(KP707106781, Tjf, Tj8); + TjS = FNMS(KP707106781, Tjf, Tj8); + Tjn = FMA(KP414213562, Tjm, Tjj); + TjH = FNMS(KP414213562, Tjj, Tjm); + TjI = TjG - TjH; + TjT = TjG + TjH; + Tjv = Tjn - Tju; + TjY = Tju + Tjn; + TjU = FNMS(KP923879532, TjT, TjS); + Tk2 = FMA(KP923879532, TjT, TjS); + TjZ = FNMS(KP923879532, TjY, TjX); + Tk5 = FMA(KP923879532, TjY, TjX); + Tjw = FNMS(KP923879532, Tjv, Tjg); + TjM = FMA(KP923879532, Tjv, Tjg); + } + { + E Tk4, Tk3, TjR, TjW, TjJ, Tjy, Tj5; + TjR = W[54]; + TjW = W[55]; + { + E Tk1, Tk0, 
TjV, Tk6; + Tk1 = W[118]; + Tk4 = W[119]; + Tk0 = TjR * TjZ; + TjV = TjR * TjU; + Tk6 = Tk1 * Tk5; + Tk3 = Tk1 * Tk2; + ci[WS(rs, 28)] = FMA(TjW, TjU, Tk0); + cr[WS(rs, 28)] = FNMS(TjW, TjZ, TjV); + ci[WS(rs, 60)] = FMA(Tk4, Tk2, Tk6); + } + cr[WS(rs, 60)] = FNMS(Tk4, Tk5, Tk3); + TjP = FMA(KP923879532, TjI, TjF); + TjJ = FNMS(KP923879532, TjI, TjF); + Tjy = W[87]; + Tj5 = W[86]; + { + E TjL, TjQ, TjK, Tjx; + TjO = W[23]; + TjK = Tjy * Tjw; + Tjx = Tj5 * Tjw; + TjL = W[22]; + TjQ = TjO * TjM; + ci[WS(rs, 44)] = FMA(Tj5, TjJ, TjK); + cr[WS(rs, 44)] = FNMS(Tjy, TjJ, Tjx); + TjN = TjL * TjM; + ci[WS(rs, 12)] = FMA(TjL, TjP, TjQ); + } + } + } + } + } + { + E T5T, T5S, T5R, Tkx, Tkw, Tkv; + { + E Tkn, Tkq, TkC, TkK, TkH, TkN, Tki, Tku; + { + E Tkg, Tko, TkF, Tka, TkA, Tkd, Tkp, TkB, Tkh, TkG; + cr[WS(rs, 12)] = FNMS(TjO, TjP, TjN); + Tkg = FMA(KP414213562, Tkf, Tke); + Tko = FNMS(KP414213562, Tke, Tkf); + TkF = FMA(KP707106781, Tkm, Tkl); + Tkn = FNMS(KP707106781, Tkm, Tkl); + Tka = FNMS(KP707106781, Tk9, Tk8); + TkA = FMA(KP707106781, Tk9, Tk8); + Tkd = FNMS(KP414213562, Tkc, Tkb); + Tkp = FMA(KP414213562, Tkb, Tkc); + Tkq = Tko - Tkp; + TkB = Tko + Tkp; + Tkh = Tkd - Tkg; + TkG = Tkg + Tkd; + TkC = FNMS(KP923879532, TkB, TkA); + TkK = FMA(KP923879532, TkB, TkA); + TkH = FNMS(KP923879532, TkG, TkF); + TkN = FMA(KP923879532, TkG, TkF); + Tki = FNMS(KP923879532, Tkh, Tka); + Tku = FMA(KP923879532, Tkh, Tka); + } + { + E TkM, TkL, Tkz, TkE, Tkr, Tkk, Tk7; + Tkz = W[70]; + TkE = W[71]; + { + E TkJ, TkI, TkD, TkO; + TkJ = W[6]; + TkM = W[7]; + TkI = Tkz * TkH; + TkD = Tkz * TkC; + TkO = TkJ * TkN; + TkL = TkJ * TkK; + ci[WS(rs, 36)] = FMA(TkE, TkC, TkI); + cr[WS(rs, 36)] = FNMS(TkE, TkH, TkD); + ci[WS(rs, 4)] = FMA(TkM, TkK, TkO); + } + cr[WS(rs, 4)] = FNMS(TkM, TkN, TkL); + Tkx = FMA(KP923879532, Tkq, Tkn); + Tkr = FNMS(KP923879532, Tkq, Tkn); + Tkk = W[103]; + Tk7 = W[102]; + { + E Tkt, Tky, Tks, Tkj; + Tkw = W[39]; + Tks = Tkk * Tki; + Tkj = Tk7 * Tki; + Tkt = W[38]; + 
Tky = Tkw * Tku; + ci[WS(rs, 52)] = FMA(Tk7, Tkr, Tks); + cr[WS(rs, 52)] = FNMS(Tkk, Tkr, Tkj); + Tkv = Tkt * Tku; + ci[WS(rs, 20)] = FMA(Tkt, Tkx, Tky); + } + } + } + { + E T5J, T5M, T66, T5Y, T69, T63, T5Q, T5q; + { + E T5o, T4v, T61, T5X, T3C, T5W, T62, T5p; + { + E T5K, T5L, T5F, T5I, T2I, T3B; + T5F = FNMS(KP923879532, T5E, T5B); + T6p = FMA(KP923879532, T5E, T5B); + T6d = T5G + T5H; + T5I = T5G - T5H; + cr[WS(rs, 20)] = FNMS(Tkw, Tkx, Tkv); + T5o = FNMS(KP820678790, T5n, T5a); + T5K = FMA(KP820678790, T5a, T5n); + T5L = FNMS(KP820678790, T4h, T4u); + T4v = FMA(KP820678790, T4u, T4h); + T5J = FMA(KP980785280, T5I, T5F); + T61 = FNMS(KP980785280, T5I, T5F); + T2I = FNMS(KP923879532, T2H, T2k); + T6c = FMA(KP923879532, T2H, T2k); + T6q = T3A + T39; + T3B = T39 - T3A; + T5X = T5K + T5L; + T5M = T5K - T5L; + T3C = FMA(KP980785280, T3B, T2I); + T5W = FNMS(KP980785280, T3B, T2I); + } + T62 = T5o + T4v; + T5p = T4v - T5o; + T66 = FMA(KP773010453, T5X, T5W); + T5Y = FNMS(KP773010453, T5X, T5W); + T69 = FMA(KP773010453, T62, T61); + T63 = FNMS(KP773010453, T62, T61); + T5Q = FMA(KP773010453, T5p, T3C); + T5q = FNMS(KP773010453, T5p, T3C); + } + { + E T68, T67, T5V, T60, T5N, T5s, T23; + T5V = W[48]; + T60 = W[49]; + { + E T65, T64, T5Z, T6a; + T65 = W[112]; + T68 = W[113]; + T64 = T5V * T63; + T5Z = T5V * T5Y; + T6a = T65 * T69; + T67 = T65 * T66; + ci[WS(rs, 25)] = FMA(T60, T5Y, T64); + cr[WS(rs, 25)] = FNMS(T60, T63, T5Z); + ci[WS(rs, 57)] = FMA(T68, T66, T6a); + } + cr[WS(rs, 57)] = FNMS(T68, T69, T67); + T5T = FMA(KP773010453, T5M, T5J); + T5N = FNMS(KP773010453, T5M, T5J); + T5s = W[81]; + T23 = W[80]; + { + E T5P, T5U, T5O, T5r; + T5S = W[17]; + T5O = T5s * T5q; + T5r = T23 * T5q; + T5P = W[16]; + T5U = T5S * T5Q; + ci[WS(rs, 41)] = FMA(T23, T5N, T5O); + cr[WS(rs, 41)] = FNMS(T5s, T5N, T5r); + T5R = T5P * T5Q; + ci[WS(rs, 9)] = FMA(T5P, T5T, T5U); + } + } + } + { + E Th3, TgR, TgQ, Th4, TgN, TgM, TgL; + { + E TgG, TgF, Tge, Tgu, TgK, TgC, Tgx, Tgr; + { + E Tgp, 
Tgo, Tgd, Tgn, TfY, TgA, TgB, Tgq; + { + E Tgj, Tgm, Tg5, Tgc, TfQ, TfX; + Tg5 = FMA(KP668178637, Tg4, Tg1); + Tgp = FNMS(KP668178637, Tg1, Tg4); + Tgo = FMA(KP668178637, Tg8, Tgb); + Tgc = FNMS(KP668178637, Tgb, Tg8); + cr[WS(rs, 9)] = FNMS(T5S, T5T, T5R); + Th3 = FMA(KP707106781, Tgi, Tgh); + Tgj = FNMS(KP707106781, Tgi, Tgh); + Tgm = Tgk - Tgl; + TgR = Tgk + Tgl; + TgG = Tgc + Tg5; + Tgd = Tg5 - Tgc; + TfQ = FNMS(KP707106781, TfP, TfO); + TgQ = FMA(KP707106781, TfP, TfO); + Th4 = TfW + TfT; + TfX = TfT - TfW; + Tgn = FMA(KP923879532, Tgm, Tgj); + TgF = FNMS(KP923879532, Tgm, Tgj); + TfY = FMA(KP923879532, TfX, TfQ); + TgA = FNMS(KP923879532, TfX, TfQ); + } + TgB = Tgo + Tgp; + Tgq = Tgo - Tgp; + Tge = FNMS(KP831469612, Tgd, TfY); + Tgu = FMA(KP831469612, Tgd, TfY); + TgK = FMA(KP831469612, TgB, TgA); + TgC = FNMS(KP831469612, TgB, TgA); + Tgx = FMA(KP831469612, Tgq, Tgn); + Tgr = FNMS(KP831469612, Tgq, Tgn); + } + { + E Tgw, Tgv, TfN, Tgg, TgH, TgE, Tgz; + TfN = W[82]; + Tgg = W[83]; + { + E Tgt, Tgs, Tgf, Tgy; + Tgt = W[18]; + Tgw = W[19]; + Tgs = TfN * Tgr; + Tgf = TfN * Tge; + Tgy = Tgt * Tgx; + Tgv = Tgt * Tgu; + ci[WS(rs, 42)] = FMA(Tgg, Tge, Tgs); + cr[WS(rs, 42)] = FNMS(Tgg, Tgr, Tgf); + ci[WS(rs, 10)] = FMA(Tgw, Tgu, Tgy); + } + cr[WS(rs, 10)] = FNMS(Tgw, Tgx, Tgv); + TgN = FMA(KP831469612, TgG, TgF); + TgH = FNMS(KP831469612, TgG, TgF); + TgE = W[51]; + Tgz = W[50]; + { + E TgJ, TgO, TgI, TgD; + TgM = W[115]; + TgI = TgE * TgC; + TgD = Tgz * TgC; + TgJ = W[114]; + TgO = TgM * TgK; + ci[WS(rs, 26)] = FMA(Tgz, TgH, TgI); + cr[WS(rs, 26)] = FNMS(TgE, TgH, TgD); + TgL = TgJ * TgK; + ci[WS(rs, 58)] = FMA(TgJ, TgN, TgO); + } + } + } + { + E Th5, Th8, Ths, Thk, Thv, Thp, Thc, Th0; + { + E TgV, TgY, Thn, Thj, TgS, Thi, Th6, Th7, Tho, TgZ; + cr[WS(rs, 58)] = FNMS(TgM, TgN, TgL); + TgV = FNMS(KP198912367, TgU, TgT); + Th6 = FMA(KP198912367, TgT, TgU); + Th7 = FNMS(KP198912367, TgW, TgX); + TgY = FMA(KP198912367, TgX, TgW); + Th5 = FMA(KP923879532, Th4, Th3); + 
Thn = FNMS(KP923879532, Th4, Th3); + Thj = Th7 - Th6; + Th8 = Th6 + Th7; + TgS = FMA(KP923879532, TgR, TgQ); + Thi = FNMS(KP923879532, TgR, TgQ); + Tho = TgV - TgY; + TgZ = TgV + TgY; + Ths = FMA(KP980785280, Thj, Thi); + Thk = FNMS(KP980785280, Thj, Thi); + Thv = FMA(KP980785280, Tho, Thn); + Thp = FNMS(KP980785280, Tho, Thn); + Thc = FMA(KP980785280, TgZ, TgS); + Th0 = FNMS(KP980785280, TgZ, TgS); + } + { + E Thu, Tht, Thh, Thm, Th9, Th2, TgP; + Thh = W[98]; + Thm = W[99]; + { + E Thr, Thq, Thl, Thw; + Thr = W[34]; + Thu = W[35]; + Thq = Thh * Thp; + Thl = Thh * Thk; + Thw = Thr * Thv; + Tht = Thr * Ths; + ci[WS(rs, 50)] = FMA(Thm, Thk, Thq); + cr[WS(rs, 50)] = FNMS(Thm, Thp, Thl); + ci[WS(rs, 18)] = FMA(Thu, Ths, Thw); + } + cr[WS(rs, 18)] = FNMS(Thu, Thv, Tht); + Thf = FMA(KP980785280, Th8, Th5); + Th9 = FNMS(KP980785280, Th8, Th5); + Th2 = W[67]; + TgP = W[66]; + { + E Thb, Thg, Tha, Th1; + The = W[3]; + Tha = Th2 * Th0; + Th1 = TgP * Th0; + Thb = W[2]; + Thg = The * Thc; + ci[WS(rs, 34)] = FMA(TgP, Th9, Tha); + cr[WS(rs, 34)] = FNMS(Th2, Th9, Th1); + Thd = Thb * Thc; + ci[WS(rs, 2)] = FMA(Thb, Thf, Thg); + } + } + } + } + } + } + { + E Tcl, Tc9, Tc8, Tcm, T9R, T93, T8O, T9U, Tez, Tdt, Td6, TeC, Tfv, Tfu, Tft; + E T8B, T8A, T8z; + { + E TbP, TbO, TbN, T6B, T6A, T6z, TaN, TaM, TaL; + { + E T6r, T6u, T6O, T6G, T6R, T6L, T6y, T6m; + { + E T6k, T6h, T6J, T6F, T6e, T6E, T6s, T6t, T6K, T6l; + cr[WS(rs, 2)] = FNMS(The, Thf, Thd); + T6k = FMA(KP098491403, T6j, T6i); + T6s = FNMS(KP098491403, T6i, T6j); + T6t = FMA(KP098491403, T6f, T6g); + T6h = FNMS(KP098491403, T6g, T6f); + T6r = FNMS(KP980785280, T6q, T6p); + T6J = FMA(KP980785280, T6q, T6p); + T6F = T6s + T6t; + T6u = T6s - T6t; + T6e = FNMS(KP980785280, T6d, T6c); + T6E = FMA(KP980785280, T6d, T6c); + T6K = T6k + T6h; + T6l = T6h - T6k; + T6O = FMA(KP995184726, T6F, T6E); + T6G = FNMS(KP995184726, T6F, T6E); + T6R = FMA(KP995184726, T6K, T6J); + T6L = FNMS(KP995184726, T6K, T6J); + T6y = FMA(KP995184726, T6l, 
T6e); + T6m = FNMS(KP995184726, T6l, T6e); + } + { + E T6Q, T6P, T6D, T6I, T6v, T6o, T6b; + T6D = W[64]; + T6I = W[65]; + { + E T6N, T6M, T6H, T6S; + T6N = W[0]; + T6Q = W[1]; + T6M = T6D * T6L; + T6H = T6D * T6G; + T6S = T6N * T6R; + T6P = T6N * T6O; + ci[WS(rs, 33)] = FMA(T6I, T6G, T6M); + cr[WS(rs, 33)] = FNMS(T6I, T6L, T6H); + ci[WS(rs, 1)] = FMA(T6Q, T6O, T6S); + } + cr[WS(rs, 1)] = FNMS(T6Q, T6R, T6P); + T6B = FMA(KP995184726, T6u, T6r); + T6v = FNMS(KP995184726, T6u, T6r); + T6o = W[97]; + T6b = W[96]; + { + E T6x, T6C, T6w, T6n; + T6A = W[33]; + T6w = T6o * T6m; + T6n = T6b * T6m; + T6x = W[32]; + T6C = T6A * T6y; + ci[WS(rs, 49)] = FMA(T6b, T6v, T6w); + cr[WS(rs, 49)] = FNMS(T6o, T6v, T6n); + T6z = T6x * T6y; + ci[WS(rs, 17)] = FMA(T6x, T6B, T6C); + } + } + } + { + E TbF, TbI, Tc2, TbU, Tc5, TbZ, TbM, Tbw; + { + E Tbn, Tbu, TbX, TbT, Tbg, TbS, TbY, Tbv; + { + E TbG, TbH, TbB, TbE, Tb8, Tbf; + TbB = FMA(KP923879532, TbA, Tbz); + Tcl = FNMS(KP923879532, TbA, Tbz); + Tc9 = TbC + TbD; + TbE = TbC - TbD; + cr[WS(rs, 17)] = FNMS(T6A, T6B, T6z); + Tbn = FNMS(KP820678790, Tbm, Tbj); + TbG = FMA(KP820678790, Tbj, Tbm); + TbH = FMA(KP820678790, Tbq, Tbt); + Tbu = FNMS(KP820678790, Tbt, Tbq); + TbF = FMA(KP980785280, TbE, TbB); + TbX = FNMS(KP980785280, TbE, TbB); + Tb8 = FNMS(KP923879532, Tb7, Tb6); + Tc8 = FMA(KP923879532, Tb7, Tb6); + Tcm = Tbe - Tbb; + Tbf = Tbb + Tbe; + TbT = TbG + TbH; + TbI = TbG - TbH; + Tbg = FNMS(KP980785280, Tbf, Tb8); + TbS = FMA(KP980785280, Tbf, Tb8); + } + TbY = Tbn - Tbu; + Tbv = Tbn + Tbu; + Tc2 = FMA(KP773010453, TbT, TbS); + TbU = FNMS(KP773010453, TbT, TbS); + Tc5 = FNMS(KP773010453, TbY, TbX); + TbZ = FMA(KP773010453, TbY, TbX); + TbM = FMA(KP773010453, Tbv, Tbg); + Tbw = FNMS(KP773010453, Tbv, Tbg); + } + { + E Tc4, Tc3, TbR, TbW, TbJ, Tby, Tb5; + TbR = W[44]; + TbW = W[45]; + { + E Tc1, Tc0, TbV, Tc6; + Tc1 = W[108]; + Tc4 = W[109]; + Tc0 = TbR * TbZ; + TbV = TbR * TbU; + Tc6 = Tc1 * Tc5; + Tc3 = Tc1 * Tc2; + ci[WS(rs, 23)] = 
FMA(TbW, TbU, Tc0); + cr[WS(rs, 23)] = FNMS(TbW, TbZ, TbV); + ci[WS(rs, 55)] = FMA(Tc4, Tc2, Tc6); + } + cr[WS(rs, 55)] = FNMS(Tc4, Tc5, Tc3); + TbP = FMA(KP773010453, TbI, TbF); + TbJ = FNMS(KP773010453, TbI, TbF); + Tby = W[77]; + Tb5 = W[76]; + { + E TbL, TbQ, TbK, Tbx; + TbO = W[13]; + TbK = Tby * Tbw; + Tbx = Tb5 * Tbw; + TbL = W[12]; + TbQ = TbO * TbM; + ci[WS(rs, 39)] = FMA(Tb5, TbJ, TbK); + cr[WS(rs, 39)] = FNMS(Tby, TbJ, Tbx); + TbN = TbL * TbM; + ci[WS(rs, 7)] = FMA(TbL, TbP, TbQ); + } + } + } + { + E TaD, TaG, Tb0, TaS, Tb3, TaX, TaK, Tay; + { + E Tat, Taw, TaV, TaR, Taq, TaQ, TaW, Tax; + { + E TaE, TaF, TaB, TaC, Tao, Tap; + TaB = FMA(KP923879532, T9Q, T9N); + T9R = FNMS(KP923879532, T9Q, T9N); + T93 = T8V + T92; + TaC = T8V - T92; + cr[WS(rs, 7)] = FNMS(TbO, TbP, TbN); + Tat = FNMS(KP303346683, Tas, Tar); + TaE = FMA(KP303346683, Tar, Tas); + TaF = FMA(KP303346683, Tau, Tav); + Taw = FNMS(KP303346683, Tav, Tau); + TaD = FMA(KP831469612, TaC, TaB); + TaV = FNMS(KP831469612, TaC, TaB); + Tao = FNMS(KP923879532, T8N, T8G); + T8O = FMA(KP923879532, T8N, T8G); + T9U = T9S - T9T; + Tap = T9S + T9T; + TaR = TaE + TaF; + TaG = TaE - TaF; + Taq = FMA(KP831469612, Tap, Tao); + TaQ = FNMS(KP831469612, Tap, Tao); + } + TaW = Tat - Taw; + Tax = Tat + Taw; + Tb0 = FMA(KP956940335, TaR, TaQ); + TaS = FNMS(KP956940335, TaR, TaQ); + Tb3 = FNMS(KP956940335, TaW, TaV); + TaX = FMA(KP956940335, TaW, TaV); + TaK = FMA(KP956940335, Tax, Taq); + Tay = FNMS(KP956940335, Tax, Taq); + } + { + E Tb2, Tb1, TaP, TaU, TaH, TaA, Tan; + TaP = W[36]; + TaU = W[37]; + { + E TaZ, TaY, TaT, Tb4; + TaZ = W[100]; + Tb2 = W[101]; + TaY = TaP * TaX; + TaT = TaP * TaS; + Tb4 = TaZ * Tb3; + Tb1 = TaZ * Tb0; + ci[WS(rs, 19)] = FMA(TaU, TaS, TaY); + cr[WS(rs, 19)] = FNMS(TaU, TaX, TaT); + ci[WS(rs, 51)] = FMA(Tb2, Tb0, Tb4); + } + cr[WS(rs, 51)] = FNMS(Tb2, Tb3, Tb1); + TaN = FMA(KP956940335, TaG, TaD); + TaH = FNMS(KP956940335, TaG, TaD); + TaA = W[69]; + Tan = W[68]; + { + E TaJ, TaO, TaI, 
Taz; + TaM = W[5]; + TaI = TaA * Tay; + Taz = Tan * Tay; + TaJ = W[4]; + TaO = TaM * TaK; + ci[WS(rs, 35)] = FMA(Tan, TaH, TaI); + cr[WS(rs, 35)] = FNMS(TaA, TaH, Taz); + TaL = TaJ * TaK; + ci[WS(rs, 3)] = FMA(TaJ, TaN, TaO); + } + } + } + { + E Tfl, Tfo, TfI, TfA, TfL, TfF, Tfs, Tfg; + { + E Tfe, Tfb, TfD, Tfz, Tf8, Tfy, TfE, Tff; + { + E Tfm, Tfn, Tfj, Tfk, Tf6, Tf7; + Tfj = FNMS(KP707106781, Tey, Tev); + Tez = FMA(KP707106781, Tey, Tev); + Tdt = Tdh - Tds; + Tfk = Tds + Tdh; + cr[WS(rs, 3)] = FNMS(TaM, TaN, TaL); + Tfe = FNMS(KP198912367, Tfd, Tfc); + Tfm = FMA(KP198912367, Tfc, Tfd); + Tfn = FNMS(KP198912367, Tf9, Tfa); + Tfb = FMA(KP198912367, Tfa, Tf9); + Tfl = FNMS(KP923879532, Tfk, Tfj); + TfD = FMA(KP923879532, Tfk, Tfj); + Tf6 = FNMS(KP707106781, Td5, TcU); + Td6 = FMA(KP707106781, Td5, TcU); + TeC = TeA - TeB; + Tf7 = TeA + TeB; + Tfz = Tfm + Tfn; + Tfo = Tfm - Tfn; + Tf8 = FNMS(KP923879532, Tf7, Tf6); + Tfy = FMA(KP923879532, Tf7, Tf6); + } + TfE = Tfe + Tfb; + Tff = Tfb - Tfe; + TfI = FMA(KP980785280, Tfz, Tfy); + TfA = FNMS(KP980785280, Tfz, Tfy); + TfL = FMA(KP980785280, TfE, TfD); + TfF = FNMS(KP980785280, TfE, TfD); + Tfs = FMA(KP980785280, Tff, Tf8); + Tfg = FNMS(KP980785280, Tff, Tf8); + } + { + E TfK, TfJ, Tfx, TfC, Tfp, Tfi, Tf5; + Tfx = W[58]; + TfC = W[59]; + { + E TfH, TfG, TfB, TfM; + TfH = W[122]; + TfK = W[123]; + TfG = Tfx * TfF; + TfB = Tfx * TfA; + TfM = TfH * TfL; + TfJ = TfH * TfI; + ci[WS(rs, 30)] = FMA(TfC, TfA, TfG); + cr[WS(rs, 30)] = FNMS(TfC, TfF, TfB); + ci[WS(rs, 62)] = FMA(TfK, TfI, TfM); + } + cr[WS(rs, 62)] = FNMS(TfK, TfL, TfJ); + Tfv = FMA(KP980785280, Tfo, Tfl); + Tfp = FNMS(KP980785280, Tfo, Tfl); + Tfi = W[91]; + Tf5 = W[90]; + { + E Tfr, Tfw, Tfq, Tfh; + Tfu = W[27]; + Tfq = Tfi * Tfg; + Tfh = Tf5 * Tfg; + Tfr = W[26]; + Tfw = Tfu * Tfs; + ci[WS(rs, 46)] = FMA(Tf5, Tfp, Tfq); + cr[WS(rs, 46)] = FNMS(Tfi, Tfp, Tfh); + Tft = Tfr * Tfs; + ci[WS(rs, 14)] = FMA(Tfr, Tfv, Tfw); + } + } + } + } + { + E T89, T7X, T7W, T8a, 
T7D, T7C, T7B; + { + E T7t, T7w, T7Q, T7I, T7T, T7N, T7A, T7k; + { + E T7b, T7i, T7L, T7H, T74, T7G, T7M, T7j; + { + E T7u, T7v, T7p, T7s, T6W, T73; + T7p = FMA(KP923879532, T7o, T7n); + T89 = FNMS(KP923879532, T7o, T7n); + T7X = T7q + T7r; + T7s = T7q - T7r; + cr[WS(rs, 14)] = FNMS(Tfu, Tfv, Tft); + T7b = FNMS(KP534511135, T7a, T77); + T7u = FMA(KP534511135, T77, T7a); + T7v = FNMS(KP534511135, T7e, T7h); + T7i = FMA(KP534511135, T7h, T7e); + T7t = FMA(KP831469612, T7s, T7p); + T7L = FNMS(KP831469612, T7s, T7p); + T6W = FMA(KP923879532, T6V, T6U); + T7W = FNMS(KP923879532, T6V, T6U); + T8a = T72 + T6Z; + T73 = T6Z - T72; + T7H = T7v - T7u; + T7w = T7u + T7v; + T74 = FMA(KP831469612, T73, T6W); + T7G = FNMS(KP831469612, T73, T6W); + } + T7M = T7b - T7i; + T7j = T7b + T7i; + T7Q = FMA(KP881921264, T7H, T7G); + T7I = FNMS(KP881921264, T7H, T7G); + T7T = FMA(KP881921264, T7M, T7L); + T7N = FNMS(KP881921264, T7M, T7L); + T7A = FMA(KP881921264, T7j, T74); + T7k = FNMS(KP881921264, T7j, T74); + } + { + E T7S, T7R, T7F, T7K, T7x, T7m, T6T; + T7F = W[104]; + T7K = W[105]; + { + E T7P, T7O, T7J, T7U; + T7P = W[40]; + T7S = W[41]; + T7O = T7F * T7N; + T7J = T7F * T7I; + T7U = T7P * T7T; + T7R = T7P * T7Q; + ci[WS(rs, 53)] = FMA(T7K, T7I, T7O); + cr[WS(rs, 53)] = FNMS(T7K, T7N, T7J); + ci[WS(rs, 21)] = FMA(T7S, T7Q, T7U); + } + cr[WS(rs, 21)] = FNMS(T7S, T7T, T7R); + T7D = FMA(KP881921264, T7w, T7t); + T7x = FNMS(KP881921264, T7w, T7t); + T7m = W[73]; + T6T = W[72]; + { + E T7z, T7E, T7y, T7l; + T7C = W[9]; + T7y = T7m * T7k; + T7l = T6T * T7k; + T7z = W[8]; + T7E = T7C * T7A; + ci[WS(rs, 37)] = FMA(T6T, T7x, T7y); + cr[WS(rs, 37)] = FNMS(T7m, T7x, T7l); + T7B = T7z * T7A; + ci[WS(rs, 5)] = FMA(T7z, T7D, T7E); + } + } + } + { + E T8u, T8t, T86, T8i, T8y, T8q, T8l, T8f; + { + E T8d, T8c, T85, T8b, T7Y, T8o, T81, T84, T8p, T8e; + T81 = FMA(KP303346683, T80, T7Z); + T8d = FNMS(KP303346683, T7Z, T80); + T8c = FMA(KP303346683, T82, T83); + T84 = FNMS(KP303346683, T83, T82); + 
cr[WS(rs, 5)] = FNMS(T7C, T7D, T7B); + T8u = T84 + T81; + T85 = T81 - T84; + T8b = FNMS(KP831469612, T8a, T89); + T8t = FMA(KP831469612, T8a, T89); + T7Y = FNMS(KP831469612, T7X, T7W); + T8o = FMA(KP831469612, T7X, T7W); + T8p = T8c + T8d; + T8e = T8c - T8d; + T86 = FNMS(KP956940335, T85, T7Y); + T8i = FMA(KP956940335, T85, T7Y); + T8y = FMA(KP956940335, T8p, T8o); + T8q = FNMS(KP956940335, T8p, T8o); + T8l = FMA(KP956940335, T8e, T8b); + T8f = FNMS(KP956940335, T8e, T8b); + } + { + E T8k, T8j, T7V, T88, T8v, T8s, T8n; + T7V = W[88]; + T88 = W[89]; + { + E T8h, T8g, T87, T8m; + T8h = W[24]; + T8k = W[25]; + T8g = T7V * T8f; + T87 = T7V * T86; + T8m = T8h * T8l; + T8j = T8h * T8i; + ci[WS(rs, 45)] = FMA(T88, T86, T8g); + cr[WS(rs, 45)] = FNMS(T88, T8f, T87); + ci[WS(rs, 13)] = FMA(T8k, T8i, T8m); + } + cr[WS(rs, 13)] = FNMS(T8k, T8l, T8j); + T8B = FMA(KP956940335, T8u, T8t); + T8v = FNMS(KP956940335, T8u, T8t); + T8s = W[57]; + T8n = W[56]; + { + E T8x, T8C, T8w, T8r; + T8A = W[121]; + T8w = T8s * T8q; + T8r = T8n * T8q; + T8x = W[120]; + T8C = T8A * T8y; + ci[WS(rs, 29)] = FMA(T8n, T8v, T8w); + cr[WS(rs, 29)] = FNMS(T8s, T8v, T8r); + T8z = T8x * T8y; + ci[WS(rs, 61)] = FMA(T8x, T8B, T8C); + } + } + } + } + { + E Ta5, Ta4, Ta3, TeN, TeM, TeL; + { + E T9V, T9Y, Tai, Taa, Tal, Taf, Ta2, T9I; + { + E T9n, T9G, Tad, Ta9, T94, Ta8, T9W, T9X, Tae, T9H; + cr[WS(rs, 61)] = FNMS(T8A, T8B, T8z); + T9n = FNMS(KP534511135, T9m, T9f); + T9W = FMA(KP534511135, T9f, T9m); + T9X = FMA(KP534511135, T9y, T9F); + T9G = FNMS(KP534511135, T9F, T9y); + T9V = FMA(KP831469612, T9U, T9R); + Tad = FNMS(KP831469612, T9U, T9R); + Ta9 = T9W + T9X; + T9Y = T9W - T9X; + T94 = FNMS(KP831469612, T93, T8O); + Ta8 = FMA(KP831469612, T93, T8O); + Tae = T9G - T9n; + T9H = T9n + T9G; + Tai = FMA(KP881921264, Ta9, Ta8); + Taa = FNMS(KP881921264, Ta9, Ta8); + Tal = FNMS(KP881921264, Tae, Tad); + Taf = FMA(KP881921264, Tae, Tad); + Ta2 = FNMS(KP881921264, T9H, T94); + T9I = FMA(KP881921264, T9H, T94); + } 
+ { + E Tak, Taj, Ta7, Tac, T9Z, T9K, T8D; + Ta7 = W[52]; + Tac = W[53]; + { + E Tah, Tag, Tab, Tam; + Tah = W[116]; + Tak = W[117]; + Tag = Ta7 * Taf; + Tab = Ta7 * Taa; + Tam = Tah * Tal; + Taj = Tah * Tai; + ci[WS(rs, 27)] = FMA(Tac, Taa, Tag); + cr[WS(rs, 27)] = FNMS(Tac, Taf, Tab); + ci[WS(rs, 59)] = FMA(Tak, Tai, Tam); + } + cr[WS(rs, 59)] = FNMS(Tak, Tal, Taj); + Ta5 = FMA(KP881921264, T9Y, T9V); + T9Z = FNMS(KP881921264, T9Y, T9V); + T9K = W[85]; + T8D = W[84]; + { + E Ta1, Ta6, Ta0, T9J; + Ta4 = W[21]; + Ta0 = T9K * T9I; + T9J = T8D * T9I; + Ta1 = W[20]; + Ta6 = Ta4 * Ta2; + ci[WS(rs, 43)] = FMA(T8D, T9Z, Ta0); + cr[WS(rs, 43)] = FNMS(T9K, T9Z, T9J); + Ta3 = Ta1 * Ta2; + ci[WS(rs, 11)] = FMA(Ta1, Ta5, Ta6); + } + } + } + { + E TeD, TeG, Tf0, TeS, Tf3, TeX, TeK, Teo; + { + E Tem, TdV, TeV, TeR, Tdu, TeQ, TeE, TeF, TeW, Ten; + cr[WS(rs, 11)] = FNMS(Ta4, Ta5, Ta3); + Tem = FMA(KP668178637, Tel, Tec); + TeE = FNMS(KP668178637, Tec, Tel); + TeF = FMA(KP668178637, TdL, TdU); + TdV = FNMS(KP668178637, TdU, TdL); + TeD = FNMS(KP923879532, TeC, Tez); + TeV = FMA(KP923879532, TeC, Tez); + TeR = TeE + TeF; + TeG = TeE - TeF; + Tdu = FNMS(KP923879532, Tdt, Td6); + TeQ = FMA(KP923879532, Tdt, Td6); + TeW = Tem + TdV; + Ten = TdV - Tem; + Tf0 = FMA(KP831469612, TeR, TeQ); + TeS = FNMS(KP831469612, TeR, TeQ); + Tf3 = FMA(KP831469612, TeW, TeV); + TeX = FNMS(KP831469612, TeW, TeV); + TeK = FMA(KP831469612, Ten, Tdu); + Teo = FNMS(KP831469612, Ten, Tdu); + } + { + E Tf2, Tf1, TeP, TeU, TeH, Teq, TcP; + TeP = W[74]; + TeU = W[75]; + { + E TeZ, TeY, TeT, Tf4; + TeZ = W[10]; + Tf2 = W[11]; + TeY = TeP * TeX; + TeT = TeP * TeS; + Tf4 = TeZ * Tf3; + Tf1 = TeZ * Tf0; + ci[WS(rs, 38)] = FMA(TeU, TeS, TeY); + cr[WS(rs, 38)] = FNMS(TeU, TeX, TeT); + ci[WS(rs, 6)] = FMA(Tf2, Tf0, Tf4); + } + cr[WS(rs, 6)] = FNMS(Tf2, Tf3, Tf1); + TeN = FMA(KP831469612, TeG, TeD); + TeH = FNMS(KP831469612, TeG, TeD); + Teq = W[107]; + TcP = W[106]; + { + E TeJ, TeO, TeI, Tep; + TeM = W[43]; + TeI = 
Teq * Teo; + Tep = TcP * Teo; + TeJ = W[42]; + TeO = TeM * TeK; + ci[WS(rs, 54)] = FMA(TcP, TeH, TeI); + cr[WS(rs, 54)] = FNMS(Teq, TeH, Tep); + TeL = TeJ * TeK; + ci[WS(rs, 22)] = FMA(TeJ, TeN, TeO); + } + } + } + { + E Tcn, Tcq, TcK, TcC, TcN, TcH, Tcu, Tci; + { + E Tcd, Tcg, TcF, TcB, Tca, TcA, Tco, Tcp, TcG, Tch; + cr[WS(rs, 22)] = FNMS(TeM, TeN, TeL); + Tcd = FNMS(KP098491403, Tcc, Tcb); + Tco = FMA(KP098491403, Tcb, Tcc); + Tcp = FMA(KP098491403, Tce, Tcf); + Tcg = FNMS(KP098491403, Tcf, Tce); + Tcn = FMA(KP980785280, Tcm, Tcl); + TcF = FNMS(KP980785280, Tcm, Tcl); + TcB = Tco + Tcp; + Tcq = Tco - Tcp; + Tca = FNMS(KP980785280, Tc9, Tc8); + TcA = FMA(KP980785280, Tc9, Tc8); + TcG = Tcg - Tcd; + Tch = Tcd + Tcg; + TcK = FMA(KP995184726, TcB, TcA); + TcC = FNMS(KP995184726, TcB, TcA); + TcN = FNMS(KP995184726, TcG, TcF); + TcH = FMA(KP995184726, TcG, TcF); + Tcu = FNMS(KP995184726, Tch, Tca); + Tci = FMA(KP995184726, Tch, Tca); + } + { + E TcM, TcL, Tcz, TcE, Tcr, Tck, Tc7; + Tcz = W[60]; + TcE = W[61]; + { + E TcJ, TcI, TcD, TcO; + TcJ = W[124]; + TcM = W[125]; + TcI = Tcz * TcH; + TcD = Tcz * TcC; + TcO = TcJ * TcN; + TcL = TcJ * TcK; + ci[WS(rs, 31)] = FMA(TcE, TcC, TcI); + cr[WS(rs, 31)] = FNMS(TcE, TcH, TcD); + ci[WS(rs, 63)] = FMA(TcM, TcK, TcO); + } + cr[WS(rs, 63)] = FNMS(TcM, TcN, TcL); + Tcx = FMA(KP995184726, Tcq, Tcn); + Tcr = FNMS(KP995184726, Tcq, Tcn); + Tck = W[93]; + Tc7 = W[92]; + { + E Tct, Tcy, Tcs, Tcj; + Tcw = W[29]; + Tcs = Tck * Tci; + Tcj = Tc7 * Tci; + Tct = W[28]; + Tcy = Tcw * Tcu; + ci[WS(rs, 47)] = FMA(Tc7, Tcr, Tcs); + cr[WS(rs, 47)] = FNMS(Tck, Tcr, Tcj); + Tcv = Tct * Tcu; + ci[WS(rs, 15)] = FMA(Tct, Tcx, Tcy); + } + } + } + } + } + } + } + cr[WS(rs, 15)] = FNMS(Tcw, Tcx, Tcv); + } + } +} + /* Twiddle instruction table referenced by desc below: one {TW_FULL, 1, 64} entry followed by {TW_NEXT, 1, 0}. NOTE(review): TW_NEXT is presumably the list terminator -- confirm in codelet-rdft.h. */+static const tw_instr twinstr[] = { + {TW_FULL, 1, 64}, + {TW_NEXT, 1, 0} +}; + /* Descriptor for the HAVE_FMA variant: 64, the name "hb_64", the twiddle table, &GENUS, and {520, 126, 518, 0}. The first three counts equal the additions / multiplications / fused multiply-adds quoted in this variant's header comment. NOTE(review): the leading 64 is presumably the radix (generator flag -n 64) -- confirm against hc2hc_desc. */+static const hc2hc_desc desc = { 64, "hb_64", twinstr, &GENUS, {520, 126, 518, 0} }; + /* Registration entry point: hands hb_64 and its descriptor to planner p through X(khc2hc_register). */+void X(codelet_hb_64) (planner *p) { + X(khc2hc_register) (p, hb_64,
&desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -dif -name hb_64 -include hb.h */ + +/* + * This function contains 1038 FP additions, 500 FP multiplications, + * (or, 808 additions, 270 multiplications, 230 fused multiply/add), + * 196 stack variables, 15 constants, and 256 memory accesses + */ +#include "hb.h" + /* hb_64, variant compiled when HAVE_FMA is not defined. For every m in [mb, me) the body reads cr[] and ci[] at WS(rs, k) offsets, combines the values, and stores results scaled by entries of W back into cr[] and ci[]. NOTE(review): presumably the radix-64 backward half-complex twiddle pass (generator flags -n 64 -sign 1 -dif) -- confirm against hb.h. */+static void hb_64(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + /* The 15 DK(name, value) constants counted in the header comment above; each name spells out the leading digits of its value. */DK(KP098017140, +0.098017140329560601994195563888641845861136673); + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP634393284, +0.634393284163645498215171613225493370675687095); + DK(KP471396736, +0.471396736825997648556387625905254377657460319); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP290284677, +0.290284677254462367636192375817395274691476278); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + /* W is first offset by (mb - 1) * 126; each iteration then advances cr by ms, moves ci back by ms, and advances W by 126 entries. */for (m = mb, W = W + ((mb - 1) * 126); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 126, MAKE_VOLATILE_STRIDE(128, rs)) { + E Tf, T8C, Tfa, Thk, Tgg, ThM, T2c, T5O, T4K, T6g, Tag, TdE, TcA, Te6, T7P; + E T94, TK, T7o, T38, T4P, Tfv, Thn, T5W, T6j, Tb0, TdK, Tfs, Tho, T8K, T97; + E Tb7, TdL, TZ, T7l, T2P, T4Q, Tfo, Thq, T5T, T6k, TaH, TdH, Tfl, Thr, T8H; + E T98, TaO, TdI, Tu, T95, Tfh, ThN, Tgj, Thl, T2v, T6h, T4N, T5P, Tav, Te7; + E TcD, TdF,
T7S, T8D, T1L, T20, T7A, T7D, T7G, T7H, T40, T62, Tg1, Thv, Tg8; + E Thz, Tg5, Thw, T4t, T5Z, T4j, T60, T4w, T63, TbY, TdS, Tcd, TdQ, TfU, Thy; + E T8P, T9z, T8S, T9A, Tcl, TdP, Tco, TdT, T1g, T1v, T7r, T7u, T7x, T7y, T3j; + E T69, TfI, ThD, TfP, ThG, TfM, ThC, T3M, T66, T3C, T67, T3P, T6a, Tbl, TdZ; + E TbA, TdX, TfB, ThF, T8W, T9C, T8Z, T9D, TbI, TdW, TbL, Te0; + { + E T3, Ta6, T6, Tcu, T4I, Ta7, T4F, Tcv, Td, Tcy, T27, Tae, Ta, Tcx, T2a; + E Tab; + { + E T1, T2, T4D, T4E; + T1 = cr[0]; + T2 = ci[WS(rs, 31)]; + T3 = T1 + T2; + Ta6 = T1 - T2; + { + E T4, T5, T4G, T4H; + T4 = cr[WS(rs, 16)]; + T5 = ci[WS(rs, 15)]; + T6 = T4 + T5; + Tcu = T4 - T5; + T4G = ci[WS(rs, 47)]; + T4H = cr[WS(rs, 48)]; + T4I = T4G - T4H; + Ta7 = T4G + T4H; + } + T4D = ci[WS(rs, 63)]; + T4E = cr[WS(rs, 32)]; + T4F = T4D - T4E; + Tcv = T4D + T4E; + { + E Tb, Tc, Tac, T25, T26, Tad; + Tb = ci[WS(rs, 7)]; + Tc = cr[WS(rs, 24)]; + Tac = Tb - Tc; + T25 = ci[WS(rs, 39)]; + T26 = cr[WS(rs, 56)]; + Tad = T25 + T26; + Td = Tb + Tc; + Tcy = Tac + Tad; + T27 = T25 - T26; + Tae = Tac - Tad; + } + { + E T8, T9, Ta9, T28, T29, Taa; + T8 = cr[WS(rs, 8)]; + T9 = ci[WS(rs, 23)]; + Ta9 = T8 - T9; + T28 = ci[WS(rs, 55)]; + T29 = cr[WS(rs, 40)]; + Taa = T28 + T29; + Ta = T8 + T9; + Tcx = Ta9 + Taa; + T2a = T28 - T29; + Tab = Ta9 - Taa; + } + } + { + E T7, Te, Tf8, Tf9; + T7 = T3 + T6; + Te = Ta + Td; + Tf = T7 + Te; + T8C = T7 - Te; + Tf8 = Ta6 + Ta7; + Tf9 = KP707106781 * (Tcx + Tcy); + Tfa = Tf8 - Tf9; + Thk = Tf8 + Tf9; + } + { + E Tge, Tgf, T24, T2b; + Tge = Tcv - Tcu; + Tgf = KP707106781 * (Tab - Tae); + Tgg = Tge + Tgf; + ThM = Tge - Tgf; + T24 = T3 - T6; + T2b = T27 - T2a; + T2c = T24 + T2b; + T5O = T24 - T2b; + } + { + E T4C, T4J, Ta8, Taf; + T4C = Ta - Td; + T4J = T4F - T4I; + T4K = T4C + T4J; + T6g = T4J - T4C; + Ta8 = Ta6 - Ta7; + Taf = KP707106781 * (Tab + Tae); + Tag = Ta8 - Taf; + TdE = Ta8 + Taf; + } + { + E Tcw, Tcz, T7N, T7O; + Tcw = Tcu + Tcv; + Tcz = KP707106781 * (Tcx - Tcy); + TcA = Tcw - 
Tcz; + Te6 = Tcw + Tcz; + T7N = T4F + T4I; + T7O = T2a + T27; + T7P = T7N + T7O; + T94 = T7N - T7O; + } + } + { + E TC, Tb1, T2Z, TaQ, T2X, Tb2, T7m, TaR, TJ, Tb4, Tb5, T2Q, T36, TaV, TaY; + E T7n, Tfq, Tfr; + { + E Tw, Tx, Ty, Tz, TA, TB; + Tw = cr[WS(rs, 2)]; + Tx = ci[WS(rs, 29)]; + Ty = Tw + Tx; + Tz = cr[WS(rs, 18)]; + TA = ci[WS(rs, 13)]; + TB = Tz + TA; + TC = Ty + TB; + Tb1 = Tz - TA; + T2Z = Ty - TB; + TaQ = Tw - Tx; + } + { + E T2R, T2S, T2T, T2U, T2V, T2W; + T2R = ci[WS(rs, 61)]; + T2S = cr[WS(rs, 34)]; + T2T = T2R - T2S; + T2U = ci[WS(rs, 45)]; + T2V = cr[WS(rs, 50)]; + T2W = T2U - T2V; + T2X = T2T - T2W; + Tb2 = T2R + T2S; + T7m = T2T + T2W; + TaR = T2U + T2V; + } + { + E TF, TaT, T35, TaU, TI, TaW, T32, TaX; + { + E TD, TE, T33, T34; + TD = cr[WS(rs, 10)]; + TE = ci[WS(rs, 21)]; + TF = TD + TE; + TaT = TD - TE; + T33 = ci[WS(rs, 53)]; + T34 = cr[WS(rs, 42)]; + T35 = T33 - T34; + TaU = T33 + T34; + } + { + E TG, TH, T30, T31; + TG = ci[WS(rs, 5)]; + TH = cr[WS(rs, 26)]; + TI = TG + TH; + TaW = TG - TH; + T30 = ci[WS(rs, 37)]; + T31 = cr[WS(rs, 58)]; + T32 = T30 - T31; + TaX = T30 + T31; + } + TJ = TF + TI; + Tb4 = TaT + TaU; + Tb5 = TaW + TaX; + T2Q = TF - TI; + T36 = T32 - T35; + TaV = TaT - TaU; + TaY = TaW - TaX; + T7n = T35 + T32; + } + TK = TC + TJ; + T7o = T7m + T7n; + { + E T2Y, T37, Tft, Tfu; + T2Y = T2Q + T2X; + T37 = T2Z + T36; + T38 = FMA(KP923879532, T2Y, KP382683432 * T37); + T4P = FNMS(KP382683432, T2Y, KP923879532 * T37); + Tft = TaQ + TaR; + Tfu = KP707106781 * (Tb4 + Tb5); + Tfv = Tft - Tfu; + Thn = Tft + Tfu; + } + { + E T5U, T5V, TaS, TaZ; + T5U = T2X - T2Q; + T5V = T2Z - T36; + T5W = FMA(KP382683432, T5U, KP923879532 * T5V); + T6j = FNMS(KP923879532, T5U, KP382683432 * T5V); + TaS = TaQ - TaR; + TaZ = KP707106781 * (TaV + TaY); + Tb0 = TaS - TaZ; + TdK = TaS + TaZ; + } + Tfq = Tb2 - Tb1; + Tfr = KP707106781 * (TaV - TaY); + Tfs = Tfq + Tfr; + Tho = Tfq - Tfr; + { + E T8I, T8J, Tb3, Tb6; + T8I = TC - TJ; + T8J = T7m - T7n; + T8K = 
T8I + T8J; + T97 = T8I - T8J; + Tb3 = Tb1 + Tb2; + Tb6 = KP707106781 * (Tb4 - Tb5); + Tb7 = Tb3 - Tb6; + TdL = Tb3 + Tb6; + } + } + { + E TR, TaI, T2G, Tax, T2E, TaJ, T7j, Tay, TY, TaL, TaM, T2x, T2N, TaC, TaF; + E T7k, Tfj, Tfk; + { + E TL, TM, TN, TO, TP, TQ; + TL = ci[WS(rs, 1)]; + TM = cr[WS(rs, 30)]; + TN = TL + TM; + TO = cr[WS(rs, 14)]; + TP = ci[WS(rs, 17)]; + TQ = TO + TP; + TR = TN + TQ; + TaI = TL - TM; + T2G = TN - TQ; + Tax = TO - TP; + } + { + E T2y, T2z, T2A, T2B, T2C, T2D; + T2y = ci[WS(rs, 33)]; + T2z = cr[WS(rs, 62)]; + T2A = T2y - T2z; + T2B = ci[WS(rs, 49)]; + T2C = cr[WS(rs, 46)]; + T2D = T2B - T2C; + T2E = T2A - T2D; + TaJ = T2B + T2C; + T7j = T2A + T2D; + Tay = T2y + T2z; + } + { + E TU, TaA, T2M, TaB, TX, TaD, T2J, TaE; + { + E TS, TT, T2K, T2L; + TS = cr[WS(rs, 6)]; + TT = ci[WS(rs, 25)]; + TU = TS + TT; + TaA = TS - TT; + T2K = ci[WS(rs, 57)]; + T2L = cr[WS(rs, 38)]; + T2M = T2K - T2L; + TaB = T2K + T2L; + } + { + E TV, TW, T2H, T2I; + TV = ci[WS(rs, 9)]; + TW = cr[WS(rs, 22)]; + TX = TV + TW; + TaD = TV - TW; + T2H = ci[WS(rs, 41)]; + T2I = cr[WS(rs, 54)]; + T2J = T2H - T2I; + TaE = T2H + T2I; + } + TY = TU + TX; + TaL = TaA - TaB; + TaM = TaD - TaE; + T2x = TU - TX; + T2N = T2J - T2M; + TaC = TaA + TaB; + TaF = TaD + TaE; + T7k = T2M + T2J; + } + TZ = TR + TY; + T7l = T7j + T7k; + { + E T2F, T2O, Tfm, Tfn; + T2F = T2x + T2E; + T2O = T2G + T2N; + T2P = FNMS(KP382683432, T2O, KP923879532 * T2F); + T4Q = FMA(KP382683432, T2F, KP923879532 * T2O); + Tfm = TaI + TaJ; + Tfn = KP707106781 * (TaC + TaF); + Tfo = Tfm - Tfn; + Thq = Tfm + Tfn; + } + { + E T5R, T5S, Taz, TaG; + T5R = T2E - T2x; + T5S = T2G - T2N; + T5T = FNMS(KP923879532, T5S, KP382683432 * T5R); + T6k = FMA(KP923879532, T5R, KP382683432 * T5S); + Taz = Tax - Tay; + TaG = KP707106781 * (TaC - TaF); + TaH = Taz - TaG; + TdH = Taz + TaG; + } + Tfj = KP707106781 * (TaL - TaM); + Tfk = Tax + Tay; + Tfl = Tfj - Tfk; + Thr = Tfk + Tfj; + { + E T8F, T8G, TaK, TaN; + T8F = T7j - T7k; + T8G 
= TR - TY; + T8H = T8F - T8G; + T98 = T8G + T8F; + TaK = TaI - TaJ; + TaN = KP707106781 * (TaL + TaM); + TaO = TaK - TaN; + TdI = TaK + TaN; + } + } + { + E Ti, T2j, Tl, T2g, T2d, T2k, Tfc, Tfb, Tat, Taq, Tp, T2s, Ts, T2p, T2m; + E T2t, Tff, Tfe, Tam, Taj; + { + E Tar, Tas, Tao, Tap; + { + E Tg, Th, T2h, T2i; + Tg = cr[WS(rs, 4)]; + Th = ci[WS(rs, 27)]; + Ti = Tg + Th; + Tar = Tg - Th; + T2h = ci[WS(rs, 43)]; + T2i = cr[WS(rs, 52)]; + T2j = T2h - T2i; + Tas = T2h + T2i; + } + { + E Tj, Tk, T2e, T2f; + Tj = cr[WS(rs, 20)]; + Tk = ci[WS(rs, 11)]; + Tl = Tj + Tk; + Tao = Tj - Tk; + T2e = ci[WS(rs, 59)]; + T2f = cr[WS(rs, 36)]; + T2g = T2e - T2f; + Tap = T2e + T2f; + } + T2d = Ti - Tl; + T2k = T2g - T2j; + Tfc = Tap - Tao; + Tfb = Tar + Tas; + Tat = Tar - Tas; + Taq = Tao + Tap; + } + { + E Tak, Tal, Tah, Tai; + { + E Tn, To, T2q, T2r; + Tn = ci[WS(rs, 3)]; + To = cr[WS(rs, 28)]; + Tp = Tn + To; + Tak = Tn - To; + T2q = ci[WS(rs, 51)]; + T2r = cr[WS(rs, 44)]; + T2s = T2q - T2r; + Tal = T2q + T2r; + } + { + E Tq, Tr, T2n, T2o; + Tq = cr[WS(rs, 12)]; + Tr = ci[WS(rs, 19)]; + Ts = Tq + Tr; + Tah = Tq - Tr; + T2n = ci[WS(rs, 35)]; + T2o = cr[WS(rs, 60)]; + T2p = T2n - T2o; + Tai = T2n + T2o; + } + T2m = Tp - Ts; + T2t = T2p - T2s; + Tff = Tah + Tai; + Tfe = Tak + Tal; + Tam = Tak - Tal; + Taj = Tah - Tai; + } + { + E Tm, Tt, Tfd, Tfg; + Tm = Ti + Tl; + Tt = Tp + Ts; + Tu = Tm + Tt; + T95 = Tm - Tt; + Tfd = FNMS(KP923879532, Tfc, KP382683432 * Tfb); + Tfg = FNMS(KP923879532, Tff, KP382683432 * Tfe); + Tfh = Tfd + Tfg; + ThN = Tfd - Tfg; + } + { + E Tgh, Tgi, T2l, T2u; + Tgh = FMA(KP382683432, Tfc, KP923879532 * Tfb); + Tgi = FMA(KP382683432, Tff, KP923879532 * Tfe); + Tgj = Tgh - Tgi; + Thl = Tgh + Tgi; + T2l = T2d - T2k; + T2u = T2m + T2t; + T2v = KP707106781 * (T2l + T2u); + T6h = KP707106781 * (T2l - T2u); + } + { + E T4L, T4M, Tan, Tau; + T4L = T2d + T2k; + T4M = T2t - T2m; + T4N = KP707106781 * (T4L + T4M); + T5P = KP707106781 * (T4M - T4L); + Tan = FNMS(KP382683432, 
Tam, KP923879532 * Taj); + Tau = FMA(KP923879532, Taq, KP382683432 * Tat); + Tav = Tan - Tau; + Te7 = Tau + Tan; + } + { + E TcB, TcC, T7Q, T7R; + TcB = FNMS(KP382683432, Taq, KP923879532 * Tat); + TcC = FMA(KP382683432, Taj, KP923879532 * Tam); + TcD = TcB - TcC; + TdF = TcB + TcC; + T7Q = T2g + T2j; + T7R = T2p + T2s; + T7S = T7Q + T7R; + T8D = T7R - T7Q; + } + } + { + E T1z, T1C, T1D, Tcf, TbO, T4o, T4r, T7B, Tcg, TbP, T1G, T3Y, T1J, T3V, T1K; + E T7C, Tcj, Tci, TbW, TbT, T1S, TfV, TfW, T41, T48, Tc8, Tcb, T7E, T1Z, TfY; + E TfZ, T4a, T4h, Tc1, Tc4, T7F; + { + E T1x, T1y, T1A, T1B; + T1x = ci[0]; + T1y = cr[WS(rs, 31)]; + T1z = T1x + T1y; + T1A = cr[WS(rs, 15)]; + T1B = ci[WS(rs, 16)]; + T1C = T1A + T1B; + T1D = T1z + T1C; + Tcf = T1A - T1B; + TbO = T1x - T1y; + } + { + E T4m, T4n, T4p, T4q; + T4m = ci[WS(rs, 32)]; + T4n = cr[WS(rs, 63)]; + T4o = T4m - T4n; + T4p = ci[WS(rs, 48)]; + T4q = cr[WS(rs, 47)]; + T4r = T4p - T4q; + T7B = T4o + T4r; + Tcg = T4m + T4n; + TbP = T4p + T4q; + } + { + E TbR, TbS, TbU, TbV; + { + E T1E, T1F, T3W, T3X; + T1E = cr[WS(rs, 7)]; + T1F = ci[WS(rs, 24)]; + T1G = T1E + T1F; + TbR = T1E - T1F; + T3W = ci[WS(rs, 56)]; + T3X = cr[WS(rs, 39)]; + T3Y = T3W - T3X; + TbS = T3W + T3X; + } + { + E T1H, T1I, T3T, T3U; + T1H = ci[WS(rs, 8)]; + T1I = cr[WS(rs, 23)]; + T1J = T1H + T1I; + TbU = T1H - T1I; + T3T = ci[WS(rs, 40)]; + T3U = cr[WS(rs, 55)]; + T3V = T3T - T3U; + TbV = T3T + T3U; + } + T1K = T1G + T1J; + T7C = T3Y + T3V; + Tcj = TbU + TbV; + Tci = TbR + TbS; + TbW = TbU - TbV; + TbT = TbR - TbS; + } + { + E T1O, Tc9, T47, Tca, T1R, Tc6, T44, Tc7; + { + E T1M, T1N, T45, T46; + T1M = cr[WS(rs, 3)]; + T1N = ci[WS(rs, 28)]; + T1O = T1M + T1N; + Tc9 = T1M - T1N; + T45 = ci[WS(rs, 44)]; + T46 = cr[WS(rs, 51)]; + T47 = T45 - T46; + Tca = T45 + T46; + } + { + E T1P, T1Q, T42, T43; + T1P = cr[WS(rs, 19)]; + T1Q = ci[WS(rs, 12)]; + T1R = T1P + T1Q; + Tc6 = T1P - T1Q; + T42 = ci[WS(rs, 60)]; + T43 = cr[WS(rs, 35)]; + T44 = T42 - T43; + Tc7 = T42 + 
T43; + } + T1S = T1O + T1R; + TfV = Tc9 + Tca; + TfW = Tc7 - Tc6; + T41 = T1O - T1R; + T48 = T44 - T47; + Tc8 = Tc6 + Tc7; + Tcb = Tc9 - Tca; + T7E = T44 + T47; + } + { + E T1V, Tc2, T4g, Tc3, T1Y, TbZ, T4d, Tc0; + { + E T1T, T1U, T4e, T4f; + T1T = ci[WS(rs, 4)]; + T1U = cr[WS(rs, 27)]; + T1V = T1T + T1U; + Tc2 = T1T - T1U; + T4e = ci[WS(rs, 52)]; + T4f = cr[WS(rs, 43)]; + T4g = T4e - T4f; + Tc3 = T4e + T4f; + } + { + E T1W, T1X, T4b, T4c; + T1W = cr[WS(rs, 11)]; + T1X = ci[WS(rs, 20)]; + T1Y = T1W + T1X; + TbZ = T1W - T1X; + T4b = ci[WS(rs, 36)]; + T4c = cr[WS(rs, 59)]; + T4d = T4b - T4c; + Tc0 = T4b + T4c; + } + T1Z = T1V + T1Y; + TfY = Tc2 + Tc3; + TfZ = TbZ + Tc0; + T4a = T1V - T1Y; + T4h = T4d - T4g; + Tc1 = TbZ - Tc0; + Tc4 = Tc2 - Tc3; + T7F = T4d + T4g; + } + T1L = T1D + T1K; + T20 = T1S + T1Z; + T7A = T1L - T20; + T7D = T7B + T7C; + T7G = T7E + T7F; + T7H = T7D - T7G; + { + E T3S, T3Z, TfX, Tg0; + T3S = T1z - T1C; + T3Z = T3V - T3Y; + T40 = T3S + T3Z; + T62 = T3S - T3Z; + TfX = FNMS(KP923879532, TfW, KP382683432 * TfV); + Tg0 = FNMS(KP923879532, TfZ, KP382683432 * TfY); + Tg1 = TfX + Tg0; + Thv = TfX - Tg0; + } + { + E Tg6, Tg7, Tg3, Tg4; + Tg6 = FMA(KP382683432, TfW, KP923879532 * TfV); + Tg7 = FMA(KP382683432, TfZ, KP923879532 * TfY); + Tg8 = Tg6 - Tg7; + Thz = Tg6 + Tg7; + Tg3 = KP707106781 * (TbT - TbW); + Tg4 = Tcf + Tcg; + Tg5 = Tg3 - Tg4; + Thw = Tg4 + Tg3; + } + { + E T4l, T4s, T49, T4i; + T4l = T1G - T1J; + T4s = T4o - T4r; + T4t = T4l + T4s; + T5Z = T4s - T4l; + T49 = T41 - T48; + T4i = T4a + T4h; + T4j = KP707106781 * (T49 + T4i); + T60 = KP707106781 * (T49 - T4i); + } + { + E T4u, T4v, TbQ, TbX; + T4u = T41 + T48; + T4v = T4h - T4a; + T4w = KP707106781 * (T4u + T4v); + T63 = KP707106781 * (T4v - T4u); + TbQ = TbO - TbP; + TbX = KP707106781 * (TbT + TbW); + TbY = TbQ - TbX; + TdS = TbQ + TbX; + } + { + E Tc5, Tcc, TfS, TfT; + Tc5 = FNMS(KP382683432, Tc4, KP923879532 * Tc1); + Tcc = FMA(KP923879532, Tc8, KP382683432 * Tcb); + Tcd = Tc5 - Tcc; + 
TdQ = Tcc + Tc5; + TfS = TbO + TbP; + TfT = KP707106781 * (Tci + Tcj); + TfU = TfS - TfT; + Thy = TfS + TfT; + } + { + E T8N, T8O, T8Q, T8R; + T8N = T7B - T7C; + T8O = T1S - T1Z; + T8P = T8N - T8O; + T9z = T8O + T8N; + T8Q = T1D - T1K; + T8R = T7F - T7E; + T8S = T8Q - T8R; + T9A = T8Q + T8R; + } + { + E Tch, Tck, Tcm, Tcn; + Tch = Tcf - Tcg; + Tck = KP707106781 * (Tci - Tcj); + Tcl = Tch - Tck; + TdP = Tch + Tck; + Tcm = FNMS(KP382683432, Tc8, KP923879532 * Tcb); + Tcn = FMA(KP382683432, Tc1, KP923879532 * Tc4); + Tco = Tcm - Tcn; + TdT = Tcm + Tcn; + } + } + { + E T14, T17, T18, TbC, Tbb, T3H, T3K, T7s, TbD, Tbc, T1b, T3h, T1e, T3e, T1f; + E T7t, TbG, TbF, Tbj, Tbg, T1n, TfC, TfD, T3k, T3r, Tbv, Tby, T7v, T1u, TfF; + E TfG, T3t, T3A, Tbo, Tbr, T7w; + { + E T12, T13, T15, T16; + T12 = cr[WS(rs, 1)]; + T13 = ci[WS(rs, 30)]; + T14 = T12 + T13; + T15 = cr[WS(rs, 17)]; + T16 = ci[WS(rs, 14)]; + T17 = T15 + T16; + T18 = T14 + T17; + TbC = T15 - T16; + Tbb = T12 - T13; + } + { + E T3F, T3G, T3I, T3J; + T3F = ci[WS(rs, 62)]; + T3G = cr[WS(rs, 33)]; + T3H = T3F - T3G; + T3I = ci[WS(rs, 46)]; + T3J = cr[WS(rs, 49)]; + T3K = T3I - T3J; + T7s = T3H + T3K; + TbD = T3F + T3G; + Tbc = T3I + T3J; + } + { + E Tbe, Tbf, Tbh, Tbi; + { + E T19, T1a, T3f, T3g; + T19 = cr[WS(rs, 9)]; + T1a = ci[WS(rs, 22)]; + T1b = T19 + T1a; + Tbe = T19 - T1a; + T3f = ci[WS(rs, 54)]; + T3g = cr[WS(rs, 41)]; + T3h = T3f - T3g; + Tbf = T3f + T3g; + } + { + E T1c, T1d, T3c, T3d; + T1c = ci[WS(rs, 6)]; + T1d = cr[WS(rs, 25)]; + T1e = T1c + T1d; + Tbh = T1c - T1d; + T3c = ci[WS(rs, 38)]; + T3d = cr[WS(rs, 57)]; + T3e = T3c - T3d; + Tbi = T3c + T3d; + } + T1f = T1b + T1e; + T7t = T3h + T3e; + TbG = Tbh + Tbi; + TbF = Tbe + Tbf; + Tbj = Tbh - Tbi; + Tbg = Tbe - Tbf; + } + { + E T1j, Tbw, T3q, Tbx, T1m, Tbt, T3n, Tbu; + { + E T1h, T1i, T3o, T3p; + T1h = cr[WS(rs, 5)]; + T1i = ci[WS(rs, 26)]; + T1j = T1h + T1i; + Tbw = T1h - T1i; + T3o = ci[WS(rs, 42)]; + T3p = cr[WS(rs, 53)]; + T3q = T3o - T3p; + Tbx = T3o + 
T3p; + } + { + E T1k, T1l, T3l, T3m; + T1k = cr[WS(rs, 21)]; + T1l = ci[WS(rs, 10)]; + T1m = T1k + T1l; + Tbt = T1k - T1l; + T3l = ci[WS(rs, 58)]; + T3m = cr[WS(rs, 37)]; + T3n = T3l - T3m; + Tbu = T3l + T3m; + } + T1n = T1j + T1m; + TfC = Tbw + Tbx; + TfD = Tbu - Tbt; + T3k = T1j - T1m; + T3r = T3n - T3q; + Tbv = Tbt + Tbu; + Tby = Tbw - Tbx; + T7v = T3n + T3q; + } + { + E T1q, Tbp, T3z, Tbq, T1t, Tbm, T3w, Tbn; + { + E T1o, T1p, T3x, T3y; + T1o = ci[WS(rs, 2)]; + T1p = cr[WS(rs, 29)]; + T1q = T1o + T1p; + Tbp = T1o - T1p; + T3x = ci[WS(rs, 50)]; + T3y = cr[WS(rs, 45)]; + T3z = T3x - T3y; + Tbq = T3x + T3y; + } + { + E T1r, T1s, T3u, T3v; + T1r = cr[WS(rs, 13)]; + T1s = ci[WS(rs, 18)]; + T1t = T1r + T1s; + Tbm = T1r - T1s; + T3u = ci[WS(rs, 34)]; + T3v = cr[WS(rs, 61)]; + T3w = T3u - T3v; + Tbn = T3u + T3v; + } + T1u = T1q + T1t; + TfF = Tbp + Tbq; + TfG = Tbm + Tbn; + T3t = T1q - T1t; + T3A = T3w - T3z; + Tbo = Tbm - Tbn; + Tbr = Tbp - Tbq; + T7w = T3w + T3z; + } + T1g = T18 + T1f; + T1v = T1n + T1u; + T7r = T1g - T1v; + T7u = T7s + T7t; + T7x = T7v + T7w; + T7y = T7u - T7x; + { + E T3b, T3i, TfE, TfH; + T3b = T14 - T17; + T3i = T3e - T3h; + T3j = T3b + T3i; + T69 = T3b - T3i; + TfE = FNMS(KP923879532, TfD, KP382683432 * TfC); + TfH = FNMS(KP923879532, TfG, KP382683432 * TfF); + TfI = TfE + TfH; + ThD = TfE - TfH; + } + { + E TfN, TfO, TfK, TfL; + TfN = FMA(KP382683432, TfD, KP923879532 * TfC); + TfO = FMA(KP382683432, TfG, KP923879532 * TfF); + TfP = TfN - TfO; + ThG = TfN + TfO; + TfK = TbD - TbC; + TfL = KP707106781 * (Tbg - Tbj); + TfM = TfK + TfL; + ThC = TfK - TfL; + } + { + E T3E, T3L, T3s, T3B; + T3E = T1b - T1e; + T3L = T3H - T3K; + T3M = T3E + T3L; + T66 = T3L - T3E; + T3s = T3k - T3r; + T3B = T3t + T3A; + T3C = KP707106781 * (T3s + T3B); + T67 = KP707106781 * (T3s - T3B); + } + { + E T3N, T3O, Tbd, Tbk; + T3N = T3k + T3r; + T3O = T3A - T3t; + T3P = KP707106781 * (T3N + T3O); + T6a = KP707106781 * (T3O - T3N); + Tbd = Tbb - Tbc; + Tbk = KP707106781 * 
(Tbg + Tbj); + Tbl = Tbd - Tbk; + TdZ = Tbd + Tbk; + } + { + E Tbs, Tbz, Tfz, TfA; + Tbs = FNMS(KP382683432, Tbr, KP923879532 * Tbo); + Tbz = FMA(KP923879532, Tbv, KP382683432 * Tby); + TbA = Tbs - Tbz; + TdX = Tbz + Tbs; + Tfz = Tbb + Tbc; + TfA = KP707106781 * (TbF + TbG); + TfB = Tfz - TfA; + ThF = Tfz + TfA; + } + { + E T8U, T8V, T8X, T8Y; + T8U = T7s - T7t; + T8V = T1n - T1u; + T8W = T8U - T8V; + T9C = T8V + T8U; + T8X = T18 - T1f; + T8Y = T7w - T7v; + T8Z = T8X - T8Y; + T9D = T8X + T8Y; + } + { + E TbE, TbH, TbJ, TbK; + TbE = TbC + TbD; + TbH = KP707106781 * (TbF - TbG); + TbI = TbE - TbH; + TdW = TbE + TbH; + TbJ = FNMS(KP382683432, Tbv, KP923879532 * Tby); + TbK = FMA(KP382683432, Tbo, KP923879532 * Tbr); + TbL = TbJ - TbK; + Te0 = TbJ + TbK; + } + } + { + E T11, T8q, T8n, T8r, T22, T8v, T8k, T8u; + { + E Tv, T10, T8l, T8m; + Tv = Tf + Tu; + T10 = TK + TZ; + T11 = Tv + T10; + T8q = Tv - T10; + T8l = T7u + T7x; + T8m = T7D + T7G; + T8n = T8l + T8m; + T8r = T8m - T8l; + } + { + E T1w, T21, T8i, T8j; + T1w = T1g + T1v; + T21 = T1L + T20; + T22 = T1w + T21; + T8v = T1w - T21; + T8i = T7P + T7S; + T8j = T7o + T7l; + T8k = T8i + T8j; + T8u = T8i - T8j; + } + cr[0] = T11 + T22; + ci[0] = T8k + T8n; + { + E T8g, T8o, T8f, T8h; + T8g = T11 - T22; + T8o = T8k - T8n; + T8f = W[62]; + T8h = W[63]; + cr[WS(rs, 32)] = FNMS(T8h, T8o, T8f * T8g); + ci[WS(rs, 32)] = FMA(T8h, T8g, T8f * T8o); + } + { + E T8s, T8w, T8p, T8t; + T8s = T8q - T8r; + T8w = T8u - T8v; + T8p = W[94]; + T8t = W[95]; + cr[WS(rs, 48)] = FNMS(T8t, T8w, T8p * T8s); + ci[WS(rs, 48)] = FMA(T8p, T8w, T8t * T8s); + } + { + E T8y, T8A, T8x, T8z; + T8y = T8q + T8r; + T8A = T8v + T8u; + T8x = W[30]; + T8z = W[31]; + cr[WS(rs, 16)] = FNMS(T8z, T8A, T8x * T8y); + ci[WS(rs, 16)] = FMA(T8x, T8A, T8z * T8y); + } + } + { + E T9y, T9U, T9N, T9V, T9F, T9Z, T9K, T9Y; + { + E T9w, T9x, T9L, T9M; + T9w = T8C + T8D; + T9x = KP707106781 * (T97 + T98); + T9y = T9w - T9x; + T9U = T9w + T9x; + T9L = FNMS(KP382683432, T9C, 
KP923879532 * T9D); + T9M = FMA(KP382683432, T9z, KP923879532 * T9A); + T9N = T9L - T9M; + T9V = T9L + T9M; + } + { + E T9B, T9E, T9I, T9J; + T9B = FNMS(KP382683432, T9A, KP923879532 * T9z); + T9E = FMA(KP923879532, T9C, KP382683432 * T9D); + T9F = T9B - T9E; + T9Z = T9E + T9B; + T9I = T95 + T94; + T9J = KP707106781 * (T8K + T8H); + T9K = T9I - T9J; + T9Y = T9I + T9J; + } + { + E T9G, T9O, T9v, T9H; + T9G = T9y - T9F; + T9O = T9K - T9N; + T9v = W[102]; + T9H = W[103]; + cr[WS(rs, 52)] = FNMS(T9H, T9O, T9v * T9G); + ci[WS(rs, 52)] = FMA(T9H, T9G, T9v * T9O); + } + { + E Ta2, Ta4, Ta1, Ta3; + Ta2 = T9U + T9V; + Ta4 = T9Y + T9Z; + Ta1 = W[6]; + Ta3 = W[7]; + cr[WS(rs, 4)] = FNMS(Ta3, Ta4, Ta1 * Ta2); + ci[WS(rs, 4)] = FMA(Ta1, Ta4, Ta3 * Ta2); + } + { + E T9Q, T9S, T9P, T9R; + T9Q = T9y + T9F; + T9S = T9K + T9N; + T9P = W[38]; + T9R = W[39]; + cr[WS(rs, 20)] = FNMS(T9R, T9S, T9P * T9Q); + ci[WS(rs, 20)] = FMA(T9R, T9Q, T9P * T9S); + } + { + E T9W, Ta0, T9T, T9X; + T9W = T9U - T9V; + Ta0 = T9Y - T9Z; + T9T = W[70]; + T9X = W[71]; + cr[WS(rs, 36)] = FNMS(T9X, Ta0, T9T * T9W); + ci[WS(rs, 36)] = FMA(T9T, Ta0, T9X * T9W); + } + } + { + E T8M, T9k, T9d, T9l, T91, T9p, T9a, T9o; + { + E T8E, T8L, T9b, T9c; + T8E = T8C - T8D; + T8L = KP707106781 * (T8H - T8K); + T8M = T8E - T8L; + T9k = T8E + T8L; + T9b = FNMS(KP923879532, T8W, KP382683432 * T8Z); + T9c = FMA(KP923879532, T8P, KP382683432 * T8S); + T9d = T9b - T9c; + T9l = T9b + T9c; + } + { + E T8T, T90, T96, T99; + T8T = FNMS(KP923879532, T8S, KP382683432 * T8P); + T90 = FMA(KP382683432, T8W, KP923879532 * T8Z); + T91 = T8T - T90; + T9p = T90 + T8T; + T96 = T94 - T95; + T99 = KP707106781 * (T97 - T98); + T9a = T96 - T99; + T9o = T96 + T99; + } + { + E T92, T9e, T8B, T93; + T92 = T8M - T91; + T9e = T9a - T9d; + T8B = W[118]; + T93 = W[119]; + cr[WS(rs, 60)] = FNMS(T93, T9e, T8B * T92); + ci[WS(rs, 60)] = FMA(T93, T92, T8B * T9e); + } + { + E T9s, T9u, T9r, T9t; + T9s = T9k + T9l; + T9u = T9o + T9p; + T9r = W[22]; + T9t = 
W[23]; + cr[WS(rs, 12)] = FNMS(T9t, T9u, T9r * T9s); + ci[WS(rs, 12)] = FMA(T9r, T9u, T9t * T9s); + } + { + E T9g, T9i, T9f, T9h; + T9g = T8M + T91; + T9i = T9a + T9d; + T9f = W[54]; + T9h = W[55]; + cr[WS(rs, 28)] = FNMS(T9h, T9i, T9f * T9g); + ci[WS(rs, 28)] = FMA(T9h, T9g, T9f * T9i); + } + { + E T9m, T9q, T9j, T9n; + T9m = T9k - T9l; + T9q = T9o - T9p; + T9j = W[86]; + T9n = W[87]; + cr[WS(rs, 44)] = FNMS(T9n, T9q, T9j * T9m); + ci[WS(rs, 44)] = FMA(T9j, T9q, T9n * T9m); + } + } + { + E T7q, T84, T7X, T85, T7J, T89, T7U, T88; + { + E T7i, T7p, T7V, T7W; + T7i = Tf - Tu; + T7p = T7l - T7o; + T7q = T7i + T7p; + T84 = T7i - T7p; + T7V = T7r + T7y; + T7W = T7H - T7A; + T7X = KP707106781 * (T7V + T7W); + T85 = KP707106781 * (T7W - T7V); + } + { + E T7z, T7I, T7M, T7T; + T7z = T7r - T7y; + T7I = T7A + T7H; + T7J = KP707106781 * (T7z + T7I); + T89 = KP707106781 * (T7z - T7I); + T7M = TK - TZ; + T7T = T7P - T7S; + T7U = T7M + T7T; + T88 = T7T - T7M; + } + { + E T7K, T7Y, T7h, T7L; + T7K = T7q - T7J; + T7Y = T7U - T7X; + T7h = W[78]; + T7L = W[79]; + cr[WS(rs, 40)] = FNMS(T7L, T7Y, T7h * T7K); + ci[WS(rs, 40)] = FMA(T7L, T7K, T7h * T7Y); + } + { + E T8c, T8e, T8b, T8d; + T8c = T84 + T85; + T8e = T88 + T89; + T8b = W[46]; + T8d = W[47]; + cr[WS(rs, 24)] = FNMS(T8d, T8e, T8b * T8c); + ci[WS(rs, 24)] = FMA(T8b, T8e, T8d * T8c); + } + { + E T80, T82, T7Z, T81; + T80 = T7q + T7J; + T82 = T7U + T7X; + T7Z = W[14]; + T81 = W[15]; + cr[WS(rs, 8)] = FNMS(T81, T82, T7Z * T80); + ci[WS(rs, 8)] = FMA(T81, T80, T7Z * T82); + } + { + E T86, T8a, T83, T87; + T86 = T84 - T85; + T8a = T88 - T89; + T83 = W[110]; + T87 = W[111]; + cr[WS(rs, 56)] = FNMS(T87, T8a, T83 * T86); + ci[WS(rs, 56)] = FMA(T83, T8a, T87 * T86); + } + } + { + E T6K, T76, T6W, T7a, T6R, T7b, T6Z, T77; + { + E T6I, T6J, T6U, T6V; + T6I = T5O + T5P; + T6J = T6j + T6k; + T6K = T6I - T6J; + T76 = T6I + T6J; + T6U = T6g + T6h; + T6V = T5W + T5T; + T6W = T6U - T6V; + T7a = T6U + T6V; + { + E T6N, T6Y, T6Q, T6X; + { + E 
T6L, T6M, T6O, T6P; + T6L = T5Z + T60; + T6M = T62 + T63; + T6N = FNMS(KP555570233, T6M, KP831469612 * T6L); + T6Y = FMA(KP555570233, T6L, KP831469612 * T6M); + T6O = T66 + T67; + T6P = T69 + T6a; + T6Q = FMA(KP831469612, T6O, KP555570233 * T6P); + T6X = FNMS(KP555570233, T6O, KP831469612 * T6P); + } + T6R = T6N - T6Q; + T7b = T6Q + T6N; + T6Z = T6X - T6Y; + T77 = T6X + T6Y; + } + } + { + E T6S, T70, T6H, T6T; + T6S = T6K - T6R; + T70 = T6W - T6Z; + T6H = W[106]; + T6T = W[107]; + cr[WS(rs, 54)] = FNMS(T6T, T70, T6H * T6S); + ci[WS(rs, 54)] = FMA(T6T, T6S, T6H * T70); + } + { + E T7e, T7g, T7d, T7f; + T7e = T76 + T77; + T7g = T7a + T7b; + T7d = W[10]; + T7f = W[11]; + cr[WS(rs, 6)] = FNMS(T7f, T7g, T7d * T7e); + ci[WS(rs, 6)] = FMA(T7d, T7g, T7f * T7e); + } + { + E T72, T74, T71, T73; + T72 = T6K + T6R; + T74 = T6W + T6Z; + T71 = W[42]; + T73 = W[43]; + cr[WS(rs, 22)] = FNMS(T73, T74, T71 * T72); + ci[WS(rs, 22)] = FMA(T73, T72, T71 * T74); + } + { + E T78, T7c, T75, T79; + T78 = T76 - T77; + T7c = T7a - T7b; + T75 = W[74]; + T79 = W[75]; + cr[WS(rs, 38)] = FNMS(T79, T7c, T75 * T78); + ci[WS(rs, 38)] = FMA(T75, T7c, T79 * T78); + } + } + { + E T3a, T52, T4S, T56, T4z, T57, T4V, T53; + { + E T2w, T39, T4O, T4R; + T2w = T2c - T2v; + T39 = T2P - T38; + T3a = T2w + T39; + T52 = T2w - T39; + T4O = T4K - T4N; + T4R = T4P - T4Q; + T4S = T4O + T4R; + T56 = T4O - T4R; + { + E T3R, T4T, T4y, T4U; + { + E T3D, T3Q, T4k, T4x; + T3D = T3j - T3C; + T3Q = T3M - T3P; + T3R = FNMS(KP831469612, T3Q, KP555570233 * T3D); + T4T = FMA(KP831469612, T3D, KP555570233 * T3Q); + T4k = T40 - T4j; + T4x = T4t - T4w; + T4y = FMA(KP555570233, T4k, KP831469612 * T4x); + T4U = FNMS(KP831469612, T4k, KP555570233 * T4x); + } + T4z = T3R + T4y; + T57 = T3R - T4y; + T4V = T4T + T4U; + T53 = T4U - T4T; + } + } + { + E T4A, T4W, T23, T4B; + T4A = T3a - T4z; + T4W = T4S - T4V; + T23 = W[82]; + T4B = W[83]; + cr[WS(rs, 42)] = FNMS(T4B, T4W, T23 * T4A); + ci[WS(rs, 42)] = FMA(T4B, T4A, T23 * T4W); + } + { 
+ E T5a, T5c, T59, T5b; + T5a = T52 + T53; + T5c = T56 + T57; + T59 = W[50]; + T5b = W[51]; + cr[WS(rs, 26)] = FNMS(T5b, T5c, T59 * T5a); + ci[WS(rs, 26)] = FMA(T59, T5c, T5b * T5a); + } + { + E T4Y, T50, T4X, T4Z; + T4Y = T3a + T4z; + T50 = T4S + T4V; + T4X = W[18]; + T4Z = W[19]; + cr[WS(rs, 10)] = FNMS(T4Z, T50, T4X * T4Y); + ci[WS(rs, 10)] = FMA(T4Z, T4Y, T4X * T50); + } + { + E T54, T58, T51, T55; + T54 = T52 - T53; + T58 = T56 - T57; + T51 = W[114]; + T55 = W[115]; + cr[WS(rs, 58)] = FNMS(T55, T58, T51 * T54); + ci[WS(rs, 58)] = FMA(T51, T58, T55 * T54); + } + } + { + E T5g, T5C, T5s, T5G, T5n, T5H, T5v, T5D; + { + E T5e, T5f, T5q, T5r; + T5e = T2c + T2v; + T5f = T4P + T4Q; + T5g = T5e + T5f; + T5C = T5e - T5f; + T5q = T4K + T4N; + T5r = T38 + T2P; + T5s = T5q + T5r; + T5G = T5q - T5r; + { + E T5j, T5t, T5m, T5u; + { + E T5h, T5i, T5k, T5l; + T5h = T3j + T3C; + T5i = T3M + T3P; + T5j = FNMS(KP195090322, T5i, KP980785280 * T5h); + T5t = FMA(KP195090322, T5h, KP980785280 * T5i); + T5k = T40 + T4j; + T5l = T4t + T4w; + T5m = FMA(KP980785280, T5k, KP195090322 * T5l); + T5u = FNMS(KP195090322, T5k, KP980785280 * T5l); + } + T5n = T5j + T5m; + T5H = T5j - T5m; + T5v = T5t + T5u; + T5D = T5u - T5t; + } + } + { + E T5o, T5w, T5d, T5p; + T5o = T5g - T5n; + T5w = T5s - T5v; + T5d = W[66]; + T5p = W[67]; + cr[WS(rs, 34)] = FNMS(T5p, T5w, T5d * T5o); + ci[WS(rs, 34)] = FMA(T5p, T5o, T5d * T5w); + } + { + E T5K, T5M, T5J, T5L; + T5K = T5C + T5D; + T5M = T5G + T5H; + T5J = W[34]; + T5L = W[35]; + cr[WS(rs, 18)] = FNMS(T5L, T5M, T5J * T5K); + ci[WS(rs, 18)] = FMA(T5J, T5M, T5L * T5K); + } + { + E T5y, T5A, T5x, T5z; + T5y = T5g + T5n; + T5A = T5s + T5v; + T5x = W[2]; + T5z = W[3]; + cr[WS(rs, 2)] = FNMS(T5z, T5A, T5x * T5y); + ci[WS(rs, 2)] = FMA(T5z, T5y, T5x * T5A); + } + { + E T5E, T5I, T5B, T5F; + T5E = T5C - T5D; + T5I = T5G - T5H; + T5B = W[98]; + T5F = W[99]; + cr[WS(rs, 50)] = FNMS(T5F, T5I, T5B * T5E); + ci[WS(rs, 50)] = FMA(T5B, T5I, T5F * T5E); + } + } + { + E 
T5Y, T6w, T6m, T6A, T6d, T6B, T6p, T6x; + { + E T5Q, T5X, T6i, T6l; + T5Q = T5O - T5P; + T5X = T5T - T5W; + T5Y = T5Q - T5X; + T6w = T5Q + T5X; + T6i = T6g - T6h; + T6l = T6j - T6k; + T6m = T6i - T6l; + T6A = T6i + T6l; + { + E T65, T6o, T6c, T6n; + { + E T61, T64, T68, T6b; + T61 = T5Z - T60; + T64 = T62 - T63; + T65 = FNMS(KP980785280, T64, KP195090322 * T61); + T6o = FMA(KP980785280, T61, KP195090322 * T64); + T68 = T66 - T67; + T6b = T69 - T6a; + T6c = FMA(KP195090322, T68, KP980785280 * T6b); + T6n = FNMS(KP980785280, T68, KP195090322 * T6b); + } + T6d = T65 - T6c; + T6B = T6c + T65; + T6p = T6n - T6o; + T6x = T6n + T6o; + } + } + { + E T6e, T6q, T5N, T6f; + T6e = T5Y - T6d; + T6q = T6m - T6p; + T5N = W[122]; + T6f = W[123]; + cr[WS(rs, 62)] = FNMS(T6f, T6q, T5N * T6e); + ci[WS(rs, 62)] = FMA(T6f, T6e, T5N * T6q); + } + { + E T6E, T6G, T6D, T6F; + T6E = T6w + T6x; + T6G = T6A + T6B; + T6D = W[26]; + T6F = W[27]; + cr[WS(rs, 14)] = FNMS(T6F, T6G, T6D * T6E); + ci[WS(rs, 14)] = FMA(T6D, T6G, T6F * T6E); + } + { + E T6s, T6u, T6r, T6t; + T6s = T5Y + T6d; + T6u = T6m + T6p; + T6r = W[58]; + T6t = W[59]; + cr[WS(rs, 30)] = FNMS(T6t, T6u, T6r * T6s); + ci[WS(rs, 30)] = FMA(T6t, T6s, T6r * T6u); + } + { + E T6y, T6C, T6v, T6z; + T6y = T6w - T6x; + T6C = T6A - T6B; + T6v = W[90]; + T6z = W[91]; + cr[WS(rs, 46)] = FNMS(T6z, T6C, T6v * T6y); + ci[WS(rs, 46)] = FMA(T6v, T6C, T6z * T6y); + } + } + { + E Tba, Tdw, TcS, Tdi, TcI, Tds, TcW, Td6, Tcr, TcX, TcL, TcT, Tdd, Tdx, Tdl; + E Tdt; + { + E Taw, Tdg, Tb9, Tdh, TaP, Tb8; + Taw = Tag - Tav; + Tdg = TcA + TcD; + TaP = FNMS(KP831469612, TaO, KP555570233 * TaH); + Tb8 = FMA(KP831469612, Tb0, KP555570233 * Tb7); + Tb9 = TaP - Tb8; + Tdh = Tb8 + TaP; + Tba = Taw + Tb9; + Tdw = Tdg - Tdh; + TcS = Taw - Tb9; + Tdi = Tdg + Tdh; + } + { + E TcE, Td4, TcH, Td5, TcF, TcG; + TcE = TcA - TcD; + Td4 = Tag + Tav; + TcF = FNMS(KP831469612, Tb7, KP555570233 * Tb0); + TcG = FMA(KP555570233, TaO, KP831469612 * TaH); + TcH = TcF - TcG; + 
Td5 = TcF + TcG; + TcI = TcE + TcH; + Tds = Td4 - Td5; + TcW = TcE - TcH; + Td6 = Td4 + Td5; + } + { + E TbN, TcJ, Tcq, TcK; + { + E TbB, TbM, Tce, Tcp; + TbB = Tbl - TbA; + TbM = TbI - TbL; + TbN = FNMS(KP956940335, TbM, KP290284677 * TbB); + TcJ = FMA(KP956940335, TbB, KP290284677 * TbM); + Tce = TbY - Tcd; + Tcp = Tcl - Tco; + Tcq = FMA(KP290284677, Tce, KP956940335 * Tcp); + TcK = FNMS(KP956940335, Tce, KP290284677 * Tcp); + } + Tcr = TbN + Tcq; + TcX = TbN - Tcq; + TcL = TcJ + TcK; + TcT = TcK - TcJ; + } + { + E Td9, Tdj, Tdc, Tdk; + { + E Td7, Td8, Tda, Tdb; + Td7 = Tbl + TbA; + Td8 = TbI + TbL; + Td9 = FNMS(KP471396736, Td8, KP881921264 * Td7); + Tdj = FMA(KP471396736, Td7, KP881921264 * Td8); + Tda = TbY + Tcd; + Tdb = Tcl + Tco; + Tdc = FMA(KP881921264, Tda, KP471396736 * Tdb); + Tdk = FNMS(KP471396736, Tda, KP881921264 * Tdb); + } + Tdd = Td9 + Tdc; + Tdx = Td9 - Tdc; + Tdl = Tdj + Tdk; + Tdt = Tdk - Tdj; + } + { + E Tcs, TcM, Ta5, Tct; + Tcs = Tba - Tcr; + TcM = TcI - TcL; + Ta5 = W[88]; + Tct = W[89]; + cr[WS(rs, 45)] = FNMS(Tct, TcM, Ta5 * Tcs); + ci[WS(rs, 45)] = FMA(Tct, Tcs, Ta5 * TcM); + } + { + E Tdu, Tdy, Tdr, Tdv; + Tdu = Tds - Tdt; + Tdy = Tdw - Tdx; + Tdr = W[104]; + Tdv = W[105]; + cr[WS(rs, 53)] = FNMS(Tdv, Tdy, Tdr * Tdu); + ci[WS(rs, 53)] = FMA(Tdr, Tdy, Tdv * Tdu); + } + { + E TdA, TdC, Tdz, TdB; + TdA = Tds + Tdt; + TdC = Tdw + Tdx; + Tdz = W[40]; + TdB = W[41]; + cr[WS(rs, 21)] = FNMS(TdB, TdC, Tdz * TdA); + ci[WS(rs, 21)] = FMA(Tdz, TdC, TdB * TdA); + } + { + E TcO, TcQ, TcN, TcP; + TcO = Tba + Tcr; + TcQ = TcI + TcL; + TcN = W[24]; + TcP = W[25]; + cr[WS(rs, 13)] = FNMS(TcP, TcQ, TcN * TcO); + ci[WS(rs, 13)] = FMA(TcP, TcO, TcN * TcQ); + } + { + E TcU, TcY, TcR, TcV; + TcU = TcS - TcT; + TcY = TcW - TcX; + TcR = W[120]; + TcV = W[121]; + cr[WS(rs, 61)] = FNMS(TcV, TcY, TcR * TcU); + ci[WS(rs, 61)] = FMA(TcR, TcY, TcV * TcU); + } + { + E Tde, Tdm, Td3, Tdf; + Tde = Td6 - Tdd; + Tdm = Tdi - Tdl; + Td3 = W[72]; + Tdf = W[73]; + cr[WS(rs, 
37)] = FNMS(Tdf, Tdm, Td3 * Tde); + ci[WS(rs, 37)] = FMA(Tdf, Tde, Td3 * Tdm); + } + { + E Tdo, Tdq, Tdn, Tdp; + Tdo = Td6 + Tdd; + Tdq = Tdi + Tdl; + Tdn = W[8]; + Tdp = W[9]; + cr[WS(rs, 5)] = FNMS(Tdp, Tdq, Tdn * Tdo); + ci[WS(rs, 5)] = FMA(Tdp, Tdo, Tdn * Tdq); + } + { + E Td0, Td2, TcZ, Td1; + Td0 = TcS + TcT; + Td2 = TcW + TcX; + TcZ = W[56]; + Td1 = W[57]; + cr[WS(rs, 29)] = FNMS(Td1, Td2, TcZ * Td0); + ci[WS(rs, 29)] = FMA(TcZ, Td2, Td1 * Td0); + } + } + { + E Tfy, Thc, Tgy, TgY, Tgo, Th8, TgC, TgM, Tgb, TgD, Tgr, Tgz, TgT, Thd, Th1; + E Th9; + { + E Tfi, TgW, Tfx, TgX, Tfp, Tfw; + Tfi = Tfa - Tfh; + TgW = Tgg + Tgj; + Tfp = FNMS(KP555570233, Tfo, KP831469612 * Tfl); + Tfw = FMA(KP831469612, Tfs, KP555570233 * Tfv); + Tfx = Tfp - Tfw; + TgX = Tfw + Tfp; + Tfy = Tfi + Tfx; + Thc = TgW - TgX; + Tgy = Tfi - Tfx; + TgY = TgW + TgX; + } + { + E Tgk, TgK, Tgn, TgL, Tgl, Tgm; + Tgk = Tgg - Tgj; + TgK = Tfa + Tfh; + Tgl = FNMS(KP555570233, Tfs, KP831469612 * Tfv); + Tgm = FMA(KP555570233, Tfl, KP831469612 * Tfo); + Tgn = Tgl - Tgm; + TgL = Tgl + Tgm; + Tgo = Tgk + Tgn; + Th8 = TgK - TgL; + TgC = Tgk - Tgn; + TgM = TgK + TgL; + } + { + E TfR, Tgp, Tga, Tgq; + { + E TfJ, TfQ, Tg2, Tg9; + TfJ = TfB - TfI; + TfQ = TfM - TfP; + TfR = FNMS(KP881921264, TfQ, KP471396736 * TfJ); + Tgp = FMA(KP881921264, TfJ, KP471396736 * TfQ); + Tg2 = TfU - Tg1; + Tg9 = Tg5 - Tg8; + Tga = FMA(KP471396736, Tg2, KP881921264 * Tg9); + Tgq = FNMS(KP881921264, Tg2, KP471396736 * Tg9); + } + Tgb = TfR + Tga; + TgD = TfR - Tga; + Tgr = Tgp + Tgq; + Tgz = Tgq - Tgp; + } + { + E TgP, TgZ, TgS, Th0; + { + E TgN, TgO, TgQ, TgR; + TgN = TfB + TfI; + TgO = TfM + TfP; + TgP = FNMS(KP290284677, TgO, KP956940335 * TgN); + TgZ = FMA(KP290284677, TgN, KP956940335 * TgO); + TgQ = TfU + Tg1; + TgR = Tg5 + Tg8; + TgS = FMA(KP956940335, TgQ, KP290284677 * TgR); + Th0 = FNMS(KP290284677, TgQ, KP956940335 * TgR); + } + TgT = TgP + TgS; + Thd = TgP - TgS; + Th1 = TgZ + Th0; + Th9 = Th0 - TgZ; + } + { + E Tgc, 
Tgs, Tf7, Tgd; + Tgc = Tfy - Tgb; + Tgs = Tgo - Tgr; + Tf7 = W[84]; + Tgd = W[85]; + cr[WS(rs, 43)] = FNMS(Tgd, Tgs, Tf7 * Tgc); + ci[WS(rs, 43)] = FMA(Tgd, Tgc, Tf7 * Tgs); + } + { + E Tha, The, Th7, Thb; + Tha = Th8 - Th9; + The = Thc - Thd; + Th7 = W[100]; + Thb = W[101]; + cr[WS(rs, 51)] = FNMS(Thb, The, Th7 * Tha); + ci[WS(rs, 51)] = FMA(Th7, The, Thb * Tha); + } + { + E Thg, Thi, Thf, Thh; + Thg = Th8 + Th9; + Thi = Thc + Thd; + Thf = W[36]; + Thh = W[37]; + cr[WS(rs, 19)] = FNMS(Thh, Thi, Thf * Thg); + ci[WS(rs, 19)] = FMA(Thf, Thi, Thh * Thg); + } + { + E Tgu, Tgw, Tgt, Tgv; + Tgu = Tfy + Tgb; + Tgw = Tgo + Tgr; + Tgt = W[20]; + Tgv = W[21]; + cr[WS(rs, 11)] = FNMS(Tgv, Tgw, Tgt * Tgu); + ci[WS(rs, 11)] = FMA(Tgv, Tgu, Tgt * Tgw); + } + { + E TgA, TgE, Tgx, TgB; + TgA = Tgy - Tgz; + TgE = TgC - TgD; + Tgx = W[116]; + TgB = W[117]; + cr[WS(rs, 59)] = FNMS(TgB, TgE, Tgx * TgA); + ci[WS(rs, 59)] = FMA(Tgx, TgE, TgB * TgA); + } + { + E TgU, Th2, TgJ, TgV; + TgU = TgM - TgT; + Th2 = TgY - Th1; + TgJ = W[68]; + TgV = W[69]; + cr[WS(rs, 35)] = FNMS(TgV, Th2, TgJ * TgU); + ci[WS(rs, 35)] = FMA(TgV, TgU, TgJ * Th2); + } + { + E Th4, Th6, Th3, Th5; + Th4 = TgM + TgT; + Th6 = TgY + Th1; + Th3 = W[4]; + Th5 = W[5]; + cr[WS(rs, 3)] = FNMS(Th5, Th6, Th3 * Th4); + ci[WS(rs, 3)] = FMA(Th5, Th4, Th3 * Th6); + } + { + E TgG, TgI, TgF, TgH; + TgG = Tgy + Tgz; + TgI = TgC + TgD; + TgF = W[52]; + TgH = W[53]; + cr[WS(rs, 27)] = FNMS(TgH, TgI, TgF * TgG); + ci[WS(rs, 27)] = FMA(TgF, TgI, TgH * TgG); + } + } + { + E TdO, Tf0, Tem, TeM, Tec, TeW, Teq, TeA, Te3, Ter, Tef, Ten, TeH, Tf1, TeP; + E TeX; + { + E TdG, TeK, TdN, TeL, TdJ, TdM; + TdG = TdE - TdF; + TeK = Te6 + Te7; + TdJ = FNMS(KP195090322, TdI, KP980785280 * TdH); + TdM = FMA(KP195090322, TdK, KP980785280 * TdL); + TdN = TdJ - TdM; + TeL = TdM + TdJ; + TdO = TdG - TdN; + Tf0 = TeK + TeL; + Tem = TdG + TdN; + TeM = TeK - TeL; + } + { + E Te8, Tey, Teb, Tez, Te9, Tea; + Te8 = Te6 - Te7; + Tey = TdE + TdF; + Te9 = 
FNMS(KP195090322, TdL, KP980785280 * TdK); + Tea = FMA(KP980785280, TdI, KP195090322 * TdH); + Teb = Te9 - Tea; + Tez = Te9 + Tea; + Tec = Te8 - Teb; + TeW = Tey + Tez; + Teq = Te8 + Teb; + TeA = Tey - Tez; + } + { + E TdV, Tee, Te2, Ted; + { + E TdR, TdU, TdY, Te1; + TdR = TdP - TdQ; + TdU = TdS - TdT; + TdV = FNMS(KP773010453, TdU, KP634393284 * TdR); + Tee = FMA(KP773010453, TdR, KP634393284 * TdU); + TdY = TdW - TdX; + Te1 = TdZ - Te0; + Te2 = FMA(KP634393284, TdY, KP773010453 * Te1); + Ted = FNMS(KP773010453, TdY, KP634393284 * Te1); + } + Te3 = TdV - Te2; + Ter = Te2 + TdV; + Tef = Ted - Tee; + Ten = Ted + Tee; + } + { + E TeD, TeO, TeG, TeN; + { + E TeB, TeC, TeE, TeF; + TeB = TdP + TdQ; + TeC = TdS + TdT; + TeD = FNMS(KP098017140, TeC, KP995184726 * TeB); + TeO = FMA(KP098017140, TeB, KP995184726 * TeC); + TeE = TdW + TdX; + TeF = TdZ + Te0; + TeG = FMA(KP995184726, TeE, KP098017140 * TeF); + TeN = FNMS(KP098017140, TeE, KP995184726 * TeF); + } + TeH = TeD - TeG; + Tf1 = TeG + TeD; + TeP = TeN - TeO; + TeX = TeN + TeO; + } + { + E Te4, Teg, TdD, Te5; + Te4 = TdO - Te3; + Teg = Tec - Tef; + TdD = W[112]; + Te5 = W[113]; + cr[WS(rs, 57)] = FNMS(Te5, Teg, TdD * Te4); + ci[WS(rs, 57)] = FMA(Te5, Te4, TdD * Teg); + } + { + E TeY, Tf2, TeV, TeZ; + TeY = TeW - TeX; + Tf2 = Tf0 - Tf1; + TeV = W[64]; + TeZ = W[65]; + cr[WS(rs, 33)] = FNMS(TeZ, Tf2, TeV * TeY); + ci[WS(rs, 33)] = FMA(TeV, Tf2, TeZ * TeY); + } + { + E Tf4, Tf6, Tf3, Tf5; + Tf4 = TeW + TeX; + Tf6 = Tf0 + Tf1; + Tf3 = W[0]; + Tf5 = W[1]; + cr[WS(rs, 1)] = FNMS(Tf5, Tf6, Tf3 * Tf4); + ci[WS(rs, 1)] = FMA(Tf3, Tf6, Tf5 * Tf4); + } + { + E Tei, Tek, Teh, Tej; + Tei = TdO + Te3; + Tek = Tec + Tef; + Teh = W[48]; + Tej = W[49]; + cr[WS(rs, 25)] = FNMS(Tej, Tek, Teh * Tei); + ci[WS(rs, 25)] = FMA(Tej, Tei, Teh * Tek); + } + { + E Teo, Tes, Tel, Tep; + Teo = Tem - Ten; + Tes = Teq - Ter; + Tel = W[80]; + Tep = W[81]; + cr[WS(rs, 41)] = FNMS(Tep, Tes, Tel * Teo); + ci[WS(rs, 41)] = FMA(Tel, Tes, Tep * Teo); + } 
+ { + E TeI, TeQ, Tex, TeJ; + TeI = TeA - TeH; + TeQ = TeM - TeP; + Tex = W[96]; + TeJ = W[97]; + cr[WS(rs, 49)] = FNMS(TeJ, TeQ, Tex * TeI); + ci[WS(rs, 49)] = FMA(TeJ, TeI, Tex * TeQ); + } + { + E TeS, TeU, TeR, TeT; + TeS = TeA + TeH; + TeU = TeM + TeP; + TeR = W[32]; + TeT = W[33]; + cr[WS(rs, 17)] = FNMS(TeT, TeU, TeR * TeS); + ci[WS(rs, 17)] = FMA(TeT, TeS, TeR * TeU); + } + { + E Teu, Tew, Tet, Tev; + Teu = Tem + Ten; + Tew = Teq + Ter; + Tet = W[16]; + Tev = W[17]; + cr[WS(rs, 9)] = FNMS(Tev, Tew, Tet * Teu); + ci[WS(rs, 9)] = FMA(Tet, Tew, Tev * Teu); + } + } + { + E Thu, TiG, Ti2, Tis, ThS, TiC, Ti6, Tig, ThJ, Ti7, ThV, Ti3, Tin, TiH, Tiv; + E TiD; + { + E Thm, Tiq, Tht, Tir, Thp, Ths; + Thm = Thk - Thl; + Tiq = ThM - ThN; + Thp = FNMS(KP980785280, Tho, KP195090322 * Thn); + Ths = FNMS(KP980785280, Thr, KP195090322 * Thq); + Tht = Thp + Ths; + Tir = Thp - Ths; + Thu = Thm - Tht; + TiG = Tiq - Tir; + Ti2 = Thm + Tht; + Tis = Tiq + Tir; + } + { + E ThO, Tie, ThR, Tif, ThP, ThQ; + ThO = ThM + ThN; + Tie = Thk + Thl; + ThP = FMA(KP195090322, Tho, KP980785280 * Thn); + ThQ = FMA(KP195090322, Thr, KP980785280 * Thq); + ThR = ThP - ThQ; + Tif = ThP + ThQ; + ThS = ThO - ThR; + TiC = Tie + Tif; + Ti6 = ThO + ThR; + Tig = Tie - Tif; + } + { + E ThB, ThU, ThI, ThT; + { + E Thx, ThA, ThE, ThH; + Thx = Thv - Thw; + ThA = Thy - Thz; + ThB = FNMS(KP634393284, ThA, KP773010453 * Thx); + ThU = FMA(KP634393284, Thx, KP773010453 * ThA); + ThE = ThC + ThD; + ThH = ThF - ThG; + ThI = FMA(KP773010453, ThE, KP634393284 * ThH); + ThT = FNMS(KP634393284, ThE, KP773010453 * ThH); + } + ThJ = ThB - ThI; + Ti7 = ThI + ThB; + ThV = ThT - ThU; + Ti3 = ThT + ThU; + } + { + E Tij, Tit, Tim, Tiu; + { + E Tih, Tii, Tik, Til; + Tih = ThF + ThG; + Tii = ThC - ThD; + Tij = FNMS(KP995184726, Tii, KP098017140 * Tih); + Tit = FMA(KP098017140, Tii, KP995184726 * Tih); + Tik = Thy + Thz; + Til = Thw + Thv; + Tim = FNMS(KP995184726, Til, KP098017140 * Tik); + Tiu = FMA(KP098017140, Til, 
KP995184726 * Tik); + } + Tin = Tij + Tim; + TiH = Tij - Tim; + Tiv = Tit - Tiu; + TiD = Tit + Tiu; + } + { + E ThK, ThW, Thj, ThL; + ThK = Thu - ThJ; + ThW = ThS - ThV; + Thj = W[108]; + ThL = W[109]; + cr[WS(rs, 55)] = FNMS(ThL, ThW, Thj * ThK); + ci[WS(rs, 55)] = FMA(ThL, ThK, Thj * ThW); + } + { + E TiE, TiI, TiB, TiF; + TiE = TiC - TiD; + TiI = TiG + TiH; + TiB = W[60]; + TiF = W[61]; + cr[WS(rs, 31)] = FNMS(TiF, TiI, TiB * TiE); + ci[WS(rs, 31)] = FMA(TiB, TiI, TiF * TiE); + } + { + E TiK, TiM, TiJ, TiL; + TiK = TiC + TiD; + TiM = TiG - TiH; + TiJ = W[124]; + TiL = W[125]; + cr[WS(rs, 63)] = FNMS(TiL, TiM, TiJ * TiK); + ci[WS(rs, 63)] = FMA(TiJ, TiM, TiL * TiK); + } + { + E ThY, Ti0, ThX, ThZ; + ThY = Thu + ThJ; + Ti0 = ThS + ThV; + ThX = W[44]; + ThZ = W[45]; + cr[WS(rs, 23)] = FNMS(ThZ, Ti0, ThX * ThY); + ci[WS(rs, 23)] = FMA(ThZ, ThY, ThX * Ti0); + } + { + E Ti4, Ti8, Ti1, Ti5; + Ti4 = Ti2 - Ti3; + Ti8 = Ti6 - Ti7; + Ti1 = W[76]; + Ti5 = W[77]; + cr[WS(rs, 39)] = FNMS(Ti5, Ti8, Ti1 * Ti4); + ci[WS(rs, 39)] = FMA(Ti1, Ti8, Ti5 * Ti4); + } + { + E Tio, Tiw, Tid, Tip; + Tio = Tig - Tin; + Tiw = Tis - Tiv; + Tid = W[92]; + Tip = W[93]; + cr[WS(rs, 47)] = FNMS(Tip, Tiw, Tid * Tio); + ci[WS(rs, 47)] = FMA(Tip, Tio, Tid * Tiw); + } + { + E Tiy, TiA, Tix, Tiz; + Tiy = Tig + Tin; + TiA = Tis + Tiv; + Tix = W[28]; + Tiz = W[29]; + cr[WS(rs, 15)] = FNMS(Tiz, TiA, Tix * Tiy); + ci[WS(rs, 15)] = FMA(Tiz, Tiy, Tix * TiA); + } + { + E Tia, Tic, Ti9, Tib; + Tia = Ti2 + Ti3; + Tic = Ti6 + Ti7; + Ti9 = W[12]; + Tib = W[13]; + cr[WS(rs, 7)] = FNMS(Tib, Tic, Ti9 * Tia); + ci[WS(rs, 7)] = FMA(Ti9, Tic, Tib * Tia); + } + } + } + } +} +/* Twiddle instruction table for this codelet; it is referenced by the descriptor that follows. NOTE(review): TW_FULL and TW_NEXT are defined outside this file -- presumably a full set of twiddles for size 64 followed by a terminator entry; confirm against the twiddle declarations. */ +static const tw_instr twinstr[] = { + {TW_FULL, 1, 64}, + {TW_NEXT, 1, 0} +}; +/* Descriptor handed to the planner at registration time: the value 64 (presumably the radix -- TODO confirm field meaning), the codelet name "hb_64", the twinstr table and &GENUS. NOTE(review): the trailing {808, 270, 230, 0} looks like per-call operation counts for this variant (adds, muls, fused multiply-adds, other); verify against the hc2hc_desc definition. */ +static const hc2hc_desc desc = { 64, "hb_64", twinstr, &GENUS, {808, 270, 230, 0} }; +/* Registration entry point: passes the hb_64 function and the address of its descriptor to X(khc2hc_register) together with planner p. It performs no other work. */ +void X(codelet_hb_64) (planner *p) { + X(khc2hc_register) (p, hb_64, &desc); +} +#endif /* HAVE_FMA */