annotate src/fftw-3.3.3/dft/scalar/codelets/t2_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 /*
Chris@10 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
Chris@10 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
Chris@10 4 *
Chris@10 5 * This program is free software; you can redistribute it and/or modify
Chris@10 6 * it under the terms of the GNU General Public License as published by
Chris@10 7 * the Free Software Foundation; either version 2 of the License, or
Chris@10 8 * (at your option) any later version.
Chris@10 9 *
Chris@10 10 * This program is distributed in the hope that it will be useful,
Chris@10 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@10 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@10 13 * GNU General Public License for more details.
Chris@10 14 *
Chris@10 15 * You should have received a copy of the GNU General Public License
Chris@10 16 * along with this program; if not, write to the Free Software
Chris@10 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@10 18 *
Chris@10 19 */
Chris@10 20
Chris@10 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@10 22 /* Generated on Sun Nov 25 07:36:11 EST 2012 */
Chris@10 23
Chris@10 24 #include "codelet-dft.h"
Chris@10 25
Chris@10 26 #ifdef HAVE_FMA
Chris@10 27
Chris@10 28 /* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 25 -name t2_25 -include t.h */
Chris@10 29
Chris@10 30 /*
Chris@10 31 * This function contains 440 FP additions, 434 FP multiplications,
Chris@10 32 * (or, 84 additions, 78 multiplications, 356 fused multiply/add),
Chris@10 33 * 215 stack variables, 47 constants, and 100 memory accesses
Chris@10 34 */
Chris@10 35 #include "t.h"
Chris@10 36
Chris@10 37 static void t2_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@10 38 {
Chris@10 39 DK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@10 40 DK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@10 41 DK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@10 42 DK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@10 43 DK(KP557913902, +0.557913902031834264187699648465567037992437152);
Chris@10 44 DK(KP249506682, +0.249506682107067890488084201715862638334226305);
Chris@10 45 DK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@10 46 DK(KP968479752, +0.968479752739016373193524836781420152702090879);
Chris@10 47 DK(KP621716863, +0.621716863012209892444754556304102309693593202);
Chris@10 48 DK(KP614372930, +0.614372930789563808870829930444362096004872855);
Chris@10 49 DK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@10 50 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@10 51 DK(KP994076283, +0.994076283785401014123185814696322018529298887);
Chris@10 52 DK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@10 53 DK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@10 54 DK(KP062914667, +0.062914667253649757225485955897349402364686947);
Chris@10 55 DK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@10 56 DK(KP943557151, +0.943557151597354104399655195398983005179443399);
Chris@10 57 DK(KP554608978, +0.554608978404018097464974850792216217022558774);
Chris@10 58 DK(KP248028675, +0.248028675328619457762448260696444630363259177);
Chris@10 59 DK(KP921177326, +0.921177326965143320250447435415066029359282231);
Chris@10 60 DK(KP833417178, +0.833417178328688677408962550243238843138996060);
Chris@10 61 DK(KP726211448, +0.726211448929902658173535992263577167607493062);
Chris@10 62 DK(KP525970792, +0.525970792408939708442463226536226366643874659);
Chris@10 63 DK(KP541454447, +0.541454447536312777046285590082819509052033189);
Chris@10 64 DK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@10 65 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@10 66 DK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@10 67 DK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@10 68 DK(KP851038619, +0.851038619207379630836264138867114231259902550);
Chris@10 69 DK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@10 70 DK(KP912018591, +0.912018591466481957908415381764119056233607330);
Chris@10 71 DK(KP470564281, +0.470564281212251493087595091036643380879947982);
Chris@10 72 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@10 73 DK(KP827271945, +0.827271945972475634034355757144307982555673741);
Chris@10 74 DK(KP126329378, +0.126329378446108174786050455341811215027378105);
Chris@10 75 DK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@10 76 DK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@10 77 DK(KP871714437, +0.871714437527667770979999223229522602943903653);
Chris@10 78 DK(KP549754652, +0.549754652192770074288023275540779861653779767);
Chris@10 79 DK(KP634619297, +0.634619297544148100711287640319130485732531031);
Chris@10 80 DK(KP939062505, +0.939062505817492352556001843133229685779824606);
Chris@10 81 DK(KP256756360, +0.256756360367726783319498520922669048172391148);
Chris@10 82 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@10 83 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@10 84 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@10 85 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@10 86 {
Chris@10 87 INT m;
Chris@10 88 for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) {
Chris@10 89 E T8c, T7k, T7i, T8i, T8g, T8b, T7j, T7b, T8d, T8h;
Chris@10 90 {
Chris@10 91 E T2, T8, T3, T6, Tk, Tv, TS, T4, Ta, TD, T2L, T10, Tm, T5, Tc;
Chris@10 92 T2 = W[0];
Chris@10 93 T8 = W[4];
Chris@10 94 T3 = W[2];
Chris@10 95 T6 = W[3];
Chris@10 96 Tk = W[6];
Chris@10 97 Tv = T2 * T8;
Chris@10 98 TS = T3 * T8;
Chris@10 99 T4 = T2 * T3;
Chris@10 100 Ta = T2 * T6;
Chris@10 101 TD = T8 * Tk;
Chris@10 102 T2L = T2 * Tk;
Chris@10 103 T10 = T3 * Tk;
Chris@10 104 Tm = W[7];
Chris@10 105 T5 = W[1];
Chris@10 106 Tc = W[5];
Chris@10 107 {
Chris@10 108 E T7G, T86, T4s, T6a, T4g, TN, T4f, T7C, T7s, T7B, T5q, T6k, T3a, T5j, T6n;
Chris@10 109 E T6m, T5g, T4a, T5n, T6j, T6C, T4G, T6z, T4z, T1v, T3t, T6y, T4w, T6B, T4D;
Chris@10 110 E T6v, T4O, T6s, T4V, T21, T3H, T6r, T4S, T6u, T4L, T26, T3K, T5a, T2A, T3U;
Chris@10 111 E T53, T2c, T3M, T2k, T3O;
Chris@10 112 {
Chris@10 113 E T11, T1b, Tb, T19, T7, T2m, TT, T15, T2Q, TX, T2p, T1g, T2a, T2e, T2i;
Chris@10 114 E T27, T1c, T1O, T1K, T1q, T1m, T2x, T2t, T1W, T1S, T2G, T3Y, T2N, T5p, T38;
Chris@10 115 E T48, T5i, T2K, T40, T2S, T41;
Chris@10 116 {
Chris@10 117 E T2M, T1j, T1l, T2X, T2U, T35, T31, T7r, T7p, T7o, T2O, T2R;
Chris@10 118 {
Chris@10 119 E T1, Tj, T4j, TK, T4q, TC, T4o, Tt, T4l;
Chris@10 120 {
Chris@10 121 E TE, Tw, TI, TA, Th, Tr, Tn, Td, Te, Ti, T14, T2P, TH, Tx, TB;
Chris@10 122 T1 = ri[0];
Chris@10 123 T11 = FMA(T6, Tm, T10);
Chris@10 124 T14 = T3 * Tm;
Chris@10 125 T2P = T2 * Tm;
Chris@10 126 TH = T8 * Tm;
Chris@10 127 T2M = FMA(T5, Tm, T2L);
Chris@10 128 T1b = FNMS(T5, T3, Ta);
Chris@10 129 Tb = FMA(T5, T3, Ta);
Chris@10 130 T19 = FMA(T5, T6, T4);
Chris@10 131 T7 = FNMS(T5, T6, T4);
Chris@10 132 T2m = FNMS(T6, Tc, TS);
Chris@10 133 TT = FMA(T6, Tc, TS);
Chris@10 134 TE = FMA(Tc, Tm, TD);
Chris@10 135 T1j = FMA(T5, Tc, Tv);
Chris@10 136 Tw = FNMS(T5, Tc, Tv);
Chris@10 137 {
Chris@10 138 E TW, Tz, T1f, T2d;
Chris@10 139 TW = T3 * Tc;
Chris@10 140 Tz = T2 * Tc;
Chris@10 141 T15 = FNMS(T6, Tk, T14);
Chris@10 142 T2Q = FNMS(T5, Tk, T2P);
Chris@10 143 TI = FNMS(Tc, Tk, TH);
Chris@10 144 T1f = T19 * Tc;
Chris@10 145 T2d = T19 * Tk;
Chris@10 146 {
Chris@10 147 E T2h, T1a, Tg, Tq;
Chris@10 148 T2h = T19 * Tm;
Chris@10 149 T1a = T19 * T8;
Chris@10 150 Tg = T7 * Tc;
Chris@10 151 Tq = T7 * Tm;
Chris@10 152 {
Chris@10 153 E Tl, T9, T1p, T1k;
Chris@10 154 Tl = T7 * Tk;
Chris@10 155 T9 = T7 * T8;
Chris@10 156 T1p = T1j * Tm;
Chris@10 157 T1k = T1j * Tk;
Chris@10 158 {
Chris@10 159 E T34, T30, T1N, T1J;
Chris@10 160 T34 = TT * Tm;
Chris@10 161 T30 = TT * Tk;
Chris@10 162 T1N = Tw * Tm;
Chris@10 163 T1J = Tw * Tk;
Chris@10 164 TX = FNMS(T6, T8, TW);
Chris@10 165 T2p = FMA(T6, T8, TW);
Chris@10 166 TA = FMA(T5, T8, Tz);
Chris@10 167 T1l = FNMS(T5, T8, Tz);
Chris@10 168 T1g = FMA(T1b, T8, T1f);
Chris@10 169 T2a = FNMS(T1b, T8, T1f);
Chris@10 170 T2e = FMA(T1b, Tm, T2d);
Chris@10 171 T2i = FNMS(T1b, Tk, T2h);
Chris@10 172 T27 = FMA(T1b, Tc, T1a);
Chris@10 173 T1c = FNMS(T1b, Tc, T1a);
Chris@10 174 T2X = FMA(Tb, T8, Tg);
Chris@10 175 Th = FNMS(Tb, T8, Tg);
Chris@10 176 Tr = FNMS(Tb, Tk, Tq);
Chris@10 177 Tn = FMA(Tb, Tm, Tl);
Chris@10 178 Td = FMA(Tb, Tc, T9);
Chris@10 179 T2U = FNMS(Tb, Tc, T9);
Chris@10 180 T35 = FNMS(TX, Tk, T34);
Chris@10 181 T31 = FMA(TX, Tm, T30);
Chris@10 182 T1O = FNMS(TA, Tk, T1N);
Chris@10 183 T1K = FMA(TA, Tm, T1J);
Chris@10 184 T1q = FNMS(T1l, Tk, T1p);
Chris@10 185 T1m = FMA(T1l, Tm, T1k);
Chris@10 186 {
Chris@10 187 E T2w, T2s, T1V, T1R;
Chris@10 188 T2w = T27 * Tm;
Chris@10 189 T2s = T27 * Tk;
Chris@10 190 T1V = Td * Tm;
Chris@10 191 T1R = Td * Tk;
Chris@10 192 T2x = FNMS(T2a, Tk, T2w);
Chris@10 193 T2t = FMA(T2a, Tm, T2s);
Chris@10 194 T1W = FNMS(Th, Tk, T1V);
Chris@10 195 T1S = FMA(Th, Tm, T1R);
Chris@10 196 T7r = ii[0];
Chris@10 197 Te = ri[WS(rs, 5)];
Chris@10 198 Ti = ii[WS(rs, 5)];
Chris@10 199 }
Chris@10 200 }
Chris@10 201 }
Chris@10 202 }
Chris@10 203 }
Chris@10 204 {
Chris@10 205 E TF, TJ, Tf, T4i, TG, T4p;
Chris@10 206 TF = ri[WS(rs, 15)];
Chris@10 207 TJ = ii[WS(rs, 15)];
Chris@10 208 Tf = Td * Te;
Chris@10 209 T4i = Td * Ti;
Chris@10 210 TG = TE * TF;
Chris@10 211 T4p = TE * TJ;
Chris@10 212 Tj = FMA(Th, Ti, Tf);
Chris@10 213 T4j = FNMS(Th, Te, T4i);
Chris@10 214 TK = FMA(TI, TJ, TG);
Chris@10 215 T4q = FNMS(TI, TF, T4p);
Chris@10 216 }
Chris@10 217 Tx = ri[WS(rs, 10)];
Chris@10 218 TB = ii[WS(rs, 10)];
Chris@10 219 {
Chris@10 220 E To, Ts, Ty, T4n, Tp, T4k;
Chris@10 221 To = ri[WS(rs, 20)];
Chris@10 222 Ts = ii[WS(rs, 20)];
Chris@10 223 Ty = Tw * Tx;
Chris@10 224 T4n = Tw * TB;
Chris@10 225 Tp = Tn * To;
Chris@10 226 T4k = Tn * Ts;
Chris@10 227 TC = FMA(TA, TB, Ty);
Chris@10 228 T4o = FNMS(TA, Tx, T4n);
Chris@10 229 Tt = FMA(Tr, Ts, Tp);
Chris@10 230 T4l = FNMS(Tr, To, T4k);
Chris@10 231 }
Chris@10 232 }
Chris@10 233 {
Chris@10 234 E TL, T7F, T4r, Tu, T7E, T4m, TM;
Chris@10 235 TL = TC + TK;
Chris@10 236 T7F = TC - TK;
Chris@10 237 T4r = T4o - T4q;
Chris@10 238 T7p = T4o + T4q;
Chris@10 239 Tu = Tj + Tt;
Chris@10 240 T7E = Tj - Tt;
Chris@10 241 T4m = T4j - T4l;
Chris@10 242 T7o = T4j + T4l;
Chris@10 243 T7G = FMA(KP618033988, T7F, T7E);
Chris@10 244 T86 = FNMS(KP618033988, T7E, T7F);
Chris@10 245 T4s = FMA(KP618033988, T4r, T4m);
Chris@10 246 T6a = FNMS(KP618033988, T4m, T4r);
Chris@10 247 T4g = Tu - TL;
Chris@10 248 TM = Tu + TL;
Chris@10 249 TN = T1 + TM;
Chris@10 250 T4f = FNMS(KP250000000, TM, T1);
Chris@10 251 }
Chris@10 252 }
Chris@10 253 {
Chris@10 254 E T2D, T2F, T7q, T2E, T3X;
Chris@10 255 T2D = ri[WS(rs, 3)];
Chris@10 256 T2F = ii[WS(rs, 3)];
Chris@10 257 T7C = T7o - T7p;
Chris@10 258 T7q = T7o + T7p;
Chris@10 259 T2E = T3 * T2D;
Chris@10 260 T3X = T3 * T2F;
Chris@10 261 {
Chris@10 262 E T2V, T2W, T2Y, T32, T36;
Chris@10 263 T2V = ri[WS(rs, 13)];
Chris@10 264 T7s = T7q + T7r;
Chris@10 265 T7B = FNMS(KP250000000, T7q, T7r);
Chris@10 266 T2G = FMA(T6, T2F, T2E);
Chris@10 267 T3Y = FNMS(T6, T2D, T3X);
Chris@10 268 T2W = T2U * T2V;
Chris@10 269 T2Y = ii[WS(rs, 13)];
Chris@10 270 T32 = ri[WS(rs, 18)];
Chris@10 271 T36 = ii[WS(rs, 18)];
Chris@10 272 {
Chris@10 273 E T2H, T2I, T2J, T3Z;
Chris@10 274 {
Chris@10 275 E T2Z, T45, T37, T47, T44, T33, T46;
Chris@10 276 T2H = ri[WS(rs, 8)];
Chris@10 277 T2Z = FMA(T2X, T2Y, T2W);
Chris@10 278 T44 = T2U * T2Y;
Chris@10 279 T33 = T31 * T32;
Chris@10 280 T46 = T31 * T36;
Chris@10 281 T2I = T1j * T2H;
Chris@10 282 T45 = FNMS(T2X, T2V, T44);
Chris@10 283 T37 = FMA(T35, T36, T33);
Chris@10 284 T47 = FNMS(T35, T32, T46);
Chris@10 285 T2J = ii[WS(rs, 8)];
Chris@10 286 T2N = ri[WS(rs, 23)];
Chris@10 287 T5p = T2Z - T37;
Chris@10 288 T38 = T2Z + T37;
Chris@10 289 T48 = T45 + T47;
Chris@10 290 T5i = T47 - T45;
Chris@10 291 T3Z = T1j * T2J;
Chris@10 292 T2O = T2M * T2N;
Chris@10 293 T2R = ii[WS(rs, 23)];
Chris@10 294 }
Chris@10 295 T2K = FMA(T1l, T2J, T2I);
Chris@10 296 T40 = FNMS(T1l, T2H, T3Z);
Chris@10 297 }
Chris@10 298 }
Chris@10 299 }
Chris@10 300 T2S = FMA(T2Q, T2R, T2O);
Chris@10 301 T41 = T2M * T2R;
Chris@10 302 }
Chris@10 303 {
Chris@10 304 E TR, T3h, T1t, T4F, T3r, T4y, TZ, T3j, T17, T3l;
Chris@10 305 {
Chris@10 306 E T12, T16, T13, T3k;
Chris@10 307 {
Chris@10 308 E TO, TP, T5m, T5l, TQ;
Chris@10 309 {
Chris@10 310 E T2T, T5o, T42, T5f, T39;
Chris@10 311 TO = ri[WS(rs, 1)];
Chris@10 312 T2T = T2K + T2S;
Chris@10 313 T5o = T2K - T2S;
Chris@10 314 T42 = FNMS(T2Q, T2N, T41);
Chris@10 315 TP = T2 * TO;
Chris@10 316 T5q = FMA(KP618033988, T5p, T5o);
Chris@10 317 T6k = FNMS(KP618033988, T5o, T5p);
Chris@10 318 T5f = T38 - T2T;
Chris@10 319 T39 = T2T + T38;
Chris@10 320 {
Chris@10 321 E T43, T5h, T5e, T49;
Chris@10 322 T43 = T40 + T42;
Chris@10 323 T5h = T42 - T40;
Chris@10 324 T5e = FNMS(KP250000000, T39, T2G);
Chris@10 325 T3a = T2G + T39;
Chris@10 326 T5j = FMA(KP618033988, T5i, T5h);
Chris@10 327 T6n = FNMS(KP618033988, T5h, T5i);
Chris@10 328 T5m = T48 - T43;
Chris@10 329 T49 = T43 + T48;
Chris@10 330 T6m = FMA(KP559016994, T5f, T5e);
Chris@10 331 T5g = FNMS(KP559016994, T5f, T5e);
Chris@10 332 T5l = FNMS(KP250000000, T49, T3Y);
Chris@10 333 T4a = T3Y + T49;
Chris@10 334 TQ = ii[WS(rs, 1)];
Chris@10 335 }
Chris@10 336 }
Chris@10 337 {
Chris@10 338 E T1n, T1r, T1i, T1o, T3o, T3p;
Chris@10 339 {
Chris@10 340 E T1d, T1h, T1e, T3n, T3g;
Chris@10 341 T1d = ri[WS(rs, 11)];
Chris@10 342 T1h = ii[WS(rs, 11)];
Chris@10 343 T5n = FNMS(KP559016994, T5m, T5l);
Chris@10 344 T6j = FMA(KP559016994, T5m, T5l);
Chris@10 345 TR = FMA(T5, TQ, TP);
Chris@10 346 T3g = T2 * TQ;
Chris@10 347 T1e = T1c * T1d;
Chris@10 348 T3n = T1c * T1h;
Chris@10 349 T1n = ri[WS(rs, 16)];
Chris@10 350 T3h = FNMS(T5, TO, T3g);
Chris@10 351 T1r = ii[WS(rs, 16)];
Chris@10 352 T1i = FMA(T1g, T1h, T1e);
Chris@10 353 T1o = T1m * T1n;
Chris@10 354 T3o = FNMS(T1g, T1d, T3n);
Chris@10 355 T3p = T1m * T1r;
Chris@10 356 }
Chris@10 357 {
Chris@10 358 E TU, TY, TV, T3i, T3q, T1s;
Chris@10 359 TU = ri[WS(rs, 6)];
Chris@10 360 T1s = FMA(T1q, T1r, T1o);
Chris@10 361 TY = ii[WS(rs, 6)];
Chris@10 362 T3q = FNMS(T1q, T1n, T3p);
Chris@10 363 TV = TT * TU;
Chris@10 364 T1t = T1i + T1s;
Chris@10 365 T4F = T1s - T1i;
Chris@10 366 T3i = TT * TY;
Chris@10 367 T3r = T3o + T3q;
Chris@10 368 T4y = T3q - T3o;
Chris@10 369 T12 = ri[WS(rs, 21)];
Chris@10 370 T16 = ii[WS(rs, 21)];
Chris@10 371 TZ = FMA(TX, TY, TV);
Chris@10 372 T3j = FNMS(TX, TU, T3i);
Chris@10 373 T13 = T11 * T12;
Chris@10 374 T3k = T11 * T16;
Chris@10 375 }
Chris@10 376 }
Chris@10 377 }
Chris@10 378 T17 = FMA(T15, T16, T13);
Chris@10 379 T3l = FNMS(T15, T12, T3k);
Chris@10 380 }
Chris@10 381 {
Chris@10 382 E T1z, T3v, T4N, T1Z, T3F, T4U, T1D, T3x, T1H, T3z;
Chris@10 383 {
Chris@10 384 E T1E, T1G, T1F, T3y;
Chris@10 385 {
Chris@10 386 E T1w, T1y, T1x, T4v, T4C, T4u, T4B, T3u, T18, T4E;
Chris@10 387 T1w = ri[WS(rs, 4)];
Chris@10 388 T1y = ii[WS(rs, 4)];
Chris@10 389 T18 = TZ + T17;
Chris@10 390 T4E = T17 - TZ;
Chris@10 391 {
Chris@10 392 E T3m, T4x, T1u, T3s;
Chris@10 393 T3m = T3j + T3l;
Chris@10 394 T4x = T3j - T3l;
Chris@10 395 T1x = T7 * T1w;
Chris@10 396 T6C = FNMS(KP618033988, T4E, T4F);
Chris@10 397 T4G = FMA(KP618033988, T4F, T4E);
Chris@10 398 T1u = T18 + T1t;
Chris@10 399 T4v = T18 - T1t;
Chris@10 400 T6z = FMA(KP618033988, T4x, T4y);
Chris@10 401 T4z = FNMS(KP618033988, T4y, T4x);
Chris@10 402 T3s = T3m + T3r;
Chris@10 403 T4C = T3m - T3r;
Chris@10 404 T1v = TR + T1u;
Chris@10 405 T4u = FNMS(KP250000000, T1u, TR);
Chris@10 406 T3t = T3h + T3s;
Chris@10 407 T4B = FNMS(KP250000000, T3s, T3h);
Chris@10 408 T3u = T7 * T1y;
Chris@10 409 }
Chris@10 410 T6y = FNMS(KP559016994, T4v, T4u);
Chris@10 411 T4w = FMA(KP559016994, T4v, T4u);
Chris@10 412 T6B = FNMS(KP559016994, T4C, T4B);
Chris@10 413 T4D = FMA(KP559016994, T4C, T4B);
Chris@10 414 T1z = FMA(Tb, T1y, T1x);
Chris@10 415 T3v = FNMS(Tb, T1w, T3u);
Chris@10 416 }
Chris@10 417 {
Chris@10 418 E T1Q, T3C, T1Y, T3E;
Chris@10 419 {
Chris@10 420 E T1L, T1P, T1T, T1X, T1M, T3B, T1U, T3D;
Chris@10 421 T1L = ri[WS(rs, 14)];
Chris@10 422 T1P = ii[WS(rs, 14)];
Chris@10 423 T1T = ri[WS(rs, 19)];
Chris@10 424 T1X = ii[WS(rs, 19)];
Chris@10 425 T1M = T1K * T1L;
Chris@10 426 T3B = T1K * T1P;
Chris@10 427 T1U = T1S * T1T;
Chris@10 428 T3D = T1S * T1X;
Chris@10 429 T1Q = FMA(T1O, T1P, T1M);
Chris@10 430 T3C = FNMS(T1O, T1L, T3B);
Chris@10 431 T1Y = FMA(T1W, T1X, T1U);
Chris@10 432 T3E = FNMS(T1W, T1T, T3D);
Chris@10 433 }
Chris@10 434 {
Chris@10 435 E T1A, T1C, T1B, T3w;
Chris@10 436 T1A = ri[WS(rs, 9)];
Chris@10 437 T1C = ii[WS(rs, 9)];
Chris@10 438 T4N = T1Y - T1Q;
Chris@10 439 T1Z = T1Q + T1Y;
Chris@10 440 T3F = T3C + T3E;
Chris@10 441 T4U = T3E - T3C;
Chris@10 442 T1B = T8 * T1A;
Chris@10 443 T3w = T8 * T1C;
Chris@10 444 T1E = ri[WS(rs, 24)];
Chris@10 445 T1G = ii[WS(rs, 24)];
Chris@10 446 T1D = FMA(Tc, T1C, T1B);
Chris@10 447 T3x = FNMS(Tc, T1A, T3w);
Chris@10 448 T1F = Tk * T1E;
Chris@10 449 T3y = Tk * T1G;
Chris@10 450 }
Chris@10 451 }
Chris@10 452 T1H = FMA(Tm, T1G, T1F);
Chris@10 453 T3z = FNMS(Tm, T1E, T3y);
Chris@10 454 }
Chris@10 455 {
Chris@10 456 E T2f, T2j, T2g, T3N;
Chris@10 457 {
Chris@10 458 E T23, T25, T24, T4R, T4K, T4Q, T4J, T3J, T1I, T4M;
Chris@10 459 T23 = ri[WS(rs, 2)];
Chris@10 460 T25 = ii[WS(rs, 2)];
Chris@10 461 T1I = T1D + T1H;
Chris@10 462 T4M = T1H - T1D;
Chris@10 463 {
Chris@10 464 E T3A, T4T, T20, T3G;
Chris@10 465 T3A = T3x + T3z;
Chris@10 466 T4T = T3z - T3x;
Chris@10 467 T24 = T19 * T23;
Chris@10 468 T6v = FNMS(KP618033988, T4M, T4N);
Chris@10 469 T4O = FMA(KP618033988, T4N, T4M);
Chris@10 470 T20 = T1I + T1Z;
Chris@10 471 T4R = T1I - T1Z;
Chris@10 472 T6s = FNMS(KP618033988, T4T, T4U);
Chris@10 473 T4V = FMA(KP618033988, T4U, T4T);
Chris@10 474 T3G = T3A + T3F;
Chris@10 475 T4K = T3F - T3A;
Chris@10 476 T21 = T1z + T20;
Chris@10 477 T4Q = FNMS(KP250000000, T20, T1z);
Chris@10 478 T3H = T3v + T3G;
Chris@10 479 T4J = FNMS(KP250000000, T3G, T3v);
Chris@10 480 T3J = T19 * T25;
Chris@10 481 }
Chris@10 482 T6r = FNMS(KP559016994, T4R, T4Q);
Chris@10 483 T4S = FMA(KP559016994, T4R, T4Q);
Chris@10 484 T6u = FMA(KP559016994, T4K, T4J);
Chris@10 485 T4L = FNMS(KP559016994, T4K, T4J);
Chris@10 486 T26 = FMA(T1b, T25, T24);
Chris@10 487 T3K = FNMS(T1b, T23, T3J);
Chris@10 488 }
Chris@10 489 {
Chris@10 490 E T2r, T3R, T2z, T3T;
Chris@10 491 {
Chris@10 492 E T2n, T2q, T2u, T2y, T2o, T3Q, T2v, T3S;
Chris@10 493 T2n = ri[WS(rs, 12)];
Chris@10 494 T2q = ii[WS(rs, 12)];
Chris@10 495 T2u = ri[WS(rs, 17)];
Chris@10 496 T2y = ii[WS(rs, 17)];
Chris@10 497 T2o = T2m * T2n;
Chris@10 498 T3Q = T2m * T2q;
Chris@10 499 T2v = T2t * T2u;
Chris@10 500 T3S = T2t * T2y;
Chris@10 501 T2r = FMA(T2p, T2q, T2o);
Chris@10 502 T3R = FNMS(T2p, T2n, T3Q);
Chris@10 503 T2z = FMA(T2x, T2y, T2v);
Chris@10 504 T3T = FNMS(T2x, T2u, T3S);
Chris@10 505 }
Chris@10 506 {
Chris@10 507 E T28, T2b, T29, T3L;
Chris@10 508 T28 = ri[WS(rs, 7)];
Chris@10 509 T2b = ii[WS(rs, 7)];
Chris@10 510 T5a = T2z - T2r;
Chris@10 511 T2A = T2r + T2z;
Chris@10 512 T3U = T3R + T3T;
Chris@10 513 T53 = T3R - T3T;
Chris@10 514 T29 = T27 * T28;
Chris@10 515 T3L = T27 * T2b;
Chris@10 516 T2f = ri[WS(rs, 22)];
Chris@10 517 T2j = ii[WS(rs, 22)];
Chris@10 518 T2c = FMA(T2a, T2b, T29);
Chris@10 519 T3M = FNMS(T2a, T28, T3L);
Chris@10 520 T2g = T2e * T2f;
Chris@10 521 T3N = T2e * T2j;
Chris@10 522 }
Chris@10 523 }
Chris@10 524 T2k = FMA(T2i, T2j, T2g);
Chris@10 525 T3O = FNMS(T2i, T2f, T3N);
Chris@10 526 }
Chris@10 527 }
Chris@10 528 }
Chris@10 529 }
Chris@10 530 {
Chris@10 531 E T7l, T5b, T6d, T54, T6g, T51, T6f, T7m, T6c, T58, T4e, T4c, T7A, T7y, T4d;
Chris@10 532 E T3f;
Chris@10 533 {
Chris@10 534 E T7w, T22, T7x, T3b, T3I, T3c, T3e, T3d;
Chris@10 535 T7l = T3t + T3H;
Chris@10 536 T3I = T3t - T3H;
Chris@10 537 {
Chris@10 538 E T2l, T59, T3P, T52;
Chris@10 539 T2l = T2c + T2k;
Chris@10 540 T59 = T2k - T2c;
Chris@10 541 T3P = T3M + T3O;
Chris@10 542 T52 = T3O - T3M;
Chris@10 543 T5b = FMA(KP618033988, T5a, T59);
Chris@10 544 T6d = FNMS(KP618033988, T59, T5a);
Chris@10 545 {
Chris@10 546 E T50, T2B, T57, T3V;
Chris@10 547 T50 = T2A - T2l;
Chris@10 548 T2B = T2l + T2A;
Chris@10 549 T54 = FNMS(KP618033988, T53, T52);
Chris@10 550 T6g = FMA(KP618033988, T52, T53);
Chris@10 551 T57 = T3U - T3P;
Chris@10 552 T3V = T3P + T3U;
Chris@10 553 {
Chris@10 554 E T4Z, T2C, T56, T3W, T4b;
Chris@10 555 T4Z = FNMS(KP250000000, T2B, T26);
Chris@10 556 T2C = T26 + T2B;
Chris@10 557 T56 = FNMS(KP250000000, T3V, T3K);
Chris@10 558 T3W = T3K + T3V;
Chris@10 559 T7w = T1v - T21;
Chris@10 560 T22 = T1v + T21;
Chris@10 561 T51 = FNMS(KP559016994, T50, T4Z);
Chris@10 562 T6f = FMA(KP559016994, T50, T4Z);
Chris@10 563 T4b = T3W - T4a;
Chris@10 564 T7m = T3W + T4a;
Chris@10 565 T6c = FMA(KP559016994, T57, T56);
Chris@10 566 T58 = FNMS(KP559016994, T57, T56);
Chris@10 567 T7x = T2C - T3a;
Chris@10 568 T3b = T2C + T3a;
Chris@10 569 T4e = FNMS(KP618033988, T3I, T4b);
Chris@10 570 T4c = FMA(KP618033988, T4b, T3I);
Chris@10 571 }
Chris@10 572 }
Chris@10 573 }
Chris@10 574 T3c = T22 + T3b;
Chris@10 575 T3e = T22 - T3b;
Chris@10 576 ri[0] = TN + T3c;
Chris@10 577 T3d = FNMS(KP250000000, T3c, TN);
Chris@10 578 T7A = FNMS(KP618033988, T7w, T7x);
Chris@10 579 T7y = FMA(KP618033988, T7x, T7w);
Chris@10 580 T4d = FNMS(KP559016994, T3e, T3d);
Chris@10 581 T3f = FMA(KP559016994, T3e, T3d);
Chris@10 582 }
Chris@10 583 {
Chris@10 584 E T69, T85, T7Y, T68, T66, T84, T82, T7X, T67, T5Z;
Chris@10 585 {
Chris@10 586 E T4t, T5H, T5Q, T7T, T7H, T5P, T5M, T5L, T5A, T7O, T5D, T7P, T7K, T7M, T5u;
Chris@10 587 E T5w, T5K, T63, T61, T5U, T7D, T7z, T7v;
Chris@10 588 {
Chris@10 589 E T7u, T7t, T4h, T7n;
Chris@10 590 T69 = FNMS(KP559016994, T4g, T4f);
Chris@10 591 T4h = FMA(KP559016994, T4g, T4f);
Chris@10 592 T7u = T7l - T7m;
Chris@10 593 T7n = T7l + T7m;
Chris@10 594 ri[WS(rs, 5)] = FMA(KP951056516, T4c, T3f);
Chris@10 595 ri[WS(rs, 20)] = FNMS(KP951056516, T4c, T3f);
Chris@10 596 ri[WS(rs, 15)] = FMA(KP951056516, T4e, T4d);
Chris@10 597 ri[WS(rs, 10)] = FNMS(KP951056516, T4e, T4d);
Chris@10 598 ii[0] = T7n + T7s;
Chris@10 599 T7t = FNMS(KP250000000, T7n, T7s);
Chris@10 600 T4t = FMA(KP951056516, T4s, T4h);
Chris@10 601 T5H = FNMS(KP951056516, T4s, T4h);
Chris@10 602 T7D = FMA(KP559016994, T7C, T7B);
Chris@10 603 T85 = FNMS(KP559016994, T7C, T7B);
Chris@10 604 T7z = FNMS(KP559016994, T7u, T7t);
Chris@10 605 T7v = FMA(KP559016994, T7u, T7t);
Chris@10 606 }
Chris@10 607 {
Chris@10 608 E T5I, T5J, T5S, T4P, T5y, T4I, T5C, T5s, T4W, T5T, T55, T5c;
Chris@10 609 {
Chris@10 610 E T4A, T4H, T5k, T5r;
Chris@10 611 T5Q = FNMS(KP951056516, T4z, T4w);
Chris@10 612 T4A = FMA(KP951056516, T4z, T4w);
Chris@10 613 T7T = FMA(KP951056516, T7G, T7D);
Chris@10 614 T7H = FNMS(KP951056516, T7G, T7D);
Chris@10 615 ii[WS(rs, 20)] = FMA(KP951056516, T7y, T7v);
Chris@10 616 ii[WS(rs, 5)] = FNMS(KP951056516, T7y, T7v);
Chris@10 617 ii[WS(rs, 15)] = FNMS(KP951056516, T7A, T7z);
Chris@10 618 ii[WS(rs, 10)] = FMA(KP951056516, T7A, T7z);
Chris@10 619 T4H = FMA(KP951056516, T4G, T4D);
Chris@10 620 T5P = FNMS(KP951056516, T4G, T4D);
Chris@10 621 T5I = FMA(KP951056516, T5j, T5g);
Chris@10 622 T5k = FNMS(KP951056516, T5j, T5g);
Chris@10 623 T5r = FNMS(KP951056516, T5q, T5n);
Chris@10 624 T5J = FMA(KP951056516, T5q, T5n);
Chris@10 625 T5S = FNMS(KP951056516, T4O, T4L);
Chris@10 626 T4P = FMA(KP951056516, T4O, T4L);
Chris@10 627 T5y = FNMS(KP256756360, T4A, T4H);
Chris@10 628 T4I = FMA(KP256756360, T4H, T4A);
Chris@10 629 T5C = FNMS(KP939062505, T5k, T5r);
Chris@10 630 T5s = FMA(KP939062505, T5r, T5k);
Chris@10 631 T4W = FNMS(KP951056516, T4V, T4S);
Chris@10 632 T5T = FMA(KP951056516, T4V, T4S);
Chris@10 633 T5M = FMA(KP951056516, T54, T51);
Chris@10 634 T55 = FNMS(KP951056516, T54, T51);
Chris@10 635 T5c = FMA(KP951056516, T5b, T58);
Chris@10 636 T5L = FNMS(KP951056516, T5b, T58);
Chris@10 637 }
Chris@10 638 {
Chris@10 639 E T4Y, T5t, T5z, T4X;
Chris@10 640 T5z = FNMS(KP634619297, T4P, T4W);
Chris@10 641 T4X = FMA(KP634619297, T4W, T4P);
Chris@10 642 {
Chris@10 643 E T5B, T5d, T7I, T7J;
Chris@10 644 T5B = FNMS(KP549754652, T55, T5c);
Chris@10 645 T5d = FMA(KP549754652, T5c, T55);
Chris@10 646 T7I = FNMS(KP871714437, T5z, T5y);
Chris@10 647 T5A = FMA(KP871714437, T5z, T5y);
Chris@10 648 T4Y = FMA(KP871714437, T4X, T4I);
Chris@10 649 T7O = FNMS(KP871714437, T4X, T4I);
Chris@10 650 T7J = FMA(KP831864738, T5C, T5B);
Chris@10 651 T5D = FNMS(KP831864738, T5C, T5B);
Chris@10 652 T5t = FMA(KP831864738, T5s, T5d);
Chris@10 653 T7P = FNMS(KP831864738, T5s, T5d);
Chris@10 654 T7K = FMA(KP904730450, T7J, T7I);
Chris@10 655 T7M = FNMS(KP904730450, T7J, T7I);
Chris@10 656 }
Chris@10 657 T5u = FMA(KP904730450, T5t, T4Y);
Chris@10 658 T5w = FNMS(KP904730450, T5t, T4Y);
Chris@10 659 }
Chris@10 660 T5K = FNMS(KP126329378, T5J, T5I);
Chris@10 661 T63 = FMA(KP126329378, T5I, T5J);
Chris@10 662 T61 = FNMS(KP827271945, T5S, T5T);
Chris@10 663 T5U = FMA(KP827271945, T5T, T5S);
Chris@10 664 }
Chris@10 665 {
Chris@10 666 E T65, T81, T62, T80, T7W, T5W, T5Y;
Chris@10 667 {
Chris@10 668 E T5O, T5V, T64, T5N;
Chris@10 669 ri[WS(rs, 1)] = FMA(KP968583161, T5u, T4t);
Chris@10 670 T64 = FMA(KP470564281, T5L, T5M);
Chris@10 671 T5N = FNMS(KP470564281, T5M, T5L);
Chris@10 672 {
Chris@10 673 E T60, T5R, T7U, T7V;
Chris@10 674 T60 = FNMS(KP634619297, T5P, T5Q);
Chris@10 675 T5R = FMA(KP634619297, T5Q, T5P);
Chris@10 676 T7U = FMA(KP912018591, T64, T63);
Chris@10 677 T65 = FNMS(KP912018591, T64, T63);
Chris@10 678 T5O = FNMS(KP912018591, T5N, T5K);
Chris@10 679 T81 = FMA(KP912018591, T5N, T5K);
Chris@10 680 T7V = FNMS(KP912575812, T61, T60);
Chris@10 681 T62 = FMA(KP912575812, T61, T60);
Chris@10 682 T5V = FNMS(KP912575812, T5U, T5R);
Chris@10 683 T80 = FMA(KP912575812, T5U, T5R);
Chris@10 684 T7W = FMA(KP851038619, T7V, T7U);
Chris@10 685 T7Y = FNMS(KP851038619, T7V, T7U);
Chris@10 686 ii[WS(rs, 1)] = FMA(KP968583161, T7K, T7H);
Chris@10 687 }
Chris@10 688 T5W = FNMS(KP851038619, T5V, T5O);
Chris@10 689 T5Y = FMA(KP851038619, T5V, T5O);
Chris@10 690 }
Chris@10 691 {
Chris@10 692 E T5G, T5E, T7S, T7Q, T7L, T5F, T5x, T5v, T5X, T7R, T7N;
Chris@10 693 T5G = FNMS(KP683113946, T5A, T5D);
Chris@10 694 T5E = FMA(KP559154169, T5D, T5A);
Chris@10 695 ii[WS(rs, 4)] = FNMS(KP992114701, T7W, T7T);
Chris@10 696 ri[WS(rs, 4)] = FNMS(KP992114701, T5W, T5H);
Chris@10 697 T5v = FNMS(KP242145790, T5u, T4t);
Chris@10 698 T7S = FNMS(KP683113946, T7O, T7P);
Chris@10 699 T7Q = FMA(KP559154169, T7P, T7O);
Chris@10 700 T7L = FNMS(KP242145790, T7K, T7H);
Chris@10 701 T5F = FNMS(KP541454447, T5w, T5v);
Chris@10 702 T5x = FMA(KP541454447, T5w, T5v);
Chris@10 703 T68 = FMA(KP525970792, T62, T65);
Chris@10 704 T66 = FNMS(KP726211448, T65, T62);
Chris@10 705 ri[WS(rs, 11)] = FNMS(KP833417178, T5G, T5F);
Chris@10 706 ri[WS(rs, 16)] = FMA(KP833417178, T5G, T5F);
Chris@10 707 ri[WS(rs, 21)] = FNMS(KP921177326, T5E, T5x);
Chris@10 708 ri[WS(rs, 6)] = FMA(KP921177326, T5E, T5x);
Chris@10 709 T7R = FNMS(KP541454447, T7M, T7L);
Chris@10 710 T7N = FMA(KP541454447, T7M, T7L);
Chris@10 711 T5X = FMA(KP248028675, T5W, T5H);
Chris@10 712 ii[WS(rs, 11)] = FMA(KP833417178, T7S, T7R);
Chris@10 713 ii[WS(rs, 16)] = FNMS(KP833417178, T7S, T7R);
Chris@10 714 ii[WS(rs, 21)] = FMA(KP921177326, T7Q, T7N);
Chris@10 715 ii[WS(rs, 6)] = FNMS(KP921177326, T7Q, T7N);
Chris@10 716 T84 = FNMS(KP525970792, T80, T81);
Chris@10 717 T82 = FMA(KP726211448, T81, T80);
Chris@10 718 T7X = FMA(KP248028675, T7W, T7T);
Chris@10 719 T67 = FNMS(KP554608978, T5Y, T5X);
Chris@10 720 T5Z = FMA(KP554608978, T5Y, T5X);
Chris@10 721 }
Chris@10 722 }
Chris@10 723 }
Chris@10 724 {
Chris@10 725 E T6b, T6T, T8j, T87, T72, T71, T6P, T8r, T6M, T8q, T7f, T6W, T8m, T8o, T6I;
Chris@10 726 E T6G, T7d, T76, T7g, T6Z, T83, T7Z;
Chris@10 727 ri[WS(rs, 14)] = FNMS(KP943557151, T68, T67);
Chris@10 728 ri[WS(rs, 19)] = FMA(KP943557151, T68, T67);
Chris@10 729 ri[WS(rs, 24)] = FMA(KP803003575, T66, T5Z);
Chris@10 730 ri[WS(rs, 9)] = FNMS(KP803003575, T66, T5Z);
Chris@10 731 T83 = FNMS(KP554608978, T7Y, T7X);
Chris@10 732 T7Z = FMA(KP554608978, T7Y, T7X);
Chris@10 733 T6b = FMA(KP951056516, T6a, T69);
Chris@10 734 T6T = FNMS(KP951056516, T6a, T69);
Chris@10 735 ii[WS(rs, 14)] = FMA(KP943557151, T84, T83);
Chris@10 736 ii[WS(rs, 19)] = FNMS(KP943557151, T84, T83);
Chris@10 737 ii[WS(rs, 24)] = FMA(KP803003575, T82, T7Z);
Chris@10 738 ii[WS(rs, 9)] = FNMS(KP803003575, T82, T7Z);
Chris@10 739 {
Chris@10 740 E T6X, T6Y, T74, T6N, T6i, T75, T6U, T6V, T6t, T6L, T6E, T6O, T6p, T6w;
Chris@10 741 {
Chris@10 742 E T6A, T6D, T6e, T6h, T6l, T6o;
Chris@10 743 T6X = FNMS(KP951056516, T6d, T6c);
Chris@10 744 T6e = FMA(KP951056516, T6d, T6c);
Chris@10 745 T6h = FMA(KP951056516, T6g, T6f);
Chris@10 746 T6Y = FNMS(KP951056516, T6g, T6f);
Chris@10 747 T74 = FMA(KP951056516, T6z, T6y);
Chris@10 748 T6A = FNMS(KP951056516, T6z, T6y);
Chris@10 749 T8j = FNMS(KP951056516, T86, T85);
Chris@10 750 T87 = FMA(KP951056516, T86, T85);
Chris@10 751 T6N = FNMS(KP062914667, T6e, T6h);
Chris@10 752 T6i = FMA(KP062914667, T6h, T6e);
Chris@10 753 T6D = FMA(KP951056516, T6C, T6B);
Chris@10 754 T75 = FNMS(KP951056516, T6C, T6B);
Chris@10 755 T6U = FMA(KP951056516, T6k, T6j);
Chris@10 756 T6l = FNMS(KP951056516, T6k, T6j);
Chris@10 757 T6o = FNMS(KP951056516, T6n, T6m);
Chris@10 758 T6V = FMA(KP951056516, T6n, T6m);
Chris@10 759 T72 = FMA(KP951056516, T6s, T6r);
Chris@10 760 T6t = FNMS(KP951056516, T6s, T6r);
Chris@10 761 T6L = FNMS(KP939062505, T6A, T6D);
Chris@10 762 T6E = FMA(KP939062505, T6D, T6A);
Chris@10 763 T6O = FMA(KP827271945, T6l, T6o);
Chris@10 764 T6p = FNMS(KP827271945, T6o, T6l);
Chris@10 765 T6w = FMA(KP951056516, T6v, T6u);
Chris@10 766 T71 = FNMS(KP951056516, T6v, T6u);
Chris@10 767 }
Chris@10 768 {
Chris@10 769 E T8k, T6q, T6K, T6x, T8l, T6F;
Chris@10 770 T8k = FMA(KP772036680, T6O, T6N);
Chris@10 771 T6P = FNMS(KP772036680, T6O, T6N);
Chris@10 772 T6q = FMA(KP772036680, T6p, T6i);
Chris@10 773 T8r = FNMS(KP772036680, T6p, T6i);
Chris@10 774 T6K = FMA(KP126329378, T6t, T6w);
Chris@10 775 T6x = FNMS(KP126329378, T6w, T6t);
Chris@10 776 T8l = FNMS(KP734762448, T6L, T6K);
Chris@10 777 T6M = FMA(KP734762448, T6L, T6K);
Chris@10 778 T6F = FNMS(KP734762448, T6E, T6x);
Chris@10 779 T8q = FMA(KP734762448, T6E, T6x);
Chris@10 780 T7f = FNMS(KP062914667, T6U, T6V);
Chris@10 781 T6W = FMA(KP062914667, T6V, T6U);
Chris@10 782 T8m = FMA(KP994076283, T8l, T8k);
Chris@10 783 T8o = FNMS(KP994076283, T8l, T8k);
Chris@10 784 T6I = FMA(KP994076283, T6F, T6q);
Chris@10 785 T6G = FNMS(KP994076283, T6F, T6q);
Chris@10 786 }
Chris@10 787 T7d = FNMS(KP549754652, T74, T75);
Chris@10 788 T76 = FMA(KP549754652, T75, T74);
Chris@10 789 T7g = FNMS(KP634619297, T6X, T6Y);
Chris@10 790 T6Z = FMA(KP634619297, T6Y, T6X);
Chris@10 791 }
Chris@10 792 {
Chris@10 793 E T88, T7h, T70, T8f, T7c, T73;
Chris@10 794 ri[WS(rs, 3)] = FMA(KP998026728, T6G, T6b);
Chris@10 795 T88 = FMA(KP845997307, T7g, T7f);
Chris@10 796 T7h = FNMS(KP845997307, T7g, T7f);
Chris@10 797 T70 = FMA(KP845997307, T6Z, T6W);
Chris@10 798 T8f = FNMS(KP845997307, T6Z, T6W);
Chris@10 799 T7c = FMA(KP470564281, T71, T72);
Chris@10 800 T73 = FNMS(KP470564281, T72, T71);
Chris@10 801 ii[WS(rs, 3)] = FNMS(KP998026728, T8m, T8j);
Chris@10 802 {
Chris@10 803 E T7e, T8e, T8a, T78, T7a, T8u, T8s, T8t, T8p, T79;
Chris@10 804 {
Chris@10 805 E T6S, T6Q, T6H, T89, T77, T6J, T6R, T8n;
Chris@10 806 T6S = FMA(KP614372930, T6M, T6P);
Chris@10 807 T6Q = FNMS(KP621716863, T6P, T6M);
Chris@10 808 T89 = FNMS(KP968479752, T7d, T7c);
Chris@10 809 T7e = FMA(KP968479752, T7d, T7c);
Chris@10 810 T77 = FMA(KP968479752, T76, T73);
Chris@10 811 T8e = FNMS(KP968479752, T76, T73);
Chris@10 812 T8a = FMA(KP906616052, T89, T88);
Chris@10 813 T8c = FNMS(KP906616052, T89, T88);
Chris@10 814 T78 = FMA(KP906616052, T77, T70);
Chris@10 815 T7a = FNMS(KP906616052, T77, T70);
Chris@10 816 T6H = FNMS(KP249506682, T6G, T6b);
Chris@10 817 ii[WS(rs, 2)] = FNMS(KP998026728, T8a, T87);
Chris@10 818 ri[WS(rs, 2)] = FMA(KP998026728, T78, T6T);
Chris@10 819 T8u = FNMS(KP614372930, T8q, T8r);
Chris@10 820 T8s = FMA(KP621716863, T8r, T8q);
Chris@10 821 T6J = FNMS(KP557913902, T6I, T6H);
Chris@10 822 T6R = FMA(KP557913902, T6I, T6H);
Chris@10 823 T8n = FMA(KP249506682, T8m, T8j);
Chris@10 824 ri[WS(rs, 18)] = FNMS(KP949179823, T6S, T6R);
Chris@10 825 ri[WS(rs, 13)] = FMA(KP949179823, T6S, T6R);
Chris@10 826 ri[WS(rs, 8)] = FMA(KP943557151, T6Q, T6J);
Chris@10 827 ri[WS(rs, 23)] = FNMS(KP943557151, T6Q, T6J);
Chris@10 828 T8t = FNMS(KP557913902, T8o, T8n);
Chris@10 829 T8p = FMA(KP557913902, T8o, T8n);
Chris@10 830 }
Chris@10 831 T7k = FNMS(KP560319534, T7e, T7h);
Chris@10 832 T7i = FMA(KP681693190, T7h, T7e);
Chris@10 833 ii[WS(rs, 23)] = FMA(KP943557151, T8s, T8p);
Chris@10 834 ii[WS(rs, 8)] = FNMS(KP943557151, T8s, T8p);
Chris@10 835 ii[WS(rs, 13)] = FMA(KP949179823, T8u, T8t);
Chris@10 836 ii[WS(rs, 18)] = FNMS(KP949179823, T8u, T8t);
Chris@10 837 T79 = FNMS(KP249506682, T78, T6T);
Chris@10 838 T8i = FNMS(KP560319534, T8e, T8f);
Chris@10 839 T8g = FMA(KP681693190, T8f, T8e);
Chris@10 840 T8b = FMA(KP249506682, T8a, T87);
Chris@10 841 T7j = FMA(KP557913902, T7a, T79);
Chris@10 842 T7b = FNMS(KP557913902, T7a, T79);
Chris@10 843 }
Chris@10 844 }
Chris@10 845 }
Chris@10 846 }
Chris@10 847 }
Chris@10 848 }
Chris@10 849 }
Chris@10 850 ri[WS(rs, 12)] = FNMS(KP949179823, T7k, T7j);
Chris@10 851 ri[WS(rs, 17)] = FMA(KP949179823, T7k, T7j);
Chris@10 852 ri[WS(rs, 7)] = FMA(KP860541664, T7i, T7b);
Chris@10 853 ri[WS(rs, 22)] = FNMS(KP860541664, T7i, T7b);
Chris@10 854 T8d = FMA(KP557913902, T8c, T8b);
Chris@10 855 T8h = FNMS(KP557913902, T8c, T8b);
Chris@10 856 ii[WS(rs, 12)] = FNMS(KP949179823, T8i, T8h);
Chris@10 857 ii[WS(rs, 17)] = FMA(KP949179823, T8i, T8h);
Chris@10 858 ii[WS(rs, 22)] = FNMS(KP860541664, T8g, T8d);
Chris@10 859 ii[WS(rs, 7)] = FMA(KP860541664, T8g, T8d);
Chris@10 860 }
Chris@10 861 }
Chris@10 862 }
Chris@10 863
/*
 * Twiddle instruction table for this codelet: four TW_CEXP entries whose
 * last field is 1, 3, 9 and 24, closed by a single TW_NEXT entry.
 * NOTE(review): presumably each TW_CEXP entry requests one complex twiddle
 * (two reals) for that exponent -- confirm against the tw_instr definition,
 * which lives outside this file.
 */
Chris@10 864 static const tw_instr twinstr[] = {
Chris@10 865 {TW_CEXP, 0, 1},
Chris@10 866 {TW_CEXP, 0, 3},
Chris@10 867 {TW_CEXP, 0, 9},
Chris@10 868 {TW_CEXP, 0, 24},
Chris@10 869 {TW_NEXT, 1, 0}
Chris@10 870 };
Chris@10 871
/*
 * Codelet descriptor.  25 and "t2_25" match the -n / -name options on the
 * generator command line, twinstr is the instruction table defined just
 * before, and {84, 78, 356, 0} repeats the add / mul / fused multiply-add
 * counts quoted in the operation-count comment of the HAVE_FMA variant.
 * NOTE(review): the meaning of &GENUS and of the three trailing zeros is
 * set by the ct_desc definition outside this file -- confirm there.
 */
Chris@10 872 static const ct_desc desc = { 25, "t2_25", twinstr, &GENUS, {84, 78, 356, 0}, 0, 0, 0 };
Chris@10 873
/*
 * Registration hook: hands the t2_25 kernel and its descriptor to
 * kdft_dit_register for planner p.
 * NOTE(review): X() is a macro defined outside this file (presumably a
 * symbol-name prefixer) -- confirm.
 */
Chris@10 874 void X(codelet_t2_25) (planner *p) {
Chris@10 875 X(kdft_dit_register) (p, t2_25, &desc);
Chris@10 876 }
Chris@10 877 #else /* HAVE_FMA */
Chris@10 878
Chris@10 879 /* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 25 -name t2_25 -include t.h */
Chris@10 880
Chris@10 881 /*
Chris@10 882 * This function contains 440 FP additions, 340 FP multiplications,
Chris@10 883 * (or, 280 additions, 180 multiplications, 160 fused multiply/add),
Chris@10 884 * 149 stack variables, 20 constants, and 100 memory accesses
Chris@10 885 */
Chris@10 886 #include "t.h"
Chris@10 887
/*
 * t2_25 (non-FMA variant): size-25 twiddle codelet, as requested by the
 * generator command line (-n 25 -name t2_25 -twiddle-log3).
 *
 * For every m in [mb, me) the body reads 25 complex values from
 * ri[WS(rs, k)] / ii[WS(rs, k)], k = 0..24, combines every input except
 * k = 0 with a twiddle pair derived from the eight reals W[0..7], and
 * stores 25 results back into the same ri / ii slots.  Every load of an
 * iteration happens before its first store.
 *
 *   ri, ii  real and imaginary arrays, read and overwritten in place;
 *           both are advanced by ms after each iteration.
 *   W       twiddle table; the loop starts at W + mb * 8 and steps by 8
 *           reals per iteration.
 *   rs      stride passed to WS() to address element k.
 *   mb, me  half-open range of the iteration counter m.
 *   ms      per-iteration increment applied to ri and ii.
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE come from headers outside this file.  FMA(a, b, c)
 * is presumably a * b + c and FNMS(a, b, c) presumably c - a * b; the
 * arithmetic pattern (constants below, groups of five inputs) looks like
 * 5-point DFT butterflies -- confirm both against the FFTW headers/docs.
 */
Chris@10 888 static void t2_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@10 889 {
/* Constants.  KP559016994 ~ sqrt(5)/4, KP250000000 = 1/4, KP951056516 ~
   sin(2*pi/5), KP587785252 ~ sin(pi/5).  The other sixteen agree in
   magnitude with cosines / sines of multiples of 2*pi/25, e.g.
   KP968583161 ~ cos(2*pi/25) and KP248689887 ~ sin(2*pi/25). */
Chris@10 890 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@10 891 DK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@10 892 DK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@10 893 DK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@10 894 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@10 895 DK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@10 896 DK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@10 897 DK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@10 898 DK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@10 899 DK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@10 900 DK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@10 901 DK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@10 902 DK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@10 903 DK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@10 904 DK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@10 905 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@10 906 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@10 907 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@10 908 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@10 909 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@10 910 {
Chris@10 911 INT m;
/* m runs over [mb, me).  W is first moved to W + mb * 8 and then steps by
   8 reals per iteration, in lock-step with ri / ii stepping by ms. */
Chris@10 912 for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) {
Chris@10 913 E T2, T5, T3, T6, T8, Td, T16, T14, Te, T9, T21, T23, Tx, TR, T1g;
Chris@10 914 E TB, T1f, TV, T1Q, Tg, T1S, Tk, T18, T2s, T1c, T2q, Tn, To, Tp, Tr;
Chris@10 915 E T28, T2x, TY, T2k, T2m, T2v, TG, TE, T10, T1h, T1E, T26, T1B, T1G, T1V;
Chris@10 916 E T1X, T1z, T1j;
/* Twiddle setup: load the stored reals W[0..7] and form products of them.
   The results are used further down as multiplier pairs for the inputs. */
Chris@10 917 {
Chris@10 918 E Tw, TT, Tz, TQ, Tv, TU, TA, TP;
Chris@10 919 {
Chris@10 920 E T4, Tc, T7, Tb;
Chris@10 921 T2 = W[0];
Chris@10 922 T5 = W[1];
Chris@10 923 T3 = W[2];
Chris@10 924 T6 = W[3];
Chris@10 925 T4 = T2 * T3;
Chris@10 926 Tc = T5 * T3;
Chris@10 927 T7 = T5 * T6;
Chris@10 928 Tb = T2 * T6;
/* T8 + i*Td  = (T2 + i*T5) * (T3 + i*T6);
   T14 + i*T16 = (T2 - i*T5) * (T3 + i*T6). */
Chris@10 929 T8 = T4 - T7;
Chris@10 930 Td = Tb + Tc;
Chris@10 931 T16 = Tb - Tc;
Chris@10 932 T14 = T4 + T7;
Chris@10 933 Te = W[5];
Chris@10 934 Tw = T5 * Te;
Chris@10 935 TT = T3 * Te;
Chris@10 936 Tz = T2 * Te;
Chris@10 937 TQ = T6 * Te;
Chris@10 938 T9 = W[4];
Chris@10 939 Tv = T2 * T9;
Chris@10 940 TU = T6 * T9;
Chris@10 941 TA = T5 * T9;
Chris@10 942 TP = T3 * T9;
Chris@10 943 }
Chris@10 944 T21 = TP - TQ;
Chris@10 945 T23 = TT + TU;
Chris@10 946 {
Chris@10 947 E T15, T17, Ta, Tf, T1a, T1b, Ti, Tj;
Chris@10 948 Tx = Tv - Tw;
Chris@10 949 TR = TP + TQ;
Chris@10 950 T1g = Tz - TA;
Chris@10 951 TB = Tz + TA;
Chris@10 952 T1f = Tv + Tw;
Chris@10 953 TV = TT - TU;
Chris@10 954 T15 = T14 * T9;
Chris@10 955 T17 = T16 * Te;
Chris@10 956 T1Q = T15 + T17;
Chris@10 957 Ta = T8 * T9;
Chris@10 958 Tf = Td * Te;
Chris@10 959 Tg = Ta + Tf;
Chris@10 960 T1a = T14 * Te;
Chris@10 961 T1b = T16 * T9;
Chris@10 962 T1S = T1a - T1b;
Chris@10 963 Ti = T8 * Te;
Chris@10 964 Tj = Td * T9;
Chris@10 965 Tk = Ti - Tj;
Chris@10 966 T18 = T15 - T17;
Chris@10 967 T2s = Ti + Tj;
Chris@10 968 T1c = T1a + T1b;
Chris@10 969 T2q = Ta - Tf;
/* Last stored pair (W[6], W[7]); everything below in this block combines
   it with the factors computed so far. */
Chris@10 970 Tn = W[6];
Chris@10 971 To = W[7];
Chris@10 972 Tp = FMA(T8, Tn, Td * To);
Chris@10 973 Tr = FNMS(Td, Tn, T8 * To);
Chris@10 974 T28 = FNMS(T1S, Tn, T1Q * To);
Chris@10 975 T2x = FNMS(TV, Tn, TR * To);
Chris@10 976 TY = FMA(T3, Tn, T6 * To);
Chris@10 977 T2k = FMA(T2, Tn, T5 * To);
Chris@10 978 T2m = FNMS(T5, Tn, T2 * To);
Chris@10 979 T2v = FMA(TR, Tn, TV * To);
Chris@10 980 TG = FNMS(Te, Tn, T9 * To);
Chris@10 981 TE = FMA(T9, Tn, Te * To);
Chris@10 982 T10 = FNMS(T6, Tn, T3 * To);
Chris@10 983 T1h = FMA(T1f, Tn, T1g * To);
Chris@10 984 T1E = FMA(Tg, Tn, Tk * To);
Chris@10 985 T26 = FMA(T1Q, Tn, T1S * To);
Chris@10 986 T1B = FNMS(TB, Tn, Tx * To);
Chris@10 987 T1G = FNMS(Tk, Tn, Tg * To);
Chris@10 988 T1V = FMA(T14, Tn, T16 * To);
Chris@10 989 T1X = FNMS(T16, Tn, T14 * To);
Chris@10 990 T1z = FMA(Tx, Tn, TB * To);
Chris@10 991 T1j = FNMS(T1g, Tn, T1f * To);
Chris@10 992 }
Chris@10 993 }
Chris@10 994 {
Chris@10 995 E T1, T6v, T2F, T6I, TK, T2G, T6u, T6J, T6N, T7c, T2O, T52, T2C, T6k, T48;
Chris@10 996 E T5X, T4L, T5s, T4j, T5W, T4K, T5v, T1o, T6g, T30, T5M, T4A, T56, T3b, T5N;
Chris@10 997 E T4B, T59, T1L, T6h, T3n, T5Q, T4D, T5g, T3y, T5P, T4E, T5d, T2d, T6j, T3L;
Chris@10 998 E T5T, T4I, T5l, T3W, T5U, T4H, T5o;
/* Inputs 0, 5, 10, 15, 20: input 0 is taken as is, the other four are
   combined with their twiddle pairs, then summed / differenced. */
Chris@10 999 {
Chris@10 1000 E Tm, T2I, Tt, T2J, Tu, T6s, TD, T2L, TI, T2M, TJ, T6t;
Chris@10 1001 T1 = ri[0];
Chris@10 1002 T6v = ii[0];
Chris@10 1003 {
Chris@10 1004 E Th, Tl, Tq, Ts;
Chris@10 1005 Th = ri[WS(rs, 5)];
Chris@10 1006 Tl = ii[WS(rs, 5)];
Chris@10 1007 Tm = FMA(Tg, Th, Tk * Tl);
Chris@10 1008 T2I = FNMS(Tk, Th, Tg * Tl);
Chris@10 1009 Tq = ri[WS(rs, 20)];
Chris@10 1010 Ts = ii[WS(rs, 20)];
Chris@10 1011 Tt = FMA(Tp, Tq, Tr * Ts);
Chris@10 1012 T2J = FNMS(Tr, Tq, Tp * Ts);
Chris@10 1013 }
Chris@10 1014 Tu = Tm + Tt;
Chris@10 1015 T6s = T2I + T2J;
Chris@10 1016 {
Chris@10 1017 E Ty, TC, TF, TH;
Chris@10 1018 Ty = ri[WS(rs, 10)];
Chris@10 1019 TC = ii[WS(rs, 10)];
Chris@10 1020 TD = FMA(Tx, Ty, TB * TC);
Chris@10 1021 T2L = FNMS(TB, Ty, Tx * TC);
Chris@10 1022 TF = ri[WS(rs, 15)];
Chris@10 1023 TH = ii[WS(rs, 15)];
Chris@10 1024 TI = FMA(TE, TF, TG * TH);
Chris@10 1025 T2M = FNMS(TG, TF, TE * TH);
Chris@10 1026 }
Chris@10 1027 TJ = TD + TI;
Chris@10 1028 T6t = T2L + T2M;
Chris@10 1029 T2F = KP559016994 * (Tu - TJ);
Chris@10 1030 T6I = KP559016994 * (T6s - T6t);
Chris@10 1031 TK = Tu + TJ;
Chris@10 1032 T2G = FNMS(KP250000000, TK, T1);
Chris@10 1033 T6u = T6s + T6t;
Chris@10 1034 T6J = FNMS(KP250000000, T6u, T6v);
Chris@10 1035 {
Chris@10 1036 E T6L, T6M, T2K, T2N;
Chris@10 1037 T6L = Tm - Tt;
Chris@10 1038 T6M = TD - TI;
Chris@10 1039 T6N = FMA(KP951056516, T6L, KP587785252 * T6M);
Chris@10 1040 T7c = FNMS(KP587785252, T6L, KP951056516 * T6M);
Chris@10 1041 T2K = T2I - T2J;
Chris@10 1042 T2N = T2L - T2M;
Chris@10 1043 T2O = FMA(KP951056516, T2K, KP587785252 * T2N);
Chris@10 1044 T52 = FNMS(KP587785252, T2K, KP951056516 * T2N);
Chris@10 1045 }
Chris@10 1046 }
/* Inputs 3, 8, 13, 18, 23. */
Chris@10 1047 {
Chris@10 1048 E T2g, T4c, T43, T46, T4h, T4g, T49, T4a, T4d, T2p, T2A, T2B, T2e, T2f;
Chris@10 1049 T2e = ri[WS(rs, 3)];
Chris@10 1050 T2f = ii[WS(rs, 3)];
Chris@10 1051 T2g = FMA(T3, T2e, T6 * T2f);
Chris@10 1052 T4c = FNMS(T6, T2e, T3 * T2f);
Chris@10 1053 {
Chris@10 1054 E T2j, T41, T2z, T45, T2o, T42, T2u, T44;
Chris@10 1055 {
Chris@10 1056 E T2h, T2i, T2w, T2y;
Chris@10 1057 T2h = ri[WS(rs, 8)];
Chris@10 1058 T2i = ii[WS(rs, 8)];
Chris@10 1059 T2j = FMA(T1f, T2h, T1g * T2i);
Chris@10 1060 T41 = FNMS(T1g, T2h, T1f * T2i);
Chris@10 1061 T2w = ri[WS(rs, 18)];
Chris@10 1062 T2y = ii[WS(rs, 18)];
Chris@10 1063 T2z = FMA(T2v, T2w, T2x * T2y);
Chris@10 1064 T45 = FNMS(T2x, T2w, T2v * T2y);
Chris@10 1065 }
Chris@10 1066 {
Chris@10 1067 E T2l, T2n, T2r, T2t;
Chris@10 1068 T2l = ri[WS(rs, 23)];
Chris@10 1069 T2n = ii[WS(rs, 23)];
Chris@10 1070 T2o = FMA(T2k, T2l, T2m * T2n);
Chris@10 1071 T42 = FNMS(T2m, T2l, T2k * T2n);
Chris@10 1072 T2r = ri[WS(rs, 13)];
Chris@10 1073 T2t = ii[WS(rs, 13)];
Chris@10 1074 T2u = FMA(T2q, T2r, T2s * T2t);
Chris@10 1075 T44 = FNMS(T2s, T2r, T2q * T2t);
Chris@10 1076 }
Chris@10 1077 T43 = T41 - T42;
Chris@10 1078 T46 = T44 - T45;
Chris@10 1079 T4h = T2u - T2z;
Chris@10 1080 T4g = T2j - T2o;
Chris@10 1081 T49 = T41 + T42;
Chris@10 1082 T4a = T44 + T45;
Chris@10 1083 T4d = T49 + T4a;
Chris@10 1084 T2p = T2j + T2o;
Chris@10 1085 T2A = T2u + T2z;
Chris@10 1086 T2B = T2p + T2A;
Chris@10 1087 }
Chris@10 1088 T2C = T2g + T2B;
Chris@10 1089 T6k = T4c + T4d;
Chris@10 1090 {
Chris@10 1091 E T47, T5r, T40, T5q, T3Y, T3Z;
Chris@10 1092 T47 = FMA(KP951056516, T43, KP587785252 * T46);
Chris@10 1093 T5r = FNMS(KP587785252, T43, KP951056516 * T46);
Chris@10 1094 T3Y = KP559016994 * (T2p - T2A);
Chris@10 1095 T3Z = FNMS(KP250000000, T2B, T2g);
Chris@10 1096 T40 = T3Y + T3Z;
Chris@10 1097 T5q = T3Z - T3Y;
Chris@10 1098 T48 = T40 + T47;
Chris@10 1099 T5X = T5q + T5r;
Chris@10 1100 T4L = T40 - T47;
Chris@10 1101 T5s = T5q - T5r;
Chris@10 1102 }
Chris@10 1103 {
Chris@10 1104 E T4i, T5t, T4f, T5u, T4b, T4e;
Chris@10 1105 T4i = FMA(KP951056516, T4g, KP587785252 * T4h);
Chris@10 1106 T5t = FNMS(KP587785252, T4g, KP951056516 * T4h);
Chris@10 1107 T4b = KP559016994 * (T49 - T4a);
Chris@10 1108 T4e = FNMS(KP250000000, T4d, T4c);
Chris@10 1109 T4f = T4b + T4e;
Chris@10 1110 T5u = T4e - T4b;
Chris@10 1111 T4j = T4f - T4i;
Chris@10 1112 T5W = T5u - T5t;
Chris@10 1113 T4K = T4i + T4f;
Chris@10 1114 T5v = T5t + T5u;
Chris@10 1115 }
Chris@10 1116 }
/* Inputs 1, 6, 11, 16, 21. */
Chris@10 1117 {
Chris@10 1118 E TO, T34, T2V, T2Y, T39, T38, T31, T32, T35, T13, T1m, T1n, TM, TN;
Chris@10 1119 TM = ri[WS(rs, 1)];
Chris@10 1120 TN = ii[WS(rs, 1)];
Chris@10 1121 TO = FMA(T2, TM, T5 * TN);
Chris@10 1122 T34 = FNMS(T5, TM, T2 * TN);
Chris@10 1123 {
Chris@10 1124 E TX, T2T, T1l, T2X, T12, T2U, T1e, T2W;
Chris@10 1125 {
Chris@10 1126 E TS, TW, T1i, T1k;
Chris@10 1127 TS = ri[WS(rs, 6)];
Chris@10 1128 TW = ii[WS(rs, 6)];
Chris@10 1129 TX = FMA(TR, TS, TV * TW);
Chris@10 1130 T2T = FNMS(TV, TS, TR * TW);
Chris@10 1131 T1i = ri[WS(rs, 16)];
Chris@10 1132 T1k = ii[WS(rs, 16)];
Chris@10 1133 T1l = FMA(T1h, T1i, T1j * T1k);
Chris@10 1134 T2X = FNMS(T1j, T1i, T1h * T1k);
Chris@10 1135 }
Chris@10 1136 {
Chris@10 1137 E TZ, T11, T19, T1d;
Chris@10 1138 TZ = ri[WS(rs, 21)];
Chris@10 1139 T11 = ii[WS(rs, 21)];
Chris@10 1140 T12 = FMA(TY, TZ, T10 * T11);
Chris@10 1141 T2U = FNMS(T10, TZ, TY * T11);
Chris@10 1142 T19 = ri[WS(rs, 11)];
Chris@10 1143 T1d = ii[WS(rs, 11)];
Chris@10 1144 T1e = FMA(T18, T19, T1c * T1d);
Chris@10 1145 T2W = FNMS(T1c, T19, T18 * T1d);
Chris@10 1146 }
Chris@10 1147 T2V = T2T - T2U;
Chris@10 1148 T2Y = T2W - T2X;
Chris@10 1149 T39 = T1e - T1l;
Chris@10 1150 T38 = TX - T12;
Chris@10 1151 T31 = T2T + T2U;
Chris@10 1152 T32 = T2W + T2X;
Chris@10 1153 T35 = T31 + T32;
Chris@10 1154 T13 = TX + T12;
Chris@10 1155 T1m = T1e + T1l;
Chris@10 1156 T1n = T13 + T1m;
Chris@10 1157 }
Chris@10 1158 T1o = TO + T1n;
Chris@10 1159 T6g = T34 + T35;
Chris@10 1160 {
Chris@10 1161 E T2Z, T55, T2S, T54, T2Q, T2R;
Chris@10 1162 T2Z = FMA(KP951056516, T2V, KP587785252 * T2Y);
Chris@10 1163 T55 = FNMS(KP587785252, T2V, KP951056516 * T2Y);
Chris@10 1164 T2Q = KP559016994 * (T13 - T1m);
Chris@10 1165 T2R = FNMS(KP250000000, T1n, TO);
Chris@10 1166 T2S = T2Q + T2R;
Chris@10 1167 T54 = T2R - T2Q;
Chris@10 1168 T30 = T2S + T2Z;
Chris@10 1169 T5M = T54 + T55;
Chris@10 1170 T4A = T2S - T2Z;
Chris@10 1171 T56 = T54 - T55;
Chris@10 1172 }
Chris@10 1173 {
Chris@10 1174 E T3a, T57, T37, T58, T33, T36;
Chris@10 1175 T3a = FMA(KP951056516, T38, KP587785252 * T39);
Chris@10 1176 T57 = FNMS(KP587785252, T38, KP951056516 * T39);
Chris@10 1177 T33 = KP559016994 * (T31 - T32);
Chris@10 1178 T36 = FNMS(KP250000000, T35, T34);
Chris@10 1179 T37 = T33 + T36;
Chris@10 1180 T58 = T36 - T33;
Chris@10 1181 T3b = T37 - T3a;
Chris@10 1182 T5N = T58 - T57;
Chris@10 1183 T4B = T3a + T37;
Chris@10 1184 T59 = T57 + T58;
Chris@10 1185 }
Chris@10 1186 }
/* Inputs 4, 9, 14, 19, 24. */
Chris@10 1187 {
Chris@10 1188 E T1r, T3r, T3i, T3l, T3w, T3v, T3o, T3p, T3s, T1y, T1J, T1K, T1p, T1q;
Chris@10 1189 T1p = ri[WS(rs, 4)];
Chris@10 1190 T1q = ii[WS(rs, 4)];
Chris@10 1191 T1r = FMA(T8, T1p, Td * T1q);
Chris@10 1192 T3r = FNMS(Td, T1p, T8 * T1q);
Chris@10 1193 {
Chris@10 1194 E T1u, T3g, T1I, T3k, T1x, T3h, T1D, T3j;
Chris@10 1195 {
Chris@10 1196 E T1s, T1t, T1F, T1H;
Chris@10 1197 T1s = ri[WS(rs, 9)];
Chris@10 1198 T1t = ii[WS(rs, 9)];
Chris@10 1199 T1u = FMA(T9, T1s, Te * T1t);
Chris@10 1200 T3g = FNMS(Te, T1s, T9 * T1t);
Chris@10 1201 T1F = ri[WS(rs, 19)];
Chris@10 1202 T1H = ii[WS(rs, 19)];
Chris@10 1203 T1I = FMA(T1E, T1F, T1G * T1H);
Chris@10 1204 T3k = FNMS(T1G, T1F, T1E * T1H);
Chris@10 1205 }
Chris@10 1206 {
Chris@10 1207 E T1v, T1w, T1A, T1C;
Chris@10 1208 T1v = ri[WS(rs, 24)];
Chris@10 1209 T1w = ii[WS(rs, 24)];
Chris@10 1210 T1x = FMA(Tn, T1v, To * T1w);
Chris@10 1211 T3h = FNMS(To, T1v, Tn * T1w);
Chris@10 1212 T1A = ri[WS(rs, 14)];
Chris@10 1213 T1C = ii[WS(rs, 14)];
Chris@10 1214 T1D = FMA(T1z, T1A, T1B * T1C);
Chris@10 1215 T3j = FNMS(T1B, T1A, T1z * T1C);
Chris@10 1216 }
Chris@10 1217 T3i = T3g - T3h;
Chris@10 1218 T3l = T3j - T3k;
Chris@10 1219 T3w = T1D - T1I;
Chris@10 1220 T3v = T1u - T1x;
Chris@10 1221 T3o = T3g + T3h;
Chris@10 1222 T3p = T3j + T3k;
Chris@10 1223 T3s = T3o + T3p;
Chris@10 1224 T1y = T1u + T1x;
Chris@10 1225 T1J = T1D + T1I;
Chris@10 1226 T1K = T1y + T1J;
Chris@10 1227 }
Chris@10 1228 T1L = T1r + T1K;
Chris@10 1229 T6h = T3r + T3s;
Chris@10 1230 {
Chris@10 1231 E T3m, T5f, T3f, T5e, T3d, T3e;
Chris@10 1232 T3m = FMA(KP951056516, T3i, KP587785252 * T3l);
Chris@10 1233 T5f = FNMS(KP587785252, T3i, KP951056516 * T3l);
Chris@10 1234 T3d = KP559016994 * (T1y - T1J);
Chris@10 1235 T3e = FNMS(KP250000000, T1K, T1r);
Chris@10 1236 T3f = T3d + T3e;
Chris@10 1237 T5e = T3e - T3d;
Chris@10 1238 T3n = T3f + T3m;
Chris@10 1239 T5Q = T5e + T5f;
Chris@10 1240 T4D = T3f - T3m;
Chris@10 1241 T5g = T5e - T5f;
Chris@10 1242 }
Chris@10 1243 {
Chris@10 1244 E T3x, T5b, T3u, T5c, T3q, T3t;
Chris@10 1245 T3x = FMA(KP951056516, T3v, KP587785252 * T3w);
Chris@10 1246 T5b = FNMS(KP587785252, T3v, KP951056516 * T3w);
Chris@10 1247 T3q = KP559016994 * (T3o - T3p);
Chris@10 1248 T3t = FNMS(KP250000000, T3s, T3r);
Chris@10 1249 T3u = T3q + T3t;
Chris@10 1250 T5c = T3t - T3q;
Chris@10 1251 T3y = T3u - T3x;
Chris@10 1252 T5P = T5c - T5b;
Chris@10 1253 T4E = T3x + T3u;
Chris@10 1254 T5d = T5b + T5c;
Chris@10 1255 }
Chris@10 1256 }
/* Inputs 2, 7, 12, 17, 22. */
Chris@10 1257 {
Chris@10 1258 E T1P, T3P, T3G, T3J, T3U, T3T, T3M, T3N, T3Q, T20, T2b, T2c, T1N, T1O;
Chris@10 1259 T1N = ri[WS(rs, 2)];
Chris@10 1260 T1O = ii[WS(rs, 2)];
Chris@10 1261 T1P = FMA(T14, T1N, T16 * T1O);
Chris@10 1262 T3P = FNMS(T16, T1N, T14 * T1O);
Chris@10 1263 {
Chris@10 1264 E T1U, T3E, T2a, T3I, T1Z, T3F, T25, T3H;
Chris@10 1265 {
Chris@10 1266 E T1R, T1T, T27, T29;
Chris@10 1267 T1R = ri[WS(rs, 7)];
Chris@10 1268 T1T = ii[WS(rs, 7)];
Chris@10 1269 T1U = FMA(T1Q, T1R, T1S * T1T);
Chris@10 1270 T3E = FNMS(T1S, T1R, T1Q * T1T);
Chris@10 1271 T27 = ri[WS(rs, 17)];
Chris@10 1272 T29 = ii[WS(rs, 17)];
Chris@10 1273 T2a = FMA(T26, T27, T28 * T29);
Chris@10 1274 T3I = FNMS(T28, T27, T26 * T29);
Chris@10 1275 }
Chris@10 1276 {
Chris@10 1277 E T1W, T1Y, T22, T24;
Chris@10 1278 T1W = ri[WS(rs, 22)];
Chris@10 1279 T1Y = ii[WS(rs, 22)];
Chris@10 1280 T1Z = FMA(T1V, T1W, T1X * T1Y);
Chris@10 1281 T3F = FNMS(T1X, T1W, T1V * T1Y);
Chris@10 1282 T22 = ri[WS(rs, 12)];
Chris@10 1283 T24 = ii[WS(rs, 12)];
Chris@10 1284 T25 = FMA(T21, T22, T23 * T24);
Chris@10 1285 T3H = FNMS(T23, T22, T21 * T24);
Chris@10 1286 }
Chris@10 1287 T3G = T3E - T3F;
Chris@10 1288 T3J = T3H - T3I;
Chris@10 1289 T3U = T25 - T2a;
Chris@10 1290 T3T = T1U - T1Z;
Chris@10 1291 T3M = T3E + T3F;
Chris@10 1292 T3N = T3H + T3I;
Chris@10 1293 T3Q = T3M + T3N;
Chris@10 1294 T20 = T1U + T1Z;
Chris@10 1295 T2b = T25 + T2a;
Chris@10 1296 T2c = T20 + T2b;
Chris@10 1297 }
Chris@10 1298 T2d = T1P + T2c;
Chris@10 1299 T6j = T3P + T3Q;
Chris@10 1300 {
Chris@10 1301 E T3K, T5k, T3D, T5j, T3B, T3C;
Chris@10 1302 T3K = FMA(KP951056516, T3G, KP587785252 * T3J);
Chris@10 1303 T5k = FNMS(KP587785252, T3G, KP951056516 * T3J);
Chris@10 1304 T3B = KP559016994 * (T20 - T2b);
Chris@10 1305 T3C = FNMS(KP250000000, T2c, T1P);
Chris@10 1306 T3D = T3B + T3C;
Chris@10 1307 T5j = T3C - T3B;
Chris@10 1308 T3L = T3D + T3K;
Chris@10 1309 T5T = T5j + T5k;
Chris@10 1310 T4I = T3D - T3K;
Chris@10 1311 T5l = T5j - T5k;
Chris@10 1312 }
Chris@10 1313 {
Chris@10 1314 E T3V, T5m, T3S, T5n, T3O, T3R;
Chris@10 1315 T3V = FMA(KP951056516, T3T, KP587785252 * T3U);
Chris@10 1316 T5m = FNMS(KP587785252, T3T, KP951056516 * T3U);
Chris@10 1317 T3O = KP559016994 * (T3M - T3N);
Chris@10 1318 T3R = FNMS(KP250000000, T3Q, T3P);
Chris@10 1319 T3S = T3O + T3R;
Chris@10 1320 T5n = T3R - T3O;
Chris@10 1321 T3W = T3S - T3V;
Chris@10 1322 T5U = T5n - T5m;
Chris@10 1323 T4H = T3V + T3S;
Chris@10 1324 T5o = T5m + T5n;
Chris@10 1325 }
Chris@10 1326 }
/* First stores of the iteration: real outputs 0, 5, 10, 15, 20. */
Chris@10 1327 {
Chris@10 1328 E T6m, T6o, TL, T2E, T6d, T6e, T6n, T6f;
Chris@10 1329 {
Chris@10 1330 E T6i, T6l, T1M, T2D;
Chris@10 1331 T6i = T6g - T6h;
Chris@10 1332 T6l = T6j - T6k;
Chris@10 1333 T6m = FMA(KP951056516, T6i, KP587785252 * T6l);
Chris@10 1334 T6o = FNMS(KP587785252, T6i, KP951056516 * T6l);
Chris@10 1335 TL = T1 + TK;
Chris@10 1336 T1M = T1o + T1L;
Chris@10 1337 T2D = T2d + T2C;
Chris@10 1338 T2E = T1M + T2D;
Chris@10 1339 T6d = KP559016994 * (T1M - T2D);
Chris@10 1340 T6e = FNMS(KP250000000, T2E, TL);
Chris@10 1341 }
Chris@10 1342 ri[0] = TL + T2E;
Chris@10 1343 T6n = T6e - T6d;
Chris@10 1344 ri[WS(rs, 10)] = T6n - T6o;
Chris@10 1345 ri[WS(rs, 15)] = T6n + T6o;
Chris@10 1346 T6f = T6d + T6e;
Chris@10 1347 ri[WS(rs, 20)] = T6f - T6m;
Chris@10 1348 ri[WS(rs, 5)] = T6f + T6m;
Chris@10 1349 }
/* Imaginary outputs 0, 5, 10, 15, 20. */
Chris@10 1350 {
Chris@10 1351 E T6C, T6D, T6w, T6r, T6x, T6y, T6E, T6z;
Chris@10 1352 {
Chris@10 1353 E T6A, T6B, T6p, T6q;
Chris@10 1354 T6A = T1o - T1L;
Chris@10 1355 T6B = T2d - T2C;
Chris@10 1356 T6C = FMA(KP951056516, T6A, KP587785252 * T6B);
Chris@10 1357 T6D = FNMS(KP587785252, T6A, KP951056516 * T6B);
Chris@10 1358 T6w = T6u + T6v;
Chris@10 1359 T6p = T6g + T6h;
Chris@10 1360 T6q = T6j + T6k;
Chris@10 1361 T6r = T6p + T6q;
Chris@10 1362 T6x = KP559016994 * (T6p - T6q);
Chris@10 1363 T6y = FNMS(KP250000000, T6r, T6w);
Chris@10 1364 }
Chris@10 1365 ii[0] = T6r + T6w;
Chris@10 1366 T6E = T6y - T6x;
Chris@10 1367 ii[WS(rs, 10)] = T6D + T6E;
Chris@10 1368 ii[WS(rs, 15)] = T6E - T6D;
Chris@10 1369 T6z = T6x + T6y;
Chris@10 1370 ii[WS(rs, 5)] = T6z - T6C;
Chris@10 1371 ii[WS(rs, 20)] = T6C + T6z;
Chris@10 1372 }
/* Outputs 1, 4, 6, 9, 11, 14, 16, 19, 21, 24 (real and imaginary). */
Chris@10 1373 {
Chris@10 1374 E T2P, T4z, T6O, T70, T4m, T6T, T4n, T6S, T4U, T71, T4X, T6Z, T4O, T75, T4P;
Chris@10 1375 E T74, T4s, T6P, T4v, T6H, T2H, T6K;
Chris@10 1376 T2H = T2F + T2G;
Chris@10 1377 T2P = T2H + T2O;
Chris@10 1378 T4z = T2H - T2O;
Chris@10 1379 T6K = T6I + T6J;
Chris@10 1380 T6O = T6K - T6N;
Chris@10 1381 T70 = T6N + T6K;
Chris@10 1382 {
Chris@10 1383 E T3c, T3z, T3A, T3X, T4k, T4l;
Chris@10 1384 T3c = FMA(KP968583161, T30, KP248689887 * T3b);
Chris@10 1385 T3z = FMA(KP535826794, T3n, KP844327925 * T3y);
Chris@10 1386 T3A = T3c + T3z;
Chris@10 1387 T3X = FMA(KP876306680, T3L, KP481753674 * T3W);
Chris@10 1388 T4k = FMA(KP728968627, T48, KP684547105 * T4j);
Chris@10 1389 T4l = T3X + T4k;
Chris@10 1390 T4m = T3A + T4l;
Chris@10 1391 T6T = T3X - T4k;
Chris@10 1392 T4n = KP559016994 * (T3A - T4l);
Chris@10 1393 T6S = T3c - T3z;
Chris@10 1394 }
Chris@10 1395 {
Chris@10 1396 E T4S, T4T, T6X, T4V, T4W, T6Y;
Chris@10 1397 T4S = FNMS(KP844327925, T4A, KP535826794 * T4B);
Chris@10 1398 T4T = FNMS(KP637423989, T4E, KP770513242 * T4D);
Chris@10 1399 T6X = T4S + T4T;
Chris@10 1400 T4V = FMA(KP125333233, T4L, KP992114701 * T4K);
Chris@10 1401 T4W = FMA(KP904827052, T4I, KP425779291 * T4H);
Chris@10 1402 T6Y = T4W + T4V;
Chris@10 1403 T4U = T4S - T4T;
Chris@10 1404 T71 = KP559016994 * (T6X + T6Y);
Chris@10 1405 T4X = T4V - T4W;
Chris@10 1406 T6Z = T6X - T6Y;
Chris@10 1407 }
Chris@10 1408 {
Chris@10 1409 E T4C, T4F, T4G, T4J, T4M, T4N;
Chris@10 1410 T4C = FMA(KP535826794, T4A, KP844327925 * T4B);
Chris@10 1411 T4F = FMA(KP637423989, T4D, KP770513242 * T4E);
Chris@10 1412 T4G = T4C - T4F;
Chris@10 1413 T4J = FNMS(KP425779291, T4I, KP904827052 * T4H);
Chris@10 1414 T4M = FNMS(KP992114701, T4L, KP125333233 * T4K);
Chris@10 1415 T4N = T4J + T4M;
Chris@10 1416 T4O = T4G + T4N;
Chris@10 1417 T75 = T4J - T4M;
Chris@10 1418 T4P = KP559016994 * (T4G - T4N);
Chris@10 1419 T74 = T4C + T4F;
Chris@10 1420 }
Chris@10 1421 {
Chris@10 1422 E T4q, T4r, T6F, T4t, T4u, T6G;
Chris@10 1423 T4q = FNMS(KP248689887, T30, KP968583161 * T3b);
Chris@10 1424 T4r = FNMS(KP844327925, T3n, KP535826794 * T3y);
Chris@10 1425 T6F = T4q + T4r;
Chris@10 1426 T4t = FNMS(KP481753674, T3L, KP876306680 * T3W);
Chris@10 1427 T4u = FNMS(KP684547105, T48, KP728968627 * T4j);
Chris@10 1428 T6G = T4t + T4u;
Chris@10 1429 T4s = T4q - T4r;
Chris@10 1430 T6P = KP559016994 * (T6F - T6G);
Chris@10 1431 T4v = T4t - T4u;
Chris@10 1432 T6H = T6F + T6G;
Chris@10 1433 }
Chris@10 1434 ri[WS(rs, 1)] = T2P + T4m;
Chris@10 1435 ii[WS(rs, 1)] = T6H + T6O;
Chris@10 1436 ri[WS(rs, 4)] = T4z + T4O;
Chris@10 1437 ii[WS(rs, 4)] = T6Z + T70;
Chris@10 1438 {
Chris@10 1439 E T4w, T4y, T4p, T4x, T4o;
Chris@10 1440 T4w = FMA(KP951056516, T4s, KP587785252 * T4v);
Chris@10 1441 T4y = FNMS(KP587785252, T4s, KP951056516 * T4v);
Chris@10 1442 T4o = FNMS(KP250000000, T4m, T2P);
Chris@10 1443 T4p = T4n + T4o;
Chris@10 1444 T4x = T4o - T4n;
Chris@10 1445 ri[WS(rs, 21)] = T4p - T4w;
Chris@10 1446 ri[WS(rs, 16)] = T4x + T4y;
Chris@10 1447 ri[WS(rs, 6)] = T4p + T4w;
Chris@10 1448 ri[WS(rs, 11)] = T4x - T4y;
Chris@10 1449 }
Chris@10 1450 {
Chris@10 1451 E T6U, T6V, T6R, T6W, T6Q;
Chris@10 1452 T6U = FMA(KP951056516, T6S, KP587785252 * T6T);
Chris@10 1453 T6V = FNMS(KP587785252, T6S, KP951056516 * T6T);
Chris@10 1454 T6Q = FNMS(KP250000000, T6H, T6O);
Chris@10 1455 T6R = T6P + T6Q;
Chris@10 1456 T6W = T6Q - T6P;
Chris@10 1457 ii[WS(rs, 6)] = T6R - T6U;
Chris@10 1458 ii[WS(rs, 16)] = T6W - T6V;
Chris@10 1459 ii[WS(rs, 21)] = T6U + T6R;
Chris@10 1460 ii[WS(rs, 11)] = T6V + T6W;
Chris@10 1461 }
Chris@10 1462 {
Chris@10 1463 E T4Y, T50, T4R, T4Z, T4Q;
Chris@10 1464 T4Y = FMA(KP951056516, T4U, KP587785252 * T4X);
Chris@10 1465 T50 = FNMS(KP587785252, T4U, KP951056516 * T4X);
Chris@10 1466 T4Q = FNMS(KP250000000, T4O, T4z);
Chris@10 1467 T4R = T4P + T4Q;
Chris@10 1468 T4Z = T4Q - T4P;
Chris@10 1469 ri[WS(rs, 24)] = T4R - T4Y;
Chris@10 1470 ri[WS(rs, 19)] = T4Z + T50;
Chris@10 1471 ri[WS(rs, 9)] = T4R + T4Y;
Chris@10 1472 ri[WS(rs, 14)] = T4Z - T50;
Chris@10 1473 }
Chris@10 1474 {
Chris@10 1475 E T76, T77, T73, T78, T72;
Chris@10 1476 T76 = FMA(KP951056516, T74, KP587785252 * T75);
Chris@10 1477 T77 = FNMS(KP587785252, T74, KP951056516 * T75);
Chris@10 1478 T72 = FNMS(KP250000000, T6Z, T70);
Chris@10 1479 T73 = T71 + T72;
Chris@10 1480 T78 = T72 - T71;
Chris@10 1481 ii[WS(rs, 9)] = T73 - T76;
Chris@10 1482 ii[WS(rs, 19)] = T78 - T77;
Chris@10 1483 ii[WS(rs, 24)] = T76 + T73;
Chris@10 1484 ii[WS(rs, 14)] = T77 + T78;
Chris@10 1485 }
Chris@10 1486 }
/* Outputs 2, 3, 7, 8, 12, 13, 17, 18, 22, 23 (real and imaginary). */
Chris@10 1487 {
Chris@10 1488 E T53, T5L, T7e, T7q, T5y, T7j, T5z, T7i, T66, T7r, T69, T7p, T60, T7v, T61;
Chris@10 1489 E T7u, T5E, T7f, T5H, T7b, T51, T7d;
Chris@10 1490 T51 = T2G - T2F;
Chris@10 1491 T53 = T51 - T52;
Chris@10 1492 T5L = T51 + T52;
Chris@10 1493 T7d = T6J - T6I;
Chris@10 1494 T7e = T7c + T7d;
Chris@10 1495 T7q = T7d - T7c;
Chris@10 1496 {
Chris@10 1497 E T5a, T5h, T5i, T5p, T5w, T5x;
Chris@10 1498 T5a = FMA(KP876306680, T56, KP481753674 * T59);
Chris@10 1499 T5h = FNMS(KP425779291, T5g, KP904827052 * T5d);
Chris@10 1500 T5i = T5a + T5h;
Chris@10 1501 T5p = FMA(KP535826794, T5l, KP844327925 * T5o);
Chris@10 1502 T5w = FMA(KP062790519, T5s, KP998026728 * T5v);
Chris@10 1503 T5x = T5p + T5w;
Chris@10 1504 T5y = T5i + T5x;
Chris@10 1505 T7j = T5p - T5w;
Chris@10 1506 T5z = KP559016994 * (T5i - T5x);
Chris@10 1507 T7i = T5a - T5h;
Chris@10 1508 }
Chris@10 1509 {
Chris@10 1510 E T64, T65, T7n, T67, T68, T7o;
Chris@10 1511 T64 = FNMS(KP684547105, T5M, KP728968627 * T5N);
Chris@10 1512 T65 = FMA(KP125333233, T5Q, KP992114701 * T5P);
Chris@10 1513 T7n = T64 - T65;
Chris@10 1514 T67 = FNMS(KP998026728, T5T, KP062790519 * T5U);
Chris@10 1515 T68 = FMA(KP770513242, T5X, KP637423989 * T5W);
Chris@10 1516 T7o = T67 - T68;
Chris@10 1517 T66 = T64 + T65;
Chris@10 1518 T7r = KP559016994 * (T7n - T7o);
Chris@10 1519 T69 = T67 + T68;
Chris@10 1520 T7p = T7n + T7o;
Chris@10 1521 }
Chris@10 1522 {
Chris@10 1523 E T5O, T5R, T5S, T5V, T5Y, T5Z;
Chris@10 1524 T5O = FMA(KP728968627, T5M, KP684547105 * T5N);
Chris@10 1525 T5R = FNMS(KP992114701, T5Q, KP125333233 * T5P);
Chris@10 1526 T5S = T5O + T5R;
Chris@10 1527 T5V = FMA(KP062790519, T5T, KP998026728 * T5U);
Chris@10 1528 T5Y = FNMS(KP637423989, T5X, KP770513242 * T5W);
Chris@10 1529 T5Z = T5V + T5Y;
Chris@10 1530 T60 = T5S + T5Z;
Chris@10 1531 T7v = T5V - T5Y;
Chris@10 1532 T61 = KP559016994 * (T5S - T5Z);
Chris@10 1533 T7u = T5O - T5R;
Chris@10 1534 }
Chris@10 1535 {
Chris@10 1536 E T5C, T5D, T79, T5F, T5G, T7a;
Chris@10 1537 T5C = FNMS(KP481753674, T56, KP876306680 * T59);
Chris@10 1538 T5D = FMA(KP904827052, T5g, KP425779291 * T5d);
Chris@10 1539 T79 = T5C - T5D;
Chris@10 1540 T5F = FNMS(KP844327925, T5l, KP535826794 * T5o);
Chris@10 1541 T5G = FNMS(KP998026728, T5s, KP062790519 * T5v);
Chris@10 1542 T7a = T5F + T5G;
Chris@10 1543 T5E = T5C + T5D;
Chris@10 1544 T7f = KP559016994 * (T79 - T7a);
Chris@10 1545 T5H = T5F - T5G;
Chris@10 1546 T7b = T79 + T7a;
Chris@10 1547 }
Chris@10 1548 ri[WS(rs, 2)] = T53 + T5y;
Chris@10 1549 ii[WS(rs, 2)] = T7b + T7e;
Chris@10 1550 ri[WS(rs, 3)] = T5L + T60;
Chris@10 1551 ii[WS(rs, 3)] = T7p + T7q;
Chris@10 1552 {
Chris@10 1553 E T5I, T5K, T5B, T5J, T5A;
Chris@10 1554 T5I = FMA(KP951056516, T5E, KP587785252 * T5H);
Chris@10 1555 T5K = FNMS(KP587785252, T5E, KP951056516 * T5H);
Chris@10 1556 T5A = FNMS(KP250000000, T5y, T53);
Chris@10 1557 T5B = T5z + T5A;
Chris@10 1558 T5J = T5A - T5z;
Chris@10 1559 ri[WS(rs, 22)] = T5B - T5I;
Chris@10 1560 ri[WS(rs, 17)] = T5J + T5K;
Chris@10 1561 ri[WS(rs, 7)] = T5B + T5I;
Chris@10 1562 ri[WS(rs, 12)] = T5J - T5K;
Chris@10 1563 }
Chris@10 1564 {
Chris@10 1565 E T7k, T7l, T7h, T7m, T7g;
Chris@10 1566 T7k = FMA(KP951056516, T7i, KP587785252 * T7j);
Chris@10 1567 T7l = FNMS(KP587785252, T7i, KP951056516 * T7j);
Chris@10 1568 T7g = FNMS(KP250000000, T7b, T7e);
Chris@10 1569 T7h = T7f + T7g;
Chris@10 1570 T7m = T7g - T7f;
Chris@10 1571 ii[WS(rs, 7)] = T7h - T7k;
Chris@10 1572 ii[WS(rs, 17)] = T7m - T7l;
Chris@10 1573 ii[WS(rs, 22)] = T7k + T7h;
Chris@10 1574 ii[WS(rs, 12)] = T7l + T7m;
Chris@10 1575 }
Chris@10 1576 {
Chris@10 1577 E T6a, T6c, T63, T6b, T62;
Chris@10 1578 T6a = FMA(KP951056516, T66, KP587785252 * T69);
Chris@10 1579 T6c = FNMS(KP587785252, T66, KP951056516 * T69);
Chris@10 1580 T62 = FNMS(KP250000000, T60, T5L);
Chris@10 1581 T63 = T61 + T62;
Chris@10 1582 T6b = T62 - T61;
Chris@10 1583 ri[WS(rs, 23)] = T63 - T6a;
Chris@10 1584 ri[WS(rs, 18)] = T6b + T6c;
Chris@10 1585 ri[WS(rs, 8)] = T63 + T6a;
Chris@10 1586 ri[WS(rs, 13)] = T6b - T6c;
Chris@10 1587 }
Chris@10 1588 {
Chris@10 1589 E T7w, T7x, T7t, T7y, T7s;
Chris@10 1590 T7w = FMA(KP951056516, T7u, KP587785252 * T7v);
Chris@10 1591 T7x = FNMS(KP587785252, T7u, KP951056516 * T7v);
Chris@10 1592 T7s = FNMS(KP250000000, T7p, T7q);
Chris@10 1593 T7t = T7r + T7s;
Chris@10 1594 T7y = T7s - T7r;
Chris@10 1595 ii[WS(rs, 8)] = T7t - T7w;
Chris@10 1596 ii[WS(rs, 18)] = T7y - T7x;
Chris@10 1597 ii[WS(rs, 23)] = T7w + T7t;
Chris@10 1598 ii[WS(rs, 13)] = T7x + T7y;
Chris@10 1599 }
Chris@10 1600 }
Chris@10 1601 }
Chris@10 1602 }
Chris@10 1603 }
Chris@10 1604 }
Chris@10 1605
/*
 * Twiddle instruction table for the non-FMA codelet: four TW_CEXP entries
 * whose last field is 1, 3, 9 and 24, closed by a single TW_NEXT entry.
 * NOTE(review): presumably each TW_CEXP entry supplies one complex twiddle
 * (two reals), which would account for the 8 reals per iteration that
 * t2_25 reads from W -- confirm against the tw_instr definition, which
 * lives outside this file.
 */
Chris@10 1606 static const tw_instr twinstr[] = {
Chris@10 1607 {TW_CEXP, 0, 1},
Chris@10 1608 {TW_CEXP, 0, 3},
Chris@10 1609 {TW_CEXP, 0, 9},
Chris@10 1610 {TW_CEXP, 0, 24},
Chris@10 1611 {TW_NEXT, 1, 0}
Chris@10 1612 };
Chris@10 1613
/*
 * Codelet descriptor.  25 and "t2_25" match the -n / -name options on the
 * generator command line, twinstr is the instruction table defined just
 * before, and {280, 180, 160, 0} repeats the add / mul / fused
 * multiply-add counts quoted in the operation-count comment of this
 * non-FMA variant.
 * NOTE(review): the meaning of &GENUS and of the three trailing zeros is
 * set by the ct_desc definition outside this file -- confirm there.
 */
Chris@10 1614 static const ct_desc desc = { 25, "t2_25", twinstr, &GENUS, {280, 180, 160, 0}, 0, 0, 0 };
Chris@10 1615
/*
 * Registration hook for the non-FMA build: hands the t2_25 kernel and its
 * descriptor to kdft_dit_register for planner p.
 * NOTE(review): X() is a macro defined outside this file (presumably a
 * symbol-name prefixer) -- confirm.
 */
Chris@10 1616 void X(codelet_t2_25) (planner *p) {
Chris@10 1617 X(kdft_dit_register) (p, t2_25, &desc);
Chris@10 1618 }
Chris@10 1619 #endif /* HAVE_FMA */