annotate src/fftw-3.3.8/dft/scalar/codelets/t2_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:27 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 25 -name t2_25 -include dft/scalar/t.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 440 FP additions, 434 FP multiplications,
Chris@82 32 * (or, 84 additions, 78 multiplications, 356 fused multiply/add),
Chris@82 33 * 186 stack variables, 47 constants, and 100 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/t.h"
Chris@82 36
Chris@82 37 static void t2_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
Chris@82 39 DK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@82 40 DK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@82 41 DK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@82 42 DK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@82 43 DK(KP557913902, +0.557913902031834264187699648465567037992437152);
Chris@82 44 DK(KP249506682, +0.249506682107067890488084201715862638334226305);
Chris@82 45 DK(KP614372930, +0.614372930789563808870829930444362096004872855);
Chris@82 46 DK(KP621716863, +0.621716863012209892444754556304102309693593202);
Chris@82 47 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 48 DK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@82 49 DK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@82 50 DK(KP968479752, +0.968479752739016373193524836781420152702090879);
Chris@82 51 DK(KP994076283, +0.994076283785401014123185814696322018529298887);
Chris@82 52 DK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@82 53 DK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@82 54 DK(KP062914667, +0.062914667253649757225485955897349402364686947);
Chris@82 55 DK(KP943557151, +0.943557151597354104399655195398983005179443399);
Chris@82 56 DK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@82 57 DK(KP554608978, +0.554608978404018097464974850792216217022558774);
Chris@82 58 DK(KP248028675, +0.248028675328619457762448260696444630363259177);
Chris@82 59 DK(KP525970792, +0.525970792408939708442463226536226366643874659);
Chris@82 60 DK(KP726211448, +0.726211448929902658173535992263577167607493062);
Chris@82 61 DK(KP833417178, +0.833417178328688677408962550243238843138996060);
Chris@82 62 DK(KP921177326, +0.921177326965143320250447435415066029359282231);
Chris@82 63 DK(KP541454447, +0.541454447536312777046285590082819509052033189);
Chris@82 64 DK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@82 65 DK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@82 66 DK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@82 67 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@82 68 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 69 DK(KP851038619, +0.851038619207379630836264138867114231259902550);
Chris@82 70 DK(KP912018591, +0.912018591466481957908415381764119056233607330);
Chris@82 71 DK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@82 72 DK(KP470564281, +0.470564281212251493087595091036643380879947982);
Chris@82 73 DK(KP827271945, +0.827271945972475634034355757144307982555673741);
Chris@82 74 DK(KP126329378, +0.126329378446108174786050455341811215027378105);
Chris@82 75 DK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@82 76 DK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@82 77 DK(KP871714437, +0.871714437527667770979999223229522602943903653);
Chris@82 78 DK(KP549754652, +0.549754652192770074288023275540779861653779767);
Chris@82 79 DK(KP634619297, +0.634619297544148100711287640319130485732531031);
Chris@82 80 DK(KP939062505, +0.939062505817492352556001843133229685779824606);
Chris@82 81 DK(KP256756360, +0.256756360367726783319498520922669048172391148);
Chris@82 82 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 83 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 84 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 85 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 86 {
Chris@82 87 INT m;
Chris@82 88 for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) {
Chris@82 89 E T2, T8, T3, T6, Tk, Tm, T5, T7, T19, Tb, T1b, Tc, Tw, TT, T1j;
Chris@82 90 E TE, T2p, T1c, T2U, TI, T11, T15, T2Q, T2M, T2m, T2i, T2e, Tn, Tr, TX;
Chris@82 91 E T31, T35, T1l, T1m, T1q, TA, T1K, T1O, T2a, T27, T1g, T2x, T2t, Th, Td;
Chris@82 92 E T1S, T2X, T1W;
Chris@82 93 {
Chris@82 94 E TS, TD, T2L, T10, TH, T2P, T14, T9, T1a, Tz, TW, T4, Ta, Tv, T1J;
Chris@82 95 E T1N;
Chris@82 96 T2 = W[0];
Chris@82 97 T8 = W[4];
Chris@82 98 T3 = W[2];
Chris@82 99 T6 = W[3];
Chris@82 100 T4 = T2 * T3;
Chris@82 101 TS = T3 * T8;
Chris@82 102 Ta = T2 * T6;
Chris@82 103 Tv = T2 * T8;
Chris@82 104 Tk = W[6];
Chris@82 105 TD = T8 * Tk;
Chris@82 106 T2L = T2 * Tk;
Chris@82 107 T10 = T3 * Tk;
Chris@82 108 Tm = W[7];
Chris@82 109 TH = T8 * Tm;
Chris@82 110 T2P = T2 * Tm;
Chris@82 111 T14 = T3 * Tm;
Chris@82 112 T5 = W[1];
Chris@82 113 T7 = FNMS(T5, T6, T4);
Chris@82 114 T19 = FMA(T5, T6, T4);
Chris@82 115 T9 = T7 * T8;
Chris@82 116 T1a = T19 * T8;
Chris@82 117 Tb = FMA(T5, T3, Ta);
Chris@82 118 T1b = FNMS(T5, T3, Ta);
Chris@82 119 Tc = W[5];
Chris@82 120 Tz = T2 * Tc;
Chris@82 121 TW = T3 * Tc;
Chris@82 122 Tw = FNMS(T5, Tc, Tv);
Chris@82 123 TT = FMA(T6, Tc, TS);
Chris@82 124 T1j = FMA(T5, Tc, Tv);
Chris@82 125 TE = FMA(Tc, Tm, TD);
Chris@82 126 T2p = FMA(T6, T8, TW);
Chris@82 127 T1c = FNMS(T1b, Tc, T1a);
Chris@82 128 T2U = FNMS(Tb, Tc, T9);
Chris@82 129 TI = FNMS(Tc, Tk, TH);
Chris@82 130 T11 = FMA(T6, Tm, T10);
Chris@82 131 T15 = FNMS(T6, Tk, T14);
Chris@82 132 T2Q = FNMS(T5, Tk, T2P);
Chris@82 133 T2M = FMA(T5, Tm, T2L);
Chris@82 134 {
Chris@82 135 E T2h, T2d, Tl, Tq;
Chris@82 136 T2m = FNMS(T6, Tc, TS);
Chris@82 137 T2h = T19 * Tm;
Chris@82 138 T2i = FNMS(T1b, Tk, T2h);
Chris@82 139 T2d = T19 * Tk;
Chris@82 140 T2e = FMA(T1b, Tm, T2d);
Chris@82 141 Tl = T7 * Tk;
Chris@82 142 Tn = FMA(Tb, Tm, Tl);
Chris@82 143 Tq = T7 * Tm;
Chris@82 144 Tr = FNMS(Tb, Tk, Tq);
Chris@82 145 }
Chris@82 146 {
Chris@82 147 E T30, T34, T1k, T1p;
Chris@82 148 T30 = TT * Tk;
Chris@82 149 T34 = TT * Tm;
Chris@82 150 TX = FNMS(T6, T8, TW);
Chris@82 151 T31 = FMA(TX, Tm, T30);
Chris@82 152 T35 = FNMS(TX, Tk, T34);
Chris@82 153 T1k = T1j * Tk;
Chris@82 154 T1p = T1j * Tm;
Chris@82 155 T1l = FNMS(T5, T8, Tz);
Chris@82 156 T1m = FMA(T1l, Tm, T1k);
Chris@82 157 T1q = FNMS(T1l, Tk, T1p);
Chris@82 158 }
Chris@82 159 T1J = Tw * Tk;
Chris@82 160 T1N = Tw * Tm;
Chris@82 161 TA = FMA(T5, T8, Tz);
Chris@82 162 T1K = FMA(TA, Tm, T1J);
Chris@82 163 T1O = FNMS(TA, Tk, T1N);
Chris@82 164 {
Chris@82 165 E T1f, T2s, T2w, Tg, T1R, T1V;
Chris@82 166 T1f = T19 * Tc;
Chris@82 167 T2a = FNMS(T1b, T8, T1f);
Chris@82 168 T27 = FMA(T1b, Tc, T1a);
Chris@82 169 T2s = T27 * Tk;
Chris@82 170 T2w = T27 * Tm;
Chris@82 171 T1g = FMA(T1b, T8, T1f);
Chris@82 172 T2x = FNMS(T2a, Tk, T2w);
Chris@82 173 T2t = FMA(T2a, Tm, T2s);
Chris@82 174 Tg = T7 * Tc;
Chris@82 175 Th = FNMS(Tb, T8, Tg);
Chris@82 176 Td = FMA(Tb, Tc, T9);
Chris@82 177 T1R = Td * Tk;
Chris@82 178 T1V = Td * Tm;
Chris@82 179 T1S = FMA(Th, Tm, T1R);
Chris@82 180 T2X = FMA(Tb, T8, Tg);
Chris@82 181 T1W = FNMS(Th, Tk, T1V);
Chris@82 182 }
Chris@82 183 }
Chris@82 184 {
Chris@82 185 E T1, T7r, T4s, T6a, T7G, T86, TM, T4f, T4g, T7q, T7B, T7C, T5j, T6n, T5q;
Chris@82 186 E T6k, T3a, T6m, T5g, T4a, T6j, T5n, T4z, T6z, T4G, T6C, T1v, T6y, T4w, T3t;
Chris@82 187 E T6B, T4D, T4O, T6v, T4V, T6s, T21, T6r, T4S, T3H, T6u, T4L, T54, T6g, T5b;
Chris@82 188 E T6d, T2C, T6f, T51, T3W, T6c, T58;
Chris@82 189 {
Chris@82 190 E Tj, T4j, TK, T4q, TC, T4o, Tt, T4l;
Chris@82 191 T1 = ri[0];
Chris@82 192 T7r = ii[0];
Chris@82 193 {
Chris@82 194 E Te, Tf, Ti, T4i;
Chris@82 195 Te = ri[WS(rs, 5)];
Chris@82 196 Tf = Td * Te;
Chris@82 197 Ti = ii[WS(rs, 5)];
Chris@82 198 T4i = Td * Ti;
Chris@82 199 Tj = FMA(Th, Ti, Tf);
Chris@82 200 T4j = FNMS(Th, Te, T4i);
Chris@82 201 }
Chris@82 202 {
Chris@82 203 E TF, TG, TJ, T4p;
Chris@82 204 TF = ri[WS(rs, 15)];
Chris@82 205 TG = TE * TF;
Chris@82 206 TJ = ii[WS(rs, 15)];
Chris@82 207 T4p = TE * TJ;
Chris@82 208 TK = FMA(TI, TJ, TG);
Chris@82 209 T4q = FNMS(TI, TF, T4p);
Chris@82 210 }
Chris@82 211 {
Chris@82 212 E Tx, Ty, TB, T4n;
Chris@82 213 Tx = ri[WS(rs, 10)];
Chris@82 214 Ty = Tw * Tx;
Chris@82 215 TB = ii[WS(rs, 10)];
Chris@82 216 T4n = Tw * TB;
Chris@82 217 TC = FMA(TA, TB, Ty);
Chris@82 218 T4o = FNMS(TA, Tx, T4n);
Chris@82 219 }
Chris@82 220 {
Chris@82 221 E To, Tp, Ts, T4k;
Chris@82 222 To = ri[WS(rs, 20)];
Chris@82 223 Tp = Tn * To;
Chris@82 224 Ts = ii[WS(rs, 20)];
Chris@82 225 T4k = Tn * Ts;
Chris@82 226 Tt = FMA(Tr, Ts, Tp);
Chris@82 227 T4l = FNMS(Tr, To, T4k);
Chris@82 228 }
Chris@82 229 {
Chris@82 230 E T4m, T4r, T7E, T7F;
Chris@82 231 T4m = T4j - T4l;
Chris@82 232 T4r = T4o - T4q;
Chris@82 233 T4s = FMA(KP618033988, T4r, T4m);
Chris@82 234 T6a = FNMS(KP618033988, T4m, T4r);
Chris@82 235 T7E = Tj - Tt;
Chris@82 236 T7F = TC - TK;
Chris@82 237 T7G = FMA(KP618033988, T7F, T7E);
Chris@82 238 T86 = FNMS(KP618033988, T7E, T7F);
Chris@82 239 }
Chris@82 240 {
Chris@82 241 E Tu, TL, T7o, T7p;
Chris@82 242 Tu = Tj + Tt;
Chris@82 243 TL = TC + TK;
Chris@82 244 TM = Tu + TL;
Chris@82 245 T4f = FNMS(KP250000000, TM, T1);
Chris@82 246 T4g = Tu - TL;
Chris@82 247 T7o = T4j + T4l;
Chris@82 248 T7p = T4o + T4q;
Chris@82 249 T7q = T7o + T7p;
Chris@82 250 T7B = FNMS(KP250000000, T7q, T7r);
Chris@82 251 T7C = T7o - T7p;
Chris@82 252 }
Chris@82 253 }
Chris@82 254 {
Chris@82 255 E T2G, T3Y, T2Z, T37, T38, T45, T47, T48, T2K, T2S, T2T, T40, T42, T43;
Chris@82 256 {
Chris@82 257 E T2D, T2E, T2F, T3X;
Chris@82 258 T2D = ri[WS(rs, 3)];
Chris@82 259 T2E = T3 * T2D;
Chris@82 260 T2F = ii[WS(rs, 3)];
Chris@82 261 T3X = T3 * T2F;
Chris@82 262 T2G = FMA(T6, T2F, T2E);
Chris@82 263 T3Y = FNMS(T6, T2D, T3X);
Chris@82 264 }
Chris@82 265 {
Chris@82 266 E T2V, T2W, T2Y, T44, T32, T33, T36, T46;
Chris@82 267 T2V = ri[WS(rs, 13)];
Chris@82 268 T2W = T2U * T2V;
Chris@82 269 T2Y = ii[WS(rs, 13)];
Chris@82 270 T44 = T2U * T2Y;
Chris@82 271 T32 = ri[WS(rs, 18)];
Chris@82 272 T33 = T31 * T32;
Chris@82 273 T36 = ii[WS(rs, 18)];
Chris@82 274 T46 = T31 * T36;
Chris@82 275 T2Z = FMA(T2X, T2Y, T2W);
Chris@82 276 T37 = FMA(T35, T36, T33);
Chris@82 277 T38 = T2Z + T37;
Chris@82 278 T45 = FNMS(T2X, T2V, T44);
Chris@82 279 T47 = FNMS(T35, T32, T46);
Chris@82 280 T48 = T45 + T47;
Chris@82 281 }
Chris@82 282 {
Chris@82 283 E T2H, T2I, T2J, T3Z, T2N, T2O, T2R, T41;
Chris@82 284 T2H = ri[WS(rs, 8)];
Chris@82 285 T2I = T1j * T2H;
Chris@82 286 T2J = ii[WS(rs, 8)];
Chris@82 287 T3Z = T1j * T2J;
Chris@82 288 T2N = ri[WS(rs, 23)];
Chris@82 289 T2O = T2M * T2N;
Chris@82 290 T2R = ii[WS(rs, 23)];
Chris@82 291 T41 = T2M * T2R;
Chris@82 292 T2K = FMA(T1l, T2J, T2I);
Chris@82 293 T2S = FMA(T2Q, T2R, T2O);
Chris@82 294 T2T = T2K + T2S;
Chris@82 295 T40 = FNMS(T1l, T2H, T3Z);
Chris@82 296 T42 = FNMS(T2Q, T2N, T41);
Chris@82 297 T43 = T40 + T42;
Chris@82 298 }
Chris@82 299 {
Chris@82 300 E T5h, T5i, T5o, T5p;
Chris@82 301 T5h = T42 - T40;
Chris@82 302 T5i = T47 - T45;
Chris@82 303 T5j = FMA(KP618033988, T5i, T5h);
Chris@82 304 T6n = FNMS(KP618033988, T5h, T5i);
Chris@82 305 T5o = T2K - T2S;
Chris@82 306 T5p = T2Z - T37;
Chris@82 307 T5q = FMA(KP618033988, T5p, T5o);
Chris@82 308 T6k = FNMS(KP618033988, T5o, T5p);
Chris@82 309 }
Chris@82 310 {
Chris@82 311 E T5f, T39, T5e, T5m, T49, T5l;
Chris@82 312 T5f = T38 - T2T;
Chris@82 313 T39 = T2T + T38;
Chris@82 314 T5e = FNMS(KP250000000, T39, T2G);
Chris@82 315 T3a = T2G + T39;
Chris@82 316 T6m = FMA(KP559016994, T5f, T5e);
Chris@82 317 T5g = FNMS(KP559016994, T5f, T5e);
Chris@82 318 T5m = T48 - T43;
Chris@82 319 T49 = T43 + T48;
Chris@82 320 T5l = FNMS(KP250000000, T49, T3Y);
Chris@82 321 T4a = T3Y + T49;
Chris@82 322 T6j = FMA(KP559016994, T5m, T5l);
Chris@82 323 T5n = FNMS(KP559016994, T5m, T5l);
Chris@82 324 }
Chris@82 325 }
Chris@82 326 {
Chris@82 327 E TR, T3h, T1i, T1s, T1t, T3o, T3q, T3r, TZ, T17, T18, T3j, T3l, T3m;
Chris@82 328 {
Chris@82 329 E TO, TP, TQ, T3g;
Chris@82 330 TO = ri[WS(rs, 1)];
Chris@82 331 TP = T2 * TO;
Chris@82 332 TQ = ii[WS(rs, 1)];
Chris@82 333 T3g = T2 * TQ;
Chris@82 334 TR = FMA(T5, TQ, TP);
Chris@82 335 T3h = FNMS(T5, TO, T3g);
Chris@82 336 }
Chris@82 337 {
Chris@82 338 E T1d, T1e, T1h, T3n, T1n, T1o, T1r, T3p;
Chris@82 339 T1d = ri[WS(rs, 11)];
Chris@82 340 T1e = T1c * T1d;
Chris@82 341 T1h = ii[WS(rs, 11)];
Chris@82 342 T3n = T1c * T1h;
Chris@82 343 T1n = ri[WS(rs, 16)];
Chris@82 344 T1o = T1m * T1n;
Chris@82 345 T1r = ii[WS(rs, 16)];
Chris@82 346 T3p = T1m * T1r;
Chris@82 347 T1i = FMA(T1g, T1h, T1e);
Chris@82 348 T1s = FMA(T1q, T1r, T1o);
Chris@82 349 T1t = T1i + T1s;
Chris@82 350 T3o = FNMS(T1g, T1d, T3n);
Chris@82 351 T3q = FNMS(T1q, T1n, T3p);
Chris@82 352 T3r = T3o + T3q;
Chris@82 353 }
Chris@82 354 {
Chris@82 355 E TU, TV, TY, T3i, T12, T13, T16, T3k;
Chris@82 356 TU = ri[WS(rs, 6)];
Chris@82 357 TV = TT * TU;
Chris@82 358 TY = ii[WS(rs, 6)];
Chris@82 359 T3i = TT * TY;
Chris@82 360 T12 = ri[WS(rs, 21)];
Chris@82 361 T13 = T11 * T12;
Chris@82 362 T16 = ii[WS(rs, 21)];
Chris@82 363 T3k = T11 * T16;
Chris@82 364 TZ = FMA(TX, TY, TV);
Chris@82 365 T17 = FMA(T15, T16, T13);
Chris@82 366 T18 = TZ + T17;
Chris@82 367 T3j = FNMS(TX, TU, T3i);
Chris@82 368 T3l = FNMS(T15, T12, T3k);
Chris@82 369 T3m = T3j + T3l;
Chris@82 370 }
Chris@82 371 {
Chris@82 372 E T4x, T4y, T4E, T4F;
Chris@82 373 T4x = T3j - T3l;
Chris@82 374 T4y = T3q - T3o;
Chris@82 375 T4z = FNMS(KP618033988, T4y, T4x);
Chris@82 376 T6z = FMA(KP618033988, T4x, T4y);
Chris@82 377 T4E = T17 - TZ;
Chris@82 378 T4F = T1s - T1i;
Chris@82 379 T4G = FMA(KP618033988, T4F, T4E);
Chris@82 380 T6C = FNMS(KP618033988, T4E, T4F);
Chris@82 381 }
Chris@82 382 {
Chris@82 383 E T4v, T1u, T4u, T4C, T3s, T4B;
Chris@82 384 T4v = T18 - T1t;
Chris@82 385 T1u = T18 + T1t;
Chris@82 386 T4u = FNMS(KP250000000, T1u, TR);
Chris@82 387 T1v = TR + T1u;
Chris@82 388 T6y = FNMS(KP559016994, T4v, T4u);
Chris@82 389 T4w = FMA(KP559016994, T4v, T4u);
Chris@82 390 T4C = T3m - T3r;
Chris@82 391 T3s = T3m + T3r;
Chris@82 392 T4B = FNMS(KP250000000, T3s, T3h);
Chris@82 393 T3t = T3h + T3s;
Chris@82 394 T6B = FNMS(KP559016994, T4C, T4B);
Chris@82 395 T4D = FMA(KP559016994, T4C, T4B);
Chris@82 396 }
Chris@82 397 }
Chris@82 398 {
Chris@82 399 E T1z, T3v, T1Q, T1Y, T1Z, T3C, T3E, T3F, T1D, T1H, T1I, T3x, T3z, T3A;
Chris@82 400 {
Chris@82 401 E T1w, T1x, T1y, T3u;
Chris@82 402 T1w = ri[WS(rs, 4)];
Chris@82 403 T1x = T7 * T1w;
Chris@82 404 T1y = ii[WS(rs, 4)];
Chris@82 405 T3u = T7 * T1y;
Chris@82 406 T1z = FMA(Tb, T1y, T1x);
Chris@82 407 T3v = FNMS(Tb, T1w, T3u);
Chris@82 408 }
Chris@82 409 {
Chris@82 410 E T1L, T1M, T1P, T3B, T1T, T1U, T1X, T3D;
Chris@82 411 T1L = ri[WS(rs, 14)];
Chris@82 412 T1M = T1K * T1L;
Chris@82 413 T1P = ii[WS(rs, 14)];
Chris@82 414 T3B = T1K * T1P;
Chris@82 415 T1T = ri[WS(rs, 19)];
Chris@82 416 T1U = T1S * T1T;
Chris@82 417 T1X = ii[WS(rs, 19)];
Chris@82 418 T3D = T1S * T1X;
Chris@82 419 T1Q = FMA(T1O, T1P, T1M);
Chris@82 420 T1Y = FMA(T1W, T1X, T1U);
Chris@82 421 T1Z = T1Q + T1Y;
Chris@82 422 T3C = FNMS(T1O, T1L, T3B);
Chris@82 423 T3E = FNMS(T1W, T1T, T3D);
Chris@82 424 T3F = T3C + T3E;
Chris@82 425 }
Chris@82 426 {
Chris@82 427 E T1A, T1B, T1C, T3w, T1E, T1F, T1G, T3y;
Chris@82 428 T1A = ri[WS(rs, 9)];
Chris@82 429 T1B = T8 * T1A;
Chris@82 430 T1C = ii[WS(rs, 9)];
Chris@82 431 T3w = T8 * T1C;
Chris@82 432 T1E = ri[WS(rs, 24)];
Chris@82 433 T1F = Tk * T1E;
Chris@82 434 T1G = ii[WS(rs, 24)];
Chris@82 435 T3y = Tk * T1G;
Chris@82 436 T1D = FMA(Tc, T1C, T1B);
Chris@82 437 T1H = FMA(Tm, T1G, T1F);
Chris@82 438 T1I = T1D + T1H;
Chris@82 439 T3x = FNMS(Tc, T1A, T3w);
Chris@82 440 T3z = FNMS(Tm, T1E, T3y);
Chris@82 441 T3A = T3x + T3z;
Chris@82 442 }
Chris@82 443 {
Chris@82 444 E T4M, T4N, T4T, T4U;
Chris@82 445 T4M = T1H - T1D;
Chris@82 446 T4N = T1Y - T1Q;
Chris@82 447 T4O = FMA(KP618033988, T4N, T4M);
Chris@82 448 T6v = FNMS(KP618033988, T4M, T4N);
Chris@82 449 T4T = T3z - T3x;
Chris@82 450 T4U = T3E - T3C;
Chris@82 451 T4V = FMA(KP618033988, T4U, T4T);
Chris@82 452 T6s = FNMS(KP618033988, T4T, T4U);
Chris@82 453 }
Chris@82 454 {
Chris@82 455 E T4R, T20, T4Q, T4K, T3G, T4J;
Chris@82 456 T4R = T1I - T1Z;
Chris@82 457 T20 = T1I + T1Z;
Chris@82 458 T4Q = FNMS(KP250000000, T20, T1z);
Chris@82 459 T21 = T1z + T20;
Chris@82 460 T6r = FNMS(KP559016994, T4R, T4Q);
Chris@82 461 T4S = FMA(KP559016994, T4R, T4Q);
Chris@82 462 T4K = T3F - T3A;
Chris@82 463 T3G = T3A + T3F;
Chris@82 464 T4J = FNMS(KP250000000, T3G, T3v);
Chris@82 465 T3H = T3v + T3G;
Chris@82 466 T6u = FMA(KP559016994, T4K, T4J);
Chris@82 467 T4L = FNMS(KP559016994, T4K, T4J);
Chris@82 468 }
Chris@82 469 }
Chris@82 470 {
Chris@82 471 E T26, T3K, T2r, T2z, T2A, T3R, T3T, T3U, T2c, T2k, T2l, T3M, T3O, T3P;
Chris@82 472 {
Chris@82 473 E T23, T24, T25, T3J;
Chris@82 474 T23 = ri[WS(rs, 2)];
Chris@82 475 T24 = T19 * T23;
Chris@82 476 T25 = ii[WS(rs, 2)];
Chris@82 477 T3J = T19 * T25;
Chris@82 478 T26 = FMA(T1b, T25, T24);
Chris@82 479 T3K = FNMS(T1b, T23, T3J);
Chris@82 480 }
Chris@82 481 {
Chris@82 482 E T2n, T2o, T2q, T3Q, T2u, T2v, T2y, T3S;
Chris@82 483 T2n = ri[WS(rs, 12)];
Chris@82 484 T2o = T2m * T2n;
Chris@82 485 T2q = ii[WS(rs, 12)];
Chris@82 486 T3Q = T2m * T2q;
Chris@82 487 T2u = ri[WS(rs, 17)];
Chris@82 488 T2v = T2t * T2u;
Chris@82 489 T2y = ii[WS(rs, 17)];
Chris@82 490 T3S = T2t * T2y;
Chris@82 491 T2r = FMA(T2p, T2q, T2o);
Chris@82 492 T2z = FMA(T2x, T2y, T2v);
Chris@82 493 T2A = T2r + T2z;
Chris@82 494 T3R = FNMS(T2p, T2n, T3Q);
Chris@82 495 T3T = FNMS(T2x, T2u, T3S);
Chris@82 496 T3U = T3R + T3T;
Chris@82 497 }
Chris@82 498 {
Chris@82 499 E T28, T29, T2b, T3L, T2f, T2g, T2j, T3N;
Chris@82 500 T28 = ri[WS(rs, 7)];
Chris@82 501 T29 = T27 * T28;
Chris@82 502 T2b = ii[WS(rs, 7)];
Chris@82 503 T3L = T27 * T2b;
Chris@82 504 T2f = ri[WS(rs, 22)];
Chris@82 505 T2g = T2e * T2f;
Chris@82 506 T2j = ii[WS(rs, 22)];
Chris@82 507 T3N = T2e * T2j;
Chris@82 508 T2c = FMA(T2a, T2b, T29);
Chris@82 509 T2k = FMA(T2i, T2j, T2g);
Chris@82 510 T2l = T2c + T2k;
Chris@82 511 T3M = FNMS(T2a, T28, T3L);
Chris@82 512 T3O = FNMS(T2i, T2f, T3N);
Chris@82 513 T3P = T3M + T3O;
Chris@82 514 }
Chris@82 515 {
Chris@82 516 E T52, T53, T59, T5a;
Chris@82 517 T52 = T3O - T3M;
Chris@82 518 T53 = T3R - T3T;
Chris@82 519 T54 = FNMS(KP618033988, T53, T52);
Chris@82 520 T6g = FMA(KP618033988, T52, T53);
Chris@82 521 T59 = T2k - T2c;
Chris@82 522 T5a = T2z - T2r;
Chris@82 523 T5b = FMA(KP618033988, T5a, T59);
Chris@82 524 T6d = FNMS(KP618033988, T59, T5a);
Chris@82 525 }
Chris@82 526 {
Chris@82 527 E T50, T2B, T4Z, T57, T3V, T56;
Chris@82 528 T50 = T2A - T2l;
Chris@82 529 T2B = T2l + T2A;
Chris@82 530 T4Z = FNMS(KP250000000, T2B, T26);
Chris@82 531 T2C = T26 + T2B;
Chris@82 532 T6f = FMA(KP559016994, T50, T4Z);
Chris@82 533 T51 = FNMS(KP559016994, T50, T4Z);
Chris@82 534 T57 = T3U - T3P;
Chris@82 535 T3V = T3P + T3U;
Chris@82 536 T56 = FNMS(KP250000000, T3V, T3K);
Chris@82 537 T3W = T3K + T3V;
Chris@82 538 T6c = FMA(KP559016994, T57, T56);
Chris@82 539 T58 = FNMS(KP559016994, T57, T56);
Chris@82 540 }
Chris@82 541 }
Chris@82 542 {
Chris@82 543 E T4c, T4e, TN, T3c, T3d, T3e, T4d, T3f;
Chris@82 544 {
Chris@82 545 E T3I, T4b, T22, T3b;
Chris@82 546 T3I = T3t - T3H;
Chris@82 547 T4b = T3W - T4a;
Chris@82 548 T4c = FMA(KP618033988, T4b, T3I);
Chris@82 549 T4e = FNMS(KP618033988, T3I, T4b);
Chris@82 550 TN = T1 + TM;
Chris@82 551 T22 = T1v + T21;
Chris@82 552 T3b = T2C + T3a;
Chris@82 553 T3c = T22 + T3b;
Chris@82 554 T3d = FNMS(KP250000000, T3c, TN);
Chris@82 555 T3e = T22 - T3b;
Chris@82 556 }
Chris@82 557 ri[0] = TN + T3c;
Chris@82 558 T4d = FNMS(KP559016994, T3e, T3d);
Chris@82 559 ri[WS(rs, 10)] = FNMS(KP951056516, T4e, T4d);
Chris@82 560 ri[WS(rs, 15)] = FMA(KP951056516, T4e, T4d);
Chris@82 561 T3f = FMA(KP559016994, T3e, T3d);
Chris@82 562 ri[WS(rs, 20)] = FNMS(KP951056516, T4c, T3f);
Chris@82 563 ri[WS(rs, 5)] = FMA(KP951056516, T4c, T3f);
Chris@82 564 }
Chris@82 565 {
Chris@82 566 E T7y, T7A, T7s, T7n, T7t, T7u, T7z, T7v;
Chris@82 567 {
Chris@82 568 E T7w, T7x, T7l, T7m;
Chris@82 569 T7w = T1v - T21;
Chris@82 570 T7x = T2C - T3a;
Chris@82 571 T7y = FMA(KP618033988, T7x, T7w);
Chris@82 572 T7A = FNMS(KP618033988, T7w, T7x);
Chris@82 573 T7s = T7q + T7r;
Chris@82 574 T7l = T3t + T3H;
Chris@82 575 T7m = T3W + T4a;
Chris@82 576 T7n = T7l + T7m;
Chris@82 577 T7t = FNMS(KP250000000, T7n, T7s);
Chris@82 578 T7u = T7l - T7m;
Chris@82 579 }
Chris@82 580 ii[0] = T7n + T7s;
Chris@82 581 T7z = FNMS(KP559016994, T7u, T7t);
Chris@82 582 ii[WS(rs, 10)] = FMA(KP951056516, T7A, T7z);
Chris@82 583 ii[WS(rs, 15)] = FNMS(KP951056516, T7A, T7z);
Chris@82 584 T7v = FMA(KP559016994, T7u, T7t);
Chris@82 585 ii[WS(rs, 5)] = FNMS(KP951056516, T7y, T7v);
Chris@82 586 ii[WS(rs, 20)] = FMA(KP951056516, T7y, T7v);
Chris@82 587 }
Chris@82 588 {
Chris@82 589 E T4t, T5H, T7H, T7T, T5A, T5D, T7P, T7O, T7I, T7J, T7K, T4Y, T5t, T5u, T62;
Chris@82 590 E T65, T81, T80, T7U, T7V, T7W, T5O, T5V, T5W, T4h, T7D;
Chris@82 591 T4h = FMA(KP559016994, T4g, T4f);
Chris@82 592 T4t = FMA(KP951056516, T4s, T4h);
Chris@82 593 T5H = FNMS(KP951056516, T4s, T4h);
Chris@82 594 T7D = FMA(KP559016994, T7C, T7B);
Chris@82 595 T7H = FNMS(KP951056516, T7G, T7D);
Chris@82 596 T7T = FMA(KP951056516, T7G, T7D);
Chris@82 597 {
Chris@82 598 E T4I, T5y, T5s, T5C, T4X, T5z, T5d, T5B;
Chris@82 599 {
Chris@82 600 E T4A, T4H, T5k, T5r;
Chris@82 601 T4A = FMA(KP951056516, T4z, T4w);
Chris@82 602 T4H = FMA(KP951056516, T4G, T4D);
Chris@82 603 T4I = FMA(KP256756360, T4H, T4A);
Chris@82 604 T5y = FNMS(KP256756360, T4A, T4H);
Chris@82 605 T5k = FNMS(KP951056516, T5j, T5g);
Chris@82 606 T5r = FNMS(KP951056516, T5q, T5n);
Chris@82 607 T5s = FMA(KP939062505, T5r, T5k);
Chris@82 608 T5C = FNMS(KP939062505, T5k, T5r);
Chris@82 609 }
Chris@82 610 {
Chris@82 611 E T4P, T4W, T55, T5c;
Chris@82 612 T4P = FMA(KP951056516, T4O, T4L);
Chris@82 613 T4W = FNMS(KP951056516, T4V, T4S);
Chris@82 614 T4X = FMA(KP634619297, T4W, T4P);
Chris@82 615 T5z = FNMS(KP634619297, T4P, T4W);
Chris@82 616 T55 = FNMS(KP951056516, T54, T51);
Chris@82 617 T5c = FMA(KP951056516, T5b, T58);
Chris@82 618 T5d = FMA(KP549754652, T5c, T55);
Chris@82 619 T5B = FNMS(KP549754652, T55, T5c);
Chris@82 620 }
Chris@82 621 T5A = FMA(KP871714437, T5z, T5y);
Chris@82 622 T5D = FNMS(KP831864738, T5C, T5B);
Chris@82 623 T7P = FNMS(KP831864738, T5s, T5d);
Chris@82 624 T7O = FNMS(KP871714437, T4X, T4I);
Chris@82 625 T7I = FNMS(KP871714437, T5z, T5y);
Chris@82 626 T7J = FMA(KP831864738, T5C, T5B);
Chris@82 627 T7K = FMA(KP904730450, T7J, T7I);
Chris@82 628 T4Y = FMA(KP871714437, T4X, T4I);
Chris@82 629 T5t = FMA(KP831864738, T5s, T5d);
Chris@82 630 T5u = FMA(KP904730450, T5t, T4Y);
Chris@82 631 }
Chris@82 632 {
Chris@82 633 E T5K, T63, T5U, T61, T5N, T64, T5R, T60;
Chris@82 634 {
Chris@82 635 E T5I, T5J, T5S, T5T;
Chris@82 636 T5I = FMA(KP951056516, T5j, T5g);
Chris@82 637 T5J = FMA(KP951056516, T5q, T5n);
Chris@82 638 T5K = FNMS(KP126329378, T5J, T5I);
Chris@82 639 T63 = FMA(KP126329378, T5I, T5J);
Chris@82 640 T5S = FNMS(KP951056516, T4O, T4L);
Chris@82 641 T5T = FMA(KP951056516, T4V, T4S);
Chris@82 642 T5U = FMA(KP827271945, T5T, T5S);
Chris@82 643 T61 = FNMS(KP827271945, T5S, T5T);
Chris@82 644 }
Chris@82 645 {
Chris@82 646 E T5L, T5M, T5P, T5Q;
Chris@82 647 T5L = FNMS(KP951056516, T5b, T58);
Chris@82 648 T5M = FMA(KP951056516, T54, T51);
Chris@82 649 T5N = FNMS(KP470564281, T5M, T5L);
Chris@82 650 T64 = FMA(KP470564281, T5L, T5M);
Chris@82 651 T5P = FNMS(KP951056516, T4G, T4D);
Chris@82 652 T5Q = FNMS(KP951056516, T4z, T4w);
Chris@82 653 T5R = FMA(KP634619297, T5Q, T5P);
Chris@82 654 T60 = FNMS(KP634619297, T5P, T5Q);
Chris@82 655 }
Chris@82 656 T62 = FMA(KP912575812, T61, T60);
Chris@82 657 T65 = FNMS(KP912018591, T64, T63);
Chris@82 658 T81 = FMA(KP912018591, T5N, T5K);
Chris@82 659 T80 = FMA(KP912575812, T5U, T5R);
Chris@82 660 T7U = FMA(KP912018591, T64, T63);
Chris@82 661 T7V = FNMS(KP912575812, T61, T60);
Chris@82 662 T7W = FMA(KP851038619, T7V, T7U);
Chris@82 663 T5O = FNMS(KP912018591, T5N, T5K);
Chris@82 664 T5V = FNMS(KP912575812, T5U, T5R);
Chris@82 665 T5W = FNMS(KP851038619, T5V, T5O);
Chris@82 666 }
Chris@82 667 ri[WS(rs, 1)] = FMA(KP968583161, T5u, T4t);
Chris@82 668 ii[WS(rs, 1)] = FMA(KP968583161, T7K, T7H);
Chris@82 669 ri[WS(rs, 4)] = FNMS(KP992114701, T5W, T5H);
Chris@82 670 ii[WS(rs, 4)] = FNMS(KP992114701, T7W, T7T);
Chris@82 671 {
Chris@82 672 E T5E, T5G, T5x, T5F, T5v, T5w;
Chris@82 673 T5E = FMA(KP559154169, T5D, T5A);
Chris@82 674 T5G = FNMS(KP683113946, T5A, T5D);
Chris@82 675 T5v = FNMS(KP242145790, T5u, T4t);
Chris@82 676 T5w = FNMS(KP904730450, T5t, T4Y);
Chris@82 677 T5x = FMA(KP541454447, T5w, T5v);
Chris@82 678 T5F = FNMS(KP541454447, T5w, T5v);
Chris@82 679 ri[WS(rs, 6)] = FMA(KP921177326, T5E, T5x);
Chris@82 680 ri[WS(rs, 16)] = FMA(KP833417178, T5G, T5F);
Chris@82 681 ri[WS(rs, 21)] = FNMS(KP921177326, T5E, T5x);
Chris@82 682 ri[WS(rs, 11)] = FNMS(KP833417178, T5G, T5F);
Chris@82 683 }
Chris@82 684 {
Chris@82 685 E T7Q, T7S, T7N, T7R, T7L, T7M;
Chris@82 686 T7Q = FMA(KP559154169, T7P, T7O);
Chris@82 687 T7S = FNMS(KP683113946, T7O, T7P);
Chris@82 688 T7L = FNMS(KP242145790, T7K, T7H);
Chris@82 689 T7M = FNMS(KP904730450, T7J, T7I);
Chris@82 690 T7N = FMA(KP541454447, T7M, T7L);
Chris@82 691 T7R = FNMS(KP541454447, T7M, T7L);
Chris@82 692 ii[WS(rs, 6)] = FNMS(KP921177326, T7Q, T7N);
Chris@82 693 ii[WS(rs, 16)] = FNMS(KP833417178, T7S, T7R);
Chris@82 694 ii[WS(rs, 21)] = FMA(KP921177326, T7Q, T7N);
Chris@82 695 ii[WS(rs, 11)] = FMA(KP833417178, T7S, T7R);
Chris@82 696 }
Chris@82 697 {
Chris@82 698 E T66, T68, T5Z, T67, T5X, T5Y;
Chris@82 699 T66 = FNMS(KP726211448, T65, T62);
Chris@82 700 T68 = FMA(KP525970792, T62, T65);
Chris@82 701 T5X = FMA(KP248028675, T5W, T5H);
Chris@82 702 T5Y = FMA(KP851038619, T5V, T5O);
Chris@82 703 T5Z = FMA(KP554608978, T5Y, T5X);
Chris@82 704 T67 = FNMS(KP554608978, T5Y, T5X);
Chris@82 705 ri[WS(rs, 9)] = FNMS(KP803003575, T66, T5Z);
Chris@82 706 ri[WS(rs, 19)] = FMA(KP943557151, T68, T67);
Chris@82 707 ri[WS(rs, 24)] = FMA(KP803003575, T66, T5Z);
Chris@82 708 ri[WS(rs, 14)] = FNMS(KP943557151, T68, T67);
Chris@82 709 }
Chris@82 710 {
Chris@82 711 E T82, T84, T7Z, T83, T7X, T7Y;
Chris@82 712 T82 = FMA(KP726211448, T81, T80);
Chris@82 713 T84 = FNMS(KP525970792, T80, T81);
Chris@82 714 T7X = FMA(KP248028675, T7W, T7T);
Chris@82 715 T7Y = FNMS(KP851038619, T7V, T7U);
Chris@82 716 T7Z = FMA(KP554608978, T7Y, T7X);
Chris@82 717 T83 = FNMS(KP554608978, T7Y, T7X);
Chris@82 718 ii[WS(rs, 9)] = FNMS(KP803003575, T82, T7Z);
Chris@82 719 ii[WS(rs, 19)] = FNMS(KP943557151, T84, T83);
Chris@82 720 ii[WS(rs, 24)] = FMA(KP803003575, T82, T7Z);
Chris@82 721 ii[WS(rs, 14)] = FMA(KP943557151, T84, T83);
Chris@82 722 }
Chris@82 723 }
Chris@82 724 {
Chris@82 725 E T6b, T6T, T87, T8j, T6M, T6P, T8r, T8q, T8k, T8l, T8m, T6q, T6F, T6G, T7e;
Chris@82 726 E T7h, T8f, T8e, T88, T89, T8a, T70, T77, T78, T69, T85;
Chris@82 727 T69 = FNMS(KP559016994, T4g, T4f);
Chris@82 728 T6b = FMA(KP951056516, T6a, T69);
Chris@82 729 T6T = FNMS(KP951056516, T6a, T69);
Chris@82 730 T85 = FNMS(KP559016994, T7C, T7B);
Chris@82 731 T87 = FMA(KP951056516, T86, T85);
Chris@82 732 T8j = FNMS(KP951056516, T86, T85);
Chris@82 733 {
Chris@82 734 E T6i, T6N, T6E, T6L, T6p, T6O, T6x, T6K;
Chris@82 735 {
Chris@82 736 E T6e, T6h, T6A, T6D;
Chris@82 737 T6e = FMA(KP951056516, T6d, T6c);
Chris@82 738 T6h = FMA(KP951056516, T6g, T6f);
Chris@82 739 T6i = FMA(KP062914667, T6h, T6e);
Chris@82 740 T6N = FNMS(KP062914667, T6e, T6h);
Chris@82 741 T6A = FNMS(KP951056516, T6z, T6y);
Chris@82 742 T6D = FMA(KP951056516, T6C, T6B);
Chris@82 743 T6E = FMA(KP939062505, T6D, T6A);
Chris@82 744 T6L = FNMS(KP939062505, T6A, T6D);
Chris@82 745 }
Chris@82 746 {
Chris@82 747 E T6l, T6o, T6t, T6w;
Chris@82 748 T6l = FNMS(KP951056516, T6k, T6j);
Chris@82 749 T6o = FNMS(KP951056516, T6n, T6m);
Chris@82 750 T6p = FNMS(KP827271945, T6o, T6l);
Chris@82 751 T6O = FMA(KP827271945, T6l, T6o);
Chris@82 752 T6t = FNMS(KP951056516, T6s, T6r);
Chris@82 753 T6w = FMA(KP951056516, T6v, T6u);
Chris@82 754 T6x = FNMS(KP126329378, T6w, T6t);
Chris@82 755 T6K = FMA(KP126329378, T6t, T6w);
Chris@82 756 }
Chris@82 757 T6M = FMA(KP734762448, T6L, T6K);
Chris@82 758 T6P = FNMS(KP772036680, T6O, T6N);
Chris@82 759 T8r = FNMS(KP772036680, T6p, T6i);
Chris@82 760 T8q = FMA(KP734762448, T6E, T6x);
Chris@82 761 T8k = FMA(KP772036680, T6O, T6N);
Chris@82 762 T8l = FNMS(KP734762448, T6L, T6K);
Chris@82 763 T8m = FMA(KP994076283, T8l, T8k);
Chris@82 764 T6q = FMA(KP772036680, T6p, T6i);
Chris@82 765 T6F = FNMS(KP734762448, T6E, T6x);
Chris@82 766 T6G = FNMS(KP994076283, T6F, T6q);
Chris@82 767 }
Chris@82 768 {
Chris@82 769 E T6W, T7f, T76, T7d, T6Z, T7g, T73, T7c;
Chris@82 770 {
Chris@82 771 E T6U, T6V, T74, T75;
Chris@82 772 T6U = FMA(KP951056516, T6k, T6j);
Chris@82 773 T6V = FMA(KP951056516, T6n, T6m);
Chris@82 774 T6W = FMA(KP062914667, T6V, T6U);
Chris@82 775 T7f = FNMS(KP062914667, T6U, T6V);
Chris@82 776 T74 = FMA(KP951056516, T6z, T6y);
Chris@82 777 T75 = FNMS(KP951056516, T6C, T6B);
Chris@82 778 T76 = FMA(KP549754652, T75, T74);
Chris@82 779 T7d = FNMS(KP549754652, T74, T75);
Chris@82 780 }
Chris@82 781 {
Chris@82 782 E T6X, T6Y, T71, T72;
Chris@82 783 T6X = FNMS(KP951056516, T6d, T6c);
Chris@82 784 T6Y = FNMS(KP951056516, T6g, T6f);
Chris@82 785 T6Z = FMA(KP634619297, T6Y, T6X);
Chris@82 786 T7g = FNMS(KP634619297, T6X, T6Y);
Chris@82 787 T71 = FNMS(KP951056516, T6v, T6u);
Chris@82 788 T72 = FMA(KP951056516, T6s, T6r);
Chris@82 789 T73 = FNMS(KP470564281, T72, T71);
Chris@82 790 T7c = FMA(KP470564281, T71, T72);
Chris@82 791 }
Chris@82 792 T7e = FMA(KP968479752, T7d, T7c);
Chris@82 793 T7h = FNMS(KP845997307, T7g, T7f);
Chris@82 794 T8f = FNMS(KP845997307, T6Z, T6W);
Chris@82 795 T8e = FNMS(KP968479752, T76, T73);
Chris@82 796 T88 = FMA(KP845997307, T7g, T7f);
Chris@82 797 T89 = FNMS(KP968479752, T7d, T7c);
Chris@82 798 T8a = FMA(KP906616052, T89, T88);
Chris@82 799 T70 = FMA(KP845997307, T6Z, T6W);
Chris@82 800 T77 = FMA(KP968479752, T76, T73);
Chris@82 801 T78 = FMA(KP906616052, T77, T70);
Chris@82 802 }
Chris@82 803 ri[WS(rs, 3)] = FMA(KP998026728, T6G, T6b);
Chris@82 804 ii[WS(rs, 3)] = FNMS(KP998026728, T8m, T8j);
Chris@82 805 ri[WS(rs, 2)] = FMA(KP998026728, T78, T6T);
Chris@82 806 ii[WS(rs, 2)] = FNMS(KP998026728, T8a, T87);
Chris@82 807 {
Chris@82 808 E T6Q, T6S, T6J, T6R, T6H, T6I;
Chris@82 809 T6Q = FNMS(KP621716863, T6P, T6M);
Chris@82 810 T6S = FMA(KP614372930, T6M, T6P);
Chris@82 811 T6H = FNMS(KP249506682, T6G, T6b);
Chris@82 812 T6I = FMA(KP994076283, T6F, T6q);
Chris@82 813 T6J = FNMS(KP557913902, T6I, T6H);
Chris@82 814 T6R = FMA(KP557913902, T6I, T6H);
Chris@82 815 ri[WS(rs, 23)] = FNMS(KP943557151, T6Q, T6J);
Chris@82 816 ri[WS(rs, 13)] = FMA(KP949179823, T6S, T6R);
Chris@82 817 ri[WS(rs, 8)] = FMA(KP943557151, T6Q, T6J);
Chris@82 818 ri[WS(rs, 18)] = FNMS(KP949179823, T6S, T6R);
Chris@82 819 }
Chris@82 820 {
Chris@82 821 E T8s, T8u, T8p, T8t, T8n, T8o;
Chris@82 822 T8s = FMA(KP621716863, T8r, T8q);
Chris@82 823 T8u = FNMS(KP614372930, T8q, T8r);
Chris@82 824 T8n = FMA(KP249506682, T8m, T8j);
Chris@82 825 T8o = FNMS(KP994076283, T8l, T8k);
Chris@82 826 T8p = FMA(KP557913902, T8o, T8n);
Chris@82 827 T8t = FNMS(KP557913902, T8o, T8n);
Chris@82 828 ii[WS(rs, 8)] = FNMS(KP943557151, T8s, T8p);
Chris@82 829 ii[WS(rs, 18)] = FNMS(KP949179823, T8u, T8t);
Chris@82 830 ii[WS(rs, 23)] = FMA(KP943557151, T8s, T8p);
Chris@82 831 ii[WS(rs, 13)] = FMA(KP949179823, T8u, T8t);
Chris@82 832 }
Chris@82 833 {
Chris@82 834 E T7i, T7k, T7b, T7j, T79, T7a;
Chris@82 835 T7i = FMA(KP681693190, T7h, T7e);
Chris@82 836 T7k = FNMS(KP560319534, T7e, T7h);
Chris@82 837 T79 = FNMS(KP249506682, T78, T6T);
Chris@82 838 T7a = FNMS(KP906616052, T77, T70);
Chris@82 839 T7b = FNMS(KP557913902, T7a, T79);
Chris@82 840 T7j = FMA(KP557913902, T7a, T79);
Chris@82 841 ri[WS(rs, 22)] = FNMS(KP860541664, T7i, T7b);
Chris@82 842 ri[WS(rs, 17)] = FMA(KP949179823, T7k, T7j);
Chris@82 843 ri[WS(rs, 7)] = FMA(KP860541664, T7i, T7b);
Chris@82 844 ri[WS(rs, 12)] = FNMS(KP949179823, T7k, T7j);
Chris@82 845 }
Chris@82 846 {
Chris@82 847 E T8g, T8i, T8d, T8h, T8b, T8c;
Chris@82 848 T8g = FMA(KP681693190, T8f, T8e);
Chris@82 849 T8i = FNMS(KP560319534, T8e, T8f);
Chris@82 850 T8b = FMA(KP249506682, T8a, T87);
Chris@82 851 T8c = FNMS(KP906616052, T89, T88);
Chris@82 852 T8d = FMA(KP557913902, T8c, T8b);
Chris@82 853 T8h = FNMS(KP557913902, T8c, T8b);
Chris@82 854 ii[WS(rs, 7)] = FMA(KP860541664, T8g, T8d);
Chris@82 855 ii[WS(rs, 17)] = FMA(KP949179823, T8i, T8h);
Chris@82 856 ii[WS(rs, 22)] = FNMS(KP860541664, T8g, T8d);
Chris@82 857 ii[WS(rs, 12)] = FNMS(KP949179823, T8i, T8h);
Chris@82 858 }
Chris@82 859 }
Chris@82 860 }
Chris@82 861 }
Chris@82 862 }
Chris@82 863 }
Chris@82 864
/*
 * Twiddle instruction table for the FMA build of t2_25: four TW_CEXP
 * entries whose last field is 1, 3, 9 and 24, followed by a TW_NEXT
 * entry that ends the list. It is referenced by the ct_desc `desc` below.
 * NOTE(review): presumably each TW_CEXP entry requests one complex
 * twiddle factor for that index -- confirm against the tw_instr
 * definition in the FFTW headers, which is not visible here.
 */
Chris@82 865 static const tw_instr twinstr[] = {
Chris@82 866 {TW_CEXP, 0, 1},
Chris@82 867 {TW_CEXP, 0, 3},
Chris@82 868 {TW_CEXP, 0, 9},
Chris@82 869 {TW_CEXP, 0, 24},
Chris@82 870 {TW_NEXT, 1, 0}
Chris@82 871 };
Chris@82 872
/*
 * Codelet descriptor for the FMA build: size 25, name "t2_25", the
 * twiddle table above and &GENUS. The {84, 78, 356, 0} group matches the
 * operation counts quoted in the generator comment at the top of this
 * branch (84 additions, 78 multiplications, 356 fused multiply/add).
 * NOTE(review): the meaning of the three trailing zeros depends on the
 * ct_desc layout, which is declared elsewhere -- confirm there.
 */
Chris@82 873 static const ct_desc desc = { 25, "t2_25", twinstr, &GENUS, {84, 78, 356, 0}, 0, 0, 0 };
Chris@82 874
/*
 * Registration entry point (FMA build): hands the t2_25 routine and its
 * descriptor `desc` to X(kdft_dit_register) for planner `p`.
 */
Chris@82 875 void X(codelet_t2_25) (planner *p) {
Chris@82 876 X(kdft_dit_register) (p, t2_25, &desc);
Chris@82 877 }
Chris@82 878 #else
Chris@82 879
Chris@82 880 /* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 25 -name t2_25 -include dft/scalar/t.h */
Chris@82 881
Chris@82 882 /*
Chris@82 883 * This function contains 440 FP additions, 340 FP multiplications,
Chris@82 884 * (or, 280 additions, 180 multiplications, 160 fused multiply/add),
Chris@82 885 * 149 stack variables, 20 constants, and 100 memory accesses
Chris@82 886 */
Chris@82 887 #include "dft/scalar/t.h"
Chris@82 888
/*
 * t2_25, non-FMA variant: this copy is compiled when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For each m in [mb, me) the body reads the 25 value pairs
 * ri[WS(rs, k)], ii[WS(rs, k)] for k = 0..24, combines every pair except
 * k = 0 with coefficients derived from the eight entries W[0..7], and
 * stores 25 result pairs back into the same ri/ii slots (in place).
 * After each iteration ri and ii advance by ms and W advances by 8.
 *
 * Parameters:
 *   ri, ii  - arrays that are both read and overwritten (presumably the
 *             real and imaginary parts of the data -- confirm with caller)
 *   W       - coefficient table; offset by mb * 8 before the first pass
 *   rs      - stride handed to WS() to locate element k
 *   mb, me  - first and one-past-last iteration index
 *   ms      - per-iteration increment applied to ri and ii
 *
 * NOTE(review): FMA, FNMS, DK, WS, MAKE_VOLATILE_STRIDE and the types
 * E, R, INT and stride come from the included FFTW headers and are not
 * visible here; FMA(a, b, c) is presumably a*b + c and FNMS(a, b, c)
 * presumably c - a*b -- confirm against those headers.
 *
 * This is machine-generated code (see the "Generated by" comment above);
 * change the generator invocation rather than editing the body by hand.
 */
Chris@82 889 static void t2_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 890 {
/* Numeric constants; each KP name spells the leading digits of its value. */
Chris@82 891 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 892 DK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@82 893 DK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@82 894 DK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@82 895 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@82 896 DK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@82 897 DK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@82 898 DK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@82 899 DK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@82 900 DK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@82 901 DK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@82 902 DK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@82 903 DK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@82 904 DK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@82 905 DK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@82 906 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 907 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 908 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 909 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 910 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 911 {
Chris@82 912 INT m;
/* One pass per m; ri/ii step by ms and W steps by 8 entries each pass. */
Chris@82 913 for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) {
Chris@82 914 E T2, T5, T3, T6, T8, Td, T16, T14, Te, T9, T21, T23, Tx, TR, T1g;
Chris@82 915 E TB, T1f, TV, T1Q, Tg, T1S, Tk, T18, T2s, T1c, T2q, Tn, To, Tp, Tr;
Chris@82 916 E T28, T2x, TY, T2k, T2m, T2v, TG, TE, T10, T1h, T1E, T26, T1B, T1G, T1V;
Chris@82 917 E T1X, T1z, T1j;
/*
 * Load W[0..7] and build the coefficient pairs used below from products
 * of those eight entries. The pairs (W[0], W[1]), (W[2], W[3]),
 * (W[4], W[5]) and (W[6], W[7]) are applied directly to elements
 * 1, 3, 9 and 24 respectively; the remaining pairs are derived here.
 */
Chris@82 918 {
Chris@82 919 E Tw, TT, Tz, TQ, Tv, TU, TA, TP;
Chris@82 920 {
Chris@82 921 E T4, Tc, T7, Tb;
Chris@82 922 T2 = W[0];
Chris@82 923 T5 = W[1];
Chris@82 924 T3 = W[2];
Chris@82 925 T6 = W[3];
Chris@82 926 T4 = T2 * T3;
Chris@82 927 Tc = T5 * T3;
Chris@82 928 T7 = T5 * T6;
Chris@82 929 Tb = T2 * T6;
Chris@82 930 T8 = T4 - T7;
Chris@82 931 Td = Tb + Tc;
Chris@82 932 T16 = Tb - Tc;
Chris@82 933 T14 = T4 + T7;
Chris@82 934 Te = W[5];
Chris@82 935 Tw = T5 * Te;
Chris@82 936 TT = T3 * Te;
Chris@82 937 Tz = T2 * Te;
Chris@82 938 TQ = T6 * Te;
Chris@82 939 T9 = W[4];
Chris@82 940 Tv = T2 * T9;
Chris@82 941 TU = T6 * T9;
Chris@82 942 TA = T5 * T9;
Chris@82 943 TP = T3 * T9;
Chris@82 944 }
Chris@82 945 T21 = TP - TQ;
Chris@82 946 T23 = TT + TU;
Chris@82 947 {
Chris@82 948 E T15, T17, Ta, Tf, T1a, T1b, Ti, Tj;
Chris@82 949 Tx = Tv - Tw;
Chris@82 950 TR = TP + TQ;
Chris@82 951 T1g = Tz - TA;
Chris@82 952 TB = Tz + TA;
Chris@82 953 T1f = Tv + Tw;
Chris@82 954 TV = TT - TU;
Chris@82 955 T15 = T14 * T9;
Chris@82 956 T17 = T16 * Te;
Chris@82 957 T1Q = T15 + T17;
Chris@82 958 Ta = T8 * T9;
Chris@82 959 Tf = Td * Te;
Chris@82 960 Tg = Ta + Tf;
Chris@82 961 T1a = T14 * Te;
Chris@82 962 T1b = T16 * T9;
Chris@82 963 T1S = T1a - T1b;
Chris@82 964 Ti = T8 * Te;
Chris@82 965 Tj = Td * T9;
Chris@82 966 Tk = Ti - Tj;
Chris@82 967 T18 = T15 - T17;
Chris@82 968 T2s = Ti + Tj;
Chris@82 969 T1c = T1a + T1b;
Chris@82 970 T2q = Ta - Tf;
Chris@82 971 Tn = W[6];
Chris@82 972 To = W[7];
Chris@82 973 Tp = FMA(T8, Tn, Td * To);
Chris@82 974 Tr = FNMS(Td, Tn, T8 * To);
Chris@82 975 T28 = FNMS(T1S, Tn, T1Q * To);
Chris@82 976 T2x = FNMS(TV, Tn, TR * To);
Chris@82 977 TY = FMA(T3, Tn, T6 * To);
Chris@82 978 T2k = FMA(T2, Tn, T5 * To);
Chris@82 979 T2m = FNMS(T5, Tn, T2 * To);
Chris@82 980 T2v = FMA(TR, Tn, TV * To);
Chris@82 981 TG = FNMS(Te, Tn, T9 * To);
Chris@82 982 TE = FMA(T9, Tn, Te * To);
Chris@82 983 T10 = FNMS(T6, Tn, T3 * To);
Chris@82 984 T1h = FMA(T1f, Tn, T1g * To);
Chris@82 985 T1E = FMA(Tg, Tn, Tk * To);
Chris@82 986 T26 = FMA(T1Q, Tn, T1S * To);
Chris@82 987 T1B = FNMS(TB, Tn, Tx * To);
Chris@82 988 T1G = FNMS(Tk, Tn, Tg * To);
Chris@82 989 T1V = FMA(T14, Tn, T16 * To);
Chris@82 990 T1X = FNMS(T16, Tn, T14 * To);
Chris@82 991 T1z = FMA(Tx, Tn, TB * To);
Chris@82 992 T1j = FNMS(T1g, Tn, T1f * To);
Chris@82 993 }
Chris@82 994 }
Chris@82 995 {
Chris@82 996 E T1, T6v, T2F, T6I, TK, T2G, T6u, T6J, T6N, T7c, T2O, T52, T2C, T6k, T48;
Chris@82 997 E T5X, T4L, T5s, T4j, T5W, T4K, T5v, T1o, T6g, T30, T5M, T4A, T56, T3b, T5N;
Chris@82 998 E T4B, T59, T1L, T6h, T3n, T5Q, T4D, T5g, T3y, T5P, T4E, T5d, T2d, T6j, T3L;
Chris@82 999 E T5T, T4I, T5l, T3W, T5U, T4H, T5o;
/* Input group: elements 0, 5, 10, 15, 20 (element 0 is used unscaled). */
Chris@82 1000 {
Chris@82 1001 E Tm, T2I, Tt, T2J, Tu, T6s, TD, T2L, TI, T2M, TJ, T6t;
Chris@82 1002 T1 = ri[0];
Chris@82 1003 T6v = ii[0];
Chris@82 1004 {
Chris@82 1005 E Th, Tl, Tq, Ts;
Chris@82 1006 Th = ri[WS(rs, 5)];
Chris@82 1007 Tl = ii[WS(rs, 5)];
Chris@82 1008 Tm = FMA(Tg, Th, Tk * Tl);
Chris@82 1009 T2I = FNMS(Tk, Th, Tg * Tl);
Chris@82 1010 Tq = ri[WS(rs, 20)];
Chris@82 1011 Ts = ii[WS(rs, 20)];
Chris@82 1012 Tt = FMA(Tp, Tq, Tr * Ts);
Chris@82 1013 T2J = FNMS(Tr, Tq, Tp * Ts);
Chris@82 1014 }
Chris@82 1015 Tu = Tm + Tt;
Chris@82 1016 T6s = T2I + T2J;
Chris@82 1017 {
Chris@82 1018 E Ty, TC, TF, TH;
Chris@82 1019 Ty = ri[WS(rs, 10)];
Chris@82 1020 TC = ii[WS(rs, 10)];
Chris@82 1021 TD = FMA(Tx, Ty, TB * TC);
Chris@82 1022 T2L = FNMS(TB, Ty, Tx * TC);
Chris@82 1023 TF = ri[WS(rs, 15)];
Chris@82 1024 TH = ii[WS(rs, 15)];
Chris@82 1025 TI = FMA(TE, TF, TG * TH);
Chris@82 1026 T2M = FNMS(TG, TF, TE * TH);
Chris@82 1027 }
Chris@82 1028 TJ = TD + TI;
Chris@82 1029 T6t = T2L + T2M;
Chris@82 1030 T2F = KP559016994 * (Tu - TJ);
Chris@82 1031 T6I = KP559016994 * (T6s - T6t);
Chris@82 1032 TK = Tu + TJ;
Chris@82 1033 T2G = FNMS(KP250000000, TK, T1);
Chris@82 1034 T6u = T6s + T6t;
Chris@82 1035 T6J = FNMS(KP250000000, T6u, T6v);
Chris@82 1036 {
Chris@82 1037 E T6L, T6M, T2K, T2N;
Chris@82 1038 T6L = Tm - Tt;
Chris@82 1039 T6M = TD - TI;
Chris@82 1040 T6N = FMA(KP951056516, T6L, KP587785252 * T6M);
Chris@82 1041 T7c = FNMS(KP587785252, T6L, KP951056516 * T6M);
Chris@82 1042 T2K = T2I - T2J;
Chris@82 1043 T2N = T2L - T2M;
Chris@82 1044 T2O = FMA(KP951056516, T2K, KP587785252 * T2N);
Chris@82 1045 T52 = FNMS(KP587785252, T2K, KP951056516 * T2N);
Chris@82 1046 }
Chris@82 1047 }
/* Input group: elements 3, 8, 13, 18, 23. */
Chris@82 1048 {
Chris@82 1049 E T2g, T4c, T43, T46, T4h, T4g, T49, T4a, T4d, T2p, T2A, T2B, T2e, T2f;
Chris@82 1050 T2e = ri[WS(rs, 3)];
Chris@82 1051 T2f = ii[WS(rs, 3)];
Chris@82 1052 T2g = FMA(T3, T2e, T6 * T2f);
Chris@82 1053 T4c = FNMS(T6, T2e, T3 * T2f);
Chris@82 1054 {
Chris@82 1055 E T2j, T41, T2z, T45, T2o, T42, T2u, T44;
Chris@82 1056 {
Chris@82 1057 E T2h, T2i, T2w, T2y;
Chris@82 1058 T2h = ri[WS(rs, 8)];
Chris@82 1059 T2i = ii[WS(rs, 8)];
Chris@82 1060 T2j = FMA(T1f, T2h, T1g * T2i);
Chris@82 1061 T41 = FNMS(T1g, T2h, T1f * T2i);
Chris@82 1062 T2w = ri[WS(rs, 18)];
Chris@82 1063 T2y = ii[WS(rs, 18)];
Chris@82 1064 T2z = FMA(T2v, T2w, T2x * T2y);
Chris@82 1065 T45 = FNMS(T2x, T2w, T2v * T2y);
Chris@82 1066 }
Chris@82 1067 {
Chris@82 1068 E T2l, T2n, T2r, T2t;
Chris@82 1069 T2l = ri[WS(rs, 23)];
Chris@82 1070 T2n = ii[WS(rs, 23)];
Chris@82 1071 T2o = FMA(T2k, T2l, T2m * T2n);
Chris@82 1072 T42 = FNMS(T2m, T2l, T2k * T2n);
Chris@82 1073 T2r = ri[WS(rs, 13)];
Chris@82 1074 T2t = ii[WS(rs, 13)];
Chris@82 1075 T2u = FMA(T2q, T2r, T2s * T2t);
Chris@82 1076 T44 = FNMS(T2s, T2r, T2q * T2t);
Chris@82 1077 }
Chris@82 1078 T43 = T41 - T42;
Chris@82 1079 T46 = T44 - T45;
Chris@82 1080 T4h = T2u - T2z;
Chris@82 1081 T4g = T2j - T2o;
Chris@82 1082 T49 = T41 + T42;
Chris@82 1083 T4a = T44 + T45;
Chris@82 1084 T4d = T49 + T4a;
Chris@82 1085 T2p = T2j + T2o;
Chris@82 1086 T2A = T2u + T2z;
Chris@82 1087 T2B = T2p + T2A;
Chris@82 1088 }
Chris@82 1089 T2C = T2g + T2B;
Chris@82 1090 T6k = T4c + T4d;
Chris@82 1091 {
Chris@82 1092 E T47, T5r, T40, T5q, T3Y, T3Z;
Chris@82 1093 T47 = FMA(KP951056516, T43, KP587785252 * T46);
Chris@82 1094 T5r = FNMS(KP587785252, T43, KP951056516 * T46);
Chris@82 1095 T3Y = KP559016994 * (T2p - T2A);
Chris@82 1096 T3Z = FNMS(KP250000000, T2B, T2g);
Chris@82 1097 T40 = T3Y + T3Z;
Chris@82 1098 T5q = T3Z - T3Y;
Chris@82 1099 T48 = T40 + T47;
Chris@82 1100 T5X = T5q + T5r;
Chris@82 1101 T4L = T40 - T47;
Chris@82 1102 T5s = T5q - T5r;
Chris@82 1103 }
Chris@82 1104 {
Chris@82 1105 E T4i, T5t, T4f, T5u, T4b, T4e;
Chris@82 1106 T4i = FMA(KP951056516, T4g, KP587785252 * T4h);
Chris@82 1107 T5t = FNMS(KP587785252, T4g, KP951056516 * T4h);
Chris@82 1108 T4b = KP559016994 * (T49 - T4a);
Chris@82 1109 T4e = FNMS(KP250000000, T4d, T4c);
Chris@82 1110 T4f = T4b + T4e;
Chris@82 1111 T5u = T4e - T4b;
Chris@82 1112 T4j = T4f - T4i;
Chris@82 1113 T5W = T5u - T5t;
Chris@82 1114 T4K = T4i + T4f;
Chris@82 1115 T5v = T5t + T5u;
Chris@82 1116 }
Chris@82 1117 }
/* Input group: elements 1, 6, 11, 16, 21. */
Chris@82 1118 {
Chris@82 1119 E TO, T34, T2V, T2Y, T39, T38, T31, T32, T35, T13, T1m, T1n, TM, TN;
Chris@82 1120 TM = ri[WS(rs, 1)];
Chris@82 1121 TN = ii[WS(rs, 1)];
Chris@82 1122 TO = FMA(T2, TM, T5 * TN);
Chris@82 1123 T34 = FNMS(T5, TM, T2 * TN);
Chris@82 1124 {
Chris@82 1125 E TX, T2T, T1l, T2X, T12, T2U, T1e, T2W;
Chris@82 1126 {
Chris@82 1127 E TS, TW, T1i, T1k;
Chris@82 1128 TS = ri[WS(rs, 6)];
Chris@82 1129 TW = ii[WS(rs, 6)];
Chris@82 1130 TX = FMA(TR, TS, TV * TW);
Chris@82 1131 T2T = FNMS(TV, TS, TR * TW);
Chris@82 1132 T1i = ri[WS(rs, 16)];
Chris@82 1133 T1k = ii[WS(rs, 16)];
Chris@82 1134 T1l = FMA(T1h, T1i, T1j * T1k);
Chris@82 1135 T2X = FNMS(T1j, T1i, T1h * T1k);
Chris@82 1136 }
Chris@82 1137 {
Chris@82 1138 E TZ, T11, T19, T1d;
Chris@82 1139 TZ = ri[WS(rs, 21)];
Chris@82 1140 T11 = ii[WS(rs, 21)];
Chris@82 1141 T12 = FMA(TY, TZ, T10 * T11);
Chris@82 1142 T2U = FNMS(T10, TZ, TY * T11);
Chris@82 1143 T19 = ri[WS(rs, 11)];
Chris@82 1144 T1d = ii[WS(rs, 11)];
Chris@82 1145 T1e = FMA(T18, T19, T1c * T1d);
Chris@82 1146 T2W = FNMS(T1c, T19, T18 * T1d);
Chris@82 1147 }
Chris@82 1148 T2V = T2T - T2U;
Chris@82 1149 T2Y = T2W - T2X;
Chris@82 1150 T39 = T1e - T1l;
Chris@82 1151 T38 = TX - T12;
Chris@82 1152 T31 = T2T + T2U;
Chris@82 1153 T32 = T2W + T2X;
Chris@82 1154 T35 = T31 + T32;
Chris@82 1155 T13 = TX + T12;
Chris@82 1156 T1m = T1e + T1l;
Chris@82 1157 T1n = T13 + T1m;
Chris@82 1158 }
Chris@82 1159 T1o = TO + T1n;
Chris@82 1160 T6g = T34 + T35;
Chris@82 1161 {
Chris@82 1162 E T2Z, T55, T2S, T54, T2Q, T2R;
Chris@82 1163 T2Z = FMA(KP951056516, T2V, KP587785252 * T2Y);
Chris@82 1164 T55 = FNMS(KP587785252, T2V, KP951056516 * T2Y);
Chris@82 1165 T2Q = KP559016994 * (T13 - T1m);
Chris@82 1166 T2R = FNMS(KP250000000, T1n, TO);
Chris@82 1167 T2S = T2Q + T2R;
Chris@82 1168 T54 = T2R - T2Q;
Chris@82 1169 T30 = T2S + T2Z;
Chris@82 1170 T5M = T54 + T55;
Chris@82 1171 T4A = T2S - T2Z;
Chris@82 1172 T56 = T54 - T55;
Chris@82 1173 }
Chris@82 1174 {
Chris@82 1175 E T3a, T57, T37, T58, T33, T36;
Chris@82 1176 T3a = FMA(KP951056516, T38, KP587785252 * T39);
Chris@82 1177 T57 = FNMS(KP587785252, T38, KP951056516 * T39);
Chris@82 1178 T33 = KP559016994 * (T31 - T32);
Chris@82 1179 T36 = FNMS(KP250000000, T35, T34);
Chris@82 1180 T37 = T33 + T36;
Chris@82 1181 T58 = T36 - T33;
Chris@82 1182 T3b = T37 - T3a;
Chris@82 1183 T5N = T58 - T57;
Chris@82 1184 T4B = T3a + T37;
Chris@82 1185 T59 = T57 + T58;
Chris@82 1186 }
Chris@82 1187 }
/* Input group: elements 4, 9, 14, 19, 24. */
Chris@82 1188 {
Chris@82 1189 E T1r, T3r, T3i, T3l, T3w, T3v, T3o, T3p, T3s, T1y, T1J, T1K, T1p, T1q;
Chris@82 1190 T1p = ri[WS(rs, 4)];
Chris@82 1191 T1q = ii[WS(rs, 4)];
Chris@82 1192 T1r = FMA(T8, T1p, Td * T1q);
Chris@82 1193 T3r = FNMS(Td, T1p, T8 * T1q);
Chris@82 1194 {
Chris@82 1195 E T1u, T3g, T1I, T3k, T1x, T3h, T1D, T3j;
Chris@82 1196 {
Chris@82 1197 E T1s, T1t, T1F, T1H;
Chris@82 1198 T1s = ri[WS(rs, 9)];
Chris@82 1199 T1t = ii[WS(rs, 9)];
Chris@82 1200 T1u = FMA(T9, T1s, Te * T1t);
Chris@82 1201 T3g = FNMS(Te, T1s, T9 * T1t);
Chris@82 1202 T1F = ri[WS(rs, 19)];
Chris@82 1203 T1H = ii[WS(rs, 19)];
Chris@82 1204 T1I = FMA(T1E, T1F, T1G * T1H);
Chris@82 1205 T3k = FNMS(T1G, T1F, T1E * T1H);
Chris@82 1206 }
Chris@82 1207 {
Chris@82 1208 E T1v, T1w, T1A, T1C;
Chris@82 1209 T1v = ri[WS(rs, 24)];
Chris@82 1210 T1w = ii[WS(rs, 24)];
Chris@82 1211 T1x = FMA(Tn, T1v, To * T1w);
Chris@82 1212 T3h = FNMS(To, T1v, Tn * T1w);
Chris@82 1213 T1A = ri[WS(rs, 14)];
Chris@82 1214 T1C = ii[WS(rs, 14)];
Chris@82 1215 T1D = FMA(T1z, T1A, T1B * T1C);
Chris@82 1216 T3j = FNMS(T1B, T1A, T1z * T1C);
Chris@82 1217 }
Chris@82 1218 T3i = T3g - T3h;
Chris@82 1219 T3l = T3j - T3k;
Chris@82 1220 T3w = T1D - T1I;
Chris@82 1221 T3v = T1u - T1x;
Chris@82 1222 T3o = T3g + T3h;
Chris@82 1223 T3p = T3j + T3k;
Chris@82 1224 T3s = T3o + T3p;
Chris@82 1225 T1y = T1u + T1x;
Chris@82 1226 T1J = T1D + T1I;
Chris@82 1227 T1K = T1y + T1J;
Chris@82 1228 }
Chris@82 1229 T1L = T1r + T1K;
Chris@82 1230 T6h = T3r + T3s;
Chris@82 1231 {
Chris@82 1232 E T3m, T5f, T3f, T5e, T3d, T3e;
Chris@82 1233 T3m = FMA(KP951056516, T3i, KP587785252 * T3l);
Chris@82 1234 T5f = FNMS(KP587785252, T3i, KP951056516 * T3l);
Chris@82 1235 T3d = KP559016994 * (T1y - T1J);
Chris@82 1236 T3e = FNMS(KP250000000, T1K, T1r);
Chris@82 1237 T3f = T3d + T3e;
Chris@82 1238 T5e = T3e - T3d;
Chris@82 1239 T3n = T3f + T3m;
Chris@82 1240 T5Q = T5e + T5f;
Chris@82 1241 T4D = T3f - T3m;
Chris@82 1242 T5g = T5e - T5f;
Chris@82 1243 }
Chris@82 1244 {
Chris@82 1245 E T3x, T5b, T3u, T5c, T3q, T3t;
Chris@82 1246 T3x = FMA(KP951056516, T3v, KP587785252 * T3w);
Chris@82 1247 T5b = FNMS(KP587785252, T3v, KP951056516 * T3w);
Chris@82 1248 T3q = KP559016994 * (T3o - T3p);
Chris@82 1249 T3t = FNMS(KP250000000, T3s, T3r);
Chris@82 1250 T3u = T3q + T3t;
Chris@82 1251 T5c = T3t - T3q;
Chris@82 1252 T3y = T3u - T3x;
Chris@82 1253 T5P = T5c - T5b;
Chris@82 1254 T4E = T3x + T3u;
Chris@82 1255 T5d = T5b + T5c;
Chris@82 1256 }
Chris@82 1257 }
/* Input group: elements 2, 7, 12, 17, 22. */
Chris@82 1258 {
Chris@82 1259 E T1P, T3P, T3G, T3J, T3U, T3T, T3M, T3N, T3Q, T20, T2b, T2c, T1N, T1O;
Chris@82 1260 T1N = ri[WS(rs, 2)];
Chris@82 1261 T1O = ii[WS(rs, 2)];
Chris@82 1262 T1P = FMA(T14, T1N, T16 * T1O);
Chris@82 1263 T3P = FNMS(T16, T1N, T14 * T1O);
Chris@82 1264 {
Chris@82 1265 E T1U, T3E, T2a, T3I, T1Z, T3F, T25, T3H;
Chris@82 1266 {
Chris@82 1267 E T1R, T1T, T27, T29;
Chris@82 1268 T1R = ri[WS(rs, 7)];
Chris@82 1269 T1T = ii[WS(rs, 7)];
Chris@82 1270 T1U = FMA(T1Q, T1R, T1S * T1T);
Chris@82 1271 T3E = FNMS(T1S, T1R, T1Q * T1T);
Chris@82 1272 T27 = ri[WS(rs, 17)];
Chris@82 1273 T29 = ii[WS(rs, 17)];
Chris@82 1274 T2a = FMA(T26, T27, T28 * T29);
Chris@82 1275 T3I = FNMS(T28, T27, T26 * T29);
Chris@82 1276 }
Chris@82 1277 {
Chris@82 1278 E T1W, T1Y, T22, T24;
Chris@82 1279 T1W = ri[WS(rs, 22)];
Chris@82 1280 T1Y = ii[WS(rs, 22)];
Chris@82 1281 T1Z = FMA(T1V, T1W, T1X * T1Y);
Chris@82 1282 T3F = FNMS(T1X, T1W, T1V * T1Y);
Chris@82 1283 T22 = ri[WS(rs, 12)];
Chris@82 1284 T24 = ii[WS(rs, 12)];
Chris@82 1285 T25 = FMA(T21, T22, T23 * T24);
Chris@82 1286 T3H = FNMS(T23, T22, T21 * T24);
Chris@82 1287 }
Chris@82 1288 T3G = T3E - T3F;
Chris@82 1289 T3J = T3H - T3I;
Chris@82 1290 T3U = T25 - T2a;
Chris@82 1291 T3T = T1U - T1Z;
Chris@82 1292 T3M = T3E + T3F;
Chris@82 1293 T3N = T3H + T3I;
Chris@82 1294 T3Q = T3M + T3N;
Chris@82 1295 T20 = T1U + T1Z;
Chris@82 1296 T2b = T25 + T2a;
Chris@82 1297 T2c = T20 + T2b;
Chris@82 1298 }
Chris@82 1299 T2d = T1P + T2c;
Chris@82 1300 T6j = T3P + T3Q;
Chris@82 1301 {
Chris@82 1302 E T3K, T5k, T3D, T5j, T3B, T3C;
Chris@82 1303 T3K = FMA(KP951056516, T3G, KP587785252 * T3J);
Chris@82 1304 T5k = FNMS(KP587785252, T3G, KP951056516 * T3J);
Chris@82 1305 T3B = KP559016994 * (T20 - T2b);
Chris@82 1306 T3C = FNMS(KP250000000, T2c, T1P);
Chris@82 1307 T3D = T3B + T3C;
Chris@82 1308 T5j = T3C - T3B;
Chris@82 1309 T3L = T3D + T3K;
Chris@82 1310 T5T = T5j + T5k;
Chris@82 1311 T4I = T3D - T3K;
Chris@82 1312 T5l = T5j - T5k;
Chris@82 1313 }
Chris@82 1314 {
Chris@82 1315 E T3V, T5m, T3S, T5n, T3O, T3R;
Chris@82 1316 T3V = FMA(KP951056516, T3T, KP587785252 * T3U);
Chris@82 1317 T5m = FNMS(KP587785252, T3T, KP951056516 * T3U);
Chris@82 1318 T3O = KP559016994 * (T3M - T3N);
Chris@82 1319 T3R = FNMS(KP250000000, T3Q, T3P);
Chris@82 1320 T3S = T3O + T3R;
Chris@82 1321 T5n = T3R - T3O;
Chris@82 1322 T3W = T3S - T3V;
Chris@82 1323 T5U = T5n - T5m;
Chris@82 1324 T4H = T3V + T3S;
Chris@82 1325 T5o = T5m + T5n;
Chris@82 1326 }
Chris@82 1327 }
/* Store ri for elements 0, 5, 10, 15, 20. */
Chris@82 1328 {
Chris@82 1329 E T6m, T6o, TL, T2E, T6d, T6e, T6n, T6f;
Chris@82 1330 {
Chris@82 1331 E T6i, T6l, T1M, T2D;
Chris@82 1332 T6i = T6g - T6h;
Chris@82 1333 T6l = T6j - T6k;
Chris@82 1334 T6m = FMA(KP951056516, T6i, KP587785252 * T6l);
Chris@82 1335 T6o = FNMS(KP587785252, T6i, KP951056516 * T6l);
Chris@82 1336 TL = T1 + TK;
Chris@82 1337 T1M = T1o + T1L;
Chris@82 1338 T2D = T2d + T2C;
Chris@82 1339 T2E = T1M + T2D;
Chris@82 1340 T6d = KP559016994 * (T1M - T2D);
Chris@82 1341 T6e = FNMS(KP250000000, T2E, TL);
Chris@82 1342 }
Chris@82 1343 ri[0] = TL + T2E;
Chris@82 1344 T6n = T6e - T6d;
Chris@82 1345 ri[WS(rs, 10)] = T6n - T6o;
Chris@82 1346 ri[WS(rs, 15)] = T6n + T6o;
Chris@82 1347 T6f = T6d + T6e;
Chris@82 1348 ri[WS(rs, 20)] = T6f - T6m;
Chris@82 1349 ri[WS(rs, 5)] = T6f + T6m;
Chris@82 1350 }
/* Store ii for elements 0, 5, 10, 15, 20. */
Chris@82 1351 {
Chris@82 1352 E T6C, T6D, T6w, T6r, T6x, T6y, T6E, T6z;
Chris@82 1353 {
Chris@82 1354 E T6A, T6B, T6p, T6q;
Chris@82 1355 T6A = T1o - T1L;
Chris@82 1356 T6B = T2d - T2C;
Chris@82 1357 T6C = FMA(KP951056516, T6A, KP587785252 * T6B);
Chris@82 1358 T6D = FNMS(KP587785252, T6A, KP951056516 * T6B);
Chris@82 1359 T6w = T6u + T6v;
Chris@82 1360 T6p = T6g + T6h;
Chris@82 1361 T6q = T6j + T6k;
Chris@82 1362 T6r = T6p + T6q;
Chris@82 1363 T6x = KP559016994 * (T6p - T6q);
Chris@82 1364 T6y = FNMS(KP250000000, T6r, T6w);
Chris@82 1365 }
Chris@82 1366 ii[0] = T6r + T6w;
Chris@82 1367 T6E = T6y - T6x;
Chris@82 1368 ii[WS(rs, 10)] = T6D + T6E;
Chris@82 1369 ii[WS(rs, 15)] = T6E - T6D;
Chris@82 1370 T6z = T6x + T6y;
Chris@82 1371 ii[WS(rs, 5)] = T6z - T6C;
Chris@82 1372 ii[WS(rs, 20)] = T6C + T6z;
Chris@82 1373 }
/*
 * Store ri/ii for elements 1, 6, 11, 16, 21 and 4, 9, 14, 19, 24.
 */
Chris@82 1374 {
Chris@82 1375 E T2P, T4z, T6O, T70, T4m, T6T, T4n, T6S, T4U, T71, T4X, T6Z, T4O, T75, T4P;
Chris@82 1376 E T74, T4s, T6P, T4v, T6H, T2H, T6K;
Chris@82 1377 T2H = T2F + T2G;
Chris@82 1378 T2P = T2H + T2O;
Chris@82 1379 T4z = T2H - T2O;
Chris@82 1380 T6K = T6I + T6J;
Chris@82 1381 T6O = T6K - T6N;
Chris@82 1382 T70 = T6N + T6K;
Chris@82 1383 {
Chris@82 1384 E T3c, T3z, T3A, T3X, T4k, T4l;
Chris@82 1385 T3c = FMA(KP968583161, T30, KP248689887 * T3b);
Chris@82 1386 T3z = FMA(KP535826794, T3n, KP844327925 * T3y);
Chris@82 1387 T3A = T3c + T3z;
Chris@82 1388 T3X = FMA(KP876306680, T3L, KP481753674 * T3W);
Chris@82 1389 T4k = FMA(KP728968627, T48, KP684547105 * T4j);
Chris@82 1390 T4l = T3X + T4k;
Chris@82 1391 T4m = T3A + T4l;
Chris@82 1392 T6T = T3X - T4k;
Chris@82 1393 T4n = KP559016994 * (T3A - T4l);
Chris@82 1394 T6S = T3c - T3z;
Chris@82 1395 }
Chris@82 1396 {
Chris@82 1397 E T4S, T4T, T6X, T4V, T4W, T6Y;
Chris@82 1398 T4S = FNMS(KP844327925, T4A, KP535826794 * T4B);
Chris@82 1399 T4T = FNMS(KP637423989, T4E, KP770513242 * T4D);
Chris@82 1400 T6X = T4S + T4T;
Chris@82 1401 T4V = FMA(KP125333233, T4L, KP992114701 * T4K);
Chris@82 1402 T4W = FMA(KP904827052, T4I, KP425779291 * T4H);
Chris@82 1403 T6Y = T4W + T4V;
Chris@82 1404 T4U = T4S - T4T;
Chris@82 1405 T71 = KP559016994 * (T6X + T6Y);
Chris@82 1406 T4X = T4V - T4W;
Chris@82 1407 T6Z = T6X - T6Y;
Chris@82 1408 }
Chris@82 1409 {
Chris@82 1410 E T4C, T4F, T4G, T4J, T4M, T4N;
Chris@82 1411 T4C = FMA(KP535826794, T4A, KP844327925 * T4B);
Chris@82 1412 T4F = FMA(KP637423989, T4D, KP770513242 * T4E);
Chris@82 1413 T4G = T4C - T4F;
Chris@82 1414 T4J = FNMS(KP425779291, T4I, KP904827052 * T4H);
Chris@82 1415 T4M = FNMS(KP992114701, T4L, KP125333233 * T4K);
Chris@82 1416 T4N = T4J + T4M;
Chris@82 1417 T4O = T4G + T4N;
Chris@82 1418 T75 = T4J - T4M;
Chris@82 1419 T4P = KP559016994 * (T4G - T4N);
Chris@82 1420 T74 = T4C + T4F;
Chris@82 1421 }
Chris@82 1422 {
Chris@82 1423 E T4q, T4r, T6F, T4t, T4u, T6G;
Chris@82 1424 T4q = FNMS(KP248689887, T30, KP968583161 * T3b);
Chris@82 1425 T4r = FNMS(KP844327925, T3n, KP535826794 * T3y);
Chris@82 1426 T6F = T4q + T4r;
Chris@82 1427 T4t = FNMS(KP481753674, T3L, KP876306680 * T3W);
Chris@82 1428 T4u = FNMS(KP684547105, T48, KP728968627 * T4j);
Chris@82 1429 T6G = T4t + T4u;
Chris@82 1430 T4s = T4q - T4r;
Chris@82 1431 T6P = KP559016994 * (T6F - T6G);
Chris@82 1432 T4v = T4t - T4u;
Chris@82 1433 T6H = T6F + T6G;
Chris@82 1434 }
Chris@82 1435 ri[WS(rs, 1)] = T2P + T4m;
Chris@82 1436 ii[WS(rs, 1)] = T6H + T6O;
Chris@82 1437 ri[WS(rs, 4)] = T4z + T4O;
Chris@82 1438 ii[WS(rs, 4)] = T6Z + T70;
Chris@82 1439 {
Chris@82 1440 E T4w, T4y, T4p, T4x, T4o;
Chris@82 1441 T4w = FMA(KP951056516, T4s, KP587785252 * T4v);
Chris@82 1442 T4y = FNMS(KP587785252, T4s, KP951056516 * T4v);
Chris@82 1443 T4o = FNMS(KP250000000, T4m, T2P);
Chris@82 1444 T4p = T4n + T4o;
Chris@82 1445 T4x = T4o - T4n;
Chris@82 1446 ri[WS(rs, 21)] = T4p - T4w;
Chris@82 1447 ri[WS(rs, 16)] = T4x + T4y;
Chris@82 1448 ri[WS(rs, 6)] = T4p + T4w;
Chris@82 1449 ri[WS(rs, 11)] = T4x - T4y;
Chris@82 1450 }
Chris@82 1451 {
Chris@82 1452 E T6U, T6V, T6R, T6W, T6Q;
Chris@82 1453 T6U = FMA(KP951056516, T6S, KP587785252 * T6T);
Chris@82 1454 T6V = FNMS(KP587785252, T6S, KP951056516 * T6T);
Chris@82 1455 T6Q = FNMS(KP250000000, T6H, T6O);
Chris@82 1456 T6R = T6P + T6Q;
Chris@82 1457 T6W = T6Q - T6P;
Chris@82 1458 ii[WS(rs, 6)] = T6R - T6U;
Chris@82 1459 ii[WS(rs, 16)] = T6W - T6V;
Chris@82 1460 ii[WS(rs, 21)] = T6U + T6R;
Chris@82 1461 ii[WS(rs, 11)] = T6V + T6W;
Chris@82 1462 }
Chris@82 1463 {
Chris@82 1464 E T4Y, T50, T4R, T4Z, T4Q;
Chris@82 1465 T4Y = FMA(KP951056516, T4U, KP587785252 * T4X);
Chris@82 1466 T50 = FNMS(KP587785252, T4U, KP951056516 * T4X);
Chris@82 1467 T4Q = FNMS(KP250000000, T4O, T4z);
Chris@82 1468 T4R = T4P + T4Q;
Chris@82 1469 T4Z = T4Q - T4P;
Chris@82 1470 ri[WS(rs, 24)] = T4R - T4Y;
Chris@82 1471 ri[WS(rs, 19)] = T4Z + T50;
Chris@82 1472 ri[WS(rs, 9)] = T4R + T4Y;
Chris@82 1473 ri[WS(rs, 14)] = T4Z - T50;
Chris@82 1474 }
Chris@82 1475 {
Chris@82 1476 E T76, T77, T73, T78, T72;
Chris@82 1477 T76 = FMA(KP951056516, T74, KP587785252 * T75);
Chris@82 1478 T77 = FNMS(KP587785252, T74, KP951056516 * T75);
Chris@82 1479 T72 = FNMS(KP250000000, T6Z, T70);
Chris@82 1480 T73 = T71 + T72;
Chris@82 1481 T78 = T72 - T71;
Chris@82 1482 ii[WS(rs, 9)] = T73 - T76;
Chris@82 1483 ii[WS(rs, 19)] = T78 - T77;
Chris@82 1484 ii[WS(rs, 24)] = T76 + T73;
Chris@82 1485 ii[WS(rs, 14)] = T77 + T78;
Chris@82 1486 }
Chris@82 1487 }
/*
 * Store ri/ii for elements 2, 7, 12, 17, 22 and 3, 8, 13, 18, 23.
 */
Chris@82 1488 {
Chris@82 1489 E T53, T5L, T7e, T7q, T5y, T7j, T5z, T7i, T66, T7r, T69, T7p, T60, T7v, T61;
Chris@82 1490 E T7u, T5E, T7f, T5H, T7b, T51, T7d;
Chris@82 1491 T51 = T2G - T2F;
Chris@82 1492 T53 = T51 - T52;
Chris@82 1493 T5L = T51 + T52;
Chris@82 1494 T7d = T6J - T6I;
Chris@82 1495 T7e = T7c + T7d;
Chris@82 1496 T7q = T7d - T7c;
Chris@82 1497 {
Chris@82 1498 E T5a, T5h, T5i, T5p, T5w, T5x;
Chris@82 1499 T5a = FMA(KP876306680, T56, KP481753674 * T59);
Chris@82 1500 T5h = FNMS(KP425779291, T5g, KP904827052 * T5d);
Chris@82 1501 T5i = T5a + T5h;
Chris@82 1502 T5p = FMA(KP535826794, T5l, KP844327925 * T5o);
Chris@82 1503 T5w = FMA(KP062790519, T5s, KP998026728 * T5v);
Chris@82 1504 T5x = T5p + T5w;
Chris@82 1505 T5y = T5i + T5x;
Chris@82 1506 T7j = T5p - T5w;
Chris@82 1507 T5z = KP559016994 * (T5i - T5x);
Chris@82 1508 T7i = T5a - T5h;
Chris@82 1509 }
Chris@82 1510 {
Chris@82 1511 E T64, T65, T7n, T67, T68, T7o;
Chris@82 1512 T64 = FNMS(KP684547105, T5M, KP728968627 * T5N);
Chris@82 1513 T65 = FMA(KP125333233, T5Q, KP992114701 * T5P);
Chris@82 1514 T7n = T64 - T65;
Chris@82 1515 T67 = FNMS(KP998026728, T5T, KP062790519 * T5U);
Chris@82 1516 T68 = FMA(KP770513242, T5X, KP637423989 * T5W);
Chris@82 1517 T7o = T67 - T68;
Chris@82 1518 T66 = T64 + T65;
Chris@82 1519 T7r = KP559016994 * (T7n - T7o);
Chris@82 1520 T69 = T67 + T68;
Chris@82 1521 T7p = T7n + T7o;
Chris@82 1522 }
Chris@82 1523 {
Chris@82 1524 E T5O, T5R, T5S, T5V, T5Y, T5Z;
Chris@82 1525 T5O = FMA(KP728968627, T5M, KP684547105 * T5N);
Chris@82 1526 T5R = FNMS(KP992114701, T5Q, KP125333233 * T5P);
Chris@82 1527 T5S = T5O + T5R;
Chris@82 1528 T5V = FMA(KP062790519, T5T, KP998026728 * T5U);
Chris@82 1529 T5Y = FNMS(KP637423989, T5X, KP770513242 * T5W);
Chris@82 1530 T5Z = T5V + T5Y;
Chris@82 1531 T60 = T5S + T5Z;
Chris@82 1532 T7v = T5V - T5Y;
Chris@82 1533 T61 = KP559016994 * (T5S - T5Z);
Chris@82 1534 T7u = T5O - T5R;
Chris@82 1535 }
Chris@82 1536 {
Chris@82 1537 E T5C, T5D, T79, T5F, T5G, T7a;
Chris@82 1538 T5C = FNMS(KP481753674, T56, KP876306680 * T59);
Chris@82 1539 T5D = FMA(KP904827052, T5g, KP425779291 * T5d);
Chris@82 1540 T79 = T5C - T5D;
Chris@82 1541 T5F = FNMS(KP844327925, T5l, KP535826794 * T5o);
Chris@82 1542 T5G = FNMS(KP998026728, T5s, KP062790519 * T5v);
Chris@82 1543 T7a = T5F + T5G;
Chris@82 1544 T5E = T5C + T5D;
Chris@82 1545 T7f = KP559016994 * (T79 - T7a);
Chris@82 1546 T5H = T5F - T5G;
Chris@82 1547 T7b = T79 + T7a;
Chris@82 1548 }
Chris@82 1549 ri[WS(rs, 2)] = T53 + T5y;
Chris@82 1550 ii[WS(rs, 2)] = T7b + T7e;
Chris@82 1551 ri[WS(rs, 3)] = T5L + T60;
Chris@82 1552 ii[WS(rs, 3)] = T7p + T7q;
Chris@82 1553 {
Chris@82 1554 E T5I, T5K, T5B, T5J, T5A;
Chris@82 1555 T5I = FMA(KP951056516, T5E, KP587785252 * T5H);
Chris@82 1556 T5K = FNMS(KP587785252, T5E, KP951056516 * T5H);
Chris@82 1557 T5A = FNMS(KP250000000, T5y, T53);
Chris@82 1558 T5B = T5z + T5A;
Chris@82 1559 T5J = T5A - T5z;
Chris@82 1560 ri[WS(rs, 22)] = T5B - T5I;
Chris@82 1561 ri[WS(rs, 17)] = T5J + T5K;
Chris@82 1562 ri[WS(rs, 7)] = T5B + T5I;
Chris@82 1563 ri[WS(rs, 12)] = T5J - T5K;
Chris@82 1564 }
Chris@82 1565 {
Chris@82 1566 E T7k, T7l, T7h, T7m, T7g;
Chris@82 1567 T7k = FMA(KP951056516, T7i, KP587785252 * T7j);
Chris@82 1568 T7l = FNMS(KP587785252, T7i, KP951056516 * T7j);
Chris@82 1569 T7g = FNMS(KP250000000, T7b, T7e);
Chris@82 1570 T7h = T7f + T7g;
Chris@82 1571 T7m = T7g - T7f;
Chris@82 1572 ii[WS(rs, 7)] = T7h - T7k;
Chris@82 1573 ii[WS(rs, 17)] = T7m - T7l;
Chris@82 1574 ii[WS(rs, 22)] = T7k + T7h;
Chris@82 1575 ii[WS(rs, 12)] = T7l + T7m;
Chris@82 1576 }
Chris@82 1577 {
Chris@82 1578 E T6a, T6c, T63, T6b, T62;
Chris@82 1579 T6a = FMA(KP951056516, T66, KP587785252 * T69);
Chris@82 1580 T6c = FNMS(KP587785252, T66, KP951056516 * T69);
Chris@82 1581 T62 = FNMS(KP250000000, T60, T5L);
Chris@82 1582 T63 = T61 + T62;
Chris@82 1583 T6b = T62 - T61;
Chris@82 1584 ri[WS(rs, 23)] = T63 - T6a;
Chris@82 1585 ri[WS(rs, 18)] = T6b + T6c;
Chris@82 1586 ri[WS(rs, 8)] = T63 + T6a;
Chris@82 1587 ri[WS(rs, 13)] = T6b - T6c;
Chris@82 1588 }
Chris@82 1589 {
Chris@82 1590 E T7w, T7x, T7t, T7y, T7s;
Chris@82 1591 T7w = FMA(KP951056516, T7u, KP587785252 * T7v);
Chris@82 1592 T7x = FNMS(KP587785252, T7u, KP951056516 * T7v);
Chris@82 1593 T7s = FNMS(KP250000000, T7p, T7q);
Chris@82 1594 T7t = T7r + T7s;
Chris@82 1595 T7y = T7s - T7r;
Chris@82 1596 ii[WS(rs, 8)] = T7t - T7w;
Chris@82 1597 ii[WS(rs, 18)] = T7y - T7x;
Chris@82 1598 ii[WS(rs, 23)] = T7w + T7t;
Chris@82 1599 ii[WS(rs, 13)] = T7x + T7y;
Chris@82 1600 }
Chris@82 1601 }
Chris@82 1602 }
Chris@82 1603 }
Chris@82 1604 }
Chris@82 1605 }
Chris@82 1606
/*
 * Twiddle instruction table for the non-FMA build of t2_25: four
 * TW_CEXP entries whose last field is 1, 3, 9 and 24, followed by a
 * TW_NEXT entry that ends the list. t2_25 consumes eight W entries per
 * iteration and applies them pairwise to elements 1, 3, 9 and 24.
 * NOTE(review): presumably each TW_CEXP entry requests one complex
 * twiddle factor for that index -- confirm against the tw_instr
 * definition in the FFTW headers, which is not visible here.
 */
Chris@82 1607 static const tw_instr twinstr[] = {
Chris@82 1608 {TW_CEXP, 0, 1},
Chris@82 1609 {TW_CEXP, 0, 3},
Chris@82 1610 {TW_CEXP, 0, 9},
Chris@82 1611 {TW_CEXP, 0, 24},
Chris@82 1612 {TW_NEXT, 1, 0}
Chris@82 1613 };
Chris@82 1614
/*
 * Codelet descriptor for the non-FMA build: size 25, name "t2_25", the
 * twiddle table above and &GENUS. The {280, 180, 160, 0} group matches
 * the operation counts quoted in the generator comment for this branch
 * (280 additions, 180 multiplications, 160 fused multiply/add).
 * NOTE(review): the meaning of the three trailing zeros depends on the
 * ct_desc layout, which is declared elsewhere -- confirm there.
 */
Chris@82 1615 static const ct_desc desc = { 25, "t2_25", twinstr, &GENUS, {280, 180, 160, 0}, 0, 0, 0 };
Chris@82 1616
/*
 * Registration entry point (non-FMA build): hands the t2_25 routine and
 * its descriptor `desc` to X(kdft_dit_register) for planner `p`.
 */
Chris@82 1617 void X(codelet_t2_25) (planner *p) {
Chris@82 1618 X(kdft_dit_register) (p, t2_25, &desc);
Chris@82 1619 }
Chris@82 1620 #endif