annotate src/fftw-3.3.8/rdft/scalar/r2cf/hf2_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:38 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 25 -dit -name hf2_25 -include rdft/scalar/hf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 440 FP additions, 434 FP multiplications,
Chris@82 32 * (or, 84 additions, 78 multiplications, 356 fused multiply/add),
Chris@82 33 * 186 stack variables, 47 constants, and 100 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hf.h"
Chris@82 36
Chris@82 37 static void hf2_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
Chris@82 39 DK(KP614372930, +0.614372930789563808870829930444362096004872855);
Chris@82 40 DK(KP621716863, +0.621716863012209892444754556304102309693593202);
Chris@82 41 DK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@82 42 DK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@82 43 DK(KP557913902, +0.557913902031834264187699648465567037992437152);
Chris@82 44 DK(KP249506682, +0.249506682107067890488084201715862638334226305);
Chris@82 45 DK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@82 46 DK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@82 47 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 48 DK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@82 49 DK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@82 50 DK(KP968479752, +0.968479752739016373193524836781420152702090879);
Chris@82 51 DK(KP994076283, +0.994076283785401014123185814696322018529298887);
Chris@82 52 DK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@82 53 DK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@82 54 DK(KP062914667, +0.062914667253649757225485955897349402364686947);
Chris@82 55 DK(KP921177326, +0.921177326965143320250447435415066029359282231);
Chris@82 56 DK(KP833417178, +0.833417178328688677408962550243238843138996060);
Chris@82 57 DK(KP541454447, +0.541454447536312777046285590082819509052033189);
Chris@82 58 DK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@82 59 DK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@82 60 DK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@82 61 DK(KP943557151, +0.943557151597354104399655195398983005179443399);
Chris@82 62 DK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@82 63 DK(KP554608978, +0.554608978404018097464974850792216217022558774);
Chris@82 64 DK(KP248028675, +0.248028675328619457762448260696444630363259177);
Chris@82 65 DK(KP525970792, +0.525970792408939708442463226536226366643874659);
Chris@82 66 DK(KP726211448, +0.726211448929902658173535992263577167607493062);
Chris@82 67 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 68 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@82 69 DK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@82 70 DK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@82 71 DK(KP871714437, +0.871714437527667770979999223229522602943903653);
Chris@82 72 DK(KP549754652, +0.549754652192770074288023275540779861653779767);
Chris@82 73 DK(KP939062505, +0.939062505817492352556001843133229685779824606);
Chris@82 74 DK(KP256756360, +0.256756360367726783319498520922669048172391148);
Chris@82 75 DK(KP851038619, +0.851038619207379630836264138867114231259902550);
Chris@82 76 DK(KP912018591, +0.912018591466481957908415381764119056233607330);
Chris@82 77 DK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@82 78 DK(KP634619297, +0.634619297544148100711287640319130485732531031);
Chris@82 79 DK(KP470564281, +0.470564281212251493087595091036643380879947982);
Chris@82 80 DK(KP827271945, +0.827271945972475634034355757144307982555673741);
Chris@82 81 DK(KP126329378, +0.126329378446108174786050455341811215027378105);
Chris@82 82 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 83 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 84 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 85 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 86 {
Chris@82 87 INT m;
Chris@82 88 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) {
Chris@82 89 E T2, T8, T3, T6, Tk, Tm, T5, T7, T19, Tb, T1b, Tc, Tw, TT, T1j;
Chris@82 90 E TE, T2p, T1c, T2U, TI, T11, T15, T2Q, T2M, T2m, T2i, T2e, Tn, Tr, TX;
Chris@82 91 E T31, T35, T1l, T1m, T1q, TA, T1K, T1O, T2a, T27, T1g, T2x, T2t, Th, Td;
Chris@82 92 E T1S, T2X, T1W;
Chris@82 93 {
Chris@82 94 E TS, TD, T2L, T10, TH, T2P, T14, T9, T1a, Tz, TW, T4, Ta, Tv, T1J;
Chris@82 95 E T1N;
Chris@82 96 T2 = W[0];
Chris@82 97 T8 = W[4];
Chris@82 98 T3 = W[2];
Chris@82 99 T6 = W[3];
Chris@82 100 T4 = T2 * T3;
Chris@82 101 TS = T3 * T8;
Chris@82 102 Ta = T2 * T6;
Chris@82 103 Tv = T2 * T8;
Chris@82 104 Tk = W[6];
Chris@82 105 TD = T8 * Tk;
Chris@82 106 T2L = T2 * Tk;
Chris@82 107 T10 = T3 * Tk;
Chris@82 108 Tm = W[7];
Chris@82 109 TH = T8 * Tm;
Chris@82 110 T2P = T2 * Tm;
Chris@82 111 T14 = T3 * Tm;
Chris@82 112 T5 = W[1];
Chris@82 113 T7 = FNMS(T5, T6, T4);
Chris@82 114 T19 = FMA(T5, T6, T4);
Chris@82 115 T9 = T7 * T8;
Chris@82 116 T1a = T19 * T8;
Chris@82 117 Tb = FMA(T5, T3, Ta);
Chris@82 118 T1b = FNMS(T5, T3, Ta);
Chris@82 119 Tc = W[5];
Chris@82 120 Tz = T2 * Tc;
Chris@82 121 TW = T3 * Tc;
Chris@82 122 Tw = FNMS(T5, Tc, Tv);
Chris@82 123 TT = FMA(T6, Tc, TS);
Chris@82 124 T1j = FMA(T5, Tc, Tv);
Chris@82 125 TE = FMA(Tc, Tm, TD);
Chris@82 126 T2p = FMA(T6, T8, TW);
Chris@82 127 T1c = FNMS(T1b, Tc, T1a);
Chris@82 128 T2U = FNMS(Tb, Tc, T9);
Chris@82 129 TI = FNMS(Tc, Tk, TH);
Chris@82 130 T11 = FMA(T6, Tm, T10);
Chris@82 131 T15 = FNMS(T6, Tk, T14);
Chris@82 132 T2Q = FNMS(T5, Tk, T2P);
Chris@82 133 T2M = FMA(T5, Tm, T2L);
Chris@82 134 {
Chris@82 135 E T2h, T2d, Tl, Tq;
Chris@82 136 T2m = FNMS(T6, Tc, TS);
Chris@82 137 T2h = T19 * Tm;
Chris@82 138 T2i = FNMS(T1b, Tk, T2h);
Chris@82 139 T2d = T19 * Tk;
Chris@82 140 T2e = FMA(T1b, Tm, T2d);
Chris@82 141 Tl = T7 * Tk;
Chris@82 142 Tn = FMA(Tb, Tm, Tl);
Chris@82 143 Tq = T7 * Tm;
Chris@82 144 Tr = FNMS(Tb, Tk, Tq);
Chris@82 145 }
Chris@82 146 {
Chris@82 147 E T30, T34, T1k, T1p;
Chris@82 148 T30 = TT * Tk;
Chris@82 149 T34 = TT * Tm;
Chris@82 150 TX = FNMS(T6, T8, TW);
Chris@82 151 T31 = FMA(TX, Tm, T30);
Chris@82 152 T35 = FNMS(TX, Tk, T34);
Chris@82 153 T1k = T1j * Tk;
Chris@82 154 T1p = T1j * Tm;
Chris@82 155 T1l = FNMS(T5, T8, Tz);
Chris@82 156 T1m = FMA(T1l, Tm, T1k);
Chris@82 157 T1q = FNMS(T1l, Tk, T1p);
Chris@82 158 }
Chris@82 159 T1J = Tw * Tk;
Chris@82 160 T1N = Tw * Tm;
Chris@82 161 TA = FMA(T5, T8, Tz);
Chris@82 162 T1K = FMA(TA, Tm, T1J);
Chris@82 163 T1O = FNMS(TA, Tk, T1N);
Chris@82 164 {
Chris@82 165 E T1f, T2s, T2w, Tg, T1R, T1V;
Chris@82 166 T1f = T19 * Tc;
Chris@82 167 T2a = FNMS(T1b, T8, T1f);
Chris@82 168 T27 = FMA(T1b, Tc, T1a);
Chris@82 169 T2s = T27 * Tk;
Chris@82 170 T2w = T27 * Tm;
Chris@82 171 T1g = FMA(T1b, T8, T1f);
Chris@82 172 T2x = FNMS(T2a, Tk, T2w);
Chris@82 173 T2t = FMA(T2a, Tm, T2s);
Chris@82 174 Tg = T7 * Tc;
Chris@82 175 Th = FNMS(Tb, T8, Tg);
Chris@82 176 Td = FMA(Tb, Tc, T9);
Chris@82 177 T1R = Td * Tk;
Chris@82 178 T1V = Td * Tm;
Chris@82 179 T1S = FMA(Th, Tm, T1R);
Chris@82 180 T2X = FMA(Tb, T8, Tg);
Chris@82 181 T1W = FNMS(Th, Tk, T1V);
Chris@82 182 }
Chris@82 183 }
Chris@82 184 {
Chris@82 185 E T1, T7l, T4s, T6a, T7u, T7U, TM, T4f, T4g, T7o, T7p, T7q, T4z, T6n, T4G;
Chris@82 186 E T6k, T3a, T6m, T4w, T4a, T6j, T4D, T54, T6C, T5b, T6z, T1v, T6y, T58, T3t;
Chris@82 187 E T6B, T51, T5j, T6v, T5q, T6s, T21, T6r, T5n, T3H, T6u, T5g, T4O, T6d, T4V;
Chris@82 188 E T6g, T2C, T6f, T4S, T3W, T6c, T4L;
Chris@82 189 {
Chris@82 190 E Tj, T4j, TK, T4q, TC, T4o, Tt, T4l;
Chris@82 191 T1 = cr[0];
Chris@82 192 T7l = ci[0];
Chris@82 193 {
Chris@82 194 E Te, Tf, Ti, T4i;
Chris@82 195 Te = cr[WS(rs, 5)];
Chris@82 196 Tf = Td * Te;
Chris@82 197 Ti = ci[WS(rs, 5)];
Chris@82 198 T4i = Td * Ti;
Chris@82 199 Tj = FMA(Th, Ti, Tf);
Chris@82 200 T4j = FNMS(Th, Te, T4i);
Chris@82 201 }
Chris@82 202 {
Chris@82 203 E TF, TG, TJ, T4p;
Chris@82 204 TF = cr[WS(rs, 15)];
Chris@82 205 TG = TE * TF;
Chris@82 206 TJ = ci[WS(rs, 15)];
Chris@82 207 T4p = TE * TJ;
Chris@82 208 TK = FMA(TI, TJ, TG);
Chris@82 209 T4q = FNMS(TI, TF, T4p);
Chris@82 210 }
Chris@82 211 {
Chris@82 212 E Tx, Ty, TB, T4n;
Chris@82 213 Tx = cr[WS(rs, 10)];
Chris@82 214 Ty = Tw * Tx;
Chris@82 215 TB = ci[WS(rs, 10)];
Chris@82 216 T4n = Tw * TB;
Chris@82 217 TC = FMA(TA, TB, Ty);
Chris@82 218 T4o = FNMS(TA, Tx, T4n);
Chris@82 219 }
Chris@82 220 {
Chris@82 221 E To, Tp, Ts, T4k;
Chris@82 222 To = cr[WS(rs, 20)];
Chris@82 223 Tp = Tn * To;
Chris@82 224 Ts = ci[WS(rs, 20)];
Chris@82 225 T4k = Tn * Ts;
Chris@82 226 Tt = FMA(Tr, Ts, Tp);
Chris@82 227 T4l = FNMS(Tr, To, T4k);
Chris@82 228 }
Chris@82 229 {
Chris@82 230 E T4m, T4r, T7s, T7t;
Chris@82 231 T4m = T4j - T4l;
Chris@82 232 T4r = T4o - T4q;
Chris@82 233 T4s = FMA(KP618033988, T4r, T4m);
Chris@82 234 T6a = FNMS(KP618033988, T4m, T4r);
Chris@82 235 T7s = TC - TK;
Chris@82 236 T7t = Tj - Tt;
Chris@82 237 T7u = FNMS(KP618033988, T7t, T7s);
Chris@82 238 T7U = FMA(KP618033988, T7s, T7t);
Chris@82 239 }
Chris@82 240 {
Chris@82 241 E Tu, TL, T7m, T7n;
Chris@82 242 Tu = Tj + Tt;
Chris@82 243 TL = TC + TK;
Chris@82 244 TM = Tu + TL;
Chris@82 245 T4f = FNMS(KP250000000, TM, T1);
Chris@82 246 T4g = Tu - TL;
Chris@82 247 T7m = T4j + T4l;
Chris@82 248 T7n = T4o + T4q;
Chris@82 249 T7o = T7m + T7n;
Chris@82 250 T7p = FNMS(KP250000000, T7o, T7l);
Chris@82 251 T7q = T7m - T7n;
Chris@82 252 }
Chris@82 253 }
Chris@82 254 {
Chris@82 255 E T2G, T3Y, T2Z, T37, T38, T45, T47, T48, T2K, T2S, T2T, T40, T42, T43;
Chris@82 256 {
Chris@82 257 E T2D, T2E, T2F, T3X;
Chris@82 258 T2D = cr[WS(rs, 3)];
Chris@82 259 T2E = T3 * T2D;
Chris@82 260 T2F = ci[WS(rs, 3)];
Chris@82 261 T3X = T3 * T2F;
Chris@82 262 T2G = FMA(T6, T2F, T2E);
Chris@82 263 T3Y = FNMS(T6, T2D, T3X);
Chris@82 264 }
Chris@82 265 {
Chris@82 266 E T2V, T2W, T2Y, T44, T32, T33, T36, T46;
Chris@82 267 T2V = cr[WS(rs, 13)];
Chris@82 268 T2W = T2U * T2V;
Chris@82 269 T2Y = ci[WS(rs, 13)];
Chris@82 270 T44 = T2U * T2Y;
Chris@82 271 T32 = cr[WS(rs, 18)];
Chris@82 272 T33 = T31 * T32;
Chris@82 273 T36 = ci[WS(rs, 18)];
Chris@82 274 T46 = T31 * T36;
Chris@82 275 T2Z = FMA(T2X, T2Y, T2W);
Chris@82 276 T37 = FMA(T35, T36, T33);
Chris@82 277 T38 = T2Z + T37;
Chris@82 278 T45 = FNMS(T2X, T2V, T44);
Chris@82 279 T47 = FNMS(T35, T32, T46);
Chris@82 280 T48 = T45 + T47;
Chris@82 281 }
Chris@82 282 {
Chris@82 283 E T2H, T2I, T2J, T3Z, T2N, T2O, T2R, T41;
Chris@82 284 T2H = cr[WS(rs, 8)];
Chris@82 285 T2I = T1j * T2H;
Chris@82 286 T2J = ci[WS(rs, 8)];
Chris@82 287 T3Z = T1j * T2J;
Chris@82 288 T2N = cr[WS(rs, 23)];
Chris@82 289 T2O = T2M * T2N;
Chris@82 290 T2R = ci[WS(rs, 23)];
Chris@82 291 T41 = T2M * T2R;
Chris@82 292 T2K = FMA(T1l, T2J, T2I);
Chris@82 293 T2S = FMA(T2Q, T2R, T2O);
Chris@82 294 T2T = T2K + T2S;
Chris@82 295 T40 = FNMS(T1l, T2H, T3Z);
Chris@82 296 T42 = FNMS(T2Q, T2N, T41);
Chris@82 297 T43 = T40 + T42;
Chris@82 298 }
Chris@82 299 {
Chris@82 300 E T4x, T4y, T4E, T4F;
Chris@82 301 T4x = T42 - T40;
Chris@82 302 T4y = T47 - T45;
Chris@82 303 T4z = FMA(KP618033988, T4y, T4x);
Chris@82 304 T6n = FNMS(KP618033988, T4x, T4y);
Chris@82 305 T4E = T2K - T2S;
Chris@82 306 T4F = T2Z - T37;
Chris@82 307 T4G = FMA(KP618033988, T4F, T4E);
Chris@82 308 T6k = FNMS(KP618033988, T4E, T4F);
Chris@82 309 }
Chris@82 310 {
Chris@82 311 E T4v, T39, T4u, T4C, T49, T4B;
Chris@82 312 T4v = T38 - T2T;
Chris@82 313 T39 = T2T + T38;
Chris@82 314 T4u = FNMS(KP250000000, T39, T2G);
Chris@82 315 T3a = T2G + T39;
Chris@82 316 T6m = FMA(KP559016994, T4v, T4u);
Chris@82 317 T4w = FNMS(KP559016994, T4v, T4u);
Chris@82 318 T4C = T48 - T43;
Chris@82 319 T49 = T43 + T48;
Chris@82 320 T4B = FNMS(KP250000000, T49, T3Y);
Chris@82 321 T4a = T3Y + T49;
Chris@82 322 T6j = FMA(KP559016994, T4C, T4B);
Chris@82 323 T4D = FNMS(KP559016994, T4C, T4B);
Chris@82 324 }
Chris@82 325 }
Chris@82 326 {
Chris@82 327 E TR, T3h, T1i, T1s, T1t, T3o, T3q, T3r, TZ, T17, T18, T3j, T3l, T3m;
Chris@82 328 {
Chris@82 329 E TO, TP, TQ, T3g;
Chris@82 330 TO = cr[WS(rs, 1)];
Chris@82 331 TP = T2 * TO;
Chris@82 332 TQ = ci[WS(rs, 1)];
Chris@82 333 T3g = T2 * TQ;
Chris@82 334 TR = FMA(T5, TQ, TP);
Chris@82 335 T3h = FNMS(T5, TO, T3g);
Chris@82 336 }
Chris@82 337 {
Chris@82 338 E T1d, T1e, T1h, T3n, T1n, T1o, T1r, T3p;
Chris@82 339 T1d = cr[WS(rs, 11)];
Chris@82 340 T1e = T1c * T1d;
Chris@82 341 T1h = ci[WS(rs, 11)];
Chris@82 342 T3n = T1c * T1h;
Chris@82 343 T1n = cr[WS(rs, 16)];
Chris@82 344 T1o = T1m * T1n;
Chris@82 345 T1r = ci[WS(rs, 16)];
Chris@82 346 T3p = T1m * T1r;
Chris@82 347 T1i = FMA(T1g, T1h, T1e);
Chris@82 348 T1s = FMA(T1q, T1r, T1o);
Chris@82 349 T1t = T1i + T1s;
Chris@82 350 T3o = FNMS(T1g, T1d, T3n);
Chris@82 351 T3q = FNMS(T1q, T1n, T3p);
Chris@82 352 T3r = T3o + T3q;
Chris@82 353 }
Chris@82 354 {
Chris@82 355 E TU, TV, TY, T3i, T12, T13, T16, T3k;
Chris@82 356 TU = cr[WS(rs, 6)];
Chris@82 357 TV = TT * TU;
Chris@82 358 TY = ci[WS(rs, 6)];
Chris@82 359 T3i = TT * TY;
Chris@82 360 T12 = cr[WS(rs, 21)];
Chris@82 361 T13 = T11 * T12;
Chris@82 362 T16 = ci[WS(rs, 21)];
Chris@82 363 T3k = T11 * T16;
Chris@82 364 TZ = FMA(TX, TY, TV);
Chris@82 365 T17 = FMA(T15, T16, T13);
Chris@82 366 T18 = TZ + T17;
Chris@82 367 T3j = FNMS(TX, TU, T3i);
Chris@82 368 T3l = FNMS(T15, T12, T3k);
Chris@82 369 T3m = T3j + T3l;
Chris@82 370 }
Chris@82 371 {
Chris@82 372 E T52, T53, T59, T5a;
Chris@82 373 T52 = T17 - TZ;
Chris@82 374 T53 = T1s - T1i;
Chris@82 375 T54 = FMA(KP618033988, T53, T52);
Chris@82 376 T6C = FNMS(KP618033988, T52, T53);
Chris@82 377 T59 = T3j - T3l;
Chris@82 378 T5a = T3q - T3o;
Chris@82 379 T5b = FNMS(KP618033988, T5a, T59);
Chris@82 380 T6z = FMA(KP618033988, T59, T5a);
Chris@82 381 }
Chris@82 382 {
Chris@82 383 E T57, T1u, T56, T50, T3s, T4Z;
Chris@82 384 T57 = T18 - T1t;
Chris@82 385 T1u = T18 + T1t;
Chris@82 386 T56 = FNMS(KP250000000, T1u, TR);
Chris@82 387 T1v = TR + T1u;
Chris@82 388 T6y = FNMS(KP559016994, T57, T56);
Chris@82 389 T58 = FMA(KP559016994, T57, T56);
Chris@82 390 T50 = T3m - T3r;
Chris@82 391 T3s = T3m + T3r;
Chris@82 392 T4Z = FNMS(KP250000000, T3s, T3h);
Chris@82 393 T3t = T3h + T3s;
Chris@82 394 T6B = FNMS(KP559016994, T50, T4Z);
Chris@82 395 T51 = FMA(KP559016994, T50, T4Z);
Chris@82 396 }
Chris@82 397 }
Chris@82 398 {
Chris@82 399 E T1z, T3v, T1Q, T1Y, T1Z, T3C, T3E, T3F, T1D, T1H, T1I, T3x, T3z, T3A;
Chris@82 400 {
Chris@82 401 E T1w, T1x, T1y, T3u;
Chris@82 402 T1w = cr[WS(rs, 4)];
Chris@82 403 T1x = T7 * T1w;
Chris@82 404 T1y = ci[WS(rs, 4)];
Chris@82 405 T3u = T7 * T1y;
Chris@82 406 T1z = FMA(Tb, T1y, T1x);
Chris@82 407 T3v = FNMS(Tb, T1w, T3u);
Chris@82 408 }
Chris@82 409 {
Chris@82 410 E T1L, T1M, T1P, T3B, T1T, T1U, T1X, T3D;
Chris@82 411 T1L = cr[WS(rs, 14)];
Chris@82 412 T1M = T1K * T1L;
Chris@82 413 T1P = ci[WS(rs, 14)];
Chris@82 414 T3B = T1K * T1P;
Chris@82 415 T1T = cr[WS(rs, 19)];
Chris@82 416 T1U = T1S * T1T;
Chris@82 417 T1X = ci[WS(rs, 19)];
Chris@82 418 T3D = T1S * T1X;
Chris@82 419 T1Q = FMA(T1O, T1P, T1M);
Chris@82 420 T1Y = FMA(T1W, T1X, T1U);
Chris@82 421 T1Z = T1Q + T1Y;
Chris@82 422 T3C = FNMS(T1O, T1L, T3B);
Chris@82 423 T3E = FNMS(T1W, T1T, T3D);
Chris@82 424 T3F = T3C + T3E;
Chris@82 425 }
Chris@82 426 {
Chris@82 427 E T1A, T1B, T1C, T3w, T1E, T1F, T1G, T3y;
Chris@82 428 T1A = cr[WS(rs, 9)];
Chris@82 429 T1B = T8 * T1A;
Chris@82 430 T1C = ci[WS(rs, 9)];
Chris@82 431 T3w = T8 * T1C;
Chris@82 432 T1E = cr[WS(rs, 24)];
Chris@82 433 T1F = Tk * T1E;
Chris@82 434 T1G = ci[WS(rs, 24)];
Chris@82 435 T3y = Tk * T1G;
Chris@82 436 T1D = FMA(Tc, T1C, T1B);
Chris@82 437 T1H = FMA(Tm, T1G, T1F);
Chris@82 438 T1I = T1D + T1H;
Chris@82 439 T3x = FNMS(Tc, T1A, T3w);
Chris@82 440 T3z = FNMS(Tm, T1E, T3y);
Chris@82 441 T3A = T3x + T3z;
Chris@82 442 }
Chris@82 443 {
Chris@82 444 E T5h, T5i, T5o, T5p;
Chris@82 445 T5h = T1H - T1D;
Chris@82 446 T5i = T1Y - T1Q;
Chris@82 447 T5j = FMA(KP618033988, T5i, T5h);
Chris@82 448 T6v = FNMS(KP618033988, T5h, T5i);
Chris@82 449 T5o = T3z - T3x;
Chris@82 450 T5p = T3E - T3C;
Chris@82 451 T5q = FMA(KP618033988, T5p, T5o);
Chris@82 452 T6s = FNMS(KP618033988, T5o, T5p);
Chris@82 453 }
Chris@82 454 {
Chris@82 455 E T5m, T20, T5l, T5f, T3G, T5e;
Chris@82 456 T5m = T1I - T1Z;
Chris@82 457 T20 = T1I + T1Z;
Chris@82 458 T5l = FNMS(KP250000000, T20, T1z);
Chris@82 459 T21 = T1z + T20;
Chris@82 460 T6r = FNMS(KP559016994, T5m, T5l);
Chris@82 461 T5n = FMA(KP559016994, T5m, T5l);
Chris@82 462 T5f = T3F - T3A;
Chris@82 463 T3G = T3A + T3F;
Chris@82 464 T5e = FNMS(KP250000000, T3G, T3v);
Chris@82 465 T3H = T3v + T3G;
Chris@82 466 T6u = FMA(KP559016994, T5f, T5e);
Chris@82 467 T5g = FNMS(KP559016994, T5f, T5e);
Chris@82 468 }
Chris@82 469 }
Chris@82 470 {
Chris@82 471 E T26, T3K, T2r, T2z, T2A, T3R, T3T, T3U, T2c, T2k, T2l, T3M, T3O, T3P;
Chris@82 472 {
Chris@82 473 E T23, T24, T25, T3J;
Chris@82 474 T23 = cr[WS(rs, 2)];
Chris@82 475 T24 = T19 * T23;
Chris@82 476 T25 = ci[WS(rs, 2)];
Chris@82 477 T3J = T19 * T25;
Chris@82 478 T26 = FMA(T1b, T25, T24);
Chris@82 479 T3K = FNMS(T1b, T23, T3J);
Chris@82 480 }
Chris@82 481 {
Chris@82 482 E T2n, T2o, T2q, T3Q, T2u, T2v, T2y, T3S;
Chris@82 483 T2n = cr[WS(rs, 12)];
Chris@82 484 T2o = T2m * T2n;
Chris@82 485 T2q = ci[WS(rs, 12)];
Chris@82 486 T3Q = T2m * T2q;
Chris@82 487 T2u = cr[WS(rs, 17)];
Chris@82 488 T2v = T2t * T2u;
Chris@82 489 T2y = ci[WS(rs, 17)];
Chris@82 490 T3S = T2t * T2y;
Chris@82 491 T2r = FMA(T2p, T2q, T2o);
Chris@82 492 T2z = FMA(T2x, T2y, T2v);
Chris@82 493 T2A = T2r + T2z;
Chris@82 494 T3R = FNMS(T2p, T2n, T3Q);
Chris@82 495 T3T = FNMS(T2x, T2u, T3S);
Chris@82 496 T3U = T3R + T3T;
Chris@82 497 }
Chris@82 498 {
Chris@82 499 E T28, T29, T2b, T3L, T2f, T2g, T2j, T3N;
Chris@82 500 T28 = cr[WS(rs, 7)];
Chris@82 501 T29 = T27 * T28;
Chris@82 502 T2b = ci[WS(rs, 7)];
Chris@82 503 T3L = T27 * T2b;
Chris@82 504 T2f = cr[WS(rs, 22)];
Chris@82 505 T2g = T2e * T2f;
Chris@82 506 T2j = ci[WS(rs, 22)];
Chris@82 507 T3N = T2e * T2j;
Chris@82 508 T2c = FMA(T2a, T2b, T29);
Chris@82 509 T2k = FMA(T2i, T2j, T2g);
Chris@82 510 T2l = T2c + T2k;
Chris@82 511 T3M = FNMS(T2a, T28, T3L);
Chris@82 512 T3O = FNMS(T2i, T2f, T3N);
Chris@82 513 T3P = T3M + T3O;
Chris@82 514 }
Chris@82 515 {
Chris@82 516 E T4M, T4N, T4T, T4U;
Chris@82 517 T4M = T2k - T2c;
Chris@82 518 T4N = T2z - T2r;
Chris@82 519 T4O = FMA(KP618033988, T4N, T4M);
Chris@82 520 T6d = FNMS(KP618033988, T4M, T4N);
Chris@82 521 T4T = T3O - T3M;
Chris@82 522 T4U = T3R - T3T;
Chris@82 523 T4V = FNMS(KP618033988, T4U, T4T);
Chris@82 524 T6g = FMA(KP618033988, T4T, T4U);
Chris@82 525 }
Chris@82 526 {
Chris@82 527 E T4R, T2B, T4Q, T4K, T3V, T4J;
Chris@82 528 T4R = T2A - T2l;
Chris@82 529 T2B = T2l + T2A;
Chris@82 530 T4Q = FNMS(KP250000000, T2B, T26);
Chris@82 531 T2C = T26 + T2B;
Chris@82 532 T6f = FMA(KP559016994, T4R, T4Q);
Chris@82 533 T4S = FNMS(KP559016994, T4R, T4Q);
Chris@82 534 T4K = T3U - T3P;
Chris@82 535 T3V = T3P + T3U;
Chris@82 536 T4J = FNMS(KP250000000, T3V, T3K);
Chris@82 537 T3W = T3K + T3V;
Chris@82 538 T6c = FMA(KP559016994, T4K, T4J);
Chris@82 539 T4L = FNMS(KP559016994, T4K, T4J);
Chris@82 540 }
Chris@82 541 }
Chris@82 542 {
Chris@82 543 E T4c, T4e, TN, T3c, T3d, T3e, T4d, T3f;
Chris@82 544 {
Chris@82 545 E T3I, T4b, T22, T3b;
Chris@82 546 T3I = T3t - T3H;
Chris@82 547 T4b = T3W - T4a;
Chris@82 548 T4c = FMA(KP618033988, T4b, T3I);
Chris@82 549 T4e = FNMS(KP618033988, T3I, T4b);
Chris@82 550 TN = T1 + TM;
Chris@82 551 T22 = T1v + T21;
Chris@82 552 T3b = T2C + T3a;
Chris@82 553 T3c = T22 + T3b;
Chris@82 554 T3d = FNMS(KP250000000, T3c, TN);
Chris@82 555 T3e = T22 - T3b;
Chris@82 556 }
Chris@82 557 cr[0] = TN + T3c;
Chris@82 558 T4d = FNMS(KP559016994, T3e, T3d);
Chris@82 559 cr[WS(rs, 10)] = FNMS(KP951056516, T4e, T4d);
Chris@82 560 ci[WS(rs, 9)] = FMA(KP951056516, T4e, T4d);
Chris@82 561 T3f = FMA(KP559016994, T3e, T3d);
Chris@82 562 ci[WS(rs, 4)] = FNMS(KP951056516, T4c, T3f);
Chris@82 563 cr[WS(rs, 5)] = FMA(KP951056516, T4c, T3f);
Chris@82 564 }
Chris@82 565 {
Chris@82 566 E T4t, T5H, T7V, T87, T5A, T5D, T8f, T8e, T88, T89, T8a, T4Y, T5t, T5u, T62;
Chris@82 567 E T65, T83, T82, T7W, T7X, T7Y, T5O, T5V, T5W, T4h, T7T;
Chris@82 568 T4h = FMA(KP559016994, T4g, T4f);
Chris@82 569 T4t = FNMS(KP951056516, T4s, T4h);
Chris@82 570 T5H = FMA(KP951056516, T4s, T4h);
Chris@82 571 T7T = FMA(KP559016994, T7q, T7p);
Chris@82 572 T7V = FNMS(KP951056516, T7U, T7T);
Chris@82 573 T87 = FMA(KP951056516, T7U, T7T);
Chris@82 574 {
Chris@82 575 E T4I, T5B, T5s, T5z, T4X, T5C, T5d, T5y;
Chris@82 576 {
Chris@82 577 E T4A, T4H, T5k, T5r;
Chris@82 578 T4A = FMA(KP951056516, T4z, T4w);
Chris@82 579 T4H = FMA(KP951056516, T4G, T4D);
Chris@82 580 T4I = FNMS(KP126329378, T4H, T4A);
Chris@82 581 T5B = FMA(KP126329378, T4A, T4H);
Chris@82 582 T5k = FNMS(KP951056516, T5j, T5g);
Chris@82 583 T5r = FMA(KP951056516, T5q, T5n);
Chris@82 584 T5s = FMA(KP827271945, T5r, T5k);
Chris@82 585 T5z = FNMS(KP827271945, T5k, T5r);
Chris@82 586 }
Chris@82 587 {
Chris@82 588 E T4P, T4W, T55, T5c;
Chris@82 589 T4P = FNMS(KP951056516, T4O, T4L);
Chris@82 590 T4W = FMA(KP951056516, T4V, T4S);
Chris@82 591 T4X = FNMS(KP470564281, T4W, T4P);
Chris@82 592 T5C = FMA(KP470564281, T4P, T4W);
Chris@82 593 T55 = FNMS(KP951056516, T54, T51);
Chris@82 594 T5c = FNMS(KP951056516, T5b, T58);
Chris@82 595 T5d = FMA(KP634619297, T5c, T55);
Chris@82 596 T5y = FNMS(KP634619297, T55, T5c);
Chris@82 597 }
Chris@82 598 T5A = FMA(KP912575812, T5z, T5y);
Chris@82 599 T5D = FNMS(KP912018591, T5C, T5B);
Chris@82 600 T8f = FMA(KP912575812, T5s, T5d);
Chris@82 601 T8e = FMA(KP912018591, T4X, T4I);
Chris@82 602 T88 = FMA(KP912018591, T5C, T5B);
Chris@82 603 T89 = FNMS(KP912575812, T5z, T5y);
Chris@82 604 T8a = FMA(KP851038619, T89, T88);
Chris@82 605 T4Y = FNMS(KP912018591, T4X, T4I);
Chris@82 606 T5t = FNMS(KP912575812, T5s, T5d);
Chris@82 607 T5u = FNMS(KP851038619, T5t, T4Y);
Chris@82 608 }
Chris@82 609 {
Chris@82 610 E T5K, T60, T5U, T64, T5N, T61, T5R, T63;
Chris@82 611 {
Chris@82 612 E T5I, T5J, T5S, T5T;
Chris@82 613 T5I = FMA(KP951056516, T5b, T58);
Chris@82 614 T5J = FMA(KP951056516, T54, T51);
Chris@82 615 T5K = FMA(KP256756360, T5J, T5I);
Chris@82 616 T60 = FNMS(KP256756360, T5I, T5J);
Chris@82 617 T5S = FNMS(KP951056516, T4z, T4w);
Chris@82 618 T5T = FNMS(KP951056516, T4G, T4D);
Chris@82 619 T5U = FMA(KP939062505, T5T, T5S);
Chris@82 620 T64 = FNMS(KP939062505, T5S, T5T);
Chris@82 621 }
Chris@82 622 {
Chris@82 623 E T5L, T5M, T5P, T5Q;
Chris@82 624 T5L = FMA(KP951056516, T5j, T5g);
Chris@82 625 T5M = FNMS(KP951056516, T5q, T5n);
Chris@82 626 T5N = FMA(KP634619297, T5M, T5L);
Chris@82 627 T61 = FNMS(KP634619297, T5L, T5M);
Chris@82 628 T5P = FNMS(KP951056516, T4V, T4S);
Chris@82 629 T5Q = FMA(KP951056516, T4O, T4L);
Chris@82 630 T5R = FMA(KP549754652, T5Q, T5P);
Chris@82 631 T63 = FNMS(KP549754652, T5P, T5Q);
Chris@82 632 }
Chris@82 633 T62 = FMA(KP871714437, T61, T60);
Chris@82 634 T65 = FNMS(KP831864738, T64, T63);
Chris@82 635 T83 = FNMS(KP871714437, T5N, T5K);
Chris@82 636 T82 = FNMS(KP831864738, T5U, T5R);
Chris@82 637 T7W = FNMS(KP871714437, T61, T60);
Chris@82 638 T7X = FMA(KP831864738, T64, T63);
Chris@82 639 T7Y = FMA(KP904730450, T7X, T7W);
Chris@82 640 T5O = FMA(KP871714437, T5N, T5K);
Chris@82 641 T5V = FMA(KP831864738, T5U, T5R);
Chris@82 642 T5W = FMA(KP904730450, T5V, T5O);
Chris@82 643 }
Chris@82 644 cr[WS(rs, 4)] = FNMS(KP992114701, T5u, T4t);
Chris@82 645 ci[WS(rs, 23)] = FMA(KP968583161, T7Y, T7V);
Chris@82 646 ci[WS(rs, 20)] = FNMS(KP992114701, T8a, T87);
Chris@82 647 cr[WS(rs, 1)] = FMA(KP968583161, T5W, T5H);
Chris@82 648 {
Chris@82 649 E T5E, T5G, T5x, T5F, T5v, T5w;
Chris@82 650 T5E = FNMS(KP726211448, T5D, T5A);
Chris@82 651 T5G = FMA(KP525970792, T5A, T5D);
Chris@82 652 T5v = FMA(KP248028675, T5u, T4t);
Chris@82 653 T5w = FMA(KP851038619, T5t, T4Y);
Chris@82 654 T5x = FMA(KP554608978, T5w, T5v);
Chris@82 655 T5F = FNMS(KP554608978, T5w, T5v);
Chris@82 656 cr[WS(rs, 9)] = FNMS(KP803003575, T5E, T5x);
Chris@82 657 ci[WS(rs, 5)] = FMA(KP943557151, T5G, T5F);
Chris@82 658 ci[0] = FMA(KP803003575, T5E, T5x);
Chris@82 659 ci[WS(rs, 10)] = FNMS(KP943557151, T5G, T5F);
Chris@82 660 }
Chris@82 661 {
Chris@82 662 E T84, T86, T81, T85, T7Z, T80;
Chris@82 663 T84 = FNMS(KP683113946, T83, T82);
Chris@82 664 T86 = FMA(KP559154169, T82, T83);
Chris@82 665 T7Z = FNMS(KP242145790, T7Y, T7V);
Chris@82 666 T80 = FNMS(KP904730450, T7X, T7W);
Chris@82 667 T81 = FNMS(KP541454447, T80, T7Z);
Chris@82 668 T85 = FMA(KP541454447, T80, T7Z);
Chris@82 669 cr[WS(rs, 16)] = FMS(KP833417178, T84, T81);
Chris@82 670 ci[WS(rs, 18)] = FNMS(KP921177326, T86, T85);
Chris@82 671 ci[WS(rs, 13)] = FMA(KP833417178, T84, T81);
Chris@82 672 cr[WS(rs, 21)] = -(FMA(KP921177326, T86, T85));
Chris@82 673 }
Chris@82 674 {
Chris@82 675 E T8g, T8i, T8d, T8h, T8b, T8c;
Chris@82 676 T8g = FNMS(KP525970792, T8f, T8e);
Chris@82 677 T8i = FMA(KP726211448, T8e, T8f);
Chris@82 678 T8b = FMA(KP248028675, T8a, T87);
Chris@82 679 T8c = FNMS(KP851038619, T89, T88);
Chris@82 680 T8d = FNMS(KP554608978, T8c, T8b);
Chris@82 681 T8h = FMA(KP554608978, T8c, T8b);
Chris@82 682 cr[WS(rs, 14)] = -(FMA(KP943557151, T8g, T8d));
Chris@82 683 ci[WS(rs, 15)] = FNMS(KP803003575, T8i, T8h);
Chris@82 684 cr[WS(rs, 19)] = FMS(KP943557151, T8g, T8d);
Chris@82 685 cr[WS(rs, 24)] = -(FMA(KP803003575, T8i, T8h));
Chris@82 686 }
Chris@82 687 {
Chris@82 688 E T66, T68, T5Z, T67, T5X, T5Y;
Chris@82 689 T66 = FMA(KP559154169, T65, T62);
Chris@82 690 T68 = FNMS(KP683113946, T62, T65);
Chris@82 691 T5X = FNMS(KP242145790, T5W, T5H);
Chris@82 692 T5Y = FNMS(KP904730450, T5V, T5O);
Chris@82 693 T5Z = FMA(KP541454447, T5Y, T5X);
Chris@82 694 T67 = FNMS(KP541454447, T5Y, T5X);
Chris@82 695 ci[WS(rs, 3)] = FNMS(KP921177326, T66, T5Z);
Chris@82 696 ci[WS(rs, 8)] = FMA(KP833417178, T68, T67);
Chris@82 697 cr[WS(rs, 6)] = FMA(KP921177326, T66, T5Z);
Chris@82 698 cr[WS(rs, 11)] = FNMS(KP833417178, T68, T67);
Chris@82 699 }
Chris@82 700 }
Chris@82 701 {
Chris@82 702 E T8s, T8u, T8j, T8m, T8n, T8o, T8t, T8p;
Chris@82 703 {
Chris@82 704 E T8q, T8r, T8k, T8l;
Chris@82 705 T8q = T2C - T3a;
Chris@82 706 T8r = T21 - T1v;
Chris@82 707 T8s = FMA(KP618033988, T8r, T8q);
Chris@82 708 T8u = FNMS(KP618033988, T8q, T8r);
Chris@82 709 T8j = T7o + T7l;
Chris@82 710 T8k = T3t + T3H;
Chris@82 711 T8l = T3W + T4a;
Chris@82 712 T8m = T8k + T8l;
Chris@82 713 T8n = FNMS(KP250000000, T8m, T8j);
Chris@82 714 T8o = T8k - T8l;
Chris@82 715 }
Chris@82 716 ci[WS(rs, 24)] = T8m + T8j;
Chris@82 717 T8t = FMA(KP559016994, T8o, T8n);
Chris@82 718 cr[WS(rs, 20)] = FMS(KP951056516, T8u, T8t);
Chris@82 719 ci[WS(rs, 19)] = FMA(KP951056516, T8u, T8t);
Chris@82 720 T8p = FNMS(KP559016994, T8o, T8n);
Chris@82 721 cr[WS(rs, 15)] = FMS(KP951056516, T8s, T8p);
Chris@82 722 ci[WS(rs, 14)] = FMA(KP951056516, T8s, T8p);
Chris@82 723 }
Chris@82 724 {
Chris@82 725 E T6b, T6T, T7v, T7H, T6M, T6P, T7P, T7O, T7I, T7J, T7K, T6q, T6F, T6G, T7e;
Chris@82 726 E T7h, T7D, T7C, T7w, T7x, T7y, T70, T77, T78, T69, T7r;
Chris@82 727 T69 = FNMS(KP559016994, T4g, T4f);
Chris@82 728 T6b = FMA(KP951056516, T6a, T69);
Chris@82 729 T6T = FNMS(KP951056516, T6a, T69);
Chris@82 730 T7r = FNMS(KP559016994, T7q, T7p);
Chris@82 731 T7v = FMA(KP951056516, T7u, T7r);
Chris@82 732 T7H = FNMS(KP951056516, T7u, T7r);
Chris@82 733 {
Chris@82 734 E T6i, T6N, T6E, T6L, T6p, T6O, T6x, T6K;
Chris@82 735 {
Chris@82 736 E T6e, T6h, T6A, T6D;
Chris@82 737 T6e = FMA(KP951056516, T6d, T6c);
Chris@82 738 T6h = FMA(KP951056516, T6g, T6f);
Chris@82 739 T6i = FMA(KP062914667, T6h, T6e);
Chris@82 740 T6N = FNMS(KP062914667, T6e, T6h);
Chris@82 741 T6A = FNMS(KP951056516, T6z, T6y);
Chris@82 742 T6D = FMA(KP951056516, T6C, T6B);
Chris@82 743 T6E = FMA(KP939062505, T6D, T6A);
Chris@82 744 T6L = FNMS(KP939062505, T6A, T6D);
Chris@82 745 }
Chris@82 746 {
Chris@82 747 E T6l, T6o, T6t, T6w;
Chris@82 748 T6l = FNMS(KP951056516, T6k, T6j);
Chris@82 749 T6o = FNMS(KP951056516, T6n, T6m);
Chris@82 750 T6p = FNMS(KP827271945, T6o, T6l);
Chris@82 751 T6O = FMA(KP827271945, T6l, T6o);
Chris@82 752 T6t = FNMS(KP951056516, T6s, T6r);
Chris@82 753 T6w = FMA(KP951056516, T6v, T6u);
Chris@82 754 T6x = FNMS(KP126329378, T6w, T6t);
Chris@82 755 T6K = FMA(KP126329378, T6t, T6w);
Chris@82 756 }
Chris@82 757 T6M = FMA(KP734762448, T6L, T6K);
Chris@82 758 T6P = FNMS(KP772036680, T6O, T6N);
Chris@82 759 T7P = FMA(KP734762448, T6E, T6x);
Chris@82 760 T7O = FNMS(KP772036680, T6p, T6i);
Chris@82 761 T7I = FMA(KP772036680, T6O, T6N);
Chris@82 762 T7J = FNMS(KP734762448, T6L, T6K);
Chris@82 763 T7K = FMA(KP994076283, T7J, T7I);
Chris@82 764 T6q = FMA(KP772036680, T6p, T6i);
Chris@82 765 T6F = FNMS(KP734762448, T6E, T6x);
Chris@82 766 T6G = FNMS(KP994076283, T6F, T6q);
Chris@82 767 }
Chris@82 768 {
Chris@82 769 E T6W, T7f, T76, T7d, T6Z, T7g, T73, T7c;
Chris@82 770 {
Chris@82 771 E T6U, T6V, T74, T75;
Chris@82 772 T6U = FMA(KP951056516, T6k, T6j);
Chris@82 773 T6V = FMA(KP951056516, T6n, T6m);
Chris@82 774 T6W = FMA(KP062914667, T6V, T6U);
Chris@82 775 T7f = FNMS(KP062914667, T6U, T6V);
Chris@82 776 T74 = FMA(KP951056516, T6z, T6y);
Chris@82 777 T75 = FNMS(KP951056516, T6C, T6B);
Chris@82 778 T76 = FMA(KP549754652, T75, T74);
Chris@82 779 T7d = FNMS(KP549754652, T74, T75);
Chris@82 780 }
Chris@82 781 {
Chris@82 782 E T6X, T6Y, T71, T72;
Chris@82 783 T6X = FNMS(KP951056516, T6d, T6c);
Chris@82 784 T6Y = FNMS(KP951056516, T6g, T6f);
Chris@82 785 T6Z = FMA(KP634619297, T6Y, T6X);
Chris@82 786 T7g = FNMS(KP634619297, T6X, T6Y);
Chris@82 787 T71 = FNMS(KP951056516, T6v, T6u);
Chris@82 788 T72 = FMA(KP951056516, T6s, T6r);
Chris@82 789 T73 = FNMS(KP470564281, T72, T71);
Chris@82 790 T7c = FMA(KP470564281, T71, T72);
Chris@82 791 }
Chris@82 792 T7e = FMA(KP968479752, T7d, T7c);
Chris@82 793 T7h = FNMS(KP845997307, T7g, T7f);
Chris@82 794 T7D = FNMS(KP968479752, T76, T73);
Chris@82 795 T7C = FNMS(KP845997307, T6Z, T6W);
Chris@82 796 T7w = FMA(KP845997307, T7g, T7f);
Chris@82 797 T7x = FNMS(KP968479752, T7d, T7c);
Chris@82 798 T7y = FMA(KP906616052, T7x, T7w);
Chris@82 799 T70 = FMA(KP845997307, T6Z, T6W);
Chris@82 800 T77 = FMA(KP968479752, T76, T73);
Chris@82 801 T78 = FMA(KP906616052, T77, T70);
Chris@82 802 }
Chris@82 803 cr[WS(rs, 3)] = FMA(KP998026728, T6G, T6b);
Chris@82 804 ci[WS(rs, 22)] = FNMS(KP998026728, T7y, T7v);
Chris@82 805 ci[WS(rs, 21)] = FNMS(KP998026728, T7K, T7H);
Chris@82 806 cr[WS(rs, 2)] = FMA(KP998026728, T78, T6T);
Chris@82 807 {
Chris@82 808 E T7E, T7G, T7B, T7F, T7z, T7A;
Chris@82 809 T7E = FNMS(KP560319534, T7D, T7C);
Chris@82 810 T7G = FMA(KP681693190, T7C, T7D);
Chris@82 811 T7z = FMA(KP249506682, T7y, T7v);
Chris@82 812 T7A = FNMS(KP906616052, T7x, T7w);
Chris@82 813 T7B = FNMS(KP557913902, T7A, T7z);
Chris@82 814 T7F = FMA(KP557913902, T7A, T7z);
Chris@82 815 cr[WS(rs, 17)] = -(FMA(KP949179823, T7E, T7B));
Chris@82 816 ci[WS(rs, 17)] = FMA(KP860541664, T7G, T7F);
Chris@82 817 ci[WS(rs, 12)] = FNMS(KP949179823, T7E, T7B);
Chris@82 818 cr[WS(rs, 22)] = FMS(KP860541664, T7G, T7F);
Chris@82 819 }
Chris@82 820 {
Chris@82 821 E T7i, T7k, T7b, T7j, T79, T7a;
Chris@82 822 T7i = FMA(KP681693190, T7h, T7e);
Chris@82 823 T7k = FNMS(KP560319534, T7e, T7h);
Chris@82 824 T79 = FNMS(KP249506682, T78, T6T);
Chris@82 825 T7a = FNMS(KP906616052, T77, T70);
Chris@82 826 T7b = FNMS(KP557913902, T7a, T79);
Chris@82 827 T7j = FMA(KP557913902, T7a, T79);
Chris@82 828 ci[WS(rs, 2)] = FNMS(KP860541664, T7i, T7b);
Chris@82 829 cr[WS(rs, 12)] = FNMS(KP949179823, T7k, T7j);
Chris@82 830 cr[WS(rs, 7)] = FMA(KP860541664, T7i, T7b);
Chris@82 831 ci[WS(rs, 7)] = FMA(KP949179823, T7k, T7j);
Chris@82 832 }
Chris@82 833 {
Chris@82 834 E T6Q, T6S, T6J, T6R, T6H, T6I;
Chris@82 835 T6Q = FNMS(KP621716863, T6P, T6M);
Chris@82 836 T6S = FMA(KP614372930, T6M, T6P);
Chris@82 837 T6H = FNMS(KP249506682, T6G, T6b);
Chris@82 838 T6I = FMA(KP994076283, T6F, T6q);
Chris@82 839 T6J = FNMS(KP557913902, T6I, T6H);
Chris@82 840 T6R = FMA(KP557913902, T6I, T6H);
Chris@82 841 ci[WS(rs, 1)] = FNMS(KP943557151, T6Q, T6J);
Chris@82 842 ci[WS(rs, 11)] = FMA(KP949179823, T6S, T6R);
Chris@82 843 cr[WS(rs, 8)] = FMA(KP943557151, T6Q, T6J);
Chris@82 844 ci[WS(rs, 6)] = FNMS(KP949179823, T6S, T6R);
Chris@82 845 }
Chris@82 846 {
Chris@82 847 E T7Q, T7S, T7N, T7R, T7L, T7M;
Chris@82 848 T7Q = FNMS(KP614372930, T7P, T7O);
Chris@82 849 T7S = FMA(KP621716863, T7O, T7P);
Chris@82 850 T7L = FMA(KP249506682, T7K, T7H);
Chris@82 851 T7M = FNMS(KP994076283, T7J, T7I);
Chris@82 852 T7N = FNMS(KP557913902, T7M, T7L);
Chris@82 853 T7R = FMA(KP557913902, T7M, T7L);
Chris@82 854 cr[WS(rs, 13)] = -(FMA(KP949179823, T7Q, T7N));
Chris@82 855 ci[WS(rs, 16)] = FNMS(KP943557151, T7S, T7R);
Chris@82 856 cr[WS(rs, 18)] = FMS(KP949179823, T7Q, T7N);
Chris@82 857 cr[WS(rs, 23)] = -(FMA(KP943557151, T7S, T7R));
Chris@82 858 }
Chris@82 859 }
Chris@82 860 }
Chris@82 861 }
Chris@82 862 }
Chris@82 863 }
Chris@82 864
/*
 * Twiddle instruction table for the FMA build of hf2_25 (this branch of the
 * ARCH_PREFERS_FMA / ISA_EXTENSION_PREFERS_FMA conditional).  It holds four
 * TW_CEXP entries whose last field is 1, 3, 9 and 24, followed by a closing
 * TW_NEXT entry.  The table is referenced by `desc` below.
 * NOTE(review): the meaning of the three fields comes from the tw_instr
 * definition, which is not part of this file -- confirm there.
 */
Chris@82 865 static const tw_instr twinstr[] = {
Chris@82 866 {TW_CEXP, 1, 1},
Chris@82 867 {TW_CEXP, 1, 3},
Chris@82 868 {TW_CEXP, 1, 9},
Chris@82 869 {TW_CEXP, 1, 24},
Chris@82 870 {TW_NEXT, 1, 0}
Chris@82 871 };
Chris@82 872
/*
 * Descriptor for the FMA build of the codelet: size 25, name "hf2_25", the
 * twinstr table, GENUS, and the tuple {84, 78, 356, 0}.  The first three
 * numbers equal the "84 additions, 78 multiplications, 356 fused
 * multiply/add" counts quoted in the generated header comment of this branch.
 */
Chris@82 873 static const hc2hc_desc desc = { 25, "hf2_25", twinstr, &GENUS, {84, 78, 356, 0} };
Chris@82 874
/*
 * Registration entry point (FMA build): passes the planner `p`, the hf2_25
 * function and its descriptor `desc` to X(khc2hc_register).
 */
Chris@82 875 void X(codelet_hf2_25) (planner *p) {
Chris@82 876 X(khc2hc_register) (p, hf2_25, &desc);
Chris@82 877 }
Chris@82 878 #else
Chris@82 879
Chris@82 880 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 25 -dit -name hf2_25 -include rdft/scalar/hf.h */
Chris@82 881
Chris@82 882 /*
Chris@82 883 * This function contains 440 FP additions, 340 FP multiplications,
Chris@82 884 * (or, 280 additions, 180 multiplications, 160 fused multiply/add),
Chris@82 885 * 149 stack variables, 20 constants, and 100 memory accesses
Chris@82 886 */
Chris@82 887 #include "rdft/scalar/hf.h"
Chris@82 888
/*
 * hf2_25, non-FMA build (the #else branch, used when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined).
 * Machine-generated size-25 codelet -- regenerate with genfft rather than
 * editing by hand.
 *
 *   cr, ci : arrays that are read and then overwritten in place; each loop
 *            iteration reads and writes cr[0], ci[0] and cr/ci[WS(rs, k)]
 *            for k = 1..24 (25 + 25 loads and 25 + 25 stores).
 *   W      : read-only twiddle table; W[0]..W[7] are consumed per iteration.
 *   rs     : stride handed to WS() to address the 25 elements.
 *   mb, me : the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms     : per-iteration step; cr moves by +ms and ci by -ms.
 *
 * NOTE(review): FMA(), FNMS(), DK(), WS() and MAKE_VOLATILE_STRIDE() are
 * macros supplied by the included project headers; their exact definitions
 * are not visible in this file.
 */
Chris@82 889 static void hf2_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 890 {
/*
 * Numeric constants.  KP951056516 and KP587785252 are the values of
 * sin(2*pi/5) and sin(pi/5), KP559016994 is sqrt(5)/4; the other constants
 * are used below as coefficient pairs when combining the five groups.
 */
Chris@82 891 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 892 DK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@82 893 DK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@82 894 DK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@82 895 DK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@82 896 DK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@82 897 DK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@82 898 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 899 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@82 900 DK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@82 901 DK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@82 902 DK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@82 903 DK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@82 904 DK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@82 905 DK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@82 906 DK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@82 907 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 908 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 909 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 910 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 911 {
Chris@82 912 INT m;
/*
 * W is first advanced by (mb - 1) * 8, so offset 0 of the table corresponds
 * to m = 1.  Each iteration then steps cr forward by ms, ci backward by ms
 * and W forward by 8.
 */
Chris@82 913 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) {
Chris@82 914 E T2, T5, T3, T6, T8, Td, T16, T14, Te, T9, T21, T23, Tx, TR, T1g;
Chris@82 915 E TB, T1f, TV, T1Q, Tg, T1S, Tk, T18, T2s, T1c, T2q, Tn, To, Tp, Tr;
Chris@82 916 E T28, T2x, TY, T2k, T2m, T2v, TG, TE, T10, T1h, T1E, T26, T1B, T1G, T1V;
Chris@82 917 E T1X, T1z, T1j;
/*
 * Twiddle setup: load the four stored pairs (W[0],W[1]), (W[2],W[3]),
 * (W[4],W[5]), (W[6],W[7]) and derive every other twiddle pair used below
 * from products of them (a*c - b*d, a*d + b*c and the forms with the sign
 * of the cross terms flipped).
 */
Chris@82 918 {
Chris@82 919 E Tw, TT, Tz, TQ, Tv, TU, TA, TP;
Chris@82 920 {
Chris@82 921 E T4, Tc, T7, Tb;
Chris@82 922 T2 = W[0];
Chris@82 923 T5 = W[1];
Chris@82 924 T3 = W[2];
Chris@82 925 T6 = W[3];
Chris@82 926 T4 = T2 * T3;
Chris@82 927 Tc = T5 * T3;
Chris@82 928 T7 = T5 * T6;
Chris@82 929 Tb = T2 * T6;
Chris@82 930 T8 = T4 - T7;
Chris@82 931 Td = Tb + Tc;
Chris@82 932 T16 = Tb - Tc;
Chris@82 933 T14 = T4 + T7;
Chris@82 934 Te = W[5];
Chris@82 935 Tw = T5 * Te;
Chris@82 936 TT = T3 * Te;
Chris@82 937 Tz = T2 * Te;
Chris@82 938 TQ = T6 * Te;
Chris@82 939 T9 = W[4];
Chris@82 940 Tv = T2 * T9;
Chris@82 941 TU = T6 * T9;
Chris@82 942 TA = T5 * T9;
Chris@82 943 TP = T3 * T9;
Chris@82 944 }
Chris@82 945 T21 = TP - TQ;
Chris@82 946 T23 = TT + TU;
Chris@82 947 {
Chris@82 948 E T15, T17, Ta, Tf, T1a, T1b, Ti, Tj;
Chris@82 949 Tx = Tv - Tw;
Chris@82 950 TR = TP + TQ;
Chris@82 951 T1g = Tz - TA;
Chris@82 952 TB = Tz + TA;
Chris@82 953 T1f = Tv + Tw;
Chris@82 954 TV = TT - TU;
Chris@82 955 T15 = T14 * T9;
Chris@82 956 T17 = T16 * Te;
Chris@82 957 T1Q = T15 + T17;
Chris@82 958 Ta = T8 * T9;
Chris@82 959 Tf = Td * Te;
Chris@82 960 Tg = Ta + Tf;
Chris@82 961 T1a = T14 * Te;
Chris@82 962 T1b = T16 * T9;
Chris@82 963 T1S = T1a - T1b;
Chris@82 964 Ti = T8 * Te;
Chris@82 965 Tj = Td * T9;
Chris@82 966 Tk = Ti - Tj;
Chris@82 967 T18 = T15 - T17;
Chris@82 968 T2s = Ti + Tj;
Chris@82 969 T1c = T1a + T1b;
Chris@82 970 T2q = Ta - Tf;
Chris@82 971 Tn = W[6];
Chris@82 972 To = W[7];
Chris@82 973 Tp = FMA(T8, Tn, Td * To);
Chris@82 974 Tr = FNMS(Td, Tn, T8 * To);
Chris@82 975 T28 = FNMS(T1S, Tn, T1Q * To);
Chris@82 976 T2x = FNMS(TV, Tn, TR * To);
Chris@82 977 TY = FMA(T3, Tn, T6 * To);
Chris@82 978 T2k = FMA(T2, Tn, T5 * To);
Chris@82 979 T2m = FNMS(T5, Tn, T2 * To);
Chris@82 980 T2v = FMA(TR, Tn, TV * To);
Chris@82 981 TG = FNMS(Te, Tn, T9 * To);
Chris@82 982 TE = FMA(T9, Tn, Te * To);
Chris@82 983 T10 = FNMS(T6, Tn, T3 * To);
Chris@82 984 T1h = FMA(T1f, Tn, T1g * To);
Chris@82 985 T1E = FMA(Tg, Tn, Tk * To);
Chris@82 986 T26 = FMA(T1Q, Tn, T1S * To);
Chris@82 987 T1B = FNMS(TB, Tn, Tx * To);
Chris@82 988 T1G = FNMS(Tk, Tn, Tg * To);
Chris@82 989 T1V = FMA(T14, Tn, T16 * To);
Chris@82 990 T1X = FNMS(T16, Tn, T14 * To);
Chris@82 991 T1z = FMA(Tx, Tn, TB * To);
Chris@82 992 T1j = FNMS(T1g, Tn, T1f * To);
Chris@82 993 }
Chris@82 994 }
Chris@82 995 {
Chris@82 996 E T1, T6v, T2F, T6A, TK, T2G, T6y, T6z, T6u, T71, T2O, T52, T2C, T6k, T4c;
Chris@82 997 E T5X, T4L, T5s, T4j, T5W, T4K, T5v, T1o, T6g, T30, T5M, T4A, T56, T3b, T5N;
Chris@82 998 E T4B, T59, T1L, T6h, T3r, T5P, T4E, T5d, T3y, T5Q, T4D, T5g, T2d, T6j, T3P;
Chris@82 999 E T5U, T4I, T5o, T3W, T5T, T4H, T5l;
/*
 * Group of elements 0, 5, 10, 15, 20: element 0 is read as is, the other
 * four are combined with their twiddle pairs and then merged.
 */
Chris@82 1000 {
Chris@82 1001 E Tm, T2I, Tt, T2J, Tu, T6w, TD, T2L, TI, T2M, TJ, T6x;
Chris@82 1002 T1 = cr[0];
Chris@82 1003 T6v = ci[0];
Chris@82 1004 {
Chris@82 1005 E Th, Tl, Tq, Ts;
Chris@82 1006 Th = cr[WS(rs, 5)];
Chris@82 1007 Tl = ci[WS(rs, 5)];
Chris@82 1008 Tm = FMA(Tg, Th, Tk * Tl);
Chris@82 1009 T2I = FNMS(Tk, Th, Tg * Tl);
Chris@82 1010 Tq = cr[WS(rs, 20)];
Chris@82 1011 Ts = ci[WS(rs, 20)];
Chris@82 1012 Tt = FMA(Tp, Tq, Tr * Ts);
Chris@82 1013 T2J = FNMS(Tr, Tq, Tp * Ts);
Chris@82 1014 }
Chris@82 1015 Tu = Tm + Tt;
Chris@82 1016 T6w = T2I + T2J;
Chris@82 1017 {
Chris@82 1018 E Ty, TC, TF, TH;
Chris@82 1019 Ty = cr[WS(rs, 10)];
Chris@82 1020 TC = ci[WS(rs, 10)];
Chris@82 1021 TD = FMA(Tx, Ty, TB * TC);
Chris@82 1022 T2L = FNMS(TB, Ty, Tx * TC);
Chris@82 1023 TF = cr[WS(rs, 15)];
Chris@82 1024 TH = ci[WS(rs, 15)];
Chris@82 1025 TI = FMA(TE, TF, TG * TH);
Chris@82 1026 T2M = FNMS(TG, TF, TE * TH);
Chris@82 1027 }
Chris@82 1028 TJ = TD + TI;
Chris@82 1029 T6x = T2L + T2M;
Chris@82 1030 T2F = KP559016994 * (Tu - TJ);
Chris@82 1031 T6A = KP559016994 * (T6w - T6x);
Chris@82 1032 TK = Tu + TJ;
Chris@82 1033 T2G = FNMS(KP250000000, TK, T1);
Chris@82 1034 T6y = T6w + T6x;
Chris@82 1035 T6z = FNMS(KP250000000, T6y, T6v);
Chris@82 1036 {
Chris@82 1037 E T6s, T6t, T2K, T2N;
Chris@82 1038 T6s = TD - TI;
Chris@82 1039 T6t = Tm - Tt;
Chris@82 1040 T6u = FNMS(KP587785252, T6t, KP951056516 * T6s);
Chris@82 1041 T71 = FMA(KP951056516, T6t, KP587785252 * T6s);
Chris@82 1042 T2K = T2I - T2J;
Chris@82 1043 T2N = T2L - T2M;
Chris@82 1044 T2O = FMA(KP951056516, T2K, KP587785252 * T2N);
Chris@82 1045 T52 = FNMS(KP587785252, T2K, KP951056516 * T2N);
Chris@82 1046 }
Chris@82 1047 }
/* Group of elements 3, 8, 13, 18, 23. */
Chris@82 1048 {
Chris@82 1049 E T2g, T48, T3Y, T3Z, T4h, T4g, T43, T46, T49, T2p, T2A, T2B, T2e, T2f;
Chris@82 1050 T2e = cr[WS(rs, 3)];
Chris@82 1051 T2f = ci[WS(rs, 3)];
Chris@82 1052 T2g = FMA(T3, T2e, T6 * T2f);
Chris@82 1053 T48 = FNMS(T6, T2e, T3 * T2f);
Chris@82 1054 {
Chris@82 1055 E T2j, T41, T2z, T45, T2o, T42, T2u, T44;
Chris@82 1056 {
Chris@82 1057 E T2h, T2i, T2w, T2y;
Chris@82 1058 T2h = cr[WS(rs, 8)];
Chris@82 1059 T2i = ci[WS(rs, 8)];
Chris@82 1060 T2j = FMA(T1f, T2h, T1g * T2i);
Chris@82 1061 T41 = FNMS(T1g, T2h, T1f * T2i);
Chris@82 1062 T2w = cr[WS(rs, 18)];
Chris@82 1063 T2y = ci[WS(rs, 18)];
Chris@82 1064 T2z = FMA(T2v, T2w, T2x * T2y);
Chris@82 1065 T45 = FNMS(T2x, T2w, T2v * T2y);
Chris@82 1066 }
Chris@82 1067 {
Chris@82 1068 E T2l, T2n, T2r, T2t;
Chris@82 1069 T2l = cr[WS(rs, 23)];
Chris@82 1070 T2n = ci[WS(rs, 23)];
Chris@82 1071 T2o = FMA(T2k, T2l, T2m * T2n);
Chris@82 1072 T42 = FNMS(T2m, T2l, T2k * T2n);
Chris@82 1073 T2r = cr[WS(rs, 13)];
Chris@82 1074 T2t = ci[WS(rs, 13)];
Chris@82 1075 T2u = FMA(T2q, T2r, T2s * T2t);
Chris@82 1076 T44 = FNMS(T2s, T2r, T2q * T2t);
Chris@82 1077 }
Chris@82 1078 T3Y = T2j - T2o;
Chris@82 1079 T3Z = T2u - T2z;
Chris@82 1080 T4h = T44 - T45;
Chris@82 1081 T4g = T41 - T42;
Chris@82 1082 T43 = T41 + T42;
Chris@82 1083 T46 = T44 + T45;
Chris@82 1084 T49 = T43 + T46;
Chris@82 1085 T2p = T2j + T2o;
Chris@82 1086 T2A = T2u + T2z;
Chris@82 1087 T2B = T2p + T2A;
Chris@82 1088 }
Chris@82 1089 T2C = T2g + T2B;
Chris@82 1090 T6k = T48 + T49;
Chris@82 1091 {
Chris@82 1092 E T40, T5r, T4b, T5q, T47, T4a;
Chris@82 1093 T40 = FMA(KP951056516, T3Y, KP587785252 * T3Z);
Chris@82 1094 T5r = FNMS(KP587785252, T3Y, KP951056516 * T3Z);
Chris@82 1095 T47 = KP559016994 * (T43 - T46);
Chris@82 1096 T4a = FNMS(KP250000000, T49, T48);
Chris@82 1097 T4b = T47 + T4a;
Chris@82 1098 T5q = T4a - T47;
Chris@82 1099 T4c = T40 + T4b;
Chris@82 1100 T5X = T5r + T5q;
Chris@82 1101 T4L = T4b - T40;
Chris@82 1102 T5s = T5q - T5r;
Chris@82 1103 }
Chris@82 1104 {
Chris@82 1105 E T4i, T5u, T4f, T5t, T4d, T4e;
Chris@82 1106 T4i = FMA(KP951056516, T4g, KP587785252 * T4h);
Chris@82 1107 T5u = FNMS(KP587785252, T4g, KP951056516 * T4h);
Chris@82 1108 T4d = KP559016994 * (T2p - T2A);
Chris@82 1109 T4e = FNMS(KP250000000, T2B, T2g);
Chris@82 1110 T4f = T4d + T4e;
Chris@82 1111 T5t = T4e - T4d;
Chris@82 1112 T4j = T4f - T4i;
Chris@82 1113 T5W = T5t - T5u;
Chris@82 1114 T4K = T4f + T4i;
Chris@82 1115 T5v = T5t + T5u;
Chris@82 1116 }
Chris@82 1117 }
/* Group of elements 1, 6, 11, 16, 21. */
Chris@82 1118 {
Chris@82 1119 E TO, T37, T2V, T2Y, T32, T31, T34, T35, T38, T13, T1m, T1n, TM, TN;
Chris@82 1120 TM = cr[WS(rs, 1)];
Chris@82 1121 TN = ci[WS(rs, 1)];
Chris@82 1122 TO = FMA(T2, TM, T5 * TN);
Chris@82 1123 T37 = FNMS(T5, TM, T2 * TN);
Chris@82 1124 {
Chris@82 1125 E TX, T2T, T1l, T2X, T12, T2U, T1e, T2W;
Chris@82 1126 {
Chris@82 1127 E TS, TW, T1i, T1k;
Chris@82 1128 TS = cr[WS(rs, 6)];
Chris@82 1129 TW = ci[WS(rs, 6)];
Chris@82 1130 TX = FMA(TR, TS, TV * TW);
Chris@82 1131 T2T = FNMS(TV, TS, TR * TW);
Chris@82 1132 T1i = cr[WS(rs, 16)];
Chris@82 1133 T1k = ci[WS(rs, 16)];
Chris@82 1134 T1l = FMA(T1h, T1i, T1j * T1k);
Chris@82 1135 T2X = FNMS(T1j, T1i, T1h * T1k);
Chris@82 1136 }
Chris@82 1137 {
Chris@82 1138 E TZ, T11, T19, T1d;
Chris@82 1139 TZ = cr[WS(rs, 21)];
Chris@82 1140 T11 = ci[WS(rs, 21)];
Chris@82 1141 T12 = FMA(TY, TZ, T10 * T11);
Chris@82 1142 T2U = FNMS(T10, TZ, TY * T11);
Chris@82 1143 T19 = cr[WS(rs, 11)];
Chris@82 1144 T1d = ci[WS(rs, 11)];
Chris@82 1145 T1e = FMA(T18, T19, T1c * T1d);
Chris@82 1146 T2W = FNMS(T1c, T19, T18 * T1d);
Chris@82 1147 }
Chris@82 1148 T2V = T2T - T2U;
Chris@82 1149 T2Y = T2W - T2X;
Chris@82 1150 T32 = T1e - T1l;
Chris@82 1151 T31 = TX - T12;
Chris@82 1152 T34 = T2T + T2U;
Chris@82 1153 T35 = T2W + T2X;
Chris@82 1154 T38 = T34 + T35;
Chris@82 1155 T13 = TX + T12;
Chris@82 1156 T1m = T1e + T1l;
Chris@82 1157 T1n = T13 + T1m;
Chris@82 1158 }
Chris@82 1159 T1o = TO + T1n;
Chris@82 1160 T6g = T37 + T38;
Chris@82 1161 {
Chris@82 1162 E T2Z, T55, T2S, T54, T2Q, T2R;
Chris@82 1163 T2Z = FMA(KP951056516, T2V, KP587785252 * T2Y);
Chris@82 1164 T55 = FNMS(KP587785252, T2V, KP951056516 * T2Y);
Chris@82 1165 T2Q = KP559016994 * (T13 - T1m);
Chris@82 1166 T2R = FNMS(KP250000000, T1n, TO);
Chris@82 1167 T2S = T2Q + T2R;
Chris@82 1168 T54 = T2R - T2Q;
Chris@82 1169 T30 = T2S - T2Z;
Chris@82 1170 T5M = T54 - T55;
Chris@82 1171 T4A = T2S + T2Z;
Chris@82 1172 T56 = T54 + T55;
Chris@82 1173 }
Chris@82 1174 {
Chris@82 1175 E T33, T58, T3a, T57, T36, T39;
Chris@82 1176 T33 = FMA(KP951056516, T31, KP587785252 * T32);
Chris@82 1177 T58 = FNMS(KP587785252, T31, KP951056516 * T32);
Chris@82 1178 T36 = KP559016994 * (T34 - T35);
Chris@82 1179 T39 = FNMS(KP250000000, T38, T37);
Chris@82 1180 T3a = T36 + T39;
Chris@82 1181 T57 = T39 - T36;
Chris@82 1182 T3b = T33 + T3a;
Chris@82 1183 T5N = T58 + T57;
Chris@82 1184 T4B = T3a - T33;
Chris@82 1185 T59 = T57 - T58;
Chris@82 1186 }
Chris@82 1187 }
/* Group of elements 4, 9, 14, 19, 24. */
Chris@82 1188 {
Chris@82 1189 E T1r, T3n, T3d, T3e, T3w, T3v, T3i, T3l, T3o, T1y, T1J, T1K, T1p, T1q;
Chris@82 1190 T1p = cr[WS(rs, 4)];
Chris@82 1191 T1q = ci[WS(rs, 4)];
Chris@82 1192 T1r = FMA(T8, T1p, Td * T1q);
Chris@82 1193 T3n = FNMS(Td, T1p, T8 * T1q);
Chris@82 1194 {
Chris@82 1195 E T1u, T3g, T1I, T3k, T1x, T3h, T1D, T3j;
Chris@82 1196 {
Chris@82 1197 E T1s, T1t, T1F, T1H;
Chris@82 1198 T1s = cr[WS(rs, 9)];
Chris@82 1199 T1t = ci[WS(rs, 9)];
Chris@82 1200 T1u = FMA(T9, T1s, Te * T1t);
Chris@82 1201 T3g = FNMS(Te, T1s, T9 * T1t);
Chris@82 1202 T1F = cr[WS(rs, 19)];
Chris@82 1203 T1H = ci[WS(rs, 19)];
Chris@82 1204 T1I = FMA(T1E, T1F, T1G * T1H);
Chris@82 1205 T3k = FNMS(T1G, T1F, T1E * T1H);
Chris@82 1206 }
Chris@82 1207 {
Chris@82 1208 E T1v, T1w, T1A, T1C;
Chris@82 1209 T1v = cr[WS(rs, 24)];
Chris@82 1210 T1w = ci[WS(rs, 24)];
Chris@82 1211 T1x = FMA(Tn, T1v, To * T1w);
Chris@82 1212 T3h = FNMS(To, T1v, Tn * T1w);
Chris@82 1213 T1A = cr[WS(rs, 14)];
Chris@82 1214 T1C = ci[WS(rs, 14)];
Chris@82 1215 T1D = FMA(T1z, T1A, T1B * T1C);
Chris@82 1216 T3j = FNMS(T1B, T1A, T1z * T1C);
Chris@82 1217 }
Chris@82 1218 T3d = T1x - T1u;
Chris@82 1219 T3e = T1D - T1I;
Chris@82 1220 T3w = T3j - T3k;
Chris@82 1221 T3v = T3g - T3h;
Chris@82 1222 T3i = T3g + T3h;
Chris@82 1223 T3l = T3j + T3k;
Chris@82 1224 T3o = T3i + T3l;
Chris@82 1225 T1y = T1u + T1x;
Chris@82 1226 T1J = T1D + T1I;
Chris@82 1227 T1K = T1y + T1J;
Chris@82 1228 }
Chris@82 1229 T1L = T1r + T1K;
Chris@82 1230 T6h = T3n + T3o;
Chris@82 1231 {
Chris@82 1232 E T3f, T5c, T3q, T5b, T3m, T3p;
Chris@82 1233 T3f = FNMS(KP587785252, T3e, KP951056516 * T3d);
Chris@82 1234 T5c = FMA(KP587785252, T3d, KP951056516 * T3e);
Chris@82 1235 T3m = KP559016994 * (T3i - T3l);
Chris@82 1236 T3p = FNMS(KP250000000, T3o, T3n);
Chris@82 1237 T3q = T3m + T3p;
Chris@82 1238 T5b = T3p - T3m;
Chris@82 1239 T3r = T3f - T3q;
Chris@82 1240 T5P = T5c + T5b;
Chris@82 1241 T4E = T3f + T3q;
Chris@82 1242 T5d = T5b - T5c;
Chris@82 1243 }
Chris@82 1244 {
Chris@82 1245 E T3x, T5f, T3u, T5e, T3s, T3t;
Chris@82 1246 T3x = FMA(KP951056516, T3v, KP587785252 * T3w);
Chris@82 1247 T5f = FNMS(KP587785252, T3v, KP951056516 * T3w);
Chris@82 1248 T3s = KP559016994 * (T1y - T1J);
Chris@82 1249 T3t = FNMS(KP250000000, T1K, T1r);
Chris@82 1250 T3u = T3s + T3t;
Chris@82 1251 T5e = T3t - T3s;
Chris@82 1252 T3y = T3u - T3x;
Chris@82 1253 T5Q = T5e - T5f;
Chris@82 1254 T4D = T3u + T3x;
Chris@82 1255 T5g = T5e + T5f;
Chris@82 1256 }
Chris@82 1257 }
/* Group of elements 2, 7, 12, 17, 22. */
Chris@82 1258 {
Chris@82 1259 E T1P, T3L, T3B, T3C, T3U, T3T, T3G, T3J, T3M, T20, T2b, T2c, T1N, T1O;
Chris@82 1260 T1N = cr[WS(rs, 2)];
Chris@82 1261 T1O = ci[WS(rs, 2)];
Chris@82 1262 T1P = FMA(T14, T1N, T16 * T1O);
Chris@82 1263 T3L = FNMS(T16, T1N, T14 * T1O);
Chris@82 1264 {
Chris@82 1265 E T1U, T3E, T2a, T3I, T1Z, T3F, T25, T3H;
Chris@82 1266 {
Chris@82 1267 E T1R, T1T, T27, T29;
Chris@82 1268 T1R = cr[WS(rs, 7)];
Chris@82 1269 T1T = ci[WS(rs, 7)];
Chris@82 1270 T1U = FMA(T1Q, T1R, T1S * T1T);
Chris@82 1271 T3E = FNMS(T1S, T1R, T1Q * T1T);
Chris@82 1272 T27 = cr[WS(rs, 17)];
Chris@82 1273 T29 = ci[WS(rs, 17)];
Chris@82 1274 T2a = FMA(T26, T27, T28 * T29);
Chris@82 1275 T3I = FNMS(T28, T27, T26 * T29);
Chris@82 1276 }
Chris@82 1277 {
Chris@82 1278 E T1W, T1Y, T22, T24;
Chris@82 1279 T1W = cr[WS(rs, 22)];
Chris@82 1280 T1Y = ci[WS(rs, 22)];
Chris@82 1281 T1Z = FMA(T1V, T1W, T1X * T1Y);
Chris@82 1282 T3F = FNMS(T1X, T1W, T1V * T1Y);
Chris@82 1283 T22 = cr[WS(rs, 12)];
Chris@82 1284 T24 = ci[WS(rs, 12)];
Chris@82 1285 T25 = FMA(T21, T22, T23 * T24);
Chris@82 1286 T3H = FNMS(T23, T22, T21 * T24);
Chris@82 1287 }
Chris@82 1288 T3B = T1U - T1Z;
Chris@82 1289 T3C = T25 - T2a;
Chris@82 1290 T3U = T3H - T3I;
Chris@82 1291 T3T = T3E - T3F;
Chris@82 1292 T3G = T3E + T3F;
Chris@82 1293 T3J = T3H + T3I;
Chris@82 1294 T3M = T3G + T3J;
Chris@82 1295 T20 = T1U + T1Z;
Chris@82 1296 T2b = T25 + T2a;
Chris@82 1297 T2c = T20 + T2b;
Chris@82 1298 }
Chris@82 1299 T2d = T1P + T2c;
Chris@82 1300 T6j = T3L + T3M;
Chris@82 1301 {
Chris@82 1302 E T3D, T5n, T3O, T5m, T3K, T3N;
Chris@82 1303 T3D = FMA(KP951056516, T3B, KP587785252 * T3C);
Chris@82 1304 T5n = FNMS(KP587785252, T3B, KP951056516 * T3C);
Chris@82 1305 T3K = KP559016994 * (T3G - T3J);
Chris@82 1306 T3N = FNMS(KP250000000, T3M, T3L);
Chris@82 1307 T3O = T3K + T3N;
Chris@82 1308 T5m = T3N - T3K;
Chris@82 1309 T3P = T3D + T3O;
Chris@82 1310 T5U = T5n + T5m;
Chris@82 1311 T4I = T3O - T3D;
Chris@82 1312 T5o = T5m - T5n;
Chris@82 1313 }
Chris@82 1314 {
Chris@82 1315 E T3V, T5k, T3S, T5j, T3Q, T3R;
Chris@82 1316 T3V = FMA(KP951056516, T3T, KP587785252 * T3U);
Chris@82 1317 T5k = FNMS(KP587785252, T3T, KP951056516 * T3U);
Chris@82 1318 T3Q = KP559016994 * (T20 - T2b);
Chris@82 1319 T3R = FNMS(KP250000000, T2c, T1P);
Chris@82 1320 T3S = T3Q + T3R;
Chris@82 1321 T5j = T3R - T3Q;
Chris@82 1322 T3W = T3S - T3V;
Chris@82 1323 T5T = T5j - T5k;
Chris@82 1324 T4H = T3S + T3V;
Chris@82 1325 T5l = T5j + T5k;
Chris@82 1326 }
Chris@82 1327 }
/*
 * Stores begin here; every input element has been loaded into a temporary
 * above.  This block writes cr[0], cr[5], cr[10], ci[4] and ci[9].
 */
Chris@82 1328 {
Chris@82 1329 E T6m, T6o, TL, T2E, T6d, T6e, T6n, T6f;
Chris@82 1330 {
Chris@82 1331 E T6i, T6l, T1M, T2D;
Chris@82 1332 T6i = T6g - T6h;
Chris@82 1333 T6l = T6j - T6k;
Chris@82 1334 T6m = FMA(KP951056516, T6i, KP587785252 * T6l);
Chris@82 1335 T6o = FNMS(KP587785252, T6i, KP951056516 * T6l);
Chris@82 1336 TL = T1 + TK;
Chris@82 1337 T1M = T1o + T1L;
Chris@82 1338 T2D = T2d + T2C;
Chris@82 1339 T2E = T1M + T2D;
Chris@82 1340 T6d = KP559016994 * (T1M - T2D);
Chris@82 1341 T6e = FNMS(KP250000000, T2E, TL);
Chris@82 1342 }
Chris@82 1343 cr[0] = TL + T2E;
Chris@82 1344 T6n = T6e - T6d;
Chris@82 1345 cr[WS(rs, 10)] = T6n - T6o;
Chris@82 1346 ci[WS(rs, 9)] = T6n + T6o;
Chris@82 1347 T6f = T6d + T6e;
Chris@82 1348 ci[WS(rs, 4)] = T6f - T6m;
Chris@82 1349 cr[WS(rs, 5)] = T6f + T6m;
Chris@82 1350 }
/*
 * Writes cr[1], cr[4], cr[6], cr[9], cr[11], cr[14], cr[16], cr[19], cr[21],
 * cr[24] and ci[0], ci[3], ci[5], ci[8], ci[10], ci[13], ci[15], ci[18],
 * ci[20], ci[23].
 */
Chris@82 1351 {
Chris@82 1352 E T2P, T4z, T72, T7e, T4m, T7j, T4n, T7i, T4U, T77, T4X, T75, T4O, T6Y, T4P;
Chris@82 1353 E T6X, T4s, T7f, T4v, T7d, T2H, T70;
Chris@82 1354 T2H = T2F + T2G;
Chris@82 1355 T2P = T2H - T2O;
Chris@82 1356 T4z = T2H + T2O;
Chris@82 1357 T70 = T6A + T6z;
Chris@82 1358 T72 = T70 - T71;
Chris@82 1359 T7e = T71 + T70;
Chris@82 1360 {
Chris@82 1361 E T3c, T3z, T3A, T3X, T4k, T4l;
Chris@82 1362 T3c = FMA(KP535826794, T30, KP844327925 * T3b);
Chris@82 1363 T3z = FNMS(KP637423989, T3y, KP770513242 * T3r);
Chris@82 1364 T3A = T3c + T3z;
Chris@82 1365 T3X = FNMS(KP425779291, T3W, KP904827052 * T3P);
Chris@82 1366 T4k = FNMS(KP992114701, T4j, KP125333233 * T4c);
Chris@82 1367 T4l = T3X + T4k;
Chris@82 1368 T4m = T3A + T4l;
Chris@82 1369 T7j = T3X - T4k;
Chris@82 1370 T4n = KP559016994 * (T3A - T4l);
Chris@82 1371 T7i = T3z - T3c;
Chris@82 1372 }
Chris@82 1373 {
Chris@82 1374 E T4S, T4T, T73, T4V, T4W, T74;
Chris@82 1375 T4S = FNMS(KP248689887, T4A, KP968583161 * T4B);
Chris@82 1376 T4T = FNMS(KP844327925, T4D, KP535826794 * T4E);
Chris@82 1377 T73 = T4S + T4T;
Chris@82 1378 T4V = FNMS(KP481753674, T4H, KP876306680 * T4I);
Chris@82 1379 T4W = FNMS(KP684547105, T4K, KP728968627 * T4L);
Chris@82 1380 T74 = T4V + T4W;
Chris@82 1381 T4U = T4S - T4T;
Chris@82 1382 T77 = KP559016994 * (T73 - T74);
Chris@82 1383 T4X = T4V - T4W;
Chris@82 1384 T75 = T73 + T74;
Chris@82 1385 }
Chris@82 1386 {
Chris@82 1387 E T4C, T4F, T4G, T4J, T4M, T4N;
Chris@82 1388 T4C = FMA(KP968583161, T4A, KP248689887 * T4B);
Chris@82 1389 T4F = FMA(KP535826794, T4D, KP844327925 * T4E);
Chris@82 1390 T4G = T4C + T4F;
Chris@82 1391 T4J = FMA(KP876306680, T4H, KP481753674 * T4I);
Chris@82 1392 T4M = FMA(KP728968627, T4K, KP684547105 * T4L);
Chris@82 1393 T4N = T4J + T4M;
Chris@82 1394 T4O = T4G + T4N;
Chris@82 1395 T6Y = T4J - T4M;
Chris@82 1396 T4P = KP559016994 * (T4G - T4N);
Chris@82 1397 T6X = T4F - T4C;
Chris@82 1398 }
Chris@82 1399 {
Chris@82 1400 E T4q, T4r, T7b, T4t, T4u, T7c;
Chris@82 1401 T4q = FNMS(KP844327925, T30, KP535826794 * T3b);
Chris@82 1402 T4r = FMA(KP770513242, T3y, KP637423989 * T3r);
Chris@82 1403 T7b = T4q + T4r;
Chris@82 1404 T4t = FMA(KP125333233, T4j, KP992114701 * T4c);
Chris@82 1405 T4u = FMA(KP904827052, T3W, KP425779291 * T3P);
Chris@82 1406 T7c = T4u + T4t;
Chris@82 1407 T4s = T4q - T4r;
Chris@82 1408 T7f = T7b - T7c;
Chris@82 1409 T4v = T4t - T4u;
Chris@82 1410 T7d = KP559016994 * (T7b + T7c);
Chris@82 1411 }
Chris@82 1412 cr[WS(rs, 4)] = T2P + T4m;
Chris@82 1413 ci[WS(rs, 23)] = T75 + T72;
Chris@82 1414 ci[WS(rs, 20)] = T7f + T7e;
Chris@82 1415 cr[WS(rs, 1)] = T4z + T4O;
Chris@82 1416 {
Chris@82 1417 E T4w, T4y, T4p, T4x, T4o;
Chris@82 1418 T4w = FMA(KP951056516, T4s, KP587785252 * T4v);
Chris@82 1419 T4y = FNMS(KP587785252, T4s, KP951056516 * T4v);
Chris@82 1420 T4o = FNMS(KP250000000, T4m, T2P);
Chris@82 1421 T4p = T4n + T4o;
Chris@82 1422 T4x = T4o - T4n;
Chris@82 1423 ci[0] = T4p - T4w;
Chris@82 1424 ci[WS(rs, 5)] = T4x + T4y;
Chris@82 1425 cr[WS(rs, 9)] = T4p + T4w;
Chris@82 1426 ci[WS(rs, 10)] = T4x - T4y;
Chris@82 1427 }
Chris@82 1428 {
Chris@82 1429 E T6Z, T79, T78, T7a, T76;
Chris@82 1430 T6Z = FMA(KP587785252, T6X, KP951056516 * T6Y);
Chris@82 1431 T79 = FNMS(KP587785252, T6Y, KP951056516 * T6X);
Chris@82 1432 T76 = FNMS(KP250000000, T75, T72);
Chris@82 1433 T78 = T76 - T77;
Chris@82 1434 T7a = T77 + T76;
Chris@82 1435 cr[WS(rs, 16)] = T6Z - T78;
Chris@82 1436 ci[WS(rs, 18)] = T79 + T7a;
Chris@82 1437 ci[WS(rs, 13)] = T6Z + T78;
Chris@82 1438 cr[WS(rs, 21)] = T79 - T7a;
Chris@82 1439 }
Chris@82 1440 {
Chris@82 1441 E T7k, T7l, T7h, T7m, T7g;
Chris@82 1442 T7k = FMA(KP587785252, T7i, KP951056516 * T7j);
Chris@82 1443 T7l = FNMS(KP587785252, T7j, KP951056516 * T7i);
Chris@82 1444 T7g = FNMS(KP250000000, T7f, T7e);
Chris@82 1445 T7h = T7d - T7g;
Chris@82 1446 T7m = T7d + T7g;
Chris@82 1447 cr[WS(rs, 14)] = T7h - T7k;
Chris@82 1448 ci[WS(rs, 15)] = T7l + T7m;
Chris@82 1449 cr[WS(rs, 19)] = T7k + T7h;
Chris@82 1450 cr[WS(rs, 24)] = T7l - T7m;
Chris@82 1451 }
Chris@82 1452 {
Chris@82 1453 E T4Y, T50, T4R, T4Z, T4Q;
Chris@82 1454 T4Y = FMA(KP951056516, T4U, KP587785252 * T4X);
Chris@82 1455 T50 = FNMS(KP587785252, T4U, KP951056516 * T4X);
Chris@82 1456 T4Q = FNMS(KP250000000, T4O, T4z);
Chris@82 1457 T4R = T4P + T4Q;
Chris@82 1458 T4Z = T4Q - T4P;
Chris@82 1459 ci[WS(rs, 3)] = T4R - T4Y;
Chris@82 1460 ci[WS(rs, 8)] = T4Z + T50;
Chris@82 1461 cr[WS(rs, 6)] = T4R + T4Y;
Chris@82 1462 cr[WS(rs, 11)] = T4Z - T50;
Chris@82 1463 }
Chris@82 1464 }
/* Writes ci[24], ci[19], ci[14], cr[20] and cr[15]. */
Chris@82 1465 {
Chris@82 1466 E T7p, T7x, T7q, T7t, T7u, T7v, T7y, T7w;
Chris@82 1467 {
Chris@82 1468 E T7n, T7o, T7r, T7s;
Chris@82 1469 T7n = T1L - T1o;
Chris@82 1470 T7o = T2d - T2C;
Chris@82 1471 T7p = FMA(KP587785252, T7n, KP951056516 * T7o);
Chris@82 1472 T7x = FNMS(KP587785252, T7o, KP951056516 * T7n);
Chris@82 1473 T7q = T6y + T6v;
Chris@82 1474 T7r = T6g + T6h;
Chris@82 1475 T7s = T6j + T6k;
Chris@82 1476 T7t = T7r + T7s;
Chris@82 1477 T7u = FNMS(KP250000000, T7t, T7q);
Chris@82 1478 T7v = KP559016994 * (T7r - T7s);
Chris@82 1479 }
Chris@82 1480 ci[WS(rs, 24)] = T7t + T7q;
Chris@82 1481 T7y = T7v + T7u;
Chris@82 1482 cr[WS(rs, 20)] = T7x - T7y;
Chris@82 1483 ci[WS(rs, 19)] = T7x + T7y;
Chris@82 1484 T7w = T7u - T7v;
Chris@82 1485 cr[WS(rs, 15)] = T7p - T7w;
Chris@82 1486 ci[WS(rs, 14)] = T7p + T7w;
Chris@82 1487 }
/*
 * Writes the remaining slots: cr[2], cr[3], cr[7], cr[8], cr[12], cr[13],
 * cr[17], cr[18], cr[22], cr[23] and ci[1], ci[2], ci[6], ci[7], ci[11],
 * ci[12], ci[16], ci[17], ci[21], ci[22].
 */
Chris@82 1488 {
Chris@82 1489 E T53, T5L, T6C, T6O, T5y, T6T, T5z, T6S, T66, T6H, T69, T6F, T60, T6q, T61;
Chris@82 1490 E T6p, T5E, T6P, T5H, T6N, T51, T6B;
Chris@82 1491 T51 = T2G - T2F;
Chris@82 1492 T53 = T51 + T52;
Chris@82 1493 T5L = T51 - T52;
Chris@82 1494 T6B = T6z - T6A;
Chris@82 1495 T6C = T6u + T6B;
Chris@82 1496 T6O = T6B - T6u;
Chris@82 1497 {
Chris@82 1498 E T5a, T5h, T5i, T5p, T5w, T5x;
Chris@82 1499 T5a = FMA(KP728968627, T56, KP684547105 * T59);
Chris@82 1500 T5h = FNMS(KP992114701, T5g, KP125333233 * T5d);
Chris@82 1501 T5i = T5a + T5h;
Chris@82 1502 T5p = FMA(KP062790519, T5l, KP998026728 * T5o);
Chris@82 1503 T5w = FNMS(KP637423989, T5v, KP770513242 * T5s);
Chris@82 1504 T5x = T5p + T5w;
Chris@82 1505 T5y = T5i + T5x;
Chris@82 1506 T6T = T5p - T5w;
Chris@82 1507 T5z = KP559016994 * (T5i - T5x);
Chris@82 1508 T6S = T5h - T5a;
Chris@82 1509 }
Chris@82 1510 {
Chris@82 1511 E T64, T65, T6D, T67, T68, T6E;
Chris@82 1512 T64 = FNMS(KP481753674, T5M, KP876306680 * T5N);
Chris@82 1513 T65 = FMA(KP904827052, T5Q, KP425779291 * T5P);
Chris@82 1514 T6D = T64 - T65;
Chris@82 1515 T67 = FNMS(KP844327925, T5T, KP535826794 * T5U);
Chris@82 1516 T68 = FNMS(KP998026728, T5W, KP062790519 * T5X);
Chris@82 1517 T6E = T67 + T68;
Chris@82 1518 T66 = T64 + T65;
Chris@82 1519 T6H = KP559016994 * (T6D - T6E);
Chris@82 1520 T69 = T67 - T68;
Chris@82 1521 T6F = T6D + T6E;
Chris@82 1522 }
Chris@82 1523 {
Chris@82 1524 E T5O, T5R, T5S, T5V, T5Y, T5Z;
Chris@82 1525 T5O = FMA(KP876306680, T5M, KP481753674 * T5N);
Chris@82 1526 T5R = FNMS(KP425779291, T5Q, KP904827052 * T5P);
Chris@82 1527 T5S = T5O + T5R;
Chris@82 1528 T5V = FMA(KP535826794, T5T, KP844327925 * T5U);
Chris@82 1529 T5Y = FMA(KP062790519, T5W, KP998026728 * T5X);
Chris@82 1530 T5Z = T5V + T5Y;
Chris@82 1531 T60 = T5S + T5Z;
Chris@82 1532 T6q = T5V - T5Y;
Chris@82 1533 T61 = KP559016994 * (T5S - T5Z);
Chris@82 1534 T6p = T5R - T5O;
Chris@82 1535 }
Chris@82 1536 {
Chris@82 1537 E T5C, T5D, T6L, T5F, T5G, T6M;
Chris@82 1538 T5C = FNMS(KP684547105, T56, KP728968627 * T59);
Chris@82 1539 T5D = FMA(KP125333233, T5g, KP992114701 * T5d);
Chris@82 1540 T6L = T5C - T5D;
Chris@82 1541 T5F = FNMS(KP998026728, T5l, KP062790519 * T5o);
Chris@82 1542 T5G = FMA(KP770513242, T5v, KP637423989 * T5s);
Chris@82 1543 T6M = T5F - T5G;
Chris@82 1544 T5E = T5C + T5D;
Chris@82 1545 T6P = T6L + T6M;
Chris@82 1546 T5H = T5F + T5G;
Chris@82 1547 T6N = KP559016994 * (T6L - T6M);
Chris@82 1548 }
Chris@82 1549 cr[WS(rs, 3)] = T53 + T5y;
Chris@82 1550 ci[WS(rs, 22)] = T6F + T6C;
Chris@82 1551 ci[WS(rs, 21)] = T6P + T6O;
Chris@82 1552 cr[WS(rs, 2)] = T5L + T60;
Chris@82 1553 {
Chris@82 1554 E T6r, T6J, T6I, T6K, T6G;
Chris@82 1555 T6r = FMA(KP587785252, T6p, KP951056516 * T6q);
Chris@82 1556 T6J = FNMS(KP587785252, T6q, KP951056516 * T6p);
Chris@82 1557 T6G = FNMS(KP250000000, T6F, T6C);
Chris@82 1558 T6I = T6G - T6H;
Chris@82 1559 T6K = T6H + T6G;
Chris@82 1560 cr[WS(rs, 17)] = T6r - T6I;
Chris@82 1561 ci[WS(rs, 17)] = T6J + T6K;
Chris@82 1562 ci[WS(rs, 12)] = T6r + T6I;
Chris@82 1563 cr[WS(rs, 22)] = T6J - T6K;
Chris@82 1564 }
Chris@82 1565 {
Chris@82 1566 E T6a, T6c, T63, T6b, T62;
Chris@82 1567 T6a = FMA(KP951056516, T66, KP587785252 * T69);
Chris@82 1568 T6c = FNMS(KP587785252, T66, KP951056516 * T69);
Chris@82 1569 T62 = FNMS(KP250000000, T60, T5L);
Chris@82 1570 T63 = T61 + T62;
Chris@82 1571 T6b = T62 - T61;
Chris@82 1572 ci[WS(rs, 2)] = T63 - T6a;
Chris@82 1573 ci[WS(rs, 7)] = T6b + T6c;
Chris@82 1574 cr[WS(rs, 7)] = T63 + T6a;
Chris@82 1575 cr[WS(rs, 12)] = T6b - T6c;
Chris@82 1576 }
Chris@82 1577 {
Chris@82 1578 E T5I, T5K, T5B, T5J, T5A;
Chris@82 1579 T5I = FMA(KP951056516, T5E, KP587785252 * T5H);
Chris@82 1580 T5K = FNMS(KP587785252, T5E, KP951056516 * T5H);
Chris@82 1581 T5A = FNMS(KP250000000, T5y, T53);
Chris@82 1582 T5B = T5z + T5A;
Chris@82 1583 T5J = T5A - T5z;
Chris@82 1584 ci[WS(rs, 1)] = T5B - T5I;
Chris@82 1585 ci[WS(rs, 6)] = T5J + T5K;
Chris@82 1586 cr[WS(rs, 8)] = T5B + T5I;
Chris@82 1587 ci[WS(rs, 11)] = T5J - T5K;
Chris@82 1588 }
Chris@82 1589 {
Chris@82 1590 E T6U, T6V, T6R, T6W, T6Q;
Chris@82 1591 T6U = FMA(KP587785252, T6S, KP951056516 * T6T);
Chris@82 1592 T6V = FNMS(KP587785252, T6T, KP951056516 * T6S);
Chris@82 1593 T6Q = FNMS(KP250000000, T6P, T6O);
Chris@82 1594 T6R = T6N - T6Q;
Chris@82 1595 T6W = T6N + T6Q;
Chris@82 1596 cr[WS(rs, 13)] = T6R - T6U;
Chris@82 1597 ci[WS(rs, 16)] = T6V + T6W;
Chris@82 1598 cr[WS(rs, 18)] = T6U + T6R;
Chris@82 1599 cr[WS(rs, 23)] = T6V - T6W;
Chris@82 1600 }
Chris@82 1601 }
Chris@82 1602 }
Chris@82 1603 }
Chris@82 1604 }
Chris@82 1605 }
Chris@82 1606
/*
 * Twiddle instruction table for the non-FMA build of hf2_25: four TW_CEXP
 * entries whose last field is 1, 3, 9 and 24, followed by a closing TW_NEXT
 * entry.  hf2_25 above reads W[0]..W[7] and advances W by 8 per iteration,
 * i.e. two values per TW_CEXP entry.
 * NOTE(review): the meaning of the three fields comes from the tw_instr
 * definition, which is not part of this file -- confirm there.
 */
Chris@82 1607 static const tw_instr twinstr[] = {
Chris@82 1608 {TW_CEXP, 1, 1},
Chris@82 1609 {TW_CEXP, 1, 3},
Chris@82 1610 {TW_CEXP, 1, 9},
Chris@82 1611 {TW_CEXP, 1, 24},
Chris@82 1612 {TW_NEXT, 1, 0}
Chris@82 1613 };
Chris@82 1614
/*
 * Descriptor for the non-FMA build of the codelet: size 25, name "hf2_25",
 * the twinstr table, GENUS, and the tuple {280, 180, 160, 0}.  The first
 * three numbers equal the "280 additions, 180 multiplications, 160 fused
 * multiply/add" counts quoted in the generated comment above hf2_25.
 */
Chris@82 1615 static const hc2hc_desc desc = { 25, "hf2_25", twinstr, &GENUS, {280, 180, 160, 0} };
Chris@82 1616
/*
 * Registration entry point (non-FMA build): passes the planner `p`, the
 * hf2_25 function and its descriptor `desc` to X(khc2hc_register).
 */
Chris@82 1617 void X(codelet_hf2_25) (planner *p) {
Chris@82 1618 X(khc2hc_register) (p, hf2_25, &desc);
Chris@82 1619 }
Chris@82 1620 #endif