annotate src/fftw-3.3.5/rdft/scalar/r2cf/hf2_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:47:06 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 25 -dit -name hf2_25 -include hf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 440 FP additions, 434 FP multiplications,
Chris@42 32 * (or, 84 additions, 78 multiplications, 356 fused multiply/add),
Chris@42 33 * 215 stack variables, 47 constants, and 100 memory accesses
Chris@42 34 */
Chris@42 35 #include "hf.h"
Chris@42 36
Chris@42 37 static void hf2_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
Chris@42 39 DK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@42 40 DK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@42 41 DK(KP621716863, +0.621716863012209892444754556304102309693593202);
Chris@42 42 DK(KP614372930, +0.614372930789563808870829930444362096004872855);
Chris@42 43 DK(KP557913902, +0.557913902031834264187699648465567037992437152);
Chris@42 44 DK(KP249506682, +0.249506682107067890488084201715862638334226305);
Chris@42 45 DK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@42 46 DK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@42 47 DK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@42 48 DK(KP968479752, +0.968479752739016373193524836781420152702090879);
Chris@42 49 DK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@42 50 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@42 51 DK(KP994076283, +0.994076283785401014123185814696322018529298887);
Chris@42 52 DK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@42 53 DK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@42 54 DK(KP062914667, +0.062914667253649757225485955897349402364686947);
Chris@42 55 DK(KP921177326, +0.921177326965143320250447435415066029359282231);
Chris@42 56 DK(KP833417178, +0.833417178328688677408962550243238843138996060);
Chris@42 57 DK(KP541454447, +0.541454447536312777046285590082819509052033189);
Chris@42 58 DK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@42 59 DK(KP943557151, +0.943557151597354104399655195398983005179443399);
Chris@42 60 DK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@42 61 DK(KP554608978, +0.554608978404018097464974850792216217022558774);
Chris@42 62 DK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@42 63 DK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@42 64 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@42 65 DK(KP248028675, +0.248028675328619457762448260696444630363259177);
Chris@42 66 DK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@42 67 DK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@42 68 DK(KP525970792, +0.525970792408939708442463226536226366643874659);
Chris@42 69 DK(KP726211448, +0.726211448929902658173535992263577167607493062);
Chris@42 70 DK(KP549754652, +0.549754652192770074288023275540779861653779767);
Chris@42 71 DK(KP871714437, +0.871714437527667770979999223229522602943903653);
Chris@42 72 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@42 73 DK(KP939062505, +0.939062505817492352556001843133229685779824606);
Chris@42 74 DK(KP851038619, +0.851038619207379630836264138867114231259902550);
Chris@42 75 DK(KP256756360, +0.256756360367726783319498520922669048172391148);
Chris@42 76 DK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@42 77 DK(KP634619297, +0.634619297544148100711287640319130485732531031);
Chris@42 78 DK(KP912018591, +0.912018591466481957908415381764119056233607330);
Chris@42 79 DK(KP470564281, +0.470564281212251493087595091036643380879947982);
Chris@42 80 DK(KP827271945, +0.827271945972475634034355757144307982555673741);
Chris@42 81 DK(KP126329378, +0.126329378446108174786050455341811215027378105);
Chris@42 82 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 83 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 84 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 85 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@42 86 {
Chris@42 87 INT m;
Chris@42 88 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) {
Chris@42 89 E T7M, T6S, T6Q, T7S, T7Q, T7L, T6R, T6J, T7N, T7R;
Chris@42 90 {
Chris@42 91 E T2, T8, T3, T6, Tk, Tv, TS, T4, Ta, TD, T2L, T10, Tm, T5, Tc;
Chris@42 92 T2 = W[0];
Chris@42 93 T8 = W[4];
Chris@42 94 T3 = W[2];
Chris@42 95 T6 = W[3];
Chris@42 96 Tk = W[6];
Chris@42 97 Tv = T2 * T8;
Chris@42 98 TS = T3 * T8;
Chris@42 99 T4 = T2 * T3;
Chris@42 100 Ta = T2 * T6;
Chris@42 101 TD = T8 * Tk;
Chris@42 102 T2L = T2 * Tk;
Chris@42 103 T10 = T3 * Tk;
Chris@42 104 Tm = W[7];
Chris@42 105 T5 = W[1];
Chris@42 106 Tc = W[5];
Chris@42 107 {
Chris@42 108 E T7u, T7U, T4s, T6a, T4g, TN, T4f, T7q, T8j, T7p, T4G, T6k, T3a, T4z, T6n;
Chris@42 109 E T6m, T4w, T4a, T4D, T6j, T6C, T54, T6z, T5b, T1v, T3t, T6y, T58, T6B, T51;
Chris@42 110 E T6v, T5j, T6s, T5q, T21, T3H, T6r, T5n, T6u, T5g, T26, T3K, T4N, T2A, T3U;
Chris@42 111 E T4U, T2c, T3M, T2k, T3O;
Chris@42 112 {
Chris@42 113 E T11, T1b, Tb, T19, T7, T2m, TT, T15, T2Q, TX, T2p, T1g, T2a, T2e, T2i;
Chris@42 114 E T27, T1c, T1O, T1K, T1q, T1m, T2x, T2t, T1W, T1S, T2G, T3Y, T2N, T4F, T38;
Chris@42 115 E T48, T4y, T2K, T40, T2S, T41;
Chris@42 116 {
Chris@42 117 E T2M, T1j, T1l, T2X, T2U, T35, T31, T7l, T7n, T7m, T2O, T2R;
Chris@42 118 {
Chris@42 119 E T1, Tj, T4j, TK, T4q, TC, T4o, Tt, T4l;
Chris@42 120 {
Chris@42 121 E TE, Tw, TI, TA, Th, Tr, Tn, Td, Te, Ti, T14, T2P, TH, Tx, TB;
Chris@42 122 T1 = cr[0];
Chris@42 123 T11 = FMA(T6, Tm, T10);
Chris@42 124 T14 = T3 * Tm;
Chris@42 125 T2P = T2 * Tm;
Chris@42 126 TH = T8 * Tm;
Chris@42 127 T2M = FMA(T5, Tm, T2L);
Chris@42 128 T1b = FNMS(T5, T3, Ta);
Chris@42 129 Tb = FMA(T5, T3, Ta);
Chris@42 130 T19 = FMA(T5, T6, T4);
Chris@42 131 T7 = FNMS(T5, T6, T4);
Chris@42 132 T2m = FNMS(T6, Tc, TS);
Chris@42 133 TT = FMA(T6, Tc, TS);
Chris@42 134 TE = FMA(Tc, Tm, TD);
Chris@42 135 T1j = FMA(T5, Tc, Tv);
Chris@42 136 Tw = FNMS(T5, Tc, Tv);
Chris@42 137 {
Chris@42 138 E TW, Tz, T1f, T2d;
Chris@42 139 TW = T3 * Tc;
Chris@42 140 Tz = T2 * Tc;
Chris@42 141 T15 = FNMS(T6, Tk, T14);
Chris@42 142 T2Q = FNMS(T5, Tk, T2P);
Chris@42 143 TI = FNMS(Tc, Tk, TH);
Chris@42 144 T1f = T19 * Tc;
Chris@42 145 T2d = T19 * Tk;
Chris@42 146 {
Chris@42 147 E T2h, T1a, Tg, Tq;
Chris@42 148 T2h = T19 * Tm;
Chris@42 149 T1a = T19 * T8;
Chris@42 150 Tg = T7 * Tc;
Chris@42 151 Tq = T7 * Tm;
Chris@42 152 {
Chris@42 153 E Tl, T9, T1p, T1k;
Chris@42 154 Tl = T7 * Tk;
Chris@42 155 T9 = T7 * T8;
Chris@42 156 T1p = T1j * Tm;
Chris@42 157 T1k = T1j * Tk;
Chris@42 158 {
Chris@42 159 E T34, T30, T1N, T1J;
Chris@42 160 T34 = TT * Tm;
Chris@42 161 T30 = TT * Tk;
Chris@42 162 T1N = Tw * Tm;
Chris@42 163 T1J = Tw * Tk;
Chris@42 164 TX = FNMS(T6, T8, TW);
Chris@42 165 T2p = FMA(T6, T8, TW);
Chris@42 166 TA = FMA(T5, T8, Tz);
Chris@42 167 T1l = FNMS(T5, T8, Tz);
Chris@42 168 T1g = FMA(T1b, T8, T1f);
Chris@42 169 T2a = FNMS(T1b, T8, T1f);
Chris@42 170 T2e = FMA(T1b, Tm, T2d);
Chris@42 171 T2i = FNMS(T1b, Tk, T2h);
Chris@42 172 T27 = FMA(T1b, Tc, T1a);
Chris@42 173 T1c = FNMS(T1b, Tc, T1a);
Chris@42 174 T2X = FMA(Tb, T8, Tg);
Chris@42 175 Th = FNMS(Tb, T8, Tg);
Chris@42 176 Tr = FNMS(Tb, Tk, Tq);
Chris@42 177 Tn = FMA(Tb, Tm, Tl);
Chris@42 178 Td = FMA(Tb, Tc, T9);
Chris@42 179 T2U = FNMS(Tb, Tc, T9);
Chris@42 180 T35 = FNMS(TX, Tk, T34);
Chris@42 181 T31 = FMA(TX, Tm, T30);
Chris@42 182 T1O = FNMS(TA, Tk, T1N);
Chris@42 183 T1K = FMA(TA, Tm, T1J);
Chris@42 184 T1q = FNMS(T1l, Tk, T1p);
Chris@42 185 T1m = FMA(T1l, Tm, T1k);
Chris@42 186 {
Chris@42 187 E T2w, T2s, T1V, T1R;
Chris@42 188 T2w = T27 * Tm;
Chris@42 189 T2s = T27 * Tk;
Chris@42 190 T1V = Td * Tm;
Chris@42 191 T1R = Td * Tk;
Chris@42 192 T2x = FNMS(T2a, Tk, T2w);
Chris@42 193 T2t = FMA(T2a, Tm, T2s);
Chris@42 194 T1W = FNMS(Th, Tk, T1V);
Chris@42 195 T1S = FMA(Th, Tm, T1R);
Chris@42 196 T7l = ci[0];
Chris@42 197 Te = cr[WS(rs, 5)];
Chris@42 198 Ti = ci[WS(rs, 5)];
Chris@42 199 }
Chris@42 200 }
Chris@42 201 }
Chris@42 202 }
Chris@42 203 }
Chris@42 204 {
Chris@42 205 E TF, TJ, Tf, T4i, TG, T4p;
Chris@42 206 TF = cr[WS(rs, 15)];
Chris@42 207 TJ = ci[WS(rs, 15)];
Chris@42 208 Tf = Td * Te;
Chris@42 209 T4i = Td * Ti;
Chris@42 210 TG = TE * TF;
Chris@42 211 T4p = TE * TJ;
Chris@42 212 Tj = FMA(Th, Ti, Tf);
Chris@42 213 T4j = FNMS(Th, Te, T4i);
Chris@42 214 TK = FMA(TI, TJ, TG);
Chris@42 215 T4q = FNMS(TI, TF, T4p);
Chris@42 216 }
Chris@42 217 Tx = cr[WS(rs, 10)];
Chris@42 218 TB = ci[WS(rs, 10)];
Chris@42 219 {
Chris@42 220 E To, Ts, Ty, T4n, Tp, T4k;
Chris@42 221 To = cr[WS(rs, 20)];
Chris@42 222 Ts = ci[WS(rs, 20)];
Chris@42 223 Ty = Tw * Tx;
Chris@42 224 T4n = Tw * TB;
Chris@42 225 Tp = Tn * To;
Chris@42 226 T4k = Tn * Ts;
Chris@42 227 TC = FMA(TA, TB, Ty);
Chris@42 228 T4o = FNMS(TA, Tx, T4n);
Chris@42 229 Tt = FMA(Tr, Ts, Tp);
Chris@42 230 T4l = FNMS(Tr, To, T4k);
Chris@42 231 }
Chris@42 232 }
Chris@42 233 {
Chris@42 234 E TL, T7s, T4r, Tu, T7t, T4m, TM;
Chris@42 235 TL = TC + TK;
Chris@42 236 T7s = TC - TK;
Chris@42 237 T4r = T4o - T4q;
Chris@42 238 T7n = T4o + T4q;
Chris@42 239 Tu = Tj + Tt;
Chris@42 240 T7t = Tj - Tt;
Chris@42 241 T4m = T4j - T4l;
Chris@42 242 T7m = T4j + T4l;
Chris@42 243 T7u = FNMS(KP618033988, T7t, T7s);
Chris@42 244 T7U = FMA(KP618033988, T7s, T7t);
Chris@42 245 T4s = FMA(KP618033988, T4r, T4m);
Chris@42 246 T6a = FNMS(KP618033988, T4m, T4r);
Chris@42 247 T4g = Tu - TL;
Chris@42 248 TM = Tu + TL;
Chris@42 249 TN = T1 + TM;
Chris@42 250 T4f = FNMS(KP250000000, TM, T1);
Chris@42 251 }
Chris@42 252 }
Chris@42 253 {
Chris@42 254 E T2D, T2F, T7o, T2E, T3X;
Chris@42 255 T2D = cr[WS(rs, 3)];
Chris@42 256 T2F = ci[WS(rs, 3)];
Chris@42 257 T7q = T7m - T7n;
Chris@42 258 T7o = T7m + T7n;
Chris@42 259 T2E = T3 * T2D;
Chris@42 260 T3X = T3 * T2F;
Chris@42 261 {
Chris@42 262 E T2V, T2W, T2Y, T32, T36;
Chris@42 263 T2V = cr[WS(rs, 13)];
Chris@42 264 T8j = T7o + T7l;
Chris@42 265 T7p = FNMS(KP250000000, T7o, T7l);
Chris@42 266 T2G = FMA(T6, T2F, T2E);
Chris@42 267 T3Y = FNMS(T6, T2D, T3X);
Chris@42 268 T2W = T2U * T2V;
Chris@42 269 T2Y = ci[WS(rs, 13)];
Chris@42 270 T32 = cr[WS(rs, 18)];
Chris@42 271 T36 = ci[WS(rs, 18)];
Chris@42 272 {
Chris@42 273 E T2H, T2I, T2J, T3Z;
Chris@42 274 {
Chris@42 275 E T2Z, T45, T37, T47, T44, T33, T46;
Chris@42 276 T2H = cr[WS(rs, 8)];
Chris@42 277 T2Z = FMA(T2X, T2Y, T2W);
Chris@42 278 T44 = T2U * T2Y;
Chris@42 279 T33 = T31 * T32;
Chris@42 280 T46 = T31 * T36;
Chris@42 281 T2I = T1j * T2H;
Chris@42 282 T45 = FNMS(T2X, T2V, T44);
Chris@42 283 T37 = FMA(T35, T36, T33);
Chris@42 284 T47 = FNMS(T35, T32, T46);
Chris@42 285 T2J = ci[WS(rs, 8)];
Chris@42 286 T2N = cr[WS(rs, 23)];
Chris@42 287 T4F = T2Z - T37;
Chris@42 288 T38 = T2Z + T37;
Chris@42 289 T48 = T45 + T47;
Chris@42 290 T4y = T47 - T45;
Chris@42 291 T3Z = T1j * T2J;
Chris@42 292 T2O = T2M * T2N;
Chris@42 293 T2R = ci[WS(rs, 23)];
Chris@42 294 }
Chris@42 295 T2K = FMA(T1l, T2J, T2I);
Chris@42 296 T40 = FNMS(T1l, T2H, T3Z);
Chris@42 297 }
Chris@42 298 }
Chris@42 299 }
Chris@42 300 T2S = FMA(T2Q, T2R, T2O);
Chris@42 301 T41 = T2M * T2R;
Chris@42 302 }
Chris@42 303 {
Chris@42 304 E TR, T3h, T1t, T53, T3r, T5a, TZ, T3j, T17, T3l;
Chris@42 305 {
Chris@42 306 E T12, T16, T13, T3k;
Chris@42 307 {
Chris@42 308 E TO, TP, T4C, T4B, TQ;
Chris@42 309 {
Chris@42 310 E T2T, T4E, T42, T4v, T39;
Chris@42 311 TO = cr[WS(rs, 1)];
Chris@42 312 T2T = T2K + T2S;
Chris@42 313 T4E = T2K - T2S;
Chris@42 314 T42 = FNMS(T2Q, T2N, T41);
Chris@42 315 TP = T2 * TO;
Chris@42 316 T4G = FMA(KP618033988, T4F, T4E);
Chris@42 317 T6k = FNMS(KP618033988, T4E, T4F);
Chris@42 318 T4v = T38 - T2T;
Chris@42 319 T39 = T2T + T38;
Chris@42 320 {
Chris@42 321 E T43, T4x, T4u, T49;
Chris@42 322 T43 = T40 + T42;
Chris@42 323 T4x = T42 - T40;
Chris@42 324 T4u = FNMS(KP250000000, T39, T2G);
Chris@42 325 T3a = T2G + T39;
Chris@42 326 T4z = FMA(KP618033988, T4y, T4x);
Chris@42 327 T6n = FNMS(KP618033988, T4x, T4y);
Chris@42 328 T4C = T48 - T43;
Chris@42 329 T49 = T43 + T48;
Chris@42 330 T6m = FMA(KP559016994, T4v, T4u);
Chris@42 331 T4w = FNMS(KP559016994, T4v, T4u);
Chris@42 332 T4B = FNMS(KP250000000, T49, T3Y);
Chris@42 333 T4a = T3Y + T49;
Chris@42 334 TQ = ci[WS(rs, 1)];
Chris@42 335 }
Chris@42 336 }
Chris@42 337 {
Chris@42 338 E T1n, T1r, T1i, T1o, T3o, T3p;
Chris@42 339 {
Chris@42 340 E T1d, T1h, T1e, T3n, T3g;
Chris@42 341 T1d = cr[WS(rs, 11)];
Chris@42 342 T1h = ci[WS(rs, 11)];
Chris@42 343 T4D = FNMS(KP559016994, T4C, T4B);
Chris@42 344 T6j = FMA(KP559016994, T4C, T4B);
Chris@42 345 TR = FMA(T5, TQ, TP);
Chris@42 346 T3g = T2 * TQ;
Chris@42 347 T1e = T1c * T1d;
Chris@42 348 T3n = T1c * T1h;
Chris@42 349 T1n = cr[WS(rs, 16)];
Chris@42 350 T3h = FNMS(T5, TO, T3g);
Chris@42 351 T1r = ci[WS(rs, 16)];
Chris@42 352 T1i = FMA(T1g, T1h, T1e);
Chris@42 353 T1o = T1m * T1n;
Chris@42 354 T3o = FNMS(T1g, T1d, T3n);
Chris@42 355 T3p = T1m * T1r;
Chris@42 356 }
Chris@42 357 {
Chris@42 358 E TU, TY, TV, T3i, T3q, T1s;
Chris@42 359 TU = cr[WS(rs, 6)];
Chris@42 360 T1s = FMA(T1q, T1r, T1o);
Chris@42 361 TY = ci[WS(rs, 6)];
Chris@42 362 T3q = FNMS(T1q, T1n, T3p);
Chris@42 363 TV = TT * TU;
Chris@42 364 T1t = T1i + T1s;
Chris@42 365 T53 = T1s - T1i;
Chris@42 366 T3i = TT * TY;
Chris@42 367 T3r = T3o + T3q;
Chris@42 368 T5a = T3q - T3o;
Chris@42 369 T12 = cr[WS(rs, 21)];
Chris@42 370 T16 = ci[WS(rs, 21)];
Chris@42 371 TZ = FMA(TX, TY, TV);
Chris@42 372 T3j = FNMS(TX, TU, T3i);
Chris@42 373 T13 = T11 * T12;
Chris@42 374 T3k = T11 * T16;
Chris@42 375 }
Chris@42 376 }
Chris@42 377 }
Chris@42 378 T17 = FMA(T15, T16, T13);
Chris@42 379 T3l = FNMS(T15, T12, T3k);
Chris@42 380 }
Chris@42 381 {
Chris@42 382 E T1z, T3v, T5i, T1Z, T3F, T5p, T1D, T3x, T1H, T3z;
Chris@42 383 {
Chris@42 384 E T1E, T1G, T1F, T3y;
Chris@42 385 {
Chris@42 386 E T1w, T1y, T1x, T57, T50, T56, T4Z, T3u, T18, T52;
Chris@42 387 T1w = cr[WS(rs, 4)];
Chris@42 388 T1y = ci[WS(rs, 4)];
Chris@42 389 T18 = TZ + T17;
Chris@42 390 T52 = T17 - TZ;
Chris@42 391 {
Chris@42 392 E T3m, T59, T1u, T3s;
Chris@42 393 T3m = T3j + T3l;
Chris@42 394 T59 = T3j - T3l;
Chris@42 395 T1x = T7 * T1w;
Chris@42 396 T6C = FNMS(KP618033988, T52, T53);
Chris@42 397 T54 = FMA(KP618033988, T53, T52);
Chris@42 398 T1u = T18 + T1t;
Chris@42 399 T57 = T18 - T1t;
Chris@42 400 T6z = FMA(KP618033988, T59, T5a);
Chris@42 401 T5b = FNMS(KP618033988, T5a, T59);
Chris@42 402 T3s = T3m + T3r;
Chris@42 403 T50 = T3m - T3r;
Chris@42 404 T1v = TR + T1u;
Chris@42 405 T56 = FNMS(KP250000000, T1u, TR);
Chris@42 406 T3t = T3h + T3s;
Chris@42 407 T4Z = FNMS(KP250000000, T3s, T3h);
Chris@42 408 T3u = T7 * T1y;
Chris@42 409 }
Chris@42 410 T6y = FNMS(KP559016994, T57, T56);
Chris@42 411 T58 = FMA(KP559016994, T57, T56);
Chris@42 412 T6B = FNMS(KP559016994, T50, T4Z);
Chris@42 413 T51 = FMA(KP559016994, T50, T4Z);
Chris@42 414 T1z = FMA(Tb, T1y, T1x);
Chris@42 415 T3v = FNMS(Tb, T1w, T3u);
Chris@42 416 }
Chris@42 417 {
Chris@42 418 E T1Q, T3C, T1Y, T3E;
Chris@42 419 {
Chris@42 420 E T1L, T1P, T1T, T1X, T1M, T3B, T1U, T3D;
Chris@42 421 T1L = cr[WS(rs, 14)];
Chris@42 422 T1P = ci[WS(rs, 14)];
Chris@42 423 T1T = cr[WS(rs, 19)];
Chris@42 424 T1X = ci[WS(rs, 19)];
Chris@42 425 T1M = T1K * T1L;
Chris@42 426 T3B = T1K * T1P;
Chris@42 427 T1U = T1S * T1T;
Chris@42 428 T3D = T1S * T1X;
Chris@42 429 T1Q = FMA(T1O, T1P, T1M);
Chris@42 430 T3C = FNMS(T1O, T1L, T3B);
Chris@42 431 T1Y = FMA(T1W, T1X, T1U);
Chris@42 432 T3E = FNMS(T1W, T1T, T3D);
Chris@42 433 }
Chris@42 434 {
Chris@42 435 E T1A, T1C, T1B, T3w;
Chris@42 436 T1A = cr[WS(rs, 9)];
Chris@42 437 T1C = ci[WS(rs, 9)];
Chris@42 438 T5i = T1Y - T1Q;
Chris@42 439 T1Z = T1Q + T1Y;
Chris@42 440 T3F = T3C + T3E;
Chris@42 441 T5p = T3E - T3C;
Chris@42 442 T1B = T8 * T1A;
Chris@42 443 T3w = T8 * T1C;
Chris@42 444 T1E = cr[WS(rs, 24)];
Chris@42 445 T1G = ci[WS(rs, 24)];
Chris@42 446 T1D = FMA(Tc, T1C, T1B);
Chris@42 447 T3x = FNMS(Tc, T1A, T3w);
Chris@42 448 T1F = Tk * T1E;
Chris@42 449 T3y = Tk * T1G;
Chris@42 450 }
Chris@42 451 }
Chris@42 452 T1H = FMA(Tm, T1G, T1F);
Chris@42 453 T3z = FNMS(Tm, T1E, T3y);
Chris@42 454 }
Chris@42 455 {
Chris@42 456 E T2f, T2j, T2g, T3N;
Chris@42 457 {
Chris@42 458 E T23, T25, T24, T5m, T5f, T5l, T5e, T3J, T1I, T5h;
Chris@42 459 T23 = cr[WS(rs, 2)];
Chris@42 460 T25 = ci[WS(rs, 2)];
Chris@42 461 T1I = T1D + T1H;
Chris@42 462 T5h = T1H - T1D;
Chris@42 463 {
Chris@42 464 E T3A, T5o, T20, T3G;
Chris@42 465 T3A = T3x + T3z;
Chris@42 466 T5o = T3z - T3x;
Chris@42 467 T24 = T19 * T23;
Chris@42 468 T6v = FNMS(KP618033988, T5h, T5i);
Chris@42 469 T5j = FMA(KP618033988, T5i, T5h);
Chris@42 470 T20 = T1I + T1Z;
Chris@42 471 T5m = T1I - T1Z;
Chris@42 472 T6s = FNMS(KP618033988, T5o, T5p);
Chris@42 473 T5q = FMA(KP618033988, T5p, T5o);
Chris@42 474 T3G = T3A + T3F;
Chris@42 475 T5f = T3F - T3A;
Chris@42 476 T21 = T1z + T20;
Chris@42 477 T5l = FNMS(KP250000000, T20, T1z);
Chris@42 478 T3H = T3v + T3G;
Chris@42 479 T5e = FNMS(KP250000000, T3G, T3v);
Chris@42 480 T3J = T19 * T25;
Chris@42 481 }
Chris@42 482 T6r = FNMS(KP559016994, T5m, T5l);
Chris@42 483 T5n = FMA(KP559016994, T5m, T5l);
Chris@42 484 T6u = FMA(KP559016994, T5f, T5e);
Chris@42 485 T5g = FNMS(KP559016994, T5f, T5e);
Chris@42 486 T26 = FMA(T1b, T25, T24);
Chris@42 487 T3K = FNMS(T1b, T23, T3J);
Chris@42 488 }
Chris@42 489 {
Chris@42 490 E T2r, T3R, T2z, T3T;
Chris@42 491 {
Chris@42 492 E T2n, T2q, T2u, T2y, T2o, T3Q, T2v, T3S;
Chris@42 493 T2n = cr[WS(rs, 12)];
Chris@42 494 T2q = ci[WS(rs, 12)];
Chris@42 495 T2u = cr[WS(rs, 17)];
Chris@42 496 T2y = ci[WS(rs, 17)];
Chris@42 497 T2o = T2m * T2n;
Chris@42 498 T3Q = T2m * T2q;
Chris@42 499 T2v = T2t * T2u;
Chris@42 500 T3S = T2t * T2y;
Chris@42 501 T2r = FMA(T2p, T2q, T2o);
Chris@42 502 T3R = FNMS(T2p, T2n, T3Q);
Chris@42 503 T2z = FMA(T2x, T2y, T2v);
Chris@42 504 T3T = FNMS(T2x, T2u, T3S);
Chris@42 505 }
Chris@42 506 {
Chris@42 507 E T28, T2b, T29, T3L;
Chris@42 508 T28 = cr[WS(rs, 7)];
Chris@42 509 T2b = ci[WS(rs, 7)];
Chris@42 510 T4N = T2z - T2r;
Chris@42 511 T2A = T2r + T2z;
Chris@42 512 T3U = T3R + T3T;
Chris@42 513 T4U = T3R - T3T;
Chris@42 514 T29 = T27 * T28;
Chris@42 515 T3L = T27 * T2b;
Chris@42 516 T2f = cr[WS(rs, 22)];
Chris@42 517 T2j = ci[WS(rs, 22)];
Chris@42 518 T2c = FMA(T2a, T2b, T29);
Chris@42 519 T3M = FNMS(T2a, T28, T3L);
Chris@42 520 T2g = T2e * T2f;
Chris@42 521 T3N = T2e * T2j;
Chris@42 522 }
Chris@42 523 }
Chris@42 524 T2k = FMA(T2i, T2j, T2g);
Chris@42 525 T3O = FNMS(T2i, T2f, T3N);
Chris@42 526 }
Chris@42 527 }
Chris@42 528 }
Chris@42 529 }
Chris@42 530 {
Chris@42 531 E T8k, T6d, T6g, T8r, T6f, T8l, T6c, T8q, T69, T7r, T5Y, T8g, T8i, T66, T68;
Chris@42 532 E T5X, T8d, T8h;
Chris@42 533 {
Chris@42 534 E T4O, T4V, T22, T4S, T4L, T3b, T4e, T4c, T3I;
Chris@42 535 T8k = T3t + T3H;
Chris@42 536 T3I = T3t - T3H;
Chris@42 537 {
Chris@42 538 E T2l, T4M, T3P, T4T;
Chris@42 539 T2l = T2c + T2k;
Chris@42 540 T4M = T2k - T2c;
Chris@42 541 T3P = T3M + T3O;
Chris@42 542 T4T = T3O - T3M;
Chris@42 543 T4O = FMA(KP618033988, T4N, T4M);
Chris@42 544 T6d = FNMS(KP618033988, T4M, T4N);
Chris@42 545 {
Chris@42 546 E T4R, T2B, T4K, T3V;
Chris@42 547 T4R = T2A - T2l;
Chris@42 548 T2B = T2l + T2A;
Chris@42 549 T4V = FNMS(KP618033988, T4U, T4T);
Chris@42 550 T6g = FMA(KP618033988, T4T, T4U);
Chris@42 551 T4K = T3U - T3P;
Chris@42 552 T3V = T3P + T3U;
Chris@42 553 {
Chris@42 554 E T4Q, T2C, T4J, T3W, T4b;
Chris@42 555 T4Q = FNMS(KP250000000, T2B, T26);
Chris@42 556 T2C = T26 + T2B;
Chris@42 557 T4J = FNMS(KP250000000, T3V, T3K);
Chris@42 558 T3W = T3K + T3V;
Chris@42 559 T8r = T21 - T1v;
Chris@42 560 T22 = T1v + T21;
Chris@42 561 T4S = FNMS(KP559016994, T4R, T4Q);
Chris@42 562 T6f = FMA(KP559016994, T4R, T4Q);
Chris@42 563 T4b = T3W - T4a;
Chris@42 564 T8l = T3W + T4a;
Chris@42 565 T6c = FMA(KP559016994, T4K, T4J);
Chris@42 566 T4L = FNMS(KP559016994, T4K, T4J);
Chris@42 567 T8q = T2C - T3a;
Chris@42 568 T3b = T2C + T3a;
Chris@42 569 T4e = FNMS(KP618033988, T3I, T4b);
Chris@42 570 T4c = FMA(KP618033988, T4b, T3I);
Chris@42 571 }
Chris@42 572 }
Chris@42 573 }
Chris@42 574 {
Chris@42 575 E T5H, T4t, T7V, T87, T5Q, T5P, T5D, T8e, T5A, T8f, T5K, T60, T8c, T8a, T5u;
Chris@42 576 E T5w, T5U, T64, T5N, T61;
Chris@42 577 {
Chris@42 578 E T3e, T3d, T4h, T3c, T7T;
Chris@42 579 T4h = FMA(KP559016994, T4g, T4f);
Chris@42 580 T69 = FNMS(KP559016994, T4g, T4f);
Chris@42 581 T3c = T22 + T3b;
Chris@42 582 T3e = T22 - T3b;
Chris@42 583 T7r = FNMS(KP559016994, T7q, T7p);
Chris@42 584 T7T = FMA(KP559016994, T7q, T7p);
Chris@42 585 T5H = FMA(KP951056516, T4s, T4h);
Chris@42 586 T4t = FNMS(KP951056516, T4s, T4h);
Chris@42 587 cr[0] = TN + T3c;
Chris@42 588 T3d = FNMS(KP250000000, T3c, TN);
Chris@42 589 T7V = FNMS(KP951056516, T7U, T7T);
Chris@42 590 T87 = FMA(KP951056516, T7U, T7T);
Chris@42 591 {
Chris@42 592 E T5S, T5T, T5L, T4I, T5B, T5M, T55, T5J, T5s, T5z, T4X, T5C, T5I, T5c;
Chris@42 593 {
Chris@42 594 E T5k, T5r, T4P, T4W;
Chris@42 595 {
Chris@42 596 E T4A, T4d, T3f, T4H;
Chris@42 597 T4A = FMA(KP951056516, T4z, T4w);
Chris@42 598 T5S = FNMS(KP951056516, T4z, T4w);
Chris@42 599 T4d = FNMS(KP559016994, T3e, T3d);
Chris@42 600 T3f = FMA(KP559016994, T3e, T3d);
Chris@42 601 T5T = FNMS(KP951056516, T4G, T4D);
Chris@42 602 T4H = FMA(KP951056516, T4G, T4D);
Chris@42 603 T5k = FNMS(KP951056516, T5j, T5g);
Chris@42 604 T5L = FMA(KP951056516, T5j, T5g);
Chris@42 605 cr[WS(rs, 5)] = FMA(KP951056516, T4c, T3f);
Chris@42 606 ci[WS(rs, 4)] = FNMS(KP951056516, T4c, T3f);
Chris@42 607 ci[WS(rs, 9)] = FMA(KP951056516, T4e, T4d);
Chris@42 608 cr[WS(rs, 10)] = FNMS(KP951056516, T4e, T4d);
Chris@42 609 T4I = FNMS(KP126329378, T4H, T4A);
Chris@42 610 T5B = FMA(KP126329378, T4A, T4H);
Chris@42 611 T5M = FNMS(KP951056516, T5q, T5n);
Chris@42 612 T5r = FMA(KP951056516, T5q, T5n);
Chris@42 613 }
Chris@42 614 T4P = FNMS(KP951056516, T4O, T4L);
Chris@42 615 T5Q = FMA(KP951056516, T4O, T4L);
Chris@42 616 T5P = FNMS(KP951056516, T4V, T4S);
Chris@42 617 T4W = FMA(KP951056516, T4V, T4S);
Chris@42 618 T55 = FNMS(KP951056516, T54, T51);
Chris@42 619 T5J = FMA(KP951056516, T54, T51);
Chris@42 620 T5s = FMA(KP827271945, T5r, T5k);
Chris@42 621 T5z = FNMS(KP827271945, T5k, T5r);
Chris@42 622 T4X = FNMS(KP470564281, T4W, T4P);
Chris@42 623 T5C = FMA(KP470564281, T4P, T4W);
Chris@42 624 T5I = FMA(KP951056516, T5b, T58);
Chris@42 625 T5c = FNMS(KP951056516, T5b, T58);
Chris@42 626 }
Chris@42 627 {
Chris@42 628 E T88, T4Y, T5d, T5y, T89, T5t;
Chris@42 629 T5D = FNMS(KP912018591, T5C, T5B);
Chris@42 630 T88 = FMA(KP912018591, T5C, T5B);
Chris@42 631 T8e = FMA(KP912018591, T4X, T4I);
Chris@42 632 T4Y = FNMS(KP912018591, T4X, T4I);
Chris@42 633 T5d = FMA(KP634619297, T5c, T55);
Chris@42 634 T5y = FNMS(KP634619297, T55, T5c);
Chris@42 635 T5A = FMA(KP912575812, T5z, T5y);
Chris@42 636 T89 = FNMS(KP912575812, T5z, T5y);
Chris@42 637 T8f = FMA(KP912575812, T5s, T5d);
Chris@42 638 T5t = FNMS(KP912575812, T5s, T5d);
Chris@42 639 T5K = FMA(KP256756360, T5J, T5I);
Chris@42 640 T60 = FNMS(KP256756360, T5I, T5J);
Chris@42 641 T8c = FNMS(KP851038619, T89, T88);
Chris@42 642 T8a = FMA(KP851038619, T89, T88);
Chris@42 643 T5u = FNMS(KP851038619, T5t, T4Y);
Chris@42 644 T5w = FMA(KP851038619, T5t, T4Y);
Chris@42 645 }
Chris@42 646 T5U = FMA(KP939062505, T5T, T5S);
Chris@42 647 T64 = FNMS(KP939062505, T5S, T5T);
Chris@42 648 T5N = FMA(KP634619297, T5M, T5L);
Chris@42 649 T61 = FNMS(KP634619297, T5L, T5M);
Chris@42 650 }
Chris@42 651 }
Chris@42 652 {
Chris@42 653 E T62, T7W, T83, T5O, T5R, T63;
Chris@42 654 cr[WS(rs, 4)] = FNMS(KP992114701, T5u, T4t);
Chris@42 655 T62 = FMA(KP871714437, T61, T60);
Chris@42 656 T7W = FNMS(KP871714437, T61, T60);
Chris@42 657 T83 = FNMS(KP871714437, T5N, T5K);
Chris@42 658 T5O = FMA(KP871714437, T5N, T5K);
Chris@42 659 T5R = FMA(KP549754652, T5Q, T5P);
Chris@42 660 T63 = FNMS(KP549754652, T5P, T5Q);
Chris@42 661 ci[WS(rs, 20)] = FNMS(KP992114701, T8a, T87);
Chris@42 662 {
Chris@42 663 E T65, T5W, T84, T86, T81, T85, T8b;
Chris@42 664 {
Chris@42 665 E T5E, T5G, T82, T80, T7Y, T5v, T7X, T5V, T5F, T5x, T7Z;
Chris@42 666 T5E = FNMS(KP726211448, T5D, T5A);
Chris@42 667 T5G = FMA(KP525970792, T5A, T5D);
Chris@42 668 T65 = FNMS(KP831864738, T64, T63);
Chris@42 669 T7X = FMA(KP831864738, T64, T63);
Chris@42 670 T82 = FNMS(KP831864738, T5U, T5R);
Chris@42 671 T5V = FMA(KP831864738, T5U, T5R);
Chris@42 672 T80 = FNMS(KP904730450, T7X, T7W);
Chris@42 673 T7Y = FMA(KP904730450, T7X, T7W);
Chris@42 674 T5Y = FNMS(KP904730450, T5V, T5O);
Chris@42 675 T5W = FMA(KP904730450, T5V, T5O);
Chris@42 676 T5v = FMA(KP248028675, T5u, T4t);
Chris@42 677 ci[WS(rs, 23)] = FMA(KP968583161, T7Y, T7V);
Chris@42 678 cr[WS(rs, 1)] = FMA(KP968583161, T5W, T5H);
Chris@42 679 T84 = FNMS(KP683113946, T83, T82);
Chris@42 680 T86 = FMA(KP559154169, T82, T83);
Chris@42 681 T5F = FNMS(KP554608978, T5w, T5v);
Chris@42 682 T5x = FMA(KP554608978, T5w, T5v);
Chris@42 683 T7Z = FNMS(KP242145790, T7Y, T7V);
Chris@42 684 ci[WS(rs, 10)] = FNMS(KP943557151, T5G, T5F);
Chris@42 685 ci[WS(rs, 5)] = FMA(KP943557151, T5G, T5F);
Chris@42 686 ci[0] = FMA(KP803003575, T5E, T5x);
Chris@42 687 cr[WS(rs, 9)] = FNMS(KP803003575, T5E, T5x);
Chris@42 688 T81 = FNMS(KP541454447, T80, T7Z);
Chris@42 689 T85 = FMA(KP541454447, T80, T7Z);
Chris@42 690 }
Chris@42 691 T8g = FNMS(KP525970792, T8f, T8e);
Chris@42 692 T8i = FMA(KP726211448, T8e, T8f);
Chris@42 693 ci[WS(rs, 13)] = FMA(KP833417178, T84, T81);
Chris@42 694 cr[WS(rs, 16)] = FMS(KP833417178, T84, T81);
Chris@42 695 cr[WS(rs, 21)] = -(FMA(KP921177326, T86, T85));
Chris@42 696 ci[WS(rs, 18)] = FNMS(KP921177326, T86, T85);
Chris@42 697 T8b = FMA(KP248028675, T8a, T87);
Chris@42 698 T66 = FMA(KP559154169, T65, T62);
Chris@42 699 T68 = FNMS(KP683113946, T62, T65);
Chris@42 700 T5X = FNMS(KP242145790, T5W, T5H);
Chris@42 701 T8d = FNMS(KP554608978, T8c, T8b);
Chris@42 702 T8h = FMA(KP554608978, T8c, T8b);
Chris@42 703 }
Chris@42 704 }
Chris@42 705 }
Chris@42 706 }
Chris@42 707 {
Chris@42 708 E T8s, T8u, T5Z, T67;
Chris@42 709 cr[WS(rs, 24)] = -(FMA(KP803003575, T8i, T8h));
Chris@42 710 ci[WS(rs, 15)] = FNMS(KP803003575, T8i, T8h);
Chris@42 711 cr[WS(rs, 19)] = FMS(KP943557151, T8g, T8d);
Chris@42 712 cr[WS(rs, 14)] = -(FMA(KP943557151, T8g, T8d));
Chris@42 713 T5Z = FMA(KP541454447, T5Y, T5X);
Chris@42 714 T67 = FNMS(KP541454447, T5Y, T5X);
Chris@42 715 cr[WS(rs, 11)] = FNMS(KP833417178, T68, T67);
Chris@42 716 ci[WS(rs, 8)] = FMA(KP833417178, T68, T67);
Chris@42 717 cr[WS(rs, 6)] = FMA(KP921177326, T66, T5Z);
Chris@42 718 ci[WS(rs, 3)] = FNMS(KP921177326, T66, T5Z);
Chris@42 719 T8s = FMA(KP618033988, T8r, T8q);
Chris@42 720 T8u = FNMS(KP618033988, T8q, T8r);
Chris@42 721 {
Chris@42 722 E T6X, T6T, T6b, T7H, T7v, T6Y, T72, T71, T6P, T7O, T6M, T7P, T7K, T6G, T6I;
Chris@42 723 E T6W, T7f, T7d, T76;
Chris@42 724 {
Chris@42 725 E T74, T75, T6i, T6N, T6L, T6E, T6U, T6l, T6o, T6V, T6t, T6w;
Chris@42 726 {
Chris@42 727 E T6e, T8o, T8n, T6h, T8m;
Chris@42 728 T6X = FNMS(KP951056516, T6d, T6c);
Chris@42 729 T6e = FMA(KP951056516, T6d, T6c);
Chris@42 730 T8o = T8k - T8l;
Chris@42 731 T8m = T8k + T8l;
Chris@42 732 T6T = FNMS(KP951056516, T6a, T69);
Chris@42 733 T6b = FMA(KP951056516, T6a, T69);
Chris@42 734 T7H = FNMS(KP951056516, T7u, T7r);
Chris@42 735 T7v = FMA(KP951056516, T7u, T7r);
Chris@42 736 ci[WS(rs, 24)] = T8m + T8j;
Chris@42 737 T8n = FNMS(KP250000000, T8m, T8j);
Chris@42 738 T6h = FMA(KP951056516, T6g, T6f);
Chris@42 739 T6Y = FNMS(KP951056516, T6g, T6f);
Chris@42 740 {
Chris@42 741 E T6A, T6D, T8t, T8p;
Chris@42 742 T74 = FMA(KP951056516, T6z, T6y);
Chris@42 743 T6A = FNMS(KP951056516, T6z, T6y);
Chris@42 744 T6D = FMA(KP951056516, T6C, T6B);
Chris@42 745 T75 = FNMS(KP951056516, T6C, T6B);
Chris@42 746 T8t = FMA(KP559016994, T8o, T8n);
Chris@42 747 T8p = FNMS(KP559016994, T8o, T8n);
Chris@42 748 T6i = FMA(KP062914667, T6h, T6e);
Chris@42 749 T6N = FNMS(KP062914667, T6e, T6h);
Chris@42 750 ci[WS(rs, 14)] = FMA(KP951056516, T8s, T8p);
Chris@42 751 cr[WS(rs, 15)] = FMS(KP951056516, T8s, T8p);
Chris@42 752 ci[WS(rs, 19)] = FMA(KP951056516, T8u, T8t);
Chris@42 753 cr[WS(rs, 20)] = FMS(KP951056516, T8u, T8t);
Chris@42 754 T6L = FNMS(KP939062505, T6A, T6D);
Chris@42 755 T6E = FMA(KP939062505, T6D, T6A);
Chris@42 756 }
Chris@42 757 }
Chris@42 758 T6U = FMA(KP951056516, T6k, T6j);
Chris@42 759 T6l = FNMS(KP951056516, T6k, T6j);
Chris@42 760 T6o = FNMS(KP951056516, T6n, T6m);
Chris@42 761 T6V = FMA(KP951056516, T6n, T6m);
Chris@42 762 T72 = FMA(KP951056516, T6s, T6r);
Chris@42 763 T6t = FNMS(KP951056516, T6s, T6r);
Chris@42 764 T6w = FMA(KP951056516, T6v, T6u);
Chris@42 765 T71 = FNMS(KP951056516, T6v, T6u);
Chris@42 766 {
Chris@42 767 E T6q, T6F, T6O, T6p;
Chris@42 768 T6O = FMA(KP827271945, T6l, T6o);
Chris@42 769 T6p = FNMS(KP827271945, T6o, T6l);
Chris@42 770 {
Chris@42 771 E T6K, T6x, T7I, T7J;
Chris@42 772 T6K = FMA(KP126329378, T6t, T6w);
Chris@42 773 T6x = FNMS(KP126329378, T6w, T6t);
Chris@42 774 T7I = FMA(KP772036680, T6O, T6N);
Chris@42 775 T6P = FNMS(KP772036680, T6O, T6N);
Chris@42 776 T6q = FMA(KP772036680, T6p, T6i);
Chris@42 777 T7O = FNMS(KP772036680, T6p, T6i);
Chris@42 778 T7J = FNMS(KP734762448, T6L, T6K);
Chris@42 779 T6M = FMA(KP734762448, T6L, T6K);
Chris@42 780 T6F = FNMS(KP734762448, T6E, T6x);
Chris@42 781 T7P = FMA(KP734762448, T6E, T6x);
Chris@42 782 T7K = FMA(KP994076283, T7J, T7I);
Chris@42 783 T7M = FNMS(KP994076283, T7J, T7I);
Chris@42 784 }
Chris@42 785 T6G = FNMS(KP994076283, T6F, T6q);
Chris@42 786 T6I = FMA(KP994076283, T6F, T6q);
Chris@42 787 }
Chris@42 788 T6W = FMA(KP062914667, T6V, T6U);
Chris@42 789 T7f = FNMS(KP062914667, T6U, T6V);
Chris@42 790 T7d = FNMS(KP549754652, T74, T75);
Chris@42 791 T76 = FMA(KP549754652, T75, T74);
Chris@42 792 }
Chris@42 793 {
Chris@42 794 E T7h, T7C, T7e, T7D, T7y, T7A, T78, T7a;
Chris@42 795 {
Chris@42 796 E T70, T77, T7g, T6Z;
Chris@42 797 cr[WS(rs, 3)] = FMA(KP998026728, T6G, T6b);
Chris@42 798 T7g = FNMS(KP634619297, T6X, T6Y);
Chris@42 799 T6Z = FMA(KP634619297, T6Y, T6X);
Chris@42 800 {
Chris@42 801 E T7c, T73, T7w, T7x;
Chris@42 802 T7c = FMA(KP470564281, T71, T72);
Chris@42 803 T73 = FNMS(KP470564281, T72, T71);
Chris@42 804 T7w = FMA(KP845997307, T7g, T7f);
Chris@42 805 T7h = FNMS(KP845997307, T7g, T7f);
Chris@42 806 T70 = FMA(KP845997307, T6Z, T6W);
Chris@42 807 T7C = FNMS(KP845997307, T6Z, T6W);
Chris@42 808 T7x = FNMS(KP968479752, T7d, T7c);
Chris@42 809 T7e = FMA(KP968479752, T7d, T7c);
Chris@42 810 T77 = FMA(KP968479752, T76, T73);
Chris@42 811 T7D = FNMS(KP968479752, T76, T73);
Chris@42 812 T7y = FMA(KP906616052, T7x, T7w);
Chris@42 813 T7A = FNMS(KP906616052, T7x, T7w);
Chris@42 814 }
Chris@42 815 ci[WS(rs, 21)] = FNMS(KP998026728, T7K, T7H);
Chris@42 816 T78 = FMA(KP906616052, T77, T70);
Chris@42 817 T7a = FNMS(KP906616052, T77, T70);
Chris@42 818 }
Chris@42 819 {
Chris@42 820 E T7G, T7E, T7k, T7i, T79, T7F, T7B, T7z, T6H, T7j, T7b;
Chris@42 821 T7G = FMA(KP681693190, T7C, T7D);
Chris@42 822 T7E = FNMS(KP560319534, T7D, T7C);
Chris@42 823 ci[WS(rs, 22)] = FNMS(KP998026728, T7y, T7v);
Chris@42 824 cr[WS(rs, 2)] = FMA(KP998026728, T78, T6T);
Chris@42 825 T7z = FMA(KP249506682, T7y, T7v);
Chris@42 826 T7k = FNMS(KP560319534, T7e, T7h);
Chris@42 827 T7i = FMA(KP681693190, T7h, T7e);
Chris@42 828 T79 = FNMS(KP249506682, T78, T6T);
Chris@42 829 T7F = FMA(KP557913902, T7A, T7z);
Chris@42 830 T7B = FNMS(KP557913902, T7A, T7z);
Chris@42 831 T6S = FMA(KP614372930, T6M, T6P);
Chris@42 832 T6Q = FNMS(KP621716863, T6P, T6M);
Chris@42 833 cr[WS(rs, 22)] = FMS(KP860541664, T7G, T7F);
Chris@42 834 ci[WS(rs, 17)] = FMA(KP860541664, T7G, T7F);
Chris@42 835 ci[WS(rs, 12)] = FNMS(KP949179823, T7E, T7B);
Chris@42 836 cr[WS(rs, 17)] = -(FMA(KP949179823, T7E, T7B));
Chris@42 837 T7j = FMA(KP557913902, T7a, T79);
Chris@42 838 T7b = FNMS(KP557913902, T7a, T79);
Chris@42 839 T6H = FNMS(KP249506682, T6G, T6b);
Chris@42 840 ci[WS(rs, 7)] = FMA(KP949179823, T7k, T7j);
Chris@42 841 cr[WS(rs, 12)] = FNMS(KP949179823, T7k, T7j);
Chris@42 842 cr[WS(rs, 7)] = FMA(KP860541664, T7i, T7b);
Chris@42 843 ci[WS(rs, 2)] = FNMS(KP860541664, T7i, T7b);
Chris@42 844 T7S = FMA(KP621716863, T7O, T7P);
Chris@42 845 T7Q = FNMS(KP614372930, T7P, T7O);
Chris@42 846 T7L = FMA(KP249506682, T7K, T7H);
Chris@42 847 T6R = FMA(KP557913902, T6I, T6H);
Chris@42 848 T6J = FNMS(KP557913902, T6I, T6H);
Chris@42 849 }
Chris@42 850 }
Chris@42 851 }
Chris@42 852 }
Chris@42 853 }
Chris@42 854 }
Chris@42 855 }
Chris@42 856 ci[WS(rs, 6)] = FNMS(KP949179823, T6S, T6R);
Chris@42 857 ci[WS(rs, 11)] = FMA(KP949179823, T6S, T6R);
Chris@42 858 cr[WS(rs, 8)] = FMA(KP943557151, T6Q, T6J);
Chris@42 859 ci[WS(rs, 1)] = FNMS(KP943557151, T6Q, T6J);
Chris@42 860 T7N = FNMS(KP557913902, T7M, T7L);
Chris@42 861 T7R = FMA(KP557913902, T7M, T7L);
Chris@42 862 cr[WS(rs, 23)] = -(FMA(KP943557151, T7S, T7R));
Chris@42 863 ci[WS(rs, 16)] = FNMS(KP943557151, T7S, T7R);
Chris@42 864 cr[WS(rs, 18)] = FMS(KP949179823, T7Q, T7N);
Chris@42 865 cr[WS(rs, 13)] = -(FMA(KP949179823, T7Q, T7N));
Chris@42 866 }
Chris@42 867 }
Chris@42 868 }
Chris@42 869
/*
 * Twiddle instruction table for the HAVE_FMA build of hf2_25: four TW_CEXP
 * entries whose last fields are 1, 3, 9 and 24, followed by a TW_NEXT entry.
 * NOTE(review): presumably these name the four complex twiddles stored per
 * iteration (for inputs 1, 3, 9 and 24), and TW_NEXT closes the list --
 * confirm against the tw_instr definition in the FFTW headers.
 */
Chris@42 870 static const tw_instr twinstr[] = {
Chris@42 871 {TW_CEXP, 1, 1},
Chris@42 872 {TW_CEXP, 1, 3},
Chris@42 873 {TW_CEXP, 1, 9},
Chris@42 874 {TW_CEXP, 1, 24},
Chris@42 875 {TW_NEXT, 1, 0}
Chris@42 876 };
Chris@42 877
/*
 * Codelet descriptor for the HAVE_FMA build: size 25, name "hf2_25", the
 * twiddle table above, the GENUS supplied by the included header, and the
 * operation counts {84, 78, 356, 0}. The first three counts match the
 * "84 additions, 78 multiplications, 356 fused multiply/add" figures in the
 * generator's comment at the top of this branch.
 * NOTE(review): meaning of the fourth count (0) is not visible here -- confirm
 * against the hc2hc_desc definition.
 */
Chris@42 878 static const hc2hc_desc desc = { 25, "hf2_25", twinstr, &GENUS, {84, 78, 356, 0} };
Chris@42 879
/*
 * Registration entry point (HAVE_FMA build): hands the hf2_25 kernel and its
 * descriptor to the planner p through X(khc2hc_register).
 */
Chris@42 880 void X(codelet_hf2_25) (planner *p) {
Chris@42 881 X(khc2hc_register) (p, hf2_25, &desc);
Chris@42 882 }
Chris@42 883 #else /* HAVE_FMA */
Chris@42 884
Chris@42 885 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 25 -dit -name hf2_25 -include hf.h */
Chris@42 886
Chris@42 887 /*
Chris@42 888 * This function contains 440 FP additions, 340 FP multiplications,
Chris@42 889 * (or, 280 additions, 180 multiplications, 160 fused multiply/add),
Chris@42 890 * 149 stack variables, 20 constants, and 100 memory accesses
Chris@42 891 */
Chris@42 892 #include "hf.h"
Chris@42 893
/*
 * hf2_25: generated size-25 twiddle codelet, non-FMA variant (produced by
 * gen_hc2hc with -n 25 -dit -twiddle-log3; see the "Generated by" line above).
 *
 * Parameters, as used by the body:
 *   cr, ci  - data arrays, read and overwritten in place; every iteration
 *             accesses cr[WS(rs, k)] and ci[WS(rs, k)] for k = 0..24.
 *   W       - twiddle table; eight values W[0]..W[7] are read per iteration.
 *   rs      - stride handed to WS() to address the 25 elements.
 *   mb, me  - the loop runs for m = mb, mb+1, ..., me-1.
 *   ms      - per-iteration step: cr advances by ms while ci moves back by ms.
 *
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are presumably a*b + c and
 * c - a*b as defined by the FFTW headers -- confirm there.
 */
Chris@42 894 static void hf2_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 895 {
/*
 * Numeric constants. KP559016994 is sqrt(5)/4, KP951056516 is sin(2*pi/5) and
 * KP587785252 is sin(pi/5); together with KP250000000 they appear in every
 * 5-point butterfly below. The other values are used in pairs (for example
 * KP535826794 with KP844327925) in the rotations applied between the two
 * butterfly stages.
 */
Chris@42 896 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@42 897 DK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@42 898 DK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@42 899 DK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@42 900 DK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@42 901 DK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@42 902 DK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@42 903 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@42 904 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@42 905 DK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@42 906 DK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@42 907 DK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@42 908 DK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@42 909 DK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@42 910 DK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@42 911 DK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@42 912 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@42 913 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 914 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 915 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 916 {
Chris@42 917 INT m;
/*
 * One pass per m. W is first offset by (mb - 1) * 8 and then advanced by 8
 * each iteration; cr steps forward by ms and ci steps backward by ms.
 */
Chris@42 918 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) {
Chris@42 919 E T2, T5, T3, T6, T8, Td, T16, T14, Te, T9, T21, T23, Tx, TR, T1g;
Chris@42 920 E TB, T1f, TV, T1Q, Tg, T1S, Tk, T18, T2s, T1c, T2q, Tn, To, Tp, Tr;
Chris@42 921 E T28, T2x, TY, T2k, T2m, T2v, TG, TE, T10, T1h, T1E, T26, T1B, T1G, T1V;
Chris@42 922 E T1X, T1z, T1j;
/*
 * Twiddle set-up: only W[0]..W[7] are loaded; every other factor used on the
 * inputs below is derived from them by products, sums and differences.
 */
Chris@42 923 {
Chris@42 924 E Tw, TT, Tz, TQ, Tv, TU, TA, TP;
Chris@42 925 {
Chris@42 926 E T4, Tc, T7, Tb;
Chris@42 927 T2 = W[0];
Chris@42 928 T5 = W[1];
Chris@42 929 T3 = W[2];
Chris@42 930 T6 = W[3];
Chris@42 931 T4 = T2 * T3;
Chris@42 932 Tc = T5 * T3;
Chris@42 933 T7 = T5 * T6;
Chris@42 934 Tb = T2 * T6;
Chris@42 935 T8 = T4 - T7;
Chris@42 936 Td = Tb + Tc;
Chris@42 937 T16 = Tb - Tc;
Chris@42 938 T14 = T4 + T7;
Chris@42 939 Te = W[5];
Chris@42 940 Tw = T5 * Te;
Chris@42 941 TT = T3 * Te;
Chris@42 942 Tz = T2 * Te;
Chris@42 943 TQ = T6 * Te;
Chris@42 944 T9 = W[4];
Chris@42 945 Tv = T2 * T9;
Chris@42 946 TU = T6 * T9;
Chris@42 947 TA = T5 * T9;
Chris@42 948 TP = T3 * T9;
Chris@42 949 }
Chris@42 950 T21 = TP - TQ;
Chris@42 951 T23 = TT + TU;
Chris@42 952 {
Chris@42 953 E T15, T17, Ta, Tf, T1a, T1b, Ti, Tj;
Chris@42 954 Tx = Tv - Tw;
Chris@42 955 TR = TP + TQ;
Chris@42 956 T1g = Tz - TA;
Chris@42 957 TB = Tz + TA;
Chris@42 958 T1f = Tv + Tw;
Chris@42 959 TV = TT - TU;
Chris@42 960 T15 = T14 * T9;
Chris@42 961 T17 = T16 * Te;
Chris@42 962 T1Q = T15 + T17;
Chris@42 963 Ta = T8 * T9;
Chris@42 964 Tf = Td * Te;
Chris@42 965 Tg = Ta + Tf;
Chris@42 966 T1a = T14 * Te;
Chris@42 967 T1b = T16 * T9;
Chris@42 968 T1S = T1a - T1b;
Chris@42 969 Ti = T8 * Te;
Chris@42 970 Tj = Td * T9;
Chris@42 971 Tk = Ti - Tj;
Chris@42 972 T18 = T15 - T17;
Chris@42 973 T2s = Ti + Tj;
Chris@42 974 T1c = T1a + T1b;
Chris@42 975 T2q = Ta - Tf;
/* Last stored pair W[6], W[7]; combined with the values above for the rest. */
Chris@42 976 Tn = W[6];
Chris@42 977 To = W[7];
Chris@42 978 Tp = FMA(T8, Tn, Td * To);
Chris@42 979 Tr = FNMS(Td, Tn, T8 * To);
Chris@42 980 T28 = FNMS(T1S, Tn, T1Q * To);
Chris@42 981 T2x = FNMS(TV, Tn, TR * To);
Chris@42 982 TY = FMA(T3, Tn, T6 * To);
Chris@42 983 T2k = FMA(T2, Tn, T5 * To);
Chris@42 984 T2m = FNMS(T5, Tn, T2 * To);
Chris@42 985 T2v = FMA(TR, Tn, TV * To);
Chris@42 986 TG = FNMS(Te, Tn, T9 * To);
Chris@42 987 TE = FMA(T9, Tn, Te * To);
Chris@42 988 T10 = FNMS(T6, Tn, T3 * To);
Chris@42 989 T1h = FMA(T1f, Tn, T1g * To);
Chris@42 990 T1E = FMA(Tg, Tn, Tk * To);
Chris@42 991 T26 = FMA(T1Q, Tn, T1S * To);
Chris@42 992 T1B = FNMS(TB, Tn, Tx * To);
Chris@42 993 T1G = FNMS(Tk, Tn, Tg * To);
Chris@42 994 T1V = FMA(T14, Tn, T16 * To);
Chris@42 995 T1X = FNMS(T16, Tn, T14 * To);
Chris@42 996 T1z = FMA(Tx, Tn, TB * To);
Chris@42 997 T1j = FNMS(T1g, Tn, T1f * To);
Chris@42 998 }
Chris@42 999 }
Chris@42 1000 {
Chris@42 1001 E T1, T6v, T2F, T6A, TK, T2G, T6y, T6z, T6u, T71, T2O, T52, T2C, T6k, T4c;
Chris@42 1002 E T5X, T4L, T5s, T4j, T5W, T4K, T5v, T1o, T6g, T30, T5M, T4A, T56, T3b, T5N;
Chris@42 1003 E T4B, T59, T1L, T6h, T3r, T5P, T4E, T5d, T3y, T5Q, T4D, T5g, T2d, T6j, T3P;
Chris@42 1004 E T5U, T4I, T5o, T3W, T5T, T4H, T5l;
/*
 * Group 0: inputs 0, 5, 10, 15, 20. Input 0 is used as loaded; the other four
 * are multiplied by their twiddle pairs and fed to a 5-point butterfly.
 */
Chris@42 1005 {
Chris@42 1006 E Tm, T2I, Tt, T2J, Tu, T6w, TD, T2L, TI, T2M, TJ, T6x;
Chris@42 1007 T1 = cr[0];
Chris@42 1008 T6v = ci[0];
Chris@42 1009 {
Chris@42 1010 E Th, Tl, Tq, Ts;
Chris@42 1011 Th = cr[WS(rs, 5)];
Chris@42 1012 Tl = ci[WS(rs, 5)];
Chris@42 1013 Tm = FMA(Tg, Th, Tk * Tl);
Chris@42 1014 T2I = FNMS(Tk, Th, Tg * Tl);
Chris@42 1015 Tq = cr[WS(rs, 20)];
Chris@42 1016 Ts = ci[WS(rs, 20)];
Chris@42 1017 Tt = FMA(Tp, Tq, Tr * Ts);
Chris@42 1018 T2J = FNMS(Tr, Tq, Tp * Ts);
Chris@42 1019 }
Chris@42 1020 Tu = Tm + Tt;
Chris@42 1021 T6w = T2I + T2J;
Chris@42 1022 {
Chris@42 1023 E Ty, TC, TF, TH;
Chris@42 1024 Ty = cr[WS(rs, 10)];
Chris@42 1025 TC = ci[WS(rs, 10)];
Chris@42 1026 TD = FMA(Tx, Ty, TB * TC);
Chris@42 1027 T2L = FNMS(TB, Ty, Tx * TC);
Chris@42 1028 TF = cr[WS(rs, 15)];
Chris@42 1029 TH = ci[WS(rs, 15)];
Chris@42 1030 TI = FMA(TE, TF, TG * TH);
Chris@42 1031 T2M = FNMS(TG, TF, TE * TH);
Chris@42 1032 }
Chris@42 1033 TJ = TD + TI;
Chris@42 1034 T6x = T2L + T2M;
Chris@42 1035 T2F = KP559016994 * (Tu - TJ);
Chris@42 1036 T6A = KP559016994 * (T6w - T6x);
Chris@42 1037 TK = Tu + TJ;
Chris@42 1038 T2G = FNMS(KP250000000, TK, T1);
Chris@42 1039 T6y = T6w + T6x;
Chris@42 1040 T6z = FNMS(KP250000000, T6y, T6v);
Chris@42 1041 {
Chris@42 1042 E T6s, T6t, T2K, T2N;
Chris@42 1043 T6s = TD - TI;
Chris@42 1044 T6t = Tm - Tt;
Chris@42 1045 T6u = FNMS(KP587785252, T6t, KP951056516 * T6s);
Chris@42 1046 T71 = FMA(KP951056516, T6t, KP587785252 * T6s);
Chris@42 1047 T2K = T2I - T2J;
Chris@42 1048 T2N = T2L - T2M;
Chris@42 1049 T2O = FMA(KP951056516, T2K, KP587785252 * T2N);
Chris@42 1050 T52 = FNMS(KP587785252, T2K, KP951056516 * T2N);
Chris@42 1051 }
Chris@42 1052 }
/* Group 3: inputs 3, 8, 13, 18, 23 -- twiddle multiply, then 5-point butterfly. */
Chris@42 1053 {
Chris@42 1054 E T2g, T48, T3Y, T3Z, T4h, T4g, T43, T46, T49, T2p, T2A, T2B, T2e, T2f;
Chris@42 1055 T2e = cr[WS(rs, 3)];
Chris@42 1056 T2f = ci[WS(rs, 3)];
Chris@42 1057 T2g = FMA(T3, T2e, T6 * T2f);
Chris@42 1058 T48 = FNMS(T6, T2e, T3 * T2f);
Chris@42 1059 {
Chris@42 1060 E T2j, T41, T2z, T45, T2o, T42, T2u, T44;
Chris@42 1061 {
Chris@42 1062 E T2h, T2i, T2w, T2y;
Chris@42 1063 T2h = cr[WS(rs, 8)];
Chris@42 1064 T2i = ci[WS(rs, 8)];
Chris@42 1065 T2j = FMA(T1f, T2h, T1g * T2i);
Chris@42 1066 T41 = FNMS(T1g, T2h, T1f * T2i);
Chris@42 1067 T2w = cr[WS(rs, 18)];
Chris@42 1068 T2y = ci[WS(rs, 18)];
Chris@42 1069 T2z = FMA(T2v, T2w, T2x * T2y);
Chris@42 1070 T45 = FNMS(T2x, T2w, T2v * T2y);
Chris@42 1071 }
Chris@42 1072 {
Chris@42 1073 E T2l, T2n, T2r, T2t;
Chris@42 1074 T2l = cr[WS(rs, 23)];
Chris@42 1075 T2n = ci[WS(rs, 23)];
Chris@42 1076 T2o = FMA(T2k, T2l, T2m * T2n);
Chris@42 1077 T42 = FNMS(T2m, T2l, T2k * T2n);
Chris@42 1078 T2r = cr[WS(rs, 13)];
Chris@42 1079 T2t = ci[WS(rs, 13)];
Chris@42 1080 T2u = FMA(T2q, T2r, T2s * T2t);
Chris@42 1081 T44 = FNMS(T2s, T2r, T2q * T2t);
Chris@42 1082 }
Chris@42 1083 T3Y = T2j - T2o;
Chris@42 1084 T3Z = T2u - T2z;
Chris@42 1085 T4h = T44 - T45;
Chris@42 1086 T4g = T41 - T42;
Chris@42 1087 T43 = T41 + T42;
Chris@42 1088 T46 = T44 + T45;
Chris@42 1089 T49 = T43 + T46;
Chris@42 1090 T2p = T2j + T2o;
Chris@42 1091 T2A = T2u + T2z;
Chris@42 1092 T2B = T2p + T2A;
Chris@42 1093 }
Chris@42 1094 T2C = T2g + T2B;
Chris@42 1095 T6k = T48 + T49;
Chris@42 1096 {
Chris@42 1097 E T40, T5r, T4b, T5q, T47, T4a;
Chris@42 1098 T40 = FMA(KP951056516, T3Y, KP587785252 * T3Z);
Chris@42 1099 T5r = FNMS(KP587785252, T3Y, KP951056516 * T3Z);
Chris@42 1100 T47 = KP559016994 * (T43 - T46);
Chris@42 1101 T4a = FNMS(KP250000000, T49, T48);
Chris@42 1102 T4b = T47 + T4a;
Chris@42 1103 T5q = T4a - T47;
Chris@42 1104 T4c = T40 + T4b;
Chris@42 1105 T5X = T5r + T5q;
Chris@42 1106 T4L = T4b - T40;
Chris@42 1107 T5s = T5q - T5r;
Chris@42 1108 }
Chris@42 1109 {
Chris@42 1110 E T4i, T5u, T4f, T5t, T4d, T4e;
Chris@42 1111 T4i = FMA(KP951056516, T4g, KP587785252 * T4h);
Chris@42 1112 T5u = FNMS(KP587785252, T4g, KP951056516 * T4h);
Chris@42 1113 T4d = KP559016994 * (T2p - T2A);
Chris@42 1114 T4e = FNMS(KP250000000, T2B, T2g);
Chris@42 1115 T4f = T4d + T4e;
Chris@42 1116 T5t = T4e - T4d;
Chris@42 1117 T4j = T4f - T4i;
Chris@42 1118 T5W = T5t - T5u;
Chris@42 1119 T4K = T4f + T4i;
Chris@42 1120 T5v = T5t + T5u;
Chris@42 1121 }
Chris@42 1122 }
/* Group 1: inputs 1, 6, 11, 16, 21 -- twiddle multiply, then 5-point butterfly. */
Chris@42 1123 {
Chris@42 1124 E TO, T37, T2V, T2Y, T32, T31, T34, T35, T38, T13, T1m, T1n, TM, TN;
Chris@42 1125 TM = cr[WS(rs, 1)];
Chris@42 1126 TN = ci[WS(rs, 1)];
Chris@42 1127 TO = FMA(T2, TM, T5 * TN);
Chris@42 1128 T37 = FNMS(T5, TM, T2 * TN);
Chris@42 1129 {
Chris@42 1130 E TX, T2T, T1l, T2X, T12, T2U, T1e, T2W;
Chris@42 1131 {
Chris@42 1132 E TS, TW, T1i, T1k;
Chris@42 1133 TS = cr[WS(rs, 6)];
Chris@42 1134 TW = ci[WS(rs, 6)];
Chris@42 1135 TX = FMA(TR, TS, TV * TW);
Chris@42 1136 T2T = FNMS(TV, TS, TR * TW);
Chris@42 1137 T1i = cr[WS(rs, 16)];
Chris@42 1138 T1k = ci[WS(rs, 16)];
Chris@42 1139 T1l = FMA(T1h, T1i, T1j * T1k);
Chris@42 1140 T2X = FNMS(T1j, T1i, T1h * T1k);
Chris@42 1141 }
Chris@42 1142 {
Chris@42 1143 E TZ, T11, T19, T1d;
Chris@42 1144 TZ = cr[WS(rs, 21)];
Chris@42 1145 T11 = ci[WS(rs, 21)];
Chris@42 1146 T12 = FMA(TY, TZ, T10 * T11);
Chris@42 1147 T2U = FNMS(T10, TZ, TY * T11);
Chris@42 1148 T19 = cr[WS(rs, 11)];
Chris@42 1149 T1d = ci[WS(rs, 11)];
Chris@42 1150 T1e = FMA(T18, T19, T1c * T1d);
Chris@42 1151 T2W = FNMS(T1c, T19, T18 * T1d);
Chris@42 1152 }
Chris@42 1153 T2V = T2T - T2U;
Chris@42 1154 T2Y = T2W - T2X;
Chris@42 1155 T32 = T1e - T1l;
Chris@42 1156 T31 = TX - T12;
Chris@42 1157 T34 = T2T + T2U;
Chris@42 1158 T35 = T2W + T2X;
Chris@42 1159 T38 = T34 + T35;
Chris@42 1160 T13 = TX + T12;
Chris@42 1161 T1m = T1e + T1l;
Chris@42 1162 T1n = T13 + T1m;
Chris@42 1163 }
Chris@42 1164 T1o = TO + T1n;
Chris@42 1165 T6g = T37 + T38;
Chris@42 1166 {
Chris@42 1167 E T2Z, T55, T2S, T54, T2Q, T2R;
Chris@42 1168 T2Z = FMA(KP951056516, T2V, KP587785252 * T2Y);
Chris@42 1169 T55 = FNMS(KP587785252, T2V, KP951056516 * T2Y);
Chris@42 1170 T2Q = KP559016994 * (T13 - T1m);
Chris@42 1171 T2R = FNMS(KP250000000, T1n, TO);
Chris@42 1172 T2S = T2Q + T2R;
Chris@42 1173 T54 = T2R - T2Q;
Chris@42 1174 T30 = T2S - T2Z;
Chris@42 1175 T5M = T54 - T55;
Chris@42 1176 T4A = T2S + T2Z;
Chris@42 1177 T56 = T54 + T55;
Chris@42 1178 }
Chris@42 1179 {
Chris@42 1180 E T33, T58, T3a, T57, T36, T39;
Chris@42 1181 T33 = FMA(KP951056516, T31, KP587785252 * T32);
Chris@42 1182 T58 = FNMS(KP587785252, T31, KP951056516 * T32);
Chris@42 1183 T36 = KP559016994 * (T34 - T35);
Chris@42 1184 T39 = FNMS(KP250000000, T38, T37);
Chris@42 1185 T3a = T36 + T39;
Chris@42 1186 T57 = T39 - T36;
Chris@42 1187 T3b = T33 + T3a;
Chris@42 1188 T5N = T58 + T57;
Chris@42 1189 T4B = T3a - T33;
Chris@42 1190 T59 = T57 - T58;
Chris@42 1191 }
Chris@42 1192 }
/* Group 4: inputs 4, 9, 14, 19, 24 -- twiddle multiply, then 5-point butterfly. */
Chris@42 1193 {
Chris@42 1194 E T1r, T3n, T3d, T3e, T3w, T3v, T3i, T3l, T3o, T1y, T1J, T1K, T1p, T1q;
Chris@42 1195 T1p = cr[WS(rs, 4)];
Chris@42 1196 T1q = ci[WS(rs, 4)];
Chris@42 1197 T1r = FMA(T8, T1p, Td * T1q);
Chris@42 1198 T3n = FNMS(Td, T1p, T8 * T1q);
Chris@42 1199 {
Chris@42 1200 E T1u, T3g, T1I, T3k, T1x, T3h, T1D, T3j;
Chris@42 1201 {
Chris@42 1202 E T1s, T1t, T1F, T1H;
Chris@42 1203 T1s = cr[WS(rs, 9)];
Chris@42 1204 T1t = ci[WS(rs, 9)];
Chris@42 1205 T1u = FMA(T9, T1s, Te * T1t);
Chris@42 1206 T3g = FNMS(Te, T1s, T9 * T1t);
Chris@42 1207 T1F = cr[WS(rs, 19)];
Chris@42 1208 T1H = ci[WS(rs, 19)];
Chris@42 1209 T1I = FMA(T1E, T1F, T1G * T1H);
Chris@42 1210 T3k = FNMS(T1G, T1F, T1E * T1H);
Chris@42 1211 }
Chris@42 1212 {
Chris@42 1213 E T1v, T1w, T1A, T1C;
Chris@42 1214 T1v = cr[WS(rs, 24)];
Chris@42 1215 T1w = ci[WS(rs, 24)];
Chris@42 1216 T1x = FMA(Tn, T1v, To * T1w);
Chris@42 1217 T3h = FNMS(To, T1v, Tn * T1w);
Chris@42 1218 T1A = cr[WS(rs, 14)];
Chris@42 1219 T1C = ci[WS(rs, 14)];
Chris@42 1220 T1D = FMA(T1z, T1A, T1B * T1C);
Chris@42 1221 T3j = FNMS(T1B, T1A, T1z * T1C);
Chris@42 1222 }
Chris@42 1223 T3d = T1x - T1u;
Chris@42 1224 T3e = T1D - T1I;
Chris@42 1225 T3w = T3j - T3k;
Chris@42 1226 T3v = T3g - T3h;
Chris@42 1227 T3i = T3g + T3h;
Chris@42 1228 T3l = T3j + T3k;
Chris@42 1229 T3o = T3i + T3l;
Chris@42 1230 T1y = T1u + T1x;
Chris@42 1231 T1J = T1D + T1I;
Chris@42 1232 T1K = T1y + T1J;
Chris@42 1233 }
Chris@42 1234 T1L = T1r + T1K;
Chris@42 1235 T6h = T3n + T3o;
Chris@42 1236 {
Chris@42 1237 E T3f, T5c, T3q, T5b, T3m, T3p;
Chris@42 1238 T3f = FNMS(KP587785252, T3e, KP951056516 * T3d);
Chris@42 1239 T5c = FMA(KP587785252, T3d, KP951056516 * T3e);
Chris@42 1240 T3m = KP559016994 * (T3i - T3l);
Chris@42 1241 T3p = FNMS(KP250000000, T3o, T3n);
Chris@42 1242 T3q = T3m + T3p;
Chris@42 1243 T5b = T3p - T3m;
Chris@42 1244 T3r = T3f - T3q;
Chris@42 1245 T5P = T5c + T5b;
Chris@42 1246 T4E = T3f + T3q;
Chris@42 1247 T5d = T5b - T5c;
Chris@42 1248 }
Chris@42 1249 {
Chris@42 1250 E T3x, T5f, T3u, T5e, T3s, T3t;
Chris@42 1251 T3x = FMA(KP951056516, T3v, KP587785252 * T3w);
Chris@42 1252 T5f = FNMS(KP587785252, T3v, KP951056516 * T3w);
Chris@42 1253 T3s = KP559016994 * (T1y - T1J);
Chris@42 1254 T3t = FNMS(KP250000000, T1K, T1r);
Chris@42 1255 T3u = T3s + T3t;
Chris@42 1256 T5e = T3t - T3s;
Chris@42 1257 T3y = T3u - T3x;
Chris@42 1258 T5Q = T5e - T5f;
Chris@42 1259 T4D = T3u + T3x;
Chris@42 1260 T5g = T5e + T5f;
Chris@42 1261 }
Chris@42 1262 }
/* Group 2: inputs 2, 7, 12, 17, 22 -- twiddle multiply, then 5-point butterfly. */
Chris@42 1263 {
Chris@42 1264 E T1P, T3L, T3B, T3C, T3U, T3T, T3G, T3J, T3M, T20, T2b, T2c, T1N, T1O;
Chris@42 1265 T1N = cr[WS(rs, 2)];
Chris@42 1266 T1O = ci[WS(rs, 2)];
Chris@42 1267 T1P = FMA(T14, T1N, T16 * T1O);
Chris@42 1268 T3L = FNMS(T16, T1N, T14 * T1O);
Chris@42 1269 {
Chris@42 1270 E T1U, T3E, T2a, T3I, T1Z, T3F, T25, T3H;
Chris@42 1271 {
Chris@42 1272 E T1R, T1T, T27, T29;
Chris@42 1273 T1R = cr[WS(rs, 7)];
Chris@42 1274 T1T = ci[WS(rs, 7)];
Chris@42 1275 T1U = FMA(T1Q, T1R, T1S * T1T);
Chris@42 1276 T3E = FNMS(T1S, T1R, T1Q * T1T);
Chris@42 1277 T27 = cr[WS(rs, 17)];
Chris@42 1278 T29 = ci[WS(rs, 17)];
Chris@42 1279 T2a = FMA(T26, T27, T28 * T29);
Chris@42 1280 T3I = FNMS(T28, T27, T26 * T29);
Chris@42 1281 }
Chris@42 1282 {
Chris@42 1283 E T1W, T1Y, T22, T24;
Chris@42 1284 T1W = cr[WS(rs, 22)];
Chris@42 1285 T1Y = ci[WS(rs, 22)];
Chris@42 1286 T1Z = FMA(T1V, T1W, T1X * T1Y);
Chris@42 1287 T3F = FNMS(T1X, T1W, T1V * T1Y);
Chris@42 1288 T22 = cr[WS(rs, 12)];
Chris@42 1289 T24 = ci[WS(rs, 12)];
Chris@42 1290 T25 = FMA(T21, T22, T23 * T24);
Chris@42 1291 T3H = FNMS(T23, T22, T21 * T24);
Chris@42 1292 }
Chris@42 1293 T3B = T1U - T1Z;
Chris@42 1294 T3C = T25 - T2a;
Chris@42 1295 T3U = T3H - T3I;
Chris@42 1296 T3T = T3E - T3F;
Chris@42 1297 T3G = T3E + T3F;
Chris@42 1298 T3J = T3H + T3I;
Chris@42 1299 T3M = T3G + T3J;
Chris@42 1300 T20 = T1U + T1Z;
Chris@42 1301 T2b = T25 + T2a;
Chris@42 1302 T2c = T20 + T2b;
Chris@42 1303 }
Chris@42 1304 T2d = T1P + T2c;
Chris@42 1305 T6j = T3L + T3M;
Chris@42 1306 {
Chris@42 1307 E T3D, T5n, T3O, T5m, T3K, T3N;
Chris@42 1308 T3D = FMA(KP951056516, T3B, KP587785252 * T3C);
Chris@42 1309 T5n = FNMS(KP587785252, T3B, KP951056516 * T3C);
Chris@42 1310 T3K = KP559016994 * (T3G - T3J);
Chris@42 1311 T3N = FNMS(KP250000000, T3M, T3L);
Chris@42 1312 T3O = T3K + T3N;
Chris@42 1313 T5m = T3N - T3K;
Chris@42 1314 T3P = T3D + T3O;
Chris@42 1315 T5U = T5n + T5m;
Chris@42 1316 T4I = T3O - T3D;
Chris@42 1317 T5o = T5m - T5n;
Chris@42 1318 }
Chris@42 1319 {
Chris@42 1320 E T3V, T5k, T3S, T5j, T3Q, T3R;
Chris@42 1321 T3V = FMA(KP951056516, T3T, KP587785252 * T3U);
Chris@42 1322 T5k = FNMS(KP587785252, T3T, KP951056516 * T3U);
Chris@42 1323 T3Q = KP559016994 * (T20 - T2b);
Chris@42 1324 T3R = FNMS(KP250000000, T2c, T1P);
Chris@42 1325 T3S = T3Q + T3R;
Chris@42 1326 T5j = T3R - T3Q;
Chris@42 1327 T3W = T3S - T3V;
Chris@42 1328 T5T = T5j - T5k;
Chris@42 1329 T4H = T3S + T3V;
Chris@42 1330 T5l = T5j + T5k;
Chris@42 1331 }
Chris@42 1332 }
/*
 * Second stage on the five group sums: stores cr[0], cr[5], cr[10], ci[4]
 * and ci[9] (indices scaled by rs through WS()).
 */
Chris@42 1333 {
Chris@42 1334 E T6m, T6o, TL, T2E, T6d, T6e, T6n, T6f;
Chris@42 1335 {
Chris@42 1336 E T6i, T6l, T1M, T2D;
Chris@42 1337 T6i = T6g - T6h;
Chris@42 1338 T6l = T6j - T6k;
Chris@42 1339 T6m = FMA(KP951056516, T6i, KP587785252 * T6l);
Chris@42 1340 T6o = FNMS(KP587785252, T6i, KP951056516 * T6l);
Chris@42 1341 TL = T1 + TK;
Chris@42 1342 T1M = T1o + T1L;
Chris@42 1343 T2D = T2d + T2C;
Chris@42 1344 T2E = T1M + T2D;
Chris@42 1345 T6d = KP559016994 * (T1M - T2D);
Chris@42 1346 T6e = FNMS(KP250000000, T2E, TL);
Chris@42 1347 }
Chris@42 1348 cr[0] = TL + T2E;
Chris@42 1349 T6n = T6e - T6d;
Chris@42 1350 cr[WS(rs, 10)] = T6n - T6o;
Chris@42 1351 ci[WS(rs, 9)] = T6n + T6o;
Chris@42 1352 T6f = T6d + T6e;
Chris@42 1353 ci[WS(rs, 4)] = T6f - T6m;
Chris@42 1354 cr[WS(rs, 5)] = T6f + T6m;
Chris@42 1355 }
/*
 * Rotate first-stage partial results by the KP* coefficient pairs, then run
 * further 5-point butterflies; each result is stored straight into cr/ci.
 */
Chris@42 1356 {
Chris@42 1357 E T2P, T4z, T72, T7e, T4m, T7j, T4n, T7i, T4U, T77, T4X, T75, T4O, T6Y, T4P;
Chris@42 1358 E T6X, T4s, T7f, T4v, T7d, T2H, T70;
Chris@42 1359 T2H = T2F + T2G;
Chris@42 1360 T2P = T2H - T2O;
Chris@42 1361 T4z = T2H + T2O;
Chris@42 1362 T70 = T6A + T6z;
Chris@42 1363 T72 = T70 - T71;
Chris@42 1364 T7e = T71 + T70;
Chris@42 1365 {
Chris@42 1366 E T3c, T3z, T3A, T3X, T4k, T4l;
Chris@42 1367 T3c = FMA(KP535826794, T30, KP844327925 * T3b);
Chris@42 1368 T3z = FNMS(KP637423989, T3y, KP770513242 * T3r);
Chris@42 1369 T3A = T3c + T3z;
Chris@42 1370 T3X = FNMS(KP425779291, T3W, KP904827052 * T3P);
Chris@42 1371 T4k = FNMS(KP992114701, T4j, KP125333233 * T4c);
Chris@42 1372 T4l = T3X + T4k;
Chris@42 1373 T4m = T3A + T4l;
Chris@42 1374 T7j = T3X - T4k;
Chris@42 1375 T4n = KP559016994 * (T3A - T4l);
Chris@42 1376 T7i = T3z - T3c;
Chris@42 1377 }
Chris@42 1378 {
Chris@42 1379 E T4S, T4T, T73, T4V, T4W, T74;
Chris@42 1380 T4S = FNMS(KP248689887, T4A, KP968583161 * T4B);
Chris@42 1381 T4T = FNMS(KP844327925, T4D, KP535826794 * T4E);
Chris@42 1382 T73 = T4S + T4T;
Chris@42 1383 T4V = FNMS(KP481753674, T4H, KP876306680 * T4I);
Chris@42 1384 T4W = FNMS(KP684547105, T4K, KP728968627 * T4L);
Chris@42 1385 T74 = T4V + T4W;
Chris@42 1386 T4U = T4S - T4T;
Chris@42 1387 T77 = KP559016994 * (T73 - T74);
Chris@42 1388 T4X = T4V - T4W;
Chris@42 1389 T75 = T73 + T74;
Chris@42 1390 }
Chris@42 1391 {
Chris@42 1392 E T4C, T4F, T4G, T4J, T4M, T4N;
Chris@42 1393 T4C = FMA(KP968583161, T4A, KP248689887 * T4B);
Chris@42 1394 T4F = FMA(KP535826794, T4D, KP844327925 * T4E);
Chris@42 1395 T4G = T4C + T4F;
Chris@42 1396 T4J = FMA(KP876306680, T4H, KP481753674 * T4I);
Chris@42 1397 T4M = FMA(KP728968627, T4K, KP684547105 * T4L);
Chris@42 1398 T4N = T4J + T4M;
Chris@42 1399 T4O = T4G + T4N;
Chris@42 1400 T6Y = T4J - T4M;
Chris@42 1401 T4P = KP559016994 * (T4G - T4N);
Chris@42 1402 T6X = T4F - T4C;
Chris@42 1403 }
Chris@42 1404 {
Chris@42 1405 E T4q, T4r, T7b, T4t, T4u, T7c;
Chris@42 1406 T4q = FNMS(KP844327925, T30, KP535826794 * T3b);
Chris@42 1407 T4r = FMA(KP770513242, T3y, KP637423989 * T3r);
Chris@42 1408 T7b = T4q + T4r;
Chris@42 1409 T4t = FMA(KP125333233, T4j, KP992114701 * T4c);
Chris@42 1410 T4u = FMA(KP904827052, T3W, KP425779291 * T3P);
Chris@42 1411 T7c = T4u + T4t;
Chris@42 1412 T4s = T4q - T4r;
Chris@42 1413 T7f = T7b - T7c;
Chris@42 1414 T4v = T4t - T4u;
Chris@42 1415 T7d = KP559016994 * (T7b + T7c);
Chris@42 1416 }
Chris@42 1417 cr[WS(rs, 4)] = T2P + T4m;
Chris@42 1418 ci[WS(rs, 23)] = T75 + T72;
Chris@42 1419 ci[WS(rs, 20)] = T7f + T7e;
Chris@42 1420 cr[WS(rs, 1)] = T4z + T4O;
Chris@42 1421 {
Chris@42 1422 E T4w, T4y, T4p, T4x, T4o;
Chris@42 1423 T4w = FMA(KP951056516, T4s, KP587785252 * T4v);
Chris@42 1424 T4y = FNMS(KP587785252, T4s, KP951056516 * T4v);
Chris@42 1425 T4o = FNMS(KP250000000, T4m, T2P);
Chris@42 1426 T4p = T4n + T4o;
Chris@42 1427 T4x = T4o - T4n;
Chris@42 1428 ci[0] = T4p - T4w;
Chris@42 1429 ci[WS(rs, 5)] = T4x + T4y;
Chris@42 1430 cr[WS(rs, 9)] = T4p + T4w;
Chris@42 1431 ci[WS(rs, 10)] = T4x - T4y;
Chris@42 1432 }
Chris@42 1433 {
Chris@42 1434 E T6Z, T79, T78, T7a, T76;
Chris@42 1435 T6Z = FMA(KP587785252, T6X, KP951056516 * T6Y);
Chris@42 1436 T79 = FNMS(KP587785252, T6Y, KP951056516 * T6X);
Chris@42 1437 T76 = FNMS(KP250000000, T75, T72);
Chris@42 1438 T78 = T76 - T77;
Chris@42 1439 T7a = T77 + T76;
Chris@42 1440 cr[WS(rs, 16)] = T6Z - T78;
Chris@42 1441 ci[WS(rs, 18)] = T79 + T7a;
Chris@42 1442 ci[WS(rs, 13)] = T6Z + T78;
Chris@42 1443 cr[WS(rs, 21)] = T79 - T7a;
Chris@42 1444 }
Chris@42 1445 {
Chris@42 1446 E T7k, T7l, T7h, T7m, T7g;
Chris@42 1447 T7k = FMA(KP587785252, T7i, KP951056516 * T7j);
Chris@42 1448 T7l = FNMS(KP587785252, T7j, KP951056516 * T7i);
Chris@42 1449 T7g = FNMS(KP250000000, T7f, T7e);
Chris@42 1450 T7h = T7d - T7g;
Chris@42 1451 T7m = T7d + T7g;
Chris@42 1452 cr[WS(rs, 14)] = T7h - T7k;
Chris@42 1453 ci[WS(rs, 15)] = T7l + T7m;
Chris@42 1454 cr[WS(rs, 19)] = T7k + T7h;
Chris@42 1455 cr[WS(rs, 24)] = T7l - T7m;
Chris@42 1456 }
Chris@42 1457 {
Chris@42 1458 E T4Y, T50, T4R, T4Z, T4Q;
Chris@42 1459 T4Y = FMA(KP951056516, T4U, KP587785252 * T4X);
Chris@42 1460 T50 = FNMS(KP587785252, T4U, KP951056516 * T4X);
Chris@42 1461 T4Q = FNMS(KP250000000, T4O, T4z);
Chris@42 1462 T4R = T4P + T4Q;
Chris@42 1463 T4Z = T4Q - T4P;
Chris@42 1464 ci[WS(rs, 3)] = T4R - T4Y;
Chris@42 1465 ci[WS(rs, 8)] = T4Z + T50;
Chris@42 1466 cr[WS(rs, 6)] = T4R + T4Y;
Chris@42 1467 cr[WS(rs, 11)] = T4Z - T50;
Chris@42 1468 }
Chris@42 1469 }
/*
 * Second stage on the second set of group sums (T6g, T6h, T6j, T6k, T6y,
 * T6v): stores ci[24], cr[20], ci[19], cr[15] and ci[14].
 */
Chris@42 1470 {
Chris@42 1471 E T7p, T7x, T7q, T7t, T7u, T7v, T7y, T7w;
Chris@42 1472 {
Chris@42 1473 E T7n, T7o, T7r, T7s;
Chris@42 1474 T7n = T1L - T1o;
Chris@42 1475 T7o = T2d - T2C;
Chris@42 1476 T7p = FMA(KP587785252, T7n, KP951056516 * T7o);
Chris@42 1477 T7x = FNMS(KP587785252, T7o, KP951056516 * T7n);
Chris@42 1478 T7q = T6y + T6v;
Chris@42 1479 T7r = T6g + T6h;
Chris@42 1480 T7s = T6j + T6k;
Chris@42 1481 T7t = T7r + T7s;
Chris@42 1482 T7u = FNMS(KP250000000, T7t, T7q);
Chris@42 1483 T7v = KP559016994 * (T7r - T7s);
Chris@42 1484 }
Chris@42 1485 ci[WS(rs, 24)] = T7t + T7q;
Chris@42 1486 T7y = T7v + T7u;
Chris@42 1487 cr[WS(rs, 20)] = T7x - T7y;
Chris@42 1488 ci[WS(rs, 19)] = T7x + T7y;
Chris@42 1489 T7w = T7u - T7v;
Chris@42 1490 cr[WS(rs, 15)] = T7p - T7w;
Chris@42 1491 ci[WS(rs, 14)] = T7p + T7w;
Chris@42 1492 }
/*
 * Same rotate-then-butterfly pattern for the remaining partial results;
 * stores the last twenty outputs into cr/ci.
 */
Chris@42 1493 {
Chris@42 1494 E T53, T5L, T6C, T6O, T5y, T6T, T5z, T6S, T66, T6H, T69, T6F, T60, T6q, T61;
Chris@42 1495 E T6p, T5E, T6P, T5H, T6N, T51, T6B;
Chris@42 1496 T51 = T2G - T2F;
Chris@42 1497 T53 = T51 + T52;
Chris@42 1498 T5L = T51 - T52;
Chris@42 1499 T6B = T6z - T6A;
Chris@42 1500 T6C = T6u + T6B;
Chris@42 1501 T6O = T6B - T6u;
Chris@42 1502 {
Chris@42 1503 E T5a, T5h, T5i, T5p, T5w, T5x;
Chris@42 1504 T5a = FMA(KP728968627, T56, KP684547105 * T59);
Chris@42 1505 T5h = FNMS(KP992114701, T5g, KP125333233 * T5d);
Chris@42 1506 T5i = T5a + T5h;
Chris@42 1507 T5p = FMA(KP062790519, T5l, KP998026728 * T5o);
Chris@42 1508 T5w = FNMS(KP637423989, T5v, KP770513242 * T5s);
Chris@42 1509 T5x = T5p + T5w;
Chris@42 1510 T5y = T5i + T5x;
Chris@42 1511 T6T = T5p - T5w;
Chris@42 1512 T5z = KP559016994 * (T5i - T5x);
Chris@42 1513 T6S = T5h - T5a;
Chris@42 1514 }
Chris@42 1515 {
Chris@42 1516 E T64, T65, T6D, T67, T68, T6E;
Chris@42 1517 T64 = FNMS(KP481753674, T5M, KP876306680 * T5N);
Chris@42 1518 T65 = FMA(KP904827052, T5Q, KP425779291 * T5P);
Chris@42 1519 T6D = T64 - T65;
Chris@42 1520 T67 = FNMS(KP844327925, T5T, KP535826794 * T5U);
Chris@42 1521 T68 = FNMS(KP998026728, T5W, KP062790519 * T5X);
Chris@42 1522 T6E = T67 + T68;
Chris@42 1523 T66 = T64 + T65;
Chris@42 1524 T6H = KP559016994 * (T6D - T6E);
Chris@42 1525 T69 = T67 - T68;
Chris@42 1526 T6F = T6D + T6E;
Chris@42 1527 }
Chris@42 1528 {
Chris@42 1529 E T5O, T5R, T5S, T5V, T5Y, T5Z;
Chris@42 1530 T5O = FMA(KP876306680, T5M, KP481753674 * T5N);
Chris@42 1531 T5R = FNMS(KP425779291, T5Q, KP904827052 * T5P);
Chris@42 1532 T5S = T5O + T5R;
Chris@42 1533 T5V = FMA(KP535826794, T5T, KP844327925 * T5U);
Chris@42 1534 T5Y = FMA(KP062790519, T5W, KP998026728 * T5X);
Chris@42 1535 T5Z = T5V + T5Y;
Chris@42 1536 T60 = T5S + T5Z;
Chris@42 1537 T6q = T5V - T5Y;
Chris@42 1538 T61 = KP559016994 * (T5S - T5Z);
Chris@42 1539 T6p = T5R - T5O;
Chris@42 1540 }
Chris@42 1541 {
Chris@42 1542 E T5C, T5D, T6L, T5F, T5G, T6M;
Chris@42 1543 T5C = FNMS(KP684547105, T56, KP728968627 * T59);
Chris@42 1544 T5D = FMA(KP125333233, T5g, KP992114701 * T5d);
Chris@42 1545 T6L = T5C - T5D;
Chris@42 1546 T5F = FNMS(KP998026728, T5l, KP062790519 * T5o);
Chris@42 1547 T5G = FMA(KP770513242, T5v, KP637423989 * T5s);
Chris@42 1548 T6M = T5F - T5G;
Chris@42 1549 T5E = T5C + T5D;
Chris@42 1550 T6P = T6L + T6M;
Chris@42 1551 T5H = T5F + T5G;
Chris@42 1552 T6N = KP559016994 * (T6L - T6M);
Chris@42 1553 }
Chris@42 1554 cr[WS(rs, 3)] = T53 + T5y;
Chris@42 1555 ci[WS(rs, 22)] = T6F + T6C;
Chris@42 1556 ci[WS(rs, 21)] = T6P + T6O;
Chris@42 1557 cr[WS(rs, 2)] = T5L + T60;
Chris@42 1558 {
Chris@42 1559 E T6r, T6J, T6I, T6K, T6G;
Chris@42 1560 T6r = FMA(KP587785252, T6p, KP951056516 * T6q);
Chris@42 1561 T6J = FNMS(KP587785252, T6q, KP951056516 * T6p);
Chris@42 1562 T6G = FNMS(KP250000000, T6F, T6C);
Chris@42 1563 T6I = T6G - T6H;
Chris@42 1564 T6K = T6H + T6G;
Chris@42 1565 cr[WS(rs, 17)] = T6r - T6I;
Chris@42 1566 ci[WS(rs, 17)] = T6J + T6K;
Chris@42 1567 ci[WS(rs, 12)] = T6r + T6I;
Chris@42 1568 cr[WS(rs, 22)] = T6J - T6K;
Chris@42 1569 }
Chris@42 1570 {
Chris@42 1571 E T6a, T6c, T63, T6b, T62;
Chris@42 1572 T6a = FMA(KP951056516, T66, KP587785252 * T69);
Chris@42 1573 T6c = FNMS(KP587785252, T66, KP951056516 * T69);
Chris@42 1574 T62 = FNMS(KP250000000, T60, T5L);
Chris@42 1575 T63 = T61 + T62;
Chris@42 1576 T6b = T62 - T61;
Chris@42 1577 ci[WS(rs, 2)] = T63 - T6a;
Chris@42 1578 ci[WS(rs, 7)] = T6b + T6c;
Chris@42 1579 cr[WS(rs, 7)] = T63 + T6a;
Chris@42 1580 cr[WS(rs, 12)] = T6b - T6c;
Chris@42 1581 }
Chris@42 1582 {
Chris@42 1583 E T5I, T5K, T5B, T5J, T5A;
Chris@42 1584 T5I = FMA(KP951056516, T5E, KP587785252 * T5H);
Chris@42 1585 T5K = FNMS(KP587785252, T5E, KP951056516 * T5H);
Chris@42 1586 T5A = FNMS(KP250000000, T5y, T53);
Chris@42 1587 T5B = T5z + T5A;
Chris@42 1588 T5J = T5A - T5z;
Chris@42 1589 ci[WS(rs, 1)] = T5B - T5I;
Chris@42 1590 ci[WS(rs, 6)] = T5J + T5K;
Chris@42 1591 cr[WS(rs, 8)] = T5B + T5I;
Chris@42 1592 ci[WS(rs, 11)] = T5J - T5K;
Chris@42 1593 }
Chris@42 1594 {
Chris@42 1595 E T6U, T6V, T6R, T6W, T6Q;
Chris@42 1596 T6U = FMA(KP587785252, T6S, KP951056516 * T6T);
Chris@42 1597 T6V = FNMS(KP587785252, T6T, KP951056516 * T6S);
Chris@42 1598 T6Q = FNMS(KP250000000, T6P, T6O);
Chris@42 1599 T6R = T6N - T6Q;
Chris@42 1600 T6W = T6N + T6Q;
Chris@42 1601 cr[WS(rs, 13)] = T6R - T6U;
Chris@42 1602 ci[WS(rs, 16)] = T6V + T6W;
Chris@42 1603 cr[WS(rs, 18)] = T6U + T6R;
Chris@42 1604 cr[WS(rs, 23)] = T6V - T6W;
Chris@42 1605 }
Chris@42 1606 }
Chris@42 1607 }
Chris@42 1608 }
Chris@42 1609 }
Chris@42 1610 }
Chris@42 1611
/*
 * Twiddle instruction table for the non-FMA build of hf2_25: four TW_CEXP
 * entries whose last fields are 1, 3, 9 and 24, followed by a TW_NEXT entry.
 * This matches the kernel above, which reads four pairs W[0..7] per iteration
 * and applies them directly to inputs 1, 3, 9 and 24.
 * NOTE(review): TW_NEXT presumably closes the list -- confirm against the
 * tw_instr definition in the FFTW headers.
 */
Chris@42 1612 static const tw_instr twinstr[] = {
Chris@42 1613 {TW_CEXP, 1, 1},
Chris@42 1614 {TW_CEXP, 1, 3},
Chris@42 1615 {TW_CEXP, 1, 9},
Chris@42 1616 {TW_CEXP, 1, 24},
Chris@42 1617 {TW_NEXT, 1, 0}
Chris@42 1618 };
Chris@42 1619
/*
 * Codelet descriptor for the non-FMA build: size 25, name "hf2_25", the
 * twiddle table above, the GENUS supplied by hf.h, and the operation counts
 * {280, 180, 160, 0}. The first three counts match the "280 additions, 180
 * multiplications, 160 fused multiply/add" figures in the generator's comment
 * for this branch.
 * NOTE(review): meaning of the fourth count (0) is not visible here -- confirm
 * against the hc2hc_desc definition.
 */
Chris@42 1620 static const hc2hc_desc desc = { 25, "hf2_25", twinstr, &GENUS, {280, 180, 160, 0} };
Chris@42 1621
/*
 * Registration entry point (non-FMA build): hands the hf2_25 kernel and its
 * descriptor to the planner p through X(khc2hc_register).
 */
Chris@42 1622 void X(codelet_hf2_25) (planner *p) {
Chris@42 1623 X(khc2hc_register) (p, hf2_25, &desc);
Chris@42 1624 }
Chris@42 1625 #endif /* HAVE_FMA */