annotate src/fftw-3.3.5/rdft/scalar/r2cb/hb2_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:50:26 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 25 -dif -name hb2_25 -include hb.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 440 FP additions, 434 FP multiplications,
Chris@42 32 * (or, 84 additions, 78 multiplications, 356 fused multiply/add),
Chris@42 33 * 234 stack variables, 47 constants, and 100 memory accesses
Chris@42 34 */
Chris@42 35 #include "hb.h"
Chris@42 36
Chris@42 37 static void hb2_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
Chris@42 39 DK(KP833417178, +0.833417178328688677408962550243238843138996060);
Chris@42 40 DK(KP921177326, +0.921177326965143320250447435415066029359282231);
Chris@42 41 DK(KP541454447, +0.541454447536312777046285590082819509052033189);
Chris@42 42 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@42 43 DK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@42 44 DK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@42 45 DK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@42 46 DK(KP554608978, +0.554608978404018097464974850792216217022558774);
Chris@42 47 DK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@42 48 DK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@42 49 DK(KP248028675, +0.248028675328619457762448260696444630363259177);
Chris@42 50 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@42 51 DK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@42 52 DK(KP871714437, +0.871714437527667770979999223229522602943903653);
Chris@42 53 DK(KP851038619, +0.851038619207379630836264138867114231259902550);
Chris@42 54 DK(KP943557151, +0.943557151597354104399655195398983005179443399);
Chris@42 55 DK(KP726211448, +0.726211448929902658173535992263577167607493062);
Chris@42 56 DK(KP525970792, +0.525970792408939708442463226536226366643874659);
Chris@42 57 DK(KP912018591, +0.912018591466481957908415381764119056233607330);
Chris@42 58 DK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@42 59 DK(KP994076283, +0.994076283785401014123185814696322018529298887);
Chris@42 60 DK(KP614372930, +0.614372930789563808870829930444362096004872855);
Chris@42 61 DK(KP621716863, +0.621716863012209892444754556304102309693593202);
Chris@42 62 DK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@42 63 DK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@42 64 DK(KP557913902, +0.557913902031834264187699648465567037992437152);
Chris@42 65 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@42 66 DK(KP249506682, +0.249506682107067890488084201715862638334226305);
Chris@42 67 DK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@42 68 DK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@42 69 DK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@42 70 DK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@42 71 DK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@42 72 DK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@42 73 DK(KP968479752, +0.968479752739016373193524836781420152702090879);
Chris@42 74 DK(KP062914667, +0.062914667253649757225485955897349402364686947);
Chris@42 75 DK(KP827271945, +0.827271945972475634034355757144307982555673741);
Chris@42 76 DK(KP470564281, +0.470564281212251493087595091036643380879947982);
Chris@42 77 DK(KP126329378, +0.126329378446108174786050455341811215027378105);
Chris@42 78 DK(KP256756360, +0.256756360367726783319498520922669048172391148);
Chris@42 79 DK(KP634619297, +0.634619297544148100711287640319130485732531031);
Chris@42 80 DK(KP549754652, +0.549754652192770074288023275540779861653779767);
Chris@42 81 DK(KP939062505, +0.939062505817492352556001843133229685779824606);
Chris@42 82 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 83 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 84 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@42 85 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 86 {
Chris@42 87 INT m;
Chris@42 88 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) {
Chris@42 89 E TN, TQ, T4e, T2y, T4i, T3U, T4u, T4o, T4G, T4C, T2F, T41, T3Q, T4q, T3a;
Chris@42 90 E T3F, T4a, T4w, T46, T44;
Chris@42 91 {
Chris@42 92 E TT, TO, TR, T23, T2d, T2x, TP, TV, T2p, T85, T4d, T25, TX;
Chris@42 93 TN = W[0];
Chris@42 94 TT = W[4];
Chris@42 95 TO = W[2];
Chris@42 96 TR = W[3];
Chris@42 97 T23 = W[6];
Chris@42 98 T2d = TN * TT;
Chris@42 99 T2x = TO * TT;
Chris@42 100 TP = TN * TO;
Chris@42 101 TV = TN * TR;
Chris@42 102 T2p = TT * T23;
Chris@42 103 T85 = TN * T23;
Chris@42 104 T4d = TO * T23;
Chris@42 105 T25 = W[7];
Chris@42 106 TQ = W[1];
Chris@42 107 TX = W[5];
Chris@42 108 {
Chris@42 109 E T86, T4n, TW, T4l, TS, T71, T2q, T4z, T2e, T8a, T2u, T76, T2k, T4B, T6E;
Chris@42 110 E T6U, T6Y, T5T, T8i, T1I, T2a, T26, TY, T8d, T8s, T8o, T5C, T5w, T7g, T7c;
Chris@42 111 E T5M, T5I, T9, T40, T1R, T3X, T6H, T7F, T5W, T7n, T4N, T68, T1S, T1k, T1T;
Chris@42 112 E T1D, T1Y, T1Z, T10, TM, T7K, T7A, T6p, T6w, T4X, T56, T3K, T2U, T7x, T7J;
Chris@42 113 E T6v, T6i, T50, T57, T3L, T39, T4Q, T59, T3O, T3E, T67, T7t, T7H, T6y, T63;
Chris@42 114 E T4T, T5a, T3N, T3p, T66, T7o;
Chris@42 115 {
Chris@42 116 E T2A, T2z, T6G, T2E, T5V, T6F;
Chris@42 117 {
Chris@42 118 E T1, T1J, T3Y, T3Z, T8, T2C, T1M, T1P, T2D, T4h, T89, T2t, T3W, T1Q, T3V;
Chris@42 119 T1 = cr[0];
Chris@42 120 T4e = FMA(TR, T25, T4d);
Chris@42 121 T4h = TO * T25;
Chris@42 122 T89 = TN * T25;
Chris@42 123 T2t = TT * T25;
Chris@42 124 T86 = FMA(TQ, T25, T85);
Chris@42 125 T4n = FNMS(TQ, TO, TV);
Chris@42 126 TW = FMA(TQ, TO, TV);
Chris@42 127 T4l = FMA(TQ, TR, TP);
Chris@42 128 TS = FNMS(TQ, TR, TP);
Chris@42 129 T71 = FNMS(TR, TX, T2x);
Chris@42 130 T2y = FMA(TR, TX, T2x);
Chris@42 131 T2q = FMA(TX, T25, T2p);
Chris@42 132 T4z = FMA(TQ, TX, T2d);
Chris@42 133 T2e = FNMS(TQ, TX, T2d);
Chris@42 134 {
Chris@42 135 E T3T, T2j, T4t, T6T;
Chris@42 136 T3T = TO * TX;
Chris@42 137 T2j = TN * TX;
Chris@42 138 T4i = FNMS(TR, T23, T4h);
Chris@42 139 T8a = FNMS(TQ, T23, T89);
Chris@42 140 T2u = FNMS(TX, T23, T2t);
Chris@42 141 T4t = T4l * TX;
Chris@42 142 T6T = T4l * T23;
Chris@42 143 {
Chris@42 144 E T6X, T4m, T1H, T29;
Chris@42 145 T6X = T4l * T25;
Chris@42 146 T4m = T4l * TT;
Chris@42 147 T1H = TS * TX;
Chris@42 148 T29 = TS * T25;
Chris@42 149 {
Chris@42 150 E T24, TU, T4F, T4A;
Chris@42 151 T24 = TS * T23;
Chris@42 152 TU = TS * TT;
Chris@42 153 T4F = T4z * T25;
Chris@42 154 T4A = T4z * T23;
Chris@42 155 {
Chris@42 156 E T8r, T8n, T5B, T5v;
Chris@42 157 T8r = T2y * T25;
Chris@42 158 T8n = T2y * T23;
Chris@42 159 T5B = T2e * T25;
Chris@42 160 T5v = T2e * T23;
Chris@42 161 T3U = FNMS(TR, TT, T3T);
Chris@42 162 T76 = FMA(TR, TT, T3T);
Chris@42 163 T2k = FMA(TQ, TT, T2j);
Chris@42 164 T4B = FNMS(TQ, TT, T2j);
Chris@42 165 T4u = FMA(T4n, TT, T4t);
Chris@42 166 T6E = FNMS(T4n, TT, T4t);
Chris@42 167 T6U = FMA(T4n, T25, T6T);
Chris@42 168 T6Y = FNMS(T4n, T23, T6X);
Chris@42 169 T5T = FMA(T4n, TX, T4m);
Chris@42 170 T4o = FNMS(T4n, TX, T4m);
Chris@42 171 T8i = FMA(TW, TT, T1H);
Chris@42 172 T1I = FNMS(TW, TT, T1H);
Chris@42 173 T2a = FNMS(TW, T23, T29);
Chris@42 174 T26 = FMA(TW, T25, T24);
Chris@42 175 TY = FMA(TW, TX, TU);
Chris@42 176 T8d = FNMS(TW, TX, TU);
Chris@42 177 T8s = FNMS(T3U, T23, T8r);
Chris@42 178 T8o = FMA(T3U, T25, T8n);
Chris@42 179 T5C = FNMS(T2k, T23, T5B);
Chris@42 180 T5w = FMA(T2k, T25, T5v);
Chris@42 181 T4G = FNMS(T4B, T23, T4F);
Chris@42 182 T4C = FMA(T4B, T25, T4A);
Chris@42 183 {
Chris@42 184 E T7f, T7b, T5L, T5H;
Chris@42 185 T7f = T5T * T25;
Chris@42 186 T7b = T5T * T23;
Chris@42 187 T5L = TY * T25;
Chris@42 188 T5H = TY * T23;
Chris@42 189 T7g = FNMS(T6E, T23, T7f);
Chris@42 190 T7c = FMA(T6E, T25, T7b);
Chris@42 191 T5M = FNMS(T1I, T23, T5L);
Chris@42 192 T5I = FMA(T1I, T25, T5H);
Chris@42 193 T1J = ci[WS(rs, 24)];
Chris@42 194 }
Chris@42 195 }
Chris@42 196 }
Chris@42 197 }
Chris@42 198 }
Chris@42 199 {
Chris@42 200 E T2, T3, T5, T6;
Chris@42 201 T2 = cr[WS(rs, 5)];
Chris@42 202 T3 = ci[WS(rs, 4)];
Chris@42 203 T5 = cr[WS(rs, 10)];
Chris@42 204 T6 = ci[WS(rs, 9)];
Chris@42 205 {
Chris@42 206 E T1K, T4, T7, T1L, T1N, T1O;
Chris@42 207 T1K = ci[WS(rs, 19)];
Chris@42 208 T3Y = T2 - T3;
Chris@42 209 T4 = T2 + T3;
Chris@42 210 T3Z = T5 - T6;
Chris@42 211 T7 = T5 + T6;
Chris@42 212 T1L = cr[WS(rs, 20)];
Chris@42 213 T1N = ci[WS(rs, 14)];
Chris@42 214 T1O = cr[WS(rs, 15)];
Chris@42 215 T8 = T4 + T7;
Chris@42 216 T2A = T4 - T7;
Chris@42 217 T2C = T1K + T1L;
Chris@42 218 T1M = T1K - T1L;
Chris@42 219 T1P = T1N - T1O;
Chris@42 220 T2D = T1N + T1O;
Chris@42 221 }
Chris@42 222 }
Chris@42 223 T2z = FNMS(KP250000000, T8, T1);
Chris@42 224 T9 = T1 + T8;
Chris@42 225 T3W = T1M - T1P;
Chris@42 226 T1Q = T1M + T1P;
Chris@42 227 T40 = FMA(KP618033988, T3Z, T3Y);
Chris@42 228 T6G = FNMS(KP618033988, T3Y, T3Z);
Chris@42 229 T2E = FMA(KP618033988, T2D, T2C);
Chris@42 230 T5V = FNMS(KP618033988, T2C, T2D);
Chris@42 231 T1R = T1J + T1Q;
Chris@42 232 T3V = FNMS(KP250000000, T1Q, T1J);
Chris@42 233 T6F = FNMS(KP559016994, T3W, T3V);
Chris@42 234 T3X = FMA(KP559016994, T3W, T3V);
Chris@42 235 }
Chris@42 236 {
Chris@42 237 E T2S, T6n, T2H, T2G, Ti, T5Y, T3C, T3r, TK, T3q, T30, T6d, T33, Tr, T32;
Chris@42 238 E T3v, T61, T3y, T1C, T3x, T2L, T6k, T2O, T1a, T2N, T6g, T37, T2W, Tt, T1j;
Chris@42 239 E T2V, Tx, T3g, T3j, Tw, T3l, T1t, T3i, Ty;
Chris@42 240 {
Chris@42 241 E T1u, T1v, T1A, T3u, T1w;
Chris@42 242 {
Chris@42 243 E TC, TI, T3B, TD, TE;
Chris@42 244 {
Chris@42 245 E Ta, Te, Tf, Tb, Tc, T5U, T2B, T2R, Tg;
Chris@42 246 Ta = cr[WS(rs, 1)];
Chris@42 247 T5U = FNMS(KP559016994, T2A, T2z);
Chris@42 248 T2B = FMA(KP559016994, T2A, T2z);
Chris@42 249 T6H = FNMS(KP951056516, T6G, T6F);
Chris@42 250 T7F = FMA(KP951056516, T6G, T6F);
Chris@42 251 Te = cr[WS(rs, 11)];
Chris@42 252 T5W = FMA(KP951056516, T5V, T5U);
Chris@42 253 T7n = FNMS(KP951056516, T5V, T5U);
Chris@42 254 T4N = FMA(KP951056516, T2E, T2B);
Chris@42 255 T2F = FNMS(KP951056516, T2E, T2B);
Chris@42 256 Tf = ci[WS(rs, 8)];
Chris@42 257 Tb = cr[WS(rs, 6)];
Chris@42 258 Tc = ci[WS(rs, 3)];
Chris@42 259 TC = cr[WS(rs, 3)];
Chris@42 260 T2R = Tf - Te;
Chris@42 261 Tg = Te + Tf;
Chris@42 262 {
Chris@42 263 E T2Q, Td, Th, TG, TH;
Chris@42 264 T2Q = Tb - Tc;
Chris@42 265 Td = Tb + Tc;
Chris@42 266 TG = ci[WS(rs, 11)];
Chris@42 267 TH = ci[WS(rs, 6)];
Chris@42 268 T2S = FNMS(KP618033988, T2R, T2Q);
Chris@42 269 T6n = FMA(KP618033988, T2Q, T2R);
Chris@42 270 Th = Td + Tg;
Chris@42 271 T2H = Td - Tg;
Chris@42 272 TI = TG + TH;
Chris@42 273 T3B = TG - TH;
Chris@42 274 T2G = FNMS(KP250000000, Th, Ta);
Chris@42 275 Ti = Ta + Th;
Chris@42 276 TD = cr[WS(rs, 8)];
Chris@42 277 TE = ci[WS(rs, 1)];
Chris@42 278 }
Chris@42 279 }
Chris@42 280 {
Chris@42 281 E Tj, Tk, Tp, T2Z, TJ, Tl;
Chris@42 282 Tj = cr[WS(rs, 4)];
Chris@42 283 {
Chris@42 284 E Tn, To, T3A, TF;
Chris@42 285 Tn = ci[WS(rs, 10)];
Chris@42 286 To = ci[WS(rs, 5)];
Chris@42 287 T3A = TD - TE;
Chris@42 288 TF = TD + TE;
Chris@42 289 Tk = cr[WS(rs, 9)];
Chris@42 290 Tp = Tn + To;
Chris@42 291 T2Z = To - Tn;
Chris@42 292 T5Y = FNMS(KP618033988, T3A, T3B);
Chris@42 293 T3C = FMA(KP618033988, T3B, T3A);
Chris@42 294 T3r = TI - TF;
Chris@42 295 TJ = TF + TI;
Chris@42 296 Tl = ci[0];
Chris@42 297 }
Chris@42 298 T1u = ci[WS(rs, 21)];
Chris@42 299 TK = TC + TJ;
Chris@42 300 T3q = FNMS(KP250000000, TJ, TC);
Chris@42 301 {
Chris@42 302 E T1y, Tm, T2Y, T1z, Tq;
Chris@42 303 T1y = cr[WS(rs, 13)];
Chris@42 304 Tm = Tk + Tl;
Chris@42 305 T2Y = Tl - Tk;
Chris@42 306 T1z = cr[WS(rs, 18)];
Chris@42 307 T1v = ci[WS(rs, 16)];
Chris@42 308 T30 = FMA(KP618033988, T2Z, T2Y);
Chris@42 309 T6d = FNMS(KP618033988, T2Y, T2Z);
Chris@42 310 T33 = Tm - Tp;
Chris@42 311 Tq = Tm + Tp;
Chris@42 312 T1A = T1y + T1z;
Chris@42 313 T3u = T1z - T1y;
Chris@42 314 Tr = Tj + Tq;
Chris@42 315 T32 = FMS(KP250000000, Tq, Tj);
Chris@42 316 T1w = cr[WS(rs, 23)];
Chris@42 317 }
Chris@42 318 }
Chris@42 319 }
Chris@42 320 {
Chris@42 321 E T1b, T1c, T1h, T36, T1d;
Chris@42 322 {
Chris@42 323 E T12, T13, T18, T2K, T1B, T14;
Chris@42 324 T12 = ci[WS(rs, 23)];
Chris@42 325 {
Chris@42 326 E T16, T17, T3t, T1x;
Chris@42 327 T16 = ci[WS(rs, 13)];
Chris@42 328 T17 = cr[WS(rs, 16)];
Chris@42 329 T3t = T1v + T1w;
Chris@42 330 T1x = T1v - T1w;
Chris@42 331 T13 = ci[WS(rs, 18)];
Chris@42 332 T18 = T16 - T17;
Chris@42 333 T2K = T16 + T17;
Chris@42 334 T3v = FMA(KP618033988, T3u, T3t);
Chris@42 335 T61 = FNMS(KP618033988, T3t, T3u);
Chris@42 336 T3y = T1x + T1A;
Chris@42 337 T1B = T1x - T1A;
Chris@42 338 T14 = cr[WS(rs, 21)];
Chris@42 339 }
Chris@42 340 T1b = ci[WS(rs, 20)];
Chris@42 341 T1C = T1u + T1B;
Chris@42 342 T3x = FMS(KP250000000, T1B, T1u);
Chris@42 343 {
Chris@42 344 E T1f, T15, T2J, T1g, T19;
Chris@42 345 T1f = cr[WS(rs, 14)];
Chris@42 346 T15 = T13 - T14;
Chris@42 347 T2J = T13 + T14;
Chris@42 348 T1g = cr[WS(rs, 19)];
Chris@42 349 T1c = ci[WS(rs, 15)];
Chris@42 350 T2L = FMA(KP618033988, T2K, T2J);
Chris@42 351 T6k = FNMS(KP618033988, T2J, T2K);
Chris@42 352 T2O = T15 - T18;
Chris@42 353 T19 = T15 + T18;
Chris@42 354 T1h = T1f + T1g;
Chris@42 355 T36 = T1g - T1f;
Chris@42 356 T1a = T12 + T19;
Chris@42 357 T2N = FNMS(KP250000000, T19, T12);
Chris@42 358 T1d = cr[WS(rs, 24)];
Chris@42 359 }
Chris@42 360 }
Chris@42 361 {
Chris@42 362 E T1l, T1p, T1o, T3e, T1i, T1q;
Chris@42 363 T1l = ci[WS(rs, 22)];
Chris@42 364 {
Chris@42 365 E T1m, T1n, T35, T1e;
Chris@42 366 T1m = ci[WS(rs, 17)];
Chris@42 367 T1n = cr[WS(rs, 22)];
Chris@42 368 T35 = T1c + T1d;
Chris@42 369 T1e = T1c - T1d;
Chris@42 370 T1p = ci[WS(rs, 12)];
Chris@42 371 T1o = T1m - T1n;
Chris@42 372 T3e = T1m + T1n;
Chris@42 373 T6g = FNMS(KP618033988, T35, T36);
Chris@42 374 T37 = FMA(KP618033988, T36, T35);
Chris@42 375 T2W = T1e + T1h;
Chris@42 376 T1i = T1e - T1h;
Chris@42 377 T1q = cr[WS(rs, 17)];
Chris@42 378 }
Chris@42 379 Tt = cr[WS(rs, 2)];
Chris@42 380 T1j = T1b + T1i;
Chris@42 381 T2V = FMS(KP250000000, T1i, T1b);
Chris@42 382 {
Chris@42 383 E Tu, T1r, T3f, Tv, T1s;
Chris@42 384 Tu = cr[WS(rs, 7)];
Chris@42 385 T1r = T1p - T1q;
Chris@42 386 T3f = T1p + T1q;
Chris@42 387 Tv = ci[WS(rs, 2)];
Chris@42 388 Tx = cr[WS(rs, 12)];
Chris@42 389 T3g = FMA(KP618033988, T3f, T3e);
Chris@42 390 T68 = FNMS(KP618033988, T3e, T3f);
Chris@42 391 T3j = T1o - T1r;
Chris@42 392 T1s = T1o + T1r;
Chris@42 393 Tw = Tu + Tv;
Chris@42 394 T3l = Tu - Tv;
Chris@42 395 T1t = T1l + T1s;
Chris@42 396 T3i = FMS(KP250000000, T1s, T1l);
Chris@42 397 Ty = ci[WS(rs, 7)];
Chris@42 398 }
Chris@42 399 }
Chris@42 400 }
Chris@42 401 }
Chris@42 402 {
Chris@42 403 E T3n, T65, T3c, T3b, T2P, T2M, T4W;
Chris@42 404 {
Chris@42 405 E TA, T3m, Tz, TB, Ts;
Chris@42 406 T3m = Ty - Tx;
Chris@42 407 Tz = Tx + Ty;
Chris@42 408 T1S = T1a + T1j;
Chris@42 409 T1k = T1a - T1j;
Chris@42 410 T3n = FNMS(KP618033988, T3m, T3l);
Chris@42 411 T65 = FMA(KP618033988, T3l, T3m);
Chris@42 412 TA = Tw + Tz;
Chris@42 413 T3c = Tz - Tw;
Chris@42 414 T3b = FNMS(KP250000000, TA, Tt);
Chris@42 415 TB = Tt + TA;
Chris@42 416 T1T = T1t + T1C;
Chris@42 417 T1D = T1t - T1C;
Chris@42 418 T1Y = Ti - Tr;
Chris@42 419 Ts = Ti + Tr;
Chris@42 420 {
Chris@42 421 E T2I, T6j, T6m, TL;
Chris@42 422 T2I = FMA(KP559016994, T2H, T2G);
Chris@42 423 T6j = FNMS(KP559016994, T2H, T2G);
Chris@42 424 T6m = FNMS(KP559016994, T2O, T2N);
Chris@42 425 T2P = FMA(KP559016994, T2O, T2N);
Chris@42 426 TL = TB + TK;
Chris@42 427 T1Z = TB - TK;
Chris@42 428 {
Chris@42 429 E T6l, T7y, T6o, T7z;
Chris@42 430 T6l = FMA(KP951056516, T6k, T6j);
Chris@42 431 T7y = FNMS(KP951056516, T6k, T6j);
Chris@42 432 T6o = FMA(KP951056516, T6n, T6m);
Chris@42 433 T7z = FNMS(KP951056516, T6n, T6m);
Chris@42 434 T10 = Ts - TL;
Chris@42 435 TM = Ts + TL;
Chris@42 436 T2M = FNMS(KP951056516, T2L, T2I);
Chris@42 437 T4W = FMA(KP951056516, T2L, T2I);
Chris@42 438 T7K = FMA(KP939062505, T7y, T7z);
Chris@42 439 T7A = FNMS(KP939062505, T7z, T7y);
Chris@42 440 T6p = FNMS(KP549754652, T6o, T6l);
Chris@42 441 T6w = FMA(KP549754652, T6l, T6o);
Chris@42 442 }
Chris@42 443 }
Chris@42 444 }
Chris@42 445 {
Chris@42 446 E T34, T31, T4Y, T60, T3s, T3z, T5X;
Chris@42 447 {
Chris@42 448 E T2X, T6c, T6f, T4V, T2T;
Chris@42 449 T2X = FNMS(KP559016994, T2W, T2V);
Chris@42 450 T6c = FMA(KP559016994, T2W, T2V);
Chris@42 451 T6f = FMA(KP559016994, T33, T32);
Chris@42 452 T34 = FNMS(KP559016994, T33, T32);
Chris@42 453 T4V = FNMS(KP951056516, T2S, T2P);
Chris@42 454 T2T = FMA(KP951056516, T2S, T2P);
Chris@42 455 {
Chris@42 456 E T7w, T6e, T7v, T6h;
Chris@42 457 T7w = FMA(KP951056516, T6d, T6c);
Chris@42 458 T6e = FNMS(KP951056516, T6d, T6c);
Chris@42 459 T7v = FMA(KP951056516, T6g, T6f);
Chris@42 460 T6h = FNMS(KP951056516, T6g, T6f);
Chris@42 461 T4X = FNMS(KP634619297, T4W, T4V);
Chris@42 462 T56 = FMA(KP634619297, T4V, T4W);
Chris@42 463 T3K = FMA(KP256756360, T2M, T2T);
Chris@42 464 T2U = FNMS(KP256756360, T2T, T2M);
Chris@42 465 T7x = FMA(KP126329378, T7w, T7v);
Chris@42 466 T7J = FNMS(KP126329378, T7v, T7w);
Chris@42 467 T6v = FNMS(KP470564281, T6e, T6h);
Chris@42 468 T6i = FMA(KP470564281, T6h, T6e);
Chris@42 469 T31 = FMA(KP951056516, T30, T2X);
Chris@42 470 T4Y = FNMS(KP951056516, T30, T2X);
Chris@42 471 }
Chris@42 472 T60 = FMA(KP559016994, T3r, T3q);
Chris@42 473 T3s = FNMS(KP559016994, T3r, T3q);
Chris@42 474 T3z = FNMS(KP559016994, T3y, T3x);
Chris@42 475 T5X = FMA(KP559016994, T3y, T3x);
Chris@42 476 }
Chris@42 477 {
Chris@42 478 E T5Z, T7r, T4Z, T38;
Chris@42 479 T4Z = FNMS(KP951056516, T37, T34);
Chris@42 480 T38 = FMA(KP951056516, T37, T34);
Chris@42 481 {
Chris@42 482 E T4O, T3w, T4P, T3D;
Chris@42 483 T4O = FMA(KP951056516, T3v, T3s);
Chris@42 484 T3w = FNMS(KP951056516, T3v, T3s);
Chris@42 485 T4P = FMA(KP951056516, T3C, T3z);
Chris@42 486 T3D = FNMS(KP951056516, T3C, T3z);
Chris@42 487 T50 = FNMS(KP827271945, T4Z, T4Y);
Chris@42 488 T57 = FMA(KP827271945, T4Y, T4Z);
Chris@42 489 T3L = FMA(KP634619297, T31, T38);
Chris@42 490 T39 = FNMS(KP634619297, T38, T31);
Chris@42 491 T4Q = FNMS(KP126329378, T4P, T4O);
Chris@42 492 T59 = FMA(KP126329378, T4O, T4P);
Chris@42 493 T3O = FNMS(KP939062505, T3w, T3D);
Chris@42 494 T3E = FMA(KP939062505, T3D, T3w);
Chris@42 495 T5Z = FMA(KP951056516, T5Y, T5X);
Chris@42 496 T7r = FNMS(KP951056516, T5Y, T5X);
Chris@42 497 }
Chris@42 498 {
Chris@42 499 E T3d, T3k, T64, T7s, T62;
Chris@42 500 T67 = FMA(KP559016994, T3c, T3b);
Chris@42 501 T3d = FNMS(KP559016994, T3c, T3b);
Chris@42 502 T3k = FNMS(KP559016994, T3j, T3i);
Chris@42 503 T64 = FMA(KP559016994, T3j, T3i);
Chris@42 504 T7s = FNMS(KP951056516, T61, T60);
Chris@42 505 T62 = FMA(KP951056516, T61, T60);
Chris@42 506 {
Chris@42 507 E T4S, T3h, T4R, T3o;
Chris@42 508 T4S = FMA(KP951056516, T3g, T3d);
Chris@42 509 T3h = FNMS(KP951056516, T3g, T3d);
Chris@42 510 T4R = FMA(KP951056516, T3n, T3k);
Chris@42 511 T3o = FNMS(KP951056516, T3n, T3k);
Chris@42 512 T7t = FNMS(KP827271945, T7s, T7r);
Chris@42 513 T7H = FMA(KP827271945, T7r, T7s);
Chris@42 514 T6y = FNMS(KP062914667, T5Z, T62);
Chris@42 515 T63 = FMA(KP062914667, T62, T5Z);
Chris@42 516 T4T = FNMS(KP470564281, T4S, T4R);
Chris@42 517 T5a = FMA(KP470564281, T4R, T4S);
Chris@42 518 T3N = FNMS(KP549754652, T3h, T3o);
Chris@42 519 T3p = FMA(KP549754652, T3o, T3h);
Chris@42 520 T66 = FNMS(KP951056516, T65, T64);
Chris@42 521 T7o = FMA(KP951056516, T65, T64);
Chris@42 522 }
Chris@42 523 }
Chris@42 524 }
Chris@42 525 }
Chris@42 526 }
Chris@42 527 }
Chris@42 528 }
Chris@42 529 {
Chris@42 530 E T7q, T7G, T6J, T6I, T6q, T6b, T6B, T73, T6Q, T78, T6z, T6a;
Chris@42 531 cr[0] = T9 + TM;
Chris@42 532 {
Chris@42 533 E T1U, T2l, T1X, T2g, T1E, TZ, T2m, T20, T2v, T2n;
Chris@42 534 {
Chris@42 535 E T1W, T7p, T69, T1V;
Chris@42 536 T1W = T1S - T1T;
Chris@42 537 T1U = T1S + T1T;
Chris@42 538 T7p = FNMS(KP951056516, T68, T67);
Chris@42 539 T69 = FMA(KP951056516, T68, T67);
Chris@42 540 T1V = FNMS(KP250000000, T1U, T1R);
Chris@42 541 T7q = FMA(KP062914667, T7p, T7o);
Chris@42 542 T7G = FNMS(KP062914667, T7o, T7p);
Chris@42 543 T6z = FNMS(KP634619297, T66, T69);
Chris@42 544 T6a = FMA(KP634619297, T69, T66);
Chris@42 545 T2l = FNMS(KP559016994, T1W, T1V);
Chris@42 546 T1X = FMA(KP559016994, T1W, T1V);
Chris@42 547 T2g = FNMS(KP618033988, T1k, T1D);
Chris@42 548 T1E = FMA(KP618033988, T1D, T1k);
Chris@42 549 TZ = FNMS(KP250000000, TM, T9);
Chris@42 550 T2m = FNMS(KP618033988, T1Y, T1Z);
Chris@42 551 T20 = FMA(KP618033988, T1Z, T1Y);
Chris@42 552 }
Chris@42 553 ci[0] = T1R + T1U;
Chris@42 554 T2v = FMA(KP951056516, T2m, T2l);
Chris@42 555 T2n = FNMS(KP951056516, T2m, T2l);
Chris@42 556 {
Chris@42 557 E T2b, T21, T2f, T11;
Chris@42 558 T2b = FNMS(KP951056516, T20, T1X);
Chris@42 559 T21 = FMA(KP951056516, T20, T1X);
Chris@42 560 T2f = FNMS(KP559016994, T10, TZ);
Chris@42 561 T11 = FMA(KP559016994, T10, TZ);
Chris@42 562 {
Chris@42 563 E T2h, T2r, T27, T1F;
Chris@42 564 T2h = FMA(KP951056516, T2g, T2f);
Chris@42 565 T2r = FNMS(KP951056516, T2g, T2f);
Chris@42 566 T27 = FMA(KP951056516, T1E, T11);
Chris@42 567 T1F = FNMS(KP951056516, T1E, T11);
Chris@42 568 {
Chris@42 569 E T2o, T2i, T2w, T2s;
Chris@42 570 T2o = T2k * T2h;
Chris@42 571 T2i = T2e * T2h;
Chris@42 572 T2w = T2u * T2r;
Chris@42 573 T2s = T2q * T2r;
Chris@42 574 {
Chris@42 575 E T2c, T28, T22, T1G;
Chris@42 576 T2c = T2a * T27;
Chris@42 577 T28 = T26 * T27;
Chris@42 578 T22 = T1I * T1F;
Chris@42 579 T1G = TY * T1F;
Chris@42 580 ci[WS(rs, 15)] = FMA(T2q, T2v, T2w);
Chris@42 581 cr[WS(rs, 15)] = FNMS(T2u, T2v, T2s);
Chris@42 582 ci[WS(rs, 20)] = FMA(T26, T2b, T2c);
Chris@42 583 cr[WS(rs, 20)] = FNMS(T2a, T2b, T28);
Chris@42 584 ci[WS(rs, 5)] = FMA(TY, T21, T22);
Chris@42 585 cr[WS(rs, 5)] = FNMS(T1I, T21, T1G);
Chris@42 586 cr[WS(rs, 10)] = FNMS(T2k, T2n, T2i);
Chris@42 587 ci[WS(rs, 10)] = FMA(T2e, T2n, T2o);
Chris@42 588 }
Chris@42 589 }
Chris@42 590 }
Chris@42 591 }
Chris@42 592 }
Chris@42 593 {
Chris@42 594 E T6x, T6A, T6O, T6P;
Chris@42 595 T6x = FMA(KP968479752, T6w, T6v);
Chris@42 596 T6J = FNMS(KP968479752, T6w, T6v);
Chris@42 597 T6I = FMA(KP845997307, T6z, T6y);
Chris@42 598 T6A = FNMS(KP845997307, T6z, T6y);
Chris@42 599 T6O = FNMS(KP968479752, T6p, T6i);
Chris@42 600 T6q = FMA(KP968479752, T6p, T6i);
Chris@42 601 T6b = FMA(KP845997307, T6a, T63);
Chris@42 602 T6P = FNMS(KP845997307, T6a, T63);
Chris@42 603 T6B = FNMS(KP681693190, T6A, T6x);
Chris@42 604 T73 = FMA(KP560319534, T6x, T6A);
Chris@42 605 T6Q = FMA(KP681693190, T6P, T6O);
Chris@42 606 T78 = FNMS(KP560319534, T6O, T6P);
Chris@42 607 }
Chris@42 608 {
Chris@42 609 E T7U, T8f, T7B, T7u, T82, T8k, T7Y, T7M;
Chris@42 610 {
Chris@42 611 E T7L, T7I, T80, T81;
Chris@42 612 {
Chris@42 613 E T7S, T6r, T6t, T6K, T6M, T7T, T6s, T7j;
Chris@42 614 T7S = FNMS(KP734762448, T7K, T7J);
Chris@42 615 T7L = FMA(KP734762448, T7K, T7J);
Chris@42 616 T6r = FMA(KP906616052, T6q, T6b);
Chris@42 617 T6t = FNMS(KP906616052, T6q, T6b);
Chris@42 618 T6K = FNMS(KP906616052, T6J, T6I);
Chris@42 619 T6M = FMA(KP906616052, T6J, T6I);
Chris@42 620 T7I = FMA(KP772036680, T7H, T7G);
Chris@42 621 T7T = FNMS(KP772036680, T7H, T7G);
Chris@42 622 T6s = FNMS(KP249506682, T6r, T5W);
Chris@42 623 T7j = FMA(KP998026728, T6r, T5W);
Chris@42 624 {
Chris@42 625 E T6L, T7l, T72, T6u;
Chris@42 626 T6L = FNMS(KP249506682, T6K, T6H);
Chris@42 627 T7l = FMA(KP998026728, T6K, T6H);
Chris@42 628 T72 = FMA(KP557913902, T6t, T6s);
Chris@42 629 T6u = FNMS(KP557913902, T6t, T6s);
Chris@42 630 {
Chris@42 631 E T7k, T6N, T77, T7m;
Chris@42 632 T7k = T4l * T7j;
Chris@42 633 T6N = FNMS(KP557913902, T6M, T6L);
Chris@42 634 T77 = FMA(KP557913902, T6M, T6L);
Chris@42 635 T7m = T4l * T7l;
Chris@42 636 {
Chris@42 637 E T74, T7d, T6V, T6C;
Chris@42 638 T74 = FNMS(KP949179823, T73, T72);
Chris@42 639 T7d = FMA(KP949179823, T73, T72);
Chris@42 640 T6V = FMA(KP860541664, T6B, T6u);
Chris@42 641 T6C = FNMS(KP860541664, T6B, T6u);
Chris@42 642 cr[WS(rs, 2)] = FNMS(T4n, T7l, T7k);
Chris@42 643 {
Chris@42 644 E T7h, T79, T6R, T6Z;
Chris@42 645 T7h = FNMS(KP949179823, T78, T77);
Chris@42 646 T79 = FMA(KP949179823, T78, T77);
Chris@42 647 T6R = FNMS(KP860541664, T6Q, T6N);
Chris@42 648 T6Z = FMA(KP860541664, T6Q, T6N);
Chris@42 649 ci[WS(rs, 2)] = FMA(T4n, T7j, T7m);
Chris@42 650 {
Chris@42 651 E T75, T7e, T6W, T6D;
Chris@42 652 T75 = T71 * T74;
Chris@42 653 T7e = T7c * T7d;
Chris@42 654 T6W = T6U * T6V;
Chris@42 655 T6D = T5T * T6C;
Chris@42 656 {
Chris@42 657 E T7a, T7i, T70, T6S;
Chris@42 658 T7a = T71 * T79;
Chris@42 659 T7i = T7c * T7h;
Chris@42 660 T70 = T6U * T6Z;
Chris@42 661 T6S = T5T * T6R;
Chris@42 662 cr[WS(rs, 12)] = FNMS(T76, T79, T75);
Chris@42 663 cr[WS(rs, 17)] = FNMS(T7g, T7h, T7e);
Chris@42 664 cr[WS(rs, 22)] = FNMS(T6Y, T6Z, T6W);
Chris@42 665 cr[WS(rs, 7)] = FNMS(T6E, T6R, T6D);
Chris@42 666 ci[WS(rs, 12)] = FMA(T76, T74, T7a);
Chris@42 667 ci[WS(rs, 17)] = FMA(T7g, T7d, T7i);
Chris@42 668 ci[WS(rs, 22)] = FMA(T6Y, T6V, T70);
Chris@42 669 ci[WS(rs, 7)] = FMA(T6E, T6C, T6S);
Chris@42 670 T7U = FNMS(KP621716863, T7T, T7S);
Chris@42 671 T8f = FMA(KP614372930, T7S, T7T);
Chris@42 672 }
Chris@42 673 }
Chris@42 674 }
Chris@42 675 }
Chris@42 676 }
Chris@42 677 }
Chris@42 678 }
Chris@42 679 T80 = FNMS(KP734762448, T7A, T7x);
Chris@42 680 T7B = FMA(KP734762448, T7A, T7x);
Chris@42 681 T7u = FMA(KP772036680, T7t, T7q);
Chris@42 682 T81 = FNMS(KP772036680, T7t, T7q);
Chris@42 683 T82 = FNMS(KP621716863, T81, T80);
Chris@42 684 T8k = FMA(KP614372930, T80, T81);
Chris@42 685 T7Y = FNMS(KP994076283, T7L, T7I);
Chris@42 686 T7M = FMA(KP994076283, T7L, T7I);
Chris@42 687 }
Chris@42 688 {
Chris@42 689 E T5y, T5c, T51, T4U, T5f, T5E, T5o, T5i, T5k;
Chris@42 690 {
Chris@42 691 E T5h, T5g, T5m, T5n, T58, T5b;
Chris@42 692 T5h = FMA(KP912575812, T57, T56);
Chris@42 693 T58 = FNMS(KP912575812, T57, T56);
Chris@42 694 T5b = FNMS(KP912018591, T5a, T59);
Chris@42 695 T5g = FMA(KP912018591, T5a, T59);
Chris@42 696 {
Chris@42 697 E T7X, T7N, T7C, T7Q;
Chris@42 698 T7X = FNMS(KP249506682, T7M, T7F);
Chris@42 699 T7N = FMA(KP998026728, T7M, T7F);
Chris@42 700 T7C = FMA(KP994076283, T7B, T7u);
Chris@42 701 T7Q = FNMS(KP994076283, T7B, T7u);
Chris@42 702 T5y = FMA(KP525970792, T58, T5b);
Chris@42 703 T5c = FNMS(KP726211448, T5b, T58);
Chris@42 704 {
Chris@42 705 E T7Z, T8j, T7P, T7D;
Chris@42 706 T7Z = FNMS(KP557913902, T7Y, T7X);
Chris@42 707 T8j = FMA(KP557913902, T7Y, T7X);
Chris@42 708 T7P = FNMS(KP249506682, T7C, T7n);
Chris@42 709 T7D = FMA(KP998026728, T7C, T7n);
Chris@42 710 {
Chris@42 711 E T8b, T83, T8t, T8l;
Chris@42 712 T8b = FMA(KP943557151, T82, T7Z);
Chris@42 713 T83 = FNMS(KP943557151, T82, T7Z);
Chris@42 714 T8t = FMA(KP949179823, T8k, T8j);
Chris@42 715 T8l = FNMS(KP949179823, T8k, T8j);
Chris@42 716 {
Chris@42 717 E T8e, T7R, T7O, T7E;
Chris@42 718 T8e = FMA(KP557913902, T7Q, T7P);
Chris@42 719 T7R = FNMS(KP557913902, T7Q, T7P);
Chris@42 720 T7O = TR * T7D;
Chris@42 721 T7E = TO * T7D;
Chris@42 722 {
Chris@42 723 E T8g, T8p, T7V, T87;
Chris@42 724 T8g = FMA(KP949179823, T8f, T8e);
Chris@42 725 T8p = FNMS(KP949179823, T8f, T8e);
Chris@42 726 T7V = FMA(KP943557151, T7U, T7R);
Chris@42 727 T87 = FNMS(KP943557151, T7U, T7R);
Chris@42 728 ci[WS(rs, 3)] = FMA(TO, T7N, T7O);
Chris@42 729 cr[WS(rs, 3)] = FNMS(TR, T7N, T7E);
Chris@42 730 {
Chris@42 731 E T8m, T8h, T8u, T8q;
Chris@42 732 T8m = T8i * T8g;
Chris@42 733 T8h = T8d * T8g;
Chris@42 734 T8u = T8s * T8p;
Chris@42 735 T8q = T8o * T8p;
Chris@42 736 {
Chris@42 737 E T84, T7W, T8c, T88;
Chris@42 738 T84 = T4B * T7V;
Chris@42 739 T7W = T4z * T7V;
Chris@42 740 T8c = T8a * T87;
Chris@42 741 T88 = T86 * T87;
Chris@42 742 ci[WS(rs, 13)] = FMA(T8d, T8l, T8m);
Chris@42 743 cr[WS(rs, 13)] = FNMS(T8i, T8l, T8h);
Chris@42 744 ci[WS(rs, 18)] = FMA(T8o, T8t, T8u);
Chris@42 745 cr[WS(rs, 18)] = FNMS(T8s, T8t, T8q);
Chris@42 746 ci[WS(rs, 8)] = FMA(T4z, T83, T84);
Chris@42 747 cr[WS(rs, 8)] = FNMS(T4B, T83, T7W);
Chris@42 748 ci[WS(rs, 23)] = FMA(T86, T8b, T8c);
Chris@42 749 cr[WS(rs, 23)] = FNMS(T8a, T8b, T88);
Chris@42 750 }
Chris@42 751 }
Chris@42 752 }
Chris@42 753 }
Chris@42 754 }
Chris@42 755 }
Chris@42 756 }
Chris@42 757 T51 = FMA(KP912575812, T50, T4X);
Chris@42 758 T5m = FNMS(KP912575812, T50, T4X);
Chris@42 759 T5n = FMA(KP912018591, T4T, T4Q);
Chris@42 760 T4U = FNMS(KP912018591, T4T, T4Q);
Chris@42 761 T41 = FMA(KP951056516, T40, T3X);
Chris@42 762 T5f = FNMS(KP951056516, T40, T3X);
Chris@42 763 T5E = FMA(KP525970792, T5m, T5n);
Chris@42 764 T5o = FNMS(KP726211448, T5n, T5m);
Chris@42 765 T5i = FMA(KP851038619, T5h, T5g);
Chris@42 766 T5k = FNMS(KP851038619, T5h, T5g);
Chris@42 767 }
Chris@42 768 {
Chris@42 769 E T42, T43, T48, T49, T3M, T3P;
Chris@42 770 T3M = FMA(KP871714437, T3L, T3K);
Chris@42 771 T42 = FNMS(KP871714437, T3L, T3K);
Chris@42 772 T43 = FMA(KP831864738, T3O, T3N);
Chris@42 773 T3P = FNMS(KP831864738, T3O, T3N);
Chris@42 774 {
Chris@42 775 E T5R, T5j, T54, T52;
Chris@42 776 T5R = FMA(KP992114701, T5i, T5f);
Chris@42 777 T5j = FNMS(KP248028675, T5i, T5f);
Chris@42 778 T54 = FNMS(KP851038619, T51, T4U);
Chris@42 779 T52 = FMA(KP851038619, T51, T4U);
Chris@42 780 T3Q = FNMS(KP559154169, T3P, T3M);
Chris@42 781 T4q = FMA(KP683113946, T3M, T3P);
Chris@42 782 {
Chris@42 783 E T5D, T5l, T5P, T53;
Chris@42 784 T5D = FMA(KP554608978, T5k, T5j);
Chris@42 785 T5l = FNMS(KP554608978, T5k, T5j);
Chris@42 786 T5P = FNMS(KP992114701, T52, T4N);
Chris@42 787 T53 = FMA(KP248028675, T52, T4N);
Chris@42 788 {
Chris@42 789 E T5p, T5t, T5F, T5N;
Chris@42 790 T5p = FNMS(KP803003575, T5o, T5l);
Chris@42 791 T5t = FMA(KP803003575, T5o, T5l);
Chris@42 792 T5F = FNMS(KP943557151, T5E, T5D);
Chris@42 793 T5N = FMA(KP943557151, T5E, T5D);
Chris@42 794 {
Chris@42 795 E T55, T5x, T5S, T5Q;
Chris@42 796 T55 = FMA(KP554608978, T54, T53);
Chris@42 797 T5x = FNMS(KP554608978, T54, T53);
Chris@42 798 T5S = TW * T5P;
Chris@42 799 T5Q = TS * T5P;
Chris@42 800 {
Chris@42 801 E T5J, T5z, T5r, T5d;
Chris@42 802 T5J = FMA(KP943557151, T5y, T5x);
Chris@42 803 T5z = FNMS(KP943557151, T5y, T5x);
Chris@42 804 T5r = FMA(KP803003575, T5c, T55);
Chris@42 805 T5d = FNMS(KP803003575, T5c, T55);
Chris@42 806 ci[WS(rs, 4)] = FMA(TS, T5R, T5S);
Chris@42 807 cr[WS(rs, 4)] = FNMS(TW, T5R, T5Q);
Chris@42 808 {
Chris@42 809 E T5G, T5A, T5O, T5K;
Chris@42 810 T5G = T5C * T5z;
Chris@42 811 T5A = T5w * T5z;
Chris@42 812 T5O = T5M * T5J;
Chris@42 813 T5K = T5I * T5J;
Chris@42 814 {
Chris@42 815 E T5q, T5e, T5u, T5s;
Chris@42 816 T5q = TX * T5d;
Chris@42 817 T5e = TT * T5d;
Chris@42 818 T5u = T25 * T5r;
Chris@42 819 T5s = T23 * T5r;
Chris@42 820 ci[WS(rs, 14)] = FMA(T5w, T5F, T5G);
Chris@42 821 cr[WS(rs, 14)] = FNMS(T5C, T5F, T5A);
Chris@42 822 ci[WS(rs, 19)] = FMA(T5I, T5N, T5O);
Chris@42 823 cr[WS(rs, 19)] = FNMS(T5M, T5N, T5K);
Chris@42 824 ci[WS(rs, 9)] = FMA(TT, T5p, T5q);
Chris@42 825 cr[WS(rs, 9)] = FNMS(TX, T5p, T5e);
Chris@42 826 ci[WS(rs, 24)] = FMA(T23, T5t, T5u);
Chris@42 827 cr[WS(rs, 24)] = FNMS(T25, T5t, T5s);
Chris@42 828 }
Chris@42 829 }
Chris@42 830 }
Chris@42 831 }
Chris@42 832 }
Chris@42 833 }
Chris@42 834 }
Chris@42 835 T48 = FNMS(KP871714437, T39, T2U);
Chris@42 836 T3a = FMA(KP871714437, T39, T2U);
Chris@42 837 T3F = FMA(KP831864738, T3E, T3p);
Chris@42 838 T49 = FNMS(KP831864738, T3E, T3p);
Chris@42 839 T4a = FMA(KP559154169, T49, T48);
Chris@42 840 T4w = FNMS(KP683113946, T48, T49);
Chris@42 841 T46 = FMA(KP904730450, T43, T42);
Chris@42 842 T44 = FNMS(KP904730450, T43, T42);
Chris@42 843 }
Chris@42 844 }
Chris@42 845 }
Chris@42 846 }
Chris@42 847 }
Chris@42 848 }
Chris@42 849 {
Chris@42 850 E T45, T4L, T3G, T3I;
Chris@42 851 T45 = FNMS(KP242145790, T44, T41);
Chris@42 852 T4L = FMA(KP968583161, T44, T41);
Chris@42 853 T3G = FMA(KP904730450, T3F, T3a);
Chris@42 854 T3I = FNMS(KP904730450, T3F, T3a);
Chris@42 855 {
Chris@42 856 E T4v, T47, T4J, T3H;
Chris@42 857 T4v = FNMS(KP541454447, T46, T45);
Chris@42 858 T47 = FMA(KP541454447, T46, T45);
Chris@42 859 T4J = FMA(KP968583161, T3G, T2F);
Chris@42 860 T3H = FNMS(KP242145790, T3G, T2F);
Chris@42 861 {
Chris@42 862 E T4b, T4j, T4x, T4H;
Chris@42 863 T4b = FMA(KP921177326, T4a, T47);
Chris@42 864 T4j = FNMS(KP921177326, T4a, T47);
Chris@42 865 T4x = FNMS(KP833417178, T4w, T4v);
Chris@42 866 T4H = FMA(KP833417178, T4w, T4v);
Chris@42 867 {
Chris@42 868 E T3J, T4p, T4M, T4K;
Chris@42 869 T3J = FMA(KP541454447, T3I, T3H);
Chris@42 870 T4p = FNMS(KP541454447, T3I, T3H);
Chris@42 871 T4M = TQ * T4J;
Chris@42 872 T4K = TN * T4J;
Chris@42 873 {
Chris@42 874 E T4D, T4r, T4f, T3R;
Chris@42 875 T4D = FMA(KP833417178, T4q, T4p);
Chris@42 876 T4r = FNMS(KP833417178, T4q, T4p);
Chris@42 877 T4f = FMA(KP921177326, T3Q, T3J);
Chris@42 878 T3R = FNMS(KP921177326, T3Q, T3J);
Chris@42 879 ci[WS(rs, 1)] = FMA(TN, T4L, T4M);
Chris@42 880 cr[WS(rs, 1)] = FNMS(TQ, T4L, T4K);
Chris@42 881 {
Chris@42 882 E T4y, T4s, T4I, T4E;
Chris@42 883 T4y = T4u * T4r;
Chris@42 884 T4s = T4o * T4r;
Chris@42 885 T4I = T4G * T4D;
Chris@42 886 T4E = T4C * T4D;
Chris@42 887 {
Chris@42 888 E T4c, T3S, T4k, T4g;
Chris@42 889 T4c = T3U * T3R;
Chris@42 890 T3S = T2y * T3R;
Chris@42 891 T4k = T4i * T4f;
Chris@42 892 T4g = T4e * T4f;
Chris@42 893 ci[WS(rs, 11)] = FMA(T4o, T4x, T4y);
Chris@42 894 cr[WS(rs, 11)] = FNMS(T4u, T4x, T4s);
Chris@42 895 ci[WS(rs, 16)] = FMA(T4C, T4H, T4I);
Chris@42 896 cr[WS(rs, 16)] = FNMS(T4G, T4H, T4E);
Chris@42 897 ci[WS(rs, 6)] = FMA(T2y, T4b, T4c);
Chris@42 898 cr[WS(rs, 6)] = FNMS(T3U, T4b, T3S);
Chris@42 899 ci[WS(rs, 21)] = FMA(T4e, T4j, T4k);
Chris@42 900 cr[WS(rs, 21)] = FNMS(T4i, T4j, T4g);
Chris@42 901 }
Chris@42 902 }
Chris@42 903 }
Chris@42 904 }
Chris@42 905 }
Chris@42 906 }
Chris@42 907 }
Chris@42 908 }
Chris@42 909 }
Chris@42 910 }
Chris@42 911
/*
 * Twiddle instruction table for the HAVE_FMA variant; it is referenced by
 * the descriptor `desc` that follows.  It holds four TW_CEXP entries whose
 * last fields are 1, 3, 9 and 24, closed by a TW_NEXT entry.
 * NOTE(review): presumably each TW_CEXP entry requests one stored complex
 * twiddle with that exponent multiplier (which would fit the -twiddle-log3
 * generator flag) -- confirm against the tw_instr definition.
 */
Chris@42 912 static const tw_instr twinstr[] = {
Chris@42 913 {TW_CEXP, 1, 1},
Chris@42 914 {TW_CEXP, 1, 3},
Chris@42 915 {TW_CEXP, 1, 9},
Chris@42 916 {TW_CEXP, 1, 24},
Chris@42 917 {TW_NEXT, 1, 0}
Chris@42 918 };
Chris@42 919
/*
 * Descriptor handed to khc2hc_register for the HAVE_FMA variant.
 * 25 matches the -n 25 generator argument, "hb2_25" is the codelet name and
 * twinstr is the twiddle instruction table.  GENUS is not defined in the
 * visible part of this file (presumably supplied by hb.h).
 * {84, 78, 356, 0} repeats the operation counts from this variant's header
 * comment (84 additions, 78 multiplications, 356 fused multiply/adds); the
 * meaning of the trailing 0 is not visible here.
 */
Chris@42 920 static const hc2hc_desc desc = { 25, "hb2_25", twinstr, &GENUS, {84, 78, 356, 0} };
Chris@42 921
/*
 * Registration entry point (HAVE_FMA variant): passes the hb2_25 kernel and
 * its descriptor `desc` to the planner p through khc2hc_register.
 * X() is a name-wrapping macro defined elsewhere (presumably it adds the
 * library's symbol prefix -- confirm).
 */
Chris@42 922 void X(codelet_hb2_25) (planner *p) {
Chris@42 923 X(khc2hc_register) (p, hb2_25, &desc);
Chris@42 924 }
Chris@42 925 #else /* HAVE_FMA */
Chris@42 926
Chris@42 927 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 25 -dif -name hb2_25 -include hb.h */
Chris@42 928
Chris@42 929 /*
Chris@42 930 * This function contains 440 FP additions, 340 FP multiplications,
Chris@42 931 * (or, 280 additions, 180 multiplications, 160 fused multiply/add),
Chris@42 932 * 155 stack variables, 20 constants, and 100 memory accesses
Chris@42 933 */
Chris@42 934 #include "hb.h"
Chris@42 935
/*
 * hb2_25, non-FMA variant (the #else branch of HAVE_FMA).
 *
 * Per the "Generated by" line above this function, this is a size-25
 * (-n 25) codelet emitted by gen_hc2hc with -sign 1, -dif and
 * -twiddle-log3.  The code is machine-generated; the order of statements
 * and the temporaries should not be edited by hand.
 *
 * Parameters, as used by the body:
 *   cr, ci  - data arrays, read and overwritten in place.  Every iteration
 *             reads cr[WS(rs, k)] and ci[WS(rs, k)] for all k in 0..24 and
 *             then stores new values to the same 50 locations.
 *   W       - twiddle table; eight reals (W[0]..W[7]) are consumed per
 *             iteration.  The pointer is first advanced by (mb - 1) * 8.
 *   rs      - stride handed to WS() to address the 25 slots.
 *   mb, me  - the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms      - per-iteration step: cr advances by +ms while ci moves by -ms.
 *
 * NOTE(review): E, R, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE come from the included FFTW headers and are not
 * visible here.  The comments below assume FMA(a, b, c) = a * b + c and
 * FNMS(a, b, c) = c - a * b -- confirm against those headers.
 */
Chris@42 936 static void hb2_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 937 {
/* Real constants used by the arithmetic below (declared through DK). */
Chris@42 938 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@42 939 DK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@42 940 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@42 941 DK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@42 942 DK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@42 943 DK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@42 944 DK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@42 945 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@42 946 DK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@42 947 DK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@42 948 DK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@42 949 DK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@42 950 DK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@42 951 DK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@42 952 DK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@42 953 DK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@42 954 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 955 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 956 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@42 957 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 958 {
Chris@42 959 INT m;
/* One pass per m; cr, ci and W are all stepped in the loop header. */
Chris@42 960 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) {
Chris@42 961 E TN, TQ, TO, TR, TT, TY, T2t, T2r, TZ, TU, T4f, T4l, T2d, T4v, T5m;
Chris@42 962 E T2j, T5l, T4X, T2v, T11, T3R, T1L, T5d, T6x, T5h, T6t, T25, T26, T27, T29;
Chris@42 963 E T6D, T7v, T49, T7l, T7p, T7t, T2p, T2n, T4b, T4p, T5n, T6B, T5b, T5p, T6p;
Chris@42 964 E T6r, T59, T4r;
/*
 * Twiddle setup.  (TN, TQ), (TO, TR), (TU, TZ) and (T25, T26) are the four
 * pairs loaded from W[0..7]; every other factor pair used at the stores
 * further down is built here from sums/differences of products of these.
 * NOTE(review): this looks like the -twiddle-log3 scheme, i.e. deriving the
 * remaining complex twiddles by complex multiplication of the stored ones
 * -- confirm against the genfft documentation.
 */
Chris@42 965 {
Chris@42 966 E T2c, T4j, T2h, T4e, T2b, T4k, T2i, T4d;
Chris@42 967 {
Chris@42 968 E TP, TX, TS, TW;
Chris@42 969 TN = W[0];
Chris@42 970 TQ = W[1];
Chris@42 971 TO = W[2];
Chris@42 972 TR = W[3];
Chris@42 973 TP = TN * TO;
Chris@42 974 TX = TQ * TO;
Chris@42 975 TS = TQ * TR;
Chris@42 976 TW = TN * TR;
Chris@42 977 TT = TP - TS;
Chris@42 978 TY = TW + TX;
Chris@42 979 T2t = TW - TX;
Chris@42 980 T2r = TP + TS;
Chris@42 981 TZ = W[5];
Chris@42 982 T2c = TQ * TZ;
Chris@42 983 T4j = TO * TZ;
Chris@42 984 T2h = TN * TZ;
Chris@42 985 T4e = TR * TZ;
Chris@42 986 TU = W[4];
Chris@42 987 T2b = TN * TU;
Chris@42 988 T4k = TR * TU;
Chris@42 989 T2i = TQ * TU;
Chris@42 990 T4d = TO * TU;
Chris@42 991 }
Chris@42 992 T4f = T4d - T4e;
Chris@42 993 T4l = T4j + T4k;
Chris@42 994 {
Chris@42 995 E T2s, T2u, TV, T10, T3P, T3Q, T1J, T1K;
Chris@42 996 T2d = T2b - T2c;
Chris@42 997 T4v = T2b + T2c;
Chris@42 998 T5m = T4j - T4k;
Chris@42 999 T2j = T2h + T2i;
Chris@42 1000 T5l = T4d + T4e;
Chris@42 1001 T4X = T2h - T2i;
Chris@42 1002 T2s = T2r * TU;
Chris@42 1003 T2u = T2t * TZ;
Chris@42 1004 T2v = T2s + T2u;
Chris@42 1005 TV = TT * TU;
Chris@42 1006 T10 = TY * TZ;
Chris@42 1007 T11 = TV + T10;
Chris@42 1008 T3P = T2r * TZ;
Chris@42 1009 T3Q = T2t * TU;
Chris@42 1010 T3R = T3P - T3Q;
Chris@42 1011 T1J = TT * TZ;
Chris@42 1012 T1K = TY * TU;
Chris@42 1013 T1L = T1J - T1K;
Chris@42 1014 T5d = TV - T10;
Chris@42 1015 T6x = T3P + T3Q;
Chris@42 1016 T5h = T1J + T1K;
Chris@42 1017 T6t = T2s - T2u;
Chris@42 1018 T25 = W[6];
Chris@42 1019 T26 = W[7];
Chris@42 1020 T27 = FMA(TT, T25, TY * T26);
Chris@42 1021 T29 = FNMS(TY, T25, TT * T26);
Chris@42 1022 T6D = FNMS(T4X, T25, T4v * T26);
Chris@42 1023 T7v = FNMS(T1L, T25, T11 * T26);
Chris@42 1024 T49 = FMA(T2r, T25, T2t * T26);
Chris@42 1025 T7l = FMA(T2d, T25, T2j * T26);
Chris@42 1026 T7p = FNMS(T2j, T25, T2d * T26);
Chris@42 1027 T7t = FMA(T11, T25, T1L * T26);
Chris@42 1028 T2p = FNMS(TZ, T25, TU * T26);
Chris@42 1029 T2n = FMA(TU, T25, TZ * T26);
Chris@42 1030 T4b = FNMS(T2t, T25, T2r * T26);
Chris@42 1031 T4p = FMA(T2v, T25, T3R * T26);
Chris@42 1032 T5n = FMA(T5l, T25, T5m * T26);
Chris@42 1033 T6B = FMA(T4v, T25, T4X * T26);
Chris@42 1034 T5b = FNMS(TQ, T25, TN * T26);
Chris@42 1035 T5p = FNMS(T5m, T25, T5l * T26);
Chris@42 1036 T6p = FMA(TO, T25, TR * T26);
Chris@42 1037 T6r = FNMS(TR, T25, TO * T26);
Chris@42 1038 T59 = FMA(TN, T25, TQ * T26);
Chris@42 1039 T4r = FNMS(T3R, T25, T2v * T26);
Chris@42 1040 }
Chris@42 1041 }
/*
 * Main computation.  The first four inner blocks load all 50 input values
 * and reduce them to intermediate terms; the remaining five blocks combine
 * those terms, multiply by a twiddle pair and store the results.
 */
Chris@42 1042 {
Chris@42 1043 E T9, T6i, T40, T3z, T5Y, Ti, Tr, Ts, T1d, T1m, T1P, T2K, T4P, T3H, T4y;
Chris@42 1044 E T5G, T71, T65, T6N, T5z, T70, T64, T6K, T2Z, T4Q, T3I, T4B, T20, T5Z, T3C;
Chris@42 1045 E T43, T6j, TB, TK, TL, T1w, T1F, T1Q, T3f, T4S, T3K, T4F, T5V, T74, T68;
Chris@42 1046 E T6U, T5O, T73, T67, T6R, T3u, T4T, T3L, T4I;
/* Load block 1: cr slots 0, 5, 10 and ci slots 4, 9. */
Chris@42 1047 {
Chris@42 1048 E T1, T4, T7, T8, T3Z, T3Y, T3x, T3y;
Chris@42 1049 T1 = cr[0];
Chris@42 1050 {
Chris@42 1051 E T2, T3, T5, T6;
Chris@42 1052 T2 = cr[WS(rs, 5)];
Chris@42 1053 T3 = ci[WS(rs, 4)];
Chris@42 1054 T4 = T2 + T3;
Chris@42 1055 T5 = cr[WS(rs, 10)];
Chris@42 1056 T6 = ci[WS(rs, 9)];
Chris@42 1057 T7 = T5 + T6;
Chris@42 1058 T8 = T4 + T7;
Chris@42 1059 T3Z = T5 - T6;
Chris@42 1060 T3Y = T2 - T3;
Chris@42 1061 }
Chris@42 1062 T9 = T1 + T8;
Chris@42 1063 T6i = FMA(KP951056516, T3Y, KP587785252 * T3Z);
Chris@42 1064 T40 = FNMS(KP951056516, T3Z, KP587785252 * T3Y);
Chris@42 1065 T3x = FNMS(KP250000000, T8, T1);
Chris@42 1066 T3y = KP559016994 * (T4 - T7);
Chris@42 1067 T3z = T3x - T3y;
Chris@42 1068 T5Y = T3y + T3x;
Chris@42 1069 }
/* Load block 2: twenty inputs, reduced to Ti, Tr, Ts, T1d, T1m, T1P and the
 * scaled combinations T2K..T4B that the store blocks consume. */
Chris@42 1070 {
Chris@42 1071 E Ta, T2x, T5w, T2F, Th, T2w, T1e, T2P, T5B, T2X, T1l, T2O, Tj, T2N, T5D;
Chris@42 1072 E T2T, Tq, T2S, T15, T2B, T5u, T2H, T1c, T2G;
Chris@42 1073 {
Chris@42 1074 E Tg, T2E, Td, T2D;
Chris@42 1075 Ta = cr[WS(rs, 1)];
Chris@42 1076 {
Chris@42 1077 E Te, Tf, Tb, Tc;
Chris@42 1078 Te = cr[WS(rs, 11)];
Chris@42 1079 Tf = ci[WS(rs, 8)];
Chris@42 1080 Tg = Te + Tf;
Chris@42 1081 T2E = Te - Tf;
Chris@42 1082 Tb = cr[WS(rs, 6)];
Chris@42 1083 Tc = ci[WS(rs, 3)];
Chris@42 1084 Td = Tb + Tc;
Chris@42 1085 T2D = Tb - Tc;
Chris@42 1086 }
Chris@42 1087 T2x = KP559016994 * (Td - Tg);
Chris@42 1088 T5w = FMA(KP951056516, T2D, KP587785252 * T2E);
Chris@42 1089 T2F = FNMS(KP951056516, T2E, KP587785252 * T2D);
Chris@42 1090 Th = Td + Tg;
Chris@42 1091 T2w = FNMS(KP250000000, Th, Ta);
Chris@42 1092 }
Chris@42 1093 {
Chris@42 1094 E T1k, T2W, T1h, T2V;
Chris@42 1095 T1e = ci[WS(rs, 20)];
Chris@42 1096 {
Chris@42 1097 E T1i, T1j, T1f, T1g;
Chris@42 1098 T1i = cr[WS(rs, 14)];
Chris@42 1099 T1j = cr[WS(rs, 19)];
Chris@42 1100 T1k = T1i + T1j;
Chris@42 1101 T2W = T1j - T1i;
Chris@42 1102 T1f = ci[WS(rs, 15)];
Chris@42 1103 T1g = cr[WS(rs, 24)];
Chris@42 1104 T1h = T1f - T1g;
Chris@42 1105 T2V = T1f + T1g;
Chris@42 1106 }
Chris@42 1107 T2P = KP559016994 * (T1h + T1k);
Chris@42 1108 T5B = FMA(KP951056516, T2V, KP587785252 * T2W);
Chris@42 1109 T2X = FNMS(KP951056516, T2W, KP587785252 * T2V);
Chris@42 1110 T1l = T1h - T1k;
Chris@42 1111 T2O = FNMS(KP250000000, T1l, T1e);
Chris@42 1112 }
Chris@42 1113 {
Chris@42 1114 E Tp, T2M, Tm, T2L;
Chris@42 1115 Tj = cr[WS(rs, 4)];
Chris@42 1116 {
Chris@42 1117 E Tn, To, Tk, Tl;
Chris@42 1118 Tn = ci[WS(rs, 10)];
Chris@42 1119 To = ci[WS(rs, 5)];
Chris@42 1120 Tp = Tn + To;
Chris@42 1121 T2M = Tn - To;
Chris@42 1122 Tk = cr[WS(rs, 9)];
Chris@42 1123 Tl = ci[0];
Chris@42 1124 Tm = Tk + Tl;
Chris@42 1125 T2L = Tk - Tl;
Chris@42 1126 }
Chris@42 1127 T2N = FNMS(KP951056516, T2M, KP587785252 * T2L);
Chris@42 1128 T5D = FMA(KP951056516, T2L, KP587785252 * T2M);
Chris@42 1129 T2T = KP559016994 * (Tm - Tp);
Chris@42 1130 Tq = Tm + Tp;
Chris@42 1131 T2S = FNMS(KP250000000, Tq, Tj);
Chris@42 1132 }
Chris@42 1133 {
Chris@42 1134 E T1b, T2A, T18, T2z;
Chris@42 1135 T15 = ci[WS(rs, 23)];
Chris@42 1136 {
Chris@42 1137 E T19, T1a, T16, T17;
Chris@42 1138 T19 = ci[WS(rs, 13)];
Chris@42 1139 T1a = cr[WS(rs, 16)];
Chris@42 1140 T1b = T19 - T1a;
Chris@42 1141 T2A = T19 + T1a;
Chris@42 1142 T16 = ci[WS(rs, 18)];
Chris@42 1143 T17 = cr[WS(rs, 21)];
Chris@42 1144 T18 = T16 - T17;
Chris@42 1145 T2z = T16 + T17;
Chris@42 1146 }
Chris@42 1147 T2B = FNMS(KP951056516, T2A, KP587785252 * T2z);
Chris@42 1148 T5u = FMA(KP951056516, T2z, KP587785252 * T2A);
Chris@42 1149 T2H = KP559016994 * (T18 - T1b);
Chris@42 1150 T1c = T18 + T1b;
Chris@42 1151 T2G = FNMS(KP250000000, T1c, T15);
Chris@42 1152 }
Chris@42 1153 Ti = Ta + Th;
Chris@42 1154 Tr = Tj + Tq;
Chris@42 1155 Ts = Ti + Tr;
Chris@42 1156 T1d = T15 + T1c;
Chris@42 1157 T1m = T1e + T1l;
Chris@42 1158 T1P = T1d + T1m;
Chris@42 1159 {
Chris@42 1160 E T2C, T4w, T2J, T4x, T2y, T2I;
Chris@42 1161 T2y = T2w - T2x;
Chris@42 1162 T2C = T2y - T2B;
Chris@42 1163 T4w = T2y + T2B;
Chris@42 1164 T2I = T2G - T2H;
Chris@42 1165 T2J = T2F + T2I;
Chris@42 1166 T4x = T2I - T2F;
Chris@42 1167 T2K = FNMS(KP481753674, T2J, KP876306680 * T2C);
Chris@42 1168 T4P = FMA(KP728968627, T4x, KP684547105 * T4w);
Chris@42 1169 T3H = FMA(KP876306680, T2J, KP481753674 * T2C);
Chris@42 1170 T4y = FNMS(KP684547105, T4x, KP728968627 * T4w);
Chris@42 1171 }
Chris@42 1172 {
Chris@42 1173 E T5C, T6M, T5F, T6L, T5A, T5E;
Chris@42 1174 T5A = T2T + T2S;
Chris@42 1175 T5C = T5A - T5B;
Chris@42 1176 T6M = T5A + T5B;
Chris@42 1177 T5E = T2O + T2P;
Chris@42 1178 T5F = T5D + T5E;
Chris@42 1179 T6L = T5E - T5D;
Chris@42 1180 T5G = FNMS(KP844327925, T5F, KP535826794 * T5C);
Chris@42 1181 T71 = FMA(KP637423989, T6L, KP770513242 * T6M);
Chris@42 1182 T65 = FMA(KP535826794, T5F, KP844327925 * T5C);
Chris@42 1183 T6N = FNMS(KP637423989, T6M, KP770513242 * T6L);
Chris@42 1184 }
Chris@42 1185 {
Chris@42 1186 E T5v, T6I, T5y, T6J, T5t, T5x;
Chris@42 1187 T5t = T2x + T2w;
Chris@42 1188 T5v = T5t - T5u;
Chris@42 1189 T6I = T5t + T5u;
Chris@42 1190 T5x = T2H + T2G;
Chris@42 1191 T5y = T5w + T5x;
Chris@42 1192 T6J = T5x - T5w;
Chris@42 1193 T5z = FNMS(KP248689887, T5y, KP968583161 * T5v);
Chris@42 1194 T70 = FMA(KP535826794, T6J, KP844327925 * T6I);
Chris@42 1195 T64 = FMA(KP968583161, T5y, KP248689887 * T5v);
Chris@42 1196 T6K = FNMS(KP844327925, T6J, KP535826794 * T6I);
Chris@42 1197 }
Chris@42 1198 {
Chris@42 1199 E T2R, T4z, T2Y, T4A, T2Q, T2U;
Chris@42 1200 T2Q = T2O - T2P;
Chris@42 1201 T2R = T2N + T2Q;
Chris@42 1202 T4z = T2Q - T2N;
Chris@42 1203 T2U = T2S - T2T;
Chris@42 1204 T2Y = T2U - T2X;
Chris@42 1205 T4A = T2U + T2X;
Chris@42 1206 T2Z = FMA(KP904827052, T2R, KP425779291 * T2Y);
Chris@42 1207 T4Q = FNMS(KP992114701, T4z, KP125333233 * T4A);
Chris@42 1208 T3I = FNMS(KP425779291, T2R, KP904827052 * T2Y);
Chris@42 1209 T4B = FMA(KP125333233, T4z, KP992114701 * T4A);
Chris@42 1210 }
Chris@42 1211 }
/* Load block 3: ci slots 24, 19, 14 and cr slots 20, 15. */
Chris@42 1212 {
Chris@42 1213 E T1S, T1V, T1Y, T1Z, T3B, T3A, T41, T42;
Chris@42 1214 T1S = ci[WS(rs, 24)];
Chris@42 1215 {
Chris@42 1216 E T1T, T1U, T1W, T1X;
Chris@42 1217 T1T = ci[WS(rs, 19)];
Chris@42 1218 T1U = cr[WS(rs, 20)];
Chris@42 1219 T1V = T1T - T1U;
Chris@42 1220 T1W = ci[WS(rs, 14)];
Chris@42 1221 T1X = cr[WS(rs, 15)];
Chris@42 1222 T1Y = T1W - T1X;
Chris@42 1223 T1Z = T1V + T1Y;
Chris@42 1224 T3B = T1W + T1X;
Chris@42 1225 T3A = T1T + T1U;
Chris@42 1226 }
Chris@42 1227 T20 = T1S + T1Z;
Chris@42 1228 T5Z = FMA(KP951056516, T3A, KP587785252 * T3B);
Chris@42 1229 T3C = FNMS(KP951056516, T3B, KP587785252 * T3A);
Chris@42 1230 T41 = FNMS(KP250000000, T1Z, T1S);
Chris@42 1231 T42 = KP559016994 * (T1V - T1Y);
Chris@42 1232 T43 = T41 - T42;
Chris@42 1233 T6j = T42 + T41;
Chris@42 1234 }
/* Load block 4: the remaining twenty inputs, reduced to TB, TK, TL, T1w,
 * T1F, T1Q and the scaled combinations T3f..T4I. */
Chris@42 1235 {
Chris@42 1236 E Tt, T32, T5L, T3a, TA, T31, T1o, T36, T5J, T3c, T1v, T3b, TC, T3h, T5S;
Chris@42 1237 E T3p, TJ, T3g, T1x, T3l, T5Q, T3r, T1E, T3q;
Chris@42 1238 {
Chris@42 1239 E Tw, T38, Tz, T39;
Chris@42 1240 Tt = cr[WS(rs, 2)];
Chris@42 1241 {
Chris@42 1242 E Tu, Tv, Tx, Ty;
Chris@42 1243 Tu = cr[WS(rs, 7)];
Chris@42 1244 Tv = ci[WS(rs, 2)];
Chris@42 1245 Tw = Tu + Tv;
Chris@42 1246 T38 = Tu - Tv;
Chris@42 1247 Tx = cr[WS(rs, 12)];
Chris@42 1248 Ty = ci[WS(rs, 7)];
Chris@42 1249 Tz = Tx + Ty;
Chris@42 1250 T39 = Tx - Ty;
Chris@42 1251 }
Chris@42 1252 T32 = KP559016994 * (Tw - Tz);
Chris@42 1253 T5L = FMA(KP951056516, T38, KP587785252 * T39);
Chris@42 1254 T3a = FNMS(KP951056516, T39, KP587785252 * T38);
Chris@42 1255 TA = Tw + Tz;
Chris@42 1256 T31 = FNMS(KP250000000, TA, Tt);
Chris@42 1257 }
Chris@42 1258 {
Chris@42 1259 E T1r, T34, T1u, T35;
Chris@42 1260 T1o = ci[WS(rs, 22)];
Chris@42 1261 {
Chris@42 1262 E T1p, T1q, T1s, T1t;
Chris@42 1263 T1p = ci[WS(rs, 17)];
Chris@42 1264 T1q = cr[WS(rs, 22)];
Chris@42 1265 T1r = T1p - T1q;
Chris@42 1266 T34 = T1p + T1q;
Chris@42 1267 T1s = ci[WS(rs, 12)];
Chris@42 1268 T1t = cr[WS(rs, 17)];
Chris@42 1269 T1u = T1s - T1t;
Chris@42 1270 T35 = T1s + T1t;
Chris@42 1271 }
Chris@42 1272 T36 = FNMS(KP951056516, T35, KP587785252 * T34);
Chris@42 1273 T5J = FMA(KP951056516, T34, KP587785252 * T35);
Chris@42 1274 T3c = KP559016994 * (T1r - T1u);
Chris@42 1275 T1v = T1r + T1u;
Chris@42 1276 T3b = FNMS(KP250000000, T1v, T1o);
Chris@42 1277 }
Chris@42 1278 {
Chris@42 1279 E TI, T3o, TF, T3n;
Chris@42 1280 TC = cr[WS(rs, 3)];
Chris@42 1281 {
Chris@42 1282 E TG, TH, TD, TE;
Chris@42 1283 TG = ci[WS(rs, 11)];
Chris@42 1284 TH = ci[WS(rs, 6)];
Chris@42 1285 TI = TG + TH;
Chris@42 1286 T3o = TG - TH;
Chris@42 1287 TD = cr[WS(rs, 8)];
Chris@42 1288 TE = ci[WS(rs, 1)];
Chris@42 1289 TF = TD + TE;
Chris@42 1290 T3n = TD - TE;
Chris@42 1291 }
Chris@42 1292 T3h = KP559016994 * (TF - TI);
Chris@42 1293 T5S = FMA(KP951056516, T3n, KP587785252 * T3o);
Chris@42 1294 T3p = FNMS(KP951056516, T3o, KP587785252 * T3n);
Chris@42 1295 TJ = TF + TI;
Chris@42 1296 T3g = FNMS(KP250000000, TJ, TC);
Chris@42 1297 }
Chris@42 1298 {
Chris@42 1299 E T1D, T3k, T1A, T3j;
Chris@42 1300 T1x = ci[WS(rs, 21)];
Chris@42 1301 {
Chris@42 1302 E T1B, T1C, T1y, T1z;
Chris@42 1303 T1B = cr[WS(rs, 13)];
Chris@42 1304 T1C = cr[WS(rs, 18)];
Chris@42 1305 T1D = T1B + T1C;
Chris@42 1306 T3k = T1C - T1B;
Chris@42 1307 T1y = ci[WS(rs, 16)];
Chris@42 1308 T1z = cr[WS(rs, 23)];
Chris@42 1309 T1A = T1y - T1z;
Chris@42 1310 T3j = T1y + T1z;
Chris@42 1311 }
Chris@42 1312 T3l = FNMS(KP951056516, T3k, KP587785252 * T3j);
Chris@42 1313 T5Q = FMA(KP951056516, T3j, KP587785252 * T3k);
Chris@42 1314 T3r = KP559016994 * (T1A + T1D);
Chris@42 1315 T1E = T1A - T1D;
Chris@42 1316 T3q = FNMS(KP250000000, T1E, T1x);
Chris@42 1317 }
Chris@42 1318 TB = Tt + TA;
Chris@42 1319 TK = TC + TJ;
Chris@42 1320 TL = TB + TK;
Chris@42 1321 T1w = T1o + T1v;
Chris@42 1322 T1F = T1x + T1E;
Chris@42 1323 T1Q = T1w + T1F;
Chris@42 1324 {
Chris@42 1325 E T37, T4D, T3e, T4E, T33, T3d;
Chris@42 1326 T33 = T31 - T32;
Chris@42 1327 T37 = T33 - T36;
Chris@42 1328 T4D = T33 + T36;
Chris@42 1329 T3d = T3b - T3c;
Chris@42 1330 T3e = T3a + T3d;
Chris@42 1331 T4E = T3d - T3a;
Chris@42 1332 T3f = FNMS(KP844327925, T3e, KP535826794 * T37);
Chris@42 1333 T4S = FMA(KP062790519, T4E, KP998026728 * T4D);
Chris@42 1334 T3K = FMA(KP535826794, T3e, KP844327925 * T37);
Chris@42 1335 T4F = FNMS(KP998026728, T4E, KP062790519 * T4D);
Chris@42 1336 }
Chris@42 1337 {
Chris@42 1338 E T5R, T6T, T5U, T6S, T5P, T5T;
Chris@42 1339 T5P = T3h + T3g;
Chris@42 1340 T5R = T5P - T5Q;
Chris@42 1341 T6T = T5P + T5Q;
Chris@42 1342 T5T = T3q + T3r;
Chris@42 1343 T5U = T5S + T5T;
Chris@42 1344 T6S = T5T - T5S;
Chris@42 1345 T5V = FNMS(KP684547105, T5U, KP728968627 * T5R);
Chris@42 1346 T74 = FNMS(KP992114701, T6S, KP125333233 * T6T);
Chris@42 1347 T68 = FMA(KP728968627, T5U, KP684547105 * T5R);
Chris@42 1348 T6U = FMA(KP125333233, T6S, KP992114701 * T6T);
Chris@42 1349 }
Chris@42 1350 {
Chris@42 1351 E T5K, T6Q, T5N, T6P, T5I, T5M;
Chris@42 1352 T5I = T32 + T31;
Chris@42 1353 T5K = T5I - T5J;
Chris@42 1354 T6Q = T5I + T5J;
Chris@42 1355 T5M = T3c + T3b;
Chris@42 1356 T5N = T5L + T5M;
Chris@42 1357 T6P = T5M - T5L;
Chris@42 1358 T5O = FNMS(KP481753674, T5N, KP876306680 * T5K);
Chris@42 1359 T73 = FNMS(KP425779291, T6P, KP904827052 * T6Q);
Chris@42 1360 T67 = FMA(KP876306680, T5N, KP481753674 * T5K);
Chris@42 1361 T6R = FMA(KP904827052, T6P, KP425779291 * T6Q);
Chris@42 1362 }
Chris@42 1363 {
Chris@42 1364 E T3m, T4H, T3t, T4G, T3i, T3s;
Chris@42 1365 T3i = T3g - T3h;
Chris@42 1366 T3m = T3i - T3l;
Chris@42 1367 T4H = T3i + T3l;
Chris@42 1368 T3s = T3q - T3r;
Chris@42 1369 T3t = T3p + T3s;
Chris@42 1370 T4G = T3s - T3p;
Chris@42 1371 T3u = FNMS(KP998026728, T3t, KP062790519 * T3m);
Chris@42 1372 T4T = FNMS(KP637423989, T4G, KP770513242 * T4H);
Chris@42 1373 T3L = FMA(KP062790519, T3t, KP998026728 * T3m);
Chris@42 1374 T4I = FMA(KP770513242, T4G, KP637423989 * T4H);
Chris@42 1375 }
Chris@42 1376 }
/*
 * Store block for slots 0, 5, 10, 15 and 20.  Slot 0 is written without a
 * twiddle factor; each other slot k is written as
 * cr[k] = a * x - b * y, ci[k] = b * x + a * y for a factor pair (a, b)
 * (under the FMA/FNMS assumption stated in the header).
 */
Chris@42 1377 {
Chris@42 1378 E TM, T14, T2e, T21, T23, T2l, T1H, T2f, T1O, T2k;
Chris@42 1379 {
Chris@42 1380 E T12, T13, T1R, T22;
Chris@42 1381 T12 = KP559016994 * (Ts - TL);
Chris@42 1382 TM = Ts + TL;
Chris@42 1383 T13 = FNMS(KP250000000, TM, T9);
Chris@42 1384 T14 = T12 + T13;
Chris@42 1385 T2e = T13 - T12;
Chris@42 1386 T1R = KP559016994 * (T1P - T1Q);
Chris@42 1387 T21 = T1P + T1Q;
Chris@42 1388 T22 = FNMS(KP250000000, T21, T20);
Chris@42 1389 T23 = T1R + T22;
Chris@42 1390 T2l = T22 - T1R;
Chris@42 1391 }
Chris@42 1392 {
Chris@42 1393 E T1n, T1G, T1M, T1N;
Chris@42 1394 T1n = T1d - T1m;
Chris@42 1395 T1G = T1w - T1F;
Chris@42 1396 T1H = FMA(KP951056516, T1n, KP587785252 * T1G);
Chris@42 1397 T2f = FNMS(KP951056516, T1G, KP587785252 * T1n);
Chris@42 1398 T1M = Ti - Tr;
Chris@42 1399 T1N = TB - TK;
Chris@42 1400 T1O = FMA(KP951056516, T1M, KP587785252 * T1N);
Chris@42 1401 T2k = FNMS(KP951056516, T1N, KP587785252 * T1M);
Chris@42 1402 }
Chris@42 1403 {
Chris@42 1404 E T1I, T24, T2o, T2q;
Chris@42 1405 cr[0] = T9 + TM;
Chris@42 1406 ci[0] = T20 + T21;
Chris@42 1407 T1I = T14 - T1H;
Chris@42 1408 T24 = T1O + T23;
Chris@42 1409 cr[WS(rs, 5)] = FNMS(T1L, T24, T11 * T1I);
Chris@42 1410 ci[WS(rs, 5)] = FMA(T1L, T1I, T11 * T24);
Chris@42 1411 T2o = T2e + T2f;
Chris@42 1412 T2q = T2l - T2k;
Chris@42 1413 cr[WS(rs, 15)] = FNMS(T2p, T2q, T2n * T2o);
Chris@42 1414 ci[WS(rs, 15)] = FMA(T2p, T2o, T2n * T2q);
Chris@42 1415 {
Chris@42 1416 E T2g, T2m, T28, T2a;
Chris@42 1417 T2g = T2e - T2f;
Chris@42 1418 T2m = T2k + T2l;
Chris@42 1419 cr[WS(rs, 10)] = FNMS(T2j, T2m, T2d * T2g);
Chris@42 1420 ci[WS(rs, 10)] = FMA(T2j, T2g, T2d * T2m);
Chris@42 1421 T28 = T14 + T1H;
Chris@42 1422 T2a = T23 - T1O;
Chris@42 1423 cr[WS(rs, 20)] = FNMS(T29, T2a, T27 * T28);
Chris@42 1424 ci[WS(rs, 20)] = FMA(T29, T28, T27 * T2a);
Chris@42 1425 }
Chris@42 1426 }
Chris@42 1427 }
/* Store block for slots 4, 9, 14, 19 and 24. */
Chris@42 1428 {
Chris@42 1429 E T76, T7n, T7a, T7q, T6H, T6W, T6X, T6Y, T7e, T7f, T7d, T7g, T7x, T7y;
Chris@42 1430 {
Chris@42 1431 E T72, T75, T78, T79;
Chris@42 1432 T72 = T70 + T71;
Chris@42 1433 T75 = T73 - T74;
Chris@42 1434 T76 = FMA(KP951056516, T72, KP587785252 * T75);
Chris@42 1435 T7n = FNMS(KP951056516, T75, KP587785252 * T72);
Chris@42 1436 T78 = T6K - T6N;
Chris@42 1437 T79 = T6U - T6R;
Chris@42 1438 T7a = FMA(KP951056516, T78, KP587785252 * T79);
Chris@42 1439 T7q = FNMS(KP951056516, T79, KP587785252 * T78);
Chris@42 1440 }
Chris@42 1441 {
Chris@42 1442 E T6O, T6V, T7b, T7c;
Chris@42 1443 T6H = T5Y + T5Z;
Chris@42 1444 T6O = T6K + T6N;
Chris@42 1445 T6V = T6R + T6U;
Chris@42 1446 T6W = T6O - T6V;
Chris@42 1447 T6X = FNMS(KP250000000, T6W, T6H);
Chris@42 1448 T6Y = KP559016994 * (T6O + T6V);
Chris@42 1449 T7e = T6j - T6i;
Chris@42 1450 T7b = T70 - T71;
Chris@42 1451 T7c = T73 + T74;
Chris@42 1452 T7f = T7b + T7c;
Chris@42 1453 T7d = KP559016994 * (T7b - T7c);
Chris@42 1454 T7g = FNMS(KP250000000, T7f, T7e);
Chris@42 1455 }
Chris@42 1456 T7x = T6H + T6W;
Chris@42 1457 T7y = T7e + T7f;
Chris@42 1458 cr[WS(rs, 4)] = FNMS(TY, T7y, TT * T7x);
Chris@42 1459 ci[WS(rs, 4)] = FMA(TY, T7x, TT * T7y);
Chris@42 1460 {
Chris@42 1461 E T7o, T7u, T7s, T7w, T7m, T7r;
Chris@42 1462 T7m = T6X - T6Y;
Chris@42 1463 T7o = T7m - T7n;
Chris@42 1464 T7u = T7m + T7n;
Chris@42 1465 T7r = T7g - T7d;
Chris@42 1466 T7s = T7q + T7r;
Chris@42 1467 T7w = T7r - T7q;
Chris@42 1468 cr[WS(rs, 14)] = FNMS(T7p, T7s, T7l * T7o);
Chris@42 1469 ci[WS(rs, 14)] = FMA(T7p, T7o, T7l * T7s);
Chris@42 1470 cr[WS(rs, 19)] = FNMS(T7v, T7w, T7t * T7u);
Chris@42 1471 ci[WS(rs, 19)] = FMA(T7v, T7u, T7t * T7w);
Chris@42 1472 }
Chris@42 1473 {
Chris@42 1474 E T77, T7j, T7i, T7k, T6Z, T7h;
Chris@42 1475 T6Z = T6X + T6Y;
Chris@42 1476 T77 = T6Z - T76;
Chris@42 1477 T7j = T6Z + T76;
Chris@42 1478 T7h = T7d + T7g;
Chris@42 1479 T7i = T7a + T7h;
Chris@42 1480 T7k = T7h - T7a;
Chris@42 1481 cr[WS(rs, 9)] = FNMS(TZ, T7i, TU * T77);
Chris@42 1482 ci[WS(rs, 9)] = FMA(TZ, T77, TU * T7i);
Chris@42 1483 cr[WS(rs, 24)] = FNMS(T26, T7k, T25 * T7j);
Chris@42 1484 ci[WS(rs, 24)] = FMA(T26, T7j, T25 * T7k);
Chris@42 1485 }
Chris@42 1486 }
/* Store block for slots 2, 7, 12, 17 and 22. */
Chris@42 1487 {
Chris@42 1488 E T3N, T4h, T3U, T4m, T3D, T3E, T3w, T3F, T44, T45, T3X, T46, T4t, T4u;
Chris@42 1489 {
Chris@42 1490 E T3J, T3M, T3S, T3T;
Chris@42 1491 T3J = T3H - T3I;
Chris@42 1492 T3M = T3K - T3L;
Chris@42 1493 T3N = FMA(KP951056516, T3J, KP587785252 * T3M);
Chris@42 1494 T4h = FNMS(KP951056516, T3M, KP587785252 * T3J);
Chris@42 1495 T3S = T2K + T2Z;
Chris@42 1496 T3T = T3f - T3u;
Chris@42 1497 T3U = FMA(KP951056516, T3S, KP587785252 * T3T);
Chris@42 1498 T4m = FNMS(KP951056516, T3T, KP587785252 * T3S);
Chris@42 1499 }
Chris@42 1500 {
Chris@42 1501 E T30, T3v, T3V, T3W;
Chris@42 1502 T3D = T3z - T3C;
Chris@42 1503 T30 = T2K - T2Z;
Chris@42 1504 T3v = T3f + T3u;
Chris@42 1505 T3E = T30 + T3v;
Chris@42 1506 T3w = KP559016994 * (T30 - T3v);
Chris@42 1507 T3F = FNMS(KP250000000, T3E, T3D);
Chris@42 1508 T44 = T40 + T43;
Chris@42 1509 T3V = T3H + T3I;
Chris@42 1510 T3W = T3K + T3L;
Chris@42 1511 T45 = T3V + T3W;
Chris@42 1512 T3X = KP559016994 * (T3V - T3W);
Chris@42 1513 T46 = FNMS(KP250000000, T45, T44);
Chris@42 1514 }
Chris@42 1515 T4t = T3D + T3E;
Chris@42 1516 T4u = T44 + T45;
Chris@42 1517 cr[WS(rs, 2)] = FNMS(T2t, T4u, T2r * T4t);
Chris@42 1518 ci[WS(rs, 2)] = FMA(T2t, T4t, T2r * T4u);
Chris@42 1519 {
Chris@42 1520 E T4i, T4q, T4o, T4s, T4g, T4n;
Chris@42 1521 T4g = T3F - T3w;
Chris@42 1522 T4i = T4g - T4h;
Chris@42 1523 T4q = T4g + T4h;
Chris@42 1524 T4n = T46 - T3X;
Chris@42 1525 T4o = T4m + T4n;
Chris@42 1526 T4s = T4n - T4m;
Chris@42 1527 cr[WS(rs, 12)] = FNMS(T4l, T4o, T4f * T4i);
Chris@42 1528 ci[WS(rs, 12)] = FMA(T4l, T4i, T4f * T4o);
Chris@42 1529 cr[WS(rs, 17)] = FNMS(T4r, T4s, T4p * T4q);
Chris@42 1530 ci[WS(rs, 17)] = FMA(T4r, T4q, T4p * T4s);
Chris@42 1531 }
Chris@42 1532 {
Chris@42 1533 E T3O, T4a, T48, T4c, T3G, T47;
Chris@42 1534 T3G = T3w + T3F;
Chris@42 1535 T3O = T3G - T3N;
Chris@42 1536 T4a = T3G + T3N;
Chris@42 1537 T47 = T3X + T46;
Chris@42 1538 T48 = T3U + T47;
Chris@42 1539 T4c = T47 - T3U;
Chris@42 1540 cr[WS(rs, 7)] = FNMS(T3R, T48, T2v * T3O);
Chris@42 1541 ci[WS(rs, 7)] = FMA(T3R, T3O, T2v * T48);
Chris@42 1542 cr[WS(rs, 22)] = FNMS(T4b, T4c, T49 * T4a);
Chris@42 1543 ci[WS(rs, 22)] = FMA(T4b, T4a, T49 * T4c);
Chris@42 1544 }
Chris@42 1545 }
/* Store block for slots 3, 8, 13, 18 and 23. */
Chris@42 1546 {
Chris@42 1547 E T4V, T5f, T50, T5i, T4L, T4M, T4K, T4N, T54, T55, T53, T56, T5r, T5s;
Chris@42 1548 {
Chris@42 1549 E T4R, T4U, T4Y, T4Z;
Chris@42 1550 T4R = T4P - T4Q;
Chris@42 1551 T4U = T4S - T4T;
Chris@42 1552 T4V = FMA(KP951056516, T4R, KP587785252 * T4U);
Chris@42 1553 T5f = FNMS(KP951056516, T4U, KP587785252 * T4R);
Chris@42 1554 T4Y = T4y + T4B;
Chris@42 1555 T4Z = T4F + T4I;
Chris@42 1556 T50 = FMA(KP951056516, T4Y, KP587785252 * T4Z);
Chris@42 1557 T5i = FNMS(KP951056516, T4Z, KP587785252 * T4Y);
Chris@42 1558 }
Chris@42 1559 {
Chris@42 1560 E T4C, T4J, T51, T52;
Chris@42 1561 T4L = T3z + T3C;
Chris@42 1562 T4C = T4y - T4B;
Chris@42 1563 T4J = T4F - T4I;
Chris@42 1564 T4M = T4C + T4J;
Chris@42 1565 T4K = KP559016994 * (T4C - T4J);
Chris@42 1566 T4N = FNMS(KP250000000, T4M, T4L);
Chris@42 1567 T54 = T43 - T40;
Chris@42 1568 T51 = T4P + T4Q;
Chris@42 1569 T52 = T4S + T4T;
Chris@42 1570 T55 = T51 + T52;
Chris@42 1571 T53 = KP559016994 * (T51 - T52);
Chris@42 1572 T56 = FNMS(KP250000000, T55, T54);
Chris@42 1573 }
Chris@42 1574 T5r = T4L + T4M;
Chris@42 1575 T5s = T54 + T55;
Chris@42 1576 cr[WS(rs, 3)] = FNMS(TR, T5s, TO * T5r);
Chris@42 1577 ci[WS(rs, 3)] = FMA(TR, T5r, TO * T5s);
Chris@42 1578 {
Chris@42 1579 E T5g, T5o, T5k, T5q, T5e, T5j;
Chris@42 1580 T5e = T4N - T4K;
Chris@42 1581 T5g = T5e - T5f;
Chris@42 1582 T5o = T5e + T5f;
Chris@42 1583 T5j = T56 - T53;
Chris@42 1584 T5k = T5i + T5j;
Chris@42 1585 T5q = T5j - T5i;
Chris@42 1586 cr[WS(rs, 13)] = FNMS(T5h, T5k, T5d * T5g);
Chris@42 1587 ci[WS(rs, 13)] = FMA(T5h, T5g, T5d * T5k);
Chris@42 1588 cr[WS(rs, 18)] = FNMS(T5p, T5q, T5n * T5o);
Chris@42 1589 ci[WS(rs, 18)] = FMA(T5p, T5o, T5n * T5q);
Chris@42 1590 }
Chris@42 1591 {
Chris@42 1592 E T4W, T5a, T58, T5c, T4O, T57;
Chris@42 1593 T4O = T4K + T4N;
Chris@42 1594 T4W = T4O - T4V;
Chris@42 1595 T5a = T4O + T4V;
Chris@42 1596 T57 = T53 + T56;
Chris@42 1597 T58 = T50 + T57;
Chris@42 1598 T5c = T57 - T50;
Chris@42 1599 cr[WS(rs, 8)] = FNMS(T4X, T58, T4v * T4W);
Chris@42 1600 ci[WS(rs, 8)] = FMA(T4X, T4W, T4v * T58);
Chris@42 1601 cr[WS(rs, 23)] = FNMS(T5b, T5c, T59 * T5a);
Chris@42 1602 ci[WS(rs, 23)] = FMA(T5b, T5a, T59 * T5c);
Chris@42 1603 }
Chris@42 1604 }
/* Store block for slots 1, 6, 11, 16 and 21. */
Chris@42 1605 {
Chris@42 1606 E T6a, T6v, T6e, T6y, T60, T61, T5X, T62, T6k, T6l, T6h, T6m, T6F, T6G;
Chris@42 1607 {
Chris@42 1608 E T66, T69, T6c, T6d;
Chris@42 1609 T66 = T64 - T65;
Chris@42 1610 T69 = T67 - T68;
Chris@42 1611 T6a = FMA(KP951056516, T66, KP587785252 * T69);
Chris@42 1612 T6v = FNMS(KP951056516, T69, KP587785252 * T66);
Chris@42 1613 T6c = T5z - T5G;
Chris@42 1614 T6d = T5O - T5V;
Chris@42 1615 T6e = FMA(KP951056516, T6c, KP587785252 * T6d);
Chris@42 1616 T6y = FNMS(KP951056516, T6d, KP587785252 * T6c);
Chris@42 1617 }
Chris@42 1618 {
Chris@42 1619 E T5H, T5W, T6f, T6g;
Chris@42 1620 T60 = T5Y - T5Z;
Chris@42 1621 T5H = T5z + T5G;
Chris@42 1622 T5W = T5O + T5V;
Chris@42 1623 T61 = T5H + T5W;
Chris@42 1624 T5X = KP559016994 * (T5H - T5W);
Chris@42 1625 T62 = FNMS(KP250000000, T61, T60);
Chris@42 1626 T6k = T6i + T6j;
Chris@42 1627 T6f = T64 + T65;
Chris@42 1628 T6g = T67 + T68;
Chris@42 1629 T6l = T6f + T6g;
Chris@42 1630 T6h = KP559016994 * (T6f - T6g);
Chris@42 1631 T6m = FNMS(KP250000000, T6l, T6k);
Chris@42 1632 }
Chris@42 1633 T6F = T60 + T61;
Chris@42 1634 T6G = T6k + T6l;
Chris@42 1635 cr[WS(rs, 1)] = FNMS(TQ, T6G, TN * T6F);
Chris@42 1636 ci[WS(rs, 1)] = FMA(TQ, T6F, TN * T6G);
Chris@42 1637 {
Chris@42 1638 E T6w, T6C, T6A, T6E, T6u, T6z;
Chris@42 1639 T6u = T62 - T5X;
Chris@42 1640 T6w = T6u - T6v;
Chris@42 1641 T6C = T6u + T6v;
Chris@42 1642 T6z = T6m - T6h;
Chris@42 1643 T6A = T6y + T6z;
Chris@42 1644 T6E = T6z - T6y;
Chris@42 1645 cr[WS(rs, 11)] = FNMS(T6x, T6A, T6t * T6w);
Chris@42 1646 ci[WS(rs, 11)] = FMA(T6x, T6w, T6t * T6A);
Chris@42 1647 cr[WS(rs, 16)] = FNMS(T6D, T6E, T6B * T6C);
Chris@42 1648 ci[WS(rs, 16)] = FMA(T6D, T6C, T6B * T6E);
Chris@42 1649 }
Chris@42 1650 {
Chris@42 1651 E T6b, T6q, T6o, T6s, T63, T6n;
Chris@42 1652 T63 = T5X + T62;
Chris@42 1653 T6b = T63 - T6a;
Chris@42 1654 T6q = T63 + T6a;
Chris@42 1655 T6n = T6h + T6m;
Chris@42 1656 T6o = T6e + T6n;
Chris@42 1657 T6s = T6n - T6e;
Chris@42 1658 cr[WS(rs, 6)] = FNMS(T5m, T6o, T5l * T6b);
Chris@42 1659 ci[WS(rs, 6)] = FMA(T5m, T6b, T5l * T6o);
Chris@42 1660 cr[WS(rs, 21)] = FNMS(T6r, T6s, T6p * T6q);
Chris@42 1661 ci[WS(rs, 21)] = FMA(T6r, T6q, T6p * T6s);
Chris@42 1662 }
Chris@42 1663 }
Chris@42 1664 }
Chris@42 1665 }
Chris@42 1666 }
Chris@42 1667 }
Chris@42 1668
/*
 * Twiddle instruction table for the non-FMA variant; it is referenced by
 * the descriptor `desc` that follows.  It holds four TW_CEXP entries whose
 * last fields are 1, 3, 9 and 24, closed by a TW_NEXT entry.
 * NOTE(review): presumably each TW_CEXP entry requests one stored complex
 * twiddle (two reals) with that exponent multiplier, which would match the
 * eight W values hb2_25 consumes per iteration -- confirm against the
 * tw_instr definition.
 */
Chris@42 1669 static const tw_instr twinstr[] = {
Chris@42 1670 {TW_CEXP, 1, 1},
Chris@42 1671 {TW_CEXP, 1, 3},
Chris@42 1672 {TW_CEXP, 1, 9},
Chris@42 1673 {TW_CEXP, 1, 24},
Chris@42 1674 {TW_NEXT, 1, 0}
Chris@42 1675 };
Chris@42 1676
/*
 * Descriptor handed to khc2hc_register for the non-FMA variant.
 * 25 matches the -n 25 generator argument, "hb2_25" is the codelet name and
 * twinstr is the twiddle instruction table.  GENUS is not defined in the
 * visible part of this file (presumably supplied by hb.h).
 * {280, 180, 160, 0} repeats the operation counts from this variant's header
 * comment (280 additions, 180 multiplications, 160 fused multiply/adds); the
 * meaning of the trailing 0 is not visible here.
 */
Chris@42 1677 static const hc2hc_desc desc = { 25, "hb2_25", twinstr, &GENUS, {280, 180, 160, 0} };
Chris@42 1678
/*
 * Registration entry point (non-FMA variant): passes the hb2_25 kernel and
 * its descriptor `desc` to the planner p through khc2hc_register.
 * X() is a name-wrapping macro defined elsewhere (presumably it adds the
 * library's symbol prefix -- confirm).
 */
Chris@42 1679 void X(codelet_hb2_25) (planner *p) {
Chris@42 1680 X(khc2hc_register) (p, hb2_25, &desc);
Chris@42 1681 }
Chris@42 1682 #endif /* HAVE_FMA */