annotate src/fftw-3.3.8/rdft/scalar/r2cb/hb2_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:41 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 25 -dif -name hb2_25 -include rdft/scalar/hb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 440 FP additions, 434 FP multiplications,
Chris@82 32 * (or, 84 additions, 78 multiplications, 356 fused multiply/add),
Chris@82 33 * 206 stack variables, 47 constants, and 100 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hb.h"
Chris@82 36
Chris@82 37 static void hb2_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
Chris@82 39 DK(KP921177326, +0.921177326965143320250447435415066029359282231);
Chris@82 40 DK(KP833417178, +0.833417178328688677408962550243238843138996060);
Chris@82 41 DK(KP541454447, +0.541454447536312777046285590082819509052033189);
Chris@82 42 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 43 DK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@82 44 DK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@82 45 DK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@82 46 DK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@82 47 DK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@82 48 DK(KP871714437, +0.871714437527667770979999223229522602943903653);
Chris@82 49 DK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@82 50 DK(KP554608978, +0.554608978404018097464974850792216217022558774);
Chris@82 51 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@82 52 DK(KP248028675, +0.248028675328619457762448260696444630363259177);
Chris@82 53 DK(KP851038619, +0.851038619207379630836264138867114231259902550);
Chris@82 54 DK(KP525970792, +0.525970792408939708442463226536226366643874659);
Chris@82 55 DK(KP726211448, +0.726211448929902658173535992263577167607493062);
Chris@82 56 DK(KP912018591, +0.912018591466481957908415381764119056233607330);
Chris@82 57 DK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@82 58 DK(KP943557151, +0.943557151597354104399655195398983005179443399);
Chris@82 59 DK(KP994076283, +0.994076283785401014123185814696322018529298887);
Chris@82 60 DK(KP614372930, +0.614372930789563808870829930444362096004872855);
Chris@82 61 DK(KP621716863, +0.621716863012209892444754556304102309693593202);
Chris@82 62 DK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@82 63 DK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@82 64 DK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@82 65 DK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@82 66 DK(KP557913902, +0.557913902031834264187699648465567037992437152);
Chris@82 67 DK(KP249506682, +0.249506682107067890488084201715862638334226305);
Chris@82 68 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 69 DK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@82 70 DK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@82 71 DK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@82 72 DK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@82 73 DK(KP968479752, +0.968479752739016373193524836781420152702090879);
Chris@82 74 DK(KP062914667, +0.062914667253649757225485955897349402364686947);
Chris@82 75 DK(KP827271945, +0.827271945972475634034355757144307982555673741);
Chris@82 76 DK(KP126329378, +0.126329378446108174786050455341811215027378105);
Chris@82 77 DK(KP470564281, +0.470564281212251493087595091036643380879947982);
Chris@82 78 DK(KP634619297, +0.634619297544148100711287640319130485732531031);
Chris@82 79 DK(KP256756360, +0.256756360367726783319498520922669048172391148);
Chris@82 80 DK(KP939062505, +0.939062505817492352556001843133229685779824606);
Chris@82 81 DK(KP549754652, +0.549754652192770074288023275540779861653779767);
Chris@82 82 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 83 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 84 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 85 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 86 {
Chris@82 87 INT m;
Chris@82 88 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) {
Chris@82 89 E TN, TT, TO, TR, T23, T25, TQ, TS, T4l, TW, T4n, TX, T2e, T2y, T4z;
Chris@82 90 E T2q, T76, T4o, T8d, T2u, T4e, T4i, T8a, T86, T71, T6Y, T6U, T26, T2a, T3U;
Chris@82 91 E T8o, T8s, T4B, T4C, T4G, T2k, T5w, T5C, T6E, T5T, T4u, T7g, T7c, T1I, TY;
Chris@82 92 E T5I, T8i, T5M;
Chris@82 93 {
Chris@82 94 E T2x, T2p, T85, T4d, T2t, T89, T4h, TU, T4m, T2j, T3T, TP, TV, T2d, T5v;
Chris@82 95 E T5B;
Chris@82 96 TN = W[0];
Chris@82 97 TT = W[4];
Chris@82 98 TO = W[2];
Chris@82 99 TR = W[3];
Chris@82 100 TP = TN * TO;
Chris@82 101 T2x = TO * TT;
Chris@82 102 TV = TN * TR;
Chris@82 103 T2d = TN * TT;
Chris@82 104 T23 = W[6];
Chris@82 105 T2p = TT * T23;
Chris@82 106 T85 = TN * T23;
Chris@82 107 T4d = TO * T23;
Chris@82 108 T25 = W[7];
Chris@82 109 T2t = TT * T25;
Chris@82 110 T89 = TN * T25;
Chris@82 111 T4h = TO * T25;
Chris@82 112 TQ = W[1];
Chris@82 113 TS = FNMS(TQ, TR, TP);
Chris@82 114 T4l = FMA(TQ, TR, TP);
Chris@82 115 TU = TS * TT;
Chris@82 116 T4m = T4l * TT;
Chris@82 117 TW = FMA(TQ, TO, TV);
Chris@82 118 T4n = FNMS(TQ, TO, TV);
Chris@82 119 TX = W[5];
Chris@82 120 T2j = TN * TX;
Chris@82 121 T3T = TO * TX;
Chris@82 122 T2e = FNMS(TQ, TX, T2d);
Chris@82 123 T2y = FMA(TR, TX, T2x);
Chris@82 124 T4z = FMA(TQ, TX, T2d);
Chris@82 125 T2q = FMA(TX, T25, T2p);
Chris@82 126 T76 = FMA(TR, TT, T3T);
Chris@82 127 T4o = FNMS(T4n, TX, T4m);
Chris@82 128 T8d = FNMS(TW, TX, TU);
Chris@82 129 T2u = FNMS(TX, T23, T2t);
Chris@82 130 T4e = FMA(TR, T25, T4d);
Chris@82 131 T4i = FNMS(TR, T23, T4h);
Chris@82 132 T8a = FNMS(TQ, T23, T89);
Chris@82 133 T86 = FMA(TQ, T25, T85);
Chris@82 134 {
Chris@82 135 E T6X, T6T, T24, T29;
Chris@82 136 T71 = FNMS(TR, TX, T2x);
Chris@82 137 T6X = T4l * T25;
Chris@82 138 T6Y = FNMS(T4n, T23, T6X);
Chris@82 139 T6T = T4l * T23;
Chris@82 140 T6U = FMA(T4n, T25, T6T);
Chris@82 141 T24 = TS * T23;
Chris@82 142 T26 = FMA(TW, T25, T24);
Chris@82 143 T29 = TS * T25;
Chris@82 144 T2a = FNMS(TW, T23, T29);
Chris@82 145 }
Chris@82 146 {
Chris@82 147 E T8n, T8r, T4A, T4F;
Chris@82 148 T8n = T2y * T23;
Chris@82 149 T8r = T2y * T25;
Chris@82 150 T3U = FNMS(TR, TT, T3T);
Chris@82 151 T8o = FMA(T3U, T25, T8n);
Chris@82 152 T8s = FNMS(T3U, T23, T8r);
Chris@82 153 T4A = T4z * T23;
Chris@82 154 T4F = T4z * T25;
Chris@82 155 T4B = FNMS(TQ, TT, T2j);
Chris@82 156 T4C = FMA(T4B, T25, T4A);
Chris@82 157 T4G = FNMS(T4B, T23, T4F);
Chris@82 158 }
Chris@82 159 T5v = T2e * T23;
Chris@82 160 T5B = T2e * T25;
Chris@82 161 T2k = FMA(TQ, TT, T2j);
Chris@82 162 T5w = FMA(T2k, T25, T5v);
Chris@82 163 T5C = FNMS(T2k, T23, T5B);
Chris@82 164 {
Chris@82 165 E T4t, T7b, T7f, T1H, T5H, T5L;
Chris@82 166 T4t = T4l * TX;
Chris@82 167 T6E = FNMS(T4n, TT, T4t);
Chris@82 168 T5T = FMA(T4n, TX, T4m);
Chris@82 169 T7b = T5T * T23;
Chris@82 170 T7f = T5T * T25;
Chris@82 171 T4u = FMA(T4n, TT, T4t);
Chris@82 172 T7g = FNMS(T6E, T23, T7f);
Chris@82 173 T7c = FMA(T6E, T25, T7b);
Chris@82 174 T1H = TS * TX;
Chris@82 175 T1I = FNMS(TW, TT, T1H);
Chris@82 176 TY = FMA(TW, TX, TU);
Chris@82 177 T5H = TY * T23;
Chris@82 178 T5L = TY * T25;
Chris@82 179 T5I = FMA(T1I, T25, T5H);
Chris@82 180 T8i = FMA(TW, TT, T1H);
Chris@82 181 T5M = FNMS(T1I, T23, T5L);
Chris@82 182 }
Chris@82 183 }
Chris@82 184 {
Chris@82 185 E T9, T40, T1R, T6G, T6F, T3X, T6H, T2F, T7n, T4N, T5W, T1k, T1S, T1D, T1T;
Chris@82 186 E Ti, Tr, Ts, TB, TK, TL, TM, T6p, T7K, T6w, T7A, T2U, T56, T3K, T4X;
Chris@82 187 E T6i, T7J, T6v, T7x, T39, T57, T3L, T50, T3E, T59, T3O, T4Q, T63, T7H, T6y;
Chris@82 188 E T7t, T3p, T5a, T3N, T4T, T6a, T7G, T6z, T7q;
Chris@82 189 {
Chris@82 190 E T1, T1J, T8, T3Z, T2A, T3Y, T1Q, T3W, T2C, T2D, T3V;
Chris@82 191 T1 = cr[0];
Chris@82 192 T1J = ci[WS(rs, 24)];
Chris@82 193 {
Chris@82 194 E T2, T3, T4, T5, T6, T7;
Chris@82 195 T2 = cr[WS(rs, 5)];
Chris@82 196 T3 = ci[WS(rs, 4)];
Chris@82 197 T4 = T2 + T3;
Chris@82 198 T5 = cr[WS(rs, 10)];
Chris@82 199 T6 = ci[WS(rs, 9)];
Chris@82 200 T7 = T5 + T6;
Chris@82 201 T8 = T4 + T7;
Chris@82 202 T3Z = T5 - T6;
Chris@82 203 T2A = T4 - T7;
Chris@82 204 T3Y = T2 - T3;
Chris@82 205 }
Chris@82 206 {
Chris@82 207 E T1K, T1L, T1M, T1N, T1O, T1P;
Chris@82 208 T1K = ci[WS(rs, 19)];
Chris@82 209 T1L = cr[WS(rs, 20)];
Chris@82 210 T1M = T1K - T1L;
Chris@82 211 T1N = ci[WS(rs, 14)];
Chris@82 212 T1O = cr[WS(rs, 15)];
Chris@82 213 T1P = T1N - T1O;
Chris@82 214 T1Q = T1M + T1P;
Chris@82 215 T3W = T1M - T1P;
Chris@82 216 T2C = T1K + T1L;
Chris@82 217 T2D = T1N + T1O;
Chris@82 218 }
Chris@82 219 T9 = T1 + T8;
Chris@82 220 T40 = FMA(KP618033988, T3Z, T3Y);
Chris@82 221 T1R = T1J + T1Q;
Chris@82 222 T6G = FNMS(KP618033988, T3Y, T3Z);
Chris@82 223 T3V = FNMS(KP250000000, T1Q, T1J);
Chris@82 224 T6F = FNMS(KP559016994, T3W, T3V);
Chris@82 225 T3X = FMA(KP559016994, T3W, T3V);
Chris@82 226 T6H = FNMS(KP951056516, T6G, T6F);
Chris@82 227 {
Chris@82 228 E T2E, T5V, T2B, T5U, T2z;
Chris@82 229 T2E = FMA(KP618033988, T2D, T2C);
Chris@82 230 T5V = FNMS(KP618033988, T2C, T2D);
Chris@82 231 T2z = FNMS(KP250000000, T8, T1);
Chris@82 232 T2B = FMA(KP559016994, T2A, T2z);
Chris@82 233 T5U = FNMS(KP559016994, T2A, T2z);
Chris@82 234 T2F = FNMS(KP951056516, T2E, T2B);
Chris@82 235 T7n = FNMS(KP951056516, T5V, T5U);
Chris@82 236 T4N = FMA(KP951056516, T2E, T2B);
Chris@82 237 T5W = FMA(KP951056516, T5V, T5U);
Chris@82 238 }
Chris@82 239 }
Chris@82 240 {
Chris@82 241 E Ta, T2H, T6n, T2S, Th, T2G, TC, T3r, T5Y, T3C, TJ, T3q, Tj, T30, T6d;
Chris@82 242 E T33, Tq, T32, T1u, T3v, T61, T3y, T1B, T3x, T12, T2L, T6k, T2O, T19, T2N;
Chris@82 243 E T1b, T2W, T6g, T37, T1i, T2V, T1l, T3g, T68, T3j, T1s, T3i, Tt, T3c, T65;
Chris@82 244 E T3n, TA, T3b;
Chris@82 245 {
Chris@82 246 E Tg, T2R, Td, T2Q;
Chris@82 247 Ta = cr[WS(rs, 1)];
Chris@82 248 {
Chris@82 249 E Te, Tf, Tb, Tc;
Chris@82 250 Te = cr[WS(rs, 11)];
Chris@82 251 Tf = ci[WS(rs, 8)];
Chris@82 252 Tg = Te + Tf;
Chris@82 253 T2R = Tf - Te;
Chris@82 254 Tb = cr[WS(rs, 6)];
Chris@82 255 Tc = ci[WS(rs, 3)];
Chris@82 256 Td = Tb + Tc;
Chris@82 257 T2Q = Tb - Tc;
Chris@82 258 }
Chris@82 259 T2H = Td - Tg;
Chris@82 260 T6n = FMA(KP618033988, T2Q, T2R);
Chris@82 261 T2S = FNMS(KP618033988, T2R, T2Q);
Chris@82 262 Th = Td + Tg;
Chris@82 263 T2G = FNMS(KP250000000, Th, Ta);
Chris@82 264 }
Chris@82 265 {
Chris@82 266 E TI, T3B, TF, T3A;
Chris@82 267 TC = cr[WS(rs, 3)];
Chris@82 268 {
Chris@82 269 E TG, TH, TD, TE;
Chris@82 270 TG = ci[WS(rs, 11)];
Chris@82 271 TH = ci[WS(rs, 6)];
Chris@82 272 TI = TG + TH;
Chris@82 273 T3B = TG - TH;
Chris@82 274 TD = cr[WS(rs, 8)];
Chris@82 275 TE = ci[WS(rs, 1)];
Chris@82 276 TF = TD + TE;
Chris@82 277 T3A = TD - TE;
Chris@82 278 }
Chris@82 279 T3r = TI - TF;
Chris@82 280 T5Y = FNMS(KP618033988, T3A, T3B);
Chris@82 281 T3C = FMA(KP618033988, T3B, T3A);
Chris@82 282 TJ = TF + TI;
Chris@82 283 T3q = FNMS(KP250000000, TJ, TC);
Chris@82 284 }
Chris@82 285 {
Chris@82 286 E Tp, T2Z, Tm, T2Y;
Chris@82 287 Tj = cr[WS(rs, 4)];
Chris@82 288 {
Chris@82 289 E Tn, To, Tk, Tl;
Chris@82 290 Tn = ci[WS(rs, 10)];
Chris@82 291 To = ci[WS(rs, 5)];
Chris@82 292 Tp = Tn + To;
Chris@82 293 T2Z = To - Tn;
Chris@82 294 Tk = cr[WS(rs, 9)];
Chris@82 295 Tl = ci[0];
Chris@82 296 Tm = Tk + Tl;
Chris@82 297 T2Y = Tl - Tk;
Chris@82 298 }
Chris@82 299 T30 = FMA(KP618033988, T2Z, T2Y);
Chris@82 300 T6d = FNMS(KP618033988, T2Y, T2Z);
Chris@82 301 T33 = Tm - Tp;
Chris@82 302 Tq = Tm + Tp;
Chris@82 303 T32 = FMS(KP250000000, Tq, Tj);
Chris@82 304 }
Chris@82 305 {
Chris@82 306 E T1A, T3u, T1x, T3t;
Chris@82 307 T1u = ci[WS(rs, 21)];
Chris@82 308 {
Chris@82 309 E T1y, T1z, T1v, T1w;
Chris@82 310 T1y = cr[WS(rs, 13)];
Chris@82 311 T1z = cr[WS(rs, 18)];
Chris@82 312 T1A = T1y + T1z;
Chris@82 313 T3u = T1z - T1y;
Chris@82 314 T1v = ci[WS(rs, 16)];
Chris@82 315 T1w = cr[WS(rs, 23)];
Chris@82 316 T1x = T1v - T1w;
Chris@82 317 T3t = T1v + T1w;
Chris@82 318 }
Chris@82 319 T3v = FMA(KP618033988, T3u, T3t);
Chris@82 320 T61 = FNMS(KP618033988, T3t, T3u);
Chris@82 321 T3y = T1x + T1A;
Chris@82 322 T1B = T1x - T1A;
Chris@82 323 T3x = FMS(KP250000000, T1B, T1u);
Chris@82 324 }
Chris@82 325 {
Chris@82 326 E T18, T2K, T15, T2J;
Chris@82 327 T12 = ci[WS(rs, 23)];
Chris@82 328 {
Chris@82 329 E T16, T17, T13, T14;
Chris@82 330 T16 = ci[WS(rs, 13)];
Chris@82 331 T17 = cr[WS(rs, 16)];
Chris@82 332 T18 = T16 - T17;
Chris@82 333 T2K = T16 + T17;
Chris@82 334 T13 = ci[WS(rs, 18)];
Chris@82 335 T14 = cr[WS(rs, 21)];
Chris@82 336 T15 = T13 - T14;
Chris@82 337 T2J = T13 + T14;
Chris@82 338 }
Chris@82 339 T2L = FMA(KP618033988, T2K, T2J);
Chris@82 340 T6k = FNMS(KP618033988, T2J, T2K);
Chris@82 341 T2O = T15 - T18;
Chris@82 342 T19 = T15 + T18;
Chris@82 343 T2N = FNMS(KP250000000, T19, T12);
Chris@82 344 }
Chris@82 345 {
Chris@82 346 E T1h, T36, T1e, T35;
Chris@82 347 T1b = ci[WS(rs, 20)];
Chris@82 348 {
Chris@82 349 E T1f, T1g, T1c, T1d;
Chris@82 350 T1f = cr[WS(rs, 14)];
Chris@82 351 T1g = cr[WS(rs, 19)];
Chris@82 352 T1h = T1f + T1g;
Chris@82 353 T36 = T1g - T1f;
Chris@82 354 T1c = ci[WS(rs, 15)];
Chris@82 355 T1d = cr[WS(rs, 24)];
Chris@82 356 T1e = T1c - T1d;
Chris@82 357 T35 = T1c + T1d;
Chris@82 358 }
Chris@82 359 T2W = T1e + T1h;
Chris@82 360 T6g = FNMS(KP618033988, T35, T36);
Chris@82 361 T37 = FMA(KP618033988, T36, T35);
Chris@82 362 T1i = T1e - T1h;
Chris@82 363 T2V = FMS(KP250000000, T1i, T1b);
Chris@82 364 }
Chris@82 365 {
Chris@82 366 E T1o, T3e, T1r, T3f;
Chris@82 367 T1l = ci[WS(rs, 22)];
Chris@82 368 {
Chris@82 369 E T1m, T1n, T1p, T1q;
Chris@82 370 T1m = ci[WS(rs, 17)];
Chris@82 371 T1n = cr[WS(rs, 22)];
Chris@82 372 T1o = T1m - T1n;
Chris@82 373 T3e = T1m + T1n;
Chris@82 374 T1p = ci[WS(rs, 12)];
Chris@82 375 T1q = cr[WS(rs, 17)];
Chris@82 376 T1r = T1p - T1q;
Chris@82 377 T3f = T1p + T1q;
Chris@82 378 }
Chris@82 379 T3g = FMA(KP618033988, T3f, T3e);
Chris@82 380 T68 = FNMS(KP618033988, T3e, T3f);
Chris@82 381 T3j = T1o - T1r;
Chris@82 382 T1s = T1o + T1r;
Chris@82 383 T3i = FMS(KP250000000, T1s, T1l);
Chris@82 384 }
Chris@82 385 {
Chris@82 386 E Tw, T3l, Tz, T3m;
Chris@82 387 Tt = cr[WS(rs, 2)];
Chris@82 388 {
Chris@82 389 E Tu, Tv, Tx, Ty;
Chris@82 390 Tu = cr[WS(rs, 7)];
Chris@82 391 Tv = ci[WS(rs, 2)];
Chris@82 392 Tw = Tu + Tv;
Chris@82 393 T3l = Tu - Tv;
Chris@82 394 Tx = cr[WS(rs, 12)];
Chris@82 395 Ty = ci[WS(rs, 7)];
Chris@82 396 Tz = Tx + Ty;
Chris@82 397 T3m = Ty - Tx;
Chris@82 398 }
Chris@82 399 T3c = Tz - Tw;
Chris@82 400 T65 = FMA(KP618033988, T3l, T3m);
Chris@82 401 T3n = FNMS(KP618033988, T3m, T3l);
Chris@82 402 TA = Tw + Tz;
Chris@82 403 T3b = FNMS(KP250000000, TA, Tt);
Chris@82 404 }
Chris@82 405 {
Chris@82 406 E T1a, T1j, T1t, T1C;
Chris@82 407 T1a = T12 + T19;
Chris@82 408 T1j = T1b + T1i;
Chris@82 409 T1k = T1a - T1j;
Chris@82 410 T1S = T1a + T1j;
Chris@82 411 T1t = T1l + T1s;
Chris@82 412 T1C = T1u + T1B;
Chris@82 413 T1D = T1t - T1C;
Chris@82 414 T1T = T1t + T1C;
Chris@82 415 }
Chris@82 416 Ti = Ta + Th;
Chris@82 417 Tr = Tj + Tq;
Chris@82 418 Ts = Ti + Tr;
Chris@82 419 TB = Tt + TA;
Chris@82 420 TK = TC + TJ;
Chris@82 421 TL = TB + TK;
Chris@82 422 TM = Ts + TL;
Chris@82 423 {
Chris@82 424 E T6l, T7y, T6o, T7z, T6j, T6m;
Chris@82 425 T6j = FNMS(KP559016994, T2H, T2G);
Chris@82 426 T6l = FMA(KP951056516, T6k, T6j);
Chris@82 427 T7y = FNMS(KP951056516, T6k, T6j);
Chris@82 428 T6m = FNMS(KP559016994, T2O, T2N);
Chris@82 429 T6o = FMA(KP951056516, T6n, T6m);
Chris@82 430 T7z = FNMS(KP951056516, T6n, T6m);
Chris@82 431 T6p = FNMS(KP549754652, T6o, T6l);
Chris@82 432 T7K = FMA(KP939062505, T7y, T7z);
Chris@82 433 T6w = FMA(KP549754652, T6l, T6o);
Chris@82 434 T7A = FNMS(KP939062505, T7z, T7y);
Chris@82 435 }
Chris@82 436 {
Chris@82 437 E T2M, T4W, T2T, T4V, T2I, T2P;
Chris@82 438 T2I = FMA(KP559016994, T2H, T2G);
Chris@82 439 T2M = FNMS(KP951056516, T2L, T2I);
Chris@82 440 T4W = FMA(KP951056516, T2L, T2I);
Chris@82 441 T2P = FMA(KP559016994, T2O, T2N);
Chris@82 442 T2T = FMA(KP951056516, T2S, T2P);
Chris@82 443 T4V = FNMS(KP951056516, T2S, T2P);
Chris@82 444 T2U = FNMS(KP256756360, T2T, T2M);
Chris@82 445 T56 = FMA(KP634619297, T4V, T4W);
Chris@82 446 T3K = FMA(KP256756360, T2M, T2T);
Chris@82 447 T4X = FNMS(KP634619297, T4W, T4V);
Chris@82 448 }
Chris@82 449 {
Chris@82 450 E T6e, T7w, T6h, T7v, T6c, T6f;
Chris@82 451 T6c = FMA(KP559016994, T2W, T2V);
Chris@82 452 T6e = FNMS(KP951056516, T6d, T6c);
Chris@82 453 T7w = FMA(KP951056516, T6d, T6c);
Chris@82 454 T6f = FMA(KP559016994, T33, T32);
Chris@82 455 T6h = FNMS(KP951056516, T6g, T6f);
Chris@82 456 T7v = FMA(KP951056516, T6g, T6f);
Chris@82 457 T6i = FMA(KP470564281, T6h, T6e);
Chris@82 458 T7J = FNMS(KP126329378, T7v, T7w);
Chris@82 459 T6v = FNMS(KP470564281, T6e, T6h);
Chris@82 460 T7x = FMA(KP126329378, T7w, T7v);
Chris@82 461 }
Chris@82 462 {
Chris@82 463 E T31, T4Y, T38, T4Z, T2X, T34;
Chris@82 464 T2X = FNMS(KP559016994, T2W, T2V);
Chris@82 465 T31 = FMA(KP951056516, T30, T2X);
Chris@82 466 T4Y = FNMS(KP951056516, T30, T2X);
Chris@82 467 T34 = FNMS(KP559016994, T33, T32);
Chris@82 468 T38 = FMA(KP951056516, T37, T34);
Chris@82 469 T4Z = FNMS(KP951056516, T37, T34);
Chris@82 470 T39 = FNMS(KP634619297, T38, T31);
Chris@82 471 T57 = FMA(KP827271945, T4Y, T4Z);
Chris@82 472 T3L = FMA(KP634619297, T31, T38);
Chris@82 473 T50 = FNMS(KP827271945, T4Z, T4Y);
Chris@82 474 }
Chris@82 475 {
Chris@82 476 E T3w, T4O, T3D, T4P, T3s, T3z;
Chris@82 477 T3s = FNMS(KP559016994, T3r, T3q);
Chris@82 478 T3w = FNMS(KP951056516, T3v, T3s);
Chris@82 479 T4O = FMA(KP951056516, T3v, T3s);
Chris@82 480 T3z = FNMS(KP559016994, T3y, T3x);
Chris@82 481 T3D = FNMS(KP951056516, T3C, T3z);
Chris@82 482 T4P = FMA(KP951056516, T3C, T3z);
Chris@82 483 T3E = FMA(KP939062505, T3D, T3w);
Chris@82 484 T59 = FMA(KP126329378, T4O, T4P);
Chris@82 485 T3O = FNMS(KP939062505, T3w, T3D);
Chris@82 486 T4Q = FNMS(KP126329378, T4P, T4O);
Chris@82 487 }
Chris@82 488 {
Chris@82 489 E T5Z, T7r, T62, T7s, T5X, T60;
Chris@82 490 T5X = FMA(KP559016994, T3y, T3x);
Chris@82 491 T5Z = FMA(KP951056516, T5Y, T5X);
Chris@82 492 T7r = FNMS(KP951056516, T5Y, T5X);
Chris@82 493 T60 = FMA(KP559016994, T3r, T3q);
Chris@82 494 T62 = FMA(KP951056516, T61, T60);
Chris@82 495 T7s = FNMS(KP951056516, T61, T60);
Chris@82 496 T63 = FMA(KP062914667, T62, T5Z);
Chris@82 497 T7H = FMA(KP827271945, T7r, T7s);
Chris@82 498 T6y = FNMS(KP062914667, T5Z, T62);
Chris@82 499 T7t = FNMS(KP827271945, T7s, T7r);
Chris@82 500 }
Chris@82 501 {
Chris@82 502 E T3h, T4S, T3o, T4R, T3d, T3k;
Chris@82 503 T3d = FNMS(KP559016994, T3c, T3b);
Chris@82 504 T3h = FNMS(KP951056516, T3g, T3d);
Chris@82 505 T4S = FMA(KP951056516, T3g, T3d);
Chris@82 506 T3k = FNMS(KP559016994, T3j, T3i);
Chris@82 507 T3o = FNMS(KP951056516, T3n, T3k);
Chris@82 508 T4R = FMA(KP951056516, T3n, T3k);
Chris@82 509 T3p = FMA(KP549754652, T3o, T3h);
Chris@82 510 T5a = FMA(KP470564281, T4R, T4S);
Chris@82 511 T3N = FNMS(KP549754652, T3h, T3o);
Chris@82 512 T4T = FNMS(KP470564281, T4S, T4R);
Chris@82 513 }
Chris@82 514 {
Chris@82 515 E T66, T7o, T69, T7p, T64, T67;
Chris@82 516 T64 = FMA(KP559016994, T3j, T3i);
Chris@82 517 T66 = FNMS(KP951056516, T65, T64);
Chris@82 518 T7o = FMA(KP951056516, T65, T64);
Chris@82 519 T67 = FMA(KP559016994, T3c, T3b);
Chris@82 520 T69 = FMA(KP951056516, T68, T67);
Chris@82 521 T7p = FNMS(KP951056516, T68, T67);
Chris@82 522 T6a = FMA(KP634619297, T69, T66);
Chris@82 523 T7G = FNMS(KP062914667, T7o, T7p);
Chris@82 524 T6z = FNMS(KP634619297, T66, T69);
Chris@82 525 T7q = FMA(KP062914667, T7p, T7o);
Chris@82 526 }
Chris@82 527 }
Chris@82 528 cr[0] = T9 + TM;
Chris@82 529 {
Chris@82 530 E T1U, T1X, T2l, T20, T2m, T1F, T2r, T27, T2h;
Chris@82 531 {
Chris@82 532 E T1W, T1V, T1Y, T1Z;
Chris@82 533 T1W = T1S - T1T;
Chris@82 534 T1U = T1S + T1T;
Chris@82 535 T1V = FNMS(KP250000000, T1U, T1R);
Chris@82 536 T1X = FMA(KP559016994, T1W, T1V);
Chris@82 537 T2l = FNMS(KP559016994, T1W, T1V);
Chris@82 538 T1Y = Ti - Tr;
Chris@82 539 T1Z = TB - TK;
Chris@82 540 T20 = FMA(KP618033988, T1Z, T1Y);
Chris@82 541 T2m = FNMS(KP618033988, T1Y, T1Z);
Chris@82 542 {
Chris@82 543 E T1E, T2g, T11, T2f, TZ, T10;
Chris@82 544 T1E = FMA(KP618033988, T1D, T1k);
Chris@82 545 T2g = FNMS(KP618033988, T1k, T1D);
Chris@82 546 TZ = FNMS(KP250000000, TM, T9);
Chris@82 547 T10 = Ts - TL;
Chris@82 548 T11 = FMA(KP559016994, T10, TZ);
Chris@82 549 T2f = FNMS(KP559016994, T10, TZ);
Chris@82 550 T1F = FNMS(KP951056516, T1E, T11);
Chris@82 551 T2r = FNMS(KP951056516, T2g, T2f);
Chris@82 552 T27 = FMA(KP951056516, T1E, T11);
Chris@82 553 T2h = FMA(KP951056516, T2g, T2f);
Chris@82 554 }
Chris@82 555 }
Chris@82 556 {
Chris@82 557 E T2s, T2w, T2v, T1G, T22, T21;
Chris@82 558 ci[0] = T1R + T1U;
Chris@82 559 T2s = T2q * T2r;
Chris@82 560 T2w = T2u * T2r;
Chris@82 561 T2v = FMA(KP951056516, T2m, T2l);
Chris@82 562 cr[WS(rs, 15)] = FNMS(T2u, T2v, T2s);
Chris@82 563 ci[WS(rs, 15)] = FMA(T2q, T2v, T2w);
Chris@82 564 T1G = TY * T1F;
Chris@82 565 T22 = T1I * T1F;
Chris@82 566 T21 = FMA(KP951056516, T20, T1X);
Chris@82 567 cr[WS(rs, 5)] = FNMS(T1I, T21, T1G);
Chris@82 568 ci[WS(rs, 5)] = FMA(TY, T21, T22);
Chris@82 569 {
Chris@82 570 E T28, T2c, T2b, T2i, T2o, T2n;
Chris@82 571 T28 = T26 * T27;
Chris@82 572 T2c = T2a * T27;
Chris@82 573 T2b = FNMS(KP951056516, T20, T1X);
Chris@82 574 cr[WS(rs, 20)] = FNMS(T2a, T2b, T28);
Chris@82 575 ci[WS(rs, 20)] = FMA(T26, T2b, T2c);
Chris@82 576 T2i = T2e * T2h;
Chris@82 577 T2o = T2k * T2h;
Chris@82 578 T2n = FNMS(KP951056516, T2m, T2l);
Chris@82 579 cr[WS(rs, 10)] = FNMS(T2k, T2n, T2i);
Chris@82 580 ci[WS(rs, 10)] = FMA(T2e, T2n, T2o);
Chris@82 581 }
Chris@82 582 }
Chris@82 583 }
Chris@82 584 {
Chris@82 585 E T6B, T73, T6Q, T78, T7j, T6u, T72, T7l, T6N, T77, T7k, T7m;
Chris@82 586 {
Chris@82 587 E T6x, T6A, T6O, T6P;
Chris@82 588 T6x = FMA(KP968479752, T6w, T6v);
Chris@82 589 T6A = FNMS(KP845997307, T6z, T6y);
Chris@82 590 T6B = FNMS(KP681693190, T6A, T6x);
Chris@82 591 T73 = FMA(KP560319534, T6x, T6A);
Chris@82 592 T6O = FNMS(KP968479752, T6p, T6i);
Chris@82 593 T6P = FNMS(KP845997307, T6a, T63);
Chris@82 594 T6Q = FMA(KP681693190, T6P, T6O);
Chris@82 595 T78 = FNMS(KP560319534, T6O, T6P);
Chris@82 596 }
Chris@82 597 {
Chris@82 598 E T6r, T6t, T6b, T6q, T6s;
Chris@82 599 T6b = FMA(KP845997307, T6a, T63);
Chris@82 600 T6q = FMA(KP968479752, T6p, T6i);
Chris@82 601 T6r = FMA(KP906616052, T6q, T6b);
Chris@82 602 T6t = FNMS(KP906616052, T6q, T6b);
Chris@82 603 T7j = FMA(KP998026728, T6r, T5W);
Chris@82 604 T6s = FNMS(KP249506682, T6r, T5W);
Chris@82 605 T6u = FNMS(KP557913902, T6t, T6s);
Chris@82 606 T72 = FMA(KP557913902, T6t, T6s);
Chris@82 607 }
Chris@82 608 {
Chris@82 609 E T6K, T6M, T6I, T6J, T6L;
Chris@82 610 T6I = FMA(KP845997307, T6z, T6y);
Chris@82 611 T6J = FNMS(KP968479752, T6w, T6v);
Chris@82 612 T6K = FNMS(KP906616052, T6J, T6I);
Chris@82 613 T6M = FMA(KP906616052, T6J, T6I);
Chris@82 614 T7l = FMA(KP998026728, T6K, T6H);
Chris@82 615 T6L = FNMS(KP249506682, T6K, T6H);
Chris@82 616 T6N = FNMS(KP557913902, T6M, T6L);
Chris@82 617 T77 = FMA(KP557913902, T6M, T6L);
Chris@82 618 }
Chris@82 619 T7k = T4l * T7j;
Chris@82 620 cr[WS(rs, 2)] = FNMS(T4n, T7l, T7k);
Chris@82 621 T7m = T4l * T7l;
Chris@82 622 ci[WS(rs, 2)] = FMA(T4n, T7j, T7m);
Chris@82 623 {
Chris@82 624 E T6C, T6D, T6R, T6S;
Chris@82 625 T6C = FNMS(KP860541664, T6B, T6u);
Chris@82 626 T6D = T5T * T6C;
Chris@82 627 T6R = FNMS(KP860541664, T6Q, T6N);
Chris@82 628 T6S = T5T * T6R;
Chris@82 629 cr[WS(rs, 7)] = FNMS(T6E, T6R, T6D);
Chris@82 630 ci[WS(rs, 7)] = FMA(T6E, T6C, T6S);
Chris@82 631 }
Chris@82 632 {
Chris@82 633 E T7d, T7e, T7h, T7i;
Chris@82 634 T7d = FMA(KP949179823, T73, T72);
Chris@82 635 T7e = T7c * T7d;
Chris@82 636 T7h = FNMS(KP949179823, T78, T77);
Chris@82 637 T7i = T7c * T7h;
Chris@82 638 cr[WS(rs, 17)] = FNMS(T7g, T7h, T7e);
Chris@82 639 ci[WS(rs, 17)] = FMA(T7g, T7d, T7i);
Chris@82 640 }
Chris@82 641 {
Chris@82 642 E T74, T75, T79, T7a;
Chris@82 643 T74 = FNMS(KP949179823, T73, T72);
Chris@82 644 T75 = T71 * T74;
Chris@82 645 T79 = FMA(KP949179823, T78, T77);
Chris@82 646 T7a = T71 * T79;
Chris@82 647 cr[WS(rs, 12)] = FNMS(T76, T79, T75);
Chris@82 648 ci[WS(rs, 12)] = FMA(T76, T74, T7a);
Chris@82 649 }
Chris@82 650 {
Chris@82 651 E T6V, T6W, T6Z, T70;
Chris@82 652 T6V = FMA(KP860541664, T6B, T6u);
Chris@82 653 T6W = T6U * T6V;
Chris@82 654 T6Z = FMA(KP860541664, T6Q, T6N);
Chris@82 655 T70 = T6U * T6Z;
Chris@82 656 cr[WS(rs, 22)] = FNMS(T6Y, T6Z, T6W);
Chris@82 657 ci[WS(rs, 22)] = FMA(T6Y, T6V, T70);
Chris@82 658 }
Chris@82 659 }
Chris@82 660 {
Chris@82 661 E T7U, T8f, T82, T8k, T7F, T7M, T7X, T7Y, T7D, T7R, T8e, T7I, T7L, T7E, T7O;
Chris@82 662 E T7N;
Chris@82 663 {
Chris@82 664 E T7S, T7T, T80, T81;
Chris@82 665 T7S = FNMS(KP734762448, T7K, T7J);
Chris@82 666 T7T = FNMS(KP772036680, T7H, T7G);
Chris@82 667 T7U = FNMS(KP621716863, T7T, T7S);
Chris@82 668 T8f = FMA(KP614372930, T7S, T7T);
Chris@82 669 T80 = FNMS(KP734762448, T7A, T7x);
Chris@82 670 T81 = FNMS(KP772036680, T7t, T7q);
Chris@82 671 T82 = FNMS(KP621716863, T81, T80);
Chris@82 672 T8k = FMA(KP614372930, T80, T81);
Chris@82 673 }
Chris@82 674 T7F = FMA(KP951056516, T6G, T6F);
Chris@82 675 T7I = FMA(KP772036680, T7H, T7G);
Chris@82 676 T7L = FMA(KP734762448, T7K, T7J);
Chris@82 677 T7M = FMA(KP994076283, T7L, T7I);
Chris@82 678 T7X = FNMS(KP249506682, T7M, T7F);
Chris@82 679 T7Y = FNMS(KP994076283, T7L, T7I);
Chris@82 680 {
Chris@82 681 E T7C, T7Q, T7u, T7B, T7P;
Chris@82 682 T7u = FMA(KP772036680, T7t, T7q);
Chris@82 683 T7B = FMA(KP734762448, T7A, T7x);
Chris@82 684 T7C = FMA(KP994076283, T7B, T7u);
Chris@82 685 T7Q = FNMS(KP994076283, T7B, T7u);
Chris@82 686 T7D = FMA(KP998026728, T7C, T7n);
Chris@82 687 T7P = FNMS(KP249506682, T7C, T7n);
Chris@82 688 T7R = FNMS(KP557913902, T7Q, T7P);
Chris@82 689 T8e = FMA(KP557913902, T7Q, T7P);
Chris@82 690 }
Chris@82 691 T7E = TO * T7D;
Chris@82 692 T7O = TR * T7D;
Chris@82 693 T7N = FMA(KP998026728, T7M, T7F);
Chris@82 694 cr[WS(rs, 3)] = FNMS(TR, T7N, T7E);
Chris@82 695 ci[WS(rs, 3)] = FMA(TO, T7N, T7O);
Chris@82 696 {
Chris@82 697 E T8l, T8t, T8q, T8u, T8h, T8m, T8j, T8p, T8g;
Chris@82 698 T8j = FMA(KP557913902, T7Y, T7X);
Chris@82 699 T8l = FNMS(KP949179823, T8k, T8j);
Chris@82 700 T8t = FMA(KP949179823, T8k, T8j);
Chris@82 701 T8p = FNMS(KP949179823, T8f, T8e);
Chris@82 702 T8q = T8o * T8p;
Chris@82 703 T8u = T8s * T8p;
Chris@82 704 T8g = FMA(KP949179823, T8f, T8e);
Chris@82 705 T8h = T8d * T8g;
Chris@82 706 T8m = T8i * T8g;
Chris@82 707 cr[WS(rs, 13)] = FNMS(T8i, T8l, T8h);
Chris@82 708 ci[WS(rs, 13)] = FMA(T8d, T8l, T8m);
Chris@82 709 cr[WS(rs, 18)] = FNMS(T8s, T8t, T8q);
Chris@82 710 ci[WS(rs, 18)] = FMA(T8o, T8t, T8u);
Chris@82 711 }
Chris@82 712 {
Chris@82 713 E T83, T8b, T88, T8c, T7W, T84, T7Z, T87, T7V;
Chris@82 714 T7Z = FNMS(KP557913902, T7Y, T7X);
Chris@82 715 T83 = FNMS(KP943557151, T82, T7Z);
Chris@82 716 T8b = FMA(KP943557151, T82, T7Z);
Chris@82 717 T87 = FNMS(KP943557151, T7U, T7R);
Chris@82 718 T88 = T86 * T87;
Chris@82 719 T8c = T8a * T87;
Chris@82 720 T7V = FMA(KP943557151, T7U, T7R);
Chris@82 721 T7W = T4z * T7V;
Chris@82 722 T84 = T4B * T7V;
Chris@82 723 cr[WS(rs, 8)] = FNMS(T4B, T83, T7W);
Chris@82 724 ci[WS(rs, 8)] = FMA(T4z, T83, T84);
Chris@82 725 cr[WS(rs, 23)] = FNMS(T8a, T8b, T88);
Chris@82 726 ci[WS(rs, 23)] = FMA(T86, T8b, T8c);
Chris@82 727 }
Chris@82 728 }
Chris@82 729 {
Chris@82 730 E T5c, T5y, T5o, T5E, T5f, T5i, T5j, T5k, T5P, T55, T5x, T5g, T5h, T5Q, T5S;
Chris@82 731 E T5R;
Chris@82 732 {
Chris@82 733 E T58, T5b, T5m, T5n;
Chris@82 734 T58 = FNMS(KP912575812, T57, T56);
Chris@82 735 T5b = FNMS(KP912018591, T5a, T59);
Chris@82 736 T5c = FNMS(KP726211448, T5b, T58);
Chris@82 737 T5y = FMA(KP525970792, T58, T5b);
Chris@82 738 T5m = FNMS(KP912575812, T50, T4X);
Chris@82 739 T5n = FMA(KP912018591, T4T, T4Q);
Chris@82 740 T5o = FNMS(KP726211448, T5n, T5m);
Chris@82 741 T5E = FMA(KP525970792, T5m, T5n);
Chris@82 742 }
Chris@82 743 T5f = FNMS(KP951056516, T40, T3X);
Chris@82 744 T5g = FMA(KP912018591, T5a, T59);
Chris@82 745 T5h = FMA(KP912575812, T57, T56);
Chris@82 746 T5i = FMA(KP851038619, T5h, T5g);
Chris@82 747 T5j = FNMS(KP248028675, T5i, T5f);
Chris@82 748 T5k = FNMS(KP851038619, T5h, T5g);
Chris@82 749 {
Chris@82 750 E T52, T54, T4U, T51, T53;
Chris@82 751 T4U = FNMS(KP912018591, T4T, T4Q);
Chris@82 752 T51 = FMA(KP912575812, T50, T4X);
Chris@82 753 T52 = FMA(KP851038619, T51, T4U);
Chris@82 754 T54 = FNMS(KP851038619, T51, T4U);
Chris@82 755 T5P = FNMS(KP992114701, T52, T4N);
Chris@82 756 T53 = FMA(KP248028675, T52, T4N);
Chris@82 757 T55 = FMA(KP554608978, T54, T53);
Chris@82 758 T5x = FNMS(KP554608978, T54, T53);
Chris@82 759 }
Chris@82 760 T5Q = TS * T5P;
Chris@82 761 T5S = TW * T5P;
Chris@82 762 T5R = FMA(KP992114701, T5i, T5f);
Chris@82 763 cr[WS(rs, 4)] = FNMS(TW, T5R, T5Q);
Chris@82 764 ci[WS(rs, 4)] = FMA(TS, T5R, T5S);
Chris@82 765 {
Chris@82 766 E T5F, T5N, T5K, T5O, T5A, T5G, T5D, T5J, T5z;
Chris@82 767 T5D = FMA(KP554608978, T5k, T5j);
Chris@82 768 T5F = FNMS(KP943557151, T5E, T5D);
Chris@82 769 T5N = FMA(KP943557151, T5E, T5D);
Chris@82 770 T5J = FMA(KP943557151, T5y, T5x);
Chris@82 771 T5K = T5I * T5J;
Chris@82 772 T5O = T5M * T5J;
Chris@82 773 T5z = FNMS(KP943557151, T5y, T5x);
Chris@82 774 T5A = T5w * T5z;
Chris@82 775 T5G = T5C * T5z;
Chris@82 776 cr[WS(rs, 14)] = FNMS(T5C, T5F, T5A);
Chris@82 777 ci[WS(rs, 14)] = FMA(T5w, T5F, T5G);
Chris@82 778 cr[WS(rs, 19)] = FNMS(T5M, T5N, T5K);
Chris@82 779 ci[WS(rs, 19)] = FMA(T5I, T5N, T5O);
Chris@82 780 }
Chris@82 781 {
Chris@82 782 E T5p, T5t, T5s, T5u, T5e, T5q, T5l, T5r, T5d;
Chris@82 783 T5l = FNMS(KP554608978, T5k, T5j);
Chris@82 784 T5p = FNMS(KP803003575, T5o, T5l);
Chris@82 785 T5t = FMA(KP803003575, T5o, T5l);
Chris@82 786 T5r = FMA(KP803003575, T5c, T55);
Chris@82 787 T5s = T23 * T5r;
Chris@82 788 T5u = T25 * T5r;
Chris@82 789 T5d = FNMS(KP803003575, T5c, T55);
Chris@82 790 T5e = TT * T5d;
Chris@82 791 T5q = TX * T5d;
Chris@82 792 cr[WS(rs, 9)] = FNMS(TX, T5p, T5e);
Chris@82 793 ci[WS(rs, 9)] = FMA(TT, T5p, T5q);
Chris@82 794 cr[WS(rs, 24)] = FNMS(T25, T5t, T5s);
Chris@82 795 ci[WS(rs, 24)] = FMA(T23, T5t, T5u);
Chris@82 796 }
Chris@82 797 }
Chris@82 798 {
Chris@82 799 E T3Q, T4q, T4a, T4w, T41, T44, T45, T46, T4J, T3J, T4p, T42, T43, T4K, T4M;
Chris@82 800 E T4L;
Chris@82 801 {
Chris@82 802 E T3M, T3P, T48, T49;
Chris@82 803 T3M = FMA(KP871714437, T3L, T3K);
Chris@82 804 T3P = FNMS(KP831864738, T3O, T3N);
Chris@82 805 T3Q = FNMS(KP559154169, T3P, T3M);
Chris@82 806 T4q = FMA(KP683113946, T3M, T3P);
Chris@82 807 T48 = FNMS(KP871714437, T39, T2U);
Chris@82 808 T49 = FNMS(KP831864738, T3E, T3p);
Chris@82 809 T4a = FMA(KP559154169, T49, T48);
Chris@82 810 T4w = FNMS(KP683113946, T48, T49);
Chris@82 811 }
Chris@82 812 T41 = FMA(KP951056516, T40, T3X);
Chris@82 813 T42 = FNMS(KP871714437, T3L, T3K);
Chris@82 814 T43 = FMA(KP831864738, T3O, T3N);
Chris@82 815 T44 = FNMS(KP904730450, T43, T42);
Chris@82 816 T45 = FNMS(KP242145790, T44, T41);
Chris@82 817 T46 = FMA(KP904730450, T43, T42);
Chris@82 818 {
Chris@82 819 E T3G, T3I, T3a, T3F, T3H;
Chris@82 820 T3a = FMA(KP871714437, T39, T2U);
Chris@82 821 T3F = FMA(KP831864738, T3E, T3p);
Chris@82 822 T3G = FMA(KP904730450, T3F, T3a);
Chris@82 823 T3I = FNMS(KP904730450, T3F, T3a);
Chris@82 824 T4J = FMA(KP968583161, T3G, T2F);
Chris@82 825 T3H = FNMS(KP242145790, T3G, T2F);
Chris@82 826 T3J = FMA(KP541454447, T3I, T3H);
Chris@82 827 T4p = FNMS(KP541454447, T3I, T3H);
Chris@82 828 }
Chris@82 829 T4K = TN * T4J;
Chris@82 830 T4M = TQ * T4J;
Chris@82 831 T4L = FMA(KP968583161, T44, T41);
Chris@82 832 cr[WS(rs, 1)] = FNMS(TQ, T4L, T4K);
Chris@82 833 ci[WS(rs, 1)] = FMA(TN, T4L, T4M);
Chris@82 834 {
Chris@82 835 E T4x, T4H, T4E, T4I, T4s, T4y, T4v, T4D, T4r;
Chris@82 836 T4v = FNMS(KP541454447, T46, T45);
Chris@82 837 T4x = FNMS(KP833417178, T4w, T4v);
Chris@82 838 T4H = FMA(KP833417178, T4w, T4v);
Chris@82 839 T4D = FMA(KP833417178, T4q, T4p);
Chris@82 840 T4E = T4C * T4D;
Chris@82 841 T4I = T4G * T4D;
Chris@82 842 T4r = FNMS(KP833417178, T4q, T4p);
Chris@82 843 T4s = T4o * T4r;
Chris@82 844 T4y = T4u * T4r;
Chris@82 845 cr[WS(rs, 11)] = FNMS(T4u, T4x, T4s);
Chris@82 846 ci[WS(rs, 11)] = FMA(T4o, T4x, T4y);
Chris@82 847 cr[WS(rs, 16)] = FNMS(T4G, T4H, T4E);
Chris@82 848 ci[WS(rs, 16)] = FMA(T4C, T4H, T4I);
Chris@82 849 }
Chris@82 850 {
Chris@82 851 E T4b, T4j, T4g, T4k, T3S, T4c, T47, T4f, T3R;
Chris@82 852 T47 = FMA(KP541454447, T46, T45);
Chris@82 853 T4b = FMA(KP921177326, T4a, T47);
Chris@82 854 T4j = FNMS(KP921177326, T4a, T47);
Chris@82 855 T4f = FMA(KP921177326, T3Q, T3J);
Chris@82 856 T4g = T4e * T4f;
Chris@82 857 T4k = T4i * T4f;
Chris@82 858 T3R = FNMS(KP921177326, T3Q, T3J);
Chris@82 859 T3S = T2y * T3R;
Chris@82 860 T4c = T3U * T3R;
Chris@82 861 cr[WS(rs, 6)] = FNMS(T3U, T4b, T3S);
Chris@82 862 ci[WS(rs, 6)] = FMA(T2y, T4b, T4c);
Chris@82 863 cr[WS(rs, 21)] = FNMS(T4i, T4j, T4g);
Chris@82 864 ci[WS(rs, 21)] = FMA(T4e, T4j, T4k);
Chris@82 865 }
Chris@82 866 }
Chris@82 867 }
Chris@82 868 }
Chris@82 869 }
Chris@82 870 }
Chris@82 871
/*
 * Twiddle-factor program for the FMA build of hb2_25; it is handed to the
 * planner through `desc` below.  It lists four TW_CEXP entries whose last
 * fields are 1, 3, 9 and 24, followed by a TW_NEXT entry.
 * NOTE(review): tw_instr, TW_CEXP and TW_NEXT are declared outside this
 * file; presumably each TW_CEXP entry requests one complex twiddle and
 * TW_NEXT ends the list -- confirm against the FFTW headers.
 */
Chris@82 872 static const tw_instr twinstr[] = {
Chris@82 873 {TW_CEXP, 1, 1},
Chris@82 874 {TW_CEXP, 1, 3},
Chris@82 875 {TW_CEXP, 1, 9},
Chris@82 876 {TW_CEXP, 1, 24},
Chris@82 877 {TW_NEXT, 1, 0}
Chris@82 878 };
Chris@82 879
/*
 * Descriptor for the FMA build: the value 25, the name "hb2_25", the twiddle
 * program `twinstr`, &GENUS, and the counts {84, 78, 356, 0}, which match the
 * "84 additions, 78 multiplications, 356 fused multiply/add" figures in the
 * generator comment at the top of this branch.
 * NOTE(review): the leading 25 is presumably the transform size (generator
 * was run with -n 25); hc2hc_desc and GENUS are declared elsewhere -- confirm.
 */
Chris@82 880 static const hc2hc_desc desc = { 25, "hb2_25", twinstr, &GENUS, {84, 78, 356, 0} };
Chris@82 881
/*
 * Registration entry point (FMA build): passes the hb2_25 kernel and its
 * descriptor `desc` to X(khc2hc_register) for planner `p`.
 * NOTE(review): X() is a macro defined outside this file, presumably the
 * library's symbol-prefixing macro -- confirm.
 */
Chris@82 882 void X(codelet_hb2_25) (planner *p) {
Chris@82 883 X(khc2hc_register) (p, hb2_25, &desc);
Chris@82 884 }
Chris@82 885 #else
Chris@82 886
Chris@82 887 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 25 -dif -name hb2_25 -include rdft/scalar/hb.h */
Chris@82 888
Chris@82 889 /*
Chris@82 890 * This function contains 440 FP additions, 340 FP multiplications,
Chris@82 891 * (or, 280 additions, 180 multiplications, 160 fused multiply/add),
Chris@82 892 * 155 stack variables, 20 constants, and 100 memory accesses
Chris@82 893 */
Chris@82 894 #include "rdft/scalar/hb.h"
Chris@82 895
/*
 * hb2_25 -- non-FMA build of the generated size-25 codelet (this branch is
 * compiled when neither ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is
 * defined).
 *
 * Parameters:
 *   cr, ci  data arrays, updated in place; each iteration reads and then
 *           writes the 25 elements cr[WS(rs, 0..24)] and the 25 elements
 *           ci[WS(rs, 0..24)]
 *   W       twiddle table; 8 values are consumed per iteration, starting at
 *           W + (mb - 1) * 8
 *   rs      stride passed to WS() to address the 25 elements
 *   mb, me  half-open iteration range [mb, me)
 *   ms      per-iteration step: cr advances by +ms, ci by -ms
 *
 * Output element 0 is stored without a twiddle multiplication; every other
 * output pair k is stored as a two-term combination of a computed pair with
 * a multiplier pair that is either loaded from W or derived from the loaded
 * values by products.
 *
 * NOTE(review): R, E, INT, stride, WS, DK, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are defined outside this file.  FMA(a, b, c) and
 * FNMS(a, b, c) are presumably a*b + c and c - a*b, which would make each
 * store pair a complex multiplication -- confirm against the FFTW headers.
 * This code is machine generated; statement order is left untouched.
 */
Chris@82 896 static void hb2_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 897 {
/* Numeric constants; the name suffix repeats the leading digits of the value. */
Chris@82 898 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 899 DK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@82 900 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@82 901 DK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@82 902 DK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@82 903 DK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@82 904 DK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@82 905 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 906 DK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@82 907 DK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@82 908 DK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@82 909 DK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@82 910 DK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@82 911 DK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@82 912 DK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@82 913 DK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@82 914 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 915 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 916 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 917 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 918 {
Chris@82 919 INT m;
/* One pass per m in [mb, me): cr moves forward by ms, ci moves backward by ms, W advances by 8. */
Chris@82 920 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) {
Chris@82 921 E TN, TQ, TO, TR, TT, TY, T2t, T2r, TZ, TU, T4f, T4l, T2d, T4v, T5m;
Chris@82 922 E T2j, T5l, T4X, T2v, T11, T3R, T1L, T5d, T6x, T5h, T6t, T25, T26, T27, T29;
Chris@82 923 E T6D, T7v, T49, T7l, T7p, T7t, T2p, T2n, T4b, T4p, T5n, T6B, T5b, T5p, T6p;
Chris@82 924 E T6r, T59, T4r;
/*
 * Twiddle setup.  W[0..7] are loaded as four pairs: (TN, TQ), (TO, TR),
 * (TU, TZ) and (T25, T26); these are the multipliers later applied to
 * outputs 1, 3, 9 and 24.  All remaining multiplier pairs are built here
 * from sums and differences of products of the loaded values.
 */
Chris@82 925 {
Chris@82 926 E T2c, T4j, T2h, T4e, T2b, T4k, T2i, T4d;
Chris@82 927 {
Chris@82 928 E TP, TX, TS, TW;
Chris@82 929 TN = W[0];
Chris@82 930 TQ = W[1];
Chris@82 931 TO = W[2];
Chris@82 932 TR = W[3];
Chris@82 933 TP = TN * TO;
Chris@82 934 TX = TQ * TO;
Chris@82 935 TS = TQ * TR;
Chris@82 936 TW = TN * TR;
Chris@82 937 TT = TP - TS;
Chris@82 938 TY = TW + TX;
Chris@82 939 T2t = TW - TX;
Chris@82 940 T2r = TP + TS;
Chris@82 941 TZ = W[5];
Chris@82 942 T2c = TQ * TZ;
Chris@82 943 T4j = TO * TZ;
Chris@82 944 T2h = TN * TZ;
Chris@82 945 T4e = TR * TZ;
Chris@82 946 TU = W[4];
Chris@82 947 T2b = TN * TU;
Chris@82 948 T4k = TR * TU;
Chris@82 949 T2i = TQ * TU;
Chris@82 950 T4d = TO * TU;
Chris@82 951 }
Chris@82 952 T4f = T4d - T4e;
Chris@82 953 T4l = T4j + T4k;
Chris@82 954 {
Chris@82 955 E T2s, T2u, TV, T10, T3P, T3Q, T1J, T1K;
Chris@82 956 T2d = T2b - T2c;
Chris@82 957 T4v = T2b + T2c;
Chris@82 958 T5m = T4j - T4k;
Chris@82 959 T2j = T2h + T2i;
Chris@82 960 T5l = T4d + T4e;
Chris@82 961 T4X = T2h - T2i;
Chris@82 962 T2s = T2r * TU;
Chris@82 963 T2u = T2t * TZ;
Chris@82 964 T2v = T2s + T2u;
Chris@82 965 TV = TT * TU;
Chris@82 966 T10 = TY * TZ;
Chris@82 967 T11 = TV + T10;
Chris@82 968 T3P = T2r * TZ;
Chris@82 969 T3Q = T2t * TU;
Chris@82 970 T3R = T3P - T3Q;
Chris@82 971 T1J = TT * TZ;
Chris@82 972 T1K = TY * TU;
Chris@82 973 T1L = T1J - T1K;
Chris@82 974 T5d = TV - T10;
Chris@82 975 T6x = T3P + T3Q;
Chris@82 976 T5h = T1J + T1K;
Chris@82 977 T6t = T2s - T2u;
Chris@82 978 T25 = W[6];
Chris@82 979 T26 = W[7];
Chris@82 980 T27 = FMA(TT, T25, TY * T26);
Chris@82 981 T29 = FNMS(TY, T25, TT * T26);
Chris@82 982 T6D = FNMS(T4X, T25, T4v * T26);
Chris@82 983 T7v = FNMS(T1L, T25, T11 * T26);
Chris@82 984 T49 = FMA(T2r, T25, T2t * T26);
Chris@82 985 T7l = FMA(T2d, T25, T2j * T26);
Chris@82 986 T7p = FNMS(T2j, T25, T2d * T26);
Chris@82 987 T7t = FMA(T11, T25, T1L * T26);
Chris@82 988 T2p = FNMS(TZ, T25, TU * T26);
Chris@82 989 T2n = FMA(TU, T25, TZ * T26);
Chris@82 990 T4b = FNMS(T2t, T25, T2r * T26);
Chris@82 991 T4p = FMA(T2v, T25, T3R * T26);
Chris@82 992 T5n = FMA(T5l, T25, T5m * T26);
Chris@82 993 T6B = FMA(T4v, T25, T4X * T26);
Chris@82 994 T5b = FNMS(TQ, T25, TN * T26);
Chris@82 995 T5p = FNMS(T5m, T25, T5l * T26);
Chris@82 996 T6p = FMA(TO, T25, TR * T26);
Chris@82 997 T6r = FNMS(TR, T25, TO * T26);
Chris@82 998 T59 = FMA(TN, T25, TQ * T26);
Chris@82 999 T4r = FNMS(T3R, T25, T2v * T26);
Chris@82 1000 }
Chris@82 1001 }
/* Data stage: load all 50 inputs and combine them into intermediates, then store the 50 outputs. */
Chris@82 1002 {
Chris@82 1003 E T9, T6i, T40, T3z, T5Y, Ti, Tr, Ts, T1d, T1m, T1P, T2K, T4P, T3H, T4y;
Chris@82 1004 E T5G, T71, T65, T6N, T5z, T70, T64, T6K, T2Z, T4Q, T3I, T4B, T20, T5Z, T3C;
Chris@82 1005 E T43, T6j, TB, TK, TL, T1w, T1F, T1Q, T3f, T4S, T3K, T4F, T5V, T74, T68;
Chris@82 1006 E T6U, T5O, T73, T67, T6R, T3u, T4T, T3L, T4I;
/* Inputs cr[0], cr[5], cr[10], ci[4], ci[9] -> T9, T6i, T40, T3z, T5Y. */
Chris@82 1007 {
Chris@82 1008 E T1, T4, T7, T8, T3Z, T3Y, T3x, T3y;
Chris@82 1009 T1 = cr[0];
Chris@82 1010 {
Chris@82 1011 E T2, T3, T5, T6;
Chris@82 1012 T2 = cr[WS(rs, 5)];
Chris@82 1013 T3 = ci[WS(rs, 4)];
Chris@82 1014 T4 = T2 + T3;
Chris@82 1015 T5 = cr[WS(rs, 10)];
Chris@82 1016 T6 = ci[WS(rs, 9)];
Chris@82 1017 T7 = T5 + T6;
Chris@82 1018 T8 = T4 + T7;
Chris@82 1019 T3Z = T5 - T6;
Chris@82 1020 T3Y = T2 - T3;
Chris@82 1021 }
Chris@82 1022 T9 = T1 + T8;
Chris@82 1023 T6i = FMA(KP951056516, T3Y, KP587785252 * T3Z);
Chris@82 1024 T40 = FNMS(KP951056516, T3Z, KP587785252 * T3Y);
Chris@82 1025 T3x = FNMS(KP250000000, T8, T1);
Chris@82 1026 T3y = KP559016994 * (T4 - T7);
Chris@82 1027 T3z = T3x - T3y;
Chris@82 1028 T5Y = T3y + T3x;
Chris@82 1029 }
/*
 * Inputs cr[1, 4, 6, 9, 11, 14, 16, 19, 21, 24] and
 * ci[0, 3, 5, 8, 10, 13, 15, 18, 20, 23], loaded in four groups of five.
 */
Chris@82 1030 {
Chris@82 1031 E Ta, T2x, T5w, T2F, Th, T2w, T1e, T2P, T5B, T2X, T1l, T2O, Tj, T2N, T5D;
Chris@82 1032 E T2T, Tq, T2S, T15, T2B, T5u, T2H, T1c, T2G;
Chris@82 1033 {
Chris@82 1034 E Tg, T2E, Td, T2D;
Chris@82 1035 Ta = cr[WS(rs, 1)];
Chris@82 1036 {
Chris@82 1037 E Te, Tf, Tb, Tc;
Chris@82 1038 Te = cr[WS(rs, 11)];
Chris@82 1039 Tf = ci[WS(rs, 8)];
Chris@82 1040 Tg = Te + Tf;
Chris@82 1041 T2E = Te - Tf;
Chris@82 1042 Tb = cr[WS(rs, 6)];
Chris@82 1043 Tc = ci[WS(rs, 3)];
Chris@82 1044 Td = Tb + Tc;
Chris@82 1045 T2D = Tb - Tc;
Chris@82 1046 }
Chris@82 1047 T2x = KP559016994 * (Td - Tg);
Chris@82 1048 T5w = FMA(KP951056516, T2D, KP587785252 * T2E);
Chris@82 1049 T2F = FNMS(KP951056516, T2E, KP587785252 * T2D);
Chris@82 1050 Th = Td + Tg;
Chris@82 1051 T2w = FNMS(KP250000000, Th, Ta);
Chris@82 1052 }
Chris@82 1053 {
Chris@82 1054 E T1k, T2W, T1h, T2V;
Chris@82 1055 T1e = ci[WS(rs, 20)];
Chris@82 1056 {
Chris@82 1057 E T1i, T1j, T1f, T1g;
Chris@82 1058 T1i = cr[WS(rs, 14)];
Chris@82 1059 T1j = cr[WS(rs, 19)];
Chris@82 1060 T1k = T1i + T1j;
Chris@82 1061 T2W = T1j - T1i;
Chris@82 1062 T1f = ci[WS(rs, 15)];
Chris@82 1063 T1g = cr[WS(rs, 24)];
Chris@82 1064 T1h = T1f - T1g;
Chris@82 1065 T2V = T1f + T1g;
Chris@82 1066 }
Chris@82 1067 T2P = KP559016994 * (T1h + T1k);
Chris@82 1068 T5B = FMA(KP951056516, T2V, KP587785252 * T2W);
Chris@82 1069 T2X = FNMS(KP951056516, T2W, KP587785252 * T2V);
Chris@82 1070 T1l = T1h - T1k;
Chris@82 1071 T2O = FNMS(KP250000000, T1l, T1e);
Chris@82 1072 }
Chris@82 1073 {
Chris@82 1074 E Tp, T2M, Tm, T2L;
Chris@82 1075 Tj = cr[WS(rs, 4)];
Chris@82 1076 {
Chris@82 1077 E Tn, To, Tk, Tl;
Chris@82 1078 Tn = ci[WS(rs, 10)];
Chris@82 1079 To = ci[WS(rs, 5)];
Chris@82 1080 Tp = Tn + To;
Chris@82 1081 T2M = Tn - To;
Chris@82 1082 Tk = cr[WS(rs, 9)];
Chris@82 1083 Tl = ci[0];
Chris@82 1084 Tm = Tk + Tl;
Chris@82 1085 T2L = Tk - Tl;
Chris@82 1086 }
Chris@82 1087 T2N = FNMS(KP951056516, T2M, KP587785252 * T2L);
Chris@82 1088 T5D = FMA(KP951056516, T2L, KP587785252 * T2M);
Chris@82 1089 T2T = KP559016994 * (Tm - Tp);
Chris@82 1090 Tq = Tm + Tp;
Chris@82 1091 T2S = FNMS(KP250000000, Tq, Tj);
Chris@82 1092 }
Chris@82 1093 {
Chris@82 1094 E T1b, T2A, T18, T2z;
Chris@82 1095 T15 = ci[WS(rs, 23)];
Chris@82 1096 {
Chris@82 1097 E T19, T1a, T16, T17;
Chris@82 1098 T19 = ci[WS(rs, 13)];
Chris@82 1099 T1a = cr[WS(rs, 16)];
Chris@82 1100 T1b = T19 - T1a;
Chris@82 1101 T2A = T19 + T1a;
Chris@82 1102 T16 = ci[WS(rs, 18)];
Chris@82 1103 T17 = cr[WS(rs, 21)];
Chris@82 1104 T18 = T16 - T17;
Chris@82 1105 T2z = T16 + T17;
Chris@82 1106 }
Chris@82 1107 T2B = FNMS(KP951056516, T2A, KP587785252 * T2z);
Chris@82 1108 T5u = FMA(KP951056516, T2z, KP587785252 * T2A);
Chris@82 1109 T2H = KP559016994 * (T18 - T1b);
Chris@82 1110 T1c = T18 + T1b;
Chris@82 1111 T2G = FNMS(KP250000000, T1c, T15);
Chris@82 1112 }
Chris@82 1113 Ti = Ta + Th;
Chris@82 1114 Tr = Tj + Tq;
Chris@82 1115 Ts = Ti + Tr;
Chris@82 1116 T1d = T15 + T1c;
Chris@82 1117 T1m = T1e + T1l;
Chris@82 1118 T1P = T1d + T1m;
Chris@82 1119 {
Chris@82 1120 E T2C, T4w, T2J, T4x, T2y, T2I;
Chris@82 1121 T2y = T2w - T2x;
Chris@82 1122 T2C = T2y - T2B;
Chris@82 1123 T4w = T2y + T2B;
Chris@82 1124 T2I = T2G - T2H;
Chris@82 1125 T2J = T2F + T2I;
Chris@82 1126 T4x = T2I - T2F;
Chris@82 1127 T2K = FNMS(KP481753674, T2J, KP876306680 * T2C);
Chris@82 1128 T4P = FMA(KP728968627, T4x, KP684547105 * T4w);
Chris@82 1129 T3H = FMA(KP876306680, T2J, KP481753674 * T2C);
Chris@82 1130 T4y = FNMS(KP684547105, T4x, KP728968627 * T4w);
Chris@82 1131 }
Chris@82 1132 {
Chris@82 1133 E T5C, T6M, T5F, T6L, T5A, T5E;
Chris@82 1134 T5A = T2T + T2S;
Chris@82 1135 T5C = T5A - T5B;
Chris@82 1136 T6M = T5A + T5B;
Chris@82 1137 T5E = T2O + T2P;
Chris@82 1138 T5F = T5D + T5E;
Chris@82 1139 T6L = T5E - T5D;
Chris@82 1140 T5G = FNMS(KP844327925, T5F, KP535826794 * T5C);
Chris@82 1141 T71 = FMA(KP637423989, T6L, KP770513242 * T6M);
Chris@82 1142 T65 = FMA(KP535826794, T5F, KP844327925 * T5C);
Chris@82 1143 T6N = FNMS(KP637423989, T6M, KP770513242 * T6L);
Chris@82 1144 }
Chris@82 1145 {
Chris@82 1146 E T5v, T6I, T5y, T6J, T5t, T5x;
Chris@82 1147 T5t = T2x + T2w;
Chris@82 1148 T5v = T5t - T5u;
Chris@82 1149 T6I = T5t + T5u;
Chris@82 1150 T5x = T2H + T2G;
Chris@82 1151 T5y = T5w + T5x;
Chris@82 1152 T6J = T5x - T5w;
Chris@82 1153 T5z = FNMS(KP248689887, T5y, KP968583161 * T5v);
Chris@82 1154 T70 = FMA(KP535826794, T6J, KP844327925 * T6I);
Chris@82 1155 T64 = FMA(KP968583161, T5y, KP248689887 * T5v);
Chris@82 1156 T6K = FNMS(KP844327925, T6J, KP535826794 * T6I);
Chris@82 1157 }
Chris@82 1158 {
Chris@82 1159 E T2R, T4z, T2Y, T4A, T2Q, T2U;
Chris@82 1160 T2Q = T2O - T2P;
Chris@82 1161 T2R = T2N + T2Q;
Chris@82 1162 T4z = T2Q - T2N;
Chris@82 1163 T2U = T2S - T2T;
Chris@82 1164 T2Y = T2U - T2X;
Chris@82 1165 T4A = T2U + T2X;
Chris@82 1166 T2Z = FMA(KP904827052, T2R, KP425779291 * T2Y);
Chris@82 1167 T4Q = FNMS(KP992114701, T4z, KP125333233 * T4A);
Chris@82 1168 T3I = FNMS(KP425779291, T2R, KP904827052 * T2Y);
Chris@82 1169 T4B = FMA(KP125333233, T4z, KP992114701 * T4A);
Chris@82 1170 }
Chris@82 1171 }
/* Inputs ci[24], ci[19], ci[14], cr[20], cr[15] -> T20, T5Z, T3C, T43, T6j. */
Chris@82 1172 {
Chris@82 1173 E T1S, T1V, T1Y, T1Z, T3B, T3A, T41, T42;
Chris@82 1174 T1S = ci[WS(rs, 24)];
Chris@82 1175 {
Chris@82 1176 E T1T, T1U, T1W, T1X;
Chris@82 1177 T1T = ci[WS(rs, 19)];
Chris@82 1178 T1U = cr[WS(rs, 20)];
Chris@82 1179 T1V = T1T - T1U;
Chris@82 1180 T1W = ci[WS(rs, 14)];
Chris@82 1181 T1X = cr[WS(rs, 15)];
Chris@82 1182 T1Y = T1W - T1X;
Chris@82 1183 T1Z = T1V + T1Y;
Chris@82 1184 T3B = T1W + T1X;
Chris@82 1185 T3A = T1T + T1U;
Chris@82 1186 }
Chris@82 1187 T20 = T1S + T1Z;
Chris@82 1188 T5Z = FMA(KP951056516, T3A, KP587785252 * T3B);
Chris@82 1189 T3C = FNMS(KP951056516, T3B, KP587785252 * T3A);
Chris@82 1190 T41 = FNMS(KP250000000, T1Z, T1S);
Chris@82 1191 T42 = KP559016994 * (T1V - T1Y);
Chris@82 1192 T43 = T41 - T42;
Chris@82 1193 T6j = T42 + T41;
Chris@82 1194 }
/*
 * Inputs cr[2, 3, 7, 8, 12, 13, 17, 18, 22, 23] and
 * ci[1, 2, 6, 7, 11, 12, 16, 17, 21, 22], loaded in four groups of five.
 */
Chris@82 1195 {
Chris@82 1196 E Tt, T32, T5L, T3a, TA, T31, T1o, T36, T5J, T3c, T1v, T3b, TC, T3h, T5S;
Chris@82 1197 E T3p, TJ, T3g, T1x, T3l, T5Q, T3r, T1E, T3q;
Chris@82 1198 {
Chris@82 1199 E Tw, T38, Tz, T39;
Chris@82 1200 Tt = cr[WS(rs, 2)];
Chris@82 1201 {
Chris@82 1202 E Tu, Tv, Tx, Ty;
Chris@82 1203 Tu = cr[WS(rs, 7)];
Chris@82 1204 Tv = ci[WS(rs, 2)];
Chris@82 1205 Tw = Tu + Tv;
Chris@82 1206 T38 = Tu - Tv;
Chris@82 1207 Tx = cr[WS(rs, 12)];
Chris@82 1208 Ty = ci[WS(rs, 7)];
Chris@82 1209 Tz = Tx + Ty;
Chris@82 1210 T39 = Tx - Ty;
Chris@82 1211 }
Chris@82 1212 T32 = KP559016994 * (Tw - Tz);
Chris@82 1213 T5L = FMA(KP951056516, T38, KP587785252 * T39);
Chris@82 1214 T3a = FNMS(KP951056516, T39, KP587785252 * T38);
Chris@82 1215 TA = Tw + Tz;
Chris@82 1216 T31 = FNMS(KP250000000, TA, Tt);
Chris@82 1217 }
Chris@82 1218 {
Chris@82 1219 E T1r, T34, T1u, T35;
Chris@82 1220 T1o = ci[WS(rs, 22)];
Chris@82 1221 {
Chris@82 1222 E T1p, T1q, T1s, T1t;
Chris@82 1223 T1p = ci[WS(rs, 17)];
Chris@82 1224 T1q = cr[WS(rs, 22)];
Chris@82 1225 T1r = T1p - T1q;
Chris@82 1226 T34 = T1p + T1q;
Chris@82 1227 T1s = ci[WS(rs, 12)];
Chris@82 1228 T1t = cr[WS(rs, 17)];
Chris@82 1229 T1u = T1s - T1t;
Chris@82 1230 T35 = T1s + T1t;
Chris@82 1231 }
Chris@82 1232 T36 = FNMS(KP951056516, T35, KP587785252 * T34);
Chris@82 1233 T5J = FMA(KP951056516, T34, KP587785252 * T35);
Chris@82 1234 T3c = KP559016994 * (T1r - T1u);
Chris@82 1235 T1v = T1r + T1u;
Chris@82 1236 T3b = FNMS(KP250000000, T1v, T1o);
Chris@82 1237 }
Chris@82 1238 {
Chris@82 1239 E TI, T3o, TF, T3n;
Chris@82 1240 TC = cr[WS(rs, 3)];
Chris@82 1241 {
Chris@82 1242 E TG, TH, TD, TE;
Chris@82 1243 TG = ci[WS(rs, 11)];
Chris@82 1244 TH = ci[WS(rs, 6)];
Chris@82 1245 TI = TG + TH;
Chris@82 1246 T3o = TG - TH;
Chris@82 1247 TD = cr[WS(rs, 8)];
Chris@82 1248 TE = ci[WS(rs, 1)];
Chris@82 1249 TF = TD + TE;
Chris@82 1250 T3n = TD - TE;
Chris@82 1251 }
Chris@82 1252 T3h = KP559016994 * (TF - TI);
Chris@82 1253 T5S = FMA(KP951056516, T3n, KP587785252 * T3o);
Chris@82 1254 T3p = FNMS(KP951056516, T3o, KP587785252 * T3n);
Chris@82 1255 TJ = TF + TI;
Chris@82 1256 T3g = FNMS(KP250000000, TJ, TC);
Chris@82 1257 }
Chris@82 1258 {
Chris@82 1259 E T1D, T3k, T1A, T3j;
Chris@82 1260 T1x = ci[WS(rs, 21)];
Chris@82 1261 {
Chris@82 1262 E T1B, T1C, T1y, T1z;
Chris@82 1263 T1B = cr[WS(rs, 13)];
Chris@82 1264 T1C = cr[WS(rs, 18)];
Chris@82 1265 T1D = T1B + T1C;
Chris@82 1266 T3k = T1C - T1B;
Chris@82 1267 T1y = ci[WS(rs, 16)];
Chris@82 1268 T1z = cr[WS(rs, 23)];
Chris@82 1269 T1A = T1y - T1z;
Chris@82 1270 T3j = T1y + T1z;
Chris@82 1271 }
Chris@82 1272 T3l = FNMS(KP951056516, T3k, KP587785252 * T3j);
Chris@82 1273 T5Q = FMA(KP951056516, T3j, KP587785252 * T3k);
Chris@82 1274 T3r = KP559016994 * (T1A + T1D);
Chris@82 1275 T1E = T1A - T1D;
Chris@82 1276 T3q = FNMS(KP250000000, T1E, T1x);
Chris@82 1277 }
Chris@82 1278 TB = Tt + TA;
Chris@82 1279 TK = TC + TJ;
Chris@82 1280 TL = TB + TK;
Chris@82 1281 T1w = T1o + T1v;
Chris@82 1282 T1F = T1x + T1E;
Chris@82 1283 T1Q = T1w + T1F;
Chris@82 1284 {
Chris@82 1285 E T37, T4D, T3e, T4E, T33, T3d;
Chris@82 1286 T33 = T31 - T32;
Chris@82 1287 T37 = T33 - T36;
Chris@82 1288 T4D = T33 + T36;
Chris@82 1289 T3d = T3b - T3c;
Chris@82 1290 T3e = T3a + T3d;
Chris@82 1291 T4E = T3d - T3a;
Chris@82 1292 T3f = FNMS(KP844327925, T3e, KP535826794 * T37);
Chris@82 1293 T4S = FMA(KP062790519, T4E, KP998026728 * T4D);
Chris@82 1294 T3K = FMA(KP535826794, T3e, KP844327925 * T37);
Chris@82 1295 T4F = FNMS(KP998026728, T4E, KP062790519 * T4D);
Chris@82 1296 }
Chris@82 1297 {
Chris@82 1298 E T5R, T6T, T5U, T6S, T5P, T5T;
Chris@82 1299 T5P = T3h + T3g;
Chris@82 1300 T5R = T5P - T5Q;
Chris@82 1301 T6T = T5P + T5Q;
Chris@82 1302 T5T = T3q + T3r;
Chris@82 1303 T5U = T5S + T5T;
Chris@82 1304 T6S = T5T - T5S;
Chris@82 1305 T5V = FNMS(KP684547105, T5U, KP728968627 * T5R);
Chris@82 1306 T74 = FNMS(KP992114701, T6S, KP125333233 * T6T);
Chris@82 1307 T68 = FMA(KP728968627, T5U, KP684547105 * T5R);
Chris@82 1308 T6U = FMA(KP125333233, T6S, KP992114701 * T6T);
Chris@82 1309 }
Chris@82 1310 {
Chris@82 1311 E T5K, T6Q, T5N, T6P, T5I, T5M;
Chris@82 1312 T5I = T32 + T31;
Chris@82 1313 T5K = T5I - T5J;
Chris@82 1314 T6Q = T5I + T5J;
Chris@82 1315 T5M = T3c + T3b;
Chris@82 1316 T5N = T5L + T5M;
Chris@82 1317 T6P = T5M - T5L;
Chris@82 1318 T5O = FNMS(KP481753674, T5N, KP876306680 * T5K);
Chris@82 1319 T73 = FNMS(KP425779291, T6P, KP904827052 * T6Q);
Chris@82 1320 T67 = FMA(KP876306680, T5N, KP481753674 * T5K);
Chris@82 1321 T6R = FMA(KP904827052, T6P, KP425779291 * T6Q);
Chris@82 1322 }
Chris@82 1323 {
Chris@82 1324 E T3m, T4H, T3t, T4G, T3i, T3s;
Chris@82 1325 T3i = T3g - T3h;
Chris@82 1326 T3m = T3i - T3l;
Chris@82 1327 T4H = T3i + T3l;
Chris@82 1328 T3s = T3q - T3r;
Chris@82 1329 T3t = T3p + T3s;
Chris@82 1330 T4G = T3s - T3p;
Chris@82 1331 T3u = FNMS(KP998026728, T3t, KP062790519 * T3m);
Chris@82 1332 T4T = FNMS(KP637423989, T4G, KP770513242 * T4H);
Chris@82 1333 T3L = FMA(KP062790519, T3t, KP998026728 * T3m);
Chris@82 1334 T4I = FMA(KP770513242, T4G, KP637423989 * T4H);
Chris@82 1335 }
Chris@82 1336 }
/* Outputs 0, 5, 10, 15, 20.  Element 0 is stored without a multiplier pair. */
Chris@82 1337 {
Chris@82 1338 E TM, T14, T2e, T21, T23, T2l, T1H, T2f, T1O, T2k;
Chris@82 1339 {
Chris@82 1340 E T12, T13, T1R, T22;
Chris@82 1341 T12 = KP559016994 * (Ts - TL);
Chris@82 1342 TM = Ts + TL;
Chris@82 1343 T13 = FNMS(KP250000000, TM, T9);
Chris@82 1344 T14 = T12 + T13;
Chris@82 1345 T2e = T13 - T12;
Chris@82 1346 T1R = KP559016994 * (T1P - T1Q);
Chris@82 1347 T21 = T1P + T1Q;
Chris@82 1348 T22 = FNMS(KP250000000, T21, T20);
Chris@82 1349 T23 = T1R + T22;
Chris@82 1350 T2l = T22 - T1R;
Chris@82 1351 }
Chris@82 1352 {
Chris@82 1353 E T1n, T1G, T1M, T1N;
Chris@82 1354 T1n = T1d - T1m;
Chris@82 1355 T1G = T1w - T1F;
Chris@82 1356 T1H = FMA(KP951056516, T1n, KP587785252 * T1G);
Chris@82 1357 T2f = FNMS(KP951056516, T1G, KP587785252 * T1n);
Chris@82 1358 T1M = Ti - Tr;
Chris@82 1359 T1N = TB - TK;
Chris@82 1360 T1O = FMA(KP951056516, T1M, KP587785252 * T1N);
Chris@82 1361 T2k = FNMS(KP951056516, T1N, KP587785252 * T1M);
Chris@82 1362 }
Chris@82 1363 {
Chris@82 1364 E T1I, T24, T2o, T2q;
Chris@82 1365 cr[0] = T9 + TM;
Chris@82 1366 ci[0] = T20 + T21;
Chris@82 1367 T1I = T14 - T1H;
Chris@82 1368 T24 = T1O + T23;
Chris@82 1369 cr[WS(rs, 5)] = FNMS(T1L, T24, T11 * T1I);
Chris@82 1370 ci[WS(rs, 5)] = FMA(T1L, T1I, T11 * T24);
Chris@82 1371 T2o = T2e + T2f;
Chris@82 1372 T2q = T2l - T2k;
Chris@82 1373 cr[WS(rs, 15)] = FNMS(T2p, T2q, T2n * T2o);
Chris@82 1374 ci[WS(rs, 15)] = FMA(T2p, T2o, T2n * T2q);
Chris@82 1375 {
Chris@82 1376 E T2g, T2m, T28, T2a;
Chris@82 1377 T2g = T2e - T2f;
Chris@82 1378 T2m = T2k + T2l;
Chris@82 1379 cr[WS(rs, 10)] = FNMS(T2j, T2m, T2d * T2g);
Chris@82 1380 ci[WS(rs, 10)] = FMA(T2j, T2g, T2d * T2m);
Chris@82 1381 T28 = T14 + T1H;
Chris@82 1382 T2a = T23 - T1O;
Chris@82 1383 cr[WS(rs, 20)] = FNMS(T29, T2a, T27 * T28);
Chris@82 1384 ci[WS(rs, 20)] = FMA(T29, T28, T27 * T2a);
Chris@82 1385 }
Chris@82 1386 }
Chris@82 1387 }
/* Outputs 4, 9, 14, 19, 24. */
Chris@82 1388 {
Chris@82 1389 E T76, T7n, T7a, T7q, T6H, T6W, T6X, T6Y, T7e, T7f, T7d, T7g, T7x, T7y;
Chris@82 1390 {
Chris@82 1391 E T72, T75, T78, T79;
Chris@82 1392 T72 = T70 + T71;
Chris@82 1393 T75 = T73 - T74;
Chris@82 1394 T76 = FMA(KP951056516, T72, KP587785252 * T75);
Chris@82 1395 T7n = FNMS(KP951056516, T75, KP587785252 * T72);
Chris@82 1396 T78 = T6K - T6N;
Chris@82 1397 T79 = T6U - T6R;
Chris@82 1398 T7a = FMA(KP951056516, T78, KP587785252 * T79);
Chris@82 1399 T7q = FNMS(KP951056516, T79, KP587785252 * T78);
Chris@82 1400 }
Chris@82 1401 {
Chris@82 1402 E T6O, T6V, T7b, T7c;
Chris@82 1403 T6H = T5Y + T5Z;
Chris@82 1404 T6O = T6K + T6N;
Chris@82 1405 T6V = T6R + T6U;
Chris@82 1406 T6W = T6O - T6V;
Chris@82 1407 T6X = FNMS(KP250000000, T6W, T6H);
Chris@82 1408 T6Y = KP559016994 * (T6O + T6V);
Chris@82 1409 T7e = T6j - T6i;
Chris@82 1410 T7b = T70 - T71;
Chris@82 1411 T7c = T73 + T74;
Chris@82 1412 T7f = T7b + T7c;
Chris@82 1413 T7d = KP559016994 * (T7b - T7c);
Chris@82 1414 T7g = FNMS(KP250000000, T7f, T7e);
Chris@82 1415 }
Chris@82 1416 T7x = T6H + T6W;
Chris@82 1417 T7y = T7e + T7f;
Chris@82 1418 cr[WS(rs, 4)] = FNMS(TY, T7y, TT * T7x);
Chris@82 1419 ci[WS(rs, 4)] = FMA(TY, T7x, TT * T7y);
Chris@82 1420 {
Chris@82 1421 E T7o, T7u, T7s, T7w, T7m, T7r;
Chris@82 1422 T7m = T6X - T6Y;
Chris@82 1423 T7o = T7m - T7n;
Chris@82 1424 T7u = T7m + T7n;
Chris@82 1425 T7r = T7g - T7d;
Chris@82 1426 T7s = T7q + T7r;
Chris@82 1427 T7w = T7r - T7q;
Chris@82 1428 cr[WS(rs, 14)] = FNMS(T7p, T7s, T7l * T7o);
Chris@82 1429 ci[WS(rs, 14)] = FMA(T7p, T7o, T7l * T7s);
Chris@82 1430 cr[WS(rs, 19)] = FNMS(T7v, T7w, T7t * T7u);
Chris@82 1431 ci[WS(rs, 19)] = FMA(T7v, T7u, T7t * T7w);
Chris@82 1432 }
Chris@82 1433 {
Chris@82 1434 E T77, T7j, T7i, T7k, T6Z, T7h;
Chris@82 1435 T6Z = T6X + T6Y;
Chris@82 1436 T77 = T6Z - T76;
Chris@82 1437 T7j = T6Z + T76;
Chris@82 1438 T7h = T7d + T7g;
Chris@82 1439 T7i = T7a + T7h;
Chris@82 1440 T7k = T7h - T7a;
Chris@82 1441 cr[WS(rs, 9)] = FNMS(TZ, T7i, TU * T77);
Chris@82 1442 ci[WS(rs, 9)] = FMA(TZ, T77, TU * T7i);
Chris@82 1443 cr[WS(rs, 24)] = FNMS(T26, T7k, T25 * T7j);
Chris@82 1444 ci[WS(rs, 24)] = FMA(T26, T7j, T25 * T7k);
Chris@82 1445 }
Chris@82 1446 }
/* Outputs 2, 7, 12, 17, 22. */
Chris@82 1447 {
Chris@82 1448 E T3N, T4h, T3U, T4m, T3D, T3E, T3w, T3F, T44, T45, T3X, T46, T4t, T4u;
Chris@82 1449 {
Chris@82 1450 E T3J, T3M, T3S, T3T;
Chris@82 1451 T3J = T3H - T3I;
Chris@82 1452 T3M = T3K - T3L;
Chris@82 1453 T3N = FMA(KP951056516, T3J, KP587785252 * T3M);
Chris@82 1454 T4h = FNMS(KP951056516, T3M, KP587785252 * T3J);
Chris@82 1455 T3S = T2K + T2Z;
Chris@82 1456 T3T = T3f - T3u;
Chris@82 1457 T3U = FMA(KP951056516, T3S, KP587785252 * T3T);
Chris@82 1458 T4m = FNMS(KP951056516, T3T, KP587785252 * T3S);
Chris@82 1459 }
Chris@82 1460 {
Chris@82 1461 E T30, T3v, T3V, T3W;
Chris@82 1462 T3D = T3z - T3C;
Chris@82 1463 T30 = T2K - T2Z;
Chris@82 1464 T3v = T3f + T3u;
Chris@82 1465 T3E = T30 + T3v;
Chris@82 1466 T3w = KP559016994 * (T30 - T3v);
Chris@82 1467 T3F = FNMS(KP250000000, T3E, T3D);
Chris@82 1468 T44 = T40 + T43;
Chris@82 1469 T3V = T3H + T3I;
Chris@82 1470 T3W = T3K + T3L;
Chris@82 1471 T45 = T3V + T3W;
Chris@82 1472 T3X = KP559016994 * (T3V - T3W);
Chris@82 1473 T46 = FNMS(KP250000000, T45, T44);
Chris@82 1474 }
Chris@82 1475 T4t = T3D + T3E;
Chris@82 1476 T4u = T44 + T45;
Chris@82 1477 cr[WS(rs, 2)] = FNMS(T2t, T4u, T2r * T4t);
Chris@82 1478 ci[WS(rs, 2)] = FMA(T2t, T4t, T2r * T4u);
Chris@82 1479 {
Chris@82 1480 E T4i, T4q, T4o, T4s, T4g, T4n;
Chris@82 1481 T4g = T3F - T3w;
Chris@82 1482 T4i = T4g - T4h;
Chris@82 1483 T4q = T4g + T4h;
Chris@82 1484 T4n = T46 - T3X;
Chris@82 1485 T4o = T4m + T4n;
Chris@82 1486 T4s = T4n - T4m;
Chris@82 1487 cr[WS(rs, 12)] = FNMS(T4l, T4o, T4f * T4i);
Chris@82 1488 ci[WS(rs, 12)] = FMA(T4l, T4i, T4f * T4o);
Chris@82 1489 cr[WS(rs, 17)] = FNMS(T4r, T4s, T4p * T4q);
Chris@82 1490 ci[WS(rs, 17)] = FMA(T4r, T4q, T4p * T4s);
Chris@82 1491 }
Chris@82 1492 {
Chris@82 1493 E T3O, T4a, T48, T4c, T3G, T47;
Chris@82 1494 T3G = T3w + T3F;
Chris@82 1495 T3O = T3G - T3N;
Chris@82 1496 T4a = T3G + T3N;
Chris@82 1497 T47 = T3X + T46;
Chris@82 1498 T48 = T3U + T47;
Chris@82 1499 T4c = T47 - T3U;
Chris@82 1500 cr[WS(rs, 7)] = FNMS(T3R, T48, T2v * T3O);
Chris@82 1501 ci[WS(rs, 7)] = FMA(T3R, T3O, T2v * T48);
Chris@82 1502 cr[WS(rs, 22)] = FNMS(T4b, T4c, T49 * T4a);
Chris@82 1503 ci[WS(rs, 22)] = FMA(T4b, T4a, T49 * T4c);
Chris@82 1504 }
Chris@82 1505 }
/* Outputs 3, 8, 13, 18, 23. */
Chris@82 1506 {
Chris@82 1507 E T4V, T5f, T50, T5i, T4L, T4M, T4K, T4N, T54, T55, T53, T56, T5r, T5s;
Chris@82 1508 {
Chris@82 1509 E T4R, T4U, T4Y, T4Z;
Chris@82 1510 T4R = T4P - T4Q;
Chris@82 1511 T4U = T4S - T4T;
Chris@82 1512 T4V = FMA(KP951056516, T4R, KP587785252 * T4U);
Chris@82 1513 T5f = FNMS(KP951056516, T4U, KP587785252 * T4R);
Chris@82 1514 T4Y = T4y + T4B;
Chris@82 1515 T4Z = T4F + T4I;
Chris@82 1516 T50 = FMA(KP951056516, T4Y, KP587785252 * T4Z);
Chris@82 1517 T5i = FNMS(KP951056516, T4Z, KP587785252 * T4Y);
Chris@82 1518 }
Chris@82 1519 {
Chris@82 1520 E T4C, T4J, T51, T52;
Chris@82 1521 T4L = T3z + T3C;
Chris@82 1522 T4C = T4y - T4B;
Chris@82 1523 T4J = T4F - T4I;
Chris@82 1524 T4M = T4C + T4J;
Chris@82 1525 T4K = KP559016994 * (T4C - T4J);
Chris@82 1526 T4N = FNMS(KP250000000, T4M, T4L);
Chris@82 1527 T54 = T43 - T40;
Chris@82 1528 T51 = T4P + T4Q;
Chris@82 1529 T52 = T4S + T4T;
Chris@82 1530 T55 = T51 + T52;
Chris@82 1531 T53 = KP559016994 * (T51 - T52);
Chris@82 1532 T56 = FNMS(KP250000000, T55, T54);
Chris@82 1533 }
Chris@82 1534 T5r = T4L + T4M;
Chris@82 1535 T5s = T54 + T55;
Chris@82 1536 cr[WS(rs, 3)] = FNMS(TR, T5s, TO * T5r);
Chris@82 1537 ci[WS(rs, 3)] = FMA(TR, T5r, TO * T5s);
Chris@82 1538 {
Chris@82 1539 E T5g, T5o, T5k, T5q, T5e, T5j;
Chris@82 1540 T5e = T4N - T4K;
Chris@82 1541 T5g = T5e - T5f;
Chris@82 1542 T5o = T5e + T5f;
Chris@82 1543 T5j = T56 - T53;
Chris@82 1544 T5k = T5i + T5j;
Chris@82 1545 T5q = T5j - T5i;
Chris@82 1546 cr[WS(rs, 13)] = FNMS(T5h, T5k, T5d * T5g);
Chris@82 1547 ci[WS(rs, 13)] = FMA(T5h, T5g, T5d * T5k);
Chris@82 1548 cr[WS(rs, 18)] = FNMS(T5p, T5q, T5n * T5o);
Chris@82 1549 ci[WS(rs, 18)] = FMA(T5p, T5o, T5n * T5q);
Chris@82 1550 }
Chris@82 1551 {
Chris@82 1552 E T4W, T5a, T58, T5c, T4O, T57;
Chris@82 1553 T4O = T4K + T4N;
Chris@82 1554 T4W = T4O - T4V;
Chris@82 1555 T5a = T4O + T4V;
Chris@82 1556 T57 = T53 + T56;
Chris@82 1557 T58 = T50 + T57;
Chris@82 1558 T5c = T57 - T50;
Chris@82 1559 cr[WS(rs, 8)] = FNMS(T4X, T58, T4v * T4W);
Chris@82 1560 ci[WS(rs, 8)] = FMA(T4X, T4W, T4v * T58);
Chris@82 1561 cr[WS(rs, 23)] = FNMS(T5b, T5c, T59 * T5a);
Chris@82 1562 ci[WS(rs, 23)] = FMA(T5b, T5a, T59 * T5c);
Chris@82 1563 }
Chris@82 1564 }
/* Outputs 1, 6, 11, 16, 21. */
Chris@82 1565 {
Chris@82 1566 E T6a, T6v, T6e, T6y, T60, T61, T5X, T62, T6k, T6l, T6h, T6m, T6F, T6G;
Chris@82 1567 {
Chris@82 1568 E T66, T69, T6c, T6d;
Chris@82 1569 T66 = T64 - T65;
Chris@82 1570 T69 = T67 - T68;
Chris@82 1571 T6a = FMA(KP951056516, T66, KP587785252 * T69);
Chris@82 1572 T6v = FNMS(KP951056516, T69, KP587785252 * T66);
Chris@82 1573 T6c = T5z - T5G;
Chris@82 1574 T6d = T5O - T5V;
Chris@82 1575 T6e = FMA(KP951056516, T6c, KP587785252 * T6d);
Chris@82 1576 T6y = FNMS(KP951056516, T6d, KP587785252 * T6c);
Chris@82 1577 }
Chris@82 1578 {
Chris@82 1579 E T5H, T5W, T6f, T6g;
Chris@82 1580 T60 = T5Y - T5Z;
Chris@82 1581 T5H = T5z + T5G;
Chris@82 1582 T5W = T5O + T5V;
Chris@82 1583 T61 = T5H + T5W;
Chris@82 1584 T5X = KP559016994 * (T5H - T5W);
Chris@82 1585 T62 = FNMS(KP250000000, T61, T60);
Chris@82 1586 T6k = T6i + T6j;
Chris@82 1587 T6f = T64 + T65;
Chris@82 1588 T6g = T67 + T68;
Chris@82 1589 T6l = T6f + T6g;
Chris@82 1590 T6h = KP559016994 * (T6f - T6g);
Chris@82 1591 T6m = FNMS(KP250000000, T6l, T6k);
Chris@82 1592 }
Chris@82 1593 T6F = T60 + T61;
Chris@82 1594 T6G = T6k + T6l;
Chris@82 1595 cr[WS(rs, 1)] = FNMS(TQ, T6G, TN * T6F);
Chris@82 1596 ci[WS(rs, 1)] = FMA(TQ, T6F, TN * T6G);
Chris@82 1597 {
Chris@82 1598 E T6w, T6C, T6A, T6E, T6u, T6z;
Chris@82 1599 T6u = T62 - T5X;
Chris@82 1600 T6w = T6u - T6v;
Chris@82 1601 T6C = T6u + T6v;
Chris@82 1602 T6z = T6m - T6h;
Chris@82 1603 T6A = T6y + T6z;
Chris@82 1604 T6E = T6z - T6y;
Chris@82 1605 cr[WS(rs, 11)] = FNMS(T6x, T6A, T6t * T6w);
Chris@82 1606 ci[WS(rs, 11)] = FMA(T6x, T6w, T6t * T6A);
Chris@82 1607 cr[WS(rs, 16)] = FNMS(T6D, T6E, T6B * T6C);
Chris@82 1608 ci[WS(rs, 16)] = FMA(T6D, T6C, T6B * T6E);
Chris@82 1609 }
Chris@82 1610 {
Chris@82 1611 E T6b, T6q, T6o, T6s, T63, T6n;
Chris@82 1612 T63 = T5X + T62;
Chris@82 1613 T6b = T63 - T6a;
Chris@82 1614 T6q = T63 + T6a;
Chris@82 1615 T6n = T6h + T6m;
Chris@82 1616 T6o = T6e + T6n;
Chris@82 1617 T6s = T6n - T6e;
Chris@82 1618 cr[WS(rs, 6)] = FNMS(T5m, T6o, T5l * T6b);
Chris@82 1619 ci[WS(rs, 6)] = FMA(T5m, T6b, T5l * T6o);
Chris@82 1620 cr[WS(rs, 21)] = FNMS(T6r, T6s, T6p * T6q);
Chris@82 1621 ci[WS(rs, 21)] = FMA(T6r, T6q, T6p * T6s);
Chris@82 1622 }
Chris@82 1623 }
Chris@82 1624 }
Chris@82 1625 }
Chris@82 1626 }
Chris@82 1627 }
Chris@82 1628
/*
 * Twiddle-factor program for the non-FMA build of hb2_25; it is handed to
 * the planner through `desc` below.  The four TW_CEXP entries end in 1, 3, 9
 * and 24, which are the output indices that hb2_25 multiplies directly by
 * the pairs W[0..1], W[2..3], W[4..5] and W[6..7]; the kernel advances W by
 * 8 values per iteration.
 * NOTE(review): tw_instr, TW_CEXP and TW_NEXT are declared outside this
 * file; presumably each TW_CEXP entry yields one complex (two-value)
 * twiddle and TW_NEXT ends the list -- confirm against the FFTW headers.
 */
Chris@82 1629 static const tw_instr twinstr[] = {
Chris@82 1630 {TW_CEXP, 1, 1},
Chris@82 1631 {TW_CEXP, 1, 3},
Chris@82 1632 {TW_CEXP, 1, 9},
Chris@82 1633 {TW_CEXP, 1, 24},
Chris@82 1634 {TW_NEXT, 1, 0}
Chris@82 1635 };
Chris@82 1636
/*
 * Descriptor for the non-FMA build: the value 25, the name "hb2_25", the
 * twiddle program `twinstr`, &GENUS, and the counts {280, 180, 160, 0},
 * which match the "280 additions, 180 multiplications, 160 fused
 * multiply/add" figures in the generator comment above the kernel.
 * NOTE(review): the leading 25 is presumably the transform size (generator
 * was run with -n 25); hc2hc_desc and GENUS are declared elsewhere -- confirm.
 */
Chris@82 1637 static const hc2hc_desc desc = { 25, "hb2_25", twinstr, &GENUS, {280, 180, 160, 0} };
Chris@82 1638
/*
 * Registration entry point (non-FMA build): passes the hb2_25 kernel and its
 * descriptor `desc` to X(khc2hc_register) for planner `p`.
 * NOTE(review): X() is a macro defined outside this file, presumably the
 * library's symbol-prefixing macro -- confirm.
 */
Chris@82 1639 void X(codelet_hb2_25) (planner *p) {
Chris@82 1640 X(khc2hc_register) (p, hb2_25, &desc);
Chris@82 1641 }
Chris@82 1642 #endif