annotate src/fftw-3.3.5/rdft/scalar/r2cf/hf_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:46:44 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 25 -dit -name hf_25 -include hf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 400 FP additions, 364 FP multiplications,
Chris@42 32 * (or, 84 additions, 48 multiplications, 316 fused multiply/add),
Chris@42 33 * 178 stack variables, 47 constants, and 100 memory accesses
Chris@42 34 */
Chris@42 35 #include "hf.h"
Chris@42 36
/*
 * hf_25 -- machine-generated size-25 codelet (FMA variant; see the
 * "Generated by" line above: gen_hc2hc, -n 25, -dit).
 *
 * Parameters, as used by the code below:
 *   cr, ci  arrays that are both read and overwritten in place; every access
 *           goes through WS(rs, k) for k = 0..24 (index 0 is accessed directly).
 *   W       read-only coefficient table; 48 values (W[0]..W[47]) are consumed
 *           per loop iteration.
 *   rs      stride argument handed to WS() and MAKE_VOLATILE_STRIDE().
 *   mb, me  loop bounds: the body runs for m = mb, mb+1, ..., me-1.
 *   ms      per-iteration step: cr advances by +ms while ci moves by -ms.
 *
 * Per iteration the function loads all 25 cr[] and all 25 ci[] slots, combines
 * input k (k = 1..24) with the coefficient pair W[2k-2], W[2k-1], and stores
 * 25 results into cr[] and 25 into ci[].
 *
 * NOTE(review): DK, E, R, INT, WS, FMA, FNMS, FMS and MAKE_VOLATILE_STRIDE are
 * defined outside this file; presumably FMA(a,b,c) = a*b + c,
 * FNMS(a,b,c) = c - a*b and FMS(a,b,c) = a*b - c -- confirm against the
 * project headers before relying on this.
 */
Chris@42 37 static void hf_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
/* Numeric constants used by the arithmetic below; each DK() binds a KPnnn name to the given literal. */
Chris@42 39 DK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@42 40 DK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@42 41 DK(KP621716863, +0.621716863012209892444754556304102309693593202);
Chris@42 42 DK(KP614372930, +0.614372930789563808870829930444362096004872855);
Chris@42 43 DK(KP557913902, +0.557913902031834264187699648465567037992437152);
Chris@42 44 DK(KP249506682, +0.249506682107067890488084201715862638334226305);
Chris@42 45 DK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@42 46 DK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@42 47 DK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@42 48 DK(KP968479752, +0.968479752739016373193524836781420152702090879);
Chris@42 49 DK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@42 50 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@42 51 DK(KP994076283, +0.994076283785401014123185814696322018529298887);
Chris@42 52 DK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@42 53 DK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@42 54 DK(KP062914667, +0.062914667253649757225485955897349402364686947);
Chris@42 55 DK(KP833417178, +0.833417178328688677408962550243238843138996060);
Chris@42 56 DK(KP921177326, +0.921177326965143320250447435415066029359282231);
Chris@42 57 DK(KP541454447, +0.541454447536312777046285590082819509052033189);
Chris@42 58 DK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@42 59 DK(KP943557151, +0.943557151597354104399655195398983005179443399);
Chris@42 60 DK(KP554608978, +0.554608978404018097464974850792216217022558774);
Chris@42 61 DK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@42 62 DK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@42 63 DK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@42 64 DK(KP248028675, +0.248028675328619457762448260696444630363259177);
Chris@42 65 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@42 66 DK(KP525970792, +0.525970792408939708442463226536226366643874659);
Chris@42 67 DK(KP726211448, +0.726211448929902658173535992263577167607493062);
Chris@42 68 DK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@42 69 DK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@42 70 DK(KP871714437, +0.871714437527667770979999223229522602943903653);
Chris@42 71 DK(KP549754652, +0.549754652192770074288023275540779861653779767);
Chris@42 72 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@42 73 DK(KP939062505, +0.939062505817492352556001843133229685779824606);
Chris@42 74 DK(KP256756360, +0.256756360367726783319498520922669048172391148);
Chris@42 75 DK(KP851038619, +0.851038619207379630836264138867114231259902550);
Chris@42 76 DK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@42 77 DK(KP912018591, +0.912018591466481957908415381764119056233607330);
Chris@42 78 DK(KP634619297, +0.634619297544148100711287640319130485732531031);
Chris@42 79 DK(KP470564281, +0.470564281212251493087595091036643380879947982);
Chris@42 80 DK(KP827271945, +0.827271945972475634034355757144307982555673741);
Chris@42 81 DK(KP126329378, +0.126329378446108174786050455341811215027378105);
Chris@42 82 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 83 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 84 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 85 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@42 86 {
Chris@42 87 INT m;
/* W is first offset by (mb - 1) * 48, then advanced by 48 per iteration; cr steps forward by ms and ci steps backward by ms. */
Chris@42 88 for (m = mb, W = W + ((mb - 1) * 48); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 48, MAKE_VOLATILE_STRIDE(50, rs)) {
/* These ten temporaries are assigned deep inside the nested blocks and consumed by the last stores of the iteration. */
Chris@42 89 E T7i, T6o, T6m, T7o, T7m, T7h, T6n, T6f, T7j, T7n;
Chris@42 90 {
Chris@42 91 E T6W, T5G, T3Y, T3M, T7q, T70, T6V, T7P, Tt, T3L, T5T, T45, T5Q, T4c, T3G;
Chris@42 92 E T2G, T5P, T49, T5S, T42, T65, T4H, T68, T4A, T2Z, T11, T67, T4x, T64, T4E;
Chris@42 93 E T5Y, T4W, T61, T4P, T3d, T1z, T60, T4M, T5X, T4T, T3g, T1G, T3q, T4q, T4j;
Chris@42 94 E T26, T3i, T1M, T3k, T1S;
Chris@42 95 {
Chris@42 96 E T3u, T2e, T3E, T44, T4b, T2E, T3w, T2k, T3y, T2q;
Chris@42 97 {
Chris@42 98 E T1, T6R, T3P, T7, T3W, Tq, T9, Tc, Tb, T3U, Tk, T3Q, Ta;
Chris@42 99 {
Chris@42 100 E T3, T6, T2, T5;
/* Inputs 0, 5, 10, 15, 20: slot 0 is loaded as-is, the others are paired with W[8..9], W[18..19], W[28..29], W[38..39]. */
Chris@42 101 T1 = cr[0];
Chris@42 102 T6R = ci[0];
Chris@42 103 T3 = cr[WS(rs, 5)];
Chris@42 104 T6 = ci[WS(rs, 5)];
Chris@42 105 T2 = W[8];
Chris@42 106 T5 = W[9];
Chris@42 107 {
Chris@42 108 E Tm, Tp, To, T3V, Tn, T3O, T4, Tl;
Chris@42 109 Tm = cr[WS(rs, 15)];
Chris@42 110 Tp = ci[WS(rs, 15)];
Chris@42 111 T3O = T2 * T6;
Chris@42 112 T4 = T2 * T3;
Chris@42 113 Tl = W[28];
Chris@42 114 To = W[29];
Chris@42 115 T3P = FNMS(T5, T3, T3O);
Chris@42 116 T7 = FMA(T5, T6, T4);
Chris@42 117 T3V = Tl * Tp;
Chris@42 118 Tn = Tl * Tm;
Chris@42 119 {
Chris@42 120 E Tg, Tj, Tf, Ti, T3T, Th, T8;
Chris@42 121 Tg = cr[WS(rs, 10)];
Chris@42 122 Tj = ci[WS(rs, 10)];
Chris@42 123 T3W = FNMS(To, Tm, T3V);
Chris@42 124 Tq = FMA(To, Tp, Tn);
Chris@42 125 Tf = W[18];
Chris@42 126 Ti = W[19];
Chris@42 127 T9 = cr[WS(rs, 20)];
Chris@42 128 Tc = ci[WS(rs, 20)];
Chris@42 129 T3T = Tf * Tj;
Chris@42 130 Th = Tf * Tg;
Chris@42 131 T8 = W[38];
Chris@42 132 Tb = W[39];
Chris@42 133 T3U = FNMS(Ti, Tg, T3T);
Chris@42 134 Tk = FMA(Ti, Tj, Th);
Chris@42 135 T3Q = T8 * Tc;
Chris@42 136 Ta = T8 * T9;
Chris@42 137 }
Chris@42 138 }
Chris@42 139 }
Chris@42 140 {
Chris@42 141 E T6T, T3X, T6Y, Tr, T3R, Td;
Chris@42 142 T6T = T3U + T3W;
Chris@42 143 T3X = T3U - T3W;
Chris@42 144 T6Y = Tk - Tq;
Chris@42 145 Tr = Tk + Tq;
Chris@42 146 T3R = FNMS(Tb, T9, T3Q);
Chris@42 147 Td = FMA(Tb, Tc, Ta);
Chris@42 148 {
Chris@42 149 E T3S, T6Z, Te, T6U, T6S, Ts;
Chris@42 150 T3S = T3P - T3R;
Chris@42 151 T6S = T3P + T3R;
Chris@42 152 T6Z = T7 - Td;
Chris@42 153 Te = T7 + Td;
Chris@42 154 T6W = T6S - T6T;
Chris@42 155 T6U = T6S + T6T;
Chris@42 156 T5G = FNMS(KP618033988, T3S, T3X);
Chris@42 157 T3Y = FMA(KP618033988, T3X, T3S);
Chris@42 158 T3M = Te - Tr;
Chris@42 159 Ts = Te + Tr;
Chris@42 160 T7q = FMA(KP618033988, T6Y, T6Z);
Chris@42 161 T70 = FNMS(KP618033988, T6Z, T6Y);
Chris@42 162 T6V = FNMS(KP250000000, T6U, T6R);
Chris@42 163 T7P = T6U + T6R;
Chris@42 164 Tt = T1 + Ts;
Chris@42 165 T3L = FNMS(KP250000000, Ts, T1);
Chris@42 166 }
Chris@42 167 }
Chris@42 168 }
Chris@42 169 {
Chris@42 170 E T2g, T2j, T2m, T3v, T2h, T2p, T2l, T2i, T2o, T3x, T2n;
Chris@42 171 {
Chris@42 172 E T2a, T2d, T29, T2c;
/* Inputs 3, 8, 13, 18, 23 with W[4..5], W[14..15], W[24..25], W[34..35], W[44..45]. */
Chris@42 173 T2a = cr[WS(rs, 3)];
Chris@42 174 T2d = ci[WS(rs, 3)];
Chris@42 175 T29 = W[4];
Chris@42 176 T2c = W[5];
Chris@42 177 {
Chris@42 178 E T2t, T2w, T2z, T3A, T2u, T2C, T2y, T2v, T2B, T3t, T2b, T2s, T2f;
Chris@42 179 T2t = cr[WS(rs, 13)];
Chris@42 180 T2w = ci[WS(rs, 13)];
Chris@42 181 T3t = T29 * T2d;
Chris@42 182 T2b = T29 * T2a;
Chris@42 183 T2s = W[24];
Chris@42 184 T2z = cr[WS(rs, 18)];
Chris@42 185 T3u = FNMS(T2c, T2a, T3t);
Chris@42 186 T2e = FMA(T2c, T2d, T2b);
Chris@42 187 T3A = T2s * T2w;
Chris@42 188 T2u = T2s * T2t;
Chris@42 189 T2C = ci[WS(rs, 18)];
Chris@42 190 T2y = W[34];
Chris@42 191 T2v = W[25];
Chris@42 192 T2B = W[35];
Chris@42 193 {
Chris@42 194 E T3B, T2x, T3D, T2D, T3C, T2A;
Chris@42 195 T2g = cr[WS(rs, 8)];
Chris@42 196 T3C = T2y * T2C;
Chris@42 197 T2A = T2y * T2z;
Chris@42 198 T3B = FNMS(T2v, T2t, T3A);
Chris@42 199 T2x = FMA(T2v, T2w, T2u);
Chris@42 200 T3D = FNMS(T2B, T2z, T3C);
Chris@42 201 T2D = FMA(T2B, T2C, T2A);
Chris@42 202 T2j = ci[WS(rs, 8)];
Chris@42 203 T2f = W[14];
Chris@42 204 T3E = T3B + T3D;
Chris@42 205 T44 = T3D - T3B;
Chris@42 206 T4b = T2x - T2D;
Chris@42 207 T2E = T2x + T2D;
Chris@42 208 }
Chris@42 209 T2m = cr[WS(rs, 23)];
Chris@42 210 T3v = T2f * T2j;
Chris@42 211 T2h = T2f * T2g;
Chris@42 212 T2p = ci[WS(rs, 23)];
Chris@42 213 T2l = W[44];
Chris@42 214 T2i = W[15];
Chris@42 215 T2o = W[45];
Chris@42 216 }
Chris@42 217 }
Chris@42 218 T3x = T2l * T2p;
Chris@42 219 T2n = T2l * T2m;
Chris@42 220 T3w = FNMS(T2i, T2g, T3v);
Chris@42 221 T2k = FMA(T2i, T2j, T2h);
Chris@42 222 T3y = FNMS(T2o, T2m, T3x);
Chris@42 223 T2q = FMA(T2o, T2p, T2n);
Chris@42 224 }
Chris@42 225 {
Chris@42 226 E T2N, Tz, T2X, T4G, T4z, TZ, T2P, TF, T2R, TL;
Chris@42 227 {
Chris@42 228 E TB, TE, TH, T2O, TC, TK, TG, TD, TJ, T2Q, TI;
Chris@42 229 {
Chris@42 230 E Tv, Ty, Tu, Tx;
Chris@42 231 {
Chris@42 232 E T48, T41, T47, T40, T43, T3z;
/* Inputs 1, 6, 11, 16, 21 with W[0..1], W[10..11], W[20..21], W[30..31], W[40..41]; interleaved with finishing the 3/8/13/18/23 group. */
Chris@42 233 Tv = cr[WS(rs, 1)];
Chris@42 234 T43 = T3y - T3w;
Chris@42 235 T3z = T3w + T3y;
Chris@42 236 {
Chris@42 237 E T4a, T2r, T3F, T2F;
Chris@42 238 T4a = T2k - T2q;
Chris@42 239 T2r = T2k + T2q;
Chris@42 240 T5T = FNMS(KP618033988, T43, T44);
Chris@42 241 T45 = FMA(KP618033988, T44, T43);
Chris@42 242 T3F = T3z + T3E;
Chris@42 243 T48 = T3E - T3z;
Chris@42 244 T5Q = FNMS(KP618033988, T4a, T4b);
Chris@42 245 T4c = FMA(KP618033988, T4b, T4a);
Chris@42 246 T2F = T2r + T2E;
Chris@42 247 T41 = T2E - T2r;
Chris@42 248 T3G = T3u + T3F;
Chris@42 249 T47 = FNMS(KP250000000, T3F, T3u);
Chris@42 250 T2G = T2e + T2F;
Chris@42 251 T40 = FNMS(KP250000000, T2F, T2e);
Chris@42 252 Ty = ci[WS(rs, 1)];
Chris@42 253 }
Chris@42 254 T5P = FMA(KP559016994, T48, T47);
Chris@42 255 T49 = FNMS(KP559016994, T48, T47);
Chris@42 256 T5S = FMA(KP559016994, T41, T40);
Chris@42 257 T42 = FNMS(KP559016994, T41, T40);
Chris@42 258 Tu = W[0];
Chris@42 259 }
Chris@42 260 Tx = W[1];
Chris@42 261 {
Chris@42 262 E TO, TR, TU, T2T, TP, TX, TT, TQ, TW, T2M, Tw, TN, TA;
Chris@42 263 TO = cr[WS(rs, 11)];
Chris@42 264 TR = ci[WS(rs, 11)];
Chris@42 265 T2M = Tu * Ty;
Chris@42 266 Tw = Tu * Tv;
Chris@42 267 TN = W[20];
Chris@42 268 TU = cr[WS(rs, 16)];
Chris@42 269 T2N = FNMS(Tx, Tv, T2M);
Chris@42 270 Tz = FMA(Tx, Ty, Tw);
Chris@42 271 T2T = TN * TR;
Chris@42 272 TP = TN * TO;
Chris@42 273 TX = ci[WS(rs, 16)];
Chris@42 274 TT = W[30];
Chris@42 275 TQ = W[21];
Chris@42 276 TW = W[31];
Chris@42 277 {
Chris@42 278 E T2U, TS, T2W, TY, T2V, TV;
Chris@42 279 TB = cr[WS(rs, 6)];
Chris@42 280 T2V = TT * TX;
Chris@42 281 TV = TT * TU;
Chris@42 282 T2U = FNMS(TQ, TO, T2T);
Chris@42 283 TS = FMA(TQ, TR, TP);
Chris@42 284 T2W = FNMS(TW, TU, T2V);
Chris@42 285 TY = FMA(TW, TX, TV);
Chris@42 286 TE = ci[WS(rs, 6)];
Chris@42 287 TA = W[10];
Chris@42 288 T2X = T2U + T2W;
Chris@42 289 T4G = T2W - T2U;
Chris@42 290 T4z = TY - TS;
Chris@42 291 TZ = TS + TY;
Chris@42 292 }
Chris@42 293 TH = cr[WS(rs, 21)];
Chris@42 294 T2O = TA * TE;
Chris@42 295 TC = TA * TB;
Chris@42 296 TK = ci[WS(rs, 21)];
Chris@42 297 TG = W[40];
Chris@42 298 TD = W[11];
Chris@42 299 TJ = W[41];
Chris@42 300 }
Chris@42 301 }
Chris@42 302 T2Q = TG * TK;
Chris@42 303 TI = TG * TH;
Chris@42 304 T2P = FNMS(TD, TB, T2O);
Chris@42 305 TF = FMA(TD, TE, TC);
Chris@42 306 T2R = FNMS(TJ, TH, T2Q);
Chris@42 307 TL = FMA(TJ, TK, TI);
Chris@42 308 }
Chris@42 309 {
Chris@42 310 E T31, T17, T3b, T4V, T4O, T1x, T33, T1d, T35, T1j;
Chris@42 311 {
Chris@42 312 E T19, T1c, T1f, T32, T1a, T1i, T1e, T1b, T1h, T34, T1g;
Chris@42 313 {
Chris@42 314 E T13, T16, T12, T15;
Chris@42 315 {
Chris@42 316 E T4w, T4D, T4v, T4C, T4F, T2S;
/* Inputs 4, 9, 14, 19, 24 with W[6..7], W[16..17], W[26..27], W[36..37], W[46..47]; interleaved with finishing the 1/6/11/16/21 group. */
Chris@42 317 T13 = cr[WS(rs, 4)];
Chris@42 318 T4F = T2P - T2R;
Chris@42 319 T2S = T2P + T2R;
Chris@42 320 {
Chris@42 321 E T4y, TM, T2Y, T10;
Chris@42 322 T4y = TL - TF;
Chris@42 323 TM = TF + TL;
Chris@42 324 T65 = FMA(KP618033988, T4F, T4G);
Chris@42 325 T4H = FNMS(KP618033988, T4G, T4F);
Chris@42 326 T2Y = T2S + T2X;
Chris@42 327 T4w = T2S - T2X;
Chris@42 328 T68 = FNMS(KP618033988, T4y, T4z);
Chris@42 329 T4A = FMA(KP618033988, T4z, T4y);
Chris@42 330 T10 = TM + TZ;
Chris@42 331 T4D = TM - TZ;
Chris@42 332 T2Z = T2N + T2Y;
Chris@42 333 T4v = FNMS(KP250000000, T2Y, T2N);
Chris@42 334 T11 = Tz + T10;
Chris@42 335 T4C = FNMS(KP250000000, T10, Tz);
Chris@42 336 T16 = ci[WS(rs, 4)];
Chris@42 337 }
Chris@42 338 T67 = FNMS(KP559016994, T4w, T4v);
Chris@42 339 T4x = FMA(KP559016994, T4w, T4v);
Chris@42 340 T64 = FNMS(KP559016994, T4D, T4C);
Chris@42 341 T4E = FMA(KP559016994, T4D, T4C);
Chris@42 342 T12 = W[6];
Chris@42 343 }
Chris@42 344 T15 = W[7];
Chris@42 345 {
Chris@42 346 E T1m, T1p, T1s, T37, T1n, T1v, T1r, T1o, T1u, T30, T14, T1l, T18;
Chris@42 347 T1m = cr[WS(rs, 14)];
Chris@42 348 T1p = ci[WS(rs, 14)];
Chris@42 349 T30 = T12 * T16;
Chris@42 350 T14 = T12 * T13;
Chris@42 351 T1l = W[26];
Chris@42 352 T1s = cr[WS(rs, 19)];
Chris@42 353 T31 = FNMS(T15, T13, T30);
Chris@42 354 T17 = FMA(T15, T16, T14);
Chris@42 355 T37 = T1l * T1p;
Chris@42 356 T1n = T1l * T1m;
Chris@42 357 T1v = ci[WS(rs, 19)];
Chris@42 358 T1r = W[36];
Chris@42 359 T1o = W[27];
Chris@42 360 T1u = W[37];
Chris@42 361 {
Chris@42 362 E T38, T1q, T3a, T1w, T39, T1t;
Chris@42 363 T19 = cr[WS(rs, 9)];
Chris@42 364 T39 = T1r * T1v;
Chris@42 365 T1t = T1r * T1s;
Chris@42 366 T38 = FNMS(T1o, T1m, T37);
Chris@42 367 T1q = FMA(T1o, T1p, T1n);
Chris@42 368 T3a = FNMS(T1u, T1s, T39);
Chris@42 369 T1w = FMA(T1u, T1v, T1t);
Chris@42 370 T1c = ci[WS(rs, 9)];
Chris@42 371 T18 = W[16];
Chris@42 372 T3b = T38 + T3a;
Chris@42 373 T4V = T3a - T38;
Chris@42 374 T4O = T1w - T1q;
Chris@42 375 T1x = T1q + T1w;
Chris@42 376 }
Chris@42 377 T1f = cr[WS(rs, 24)];
Chris@42 378 T32 = T18 * T1c;
Chris@42 379 T1a = T18 * T19;
Chris@42 380 T1i = ci[WS(rs, 24)];
Chris@42 381 T1e = W[46];
Chris@42 382 T1b = W[17];
Chris@42 383 T1h = W[47];
Chris@42 384 }
Chris@42 385 }
Chris@42 386 T34 = T1e * T1i;
Chris@42 387 T1g = T1e * T1f;
Chris@42 388 T33 = FNMS(T1b, T19, T32);
Chris@42 389 T1d = FMA(T1b, T1c, T1a);
Chris@42 390 T35 = FNMS(T1h, T1f, T34);
Chris@42 391 T1j = FMA(T1h, T1i, T1g);
Chris@42 392 }
Chris@42 393 {
Chris@42 394 E T1I, T1L, T1O, T3h, T1J, T1R, T1N, T1K, T1Q, T3j, T1P;
Chris@42 395 {
Chris@42 396 E T1C, T1F, T1B, T1E;
Chris@42 397 {
Chris@42 398 E T4L, T4S, T4K, T4R, T4U, T36;
/* Inputs 2, 7, 12, 17, 22 with W[2..3], W[12..13], W[22..23], W[32..33], W[42..43]; interleaved with finishing the 4/9/14/19/24 group. */
Chris@42 399 T1C = cr[WS(rs, 2)];
Chris@42 400 T4U = T35 - T33;
Chris@42 401 T36 = T33 + T35;
Chris@42 402 {
Chris@42 403 E T4N, T1k, T3c, T1y;
Chris@42 404 T4N = T1j - T1d;
Chris@42 405 T1k = T1d + T1j;
Chris@42 406 T5Y = FNMS(KP618033988, T4U, T4V);
Chris@42 407 T4W = FMA(KP618033988, T4V, T4U);
Chris@42 408 T3c = T36 + T3b;
Chris@42 409 T4L = T3b - T36;
Chris@42 410 T61 = FNMS(KP618033988, T4N, T4O);
Chris@42 411 T4P = FMA(KP618033988, T4O, T4N);
Chris@42 412 T1y = T1k + T1x;
Chris@42 413 T4S = T1k - T1x;
Chris@42 414 T3d = T31 + T3c;
Chris@42 415 T4K = FNMS(KP250000000, T3c, T31);
Chris@42 416 T1z = T17 + T1y;
Chris@42 417 T4R = FNMS(KP250000000, T1y, T17);
Chris@42 418 T1F = ci[WS(rs, 2)];
Chris@42 419 }
Chris@42 420 T60 = FMA(KP559016994, T4L, T4K);
Chris@42 421 T4M = FNMS(KP559016994, T4L, T4K);
Chris@42 422 T5X = FNMS(KP559016994, T4S, T4R);
Chris@42 423 T4T = FMA(KP559016994, T4S, T4R);
Chris@42 424 T1B = W[2];
Chris@42 425 }
Chris@42 426 T1E = W[3];
Chris@42 427 {
Chris@42 428 E T1V, T1Y, T21, T3m, T1W, T24, T20, T1X, T23, T3f, T1D, T1U, T1H;
Chris@42 429 T1V = cr[WS(rs, 12)];
Chris@42 430 T1Y = ci[WS(rs, 12)];
Chris@42 431 T3f = T1B * T1F;
Chris@42 432 T1D = T1B * T1C;
Chris@42 433 T1U = W[22];
Chris@42 434 T21 = cr[WS(rs, 17)];
Chris@42 435 T3g = FNMS(T1E, T1C, T3f);
Chris@42 436 T1G = FMA(T1E, T1F, T1D);
Chris@42 437 T3m = T1U * T1Y;
Chris@42 438 T1W = T1U * T1V;
Chris@42 439 T24 = ci[WS(rs, 17)];
Chris@42 440 T20 = W[32];
Chris@42 441 T1X = W[23];
Chris@42 442 T23 = W[33];
Chris@42 443 {
Chris@42 444 E T3n, T1Z, T3p, T25, T3o, T22;
Chris@42 445 T1I = cr[WS(rs, 7)];
Chris@42 446 T3o = T20 * T24;
Chris@42 447 T22 = T20 * T21;
Chris@42 448 T3n = FNMS(T1X, T1V, T3m);
Chris@42 449 T1Z = FMA(T1X, T1Y, T1W);
Chris@42 450 T3p = FNMS(T23, T21, T3o);
Chris@42 451 T25 = FMA(T23, T24, T22);
Chris@42 452 T1L = ci[WS(rs, 7)];
Chris@42 453 T1H = W[12];
Chris@42 454 T3q = T3n + T3p;
Chris@42 455 T4q = T3n - T3p;
Chris@42 456 T4j = T25 - T1Z;
Chris@42 457 T26 = T1Z + T25;
Chris@42 458 }
Chris@42 459 T1O = cr[WS(rs, 22)];
Chris@42 460 T3h = T1H * T1L;
Chris@42 461 T1J = T1H * T1I;
Chris@42 462 T1R = ci[WS(rs, 22)];
Chris@42 463 T1N = W[42];
Chris@42 464 T1K = W[13];
Chris@42 465 T1Q = W[43];
Chris@42 466 }
Chris@42 467 }
Chris@42 468 T3j = T1N * T1R;
Chris@42 469 T1P = T1N * T1O;
Chris@42 470 T3i = FNMS(T1K, T1I, T3h);
Chris@42 471 T1M = FMA(T1K, T1L, T1J);
Chris@42 472 T3k = FNMS(T1Q, T1O, T3j);
Chris@42 473 T1S = FMA(T1Q, T1R, T1P);
Chris@42 474 }
Chris@42 475 }
Chris@42 476 }
Chris@42 477 }
/* All 50 input values have been loaded by this point; the remaining code combines the five groups and stores the results. */
Chris@42 478 {
Chris@42 479 E T7Q, T5M, T5J, T7R, T5I, T5L, T7X, T7W, T5F, T6X, T5u, T7M, T7O, T5C, T5E;
Chris@42 480 E T5t, T7J, T7N;
Chris@42 481 {
Chris@42 482 E T4r, T4k, T4h, T4o, T3K, T3I, T1A, T2H, T28;
Chris@42 483 {
Chris@42 484 E T3e, T4g, T4n, T4f, T4m, T3H, T4p, T3l;
Chris@42 485 T7Q = T2Z + T3d;
Chris@42 486 T3e = T2Z - T3d;
Chris@42 487 T4p = T3k - T3i;
Chris@42 488 T3l = T3i + T3k;
Chris@42 489 {
Chris@42 490 E T4i, T1T, T3r, T27, T3s;
Chris@42 491 T4i = T1S - T1M;
Chris@42 492 T1T = T1M + T1S;
Chris@42 493 T5M = FMA(KP618033988, T4p, T4q);
Chris@42 494 T4r = FNMS(KP618033988, T4q, T4p);
Chris@42 495 T3r = T3l + T3q;
Chris@42 496 T4g = T3q - T3l;
Chris@42 497 T5J = FNMS(KP618033988, T4i, T4j);
Chris@42 498 T4k = FMA(KP618033988, T4j, T4i);
Chris@42 499 T27 = T1T + T26;
Chris@42 500 T4n = T26 - T1T;
Chris@42 501 T3s = T3g + T3r;
Chris@42 502 T4f = FNMS(KP250000000, T3r, T3g);
Chris@42 503 T28 = T1G + T27;
Chris@42 504 T4m = FNMS(KP250000000, T27, T1G);
Chris@42 505 T3H = T3s - T3G;
Chris@42 506 T7R = T3s + T3G;
Chris@42 507 }
Chris@42 508 T5I = FMA(KP559016994, T4g, T4f);
Chris@42 509 T4h = FNMS(KP559016994, T4g, T4f);
Chris@42 510 T5L = FMA(KP559016994, T4n, T4m);
Chris@42 511 T4o = FNMS(KP559016994, T4n, T4m);
Chris@42 512 T3K = FNMS(KP618033988, T3e, T3H);
Chris@42 513 T3I = FMA(KP618033988, T3H, T3e);
Chris@42 514 }
Chris@42 515 T1A = T11 + T1z;
Chris@42 516 T7X = T1z - T11;
Chris@42 517 T7W = T28 - T2G;
Chris@42 518 T2H = T28 + T2G;
Chris@42 519 {
Chris@42 520 E T3Z, T5d, T7r, T7D, T5h, T5i, T5m, T5l, T59, T7K, T56, T7L, T7I, T7G, T52;
Chris@42 521 E T50, T5w, T5g, T5q, T5A, T3N, T7p;
Chris@42 522 T3N = FMA(KP559016994, T3M, T3L);
Chris@42 523 T5F = FNMS(KP559016994, T3M, T3L);
Chris@42 524 T6X = FNMS(KP559016994, T6W, T6V);
Chris@42 525 T7p = FMA(KP559016994, T6W, T6V);
Chris@42 526 {
Chris@42 527 E T5o, T5p, T57, T4e, T4Y, T55, T4l, T4s, T4B, T5f, T5e, T4I;
Chris@42 528 {
Chris@42 529 E T46, T2K, T2J, T4d, T2I;
Chris@42 530 T46 = FMA(KP951056516, T45, T42);
Chris@42 531 T5o = FNMS(KP951056516, T45, T42);
Chris@42 532 T2I = T1A + T2H;
Chris@42 533 T2K = T1A - T2H;
Chris@42 534 T3Z = FNMS(KP951056516, T3Y, T3N);
Chris@42 535 T5d = FMA(KP951056516, T3Y, T3N);
Chris@42 536 T7r = FNMS(KP951056516, T7q, T7p);
Chris@42 537 T7D = FMA(KP951056516, T7q, T7p);
/* First store of the iteration: from here on cr[]/ci[] slots are overwritten with results. */
Chris@42 538 cr[0] = Tt + T2I;
Chris@42 539 T2J = FNMS(KP250000000, T2I, Tt);
Chris@42 540 T5p = FNMS(KP951056516, T4c, T49);
Chris@42 541 T4d = FMA(KP951056516, T4c, T49);
Chris@42 542 {
Chris@42 543 E T4Q, T4X, T2L, T3J;
Chris@42 544 T4Q = FNMS(KP951056516, T4P, T4M);
Chris@42 545 T5h = FMA(KP951056516, T4P, T4M);
Chris@42 546 T5i = FNMS(KP951056516, T4W, T4T);
Chris@42 547 T4X = FMA(KP951056516, T4W, T4T);
Chris@42 548 T2L = FMA(KP559016994, T2K, T2J);
Chris@42 549 T3J = FNMS(KP559016994, T2K, T2J);
Chris@42 550 T57 = FMA(KP126329378, T46, T4d);
Chris@42 551 T4e = FNMS(KP126329378, T4d, T46);
Chris@42 552 cr[WS(rs, 5)] = FMA(KP951056516, T3I, T2L);
Chris@42 553 ci[WS(rs, 4)] = FNMS(KP951056516, T3I, T2L);
Chris@42 554 ci[WS(rs, 9)] = FMA(KP951056516, T3K, T3J);
Chris@42 555 cr[WS(rs, 10)] = FNMS(KP951056516, T3K, T3J);
Chris@42 556 T4Y = FMA(KP827271945, T4X, T4Q);
Chris@42 557 T55 = FNMS(KP827271945, T4Q, T4X);
Chris@42 558 }
Chris@42 559 }
Chris@42 560 T4l = FNMS(KP951056516, T4k, T4h);
Chris@42 561 T5m = FMA(KP951056516, T4k, T4h);
Chris@42 562 T5l = FNMS(KP951056516, T4r, T4o);
Chris@42 563 T4s = FMA(KP951056516, T4r, T4o);
Chris@42 564 T4B = FNMS(KP951056516, T4A, T4x);
Chris@42 565 T5f = FMA(KP951056516, T4A, T4x);
Chris@42 566 T5e = FMA(KP951056516, T4H, T4E);
Chris@42 567 T4I = FNMS(KP951056516, T4H, T4E);
Chris@42 568 {
Chris@42 569 E T4u, T4Z, T4t, T58;
Chris@42 570 T4t = FNMS(KP470564281, T4s, T4l);
Chris@42 571 T58 = FMA(KP470564281, T4l, T4s);
Chris@42 572 {
Chris@42 573 E T4J, T54, T7E, T7F;
Chris@42 574 T4J = FMA(KP634619297, T4I, T4B);
Chris@42 575 T54 = FNMS(KP634619297, T4B, T4I);
Chris@42 576 T59 = FNMS(KP912018591, T58, T57);
Chris@42 577 T7E = FMA(KP912018591, T58, T57);
Chris@42 578 T7K = FMA(KP912018591, T4t, T4e);
Chris@42 579 T4u = FNMS(KP912018591, T4t, T4e);
Chris@42 580 T56 = FMA(KP912575812, T55, T54);
Chris@42 581 T7F = FNMS(KP912575812, T55, T54);
Chris@42 582 T7L = FMA(KP912575812, T4Y, T4J);
Chris@42 583 T4Z = FNMS(KP912575812, T4Y, T4J);
Chris@42 584 T7I = FNMS(KP851038619, T7F, T7E);
Chris@42 585 T7G = FMA(KP851038619, T7F, T7E);
Chris@42 586 }
Chris@42 587 T52 = FMA(KP851038619, T4Z, T4u);
Chris@42 588 T50 = FNMS(KP851038619, T4Z, T4u);
Chris@42 589 }
Chris@42 590 T5w = FNMS(KP256756360, T5e, T5f);
Chris@42 591 T5g = FMA(KP256756360, T5f, T5e);
Chris@42 592 T5q = FMA(KP939062505, T5p, T5o);
Chris@42 593 T5A = FNMS(KP939062505, T5o, T5p);
Chris@42 594 }
Chris@42 595 {
Chris@42 596 E T5y, T7z, T5B, T7y, T7w, T7u, T5s;
Chris@42 597 {
Chris@42 598 E T5k, T5r, T5j, T5x;
Chris@42 599 cr[WS(rs, 4)] = FNMS(KP992114701, T50, T3Z);
Chris@42 600 T5j = FMA(KP634619297, T5i, T5h);
Chris@42 601 T5x = FNMS(KP634619297, T5h, T5i);
Chris@42 602 {
Chris@42 603 E T5n, T5z, T7s, T7t;
Chris@42 604 T5n = FMA(KP549754652, T5m, T5l);
Chris@42 605 T5z = FNMS(KP549754652, T5l, T5m);
Chris@42 606 T5y = FMA(KP871714437, T5x, T5w);
Chris@42 607 T7s = FNMS(KP871714437, T5x, T5w);
Chris@42 608 T7z = FNMS(KP871714437, T5j, T5g);
Chris@42 609 T5k = FMA(KP871714437, T5j, T5g);
Chris@42 610 T5B = FNMS(KP831864738, T5A, T5z);
Chris@42 611 T7t = FMA(KP831864738, T5A, T5z);
Chris@42 612 T7y = FNMS(KP831864738, T5q, T5n);
Chris@42 613 T5r = FMA(KP831864738, T5q, T5n);
Chris@42 614 T7w = FNMS(KP904730450, T7t, T7s);
Chris@42 615 T7u = FMA(KP904730450, T7t, T7s);
Chris@42 616 }
Chris@42 617 ci[WS(rs, 20)] = FNMS(KP992114701, T7G, T7D);
Chris@42 618 T5u = FNMS(KP904730450, T5r, T5k);
Chris@42 619 T5s = FMA(KP904730450, T5r, T5k);
Chris@42 620 }
Chris@42 621 {
Chris@42 622 E T5a, T5c, T7A, T7C, T7v, T53, T5b, T51, T7H, T7x, T7B;
Chris@42 623 T5a = FNMS(KP726211448, T59, T56);
Chris@42 624 T5c = FMA(KP525970792, T56, T59);
Chris@42 625 ci[WS(rs, 23)] = FMA(KP968583161, T7u, T7r);
Chris@42 626 cr[WS(rs, 1)] = FMA(KP968583161, T5s, T5d);
Chris@42 627 T51 = FMA(KP248028675, T50, T3Z);
Chris@42 628 T7A = FNMS(KP683113946, T7z, T7y);
Chris@42 629 T7C = FMA(KP559154169, T7y, T7z);
Chris@42 630 T7v = FNMS(KP242145790, T7u, T7r);
Chris@42 631 T53 = FMA(KP554608978, T52, T51);
Chris@42 632 T5b = FNMS(KP554608978, T52, T51);
Chris@42 633 T7M = FNMS(KP525970792, T7L, T7K);
Chris@42 634 T7O = FMA(KP726211448, T7K, T7L);
Chris@42 635 ci[WS(rs, 10)] = FNMS(KP943557151, T5c, T5b);
Chris@42 636 ci[WS(rs, 5)] = FMA(KP943557151, T5c, T5b);
Chris@42 637 ci[0] = FMA(KP803003575, T5a, T53);
Chris@42 638 cr[WS(rs, 9)] = FNMS(KP803003575, T5a, T53);
Chris@42 639 T7x = FNMS(KP541454447, T7w, T7v);
Chris@42 640 T7B = FMA(KP541454447, T7w, T7v);
Chris@42 641 T7H = FMA(KP248028675, T7G, T7D);
Chris@42 642 cr[WS(rs, 21)] = -(FMA(KP921177326, T7C, T7B));
Chris@42 643 ci[WS(rs, 18)] = FNMS(KP921177326, T7C, T7B);
Chris@42 644 ci[WS(rs, 13)] = FMA(KP833417178, T7A, T7x);
Chris@42 645 cr[WS(rs, 16)] = FMS(KP833417178, T7A, T7x);
Chris@42 646 T5C = FMA(KP559154169, T5B, T5y);
Chris@42 647 T5E = FNMS(KP683113946, T5y, T5B);
Chris@42 648 T5t = FNMS(KP242145790, T5s, T5d);
Chris@42 649 T7J = FNMS(KP554608978, T7I, T7H);
Chris@42 650 T7N = FMA(KP554608978, T7I, T7H);
Chris@42 651 }
Chris@42 652 }
Chris@42 653 }
Chris@42 654 }
Chris@42 655 {
Chris@42 656 E T7Y, T80, T5v, T5D;
Chris@42 657 cr[WS(rs, 24)] = -(FMA(KP803003575, T7O, T7N));
Chris@42 658 ci[WS(rs, 15)] = FNMS(KP803003575, T7O, T7N);
Chris@42 659 cr[WS(rs, 19)] = FMS(KP943557151, T7M, T7J);
Chris@42 660 cr[WS(rs, 14)] = -(FMA(KP943557151, T7M, T7J));
Chris@42 661 T5v = FMA(KP541454447, T5u, T5t);
Chris@42 662 T5D = FNMS(KP541454447, T5u, T5t);
Chris@42 663 cr[WS(rs, 11)] = FNMS(KP833417178, T5E, T5D);
Chris@42 664 ci[WS(rs, 8)] = FMA(KP833417178, T5E, T5D);
Chris@42 665 cr[WS(rs, 6)] = FMA(KP921177326, T5C, T5v);
Chris@42 666 ci[WS(rs, 3)] = FNMS(KP921177326, T5C, T5v);
Chris@42 667 T7Y = FMA(KP618033988, T7X, T7W);
Chris@42 668 T80 = FNMS(KP618033988, T7W, T7X);
Chris@42 669 {
Chris@42 670 E T6t, T6p, T5H, T7d, T71, T6u, T6y, T6x, T6l, T7k, T6i, T7l, T7g, T6c, T6e;
Chris@42 671 E T6s, T6L, T6J, T6C;
Chris@42 672 {
Chris@42 673 E T6A, T6B, T5O, T6j, T6h, T6a, T6q, T5R, T5U, T6r, T5Z, T62;
Chris@42 674 {
Chris@42 675 E T5K, T7U, T7T, T5N, T7S;
Chris@42 676 T6t = FNMS(KP951056516, T5J, T5I);
Chris@42 677 T5K = FMA(KP951056516, T5J, T5I);
Chris@42 678 T7U = T7Q - T7R;
Chris@42 679 T7S = T7Q + T7R;
Chris@42 680 T6p = FNMS(KP951056516, T5G, T5F);
Chris@42 681 T5H = FMA(KP951056516, T5G, T5F);
Chris@42 682 T7d = FNMS(KP951056516, T70, T6X);
Chris@42 683 T71 = FMA(KP951056516, T70, T6X);
Chris@42 684 ci[WS(rs, 24)] = T7S + T7P;
Chris@42 685 T7T = FNMS(KP250000000, T7S, T7P);
Chris@42 686 T5N = FMA(KP951056516, T5M, T5L);
Chris@42 687 T6u = FNMS(KP951056516, T5M, T5L);
Chris@42 688 {
Chris@42 689 E T66, T69, T7Z, T7V;
Chris@42 690 T6A = FMA(KP951056516, T65, T64);
Chris@42 691 T66 = FNMS(KP951056516, T65, T64);
Chris@42 692 T69 = FMA(KP951056516, T68, T67);
Chris@42 693 T6B = FNMS(KP951056516, T68, T67);
Chris@42 694 T7Z = FMA(KP559016994, T7U, T7T);
Chris@42 695 T7V = FNMS(KP559016994, T7U, T7T);
Chris@42 696 T5O = FMA(KP062914667, T5N, T5K);
Chris@42 697 T6j = FNMS(KP062914667, T5K, T5N);
Chris@42 698 ci[WS(rs, 14)] = FMA(KP951056516, T7Y, T7V);
Chris@42 699 cr[WS(rs, 15)] = FMS(KP951056516, T7Y, T7V);
Chris@42 700 ci[WS(rs, 19)] = FMA(KP951056516, T80, T7Z);
Chris@42 701 cr[WS(rs, 20)] = FMS(KP951056516, T80, T7Z);
Chris@42 702 T6h = FNMS(KP939062505, T66, T69);
Chris@42 703 T6a = FMA(KP939062505, T69, T66);
Chris@42 704 }
Chris@42 705 }
Chris@42 706 T6q = FMA(KP951056516, T5Q, T5P);
Chris@42 707 T5R = FNMS(KP951056516, T5Q, T5P);
Chris@42 708 T5U = FNMS(KP951056516, T5T, T5S);
Chris@42 709 T6r = FMA(KP951056516, T5T, T5S);
Chris@42 710 T6y = FMA(KP951056516, T5Y, T5X);
Chris@42 711 T5Z = FNMS(KP951056516, T5Y, T5X);
Chris@42 712 T62 = FMA(KP951056516, T61, T60);
Chris@42 713 T6x = FNMS(KP951056516, T61, T60);
Chris@42 714 {
Chris@42 715 E T5W, T6b, T6k, T5V;
Chris@42 716 T6k = FMA(KP827271945, T5R, T5U);
Chris@42 717 T5V = FNMS(KP827271945, T5U, T5R);
Chris@42 718 {
Chris@42 719 E T6g, T63, T7e, T7f;
Chris@42 720 T6g = FMA(KP126329378, T5Z, T62);
Chris@42 721 T63 = FNMS(KP126329378, T62, T5Z);
Chris@42 722 T7e = FMA(KP772036680, T6k, T6j);
Chris@42 723 T6l = FNMS(KP772036680, T6k, T6j);
Chris@42 724 T5W = FMA(KP772036680, T5V, T5O);
Chris@42 725 T7k = FNMS(KP772036680, T5V, T5O);
Chris@42 726 T7f = FNMS(KP734762448, T6h, T6g);
Chris@42 727 T6i = FMA(KP734762448, T6h, T6g);
Chris@42 728 T6b = FNMS(KP734762448, T6a, T63);
Chris@42 729 T7l = FMA(KP734762448, T6a, T63);
Chris@42 730 T7g = FMA(KP994076283, T7f, T7e);
Chris@42 731 T7i = FNMS(KP994076283, T7f, T7e);
Chris@42 732 }
Chris@42 733 T6c = FNMS(KP994076283, T6b, T5W);
Chris@42 734 T6e = FMA(KP994076283, T6b, T5W);
Chris@42 735 }
Chris@42 736 T6s = FMA(KP062914667, T6r, T6q);
Chris@42 737 T6L = FNMS(KP062914667, T6q, T6r);
Chris@42 738 T6J = FNMS(KP549754652, T6A, T6B);
Chris@42 739 T6C = FMA(KP549754652, T6B, T6A);
Chris@42 740 }
Chris@42 741 {
Chris@42 742 E T6N, T78, T6K, T79, T74, T76, T6E, T6G;
Chris@42 743 {
Chris@42 744 E T6w, T6D, T6M, T6v;
Chris@42 745 cr[WS(rs, 3)] = FMA(KP998026728, T6c, T5H);
Chris@42 746 T6M = FNMS(KP634619297, T6t, T6u);
Chris@42 747 T6v = FMA(KP634619297, T6u, T6t);
Chris@42 748 {
Chris@42 749 E T6I, T6z, T72, T73;
Chris@42 750 T6I = FMA(KP470564281, T6x, T6y);
Chris@42 751 T6z = FNMS(KP470564281, T6y, T6x);
Chris@42 752 T72 = FMA(KP845997307, T6M, T6L);
Chris@42 753 T6N = FNMS(KP845997307, T6M, T6L);
Chris@42 754 T6w = FMA(KP845997307, T6v, T6s);
Chris@42 755 T78 = FNMS(KP845997307, T6v, T6s);
Chris@42 756 T73 = FNMS(KP968479752, T6J, T6I);
Chris@42 757 T6K = FMA(KP968479752, T6J, T6I);
Chris@42 758 T6D = FMA(KP968479752, T6C, T6z);
Chris@42 759 T79 = FNMS(KP968479752, T6C, T6z);
Chris@42 760 T74 = FMA(KP906616052, T73, T72);
Chris@42 761 T76 = FNMS(KP906616052, T73, T72);
Chris@42 762 }
Chris@42 763 ci[WS(rs, 21)] = FNMS(KP998026728, T7g, T7d);
Chris@42 764 T6E = FMA(KP906616052, T6D, T6w);
Chris@42 765 T6G = FNMS(KP906616052, T6D, T6w);
Chris@42 766 }
Chris@42 767 {
Chris@42 768 E T7c, T7a, T6Q, T6O, T6F, T7b, T77, T75, T6d, T6P, T6H;
Chris@42 769 T7c = FMA(KP681693190, T78, T79);
Chris@42 770 T7a = FNMS(KP560319534, T79, T78);
Chris@42 771 ci[WS(rs, 22)] = FNMS(KP998026728, T74, T71);
Chris@42 772 cr[WS(rs, 2)] = FMA(KP998026728, T6E, T6p);
Chris@42 773 T75 = FMA(KP249506682, T74, T71);
Chris@42 774 T6Q = FNMS(KP560319534, T6K, T6N);
Chris@42 775 T6O = FMA(KP681693190, T6N, T6K);
Chris@42 776 T6F = FNMS(KP249506682, T6E, T6p);
Chris@42 777 T7b = FMA(KP557913902, T76, T75);
Chris@42 778 T77 = FNMS(KP557913902, T76, T75);
Chris@42 779 T6o = FMA(KP614372930, T6i, T6l);
Chris@42 780 T6m = FNMS(KP621716863, T6l, T6i);
Chris@42 781 cr[WS(rs, 22)] = FMS(KP860541664, T7c, T7b);
Chris@42 782 ci[WS(rs, 17)] = FMA(KP860541664, T7c, T7b);
Chris@42 783 ci[WS(rs, 12)] = FNMS(KP949179823, T7a, T77);
Chris@42 784 cr[WS(rs, 17)] = -(FMA(KP949179823, T7a, T77));
Chris@42 785 T6P = FMA(KP557913902, T6G, T6F);
Chris@42 786 T6H = FNMS(KP557913902, T6G, T6F);
Chris@42 787 T6d = FNMS(KP249506682, T6c, T5H);
Chris@42 788 ci[WS(rs, 7)] = FMA(KP949179823, T6Q, T6P);
Chris@42 789 cr[WS(rs, 12)] = FNMS(KP949179823, T6Q, T6P);
Chris@42 790 cr[WS(rs, 7)] = FMA(KP860541664, T6O, T6H);
Chris@42 791 ci[WS(rs, 2)] = FNMS(KP860541664, T6O, T6H);
Chris@42 792 T7o = FMA(KP621716863, T7k, T7l);
Chris@42 793 T7m = FNMS(KP614372930, T7l, T7k);
Chris@42 794 T7h = FMA(KP249506682, T7g, T7d);
Chris@42 795 T6n = FMA(KP557913902, T6e, T6d);
Chris@42 796 T6f = FNMS(KP557913902, T6e, T6d);
Chris@42 797 }
Chris@42 798 }
Chris@42 799 }
Chris@42 800 }
Chris@42 801 }
Chris@42 802 }
/* Last eight stores of the iteration, built from the temporaries declared at the top of the loop body. */
Chris@42 803 ci[WS(rs, 6)] = FNMS(KP949179823, T6o, T6n);
Chris@42 804 ci[WS(rs, 11)] = FMA(KP949179823, T6o, T6n);
Chris@42 805 cr[WS(rs, 8)] = FMA(KP943557151, T6m, T6f);
Chris@42 806 ci[WS(rs, 1)] = FNMS(KP943557151, T6m, T6f);
Chris@42 807 T7j = FNMS(KP557913902, T7i, T7h);
Chris@42 808 T7n = FMA(KP557913902, T7i, T7h);
Chris@42 809 cr[WS(rs, 23)] = -(FMA(KP943557151, T7o, T7n));
Chris@42 810 ci[WS(rs, 16)] = FNMS(KP943557151, T7o, T7n);
Chris@42 811 cr[WS(rs, 18)] = FMS(KP949179823, T7m, T7j);
Chris@42 812 cr[WS(rs, 13)] = -(FMA(KP949179823, T7m, T7j));
Chris@42 813 }
Chris@42 814 }
Chris@42 815 }
Chris@42 816
/* Twiddle instruction table for the FMA build of hf_25; it is referenced only by desc below. */
/* NOTE(review): TW_FULL / TW_NEXT are defined outside this file; the {TW_FULL, 1, 25} entry presumably requests the full twiddle set for radix 25 and TW_NEXT presumably ends the list - confirm in the FFTW headers. */
Chris@42 817 static const tw_instr twinstr[] = {
Chris@42 818 {TW_FULL, 1, 25},
Chris@42 819 {TW_NEXT, 1, 0}
Chris@42 820 };
Chris@42 821
/* Descriptor for the FMA build: radix 25, name "hf_25", the twinstr table, &GENUS, and the counts {84, 48, 316, 0}. */
/* The first three counts equal the "84 additions, 48 multiplications, 316 fused multiply/add" quoted in the generator comment at the top of the HAVE_FMA branch. */
/* NOTE(review): hc2hc_desc and GENUS are declared outside this file; the meaning of the trailing 0 is not visible here. */
Chris@42 822 static const hc2hc_desc desc = { 25, "hf_25", twinstr, &GENUS, {84, 48, 316, 0} };
Chris@42 823
/* Registration entry point (FMA build): passes the hf_25 kernel and its descriptor to planner p through X(khc2hc_register). */
/* NOTE(review): X() is a macro defined outside this file, presumably a symbol-prefixing wrapper - confirm in the FFTW headers. */
Chris@42 824 void X(codelet_hf_25) (planner *p) {
Chris@42 825 X(khc2hc_register) (p, hf_25, &desc);
Chris@42 826 }
Chris@42 827 #else /* HAVE_FMA */
Chris@42 828
Chris@42 829 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 25 -dit -name hf_25 -include hf.h */
Chris@42 830
Chris@42 831 /*
Chris@42 832 * This function contains 400 FP additions, 280 FP multiplications,
Chris@42 833 * (or, 260 additions, 140 multiplications, 140 fused multiply/add),
Chris@42 834 * 101 stack variables, 20 constants, and 100 memory accesses
Chris@42 835 */
Chris@42 836 #include "hf.h"
Chris@42 837
/* hf_25 (non-FMA build): machine-generated size-25 kernel; see the "Generated by" command line above (-n 25 -dit -name hf_25). */
/* Works in place: each iteration first loads cr[0], ci[0] and cr/ci[WS(rs, k)] for k = 1..24 into temporaries, and only then stores 25 values to cr and 25 values to ci. */
/* Parameters: cr, ci - data arrays, read and overwritten; W - twiddle table, read only; rs - stride passed to WS(); */
/* mb, me - the loop runs for m in [mb, me); ms - per-iteration step (cr moves forward by ms, ci moves backward by ms). */
/* NOTE(review): R, E, INT, stride, WS, DK, FMA, FNMS and MAKE_VOLATILE_STRIDE come from headers outside this view; */
/* FMA(a, b, c) and FNMS(a, b, c) presumably mean a*b + c and c - a*b - confirm in the FFTW headers. */
Chris@42 838 static void hf_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 839 {
/* Numeric constants; each KPnnnnnnnnn name spells the leading digits of its value. */
/* KP250000000 = 1/4, KP559016994 = sqrt(5)/4, KP951056516 = sin(2*pi/5), KP587785252 = sin(pi/5). */
/* The remaining constants look like cos/sin pairs used as weights in the output stages below (TODO confirm the exact angles). */
Chris@42 840 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@42 841 DK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@42 842 DK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@42 843 DK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@42 844 DK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@42 845 DK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@42 846 DK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@42 847 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@42 848 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@42 849 DK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@42 850 DK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@42 851 DK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@42 852 DK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@42 853 DK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@42 854 DK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@42 855 DK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@42 856 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@42 857 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 858 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 859 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 860 {
Chris@42 861 INT m;
/* One pass per m: W starts at offset (mb - 1) * 48 and advances by 48 values per iteration; input k (k = 1..24) uses the pair W[2k - 2], W[2k - 1]. */
Chris@42 862 for (m = mb, W = W + ((mb - 1) * 48); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 48, MAKE_VOLATILE_STRIDE(50, rs)) {
Chris@42 863 E T1, T6b, T2l, T6g, To, T2m, T6e, T6f, T6a, T6H, T2u, T4I, T2i, T60, T3S;
Chris@42 864 E T5D, T4r, T58, T3Z, T5C, T4q, T5b, TS, T5W, T2G, T5s, T4g, T4M, T2R, T5t;
Chris@42 865 E T4h, T4P, T1l, T5X, T37, T5v, T4k, T4T, T3e, T5w, T4j, T4W, T1P, T5Z, T3v;
Chris@42 866 E T5A, T4o, T54, T3C, T5z, T4n, T51;
/* Input group 0, 5, 20, 10, 15: inputs 5..20 are multiplied by their twiddle pair, then combined with the 1/4, sqrt(5)/4, sin(2*pi/5) and sin(pi/5) constants. */
Chris@42 867 {
Chris@42 868 E T6, T2o, Tb, T2p, Tc, T6c, Th, T2r, Tm, T2s, Tn, T6d;
Chris@42 869 T1 = cr[0];
Chris@42 870 T6b = ci[0];
Chris@42 871 {
Chris@42 872 E T3, T5, T2, T4;
Chris@42 873 T3 = cr[WS(rs, 5)];
Chris@42 874 T5 = ci[WS(rs, 5)];
Chris@42 875 T2 = W[8];
Chris@42 876 T4 = W[9];
Chris@42 877 T6 = FMA(T2, T3, T4 * T5);
Chris@42 878 T2o = FNMS(T4, T3, T2 * T5);
Chris@42 879 }
Chris@42 880 {
Chris@42 881 E T8, Ta, T7, T9;
Chris@42 882 T8 = cr[WS(rs, 20)];
Chris@42 883 Ta = ci[WS(rs, 20)];
Chris@42 884 T7 = W[38];
Chris@42 885 T9 = W[39];
Chris@42 886 Tb = FMA(T7, T8, T9 * Ta);
Chris@42 887 T2p = FNMS(T9, T8, T7 * Ta);
Chris@42 888 }
Chris@42 889 Tc = T6 + Tb;
Chris@42 890 T6c = T2o + T2p;
Chris@42 891 {
Chris@42 892 E Te, Tg, Td, Tf;
Chris@42 893 Te = cr[WS(rs, 10)];
Chris@42 894 Tg = ci[WS(rs, 10)];
Chris@42 895 Td = W[18];
Chris@42 896 Tf = W[19];
Chris@42 897 Th = FMA(Td, Te, Tf * Tg);
Chris@42 898 T2r = FNMS(Tf, Te, Td * Tg);
Chris@42 899 }
Chris@42 900 {
Chris@42 901 E Tj, Tl, Ti, Tk;
Chris@42 902 Tj = cr[WS(rs, 15)];
Chris@42 903 Tl = ci[WS(rs, 15)];
Chris@42 904 Ti = W[28];
Chris@42 905 Tk = W[29];
Chris@42 906 Tm = FMA(Ti, Tj, Tk * Tl);
Chris@42 907 T2s = FNMS(Tk, Tj, Ti * Tl);
Chris@42 908 }
Chris@42 909 Tn = Th + Tm;
Chris@42 910 T6d = T2r + T2s;
Chris@42 911 T2l = KP559016994 * (Tc - Tn);
Chris@42 912 T6g = KP559016994 * (T6c - T6d);
Chris@42 913 To = Tc + Tn;
Chris@42 914 T2m = FNMS(KP250000000, To, T1);
Chris@42 915 T6e = T6c + T6d;
Chris@42 916 T6f = FNMS(KP250000000, T6e, T6b);
Chris@42 917 {
Chris@42 918 E T68, T69, T2q, T2t;
Chris@42 919 T68 = Th - Tm;
Chris@42 920 T69 = T6 - Tb;
Chris@42 921 T6a = FNMS(KP587785252, T69, KP951056516 * T68);
Chris@42 922 T6H = FMA(KP951056516, T69, KP587785252 * T68);
Chris@42 923 T2q = T2o - T2p;
Chris@42 924 T2t = T2r - T2s;
Chris@42 925 T2u = FMA(KP951056516, T2q, KP587785252 * T2t);
Chris@42 926 T4I = FNMS(KP587785252, T2q, KP951056516 * T2t);
Chris@42 927 }
Chris@42 928 }
/* Input group 3, 8, 18, 23, 13: twiddle multiply and combine; the plain sums end up in T2i and T60. */
Chris@42 929 {
Chris@42 930 E T1U, T3O, T3E, T3F, T3X, T3W, T3J, T3M, T3P, T25, T2g, T2h;
Chris@42 931 {
Chris@42 932 E T1R, T1T, T1Q, T1S;
Chris@42 933 T1R = cr[WS(rs, 3)];
Chris@42 934 T1T = ci[WS(rs, 3)];
Chris@42 935 T1Q = W[4];
Chris@42 936 T1S = W[5];
Chris@42 937 T1U = FMA(T1Q, T1R, T1S * T1T);
Chris@42 938 T3O = FNMS(T1S, T1R, T1Q * T1T);
Chris@42 939 }
Chris@42 940 {
Chris@42 941 E T1Z, T3H, T2f, T3L, T24, T3I, T2a, T3K;
Chris@42 942 {
Chris@42 943 E T1W, T1Y, T1V, T1X;
Chris@42 944 T1W = cr[WS(rs, 8)];
Chris@42 945 T1Y = ci[WS(rs, 8)];
Chris@42 946 T1V = W[14];
Chris@42 947 T1X = W[15];
Chris@42 948 T1Z = FMA(T1V, T1W, T1X * T1Y);
Chris@42 949 T3H = FNMS(T1X, T1W, T1V * T1Y);
Chris@42 950 }
Chris@42 951 {
Chris@42 952 E T2c, T2e, T2b, T2d;
Chris@42 953 T2c = cr[WS(rs, 18)];
Chris@42 954 T2e = ci[WS(rs, 18)];
Chris@42 955 T2b = W[34];
Chris@42 956 T2d = W[35];
Chris@42 957 T2f = FMA(T2b, T2c, T2d * T2e);
Chris@42 958 T3L = FNMS(T2d, T2c, T2b * T2e);
Chris@42 959 }
Chris@42 960 {
Chris@42 961 E T21, T23, T20, T22;
Chris@42 962 T21 = cr[WS(rs, 23)];
Chris@42 963 T23 = ci[WS(rs, 23)];
Chris@42 964 T20 = W[44];
Chris@42 965 T22 = W[45];
Chris@42 966 T24 = FMA(T20, T21, T22 * T23);
Chris@42 967 T3I = FNMS(T22, T21, T20 * T23);
Chris@42 968 }
Chris@42 969 {
Chris@42 970 E T27, T29, T26, T28;
Chris@42 971 T27 = cr[WS(rs, 13)];
Chris@42 972 T29 = ci[WS(rs, 13)];
Chris@42 973 T26 = W[24];
Chris@42 974 T28 = W[25];
Chris@42 975 T2a = FMA(T26, T27, T28 * T29);
Chris@42 976 T3K = FNMS(T28, T27, T26 * T29);
Chris@42 977 }
Chris@42 978 T3E = T1Z - T24;
Chris@42 979 T3F = T2a - T2f;
Chris@42 980 T3X = T3K - T3L;
Chris@42 981 T3W = T3H - T3I;
Chris@42 982 T3J = T3H + T3I;
Chris@42 983 T3M = T3K + T3L;
Chris@42 984 T3P = T3J + T3M;
Chris@42 985 T25 = T1Z + T24;
Chris@42 986 T2g = T2a + T2f;
Chris@42 987 T2h = T25 + T2g;
Chris@42 988 }
Chris@42 989 T2i = T1U + T2h;
Chris@42 990 T60 = T3O + T3P;
Chris@42 991 {
Chris@42 992 E T3G, T57, T3R, T56, T3N, T3Q;
Chris@42 993 T3G = FMA(KP951056516, T3E, KP587785252 * T3F);
Chris@42 994 T57 = FNMS(KP587785252, T3E, KP951056516 * T3F);
Chris@42 995 T3N = KP559016994 * (T3J - T3M);
Chris@42 996 T3Q = FNMS(KP250000000, T3P, T3O);
Chris@42 997 T3R = T3N + T3Q;
Chris@42 998 T56 = T3Q - T3N;
Chris@42 999 T3S = T3G + T3R;
Chris@42 1000 T5D = T57 + T56;
Chris@42 1001 T4r = T3R - T3G;
Chris@42 1002 T58 = T56 - T57;
Chris@42 1003 }
Chris@42 1004 {
Chris@42 1005 E T3Y, T5a, T3V, T59, T3T, T3U;
Chris@42 1006 T3Y = FMA(KP951056516, T3W, KP587785252 * T3X);
Chris@42 1007 T5a = FNMS(KP587785252, T3W, KP951056516 * T3X);
Chris@42 1008 T3T = KP559016994 * (T25 - T2g);
Chris@42 1009 T3U = FNMS(KP250000000, T2h, T1U);
Chris@42 1010 T3V = T3T + T3U;
Chris@42 1011 T59 = T3U - T3T;
Chris@42 1012 T3Z = T3V - T3Y;
Chris@42 1013 T5C = T59 - T5a;
Chris@42 1014 T4q = T3V + T3Y;
Chris@42 1015 T5b = T59 + T5a;
Chris@42 1016 }
Chris@42 1017 }
/* Input group 1, 6, 16, 21, 11: twiddle multiply and combine; the plain sums end up in TS and T5W. */
Chris@42 1018 {
Chris@42 1019 E Tu, T2N, T2B, T2E, T2I, T2H, T2K, T2L, T2O, TF, TQ, TR;
Chris@42 1020 {
Chris@42 1021 E Tr, Tt, Tq, Ts;
Chris@42 1022 Tr = cr[WS(rs, 1)];
Chris@42 1023 Tt = ci[WS(rs, 1)];
Chris@42 1024 Tq = W[0];
Chris@42 1025 Ts = W[1];
Chris@42 1026 Tu = FMA(Tq, Tr, Ts * Tt);
Chris@42 1027 T2N = FNMS(Ts, Tr, Tq * Tt);
Chris@42 1028 }
Chris@42 1029 {
Chris@42 1030 E Tz, T2z, TP, T2D, TE, T2A, TK, T2C;
Chris@42 1031 {
Chris@42 1032 E Tw, Ty, Tv, Tx;
Chris@42 1033 Tw = cr[WS(rs, 6)];
Chris@42 1034 Ty = ci[WS(rs, 6)];
Chris@42 1035 Tv = W[10];
Chris@42 1036 Tx = W[11];
Chris@42 1037 Tz = FMA(Tv, Tw, Tx * Ty);
Chris@42 1038 T2z = FNMS(Tx, Tw, Tv * Ty);
Chris@42 1039 }
Chris@42 1040 {
Chris@42 1041 E TM, TO, TL, TN;
Chris@42 1042 TM = cr[WS(rs, 16)];
Chris@42 1043 TO = ci[WS(rs, 16)];
Chris@42 1044 TL = W[30];
Chris@42 1045 TN = W[31];
Chris@42 1046 TP = FMA(TL, TM, TN * TO);
Chris@42 1047 T2D = FNMS(TN, TM, TL * TO);
Chris@42 1048 }
Chris@42 1049 {
Chris@42 1050 E TB, TD, TA, TC;
Chris@42 1051 TB = cr[WS(rs, 21)];
Chris@42 1052 TD = ci[WS(rs, 21)];
Chris@42 1053 TA = W[40];
Chris@42 1054 TC = W[41];
Chris@42 1055 TE = FMA(TA, TB, TC * TD);
Chris@42 1056 T2A = FNMS(TC, TB, TA * TD);
Chris@42 1057 }
Chris@42 1058 {
Chris@42 1059 E TH, TJ, TG, TI;
Chris@42 1060 TH = cr[WS(rs, 11)];
Chris@42 1061 TJ = ci[WS(rs, 11)];
Chris@42 1062 TG = W[20];
Chris@42 1063 TI = W[21];
Chris@42 1064 TK = FMA(TG, TH, TI * TJ);
Chris@42 1065 T2C = FNMS(TI, TH, TG * TJ);
Chris@42 1066 }
Chris@42 1067 T2B = T2z - T2A;
Chris@42 1068 T2E = T2C - T2D;
Chris@42 1069 T2I = TK - TP;
Chris@42 1070 T2H = Tz - TE;
Chris@42 1071 T2K = T2z + T2A;
Chris@42 1072 T2L = T2C + T2D;
Chris@42 1073 T2O = T2K + T2L;
Chris@42 1074 TF = Tz + TE;
Chris@42 1075 TQ = TK + TP;
Chris@42 1076 TR = TF + TQ;
Chris@42 1077 }
Chris@42 1078 TS = Tu + TR;
Chris@42 1079 T5W = T2N + T2O;
Chris@42 1080 {
Chris@42 1081 E T2F, T4L, T2y, T4K, T2w, T2x;
Chris@42 1082 T2F = FMA(KP951056516, T2B, KP587785252 * T2E);
Chris@42 1083 T4L = FNMS(KP587785252, T2B, KP951056516 * T2E);
Chris@42 1084 T2w = KP559016994 * (TF - TQ);
Chris@42 1085 T2x = FNMS(KP250000000, TR, Tu);
Chris@42 1086 T2y = T2w + T2x;
Chris@42 1087 T4K = T2x - T2w;
Chris@42 1088 T2G = T2y - T2F;
Chris@42 1089 T5s = T4K - T4L;
Chris@42 1090 T4g = T2y + T2F;
Chris@42 1091 T4M = T4K + T4L;
Chris@42 1092 }
Chris@42 1093 {
Chris@42 1094 E T2J, T4O, T2Q, T4N, T2M, T2P;
Chris@42 1095 T2J = FMA(KP951056516, T2H, KP587785252 * T2I);
Chris@42 1096 T4O = FNMS(KP587785252, T2H, KP951056516 * T2I);
Chris@42 1097 T2M = KP559016994 * (T2K - T2L);
Chris@42 1098 T2P = FNMS(KP250000000, T2O, T2N);
Chris@42 1099 T2Q = T2M + T2P;
Chris@42 1100 T4N = T2P - T2M;
Chris@42 1101 T2R = T2J + T2Q;
Chris@42 1102 T5t = T4O + T4N;
Chris@42 1103 T4h = T2Q - T2J;
Chris@42 1104 T4P = T4N - T4O;
Chris@42 1105 }
Chris@42 1106 }
/* Input group 4, 9, 19, 24, 14: twiddle multiply and combine; the plain sums end up in T1l and T5X. */
Chris@42 1107 {
Chris@42 1108 E TX, T33, T2T, T2U, T3c, T3b, T2Y, T31, T34, T18, T1j, T1k;
Chris@42 1109 {
Chris@42 1110 E TU, TW, TT, TV;
Chris@42 1111 TU = cr[WS(rs, 4)];
Chris@42 1112 TW = ci[WS(rs, 4)];
Chris@42 1113 TT = W[6];
Chris@42 1114 TV = W[7];
Chris@42 1115 TX = FMA(TT, TU, TV * TW);
Chris@42 1116 T33 = FNMS(TV, TU, TT * TW);
Chris@42 1117 }
Chris@42 1118 {
Chris@42 1119 E T12, T2W, T1i, T30, T17, T2X, T1d, T2Z;
Chris@42 1120 {
Chris@42 1121 E TZ, T11, TY, T10;
Chris@42 1122 TZ = cr[WS(rs, 9)];
Chris@42 1123 T11 = ci[WS(rs, 9)];
Chris@42 1124 TY = W[16];
Chris@42 1125 T10 = W[17];
Chris@42 1126 T12 = FMA(TY, TZ, T10 * T11);
Chris@42 1127 T2W = FNMS(T10, TZ, TY * T11);
Chris@42 1128 }
Chris@42 1129 {
Chris@42 1130 E T1f, T1h, T1e, T1g;
Chris@42 1131 T1f = cr[WS(rs, 19)];
Chris@42 1132 T1h = ci[WS(rs, 19)];
Chris@42 1133 T1e = W[36];
Chris@42 1134 T1g = W[37];
Chris@42 1135 T1i = FMA(T1e, T1f, T1g * T1h);
Chris@42 1136 T30 = FNMS(T1g, T1f, T1e * T1h);
Chris@42 1137 }
Chris@42 1138 {
Chris@42 1139 E T14, T16, T13, T15;
Chris@42 1140 T14 = cr[WS(rs, 24)];
Chris@42 1141 T16 = ci[WS(rs, 24)];
Chris@42 1142 T13 = W[46];
Chris@42 1143 T15 = W[47];
Chris@42 1144 T17 = FMA(T13, T14, T15 * T16);
Chris@42 1145 T2X = FNMS(T15, T14, T13 * T16);
Chris@42 1146 }
Chris@42 1147 {
Chris@42 1148 E T1a, T1c, T19, T1b;
Chris@42 1149 T1a = cr[WS(rs, 14)];
Chris@42 1150 T1c = ci[WS(rs, 14)];
Chris@42 1151 T19 = W[26];
Chris@42 1152 T1b = W[27];
Chris@42 1153 T1d = FMA(T19, T1a, T1b * T1c);
Chris@42 1154 T2Z = FNMS(T1b, T1a, T19 * T1c);
Chris@42 1155 }
Chris@42 1156 T2T = T17 - T12;
Chris@42 1157 T2U = T1d - T1i;
Chris@42 1158 T3c = T2Z - T30;
Chris@42 1159 T3b = T2W - T2X;
Chris@42 1160 T2Y = T2W + T2X;
Chris@42 1161 T31 = T2Z + T30;
Chris@42 1162 T34 = T2Y + T31;
Chris@42 1163 T18 = T12 + T17;
Chris@42 1164 T1j = T1d + T1i;
Chris@42 1165 T1k = T18 + T1j;
Chris@42 1166 }
Chris@42 1167 T1l = TX + T1k;
Chris@42 1168 T5X = T33 + T34;
Chris@42 1169 {
Chris@42 1170 E T2V, T4S, T36, T4R, T32, T35;
Chris@42 1171 T2V = FNMS(KP587785252, T2U, KP951056516 * T2T);
Chris@42 1172 T4S = FMA(KP587785252, T2T, KP951056516 * T2U);
Chris@42 1173 T32 = KP559016994 * (T2Y - T31);
Chris@42 1174 T35 = FNMS(KP250000000, T34, T33);
Chris@42 1175 T36 = T32 + T35;
Chris@42 1176 T4R = T35 - T32;
Chris@42 1177 T37 = T2V - T36;
Chris@42 1178 T5v = T4S + T4R;
Chris@42 1179 T4k = T2V + T36;
Chris@42 1180 T4T = T4R - T4S;
Chris@42 1181 }
Chris@42 1182 {
Chris@42 1183 E T3d, T4V, T3a, T4U, T38, T39;
Chris@42 1184 T3d = FMA(KP951056516, T3b, KP587785252 * T3c);
Chris@42 1185 T4V = FNMS(KP587785252, T3b, KP951056516 * T3c);
Chris@42 1186 T38 = KP559016994 * (T18 - T1j);
Chris@42 1187 T39 = FNMS(KP250000000, T1k, TX);
Chris@42 1188 T3a = T38 + T39;
Chris@42 1189 T4U = T39 - T38;
Chris@42 1190 T3e = T3a - T3d;
Chris@42 1191 T5w = T4U - T4V;
Chris@42 1192 T4j = T3a + T3d;
Chris@42 1193 T4W = T4U + T4V;
Chris@42 1194 }
Chris@42 1195 }
/* Input group 2, 7, 17, 22, 12: twiddle multiply and combine; the plain sums end up in T1P and T5Z. */
Chris@42 1196 {
Chris@42 1197 E T1r, T3r, T3h, T3i, T3A, T3z, T3m, T3p, T3s, T1C, T1N, T1O;
Chris@42 1198 {
Chris@42 1199 E T1o, T1q, T1n, T1p;
Chris@42 1200 T1o = cr[WS(rs, 2)];
Chris@42 1201 T1q = ci[WS(rs, 2)];
Chris@42 1202 T1n = W[2];
Chris@42 1203 T1p = W[3];
Chris@42 1204 T1r = FMA(T1n, T1o, T1p * T1q);
Chris@42 1205 T3r = FNMS(T1p, T1o, T1n * T1q);
Chris@42 1206 }
Chris@42 1207 {
Chris@42 1208 E T1w, T3k, T1M, T3o, T1B, T3l, T1H, T3n;
Chris@42 1209 {
Chris@42 1210 E T1t, T1v, T1s, T1u;
Chris@42 1211 T1t = cr[WS(rs, 7)];
Chris@42 1212 T1v = ci[WS(rs, 7)];
Chris@42 1213 T1s = W[12];
Chris@42 1214 T1u = W[13];
Chris@42 1215 T1w = FMA(T1s, T1t, T1u * T1v);
Chris@42 1216 T3k = FNMS(T1u, T1t, T1s * T1v);
Chris@42 1217 }
Chris@42 1218 {
Chris@42 1219 E T1J, T1L, T1I, T1K;
Chris@42 1220 T1J = cr[WS(rs, 17)];
Chris@42 1221 T1L = ci[WS(rs, 17)];
Chris@42 1222 T1I = W[32];
Chris@42 1223 T1K = W[33];
Chris@42 1224 T1M = FMA(T1I, T1J, T1K * T1L);
Chris@42 1225 T3o = FNMS(T1K, T1J, T1I * T1L);
Chris@42 1226 }
Chris@42 1227 {
Chris@42 1228 E T1y, T1A, T1x, T1z;
Chris@42 1229 T1y = cr[WS(rs, 22)];
Chris@42 1230 T1A = ci[WS(rs, 22)];
Chris@42 1231 T1x = W[42];
Chris@42 1232 T1z = W[43];
Chris@42 1233 T1B = FMA(T1x, T1y, T1z * T1A);
Chris@42 1234 T3l = FNMS(T1z, T1y, T1x * T1A);
Chris@42 1235 }
Chris@42 1236 {
Chris@42 1237 E T1E, T1G, T1D, T1F;
Chris@42 1238 T1E = cr[WS(rs, 12)];
Chris@42 1239 T1G = ci[WS(rs, 12)];
Chris@42 1240 T1D = W[22];
Chris@42 1241 T1F = W[23];
Chris@42 1242 T1H = FMA(T1D, T1E, T1F * T1G);
Chris@42 1243 T3n = FNMS(T1F, T1E, T1D * T1G);
Chris@42 1244 }
Chris@42 1245 T3h = T1w - T1B;
Chris@42 1246 T3i = T1H - T1M;
Chris@42 1247 T3A = T3n - T3o;
Chris@42 1248 T3z = T3k - T3l;
Chris@42 1249 T3m = T3k + T3l;
Chris@42 1250 T3p = T3n + T3o;
Chris@42 1251 T3s = T3m + T3p;
Chris@42 1252 T1C = T1w + T1B;
Chris@42 1253 T1N = T1H + T1M;
Chris@42 1254 T1O = T1C + T1N;
Chris@42 1255 }
Chris@42 1256 T1P = T1r + T1O;
Chris@42 1257 T5Z = T3r + T3s;
Chris@42 1258 {
Chris@42 1259 E T3j, T53, T3u, T52, T3q, T3t;
Chris@42 1260 T3j = FMA(KP951056516, T3h, KP587785252 * T3i);
Chris@42 1261 T53 = FNMS(KP587785252, T3h, KP951056516 * T3i);
Chris@42 1262 T3q = KP559016994 * (T3m - T3p);
Chris@42 1263 T3t = FNMS(KP250000000, T3s, T3r);
Chris@42 1264 T3u = T3q + T3t;
Chris@42 1265 T52 = T3t - T3q;
Chris@42 1266 T3v = T3j + T3u;
Chris@42 1267 T5A = T53 + T52;
Chris@42 1268 T4o = T3u - T3j;
Chris@42 1269 T54 = T52 - T53;
Chris@42 1270 }
Chris@42 1271 {
Chris@42 1272 E T3B, T50, T3y, T4Z, T3w, T3x;
Chris@42 1273 T3B = FMA(KP951056516, T3z, KP587785252 * T3A);
Chris@42 1274 T50 = FNMS(KP587785252, T3z, KP951056516 * T3A);
Chris@42 1275 T3w = KP559016994 * (T1C - T1N);
Chris@42 1276 T3x = FNMS(KP250000000, T1O, T1r);
Chris@42 1277 T3y = T3w + T3x;
Chris@42 1278 T4Z = T3x - T3w;
Chris@42 1279 T3C = T3y - T3B;
Chris@42 1280 T5z = T4Z - T50;
Chris@42 1281 T4n = T3y + T3B;
Chris@42 1282 T51 = T4Z + T50;
Chris@42 1283 }
Chris@42 1284 }
/* First stores (all loads are complete by this point): cr[0], cr[5], cr[10], ci[4], ci[9], built from the per-group sums. */
Chris@42 1285 {
Chris@42 1286 E T62, T64, Tp, T2k, T5T, T5U, T63, T5V;
Chris@42 1287 {
Chris@42 1288 E T5Y, T61, T1m, T2j;
Chris@42 1289 T5Y = T5W - T5X;
Chris@42 1290 T61 = T5Z - T60;
Chris@42 1291 T62 = FMA(KP951056516, T5Y, KP587785252 * T61);
Chris@42 1292 T64 = FNMS(KP587785252, T5Y, KP951056516 * T61);
Chris@42 1293 Tp = T1 + To;
Chris@42 1294 T1m = TS + T1l;
Chris@42 1295 T2j = T1P + T2i;
Chris@42 1296 T2k = T1m + T2j;
Chris@42 1297 T5T = KP559016994 * (T1m - T2j);
Chris@42 1298 T5U = FNMS(KP250000000, T2k, Tp);
Chris@42 1299 }
Chris@42 1300 cr[0] = Tp + T2k;
Chris@42 1301 T63 = T5U - T5T;
Chris@42 1302 cr[WS(rs, 10)] = T63 - T64;
Chris@42 1303 ci[WS(rs, 9)] = T63 + T64;
Chris@42 1304 T5V = T5T + T5U;
Chris@42 1305 ci[WS(rs, 4)] = T5V - T62;
Chris@42 1306 cr[WS(rs, 5)] = T5V + T62;
Chris@42 1307 }
/* Twenty stores: cr[1, 4, 6, 9, 11, 14, 16, 19, 21, 24] and ci[0, 3, 5, 8, 10, 13, 15, 18, 20, 23], formed as weighted combinations of the group terms using the KP constants. */
Chris@42 1308 {
Chris@42 1309 E T2v, T4f, T6I, T6U, T42, T6Z, T43, T6Y, T4A, T6N, T4D, T6L, T4u, T6E, T4v;
Chris@42 1310 E T6D, T48, T6V, T4b, T6T, T2n, T6G;
Chris@42 1311 T2n = T2l + T2m;
Chris@42 1312 T2v = T2n - T2u;
Chris@42 1313 T4f = T2n + T2u;
Chris@42 1314 T6G = T6g + T6f;
Chris@42 1315 T6I = T6G - T6H;
Chris@42 1316 T6U = T6H + T6G;
Chris@42 1317 {
Chris@42 1318 E T2S, T3f, T3g, T3D, T40, T41;
Chris@42 1319 T2S = FMA(KP535826794, T2G, KP844327925 * T2R);
Chris@42 1320 T3f = FNMS(KP637423989, T3e, KP770513242 * T37);
Chris@42 1321 T3g = T2S + T3f;
Chris@42 1322 T3D = FNMS(KP425779291, T3C, KP904827052 * T3v);
Chris@42 1323 T40 = FNMS(KP992114701, T3Z, KP125333233 * T3S);
Chris@42 1324 T41 = T3D + T40;
Chris@42 1325 T42 = T3g + T41;
Chris@42 1326 T6Z = T3D - T40;
Chris@42 1327 T43 = KP559016994 * (T3g - T41);
Chris@42 1328 T6Y = T3f - T2S;
Chris@42 1329 }
Chris@42 1330 {
Chris@42 1331 E T4y, T4z, T6J, T4B, T4C, T6K;
Chris@42 1332 T4y = FNMS(KP248689887, T4g, KP968583161 * T4h);
Chris@42 1333 T4z = FNMS(KP844327925, T4j, KP535826794 * T4k);
Chris@42 1334 T6J = T4y + T4z;
Chris@42 1335 T4B = FNMS(KP481753674, T4n, KP876306680 * T4o);
Chris@42 1336 T4C = FNMS(KP684547105, T4q, KP728968627 * T4r);
Chris@42 1337 T6K = T4B + T4C;
Chris@42 1338 T4A = T4y - T4z;
Chris@42 1339 T6N = KP559016994 * (T6J - T6K);
Chris@42 1340 T4D = T4B - T4C;
Chris@42 1341 T6L = T6J + T6K;
Chris@42 1342 }
Chris@42 1343 {
Chris@42 1344 E T4i, T4l, T4m, T4p, T4s, T4t;
Chris@42 1345 T4i = FMA(KP968583161, T4g, KP248689887 * T4h);
Chris@42 1346 T4l = FMA(KP535826794, T4j, KP844327925 * T4k);
Chris@42 1347 T4m = T4i + T4l;
Chris@42 1348 T4p = FMA(KP876306680, T4n, KP481753674 * T4o);
Chris@42 1349 T4s = FMA(KP728968627, T4q, KP684547105 * T4r);
Chris@42 1350 T4t = T4p + T4s;
Chris@42 1351 T4u = T4m + T4t;
Chris@42 1352 T6E = T4p - T4s;
Chris@42 1353 T4v = KP559016994 * (T4m - T4t);
Chris@42 1354 T6D = T4l - T4i;
Chris@42 1355 }
Chris@42 1356 {
Chris@42 1357 E T46, T47, T6R, T49, T4a, T6S;
Chris@42 1358 T46 = FNMS(KP844327925, T2G, KP535826794 * T2R);
Chris@42 1359 T47 = FMA(KP770513242, T3e, KP637423989 * T37);
Chris@42 1360 T6R = T46 + T47;
Chris@42 1361 T49 = FMA(KP125333233, T3Z, KP992114701 * T3S);
Chris@42 1362 T4a = FMA(KP904827052, T3C, KP425779291 * T3v);
Chris@42 1363 T6S = T4a + T49;
Chris@42 1364 T48 = T46 - T47;
Chris@42 1365 T6V = T6R - T6S;
Chris@42 1366 T4b = T49 - T4a;
Chris@42 1367 T6T = KP559016994 * (T6R + T6S);
Chris@42 1368 }
Chris@42 1369 cr[WS(rs, 4)] = T2v + T42;
Chris@42 1370 ci[WS(rs, 23)] = T6L + T6I;
Chris@42 1371 ci[WS(rs, 20)] = T6V + T6U;
Chris@42 1372 cr[WS(rs, 1)] = T4f + T4u;
Chris@42 1373 {
Chris@42 1374 E T4c, T4e, T45, T4d, T44;
Chris@42 1375 T4c = FMA(KP951056516, T48, KP587785252 * T4b);
Chris@42 1376 T4e = FNMS(KP587785252, T48, KP951056516 * T4b);
Chris@42 1377 T44 = FNMS(KP250000000, T42, T2v);
Chris@42 1378 T45 = T43 + T44;
Chris@42 1379 T4d = T44 - T43;
Chris@42 1380 ci[0] = T45 - T4c;
Chris@42 1381 ci[WS(rs, 5)] = T4d + T4e;
Chris@42 1382 cr[WS(rs, 9)] = T45 + T4c;
Chris@42 1383 ci[WS(rs, 10)] = T4d - T4e;
Chris@42 1384 }
Chris@42 1385 {
Chris@42 1386 E T6F, T6P, T6O, T6Q, T6M;
Chris@42 1387 T6F = FMA(KP587785252, T6D, KP951056516 * T6E);
Chris@42 1388 T6P = FNMS(KP587785252, T6E, KP951056516 * T6D);
Chris@42 1389 T6M = FNMS(KP250000000, T6L, T6I);
Chris@42 1390 T6O = T6M - T6N;
Chris@42 1391 T6Q = T6N + T6M;
Chris@42 1392 cr[WS(rs, 16)] = T6F - T6O;
Chris@42 1393 ci[WS(rs, 18)] = T6P + T6Q;
Chris@42 1394 ci[WS(rs, 13)] = T6F + T6O;
Chris@42 1395 cr[WS(rs, 21)] = T6P - T6Q;
Chris@42 1396 }
Chris@42 1397 {
Chris@42 1398 E T70, T71, T6X, T72, T6W;
Chris@42 1399 T70 = FMA(KP587785252, T6Y, KP951056516 * T6Z);
Chris@42 1400 T71 = FNMS(KP587785252, T6Z, KP951056516 * T6Y);
Chris@42 1401 T6W = FNMS(KP250000000, T6V, T6U);
Chris@42 1402 T6X = T6T - T6W;
Chris@42 1403 T72 = T6T + T6W;
Chris@42 1404 cr[WS(rs, 14)] = T6X - T70;
Chris@42 1405 ci[WS(rs, 15)] = T71 + T72;
Chris@42 1406 cr[WS(rs, 19)] = T70 + T6X;
Chris@42 1407 cr[WS(rs, 24)] = T71 - T72;
Chris@42 1408 }
Chris@42 1409 {
Chris@42 1410 E T4E, T4G, T4x, T4F, T4w;
Chris@42 1411 T4E = FMA(KP951056516, T4A, KP587785252 * T4D);
Chris@42 1412 T4G = FNMS(KP587785252, T4A, KP951056516 * T4D);
Chris@42 1413 T4w = FNMS(KP250000000, T4u, T4f);
Chris@42 1414 T4x = T4v + T4w;
Chris@42 1415 T4F = T4w - T4v;
Chris@42 1416 ci[WS(rs, 3)] = T4x - T4E;
Chris@42 1417 ci[WS(rs, 8)] = T4F + T4G;
Chris@42 1418 cr[WS(rs, 6)] = T4x + T4E;
Chris@42 1419 cr[WS(rs, 11)] = T4F - T4G;
Chris@42 1420 }
Chris@42 1421 }
/* Stores cr[15], cr[20], ci[14], ci[19], ci[24], again built from the per-group sums. */
Chris@42 1422 {
Chris@42 1423 E T75, T7d, T76, T79, T7a, T7b, T7e, T7c;
Chris@42 1424 {
Chris@42 1425 E T73, T74, T77, T78;
Chris@42 1426 T73 = T1l - TS;
Chris@42 1427 T74 = T1P - T2i;
Chris@42 1428 T75 = FMA(KP587785252, T73, KP951056516 * T74);
Chris@42 1429 T7d = FNMS(KP587785252, T74, KP951056516 * T73);
Chris@42 1430 T76 = T6e + T6b;
Chris@42 1431 T77 = T5W + T5X;
Chris@42 1432 T78 = T5Z + T60;
Chris@42 1433 T79 = T77 + T78;
Chris@42 1434 T7a = FNMS(KP250000000, T79, T76);
Chris@42 1435 T7b = KP559016994 * (T77 - T78);
Chris@42 1436 }
Chris@42 1437 ci[WS(rs, 24)] = T79 + T76;
Chris@42 1438 T7e = T7b + T7a;
Chris@42 1439 cr[WS(rs, 20)] = T7d - T7e;
Chris@42 1440 ci[WS(rs, 19)] = T7d + T7e;
Chris@42 1441 T7c = T7a - T7b;
Chris@42 1442 cr[WS(rs, 15)] = T75 - T7c;
Chris@42 1443 ci[WS(rs, 14)] = T75 + T7c;
Chris@42 1444 }
/* Remaining twenty stores: cr[2, 3, 7, 8, 12, 13, 17, 18, 22, 23] and ci[1, 2, 6, 7, 11, 12, 16, 17, 21, 22], formed the same way from the other half of the group terms. */
Chris@42 1445 {
Chris@42 1446 E T4J, T5r, T6i, T6u, T5e, T6z, T5f, T6y, T5M, T6n, T5P, T6l, T5G, T66, T5H;
Chris@42 1447 E T65, T5k, T6v, T5n, T6t, T4H, T6h;
Chris@42 1448 T4H = T2m - T2l;
Chris@42 1449 T4J = T4H + T4I;
Chris@42 1450 T5r = T4H - T4I;
Chris@42 1451 T6h = T6f - T6g;
Chris@42 1452 T6i = T6a + T6h;
Chris@42 1453 T6u = T6h - T6a;
Chris@42 1454 {
Chris@42 1455 E T4Q, T4X, T4Y, T55, T5c, T5d;
Chris@42 1456 T4Q = FMA(KP728968627, T4M, KP684547105 * T4P);
Chris@42 1457 T4X = FNMS(KP992114701, T4W, KP125333233 * T4T);
Chris@42 1458 T4Y = T4Q + T4X;
Chris@42 1459 T55 = FMA(KP062790519, T51, KP998026728 * T54);
Chris@42 1460 T5c = FNMS(KP637423989, T5b, KP770513242 * T58);
Chris@42 1461 T5d = T55 + T5c;
Chris@42 1462 T5e = T4Y + T5d;
Chris@42 1463 T6z = T55 - T5c;
Chris@42 1464 T5f = KP559016994 * (T4Y - T5d);
Chris@42 1465 T6y = T4X - T4Q;
Chris@42 1466 }
Chris@42 1467 {
Chris@42 1468 E T5K, T5L, T6j, T5N, T5O, T6k;
Chris@42 1469 T5K = FNMS(KP481753674, T5s, KP876306680 * T5t);
Chris@42 1470 T5L = FMA(KP904827052, T5w, KP425779291 * T5v);
Chris@42 1471 T6j = T5K - T5L;
Chris@42 1472 T5N = FNMS(KP844327925, T5z, KP535826794 * T5A);
Chris@42 1473 T5O = FNMS(KP998026728, T5C, KP062790519 * T5D);
Chris@42 1474 T6k = T5N + T5O;
Chris@42 1475 T5M = T5K + T5L;
Chris@42 1476 T6n = KP559016994 * (T6j - T6k);
Chris@42 1477 T5P = T5N - T5O;
Chris@42 1478 T6l = T6j + T6k;
Chris@42 1479 }
Chris@42 1480 {
Chris@42 1481 E T5u, T5x, T5y, T5B, T5E, T5F;
Chris@42 1482 T5u = FMA(KP876306680, T5s, KP481753674 * T5t);
Chris@42 1483 T5x = FNMS(KP425779291, T5w, KP904827052 * T5v);
Chris@42 1484 T5y = T5u + T5x;
Chris@42 1485 T5B = FMA(KP535826794, T5z, KP844327925 * T5A);
Chris@42 1486 T5E = FMA(KP062790519, T5C, KP998026728 * T5D);
Chris@42 1487 T5F = T5B + T5E;
Chris@42 1488 T5G = T5y + T5F;
Chris@42 1489 T66 = T5B - T5E;
Chris@42 1490 T5H = KP559016994 * (T5y - T5F);
Chris@42 1491 T65 = T5x - T5u;
Chris@42 1492 }
Chris@42 1493 {
Chris@42 1494 E T5i, T5j, T6r, T5l, T5m, T6s;
Chris@42 1495 T5i = FNMS(KP684547105, T4M, KP728968627 * T4P);
Chris@42 1496 T5j = FMA(KP125333233, T4W, KP992114701 * T4T);
Chris@42 1497 T6r = T5i - T5j;
Chris@42 1498 T5l = FNMS(KP998026728, T51, KP062790519 * T54);
Chris@42 1499 T5m = FMA(KP770513242, T5b, KP637423989 * T58);
Chris@42 1500 T6s = T5l - T5m;
Chris@42 1501 T5k = T5i + T5j;
Chris@42 1502 T6v = T6r + T6s;
Chris@42 1503 T5n = T5l + T5m;
Chris@42 1504 T6t = KP559016994 * (T6r - T6s);
Chris@42 1505 }
Chris@42 1506 cr[WS(rs, 3)] = T4J + T5e;
Chris@42 1507 ci[WS(rs, 22)] = T6l + T6i;
Chris@42 1508 ci[WS(rs, 21)] = T6v + T6u;
Chris@42 1509 cr[WS(rs, 2)] = T5r + T5G;
Chris@42 1510 {
Chris@42 1511 E T67, T6p, T6o, T6q, T6m;
Chris@42 1512 T67 = FMA(KP587785252, T65, KP951056516 * T66);
Chris@42 1513 T6p = FNMS(KP587785252, T66, KP951056516 * T65);
Chris@42 1514 T6m = FNMS(KP250000000, T6l, T6i);
Chris@42 1515 T6o = T6m - T6n;
Chris@42 1516 T6q = T6n + T6m;
Chris@42 1517 cr[WS(rs, 17)] = T67 - T6o;
Chris@42 1518 ci[WS(rs, 17)] = T6p + T6q;
Chris@42 1519 ci[WS(rs, 12)] = T67 + T6o;
Chris@42 1520 cr[WS(rs, 22)] = T6p - T6q;
Chris@42 1521 }
Chris@42 1522 {
Chris@42 1523 E T5Q, T5S, T5J, T5R, T5I;
Chris@42 1524 T5Q = FMA(KP951056516, T5M, KP587785252 * T5P);
Chris@42 1525 T5S = FNMS(KP587785252, T5M, KP951056516 * T5P);
Chris@42 1526 T5I = FNMS(KP250000000, T5G, T5r);
Chris@42 1527 T5J = T5H + T5I;
Chris@42 1528 T5R = T5I - T5H;
Chris@42 1529 ci[WS(rs, 2)] = T5J - T5Q;
Chris@42 1530 ci[WS(rs, 7)] = T5R + T5S;
Chris@42 1531 cr[WS(rs, 7)] = T5J + T5Q;
Chris@42 1532 cr[WS(rs, 12)] = T5R - T5S;
Chris@42 1533 }
Chris@42 1534 {
Chris@42 1535 E T5o, T5q, T5h, T5p, T5g;
Chris@42 1536 T5o = FMA(KP951056516, T5k, KP587785252 * T5n);
Chris@42 1537 T5q = FNMS(KP587785252, T5k, KP951056516 * T5n);
Chris@42 1538 T5g = FNMS(KP250000000, T5e, T4J);
Chris@42 1539 T5h = T5f + T5g;
Chris@42 1540 T5p = T5g - T5f;
Chris@42 1541 ci[WS(rs, 1)] = T5h - T5o;
Chris@42 1542 ci[WS(rs, 6)] = T5p + T5q;
Chris@42 1543 cr[WS(rs, 8)] = T5h + T5o;
Chris@42 1544 ci[WS(rs, 11)] = T5p - T5q;
Chris@42 1545 }
Chris@42 1546 {
Chris@42 1547 E T6A, T6B, T6x, T6C, T6w;
Chris@42 1548 T6A = FMA(KP587785252, T6y, KP951056516 * T6z);
Chris@42 1549 T6B = FNMS(KP587785252, T6z, KP951056516 * T6y);
Chris@42 1550 T6w = FNMS(KP250000000, T6v, T6u);
Chris@42 1551 T6x = T6t - T6w;
Chris@42 1552 T6C = T6t + T6w;
Chris@42 1553 cr[WS(rs, 13)] = T6x - T6A;
Chris@42 1554 ci[WS(rs, 16)] = T6B + T6C;
Chris@42 1555 cr[WS(rs, 18)] = T6A + T6x;
Chris@42 1556 cr[WS(rs, 23)] = T6B - T6C;
Chris@42 1557 }
Chris@42 1558 }
Chris@42 1559 }
Chris@42 1560 }
Chris@42 1561 }
Chris@42 1562
/* Twiddle instruction table for the non-FMA build of hf_25; it is referenced only by desc below. */
/* NOTE(review): TW_FULL / TW_NEXT are defined outside this file; the {TW_FULL, 1, 25} entry presumably requests the full twiddle set for radix 25 and TW_NEXT presumably ends the list - confirm in the FFTW headers. */
Chris@42 1563 static const tw_instr twinstr[] = {
Chris@42 1564 {TW_FULL, 1, 25},
Chris@42 1565 {TW_NEXT, 1, 0}
Chris@42 1566 };
Chris@42 1567
/* Descriptor for the non-FMA build: radix 25, name "hf_25", the twinstr table, &GENUS, and the counts {260, 140, 140, 0}. */
/* The first three counts equal the "260 additions, 140 multiplications, 140 fused multiply/add" quoted in the generator comment of this branch. */
/* NOTE(review): hc2hc_desc and GENUS are declared outside this file; the meaning of the trailing 0 is not visible here. */
Chris@42 1568 static const hc2hc_desc desc = { 25, "hf_25", twinstr, &GENUS, {260, 140, 140, 0} };
Chris@42 1569
/* Registration entry point (non-FMA build): passes the hf_25 kernel and its descriptor to planner p through X(khc2hc_register). */
/* NOTE(review): X() is a macro defined outside this file, presumably a symbol-prefixing wrapper - confirm in the FFTW headers. */
Chris@42 1570 void X(codelet_hf_25) (planner *p) {
Chris@42 1571 X(khc2hc_register) (p, hf_25, &desc);
Chris@42 1572 }
Chris@42 1573 #endif /* HAVE_FMA */