annotate src/fftw-3.3.3/dft/scalar/codelets/t1_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 /*
Chris@10 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
Chris@10 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
Chris@10 4 *
Chris@10 5 * This program is free software; you can redistribute it and/or modify
Chris@10 6 * it under the terms of the GNU General Public License as published by
Chris@10 7 * the Free Software Foundation; either version 2 of the License, or
Chris@10 8 * (at your option) any later version.
Chris@10 9 *
Chris@10 10 * This program is distributed in the hope that it will be useful,
Chris@10 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@10 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@10 13 * GNU General Public License for more details.
Chris@10 14 *
Chris@10 15 * You should have received a copy of the GNU General Public License
Chris@10 16 * along with this program; if not, write to the Free Software
Chris@10 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@10 18 *
Chris@10 19 */
Chris@10 20
Chris@10 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@10 22 /* Generated on Sun Nov 25 07:35:54 EST 2012 */
Chris@10 23
Chris@10 24 #include "codelet-dft.h"
Chris@10 25
Chris@10 26 #ifdef HAVE_FMA
Chris@10 27
Chris@10 28 /* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 25 -name t1_25 -include t.h */
Chris@10 29
Chris@10 30 /*
Chris@10 31 * This function contains 400 FP additions, 364 FP multiplications,
Chris@10 32 * (or, 84 additions, 48 multiplications, 316 fused multiply/add),
Chris@10 33 * 181 stack variables, 47 constants, and 100 memory accesses
Chris@10 34 */
Chris@10 35 #include "t.h"
Chris@10 36
/*
 * t1_25 (HAVE_FMA variant): machine-generated size-25 twiddle codelet.
 *
 * Parameters, as used by the code below:
 *   ri, ii  - arrays that are read and then overwritten in place; element k
 *             is addressed as ri[WS(rs, k)] / ii[WS(rs, k)] for k = 0..24
 *             (presumably real and imaginary parts - confirm against callers).
 *   W       - twiddle table; 48 values are consumed per iteration.  Input k
 *             (k = 1..24) is combined with the pair W[2*(k-1)], W[2*(k-1)+1];
 *             input 0 is used without a twiddle.
 *   rs      - stride handed to WS() to locate element k.
 *   mb, me  - half-open iteration range [mb, me).
 *   ms      - amount added to ri and ii after every iteration.
 *
 * NOTE(review): the comments in this function assume FMA(a, b, c) = a*b + c
 * and FNMS(a, b, c) = c - a*b; those macros are defined outside this file,
 * so confirm against the FFTW headers.
 */
Chris@10 37 static void t1_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@10 38 {
/*
 * Numeric constants; each name carries the leading digits of its value.
 * The last four (0.25, 0.559016994 = sqrt(5)/4, 0.951056516 = sin(2*pi/5),
 * 0.618033988 = (sqrt(5)-1)/2) are applied to every group of five inputs.
 */
Chris@10 39 DK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@10 40 DK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@10 41 DK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@10 42 DK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@10 43 DK(KP557913902, +0.557913902031834264187699648465567037992437152);
Chris@10 44 DK(KP249506682, +0.249506682107067890488084201715862638334226305);
Chris@10 45 DK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@10 46 DK(KP968479752, +0.968479752739016373193524836781420152702090879);
Chris@10 47 DK(KP621716863, +0.621716863012209892444754556304102309693593202);
Chris@10 48 DK(KP614372930, +0.614372930789563808870829930444362096004872855);
Chris@10 49 DK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@10 50 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@10 51 DK(KP994076283, +0.994076283785401014123185814696322018529298887);
Chris@10 52 DK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@10 53 DK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@10 54 DK(KP062914667, +0.062914667253649757225485955897349402364686947);
Chris@10 55 DK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@10 56 DK(KP943557151, +0.943557151597354104399655195398983005179443399);
Chris@10 57 DK(KP554608978, +0.554608978404018097464974850792216217022558774);
Chris@10 58 DK(KP248028675, +0.248028675328619457762448260696444630363259177);
Chris@10 59 DK(KP726211448, +0.726211448929902658173535992263577167607493062);
Chris@10 60 DK(KP525970792, +0.525970792408939708442463226536226366643874659);
Chris@10 61 DK(KP921177326, +0.921177326965143320250447435415066029359282231);
Chris@10 62 DK(KP833417178, +0.833417178328688677408962550243238843138996060);
Chris@10 63 DK(KP541454447, +0.541454447536312777046285590082819509052033189);
Chris@10 64 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@10 65 DK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@10 66 DK(KP851038619, +0.851038619207379630836264138867114231259902550);
Chris@10 67 DK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@10 68 DK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@10 69 DK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@10 70 DK(KP912018591, +0.912018591466481957908415381764119056233607330);
Chris@10 71 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@10 72 DK(KP470564281, +0.470564281212251493087595091036643380879947982);
Chris@10 73 DK(KP827271945, +0.827271945972475634034355757144307982555673741);
Chris@10 74 DK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@10 75 DK(KP126329378, +0.126329378446108174786050455341811215027378105);
Chris@10 76 DK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@10 77 DK(KP549754652, +0.549754652192770074288023275540779861653779767);
Chris@10 78 DK(KP871714437, +0.871714437527667770979999223229522602943903653);
Chris@10 79 DK(KP634619297, +0.634619297544148100711287640319130485732531031);
Chris@10 80 DK(KP939062505, +0.939062505817492352556001843133229685779824606);
Chris@10 81 DK(KP256756360, +0.256756360367726783319498520922669048172391148);
Chris@10 82 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@10 83 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@10 84 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@10 85 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@10 86 {
Chris@10 87 INT m;
/*
 * One 25-point transform per iteration.  W is first advanced by mb * 48 so
 * that iteration m sees its own 48 twiddle values; ri/ii step by ms and W
 * steps by 48 after each pass.
 */
Chris@10 88 for (m = mb, W = W + (mb * 48); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 48, MAKE_VOLATILE_STRIDE(50, rs)) {
/* Temporaries that outlive the nested scopes; consumed by the final stores at the bottom of the loop body. */
Chris@10 89 E T7I, T6Q, T6O, T7O, T7M, T7H, T6P, T6H, T7J, T7N;
Chris@10 90 {
Chris@10 91 E T78, T5G, T3Y, T3M, T7C, T7c, T77, T6Y, Tt, T3L, T5T, T4P, T5Q, T4W, T3G;
Chris@10 92 E T2G, T5P, T4T, T5S, T4M, T65, T45, T68, T4c, T2Z, T11, T67, T49, T64, T42;
Chris@10 93 E T5Y, T4r, T61, T4k, T3d, T1z, T60, T4h, T5X, T4o, T3g, T1G, T3q, T4z, T4G;
Chris@10 94 E T26, T3i, T1M, T3k, T1S;
Chris@10 95 {
Chris@10 96 E T3u, T2e, T3E, T4O, T4V, T2E, T3w, T2k, T3y, T2q;
/* Group of inputs 0, 5, 10, 15, 20: input 0 is loaded as is, the others are multiplied by their twiddle pair. */
Chris@10 97 {
Chris@10 98 E T1, T6X, T3P, T7, T3W, Tq, T9, Tc, Tb, T3U, Tk, T3Q, Ta;
Chris@10 99 {
Chris@10 100 E T3, T6, T2, T5;
Chris@10 101 T1 = ri[0];
Chris@10 102 T6X = ii[0];
Chris@10 103 T3 = ri[WS(rs, 5)];
Chris@10 104 T6 = ii[WS(rs, 5)];
Chris@10 105 T2 = W[8];
Chris@10 106 T5 = W[9];
Chris@10 107 {
Chris@10 108 E Tm, Tp, To, T3V, Tn, T3O, T4, Tl;
Chris@10 109 Tm = ri[WS(rs, 15)];
Chris@10 110 Tp = ii[WS(rs, 15)];
Chris@10 111 T3O = T2 * T6;
Chris@10 112 T4 = T2 * T3;
Chris@10 113 Tl = W[28];
Chris@10 114 To = W[29];
/* Twiddled input 5: T7 = W[8]*ri5 + W[9]*ii5 and T3P = W[8]*ii5 - W[9]*ri5 (under the FMA/FNMS assumption above). The same pattern repeats for every twiddled input. */
Chris@10 115 T3P = FNMS(T5, T3, T3O);
Chris@10 116 T7 = FMA(T5, T6, T4);
Chris@10 117 T3V = Tl * Tp;
Chris@10 118 Tn = Tl * Tm;
Chris@10 119 {
Chris@10 120 E Tg, Tj, Tf, Ti, T3T, Th, T8;
Chris@10 121 Tg = ri[WS(rs, 10)];
Chris@10 122 Tj = ii[WS(rs, 10)];
Chris@10 123 T3W = FNMS(To, Tm, T3V);
Chris@10 124 Tq = FMA(To, Tp, Tn);
Chris@10 125 Tf = W[18];
Chris@10 126 Ti = W[19];
Chris@10 127 T9 = ri[WS(rs, 20)];
Chris@10 128 Tc = ii[WS(rs, 20)];
Chris@10 129 T3T = Tf * Tj;
Chris@10 130 Th = Tf * Tg;
Chris@10 131 T8 = W[38];
Chris@10 132 Tb = W[39];
Chris@10 133 T3U = FNMS(Ti, Tg, T3T);
Chris@10 134 Tk = FMA(Ti, Tj, Th);
Chris@10 135 T3Q = T8 * Tc;
Chris@10 136 Ta = T8 * T9;
Chris@10 137 }
Chris@10 138 }
Chris@10 139 }
/* Sums and differences of the twiddled inputs 5, 10, 15, 20, then combination with input 0. */
Chris@10 140 {
Chris@10 141 E T6V, T3X, T7b, Tr, T3R, Td;
Chris@10 142 T6V = T3U + T3W;
Chris@10 143 T3X = T3U - T3W;
Chris@10 144 T7b = Tk - Tq;
Chris@10 145 Tr = Tk + Tq;
Chris@10 146 T3R = FNMS(Tb, T9, T3Q);
Chris@10 147 Td = FMA(Tb, Tc, Ta);
Chris@10 148 {
Chris@10 149 E T3S, T7a, Te, T6W, T6U, Ts;
Chris@10 150 T3S = T3P - T3R;
Chris@10 151 T6U = T3P + T3R;
Chris@10 152 T7a = T7 - Td;
Chris@10 153 Te = T7 + Td;
Chris@10 154 T78 = T6U - T6V;
Chris@10 155 T6W = T6U + T6V;
Chris@10 156 T5G = FNMS(KP618033988, T3S, T3X);
Chris@10 157 T3Y = FMA(KP618033988, T3X, T3S);
Chris@10 158 T3M = Te - Tr;
Chris@10 159 Ts = Te + Tr;
Chris@10 160 T7C = FNMS(KP618033988, T7a, T7b);
Chris@10 161 T7c = FMA(KP618033988, T7b, T7a);
Chris@10 162 T77 = FNMS(KP250000000, T6W, T6X);
Chris@10 163 T6Y = T6W + T6X;
Chris@10 164 Tt = T1 + Ts;
Chris@10 165 T3L = FNMS(KP250000000, Ts, T1);
Chris@10 166 }
Chris@10 167 }
Chris@10 168 }
/* Group of inputs 3, 8, 13, 18, 23, each multiplied by its twiddle pair. */
Chris@10 169 {
Chris@10 170 E T2g, T2j, T2m, T3v, T2h, T2p, T2l, T2i, T2o, T3x, T2n;
Chris@10 171 {
Chris@10 172 E T2a, T2d, T29, T2c;
Chris@10 173 T2a = ri[WS(rs, 3)];
Chris@10 174 T2d = ii[WS(rs, 3)];
Chris@10 175 T29 = W[4];
Chris@10 176 T2c = W[5];
Chris@10 177 {
Chris@10 178 E T2t, T2w, T2z, T3A, T2u, T2C, T2y, T2v, T2B, T3t, T2b, T2s, T2f;
Chris@10 179 T2t = ri[WS(rs, 13)];
Chris@10 180 T2w = ii[WS(rs, 13)];
Chris@10 181 T3t = T29 * T2d;
Chris@10 182 T2b = T29 * T2a;
Chris@10 183 T2s = W[24];
Chris@10 184 T2z = ri[WS(rs, 18)];
Chris@10 185 T3u = FNMS(T2c, T2a, T3t);
Chris@10 186 T2e = FMA(T2c, T2d, T2b);
Chris@10 187 T3A = T2s * T2w;
Chris@10 188 T2u = T2s * T2t;
Chris@10 189 T2C = ii[WS(rs, 18)];
Chris@10 190 T2y = W[34];
Chris@10 191 T2v = W[25];
Chris@10 192 T2B = W[35];
Chris@10 193 {
Chris@10 194 E T3B, T2x, T3D, T2D, T3C, T2A;
Chris@10 195 T2g = ri[WS(rs, 8)];
Chris@10 196 T3C = T2y * T2C;
Chris@10 197 T2A = T2y * T2z;
Chris@10 198 T3B = FNMS(T2v, T2t, T3A);
Chris@10 199 T2x = FMA(T2v, T2w, T2u);
Chris@10 200 T3D = FNMS(T2B, T2z, T3C);
Chris@10 201 T2D = FMA(T2B, T2C, T2A);
Chris@10 202 T2j = ii[WS(rs, 8)];
Chris@10 203 T2f = W[14];
Chris@10 204 T3E = T3B + T3D;
Chris@10 205 T4O = T3D - T3B;
Chris@10 206 T4V = T2x - T2D;
Chris@10 207 T2E = T2x + T2D;
Chris@10 208 }
Chris@10 209 T2m = ri[WS(rs, 23)];
Chris@10 210 T3v = T2f * T2j;
Chris@10 211 T2h = T2f * T2g;
Chris@10 212 T2p = ii[WS(rs, 23)];
Chris@10 213 T2l = W[44];
Chris@10 214 T2i = W[15];
Chris@10 215 T2o = W[45];
Chris@10 216 }
Chris@10 217 }
Chris@10 218 T3x = T2l * T2p;
Chris@10 219 T2n = T2l * T2m;
Chris@10 220 T3w = FNMS(T2i, T2g, T3v);
Chris@10 221 T2k = FMA(T2i, T2j, T2h);
Chris@10 222 T3y = FNMS(T2o, T2m, T3x);
Chris@10 223 T2q = FMA(T2o, T2p, T2n);
Chris@10 224 }
/* Group of inputs 1, 6, 11, 16, 21; the loads are interleaved with the remaining arithmetic for the 3/8/13/18/23 group. */
Chris@10 225 {
Chris@10 226 E T2N, Tz, T2X, T44, T4b, TZ, T2P, TF, T2R, TL;
Chris@10 227 {
Chris@10 228 E TB, TE, TH, T2O, TC, TK, TG, TD, TJ, T2Q, TI;
Chris@10 229 {
Chris@10 230 E Tv, Ty, Tu, Tx;
Chris@10 231 {
Chris@10 232 E T4S, T4L, T4R, T4K, T4N, T3z;
Chris@10 233 Tv = ri[WS(rs, 1)];
Chris@10 234 T4N = T3y - T3w;
Chris@10 235 T3z = T3w + T3y;
Chris@10 236 {
Chris@10 237 E T4U, T2r, T3F, T2F;
Chris@10 238 T4U = T2k - T2q;
Chris@10 239 T2r = T2k + T2q;
Chris@10 240 T5T = FNMS(KP618033988, T4N, T4O);
Chris@10 241 T4P = FMA(KP618033988, T4O, T4N);
Chris@10 242 T3F = T3z + T3E;
Chris@10 243 T4S = T3E - T3z;
Chris@10 244 T5Q = FNMS(KP618033988, T4U, T4V);
Chris@10 245 T4W = FMA(KP618033988, T4V, T4U);
Chris@10 246 T2F = T2r + T2E;
Chris@10 247 T4L = T2E - T2r;
Chris@10 248 T3G = T3u + T3F;
Chris@10 249 T4R = FNMS(KP250000000, T3F, T3u);
Chris@10 250 T2G = T2e + T2F;
Chris@10 251 T4K = FNMS(KP250000000, T2F, T2e);
Chris@10 252 Ty = ii[WS(rs, 1)];
Chris@10 253 }
Chris@10 254 T5P = FMA(KP559016994, T4S, T4R);
Chris@10 255 T4T = FNMS(KP559016994, T4S, T4R);
Chris@10 256 T5S = FMA(KP559016994, T4L, T4K);
Chris@10 257 T4M = FNMS(KP559016994, T4L, T4K);
Chris@10 258 Tu = W[0];
Chris@10 259 }
Chris@10 260 Tx = W[1];
Chris@10 261 {
Chris@10 262 E TO, TR, TU, T2T, TP, TX, TT, TQ, TW, T2M, Tw, TN, TA;
Chris@10 263 TO = ri[WS(rs, 11)];
Chris@10 264 TR = ii[WS(rs, 11)];
Chris@10 265 T2M = Tu * Ty;
Chris@10 266 Tw = Tu * Tv;
Chris@10 267 TN = W[20];
Chris@10 268 TU = ri[WS(rs, 16)];
Chris@10 269 T2N = FNMS(Tx, Tv, T2M);
Chris@10 270 Tz = FMA(Tx, Ty, Tw);
Chris@10 271 T2T = TN * TR;
Chris@10 272 TP = TN * TO;
Chris@10 273 TX = ii[WS(rs, 16)];
Chris@10 274 TT = W[30];
Chris@10 275 TQ = W[21];
Chris@10 276 TW = W[31];
Chris@10 277 {
Chris@10 278 E T2U, TS, T2W, TY, T2V, TV;
Chris@10 279 TB = ri[WS(rs, 6)];
Chris@10 280 T2V = TT * TX;
Chris@10 281 TV = TT * TU;
Chris@10 282 T2U = FNMS(TQ, TO, T2T);
Chris@10 283 TS = FMA(TQ, TR, TP);
Chris@10 284 T2W = FNMS(TW, TU, T2V);
Chris@10 285 TY = FMA(TW, TX, TV);
Chris@10 286 TE = ii[WS(rs, 6)];
Chris@10 287 TA = W[10];
Chris@10 288 T2X = T2U + T2W;
Chris@10 289 T44 = T2W - T2U;
Chris@10 290 T4b = TY - TS;
Chris@10 291 TZ = TS + TY;
Chris@10 292 }
Chris@10 293 TH = ri[WS(rs, 21)];
Chris@10 294 T2O = TA * TE;
Chris@10 295 TC = TA * TB;
Chris@10 296 TK = ii[WS(rs, 21)];
Chris@10 297 TG = W[40];
Chris@10 298 TD = W[11];
Chris@10 299 TJ = W[41];
Chris@10 300 }
Chris@10 301 }
Chris@10 302 T2Q = TG * TK;
Chris@10 303 TI = TG * TH;
Chris@10 304 T2P = FNMS(TD, TB, T2O);
Chris@10 305 TF = FMA(TD, TE, TC);
Chris@10 306 T2R = FNMS(TJ, TH, T2Q);
Chris@10 307 TL = FMA(TJ, TK, TI);
Chris@10 308 }
/* Group of inputs 4, 9, 14, 19, 24; the loads are interleaved with the remaining arithmetic for the 1/6/11/16/21 group. */
Chris@10 309 {
Chris@10 310 E T31, T17, T3b, T4q, T4j, T1x, T33, T1d, T35, T1j;
Chris@10 311 {
Chris@10 312 E T19, T1c, T1f, T32, T1a, T1i, T1e, T1b, T1h, T34, T1g;
Chris@10 313 {
Chris@10 314 E T13, T16, T12, T15;
Chris@10 315 {
Chris@10 316 E T48, T41, T47, T40, T43, T2S;
Chris@10 317 T13 = ri[WS(rs, 4)];
Chris@10 318 T43 = T2P - T2R;
Chris@10 319 T2S = T2P + T2R;
Chris@10 320 {
Chris@10 321 E T4a, TM, T2Y, T10;
Chris@10 322 T4a = TL - TF;
Chris@10 323 TM = TF + TL;
Chris@10 324 T65 = FMA(KP618033988, T43, T44);
Chris@10 325 T45 = FNMS(KP618033988, T44, T43);
Chris@10 326 T2Y = T2S + T2X;
Chris@10 327 T48 = T2S - T2X;
Chris@10 328 T68 = FNMS(KP618033988, T4a, T4b);
Chris@10 329 T4c = FMA(KP618033988, T4b, T4a);
Chris@10 330 T10 = TM + TZ;
Chris@10 331 T41 = TM - TZ;
Chris@10 332 T2Z = T2N + T2Y;
Chris@10 333 T47 = FNMS(KP250000000, T2Y, T2N);
Chris@10 334 T11 = Tz + T10;
Chris@10 335 T40 = FNMS(KP250000000, T10, Tz);
Chris@10 336 T16 = ii[WS(rs, 4)];
Chris@10 337 }
Chris@10 338 T67 = FNMS(KP559016994, T48, T47);
Chris@10 339 T49 = FMA(KP559016994, T48, T47);
Chris@10 340 T64 = FNMS(KP559016994, T41, T40);
Chris@10 341 T42 = FMA(KP559016994, T41, T40);
Chris@10 342 T12 = W[6];
Chris@10 343 }
Chris@10 344 T15 = W[7];
Chris@10 345 {
Chris@10 346 E T1m, T1p, T1s, T37, T1n, T1v, T1r, T1o, T1u, T30, T14, T1l, T18;
Chris@10 347 T1m = ri[WS(rs, 14)];
Chris@10 348 T1p = ii[WS(rs, 14)];
Chris@10 349 T30 = T12 * T16;
Chris@10 350 T14 = T12 * T13;
Chris@10 351 T1l = W[26];
Chris@10 352 T1s = ri[WS(rs, 19)];
Chris@10 353 T31 = FNMS(T15, T13, T30);
Chris@10 354 T17 = FMA(T15, T16, T14);
Chris@10 355 T37 = T1l * T1p;
Chris@10 356 T1n = T1l * T1m;
Chris@10 357 T1v = ii[WS(rs, 19)];
Chris@10 358 T1r = W[36];
Chris@10 359 T1o = W[27];
Chris@10 360 T1u = W[37];
Chris@10 361 {
Chris@10 362 E T38, T1q, T3a, T1w, T39, T1t;
Chris@10 363 T19 = ri[WS(rs, 9)];
Chris@10 364 T39 = T1r * T1v;
Chris@10 365 T1t = T1r * T1s;
Chris@10 366 T38 = FNMS(T1o, T1m, T37);
Chris@10 367 T1q = FMA(T1o, T1p, T1n);
Chris@10 368 T3a = FNMS(T1u, T1s, T39);
Chris@10 369 T1w = FMA(T1u, T1v, T1t);
Chris@10 370 T1c = ii[WS(rs, 9)];
Chris@10 371 T18 = W[16];
Chris@10 372 T3b = T38 + T3a;
Chris@10 373 T4q = T3a - T38;
Chris@10 374 T4j = T1w - T1q;
Chris@10 375 T1x = T1q + T1w;
Chris@10 376 }
Chris@10 377 T1f = ri[WS(rs, 24)];
Chris@10 378 T32 = T18 * T1c;
Chris@10 379 T1a = T18 * T19;
Chris@10 380 T1i = ii[WS(rs, 24)];
Chris@10 381 T1e = W[46];
Chris@10 382 T1b = W[17];
Chris@10 383 T1h = W[47];
Chris@10 384 }
Chris@10 385 }
Chris@10 386 T34 = T1e * T1i;
Chris@10 387 T1g = T1e * T1f;
Chris@10 388 T33 = FNMS(T1b, T19, T32);
Chris@10 389 T1d = FMA(T1b, T1c, T1a);
Chris@10 390 T35 = FNMS(T1h, T1f, T34);
Chris@10 391 T1j = FMA(T1h, T1i, T1g);
Chris@10 392 }
/* Group of inputs 2, 7, 12, 17, 22; the loads are interleaved with the remaining arithmetic for the 4/9/14/19/24 group. */
Chris@10 393 {
Chris@10 394 E T1I, T1L, T1O, T3h, T1J, T1R, T1N, T1K, T1Q, T3j, T1P;
Chris@10 395 {
Chris@10 396 E T1C, T1F, T1B, T1E;
Chris@10 397 {
Chris@10 398 E T4g, T4n, T4f, T4m, T4p, T36;
Chris@10 399 T1C = ri[WS(rs, 2)];
Chris@10 400 T4p = T35 - T33;
Chris@10 401 T36 = T33 + T35;
Chris@10 402 {
Chris@10 403 E T4i, T1k, T3c, T1y;
Chris@10 404 T4i = T1j - T1d;
Chris@10 405 T1k = T1d + T1j;
Chris@10 406 T5Y = FNMS(KP618033988, T4p, T4q);
Chris@10 407 T4r = FMA(KP618033988, T4q, T4p);
Chris@10 408 T3c = T36 + T3b;
Chris@10 409 T4g = T3b - T36;
Chris@10 410 T61 = FNMS(KP618033988, T4i, T4j);
Chris@10 411 T4k = FMA(KP618033988, T4j, T4i);
Chris@10 412 T1y = T1k + T1x;
Chris@10 413 T4n = T1k - T1x;
Chris@10 414 T3d = T31 + T3c;
Chris@10 415 T4f = FNMS(KP250000000, T3c, T31);
Chris@10 416 T1z = T17 + T1y;
Chris@10 417 T4m = FNMS(KP250000000, T1y, T17);
Chris@10 418 T1F = ii[WS(rs, 2)];
Chris@10 419 }
Chris@10 420 T60 = FMA(KP559016994, T4g, T4f);
Chris@10 421 T4h = FNMS(KP559016994, T4g, T4f);
Chris@10 422 T5X = FNMS(KP559016994, T4n, T4m);
Chris@10 423 T4o = FMA(KP559016994, T4n, T4m);
Chris@10 424 T1B = W[2];
Chris@10 425 }
Chris@10 426 T1E = W[3];
Chris@10 427 {
Chris@10 428 E T1V, T1Y, T21, T3m, T1W, T24, T20, T1X, T23, T3f, T1D, T1U, T1H;
Chris@10 429 T1V = ri[WS(rs, 12)];
Chris@10 430 T1Y = ii[WS(rs, 12)];
Chris@10 431 T3f = T1B * T1F;
Chris@10 432 T1D = T1B * T1C;
Chris@10 433 T1U = W[22];
Chris@10 434 T21 = ri[WS(rs, 17)];
Chris@10 435 T3g = FNMS(T1E, T1C, T3f);
Chris@10 436 T1G = FMA(T1E, T1F, T1D);
Chris@10 437 T3m = T1U * T1Y;
Chris@10 438 T1W = T1U * T1V;
Chris@10 439 T24 = ii[WS(rs, 17)];
Chris@10 440 T20 = W[32];
Chris@10 441 T1X = W[23];
Chris@10 442 T23 = W[33];
Chris@10 443 {
Chris@10 444 E T3n, T1Z, T3p, T25, T3o, T22;
Chris@10 445 T1I = ri[WS(rs, 7)];
Chris@10 446 T3o = T20 * T24;
Chris@10 447 T22 = T20 * T21;
Chris@10 448 T3n = FNMS(T1X, T1V, T3m);
Chris@10 449 T1Z = FMA(T1X, T1Y, T1W);
Chris@10 450 T3p = FNMS(T23, T21, T3o);
Chris@10 451 T25 = FMA(T23, T24, T22);
Chris@10 452 T1L = ii[WS(rs, 7)];
Chris@10 453 T1H = W[12];
Chris@10 454 T3q = T3n + T3p;
Chris@10 455 T4z = T3n - T3p;
Chris@10 456 T4G = T25 - T1Z;
Chris@10 457 T26 = T1Z + T25;
Chris@10 458 }
Chris@10 459 T1O = ri[WS(rs, 22)];
Chris@10 460 T3h = T1H * T1L;
Chris@10 461 T1J = T1H * T1I;
Chris@10 462 T1R = ii[WS(rs, 22)];
Chris@10 463 T1N = W[42];
Chris@10 464 T1K = W[13];
Chris@10 465 T1Q = W[43];
Chris@10 466 }
Chris@10 467 }
Chris@10 468 T3j = T1N * T1R;
Chris@10 469 T1P = T1N * T1O;
Chris@10 470 T3i = FNMS(T1K, T1I, T3h);
Chris@10 471 T1M = FMA(T1K, T1L, T1J);
Chris@10 472 T3k = FNMS(T1Q, T1O, T3j);
Chris@10 473 T1S = FMA(T1Q, T1R, T1P);
Chris@10 474 }
Chris@10 475 }
Chris@10 476 }
Chris@10 477 }
/* All 25 inputs have been read by this point. The rest of the loop body combines the five group results and stores the outputs back into ri/ii. */
Chris@10 478 {
Chris@10 479 E T6R, T5M, T4A, T5J, T4H, T6S, T5I, T4E, T5L, T4x, T3K, T3I, T2K, T74, T76;
Chris@10 480 E T2J;
Chris@10 481 {
Chris@10 482 E T1A, T72, T73, T2H, T28, T2I;
Chris@10 483 {
Chris@10 484 E T3e, T4D, T4w, T4C, T4v, T3H, T4y, T3l;
Chris@10 485 T6R = T2Z + T3d;
Chris@10 486 T3e = T2Z - T3d;
Chris@10 487 T4y = T3k - T3i;
Chris@10 488 T3l = T3i + T3k;
Chris@10 489 {
Chris@10 490 E T4F, T1T, T3r, T27, T3s;
Chris@10 491 T4F = T1S - T1M;
Chris@10 492 T1T = T1M + T1S;
Chris@10 493 T5M = FMA(KP618033988, T4y, T4z);
Chris@10 494 T4A = FNMS(KP618033988, T4z, T4y);
Chris@10 495 T3r = T3l + T3q;
Chris@10 496 T4D = T3q - T3l;
Chris@10 497 T5J = FNMS(KP618033988, T4F, T4G);
Chris@10 498 T4H = FMA(KP618033988, T4G, T4F);
Chris@10 499 T27 = T1T + T26;
Chris@10 500 T4w = T26 - T1T;
Chris@10 501 T3s = T3g + T3r;
Chris@10 502 T4C = FNMS(KP250000000, T3r, T3g);
Chris@10 503 T28 = T1G + T27;
Chris@10 504 T4v = FNMS(KP250000000, T27, T1G);
Chris@10 505 T3H = T3s - T3G;
Chris@10 506 T6S = T3s + T3G;
Chris@10 507 }
Chris@10 508 T5I = FMA(KP559016994, T4D, T4C);
Chris@10 509 T4E = FNMS(KP559016994, T4D, T4C);
Chris@10 510 T5L = FMA(KP559016994, T4w, T4v);
Chris@10 511 T4x = FNMS(KP559016994, T4w, T4v);
Chris@10 512 T3K = FNMS(KP618033988, T3e, T3H);
Chris@10 513 T3I = FMA(KP618033988, T3H, T3e);
Chris@10 514 }
Chris@10 515 T1A = T11 + T1z;
Chris@10 516 T72 = T11 - T1z;
Chris@10 517 T73 = T28 - T2G;
Chris@10 518 T2H = T28 + T2G;
Chris@10 519 T2I = T1A + T2H;
Chris@10 520 T2K = T1A - T2H;
Chris@10 521 T74 = FMA(KP618033988, T73, T72);
Chris@10 522 T76 = FNMS(KP618033988, T72, T73);
/* First store: ri[0] was already copied into T1 above, so overwriting it here is safe. */
Chris@10 523 ri[0] = Tt + T2I;
Chris@10 524 T2J = FNMS(KP250000000, T2I, Tt);
Chris@10 525 }
Chris@10 526 {
Chris@10 527 E T5F, T7B, T7u, T5E, T5C, T7A, T7y, T7t, T5D, T5v;
/* Stores for outputs 0 (ii), 5, 10, 15, 20, then 1, 4, 6, 11, 16, 21. */
Chris@10 528 {
Chris@10 529 E T3Z, T5d, T7p, T7d, T5m, T5l, T56, T7k, T59, T7l, T5z, T5g, T7g, T7i, T52;
Chris@10 530 E T50, T5x, T5q, T5A, T5j, T70, T6Z, T3N;
Chris@10 531 T5F = FNMS(KP559016994, T3M, T3L);
Chris@10 532 T3N = FMA(KP559016994, T3M, T3L);
Chris@10 533 {
Chris@10 534 E T79, T3J, T2L, T6T;
Chris@10 535 T79 = FMA(KP559016994, T78, T77);
Chris@10 536 T7B = FNMS(KP559016994, T78, T77);
Chris@10 537 T3J = FNMS(KP559016994, T2K, T2J);
Chris@10 538 T2L = FMA(KP559016994, T2K, T2J);
Chris@10 539 T6T = T6R + T6S;
Chris@10 540 T70 = T6R - T6S;
Chris@10 541 T3Z = FMA(KP951056516, T3Y, T3N);
Chris@10 542 T5d = FNMS(KP951056516, T3Y, T3N);
Chris@10 543 ri[WS(rs, 5)] = FMA(KP951056516, T3I, T2L);
Chris@10 544 ri[WS(rs, 20)] = FNMS(KP951056516, T3I, T2L);
Chris@10 545 ri[WS(rs, 15)] = FMA(KP951056516, T3K, T3J);
Chris@10 546 ri[WS(rs, 10)] = FNMS(KP951056516, T3K, T3J);
Chris@10 547 ii[0] = T6T + T6Y;
Chris@10 548 T6Z = FNMS(KP250000000, T6T, T6Y);
Chris@10 549 T7p = FMA(KP951056516, T7c, T79);
Chris@10 550 T7d = FNMS(KP951056516, T7c, T79);
Chris@10 551 }
Chris@10 552 {
Chris@10 553 E T5e, T54, T4e, T5f, T5o, T5p, T5i, T4B, T58, T4Y, T55, T4t, T4I, T5h;
Chris@10 554 {
Chris@10 555 E T4Q, T4X, T4l, T4s;
Chris@10 556 {
Chris@10 557 E T46, T71, T75, T4d;
Chris@10 558 T5m = FNMS(KP951056516, T45, T42);
Chris@10 559 T46 = FMA(KP951056516, T45, T42);
Chris@10 560 T71 = FMA(KP559016994, T70, T6Z);
Chris@10 561 T75 = FNMS(KP559016994, T70, T6Z);
Chris@10 562 T4d = FMA(KP951056516, T4c, T49);
Chris@10 563 T5l = FNMS(KP951056516, T4c, T49);
Chris@10 564 T5e = FMA(KP951056516, T4P, T4M);
Chris@10 565 T4Q = FNMS(KP951056516, T4P, T4M);
Chris@10 566 ii[WS(rs, 20)] = FMA(KP951056516, T74, T71);
Chris@10 567 ii[WS(rs, 5)] = FNMS(KP951056516, T74, T71);
Chris@10 568 ii[WS(rs, 15)] = FNMS(KP951056516, T76, T75);
Chris@10 569 ii[WS(rs, 10)] = FMA(KP951056516, T76, T75);
Chris@10 570 T54 = FNMS(KP256756360, T46, T4d);
Chris@10 571 T4e = FMA(KP256756360, T4d, T46);
Chris@10 572 T4X = FNMS(KP951056516, T4W, T4T);
Chris@10 573 T5f = FMA(KP951056516, T4W, T4T);
Chris@10 574 }
Chris@10 575 T5o = FNMS(KP951056516, T4k, T4h);
Chris@10 576 T4l = FMA(KP951056516, T4k, T4h);
Chris@10 577 T4s = FNMS(KP951056516, T4r, T4o);
Chris@10 578 T5p = FMA(KP951056516, T4r, T4o);
Chris@10 579 T5i = FMA(KP951056516, T4A, T4x);
Chris@10 580 T4B = FNMS(KP951056516, T4A, T4x);
Chris@10 581 T58 = FNMS(KP939062505, T4Q, T4X);
Chris@10 582 T4Y = FMA(KP939062505, T4X, T4Q);
Chris@10 583 T55 = FNMS(KP634619297, T4l, T4s);
Chris@10 584 T4t = FMA(KP634619297, T4s, T4l);
Chris@10 585 T4I = FMA(KP951056516, T4H, T4E);
Chris@10 586 T5h = FNMS(KP951056516, T4H, T4E);
Chris@10 587 }
Chris@10 588 {
Chris@10 589 E T7e, T4u, T57, T4J, T7f, T4Z;
Chris@10 590 T7e = FNMS(KP871714437, T55, T54);
Chris@10 591 T56 = FMA(KP871714437, T55, T54);
Chris@10 592 T4u = FMA(KP871714437, T4t, T4e);
Chris@10 593 T7k = FNMS(KP871714437, T4t, T4e);
Chris@10 594 T57 = FNMS(KP549754652, T4B, T4I);
Chris@10 595 T4J = FMA(KP549754652, T4I, T4B);
Chris@10 596 T7f = FMA(KP831864738, T58, T57);
Chris@10 597 T59 = FNMS(KP831864738, T58, T57);
Chris@10 598 T4Z = FMA(KP831864738, T4Y, T4J);
Chris@10 599 T7l = FNMS(KP831864738, T4Y, T4J);
Chris@10 600 T5z = FMA(KP126329378, T5e, T5f);
Chris@10 601 T5g = FNMS(KP126329378, T5f, T5e);
Chris@10 602 T7g = FMA(KP904730450, T7f, T7e);
Chris@10 603 T7i = FNMS(KP904730450, T7f, T7e);
Chris@10 604 T52 = FNMS(KP904730450, T4Z, T4u);
Chris@10 605 T50 = FMA(KP904730450, T4Z, T4u);
Chris@10 606 }
Chris@10 607 T5x = FNMS(KP827271945, T5o, T5p);
Chris@10 608 T5q = FMA(KP827271945, T5p, T5o);
Chris@10 609 T5A = FMA(KP470564281, T5h, T5i);
Chris@10 610 T5j = FNMS(KP470564281, T5i, T5h);
Chris@10 611 }
Chris@10 612 {
Chris@10 613 E T7q, T5B, T5k, T7x, T5w, T5n;
Chris@10 614 ri[WS(rs, 1)] = FMA(KP968583161, T50, T3Z);
Chris@10 615 T7q = FMA(KP912018591, T5A, T5z);
Chris@10 616 T5B = FNMS(KP912018591, T5A, T5z);
Chris@10 617 T5k = FNMS(KP912018591, T5j, T5g);
Chris@10 618 T7x = FMA(KP912018591, T5j, T5g);
Chris@10 619 T5w = FNMS(KP634619297, T5l, T5m);
Chris@10 620 T5n = FMA(KP634619297, T5m, T5l);
Chris@10 621 ii[WS(rs, 1)] = FMA(KP968583161, T7g, T7d);
Chris@10 622 {
Chris@10 623 E T5y, T7w, T7s, T5s, T5u, T7o, T7m, T7n, T7j, T5t;
Chris@10 624 {
Chris@10 625 E T5c, T5a, T51, T7r, T5r, T53, T5b, T7h;
Chris@10 626 T5c = FNMS(KP683113946, T56, T59);
Chris@10 627 T5a = FMA(KP559154169, T59, T56);
Chris@10 628 T7r = FNMS(KP912575812, T5x, T5w);
Chris@10 629 T5y = FMA(KP912575812, T5x, T5w);
Chris@10 630 T5r = FNMS(KP912575812, T5q, T5n);
Chris@10 631 T7w = FMA(KP912575812, T5q, T5n);
Chris@10 632 T7s = FMA(KP851038619, T7r, T7q);
Chris@10 633 T7u = FNMS(KP851038619, T7r, T7q);
Chris@10 634 T5s = FNMS(KP851038619, T5r, T5k);
Chris@10 635 T5u = FMA(KP851038619, T5r, T5k);
Chris@10 636 T51 = FNMS(KP242145790, T50, T3Z);
Chris@10 637 ii[WS(rs, 4)] = FNMS(KP992114701, T7s, T7p);
Chris@10 638 ri[WS(rs, 4)] = FNMS(KP992114701, T5s, T5d);
Chris@10 639 T7o = FNMS(KP683113946, T7k, T7l);
Chris@10 640 T7m = FMA(KP559154169, T7l, T7k);
Chris@10 641 T53 = FMA(KP541454447, T52, T51);
Chris@10 642 T5b = FNMS(KP541454447, T52, T51);
Chris@10 643 T7h = FNMS(KP242145790, T7g, T7d);
Chris@10 644 ri[WS(rs, 11)] = FNMS(KP833417178, T5c, T5b);
Chris@10 645 ri[WS(rs, 16)] = FMA(KP833417178, T5c, T5b);
Chris@10 646 ri[WS(rs, 21)] = FNMS(KP921177326, T5a, T53);
Chris@10 647 ri[WS(rs, 6)] = FMA(KP921177326, T5a, T53);
Chris@10 648 T7n = FNMS(KP541454447, T7i, T7h);
Chris@10 649 T7j = FMA(KP541454447, T7i, T7h);
Chris@10 650 }
Chris@10 651 T5E = FMA(KP525970792, T5y, T5B);
Chris@10 652 T5C = FNMS(KP726211448, T5B, T5y);
Chris@10 653 ii[WS(rs, 21)] = FMA(KP921177326, T7m, T7j);
Chris@10 654 ii[WS(rs, 6)] = FNMS(KP921177326, T7m, T7j);
Chris@10 655 ii[WS(rs, 11)] = FMA(KP833417178, T7o, T7n);
Chris@10 656 ii[WS(rs, 16)] = FNMS(KP833417178, T7o, T7n);
Chris@10 657 T5t = FMA(KP248028675, T5s, T5d);
Chris@10 658 T7A = FNMS(KP525970792, T7w, T7x);
Chris@10 659 T7y = FMA(KP726211448, T7x, T7w);
Chris@10 660 T7t = FMA(KP248028675, T7s, T7p);
Chris@10 661 T5D = FNMS(KP554608978, T5u, T5t);
Chris@10 662 T5v = FMA(KP554608978, T5u, T5t);
Chris@10 663 }
Chris@10 664 }
Chris@10 665 }
/* Stores for outputs 9, 14, 19, 24, then 2, 3, 8, 13, 18, 23. */
Chris@10 666 {
Chris@10 667 E T5H, T6p, T7P, T7D, T6y, T6x, T6l, T7X, T6i, T7W, T6L, T6s, T7S, T7U, T6e;
Chris@10 668 E T6c, T6J, T6C, T6M, T6v, T7z, T7v;
Chris@10 669 ri[WS(rs, 14)] = FNMS(KP943557151, T5E, T5D);
Chris@10 670 ri[WS(rs, 19)] = FMA(KP943557151, T5E, T5D);
Chris@10 671 ri[WS(rs, 24)] = FMA(KP803003575, T5C, T5v);
Chris@10 672 ri[WS(rs, 9)] = FNMS(KP803003575, T5C, T5v);
Chris@10 673 T7z = FNMS(KP554608978, T7u, T7t);
Chris@10 674 T7v = FMA(KP554608978, T7u, T7t);
Chris@10 675 T5H = FMA(KP951056516, T5G, T5F);
Chris@10 676 T6p = FNMS(KP951056516, T5G, T5F);
Chris@10 677 ii[WS(rs, 14)] = FMA(KP943557151, T7A, T7z);
Chris@10 678 ii[WS(rs, 19)] = FNMS(KP943557151, T7A, T7z);
Chris@10 679 ii[WS(rs, 24)] = FMA(KP803003575, T7y, T7v);
Chris@10 680 ii[WS(rs, 9)] = FNMS(KP803003575, T7y, T7v);
Chris@10 681 {
Chris@10 682 E T6t, T6u, T6A, T6j, T5O, T6B, T6q, T6r, T5Z, T6h, T6a, T6k, T5V, T62;
Chris@10 683 {
Chris@10 684 E T66, T69, T5K, T5N, T5R, T5U;
Chris@10 685 T6t = FNMS(KP951056516, T5J, T5I);
Chris@10 686 T5K = FMA(KP951056516, T5J, T5I);
Chris@10 687 T5N = FMA(KP951056516, T5M, T5L);
Chris@10 688 T6u = FNMS(KP951056516, T5M, T5L);
Chris@10 689 T6A = FMA(KP951056516, T65, T64);
Chris@10 690 T66 = FNMS(KP951056516, T65, T64);
Chris@10 691 T7P = FNMS(KP951056516, T7C, T7B);
Chris@10 692 T7D = FMA(KP951056516, T7C, T7B);
Chris@10 693 T6j = FNMS(KP062914667, T5K, T5N);
Chris@10 694 T5O = FMA(KP062914667, T5N, T5K);
Chris@10 695 T69 = FMA(KP951056516, T68, T67);
Chris@10 696 T6B = FNMS(KP951056516, T68, T67);
Chris@10 697 T6q = FMA(KP951056516, T5Q, T5P);
Chris@10 698 T5R = FNMS(KP951056516, T5Q, T5P);
Chris@10 699 T5U = FNMS(KP951056516, T5T, T5S);
Chris@10 700 T6r = FMA(KP951056516, T5T, T5S);
Chris@10 701 T6y = FMA(KP951056516, T5Y, T5X);
Chris@10 702 T5Z = FNMS(KP951056516, T5Y, T5X);
Chris@10 703 T6h = FNMS(KP939062505, T66, T69);
Chris@10 704 T6a = FMA(KP939062505, T69, T66);
Chris@10 705 T6k = FMA(KP827271945, T5R, T5U);
Chris@10 706 T5V = FNMS(KP827271945, T5U, T5R);
Chris@10 707 T62 = FMA(KP951056516, T61, T60);
Chris@10 708 T6x = FNMS(KP951056516, T61, T60);
Chris@10 709 }
Chris@10 710 {
Chris@10 711 E T7Q, T5W, T6g, T63, T7R, T6b;
Chris@10 712 T7Q = FMA(KP772036680, T6k, T6j);
Chris@10 713 T6l = FNMS(KP772036680, T6k, T6j);
Chris@10 714 T5W = FMA(KP772036680, T5V, T5O);
Chris@10 715 T7X = FNMS(KP772036680, T5V, T5O);
Chris@10 716 T6g = FMA(KP126329378, T5Z, T62);
Chris@10 717 T63 = FNMS(KP126329378, T62, T5Z);
Chris@10 718 T7R = FNMS(KP734762448, T6h, T6g);
Chris@10 719 T6i = FMA(KP734762448, T6h, T6g);
Chris@10 720 T6b = FNMS(KP734762448, T6a, T63);
Chris@10 721 T7W = FMA(KP734762448, T6a, T63);
Chris@10 722 T6L = FNMS(KP062914667, T6q, T6r);
Chris@10 723 T6s = FMA(KP062914667, T6r, T6q);
Chris@10 724 T7S = FMA(KP994076283, T7R, T7Q);
Chris@10 725 T7U = FNMS(KP994076283, T7R, T7Q);
Chris@10 726 T6e = FMA(KP994076283, T6b, T5W);
Chris@10 727 T6c = FNMS(KP994076283, T6b, T5W);
Chris@10 728 }
Chris@10 729 T6J = FNMS(KP549754652, T6A, T6B);
Chris@10 730 T6C = FMA(KP549754652, T6B, T6A);
Chris@10 731 T6M = FNMS(KP634619297, T6t, T6u);
Chris@10 732 T6v = FMA(KP634619297, T6u, T6t);
Chris@10 733 }
Chris@10 734 {
Chris@10 735 E T7E, T6N, T6w, T7L, T6I, T6z;
Chris@10 736 ri[WS(rs, 3)] = FMA(KP998026728, T6c, T5H);
Chris@10 737 T7E = FMA(KP845997307, T6M, T6L);
Chris@10 738 T6N = FNMS(KP845997307, T6M, T6L);
Chris@10 739 T6w = FMA(KP845997307, T6v, T6s);
Chris@10 740 T7L = FNMS(KP845997307, T6v, T6s);
Chris@10 741 T6I = FMA(KP470564281, T6x, T6y);
Chris@10 742 T6z = FNMS(KP470564281, T6y, T6x);
Chris@10 743 ii[WS(rs, 3)] = FNMS(KP998026728, T7S, T7P);
Chris@10 744 {
Chris@10 745 E T6K, T7K, T7G, T6E, T6G, T80, T7Y, T7Z, T7V, T6F;
Chris@10 746 {
Chris@10 747 E T6o, T6m, T6d, T7F, T6D, T6f, T6n, T7T;
Chris@10 748 T6o = FMA(KP614372930, T6i, T6l);
Chris@10 749 T6m = FNMS(KP621716863, T6l, T6i);
Chris@10 750 T7F = FNMS(KP968479752, T6J, T6I);
Chris@10 751 T6K = FMA(KP968479752, T6J, T6I);
Chris@10 752 T6D = FMA(KP968479752, T6C, T6z);
Chris@10 753 T7K = FNMS(KP968479752, T6C, T6z);
Chris@10 754 T7G = FMA(KP906616052, T7F, T7E);
Chris@10 755 T7I = FNMS(KP906616052, T7F, T7E);
Chris@10 756 T6E = FMA(KP906616052, T6D, T6w);
Chris@10 757 T6G = FNMS(KP906616052, T6D, T6w);
Chris@10 758 T6d = FNMS(KP249506682, T6c, T5H);
Chris@10 759 ii[WS(rs, 2)] = FNMS(KP998026728, T7G, T7D);
Chris@10 760 ri[WS(rs, 2)] = FMA(KP998026728, T6E, T6p);
Chris@10 761 T80 = FNMS(KP614372930, T7W, T7X);
Chris@10 762 T7Y = FMA(KP621716863, T7X, T7W);
Chris@10 763 T6f = FNMS(KP557913902, T6e, T6d);
Chris@10 764 T6n = FMA(KP557913902, T6e, T6d);
Chris@10 765 T7T = FMA(KP249506682, T7S, T7P);
Chris@10 766 ri[WS(rs, 18)] = FNMS(KP949179823, T6o, T6n);
Chris@10 767 ri[WS(rs, 13)] = FMA(KP949179823, T6o, T6n);
Chris@10 768 ri[WS(rs, 8)] = FMA(KP943557151, T6m, T6f);
Chris@10 769 ri[WS(rs, 23)] = FNMS(KP943557151, T6m, T6f);
Chris@10 770 T7Z = FNMS(KP557913902, T7U, T7T);
Chris@10 771 T7V = FMA(KP557913902, T7U, T7T);
Chris@10 772 }
Chris@10 773 T6Q = FNMS(KP560319534, T6K, T6N);
Chris@10 774 T6O = FMA(KP681693190, T6N, T6K);
Chris@10 775 ii[WS(rs, 23)] = FMA(KP943557151, T7Y, T7V);
Chris@10 776 ii[WS(rs, 8)] = FNMS(KP943557151, T7Y, T7V);
Chris@10 777 ii[WS(rs, 13)] = FMA(KP949179823, T80, T7Z);
Chris@10 778 ii[WS(rs, 18)] = FNMS(KP949179823, T80, T7Z);
Chris@10 779 T6F = FNMS(KP249506682, T6E, T6p);
Chris@10 780 T7O = FNMS(KP560319534, T7K, T7L);
Chris@10 781 T7M = FMA(KP681693190, T7L, T7K);
Chris@10 782 T7H = FMA(KP249506682, T7G, T7D);
Chris@10 783 T6P = FMA(KP557913902, T6G, T6F);
Chris@10 784 T6H = FNMS(KP557913902, T6G, T6F);
Chris@10 785 }
Chris@10 786 }
Chris@10 787 }
Chris@10 788 }
Chris@10 789 }
Chris@10 790 }
/* Final stores for outputs 7, 12, 17, 22, built from the temporaries declared at the top of the loop body. */
Chris@10 791 ri[WS(rs, 12)] = FNMS(KP949179823, T6Q, T6P);
Chris@10 792 ri[WS(rs, 17)] = FMA(KP949179823, T6Q, T6P);
Chris@10 793 ri[WS(rs, 7)] = FMA(KP860541664, T6O, T6H);
Chris@10 794 ri[WS(rs, 22)] = FNMS(KP860541664, T6O, T6H);
Chris@10 795 T7J = FMA(KP557913902, T7I, T7H);
Chris@10 796 T7N = FNMS(KP557913902, T7I, T7H);
Chris@10 797 ii[WS(rs, 12)] = FNMS(KP949179823, T7O, T7N);
Chris@10 798 ii[WS(rs, 17)] = FMA(KP949179823, T7O, T7N);
Chris@10 799 ii[WS(rs, 22)] = FNMS(KP860541664, T7M, T7J);
Chris@10 800 ii[WS(rs, 7)] = FMA(KP860541664, T7M, T7J);
Chris@10 801 }
Chris@10 802 }
Chris@10 803 }
Chris@10 804
/*
 * Twiddle instruction table for the HAVE_FMA build of t1_25; its address is
 * stored in the ct_desc initializer that follows.  It holds two entries:
 * {TW_FULL, 0, 25} and {TW_NEXT, 1, 0}.
 * NOTE(review): TW_FULL / TW_NEXT and the field meanings of tw_instr are
 * declared outside this file chunk -- presumably "full twiddle set for
 * radix 25" followed by an advance/terminator record; confirm against the
 * tw_instr declaration.
 */
Chris@10 805 static const tw_instr twinstr[] = {
Chris@10 806 {TW_FULL, 0, 25},
Chris@10 807 {TW_NEXT, 1, 0}
Chris@10 808 };
Chris@10 809
/*
 * Codelet descriptor for the HAVE_FMA build: the value 25, the name string
 * "t1_25", the twiddle instruction table, &GENUS, and the tuple
 * {84, 48, 316, 0}, whose first three entries equal the add / multiply /
 * fused-multiply-add counts quoted in the generator comment for this variant.
 * NOTE(review): the meaning of the fourth tuple entry and of the three
 * trailing zeros is defined by ct_desc elsewhere -- not visible here.
 */
Chris@10 810 static const ct_desc desc = { 25, "t1_25", twinstr, &GENUS, {84, 48, 316, 0}, 0, 0, 0 };
Chris@10 811
/*
 * Registration entry point (HAVE_FMA build): hands the t1_25 function and
 * its descriptor to planner p via X(kdft_dit_register).
 * X() is a name-wrapping macro defined outside this chunk.
 */
Chris@10 812 void X(codelet_t1_25) (planner *p) {
Chris@10 813 X(kdft_dit_register) (p, t1_25, &desc);
Chris@10 814 }
Chris@10 815 #else /* HAVE_FMA */
Chris@10 816
Chris@10 817 /* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 25 -name t1_25 -include t.h */
Chris@10 818
Chris@10 819 /*
Chris@10 820 * This function contains 400 FP additions, 280 FP multiplications,
Chris@10 821 * (or, 260 additions, 140 multiplications, 140 fused multiply/add),
Chris@10 822 * 101 stack variables, 20 constants, and 100 memory accesses
Chris@10 823 */
Chris@10 824 #include "t.h"
Chris@10 825
/*
 * t1_25, non-FMA variant (compiled when HAVE_FMA is not defined).
 *
 * Generated by gen_twiddle with -n 25.  For every loop iteration the code
 * works in place on 25 complex values held in split form:
 *   - reads ri[0] / ii[0] unchanged, and ri[WS(rs, k)] / ii[WS(rs, k)] for
 *     k = 1..24;
 *   - combines input k (k = 1..24) with the coefficient pair
 *     (W[2k-2], W[2k-1]) as
 *         re = W[2k-2] * r + W[2k-1] * i
 *         im = W[2k-2] * i - W[2k-1] * r
 *   - mixes the results through groups of five using the constants
 *     KP559016994, KP250000000, KP951056516 and KP587785252, then through a
 *     second stage using the remaining KP* constants;
 *   - stores all 25 results back into the same ri / ii slots.
 *
 * Parameters:
 *   ri, ii  real and imaginary data arrays; read and overwritten.
 *   W       coefficient table; 48 values are consumed per iteration.
 *   rs      stride handed to WS() to address the 25 elements.
 *   mb, me  the loop runs for m = mb, mb+1, ..., me-1.
 *   ms      amount added to ri and ii after each iteration.
 *
 * NOTE(review): FMA, FNMS, DK, WS, E, R, INT and MAKE_VOLATILE_STRIDE are
 * defined outside this chunk.  The formulas above assume the usual
 * FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b -- confirm against the
 * project headers.
 */
Chris@10 826 static void t1_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@10 827 {
/* Numeric constants used by the butterflies below (name encodes the leading digits). */
Chris@10 828 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@10 829 DK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@10 830 DK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@10 831 DK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@10 832 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@10 833 DK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@10 834 DK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@10 835 DK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@10 836 DK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@10 837 DK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@10 838 DK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@10 839 DK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@10 840 DK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@10 841 DK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@10 842 DK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@10 843 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@10 844 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@10 845 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@10 846 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@10 847 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@10 848 {
Chris@10 849 INT m;
/* W is first offset by mb * 48; each pass then advances ri and ii by ms and W by 48. */
Chris@10 850 for (m = mb, W = W + (mb * 48); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 48, MAKE_VOLATILE_STRIDE(50, rs)) {
/* Values that survive from the five input groups into the output stages. */
Chris@10 851 E T1, T6b, T2l, T6o, To, T2m, T6a, T6p, T6t, T6S, T2u, T4I, T2i, T60, T3O;
Chris@10 852 E T5D, T4r, T58, T3Z, T5C, T4q, T5b, TS, T5W, T2G, T5s, T4g, T4M, T2R, T5t;
Chris@10 853 E T4h, T4P, T1l, T5X, T33, T5w, T4j, T4W, T3e, T5v, T4k, T4T, T1P, T5Z, T3r;
Chris@10 854 E T5z, T4o, T51, T3C, T5A, T4n, T54;
/* Input group 0, 5, 10, 15, 20: element 0 is used as is, the others are combined with W[8..9], W[38..39], W[18..19], W[28..29]. */
Chris@10 855 {
Chris@10 856 E T6, T2o, Tb, T2p, Tc, T68, Th, T2r, Tm, T2s, Tn, T69;
Chris@10 857 T1 = ri[0];
Chris@10 858 T6b = ii[0];
Chris@10 859 {
Chris@10 860 E T3, T5, T2, T4;
Chris@10 861 T3 = ri[WS(rs, 5)];
Chris@10 862 T5 = ii[WS(rs, 5)];
Chris@10 863 T2 = W[8];
Chris@10 864 T4 = W[9];
Chris@10 865 T6 = FMA(T2, T3, T4 * T5);
Chris@10 866 T2o = FNMS(T4, T3, T2 * T5);
Chris@10 867 }
Chris@10 868 {
Chris@10 869 E T8, Ta, T7, T9;
Chris@10 870 T8 = ri[WS(rs, 20)];
Chris@10 871 Ta = ii[WS(rs, 20)];
Chris@10 872 T7 = W[38];
Chris@10 873 T9 = W[39];
Chris@10 874 Tb = FMA(T7, T8, T9 * Ta);
Chris@10 875 T2p = FNMS(T9, T8, T7 * Ta);
Chris@10 876 }
Chris@10 877 Tc = T6 + Tb;
Chris@10 878 T68 = T2o + T2p;
Chris@10 879 {
Chris@10 880 E Te, Tg, Td, Tf;
Chris@10 881 Te = ri[WS(rs, 10)];
Chris@10 882 Tg = ii[WS(rs, 10)];
Chris@10 883 Td = W[18];
Chris@10 884 Tf = W[19];
Chris@10 885 Th = FMA(Td, Te, Tf * Tg);
Chris@10 886 T2r = FNMS(Tf, Te, Td * Tg);
Chris@10 887 }
Chris@10 888 {
Chris@10 889 E Tj, Tl, Ti, Tk;
Chris@10 890 Tj = ri[WS(rs, 15)];
Chris@10 891 Tl = ii[WS(rs, 15)];
Chris@10 892 Ti = W[28];
Chris@10 893 Tk = W[29];
Chris@10 894 Tm = FMA(Ti, Tj, Tk * Tl);
Chris@10 895 T2s = FNMS(Tk, Tj, Ti * Tl);
Chris@10 896 }
Chris@10 897 Tn = Th + Tm;
Chris@10 898 T69 = T2r + T2s;
Chris@10 899 T2l = KP559016994 * (Tc - Tn);
Chris@10 900 T6o = KP559016994 * (T68 - T69);
Chris@10 901 To = Tc + Tn;
Chris@10 902 T2m = FNMS(KP250000000, To, T1);
Chris@10 903 T6a = T68 + T69;
Chris@10 904 T6p = FNMS(KP250000000, T6a, T6b);
Chris@10 905 {
Chris@10 906 E T6r, T6s, T2q, T2t;
Chris@10 907 T6r = T6 - Tb;
Chris@10 908 T6s = Th - Tm;
Chris@10 909 T6t = FMA(KP951056516, T6r, KP587785252 * T6s);
Chris@10 910 T6S = FNMS(KP587785252, T6r, KP951056516 * T6s);
Chris@10 911 T2q = T2o - T2p;
Chris@10 912 T2t = T2r - T2s;
Chris@10 913 T2u = FMA(KP951056516, T2q, KP587785252 * T2t);
Chris@10 914 T4I = FNMS(KP587785252, T2q, KP951056516 * T2t);
Chris@10 915 }
Chris@10 916 }
/* Input group 3, 8, 13, 18, 23 (coefficients W[4..5], W[14..15], W[24..25], W[34..35], W[44..45]). */
Chris@10 917 {
Chris@10 918 E T1U, T3S, T3J, T3M, T3X, T3W, T3P, T3Q, T3T, T25, T2g, T2h;
Chris@10 919 {
Chris@10 920 E T1R, T1T, T1Q, T1S;
Chris@10 921 T1R = ri[WS(rs, 3)];
Chris@10 922 T1T = ii[WS(rs, 3)];
Chris@10 923 T1Q = W[4];
Chris@10 924 T1S = W[5];
Chris@10 925 T1U = FMA(T1Q, T1R, T1S * T1T);
Chris@10 926 T3S = FNMS(T1S, T1R, T1Q * T1T);
Chris@10 927 }
Chris@10 928 {
Chris@10 929 E T1Z, T3H, T2f, T3L, T24, T3I, T2a, T3K;
Chris@10 930 {
Chris@10 931 E T1W, T1Y, T1V, T1X;
Chris@10 932 T1W = ri[WS(rs, 8)];
Chris@10 933 T1Y = ii[WS(rs, 8)];
Chris@10 934 T1V = W[14];
Chris@10 935 T1X = W[15];
Chris@10 936 T1Z = FMA(T1V, T1W, T1X * T1Y);
Chris@10 937 T3H = FNMS(T1X, T1W, T1V * T1Y);
Chris@10 938 }
Chris@10 939 {
Chris@10 940 E T2c, T2e, T2b, T2d;
Chris@10 941 T2c = ri[WS(rs, 18)];
Chris@10 942 T2e = ii[WS(rs, 18)];
Chris@10 943 T2b = W[34];
Chris@10 944 T2d = W[35];
Chris@10 945 T2f = FMA(T2b, T2c, T2d * T2e);
Chris@10 946 T3L = FNMS(T2d, T2c, T2b * T2e);
Chris@10 947 }
Chris@10 948 {
Chris@10 949 E T21, T23, T20, T22;
Chris@10 950 T21 = ri[WS(rs, 23)];
Chris@10 951 T23 = ii[WS(rs, 23)];
Chris@10 952 T20 = W[44];
Chris@10 953 T22 = W[45];
Chris@10 954 T24 = FMA(T20, T21, T22 * T23);
Chris@10 955 T3I = FNMS(T22, T21, T20 * T23);
Chris@10 956 }
Chris@10 957 {
Chris@10 958 E T27, T29, T26, T28;
Chris@10 959 T27 = ri[WS(rs, 13)];
Chris@10 960 T29 = ii[WS(rs, 13)];
Chris@10 961 T26 = W[24];
Chris@10 962 T28 = W[25];
Chris@10 963 T2a = FMA(T26, T27, T28 * T29);
Chris@10 964 T3K = FNMS(T28, T27, T26 * T29);
Chris@10 965 }
Chris@10 966 T3J = T3H - T3I;
Chris@10 967 T3M = T3K - T3L;
Chris@10 968 T3X = T2a - T2f;
Chris@10 969 T3W = T1Z - T24;
Chris@10 970 T3P = T3H + T3I;
Chris@10 971 T3Q = T3K + T3L;
Chris@10 972 T3T = T3P + T3Q;
Chris@10 973 T25 = T1Z + T24;
Chris@10 974 T2g = T2a + T2f;
Chris@10 975 T2h = T25 + T2g;
Chris@10 976 }
Chris@10 977 T2i = T1U + T2h;
Chris@10 978 T60 = T3S + T3T;
Chris@10 979 {
Chris@10 980 E T3N, T57, T3G, T56, T3E, T3F;
Chris@10 981 T3N = FMA(KP951056516, T3J, KP587785252 * T3M);
Chris@10 982 T57 = FNMS(KP587785252, T3J, KP951056516 * T3M);
Chris@10 983 T3E = KP559016994 * (T25 - T2g);
Chris@10 984 T3F = FNMS(KP250000000, T2h, T1U);
Chris@10 985 T3G = T3E + T3F;
Chris@10 986 T56 = T3F - T3E;
Chris@10 987 T3O = T3G + T3N;
Chris@10 988 T5D = T56 + T57;
Chris@10 989 T4r = T3G - T3N;
Chris@10 990 T58 = T56 - T57;
Chris@10 991 }
Chris@10 992 {
Chris@10 993 E T3Y, T59, T3V, T5a, T3R, T3U;
Chris@10 994 T3Y = FMA(KP951056516, T3W, KP587785252 * T3X);
Chris@10 995 T59 = FNMS(KP587785252, T3W, KP951056516 * T3X);
Chris@10 996 T3R = KP559016994 * (T3P - T3Q);
Chris@10 997 T3U = FNMS(KP250000000, T3T, T3S);
Chris@10 998 T3V = T3R + T3U;
Chris@10 999 T5a = T3U - T3R;
Chris@10 1000 T3Z = T3V - T3Y;
Chris@10 1001 T5C = T5a - T59;
Chris@10 1002 T4q = T3Y + T3V;
Chris@10 1003 T5b = T59 + T5a;
Chris@10 1004 }
Chris@10 1005 }
/* Input group 1, 6, 11, 16, 21 (coefficients W[0..1], W[10..11], W[20..21], W[30..31], W[40..41]). */
Chris@10 1006 {
Chris@10 1007 E Tu, T2K, T2B, T2E, T2P, T2O, T2H, T2I, T2L, TF, TQ, TR;
Chris@10 1008 {
Chris@10 1009 E Tr, Tt, Tq, Ts;
Chris@10 1010 Tr = ri[WS(rs, 1)];
Chris@10 1011 Tt = ii[WS(rs, 1)];
Chris@10 1012 Tq = W[0];
Chris@10 1013 Ts = W[1];
Chris@10 1014 Tu = FMA(Tq, Tr, Ts * Tt);
Chris@10 1015 T2K = FNMS(Ts, Tr, Tq * Tt);
Chris@10 1016 }
Chris@10 1017 {
Chris@10 1018 E Tz, T2z, TP, T2D, TE, T2A, TK, T2C;
Chris@10 1019 {
Chris@10 1020 E Tw, Ty, Tv, Tx;
Chris@10 1021 Tw = ri[WS(rs, 6)];
Chris@10 1022 Ty = ii[WS(rs, 6)];
Chris@10 1023 Tv = W[10];
Chris@10 1024 Tx = W[11];
Chris@10 1025 Tz = FMA(Tv, Tw, Tx * Ty);
Chris@10 1026 T2z = FNMS(Tx, Tw, Tv * Ty);
Chris@10 1027 }
Chris@10 1028 {
Chris@10 1029 E TM, TO, TL, TN;
Chris@10 1030 TM = ri[WS(rs, 16)];
Chris@10 1031 TO = ii[WS(rs, 16)];
Chris@10 1032 TL = W[30];
Chris@10 1033 TN = W[31];
Chris@10 1034 TP = FMA(TL, TM, TN * TO);
Chris@10 1035 T2D = FNMS(TN, TM, TL * TO);
Chris@10 1036 }
Chris@10 1037 {
Chris@10 1038 E TB, TD, TA, TC;
Chris@10 1039 TB = ri[WS(rs, 21)];
Chris@10 1040 TD = ii[WS(rs, 21)];
Chris@10 1041 TA = W[40];
Chris@10 1042 TC = W[41];
Chris@10 1043 TE = FMA(TA, TB, TC * TD);
Chris@10 1044 T2A = FNMS(TC, TB, TA * TD);
Chris@10 1045 }
Chris@10 1046 {
Chris@10 1047 E TH, TJ, TG, TI;
Chris@10 1048 TH = ri[WS(rs, 11)];
Chris@10 1049 TJ = ii[WS(rs, 11)];
Chris@10 1050 TG = W[20];
Chris@10 1051 TI = W[21];
Chris@10 1052 TK = FMA(TG, TH, TI * TJ);
Chris@10 1053 T2C = FNMS(TI, TH, TG * TJ);
Chris@10 1054 }
Chris@10 1055 T2B = T2z - T2A;
Chris@10 1056 T2E = T2C - T2D;
Chris@10 1057 T2P = TK - TP;
Chris@10 1058 T2O = Tz - TE;
Chris@10 1059 T2H = T2z + T2A;
Chris@10 1060 T2I = T2C + T2D;
Chris@10 1061 T2L = T2H + T2I;
Chris@10 1062 TF = Tz + TE;
Chris@10 1063 TQ = TK + TP;
Chris@10 1064 TR = TF + TQ;
Chris@10 1065 }
Chris@10 1066 TS = Tu + TR;
Chris@10 1067 T5W = T2K + T2L;
Chris@10 1068 {
Chris@10 1069 E T2F, T4L, T2y, T4K, T2w, T2x;
Chris@10 1070 T2F = FMA(KP951056516, T2B, KP587785252 * T2E);
Chris@10 1071 T4L = FNMS(KP587785252, T2B, KP951056516 * T2E);
Chris@10 1072 T2w = KP559016994 * (TF - TQ);
Chris@10 1073 T2x = FNMS(KP250000000, TR, Tu);
Chris@10 1074 T2y = T2w + T2x;
Chris@10 1075 T4K = T2x - T2w;
Chris@10 1076 T2G = T2y + T2F;
Chris@10 1077 T5s = T4K + T4L;
Chris@10 1078 T4g = T2y - T2F;
Chris@10 1079 T4M = T4K - T4L;
Chris@10 1080 }
Chris@10 1081 {
Chris@10 1082 E T2Q, T4N, T2N, T4O, T2J, T2M;
Chris@10 1083 T2Q = FMA(KP951056516, T2O, KP587785252 * T2P);
Chris@10 1084 T4N = FNMS(KP587785252, T2O, KP951056516 * T2P);
Chris@10 1085 T2J = KP559016994 * (T2H - T2I);
Chris@10 1086 T2M = FNMS(KP250000000, T2L, T2K);
Chris@10 1087 T2N = T2J + T2M;
Chris@10 1088 T4O = T2M - T2J;
Chris@10 1089 T2R = T2N - T2Q;
Chris@10 1090 T5t = T4O - T4N;
Chris@10 1091 T4h = T2Q + T2N;
Chris@10 1092 T4P = T4N + T4O;
Chris@10 1093 }
Chris@10 1094 }
/* Input group 4, 9, 14, 19, 24 (coefficients W[6..7], W[16..17], W[26..27], W[36..37], W[46..47]). */
Chris@10 1095 {
Chris@10 1096 E TX, T37, T2Y, T31, T3c, T3b, T34, T35, T38, T18, T1j, T1k;
Chris@10 1097 {
Chris@10 1098 E TU, TW, TT, TV;
Chris@10 1099 TU = ri[WS(rs, 4)];
Chris@10 1100 TW = ii[WS(rs, 4)];
Chris@10 1101 TT = W[6];
Chris@10 1102 TV = W[7];
Chris@10 1103 TX = FMA(TT, TU, TV * TW);
Chris@10 1104 T37 = FNMS(TV, TU, TT * TW);
Chris@10 1105 }
Chris@10 1106 {
Chris@10 1107 E T12, T2W, T1i, T30, T17, T2X, T1d, T2Z;
Chris@10 1108 {
Chris@10 1109 E TZ, T11, TY, T10;
Chris@10 1110 TZ = ri[WS(rs, 9)];
Chris@10 1111 T11 = ii[WS(rs, 9)];
Chris@10 1112 TY = W[16];
Chris@10 1113 T10 = W[17];
Chris@10 1114 T12 = FMA(TY, TZ, T10 * T11);
Chris@10 1115 T2W = FNMS(T10, TZ, TY * T11);
Chris@10 1116 }
Chris@10 1117 {
Chris@10 1118 E T1f, T1h, T1e, T1g;
Chris@10 1119 T1f = ri[WS(rs, 19)];
Chris@10 1120 T1h = ii[WS(rs, 19)];
Chris@10 1121 T1e = W[36];
Chris@10 1122 T1g = W[37];
Chris@10 1123 T1i = FMA(T1e, T1f, T1g * T1h);
Chris@10 1124 T30 = FNMS(T1g, T1f, T1e * T1h);
Chris@10 1125 }
Chris@10 1126 {
Chris@10 1127 E T14, T16, T13, T15;
Chris@10 1128 T14 = ri[WS(rs, 24)];
Chris@10 1129 T16 = ii[WS(rs, 24)];
Chris@10 1130 T13 = W[46];
Chris@10 1131 T15 = W[47];
Chris@10 1132 T17 = FMA(T13, T14, T15 * T16);
Chris@10 1133 T2X = FNMS(T15, T14, T13 * T16);
Chris@10 1134 }
Chris@10 1135 {
Chris@10 1136 E T1a, T1c, T19, T1b;
Chris@10 1137 T1a = ri[WS(rs, 14)];
Chris@10 1138 T1c = ii[WS(rs, 14)];
Chris@10 1139 T19 = W[26];
Chris@10 1140 T1b = W[27];
Chris@10 1141 T1d = FMA(T19, T1a, T1b * T1c);
Chris@10 1142 T2Z = FNMS(T1b, T1a, T19 * T1c);
Chris@10 1143 }
Chris@10 1144 T2Y = T2W - T2X;
Chris@10 1145 T31 = T2Z - T30;
Chris@10 1146 T3c = T1d - T1i;
Chris@10 1147 T3b = T12 - T17;
Chris@10 1148 T34 = T2W + T2X;
Chris@10 1149 T35 = T2Z + T30;
Chris@10 1150 T38 = T34 + T35;
Chris@10 1151 T18 = T12 + T17;
Chris@10 1152 T1j = T1d + T1i;
Chris@10 1153 T1k = T18 + T1j;
Chris@10 1154 }
Chris@10 1155 T1l = TX + T1k;
Chris@10 1156 T5X = T37 + T38;
Chris@10 1157 {
Chris@10 1158 E T32, T4V, T2V, T4U, T2T, T2U;
Chris@10 1159 T32 = FMA(KP951056516, T2Y, KP587785252 * T31);
Chris@10 1160 T4V = FNMS(KP587785252, T2Y, KP951056516 * T31);
Chris@10 1161 T2T = KP559016994 * (T18 - T1j);
Chris@10 1162 T2U = FNMS(KP250000000, T1k, TX);
Chris@10 1163 T2V = T2T + T2U;
Chris@10 1164 T4U = T2U - T2T;
Chris@10 1165 T33 = T2V + T32;
Chris@10 1166 T5w = T4U + T4V;
Chris@10 1167 T4j = T2V - T32;
Chris@10 1168 T4W = T4U - T4V;
Chris@10 1169 }
Chris@10 1170 {
Chris@10 1171 E T3d, T4R, T3a, T4S, T36, T39;
Chris@10 1172 T3d = FMA(KP951056516, T3b, KP587785252 * T3c);
Chris@10 1173 T4R = FNMS(KP587785252, T3b, KP951056516 * T3c);
Chris@10 1174 T36 = KP559016994 * (T34 - T35);
Chris@10 1175 T39 = FNMS(KP250000000, T38, T37);
Chris@10 1176 T3a = T36 + T39;
Chris@10 1177 T4S = T39 - T36;
Chris@10 1178 T3e = T3a - T3d;
Chris@10 1179 T5v = T4S - T4R;
Chris@10 1180 T4k = T3d + T3a;
Chris@10 1181 T4T = T4R + T4S;
Chris@10 1182 }
Chris@10 1183 }
/* Input group 2, 7, 12, 17, 22 (coefficients W[2..3], W[12..13], W[22..23], W[32..33], W[42..43]). */
Chris@10 1184 {
Chris@10 1185 E T1r, T3v, T3m, T3p, T3A, T3z, T3s, T3t, T3w, T1C, T1N, T1O;
Chris@10 1186 {
Chris@10 1187 E T1o, T1q, T1n, T1p;
Chris@10 1188 T1o = ri[WS(rs, 2)];
Chris@10 1189 T1q = ii[WS(rs, 2)];
Chris@10 1190 T1n = W[2];
Chris@10 1191 T1p = W[3];
Chris@10 1192 T1r = FMA(T1n, T1o, T1p * T1q);
Chris@10 1193 T3v = FNMS(T1p, T1o, T1n * T1q);
Chris@10 1194 }
Chris@10 1195 {
Chris@10 1196 E T1w, T3k, T1M, T3o, T1B, T3l, T1H, T3n;
Chris@10 1197 {
Chris@10 1198 E T1t, T1v, T1s, T1u;
Chris@10 1199 T1t = ri[WS(rs, 7)];
Chris@10 1200 T1v = ii[WS(rs, 7)];
Chris@10 1201 T1s = W[12];
Chris@10 1202 T1u = W[13];
Chris@10 1203 T1w = FMA(T1s, T1t, T1u * T1v);
Chris@10 1204 T3k = FNMS(T1u, T1t, T1s * T1v);
Chris@10 1205 }
Chris@10 1206 {
Chris@10 1207 E T1J, T1L, T1I, T1K;
Chris@10 1208 T1J = ri[WS(rs, 17)];
Chris@10 1209 T1L = ii[WS(rs, 17)];
Chris@10 1210 T1I = W[32];
Chris@10 1211 T1K = W[33];
Chris@10 1212 T1M = FMA(T1I, T1J, T1K * T1L);
Chris@10 1213 T3o = FNMS(T1K, T1J, T1I * T1L);
Chris@10 1214 }
Chris@10 1215 {
Chris@10 1216 E T1y, T1A, T1x, T1z;
Chris@10 1217 T1y = ri[WS(rs, 22)];
Chris@10 1218 T1A = ii[WS(rs, 22)];
Chris@10 1219 T1x = W[42];
Chris@10 1220 T1z = W[43];
Chris@10 1221 T1B = FMA(T1x, T1y, T1z * T1A);
Chris@10 1222 T3l = FNMS(T1z, T1y, T1x * T1A);
Chris@10 1223 }
Chris@10 1224 {
Chris@10 1225 E T1E, T1G, T1D, T1F;
Chris@10 1226 T1E = ri[WS(rs, 12)];
Chris@10 1227 T1G = ii[WS(rs, 12)];
Chris@10 1228 T1D = W[22];
Chris@10 1229 T1F = W[23];
Chris@10 1230 T1H = FMA(T1D, T1E, T1F * T1G);
Chris@10 1231 T3n = FNMS(T1F, T1E, T1D * T1G);
Chris@10 1232 }
Chris@10 1233 T3m = T3k - T3l;
Chris@10 1234 T3p = T3n - T3o;
Chris@10 1235 T3A = T1H - T1M;
Chris@10 1236 T3z = T1w - T1B;
Chris@10 1237 T3s = T3k + T3l;
Chris@10 1238 T3t = T3n + T3o;
Chris@10 1239 T3w = T3s + T3t;
Chris@10 1240 T1C = T1w + T1B;
Chris@10 1241 T1N = T1H + T1M;
Chris@10 1242 T1O = T1C + T1N;
Chris@10 1243 }
Chris@10 1244 T1P = T1r + T1O;
Chris@10 1245 T5Z = T3v + T3w;
Chris@10 1246 {
Chris@10 1247 E T3q, T50, T3j, T4Z, T3h, T3i;
Chris@10 1248 T3q = FMA(KP951056516, T3m, KP587785252 * T3p);
Chris@10 1249 T50 = FNMS(KP587785252, T3m, KP951056516 * T3p);
Chris@10 1250 T3h = KP559016994 * (T1C - T1N);
Chris@10 1251 T3i = FNMS(KP250000000, T1O, T1r);
Chris@10 1252 T3j = T3h + T3i;
Chris@10 1253 T4Z = T3i - T3h;
Chris@10 1254 T3r = T3j + T3q;
Chris@10 1255 T5z = T4Z + T50;
Chris@10 1256 T4o = T3j - T3q;
Chris@10 1257 T51 = T4Z - T50;
Chris@10 1258 }
Chris@10 1259 {
Chris@10 1260 E T3B, T52, T3y, T53, T3u, T3x;
Chris@10 1261 T3B = FMA(KP951056516, T3z, KP587785252 * T3A);
Chris@10 1262 T52 = FNMS(KP587785252, T3z, KP951056516 * T3A);
Chris@10 1263 T3u = KP559016994 * (T3s - T3t);
Chris@10 1264 T3x = FNMS(KP250000000, T3w, T3v);
Chris@10 1265 T3y = T3u + T3x;
Chris@10 1266 T53 = T3x - T3u;
Chris@10 1267 T3C = T3y - T3B;
Chris@10 1268 T5A = T53 - T52;
Chris@10 1269 T4n = T3B + T3y;
Chris@10 1270 T54 = T52 + T53;
Chris@10 1271 }
Chris@10 1272 }
/* Real outputs 0, 5, 10, 15, 20, built from the per-group sums (To, TS, T1l, T1P, T2i) and imaginary sums (T5W..T60). */
Chris@10 1273 {
Chris@10 1274 E T62, T64, Tp, T2k, T5T, T5U, T63, T5V;
Chris@10 1275 {
Chris@10 1276 E T5Y, T61, T1m, T2j;
Chris@10 1277 T5Y = T5W - T5X;
Chris@10 1278 T61 = T5Z - T60;
Chris@10 1279 T62 = FMA(KP951056516, T5Y, KP587785252 * T61);
Chris@10 1280 T64 = FNMS(KP587785252, T5Y, KP951056516 * T61);
Chris@10 1281 Tp = T1 + To;
Chris@10 1282 T1m = TS + T1l;
Chris@10 1283 T2j = T1P + T2i;
Chris@10 1284 T2k = T1m + T2j;
Chris@10 1285 T5T = KP559016994 * (T1m - T2j);
Chris@10 1286 T5U = FNMS(KP250000000, T2k, Tp);
Chris@10 1287 }
Chris@10 1288 ri[0] = Tp + T2k;
Chris@10 1289 T63 = T5U - T5T;
Chris@10 1290 ri[WS(rs, 10)] = T63 - T64;
Chris@10 1291 ri[WS(rs, 15)] = T63 + T64;
Chris@10 1292 T5V = T5T + T5U;
Chris@10 1293 ri[WS(rs, 20)] = T5V - T62;
Chris@10 1294 ri[WS(rs, 5)] = T5V + T62;
Chris@10 1295 }
/* Imaginary outputs 0, 5, 10, 15, 20. */
Chris@10 1296 {
Chris@10 1297 E T6i, T6j, T6c, T67, T6d, T6e, T6k, T6f;
Chris@10 1298 {
Chris@10 1299 E T6g, T6h, T65, T66;
Chris@10 1300 T6g = TS - T1l;
Chris@10 1301 T6h = T1P - T2i;
Chris@10 1302 T6i = FMA(KP951056516, T6g, KP587785252 * T6h);
Chris@10 1303 T6j = FNMS(KP587785252, T6g, KP951056516 * T6h);
Chris@10 1304 T6c = T6a + T6b;
Chris@10 1305 T65 = T5W + T5X;
Chris@10 1306 T66 = T5Z + T60;
Chris@10 1307 T67 = T65 + T66;
Chris@10 1308 T6d = KP559016994 * (T65 - T66);
Chris@10 1309 T6e = FNMS(KP250000000, T67, T6c);
Chris@10 1310 }
Chris@10 1311 ii[0] = T67 + T6c;
Chris@10 1312 T6k = T6e - T6d;
Chris@10 1313 ii[WS(rs, 10)] = T6j + T6k;
Chris@10 1314 ii[WS(rs, 15)] = T6k - T6j;
Chris@10 1315 T6f = T6d + T6e;
Chris@10 1316 ii[WS(rs, 5)] = T6f - T6i;
Chris@10 1317 ii[WS(rs, 20)] = T6i + T6f;
Chris@10 1318 }
/* Real and imaginary outputs 1, 4, 6, 9, 11, 14, 16, 19, 21, 24: group partials are scaled by KP* pairs, then combined five at a time. */
Chris@10 1319 {
Chris@10 1320 E T2v, T4f, T6u, T6G, T42, T6z, T43, T6y, T4A, T6H, T4D, T6F, T4u, T6L, T4v;
Chris@10 1321 E T6K, T48, T6v, T4b, T6n, T2n, T6q;
Chris@10 1322 T2n = T2l + T2m;
Chris@10 1323 T2v = T2n + T2u;
Chris@10 1324 T4f = T2n - T2u;
Chris@10 1325 T6q = T6o + T6p;
Chris@10 1326 T6u = T6q - T6t;
Chris@10 1327 T6G = T6t + T6q;
Chris@10 1328 {
Chris@10 1329 E T2S, T3f, T3g, T3D, T40, T41;
Chris@10 1330 T2S = FMA(KP968583161, T2G, KP248689887 * T2R);
Chris@10 1331 T3f = FMA(KP535826794, T33, KP844327925 * T3e);
Chris@10 1332 T3g = T2S + T3f;
Chris@10 1333 T3D = FMA(KP876306680, T3r, KP481753674 * T3C);
Chris@10 1334 T40 = FMA(KP728968627, T3O, KP684547105 * T3Z);
Chris@10 1335 T41 = T3D + T40;
Chris@10 1336 T42 = T3g + T41;
Chris@10 1337 T6z = T3D - T40;
Chris@10 1338 T43 = KP559016994 * (T3g - T41);
Chris@10 1339 T6y = T2S - T3f;
Chris@10 1340 }
Chris@10 1341 {
Chris@10 1342 E T4y, T4z, T6D, T4B, T4C, T6E;
Chris@10 1343 T4y = FNMS(KP844327925, T4g, KP535826794 * T4h);
Chris@10 1344 T4z = FNMS(KP637423989, T4k, KP770513242 * T4j);
Chris@10 1345 T6D = T4y + T4z;
Chris@10 1346 T4B = FMA(KP125333233, T4r, KP992114701 * T4q);
Chris@10 1347 T4C = FMA(KP904827052, T4o, KP425779291 * T4n);
Chris@10 1348 T6E = T4C + T4B;
Chris@10 1349 T4A = T4y - T4z;
Chris@10 1350 T6H = KP559016994 * (T6D + T6E);
Chris@10 1351 T4D = T4B - T4C;
Chris@10 1352 T6F = T6D - T6E;
Chris@10 1353 }
Chris@10 1354 {
Chris@10 1355 E T4i, T4l, T4m, T4p, T4s, T4t;
Chris@10 1356 T4i = FMA(KP535826794, T4g, KP844327925 * T4h);
Chris@10 1357 T4l = FMA(KP637423989, T4j, KP770513242 * T4k);
Chris@10 1358 T4m = T4i - T4l;
Chris@10 1359 T4p = FNMS(KP425779291, T4o, KP904827052 * T4n);
Chris@10 1360 T4s = FNMS(KP992114701, T4r, KP125333233 * T4q);
Chris@10 1361 T4t = T4p + T4s;
Chris@10 1362 T4u = T4m + T4t;
Chris@10 1363 T6L = T4p - T4s;
Chris@10 1364 T4v = KP559016994 * (T4m - T4t);
Chris@10 1365 T6K = T4i + T4l;
Chris@10 1366 }
Chris@10 1367 {
Chris@10 1368 E T46, T47, T6l, T49, T4a, T6m;
Chris@10 1369 T46 = FNMS(KP248689887, T2G, KP968583161 * T2R);
Chris@10 1370 T47 = FNMS(KP844327925, T33, KP535826794 * T3e);
Chris@10 1371 T6l = T46 + T47;
Chris@10 1372 T49 = FNMS(KP481753674, T3r, KP876306680 * T3C);
Chris@10 1373 T4a = FNMS(KP684547105, T3O, KP728968627 * T3Z);
Chris@10 1374 T6m = T49 + T4a;
Chris@10 1375 T48 = T46 - T47;
Chris@10 1376 T6v = KP559016994 * (T6l - T6m);
Chris@10 1377 T4b = T49 - T4a;
Chris@10 1378 T6n = T6l + T6m;
Chris@10 1379 }
Chris@10 1380 ri[WS(rs, 1)] = T2v + T42;
Chris@10 1381 ii[WS(rs, 1)] = T6n + T6u;
Chris@10 1382 ri[WS(rs, 4)] = T4f + T4u;
Chris@10 1383 ii[WS(rs, 4)] = T6F + T6G;
Chris@10 1384 {
Chris@10 1385 E T4c, T4e, T45, T4d, T44;
Chris@10 1386 T4c = FMA(KP951056516, T48, KP587785252 * T4b);
Chris@10 1387 T4e = FNMS(KP587785252, T48, KP951056516 * T4b);
Chris@10 1388 T44 = FNMS(KP250000000, T42, T2v);
Chris@10 1389 T45 = T43 + T44;
Chris@10 1390 T4d = T44 - T43;
Chris@10 1391 ri[WS(rs, 21)] = T45 - T4c;
Chris@10 1392 ri[WS(rs, 16)] = T4d + T4e;
Chris@10 1393 ri[WS(rs, 6)] = T45 + T4c;
Chris@10 1394 ri[WS(rs, 11)] = T4d - T4e;
Chris@10 1395 }
Chris@10 1396 {
Chris@10 1397 E T6A, T6B, T6x, T6C, T6w;
Chris@10 1398 T6A = FMA(KP951056516, T6y, KP587785252 * T6z);
Chris@10 1399 T6B = FNMS(KP587785252, T6y, KP951056516 * T6z);
Chris@10 1400 T6w = FNMS(KP250000000, T6n, T6u);
Chris@10 1401 T6x = T6v + T6w;
Chris@10 1402 T6C = T6w - T6v;
Chris@10 1403 ii[WS(rs, 6)] = T6x - T6A;
Chris@10 1404 ii[WS(rs, 16)] = T6C - T6B;
Chris@10 1405 ii[WS(rs, 21)] = T6A + T6x;
Chris@10 1406 ii[WS(rs, 11)] = T6B + T6C;
Chris@10 1407 }
Chris@10 1408 {
Chris@10 1409 E T4E, T4G, T4x, T4F, T4w;
Chris@10 1410 T4E = FMA(KP951056516, T4A, KP587785252 * T4D);
Chris@10 1411 T4G = FNMS(KP587785252, T4A, KP951056516 * T4D);
Chris@10 1412 T4w = FNMS(KP250000000, T4u, T4f);
Chris@10 1413 T4x = T4v + T4w;
Chris@10 1414 T4F = T4w - T4v;
Chris@10 1415 ri[WS(rs, 24)] = T4x - T4E;
Chris@10 1416 ri[WS(rs, 19)] = T4F + T4G;
Chris@10 1417 ri[WS(rs, 9)] = T4x + T4E;
Chris@10 1418 ri[WS(rs, 14)] = T4F - T4G;
Chris@10 1419 }
Chris@10 1420 {
Chris@10 1421 E T6M, T6N, T6J, T6O, T6I;
Chris@10 1422 T6M = FMA(KP951056516, T6K, KP587785252 * T6L);
Chris@10 1423 T6N = FNMS(KP587785252, T6K, KP951056516 * T6L);
Chris@10 1424 T6I = FNMS(KP250000000, T6F, T6G);
Chris@10 1425 T6J = T6H + T6I;
Chris@10 1426 T6O = T6I - T6H;
Chris@10 1427 ii[WS(rs, 9)] = T6J - T6M;
Chris@10 1428 ii[WS(rs, 19)] = T6O - T6N;
Chris@10 1429 ii[WS(rs, 24)] = T6M + T6J;
Chris@10 1430 ii[WS(rs, 14)] = T6N + T6O;
Chris@10 1431 }
Chris@10 1432 }
/* Real and imaginary outputs 2, 3, 7, 8, 12, 13, 17, 18, 22, 23: same pattern as the previous block with the other set of group partials. */
Chris@10 1433 {
Chris@10 1434 E T4J, T5r, T6U, T76, T5e, T6Z, T5f, T6Y, T5M, T77, T5P, T75, T5G, T7b, T5H;
Chris@10 1435 E T7a, T5k, T6V, T5n, T6R, T4H, T6T;
Chris@10 1436 T4H = T2m - T2l;
Chris@10 1437 T4J = T4H - T4I;
Chris@10 1438 T5r = T4H + T4I;
Chris@10 1439 T6T = T6p - T6o;
Chris@10 1440 T6U = T6S + T6T;
Chris@10 1441 T76 = T6T - T6S;
Chris@10 1442 {
Chris@10 1443 E T4Q, T4X, T4Y, T55, T5c, T5d;
Chris@10 1444 T4Q = FMA(KP876306680, T4M, KP481753674 * T4P);
Chris@10 1445 T4X = FNMS(KP425779291, T4W, KP904827052 * T4T);
Chris@10 1446 T4Y = T4Q + T4X;
Chris@10 1447 T55 = FMA(KP535826794, T51, KP844327925 * T54);
Chris@10 1448 T5c = FMA(KP062790519, T58, KP998026728 * T5b);
Chris@10 1449 T5d = T55 + T5c;
Chris@10 1450 T5e = T4Y + T5d;
Chris@10 1451 T6Z = T55 - T5c;
Chris@10 1452 T5f = KP559016994 * (T4Y - T5d);
Chris@10 1453 T6Y = T4Q - T4X;
Chris@10 1454 }
Chris@10 1455 {
Chris@10 1456 E T5K, T5L, T73, T5N, T5O, T74;
Chris@10 1457 T5K = FNMS(KP684547105, T5s, KP728968627 * T5t);
Chris@10 1458 T5L = FMA(KP125333233, T5w, KP992114701 * T5v);
Chris@10 1459 T73 = T5K - T5L;
Chris@10 1460 T5N = FNMS(KP998026728, T5z, KP062790519 * T5A);
Chris@10 1461 T5O = FMA(KP770513242, T5D, KP637423989 * T5C);
Chris@10 1462 T74 = T5N - T5O;
Chris@10 1463 T5M = T5K + T5L;
Chris@10 1464 T77 = KP559016994 * (T73 - T74);
Chris@10 1465 T5P = T5N + T5O;
Chris@10 1466 T75 = T73 + T74;
Chris@10 1467 }
Chris@10 1468 {
Chris@10 1469 E T5u, T5x, T5y, T5B, T5E, T5F;
Chris@10 1470 T5u = FMA(KP728968627, T5s, KP684547105 * T5t);
Chris@10 1471 T5x = FNMS(KP992114701, T5w, KP125333233 * T5v);
Chris@10 1472 T5y = T5u + T5x;
Chris@10 1473 T5B = FMA(KP062790519, T5z, KP998026728 * T5A);
Chris@10 1474 T5E = FNMS(KP637423989, T5D, KP770513242 * T5C);
Chris@10 1475 T5F = T5B + T5E;
Chris@10 1476 T5G = T5y + T5F;
Chris@10 1477 T7b = T5B - T5E;
Chris@10 1478 T5H = KP559016994 * (T5y - T5F);
Chris@10 1479 T7a = T5u - T5x;
Chris@10 1480 }
Chris@10 1481 {
Chris@10 1482 E T5i, T5j, T6P, T5l, T5m, T6Q;
Chris@10 1483 T5i = FNMS(KP481753674, T4M, KP876306680 * T4P);
Chris@10 1484 T5j = FMA(KP904827052, T4W, KP425779291 * T4T);
Chris@10 1485 T6P = T5i - T5j;
Chris@10 1486 T5l = FNMS(KP844327925, T51, KP535826794 * T54);
Chris@10 1487 T5m = FNMS(KP998026728, T58, KP062790519 * T5b);
Chris@10 1488 T6Q = T5l + T5m;
Chris@10 1489 T5k = T5i + T5j;
Chris@10 1490 T6V = KP559016994 * (T6P - T6Q);
Chris@10 1491 T5n = T5l - T5m;
Chris@10 1492 T6R = T6P + T6Q;
Chris@10 1493 }
Chris@10 1494 ri[WS(rs, 2)] = T4J + T5e;
Chris@10 1495 ii[WS(rs, 2)] = T6R + T6U;
Chris@10 1496 ri[WS(rs, 3)] = T5r + T5G;
Chris@10 1497 ii[WS(rs, 3)] = T75 + T76;
Chris@10 1498 {
Chris@10 1499 E T5o, T5q, T5h, T5p, T5g;
Chris@10 1500 T5o = FMA(KP951056516, T5k, KP587785252 * T5n);
Chris@10 1501 T5q = FNMS(KP587785252, T5k, KP951056516 * T5n);
Chris@10 1502 T5g = FNMS(KP250000000, T5e, T4J);
Chris@10 1503 T5h = T5f + T5g;
Chris@10 1504 T5p = T5g - T5f;
Chris@10 1505 ri[WS(rs, 22)] = T5h - T5o;
Chris@10 1506 ri[WS(rs, 17)] = T5p + T5q;
Chris@10 1507 ri[WS(rs, 7)] = T5h + T5o;
Chris@10 1508 ri[WS(rs, 12)] = T5p - T5q;
Chris@10 1509 }
Chris@10 1510 {
Chris@10 1511 E T70, T71, T6X, T72, T6W;
Chris@10 1512 T70 = FMA(KP951056516, T6Y, KP587785252 * T6Z);
Chris@10 1513 T71 = FNMS(KP587785252, T6Y, KP951056516 * T6Z);
Chris@10 1514 T6W = FNMS(KP250000000, T6R, T6U);
Chris@10 1515 T6X = T6V + T6W;
Chris@10 1516 T72 = T6W - T6V;
Chris@10 1517 ii[WS(rs, 7)] = T6X - T70;
Chris@10 1518 ii[WS(rs, 17)] = T72 - T71;
Chris@10 1519 ii[WS(rs, 22)] = T70 + T6X;
Chris@10 1520 ii[WS(rs, 12)] = T71 + T72;
Chris@10 1521 }
Chris@10 1522 {
Chris@10 1523 E T5Q, T5S, T5J, T5R, T5I;
Chris@10 1524 T5Q = FMA(KP951056516, T5M, KP587785252 * T5P);
Chris@10 1525 T5S = FNMS(KP587785252, T5M, KP951056516 * T5P);
Chris@10 1526 T5I = FNMS(KP250000000, T5G, T5r);
Chris@10 1527 T5J = T5H + T5I;
Chris@10 1528 T5R = T5I - T5H;
Chris@10 1529 ri[WS(rs, 23)] = T5J - T5Q;
Chris@10 1530 ri[WS(rs, 18)] = T5R + T5S;
Chris@10 1531 ri[WS(rs, 8)] = T5J + T5Q;
Chris@10 1532 ri[WS(rs, 13)] = T5R - T5S;
Chris@10 1533 }
Chris@10 1534 {
Chris@10 1535 E T7c, T7d, T79, T7e, T78;
Chris@10 1536 T7c = FMA(KP951056516, T7a, KP587785252 * T7b);
Chris@10 1537 T7d = FNMS(KP587785252, T7a, KP951056516 * T7b);
Chris@10 1538 T78 = FNMS(KP250000000, T75, T76);
Chris@10 1539 T79 = T77 + T78;
Chris@10 1540 T7e = T78 - T77;
Chris@10 1541 ii[WS(rs, 8)] = T79 - T7c;
Chris@10 1542 ii[WS(rs, 18)] = T7e - T7d;
Chris@10 1543 ii[WS(rs, 23)] = T7c + T79;
Chris@10 1544 ii[WS(rs, 13)] = T7d + T7e;
Chris@10 1545 }
Chris@10 1546 }
Chris@10 1547 }
Chris@10 1548 }
Chris@10 1549 }
Chris@10 1550
/*
 * Twiddle instruction table for the non-FMA build of t1_25; its address is
 * stored in the ct_desc initializer that follows.  It holds two entries:
 * {TW_FULL, 0, 25} and {TW_NEXT, 1, 0}.
 * NOTE(review): TW_FULL / TW_NEXT and the field meanings of tw_instr are
 * declared outside this file chunk -- presumably "full twiddle set for
 * radix 25" followed by an advance/terminator record; confirm against the
 * tw_instr declaration.
 */
Chris@10 1551 static const tw_instr twinstr[] = {
Chris@10 1552 {TW_FULL, 0, 25},
Chris@10 1553 {TW_NEXT, 1, 0}
Chris@10 1554 };
Chris@10 1555
/*
 * Codelet descriptor for the non-FMA build: the value 25, the name string
 * "t1_25", the twiddle instruction table, &GENUS, and the tuple
 * {260, 140, 140, 0}, whose first three entries equal the add / multiply /
 * fused-multiply-add counts quoted in the generator comment for this variant.
 * NOTE(review): the meaning of the fourth tuple entry and of the three
 * trailing zeros is defined by ct_desc elsewhere -- not visible here.
 */
Chris@10 1556 static const ct_desc desc = { 25, "t1_25", twinstr, &GENUS, {260, 140, 140, 0}, 0, 0, 0 };
Chris@10 1557
/*
 * Registration entry point (non-FMA build): hands the t1_25 function and
 * its descriptor to planner p via X(kdft_dit_register).
 * X() is a name-wrapping macro defined outside this chunk.
 */
Chris@10 1558 void X(codelet_t1_25) (planner *p) {
Chris@10 1559 X(kdft_dit_register) (p, t1_25, &desc);
Chris@10 1560 }
Chris@10 1561 #endif /* HAVE_FMA */