annotate src/fftw-3.3.8/dft/scalar/codelets/t1_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:16 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 25 -name t1_25 -include dft/scalar/t.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 400 FP additions, 364 FP multiplications,
Chris@82 32 * (or, 84 additions, 48 multiplications, 316 fused multiply/add),
Chris@82 33 * 138 stack variables, 47 constants, and 100 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/t.h"
Chris@82 36
/*
 * t1_25 (FMA variant): twiddled, in-place size-25 DFT kernel emitted by
 * genfft's gen_twiddle with "-n 25" (see the generator command line above).
 *
 * For every m in [mb, me) the kernel
 *   - reads the 25 points ri[WS(rs, k)] / ii[WS(rs, k)], k = 0..24;
 *   - leaves point 0 as is and combines each point k = 1..24 with the pair
 *     W[2*(k-1)], W[2*(k-1)+1] as
 *         re' = W[2k-2]*ri[k] + W[2k-1]*ii[k]
 *         im' = W[2k-2]*ii[k] - W[2k-1]*ri[k]
 *     (NOTE(review): this reading assumes FMA(a,b,c) = a*b + c and
 *     FNMS(a,b,c) = c - a*b; both macros are defined outside this file);
 *   - stores the 25 results back into the same ri/ii slots.
 *
 * Parameters:
 *   ri, ii  data arrays, read and overwritten in place (presumably the real
 *           and imaginary parts, judging by the names -- confirm in callers)
 *   W       read-only twiddle table; 48 values are consumed per iteration
 *   rs      stride handed to WS() to address point k within one transform
 *   mb, me  first and one-past-last iteration index
 *   ms      amount added to ri and ii after each iteration
 */
Chris@82 37 static void t1_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* Numeric constants used by the arithmetic below, declared through the DK macro (defined outside this file). */
Chris@82 39 DK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@82 40 DK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@82 41 DK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@82 42 DK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@82 43 DK(KP557913902, +0.557913902031834264187699648465567037992437152);
Chris@82 44 DK(KP249506682, +0.249506682107067890488084201715862638334226305);
Chris@82 45 DK(KP614372930, +0.614372930789563808870829930444362096004872855);
Chris@82 46 DK(KP621716863, +0.621716863012209892444754556304102309693593202);
Chris@82 47 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 48 DK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@82 49 DK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@82 50 DK(KP968479752, +0.968479752739016373193524836781420152702090879);
Chris@82 51 DK(KP994076283, +0.994076283785401014123185814696322018529298887);
Chris@82 52 DK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@82 53 DK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@82 54 DK(KP062914667, +0.062914667253649757225485955897349402364686947);
Chris@82 55 DK(KP943557151, +0.943557151597354104399655195398983005179443399);
Chris@82 56 DK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@82 57 DK(KP554608978, +0.554608978404018097464974850792216217022558774);
Chris@82 58 DK(KP248028675, +0.248028675328619457762448260696444630363259177);
Chris@82 59 DK(KP525970792, +0.525970792408939708442463226536226366643874659);
Chris@82 60 DK(KP726211448, +0.726211448929902658173535992263577167607493062);
Chris@82 61 DK(KP833417178, +0.833417178328688677408962550243238843138996060);
Chris@82 62 DK(KP921177326, +0.921177326965143320250447435415066029359282231);
Chris@82 63 DK(KP541454447, +0.541454447536312777046285590082819509052033189);
Chris@82 64 DK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@82 65 DK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@82 66 DK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@82 67 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@82 68 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 69 DK(KP851038619, +0.851038619207379630836264138867114231259902550);
Chris@82 70 DK(KP912018591, +0.912018591466481957908415381764119056233607330);
Chris@82 71 DK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@82 72 DK(KP470564281, +0.470564281212251493087595091036643380879947982);
Chris@82 73 DK(KP827271945, +0.827271945972475634034355757144307982555673741);
Chris@82 74 DK(KP126329378, +0.126329378446108174786050455341811215027378105);
Chris@82 75 DK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@82 76 DK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@82 77 DK(KP871714437, +0.871714437527667770979999223229522602943903653);
Chris@82 78 DK(KP549754652, +0.549754652192770074288023275540779861653779767);
Chris@82 79 DK(KP634619297, +0.634619297544148100711287640319130485732531031);
Chris@82 80 DK(KP939062505, +0.939062505817492352556001843133229685779824606);
Chris@82 81 DK(KP256756360, +0.256756360367726783319498520922669048172391148);
Chris@82 82 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 83 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 84 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 85 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 86 {
Chris@82 87 INT m;
/* One pass per m in [mb, me). W starts mb*48 values in and moves on by 48 per pass (two values for each of the 24 twiddled points); ri and ii move on by ms. */
Chris@82 88 for (m = mb, W = W + (mb * 48); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 48, MAKE_VOLATILE_STRIDE(50, rs)) {
Chris@82 89 E T1, T6X, T3Y, T5G, T7c, T7C, Ts, T3L, T3M, T6W, T77, T78, T4P, T5T, T4W;
Chris@82 90 E T5Q, T2G, T5S, T4M, T3G, T5P, T4T, T45, T65, T4c, T68, T11, T64, T42, T2Z;
Chris@82 91 E T67, T49, T4k, T61, T4r, T5Y, T1z, T5X, T4o, T3d, T60, T4h, T4A, T5M, T4H;
Chris@82 92 E T5J, T28, T5L, T4x, T3s, T5I, T4E;
/* Points 0, 5, 10, 15, 20: point 0 is loaded untouched, the other four are combined with their W pairs, then sums and differences are formed. */
Chris@82 93 {
Chris@82 94 E T7, T3P, Tq, T3W, Tk, T3U, Td, T3R;
Chris@82 95 T1 = ri[0];
Chris@82 96 T6X = ii[0];
Chris@82 97 {
Chris@82 98 E T3, T6, T4, T3O, T2, T5;
Chris@82 99 T3 = ri[WS(rs, 5)];
Chris@82 100 T6 = ii[WS(rs, 5)];
Chris@82 101 T2 = W[8];
Chris@82 102 T4 = T2 * T3;
Chris@82 103 T3O = T2 * T6;
Chris@82 104 T5 = W[9];
Chris@82 105 T7 = FMA(T5, T6, T4);
Chris@82 106 T3P = FNMS(T5, T3, T3O);
Chris@82 107 }
Chris@82 108 {
Chris@82 109 E Tm, Tp, Tn, T3V, Tl, To;
Chris@82 110 Tm = ri[WS(rs, 15)];
Chris@82 111 Tp = ii[WS(rs, 15)];
Chris@82 112 Tl = W[28];
Chris@82 113 Tn = Tl * Tm;
Chris@82 114 T3V = Tl * Tp;
Chris@82 115 To = W[29];
Chris@82 116 Tq = FMA(To, Tp, Tn);
Chris@82 117 T3W = FNMS(To, Tm, T3V);
Chris@82 118 }
Chris@82 119 {
Chris@82 120 E Tg, Tj, Th, T3T, Tf, Ti;
Chris@82 121 Tg = ri[WS(rs, 10)];
Chris@82 122 Tj = ii[WS(rs, 10)];
Chris@82 123 Tf = W[18];
Chris@82 124 Th = Tf * Tg;
Chris@82 125 T3T = Tf * Tj;
Chris@82 126 Ti = W[19];
Chris@82 127 Tk = FMA(Ti, Tj, Th);
Chris@82 128 T3U = FNMS(Ti, Tg, T3T);
Chris@82 129 }
Chris@82 130 {
Chris@82 131 E T9, Tc, Ta, T3Q, T8, Tb;
Chris@82 132 T9 = ri[WS(rs, 20)];
Chris@82 133 Tc = ii[WS(rs, 20)];
Chris@82 134 T8 = W[38];
Chris@82 135 Ta = T8 * T9;
Chris@82 136 T3Q = T8 * Tc;
Chris@82 137 Tb = W[39];
Chris@82 138 Td = FMA(Tb, Tc, Ta);
Chris@82 139 T3R = FNMS(Tb, T9, T3Q);
Chris@82 140 }
Chris@82 141 {
Chris@82 142 E T3S, T3X, T7a, T7b;
Chris@82 143 T3S = T3P - T3R;
Chris@82 144 T3X = T3U - T3W;
Chris@82 145 T3Y = FMA(KP618033988, T3X, T3S);
Chris@82 146 T5G = FNMS(KP618033988, T3S, T3X);
Chris@82 147 T7a = T7 - Td;
Chris@82 148 T7b = Tk - Tq;
Chris@82 149 T7c = FMA(KP618033988, T7b, T7a);
Chris@82 150 T7C = FNMS(KP618033988, T7a, T7b);
Chris@82 151 }
Chris@82 152 {
Chris@82 153 E Te, Tr, T6U, T6V;
Chris@82 154 Te = T7 + Td;
Chris@82 155 Tr = Tk + Tq;
Chris@82 156 Ts = Te + Tr;
Chris@82 157 T3L = FNMS(KP250000000, Ts, T1);
Chris@82 158 T3M = Te - Tr;
Chris@82 159 T6U = T3P + T3R;
Chris@82 160 T6V = T3U + T3W;
Chris@82 161 T6W = T6U + T6V;
Chris@82 162 T77 = FNMS(KP250000000, T6W, T6X);
Chris@82 163 T78 = T6U - T6V;
Chris@82 164 }
Chris@82 165 }
/* Points 3, 8, 13, 18, 23: load, combine with W pairs, form sums and differences. */
Chris@82 166 {
Chris@82 167 E T2e, T3u, T2x, T3B, T2D, T3D, T2E, T3E, T2k, T3w, T2q, T3y, T2r, T3z;
Chris@82 168 {
Chris@82 169 E T2a, T2d, T2b, T3t, T29, T2c;
Chris@82 170 T2a = ri[WS(rs, 3)];
Chris@82 171 T2d = ii[WS(rs, 3)];
Chris@82 172 T29 = W[4];
Chris@82 173 T2b = T29 * T2a;
Chris@82 174 T3t = T29 * T2d;
Chris@82 175 T2c = W[5];
Chris@82 176 T2e = FMA(T2c, T2d, T2b);
Chris@82 177 T3u = FNMS(T2c, T2a, T3t);
Chris@82 178 }
Chris@82 179 {
Chris@82 180 E T2t, T2w, T2u, T3A, T2z, T2C, T2A, T3C, T2s, T2y, T2v, T2B;
Chris@82 181 T2t = ri[WS(rs, 13)];
Chris@82 182 T2w = ii[WS(rs, 13)];
Chris@82 183 T2s = W[24];
Chris@82 184 T2u = T2s * T2t;
Chris@82 185 T3A = T2s * T2w;
Chris@82 186 T2z = ri[WS(rs, 18)];
Chris@82 187 T2C = ii[WS(rs, 18)];
Chris@82 188 T2y = W[34];
Chris@82 189 T2A = T2y * T2z;
Chris@82 190 T3C = T2y * T2C;
Chris@82 191 T2v = W[25];
Chris@82 192 T2x = FMA(T2v, T2w, T2u);
Chris@82 193 T3B = FNMS(T2v, T2t, T3A);
Chris@82 194 T2B = W[35];
Chris@82 195 T2D = FMA(T2B, T2C, T2A);
Chris@82 196 T3D = FNMS(T2B, T2z, T3C);
Chris@82 197 T2E = T2x + T2D;
Chris@82 198 T3E = T3B + T3D;
Chris@82 199 }
Chris@82 200 {
Chris@82 201 E T2g, T2j, T2h, T3v, T2m, T2p, T2n, T3x, T2f, T2l, T2i, T2o;
Chris@82 202 T2g = ri[WS(rs, 8)];
Chris@82 203 T2j = ii[WS(rs, 8)];
Chris@82 204 T2f = W[14];
Chris@82 205 T2h = T2f * T2g;
Chris@82 206 T3v = T2f * T2j;
Chris@82 207 T2m = ri[WS(rs, 23)];
Chris@82 208 T2p = ii[WS(rs, 23)];
Chris@82 209 T2l = W[44];
Chris@82 210 T2n = T2l * T2m;
Chris@82 211 T3x = T2l * T2p;
Chris@82 212 T2i = W[15];
Chris@82 213 T2k = FMA(T2i, T2j, T2h);
Chris@82 214 T3w = FNMS(T2i, T2g, T3v);
Chris@82 215 T2o = W[45];
Chris@82 216 T2q = FMA(T2o, T2p, T2n);
Chris@82 217 T3y = FNMS(T2o, T2m, T3x);
Chris@82 218 T2r = T2k + T2q;
Chris@82 219 T3z = T3w + T3y;
Chris@82 220 }
Chris@82 221 {
Chris@82 222 E T4N, T4O, T4U, T4V;
Chris@82 223 T4N = T3y - T3w;
Chris@82 224 T4O = T3D - T3B;
Chris@82 225 T4P = FMA(KP618033988, T4O, T4N);
Chris@82 226 T5T = FNMS(KP618033988, T4N, T4O);
Chris@82 227 T4U = T2k - T2q;
Chris@82 228 T4V = T2x - T2D;
Chris@82 229 T4W = FMA(KP618033988, T4V, T4U);
Chris@82 230 T5Q = FNMS(KP618033988, T4U, T4V);
Chris@82 231 }
Chris@82 232 {
Chris@82 233 E T4L, T2F, T4K, T4S, T3F, T4R;
Chris@82 234 T4L = T2E - T2r;
Chris@82 235 T2F = T2r + T2E;
Chris@82 236 T4K = FNMS(KP250000000, T2F, T2e);
Chris@82 237 T2G = T2e + T2F;
Chris@82 238 T5S = FMA(KP559016994, T4L, T4K);
Chris@82 239 T4M = FNMS(KP559016994, T4L, T4K);
Chris@82 240 T4S = T3E - T3z;
Chris@82 241 T3F = T3z + T3E;
Chris@82 242 T4R = FNMS(KP250000000, T3F, T3u);
Chris@82 243 T3G = T3u + T3F;
Chris@82 244 T5P = FMA(KP559016994, T4S, T4R);
Chris@82 245 T4T = FNMS(KP559016994, T4S, T4R);
Chris@82 246 }
Chris@82 247 }
/* Points 1, 6, 11, 16, 21: load, combine with W pairs, form sums and differences. */
Chris@82 248 {
Chris@82 249 E Tz, T2N, TS, T2U, TY, T2W, TZ, T2X, TF, T2P, TL, T2R, TM, T2S;
Chris@82 250 {
Chris@82 251 E Tv, Ty, Tw, T2M, Tu, Tx;
Chris@82 252 Tv = ri[WS(rs, 1)];
Chris@82 253 Ty = ii[WS(rs, 1)];
Chris@82 254 Tu = W[0];
Chris@82 255 Tw = Tu * Tv;
Chris@82 256 T2M = Tu * Ty;
Chris@82 257 Tx = W[1];
Chris@82 258 Tz = FMA(Tx, Ty, Tw);
Chris@82 259 T2N = FNMS(Tx, Tv, T2M);
Chris@82 260 }
Chris@82 261 {
Chris@82 262 E TO, TR, TP, T2T, TU, TX, TV, T2V, TN, TT, TQ, TW;
Chris@82 263 TO = ri[WS(rs, 11)];
Chris@82 264 TR = ii[WS(rs, 11)];
Chris@82 265 TN = W[20];
Chris@82 266 TP = TN * TO;
Chris@82 267 T2T = TN * TR;
Chris@82 268 TU = ri[WS(rs, 16)];
Chris@82 269 TX = ii[WS(rs, 16)];
Chris@82 270 TT = W[30];
Chris@82 271 TV = TT * TU;
Chris@82 272 T2V = TT * TX;
Chris@82 273 TQ = W[21];
Chris@82 274 TS = FMA(TQ, TR, TP);
Chris@82 275 T2U = FNMS(TQ, TO, T2T);
Chris@82 276 TW = W[31];
Chris@82 277 TY = FMA(TW, TX, TV);
Chris@82 278 T2W = FNMS(TW, TU, T2V);
Chris@82 279 TZ = TS + TY;
Chris@82 280 T2X = T2U + T2W;
Chris@82 281 }
Chris@82 282 {
Chris@82 283 E TB, TE, TC, T2O, TH, TK, TI, T2Q, TA, TG, TD, TJ;
Chris@82 284 TB = ri[WS(rs, 6)];
Chris@82 285 TE = ii[WS(rs, 6)];
Chris@82 286 TA = W[10];
Chris@82 287 TC = TA * TB;
Chris@82 288 T2O = TA * TE;
Chris@82 289 TH = ri[WS(rs, 21)];
Chris@82 290 TK = ii[WS(rs, 21)];
Chris@82 291 TG = W[40];
Chris@82 292 TI = TG * TH;
Chris@82 293 T2Q = TG * TK;
Chris@82 294 TD = W[11];
Chris@82 295 TF = FMA(TD, TE, TC);
Chris@82 296 T2P = FNMS(TD, TB, T2O);
Chris@82 297 TJ = W[41];
Chris@82 298 TL = FMA(TJ, TK, TI);
Chris@82 299 T2R = FNMS(TJ, TH, T2Q);
Chris@82 300 TM = TF + TL;
Chris@82 301 T2S = T2P + T2R;
Chris@82 302 }
Chris@82 303 {
Chris@82 304 E T43, T44, T4a, T4b;
Chris@82 305 T43 = T2P - T2R;
Chris@82 306 T44 = T2W - T2U;
Chris@82 307 T45 = FNMS(KP618033988, T44, T43);
Chris@82 308 T65 = FMA(KP618033988, T43, T44);
Chris@82 309 T4a = TL - TF;
Chris@82 310 T4b = TY - TS;
Chris@82 311 T4c = FMA(KP618033988, T4b, T4a);
Chris@82 312 T68 = FNMS(KP618033988, T4a, T4b);
Chris@82 313 }
Chris@82 314 {
Chris@82 315 E T41, T10, T40, T48, T2Y, T47;
Chris@82 316 T41 = TM - TZ;
Chris@82 317 T10 = TM + TZ;
Chris@82 318 T40 = FNMS(KP250000000, T10, Tz);
Chris@82 319 T11 = Tz + T10;
Chris@82 320 T64 = FNMS(KP559016994, T41, T40);
Chris@82 321 T42 = FMA(KP559016994, T41, T40);
Chris@82 322 T48 = T2S - T2X;
Chris@82 323 T2Y = T2S + T2X;
Chris@82 324 T47 = FNMS(KP250000000, T2Y, T2N);
Chris@82 325 T2Z = T2N + T2Y;
Chris@82 326 T67 = FNMS(KP559016994, T48, T47);
Chris@82 327 T49 = FMA(KP559016994, T48, T47);
Chris@82 328 }
Chris@82 329 }
/* Points 4, 9, 14, 19, 24: load, combine with W pairs, form sums and differences. */
Chris@82 330 {
Chris@82 331 E T17, T31, T1q, T38, T1w, T3a, T1x, T3b, T1d, T33, T1j, T35, T1k, T36;
Chris@82 332 {
Chris@82 333 E T13, T16, T14, T30, T12, T15;
Chris@82 334 T13 = ri[WS(rs, 4)];
Chris@82 335 T16 = ii[WS(rs, 4)];
Chris@82 336 T12 = W[6];
Chris@82 337 T14 = T12 * T13;
Chris@82 338 T30 = T12 * T16;
Chris@82 339 T15 = W[7];
Chris@82 340 T17 = FMA(T15, T16, T14);
Chris@82 341 T31 = FNMS(T15, T13, T30);
Chris@82 342 }
Chris@82 343 {
Chris@82 344 E T1m, T1p, T1n, T37, T1s, T1v, T1t, T39, T1l, T1r, T1o, T1u;
Chris@82 345 T1m = ri[WS(rs, 14)];
Chris@82 346 T1p = ii[WS(rs, 14)];
Chris@82 347 T1l = W[26];
Chris@82 348 T1n = T1l * T1m;
Chris@82 349 T37 = T1l * T1p;
Chris@82 350 T1s = ri[WS(rs, 19)];
Chris@82 351 T1v = ii[WS(rs, 19)];
Chris@82 352 T1r = W[36];
Chris@82 353 T1t = T1r * T1s;
Chris@82 354 T39 = T1r * T1v;
Chris@82 355 T1o = W[27];
Chris@82 356 T1q = FMA(T1o, T1p, T1n);
Chris@82 357 T38 = FNMS(T1o, T1m, T37);
Chris@82 358 T1u = W[37];
Chris@82 359 T1w = FMA(T1u, T1v, T1t);
Chris@82 360 T3a = FNMS(T1u, T1s, T39);
Chris@82 361 T1x = T1q + T1w;
Chris@82 362 T3b = T38 + T3a;
Chris@82 363 }
Chris@82 364 {
Chris@82 365 E T19, T1c, T1a, T32, T1f, T1i, T1g, T34, T18, T1e, T1b, T1h;
Chris@82 366 T19 = ri[WS(rs, 9)];
Chris@82 367 T1c = ii[WS(rs, 9)];
Chris@82 368 T18 = W[16];
Chris@82 369 T1a = T18 * T19;
Chris@82 370 T32 = T18 * T1c;
Chris@82 371 T1f = ri[WS(rs, 24)];
Chris@82 372 T1i = ii[WS(rs, 24)];
Chris@82 373 T1e = W[46];
Chris@82 374 T1g = T1e * T1f;
Chris@82 375 T34 = T1e * T1i;
Chris@82 376 T1b = W[17];
Chris@82 377 T1d = FMA(T1b, T1c, T1a);
Chris@82 378 T33 = FNMS(T1b, T19, T32);
Chris@82 379 T1h = W[47];
Chris@82 380 T1j = FMA(T1h, T1i, T1g);
Chris@82 381 T35 = FNMS(T1h, T1f, T34);
Chris@82 382 T1k = T1d + T1j;
Chris@82 383 T36 = T33 + T35;
Chris@82 384 }
Chris@82 385 {
Chris@82 386 E T4i, T4j, T4p, T4q;
Chris@82 387 T4i = T1j - T1d;
Chris@82 388 T4j = T1w - T1q;
Chris@82 389 T4k = FMA(KP618033988, T4j, T4i);
Chris@82 390 T61 = FNMS(KP618033988, T4i, T4j);
Chris@82 391 T4p = T35 - T33;
Chris@82 392 T4q = T3a - T38;
Chris@82 393 T4r = FMA(KP618033988, T4q, T4p);
Chris@82 394 T5Y = FNMS(KP618033988, T4p, T4q);
Chris@82 395 }
Chris@82 396 {
Chris@82 397 E T4n, T1y, T4m, T4g, T3c, T4f;
Chris@82 398 T4n = T1k - T1x;
Chris@82 399 T1y = T1k + T1x;
Chris@82 400 T4m = FNMS(KP250000000, T1y, T17);
Chris@82 401 T1z = T17 + T1y;
Chris@82 402 T5X = FNMS(KP559016994, T4n, T4m);
Chris@82 403 T4o = FMA(KP559016994, T4n, T4m);
Chris@82 404 T4g = T3b - T36;
Chris@82 405 T3c = T36 + T3b;
Chris@82 406 T4f = FNMS(KP250000000, T3c, T31);
Chris@82 407 T3d = T31 + T3c;
Chris@82 408 T60 = FMA(KP559016994, T4g, T4f);
Chris@82 409 T4h = FNMS(KP559016994, T4g, T4f);
Chris@82 410 }
Chris@82 411 }
/* Points 2, 7, 12, 17, 22: load, combine with W pairs, form sums and differences. */
Chris@82 412 {
Chris@82 413 E T1G, T3g, T1Z, T3n, T25, T3p, T26, T3q, T1M, T3i, T1S, T3k, T1T, T3l;
Chris@82 414 {
Chris@82 415 E T1C, T1F, T1D, T3f, T1B, T1E;
Chris@82 416 T1C = ri[WS(rs, 2)];
Chris@82 417 T1F = ii[WS(rs, 2)];
Chris@82 418 T1B = W[2];
Chris@82 419 T1D = T1B * T1C;
Chris@82 420 T3f = T1B * T1F;
Chris@82 421 T1E = W[3];
Chris@82 422 T1G = FMA(T1E, T1F, T1D);
Chris@82 423 T3g = FNMS(T1E, T1C, T3f);
Chris@82 424 }
Chris@82 425 {
Chris@82 426 E T1V, T1Y, T1W, T3m, T21, T24, T22, T3o, T1U, T20, T1X, T23;
Chris@82 427 T1V = ri[WS(rs, 12)];
Chris@82 428 T1Y = ii[WS(rs, 12)];
Chris@82 429 T1U = W[22];
Chris@82 430 T1W = T1U * T1V;
Chris@82 431 T3m = T1U * T1Y;
Chris@82 432 T21 = ri[WS(rs, 17)];
Chris@82 433 T24 = ii[WS(rs, 17)];
Chris@82 434 T20 = W[32];
Chris@82 435 T22 = T20 * T21;
Chris@82 436 T3o = T20 * T24;
Chris@82 437 T1X = W[23];
Chris@82 438 T1Z = FMA(T1X, T1Y, T1W);
Chris@82 439 T3n = FNMS(T1X, T1V, T3m);
Chris@82 440 T23 = W[33];
Chris@82 441 T25 = FMA(T23, T24, T22);
Chris@82 442 T3p = FNMS(T23, T21, T3o);
Chris@82 443 T26 = T1Z + T25;
Chris@82 444 T3q = T3n + T3p;
Chris@82 445 }
Chris@82 446 {
Chris@82 447 E T1I, T1L, T1J, T3h, T1O, T1R, T1P, T3j, T1H, T1N, T1K, T1Q;
Chris@82 448 T1I = ri[WS(rs, 7)];
Chris@82 449 T1L = ii[WS(rs, 7)];
Chris@82 450 T1H = W[12];
Chris@82 451 T1J = T1H * T1I;
Chris@82 452 T3h = T1H * T1L;
Chris@82 453 T1O = ri[WS(rs, 22)];
Chris@82 454 T1R = ii[WS(rs, 22)];
Chris@82 455 T1N = W[42];
Chris@82 456 T1P = T1N * T1O;
Chris@82 457 T3j = T1N * T1R;
Chris@82 458 T1K = W[13];
Chris@82 459 T1M = FMA(T1K, T1L, T1J);
Chris@82 460 T3i = FNMS(T1K, T1I, T3h);
Chris@82 461 T1Q = W[43];
Chris@82 462 T1S = FMA(T1Q, T1R, T1P);
Chris@82 463 T3k = FNMS(T1Q, T1O, T3j);
Chris@82 464 T1T = T1M + T1S;
Chris@82 465 T3l = T3i + T3k;
Chris@82 466 }
Chris@82 467 {
Chris@82 468 E T4y, T4z, T4F, T4G;
Chris@82 469 T4y = T3k - T3i;
Chris@82 470 T4z = T3n - T3p;
Chris@82 471 T4A = FNMS(KP618033988, T4z, T4y);
Chris@82 472 T5M = FMA(KP618033988, T4y, T4z);
Chris@82 473 T4F = T1S - T1M;
Chris@82 474 T4G = T25 - T1Z;
Chris@82 475 T4H = FMA(KP618033988, T4G, T4F);
Chris@82 476 T5J = FNMS(KP618033988, T4F, T4G);
Chris@82 477 }
Chris@82 478 {
Chris@82 479 E T4w, T27, T4v, T4D, T3r, T4C;
Chris@82 480 T4w = T26 - T1T;
Chris@82 481 T27 = T1T + T26;
Chris@82 482 T4v = FNMS(KP250000000, T27, T1G);
Chris@82 483 T28 = T1G + T27;
Chris@82 484 T5L = FMA(KP559016994, T4w, T4v);
Chris@82 485 T4x = FNMS(KP559016994, T4w, T4v);
Chris@82 486 T4D = T3q - T3l;
Chris@82 487 T3r = T3l + T3q;
Chris@82 488 T4C = FNMS(KP250000000, T3r, T3g);
Chris@82 489 T3s = T3g + T3r;
Chris@82 490 T5I = FMA(KP559016994, T4D, T4C);
Chris@82 491 T4E = FNMS(KP559016994, T4D, T4C);
Chris@82 492 }
Chris@82 493 }
/* Store ri[] for slots 0, 5, 10, 15, 20. */
Chris@82 494 {
Chris@82 495 E T3I, T3K, Tt, T2I, T2J, T2K, T3J, T2L;
Chris@82 496 {
Chris@82 497 E T3e, T3H, T1A, T2H;
Chris@82 498 T3e = T2Z - T3d;
Chris@82 499 T3H = T3s - T3G;
Chris@82 500 T3I = FMA(KP618033988, T3H, T3e);
Chris@82 501 T3K = FNMS(KP618033988, T3e, T3H);
Chris@82 502 Tt = T1 + Ts;
Chris@82 503 T1A = T11 + T1z;
Chris@82 504 T2H = T28 + T2G;
Chris@82 505 T2I = T1A + T2H;
Chris@82 506 T2J = FNMS(KP250000000, T2I, Tt);
Chris@82 507 T2K = T1A - T2H;
Chris@82 508 }
Chris@82 509 ri[0] = Tt + T2I;
Chris@82 510 T3J = FNMS(KP559016994, T2K, T2J);
Chris@82 511 ri[WS(rs, 10)] = FNMS(KP951056516, T3K, T3J);
Chris@82 512 ri[WS(rs, 15)] = FMA(KP951056516, T3K, T3J);
Chris@82 513 T2L = FMA(KP559016994, T2K, T2J);
Chris@82 514 ri[WS(rs, 20)] = FNMS(KP951056516, T3I, T2L);
Chris@82 515 ri[WS(rs, 5)] = FMA(KP951056516, T3I, T2L);
Chris@82 516 }
/* Store ii[] for slots 0, 5, 10, 15, 20. */
Chris@82 517 {
Chris@82 518 E T74, T76, T6Y, T6T, T6Z, T70, T75, T71;
Chris@82 519 {
Chris@82 520 E T72, T73, T6R, T6S;
Chris@82 521 T72 = T11 - T1z;
Chris@82 522 T73 = T28 - T2G;
Chris@82 523 T74 = FMA(KP618033988, T73, T72);
Chris@82 524 T76 = FNMS(KP618033988, T72, T73);
Chris@82 525 T6Y = T6W + T6X;
Chris@82 526 T6R = T2Z + T3d;
Chris@82 527 T6S = T3s + T3G;
Chris@82 528 T6T = T6R + T6S;
Chris@82 529 T6Z = FNMS(KP250000000, T6T, T6Y);
Chris@82 530 T70 = T6R - T6S;
Chris@82 531 }
Chris@82 532 ii[0] = T6T + T6Y;
Chris@82 533 T75 = FNMS(KP559016994, T70, T6Z);
Chris@82 534 ii[WS(rs, 10)] = FMA(KP951056516, T76, T75);
Chris@82 535 ii[WS(rs, 15)] = FNMS(KP951056516, T76, T75);
Chris@82 536 T71 = FMA(KP559016994, T70, T6Z);
Chris@82 537 ii[WS(rs, 5)] = FNMS(KP951056516, T74, T71);
Chris@82 538 ii[WS(rs, 20)] = FMA(KP951056516, T74, T71);
Chris@82 539 }
/* Compute and store ri[] and ii[] for slots 1, 4, 6, 9, 11, 14, 16, 19, 21, 24. */
Chris@82 540 {
Chris@82 541 E T3Z, T5d, T7d, T7p, T56, T59, T7l, T7k, T7e, T7f, T7g, T4u, T4Z, T50, T5y;
Chris@82 542 E T5B, T7x, T7w, T7q, T7r, T7s, T5k, T5r, T5s, T3N, T79;
Chris@82 543 T3N = FMA(KP559016994, T3M, T3L);
Chris@82 544 T3Z = FMA(KP951056516, T3Y, T3N);
Chris@82 545 T5d = FNMS(KP951056516, T3Y, T3N);
Chris@82 546 T79 = FMA(KP559016994, T78, T77);
Chris@82 547 T7d = FNMS(KP951056516, T7c, T79);
Chris@82 548 T7p = FMA(KP951056516, T7c, T79);
Chris@82 549 {
Chris@82 550 E T4e, T54, T4Y, T58, T4t, T55, T4J, T57;
Chris@82 551 {
Chris@82 552 E T46, T4d, T4Q, T4X;
Chris@82 553 T46 = FMA(KP951056516, T45, T42);
Chris@82 554 T4d = FMA(KP951056516, T4c, T49);
Chris@82 555 T4e = FMA(KP256756360, T4d, T46);
Chris@82 556 T54 = FNMS(KP256756360, T46, T4d);
Chris@82 557 T4Q = FNMS(KP951056516, T4P, T4M);
Chris@82 558 T4X = FNMS(KP951056516, T4W, T4T);
Chris@82 559 T4Y = FMA(KP939062505, T4X, T4Q);
Chris@82 560 T58 = FNMS(KP939062505, T4Q, T4X);
Chris@82 561 }
Chris@82 562 {
Chris@82 563 E T4l, T4s, T4B, T4I;
Chris@82 564 T4l = FMA(KP951056516, T4k, T4h);
Chris@82 565 T4s = FNMS(KP951056516, T4r, T4o);
Chris@82 566 T4t = FMA(KP634619297, T4s, T4l);
Chris@82 567 T55 = FNMS(KP634619297, T4l, T4s);
Chris@82 568 T4B = FNMS(KP951056516, T4A, T4x);
Chris@82 569 T4I = FMA(KP951056516, T4H, T4E);
Chris@82 570 T4J = FMA(KP549754652, T4I, T4B);
Chris@82 571 T57 = FNMS(KP549754652, T4B, T4I);
Chris@82 572 }
Chris@82 573 T56 = FMA(KP871714437, T55, T54);
Chris@82 574 T59 = FNMS(KP831864738, T58, T57);
Chris@82 575 T7l = FNMS(KP831864738, T4Y, T4J);
Chris@82 576 T7k = FNMS(KP871714437, T4t, T4e);
Chris@82 577 T7e = FNMS(KP871714437, T55, T54);
Chris@82 578 T7f = FMA(KP831864738, T58, T57);
Chris@82 579 T7g = FMA(KP904730450, T7f, T7e);
Chris@82 580 T4u = FMA(KP871714437, T4t, T4e);
Chris@82 581 T4Z = FMA(KP831864738, T4Y, T4J);
Chris@82 582 T50 = FMA(KP904730450, T4Z, T4u);
Chris@82 583 }
Chris@82 584 {
Chris@82 585 E T5g, T5z, T5q, T5x, T5j, T5A, T5n, T5w;
Chris@82 586 {
Chris@82 587 E T5e, T5f, T5o, T5p;
Chris@82 588 T5e = FMA(KP951056516, T4P, T4M);
Chris@82 589 T5f = FMA(KP951056516, T4W, T4T);
Chris@82 590 T5g = FNMS(KP126329378, T5f, T5e);
Chris@82 591 T5z = FMA(KP126329378, T5e, T5f);
Chris@82 592 T5o = FNMS(KP951056516, T4k, T4h);
Chris@82 593 T5p = FMA(KP951056516, T4r, T4o);
Chris@82 594 T5q = FMA(KP827271945, T5p, T5o);
Chris@82 595 T5x = FNMS(KP827271945, T5o, T5p);
Chris@82 596 }
Chris@82 597 {
Chris@82 598 E T5h, T5i, T5l, T5m;
Chris@82 599 T5h = FNMS(KP951056516, T4H, T4E);
Chris@82 600 T5i = FMA(KP951056516, T4A, T4x);
Chris@82 601 T5j = FNMS(KP470564281, T5i, T5h);
Chris@82 602 T5A = FMA(KP470564281, T5h, T5i);
Chris@82 603 T5l = FNMS(KP951056516, T4c, T49);
Chris@82 604 T5m = FNMS(KP951056516, T45, T42);
Chris@82 605 T5n = FMA(KP634619297, T5m, T5l);
Chris@82 606 T5w = FNMS(KP634619297, T5l, T5m);
Chris@82 607 }
Chris@82 608 T5y = FMA(KP912575812, T5x, T5w);
Chris@82 609 T5B = FNMS(KP912018591, T5A, T5z);
Chris@82 610 T7x = FMA(KP912018591, T5j, T5g);
Chris@82 611 T7w = FMA(KP912575812, T5q, T5n);
Chris@82 612 T7q = FMA(KP912018591, T5A, T5z);
Chris@82 613 T7r = FNMS(KP912575812, T5x, T5w);
Chris@82 614 T7s = FMA(KP851038619, T7r, T7q);
Chris@82 615 T5k = FNMS(KP912018591, T5j, T5g);
Chris@82 616 T5r = FNMS(KP912575812, T5q, T5n);
Chris@82 617 T5s = FNMS(KP851038619, T5r, T5k);
Chris@82 618 }
Chris@82 619 ri[WS(rs, 1)] = FMA(KP968583161, T50, T3Z);
Chris@82 620 ii[WS(rs, 1)] = FMA(KP968583161, T7g, T7d);
Chris@82 621 ri[WS(rs, 4)] = FNMS(KP992114701, T5s, T5d);
Chris@82 622 ii[WS(rs, 4)] = FNMS(KP992114701, T7s, T7p);
Chris@82 623 {
Chris@82 624 E T5a, T5c, T53, T5b, T51, T52;
Chris@82 625 T5a = FMA(KP559154169, T59, T56);
Chris@82 626 T5c = FNMS(KP683113946, T56, T59);
Chris@82 627 T51 = FNMS(KP242145790, T50, T3Z);
Chris@82 628 T52 = FNMS(KP904730450, T4Z, T4u);
Chris@82 629 T53 = FMA(KP541454447, T52, T51);
Chris@82 630 T5b = FNMS(KP541454447, T52, T51);
Chris@82 631 ri[WS(rs, 6)] = FMA(KP921177326, T5a, T53);
Chris@82 632 ri[WS(rs, 16)] = FMA(KP833417178, T5c, T5b);
Chris@82 633 ri[WS(rs, 21)] = FNMS(KP921177326, T5a, T53);
Chris@82 634 ri[WS(rs, 11)] = FNMS(KP833417178, T5c, T5b);
Chris@82 635 }
Chris@82 636 {
Chris@82 637 E T7m, T7o, T7j, T7n, T7h, T7i;
Chris@82 638 T7m = FMA(KP559154169, T7l, T7k);
Chris@82 639 T7o = FNMS(KP683113946, T7k, T7l);
Chris@82 640 T7h = FNMS(KP242145790, T7g, T7d);
Chris@82 641 T7i = FNMS(KP904730450, T7f, T7e);
Chris@82 642 T7j = FMA(KP541454447, T7i, T7h);
Chris@82 643 T7n = FNMS(KP541454447, T7i, T7h);
Chris@82 644 ii[WS(rs, 6)] = FNMS(KP921177326, T7m, T7j);
Chris@82 645 ii[WS(rs, 16)] = FNMS(KP833417178, T7o, T7n);
Chris@82 646 ii[WS(rs, 21)] = FMA(KP921177326, T7m, T7j);
Chris@82 647 ii[WS(rs, 11)] = FMA(KP833417178, T7o, T7n);
Chris@82 648 }
Chris@82 649 {
Chris@82 650 E T5C, T5E, T5v, T5D, T5t, T5u;
Chris@82 651 T5C = FNMS(KP726211448, T5B, T5y);
Chris@82 652 T5E = FMA(KP525970792, T5y, T5B);
Chris@82 653 T5t = FMA(KP248028675, T5s, T5d);
Chris@82 654 T5u = FMA(KP851038619, T5r, T5k);
Chris@82 655 T5v = FMA(KP554608978, T5u, T5t);
Chris@82 656 T5D = FNMS(KP554608978, T5u, T5t);
Chris@82 657 ri[WS(rs, 9)] = FNMS(KP803003575, T5C, T5v);
Chris@82 658 ri[WS(rs, 19)] = FMA(KP943557151, T5E, T5D);
Chris@82 659 ri[WS(rs, 24)] = FMA(KP803003575, T5C, T5v);
Chris@82 660 ri[WS(rs, 14)] = FNMS(KP943557151, T5E, T5D);
Chris@82 661 }
Chris@82 662 {
Chris@82 663 E T7y, T7A, T7v, T7z, T7t, T7u;
Chris@82 664 T7y = FMA(KP726211448, T7x, T7w);
Chris@82 665 T7A = FNMS(KP525970792, T7w, T7x);
Chris@82 666 T7t = FMA(KP248028675, T7s, T7p);
Chris@82 667 T7u = FNMS(KP851038619, T7r, T7q);
Chris@82 668 T7v = FMA(KP554608978, T7u, T7t);
Chris@82 669 T7z = FNMS(KP554608978, T7u, T7t);
Chris@82 670 ii[WS(rs, 9)] = FNMS(KP803003575, T7y, T7v);
Chris@82 671 ii[WS(rs, 19)] = FNMS(KP943557151, T7A, T7z);
Chris@82 672 ii[WS(rs, 24)] = FMA(KP803003575, T7y, T7v);
Chris@82 673 ii[WS(rs, 14)] = FMA(KP943557151, T7A, T7z);
Chris@82 674 }
Chris@82 675 }
/* Compute and store ri[] and ii[] for slots 2, 3, 7, 8, 12, 13, 17, 18, 22, 23. */
Chris@82 676 {
Chris@82 677 E T5H, T6p, T7D, T7P, T6i, T6l, T7X, T7W, T7Q, T7R, T7S, T5W, T6b, T6c, T6K;
Chris@82 678 E T6N, T7L, T7K, T7E, T7F, T7G, T6w, T6D, T6E, T5F, T7B;
Chris@82 679 T5F = FNMS(KP559016994, T3M, T3L);
Chris@82 680 T5H = FMA(KP951056516, T5G, T5F);
Chris@82 681 T6p = FNMS(KP951056516, T5G, T5F);
Chris@82 682 T7B = FNMS(KP559016994, T78, T77);
Chris@82 683 T7D = FMA(KP951056516, T7C, T7B);
Chris@82 684 T7P = FNMS(KP951056516, T7C, T7B);
Chris@82 685 {
Chris@82 686 E T5O, T6j, T6a, T6h, T5V, T6k, T63, T6g;
Chris@82 687 {
Chris@82 688 E T5K, T5N, T66, T69;
Chris@82 689 T5K = FMA(KP951056516, T5J, T5I);
Chris@82 690 T5N = FMA(KP951056516, T5M, T5L);
Chris@82 691 T5O = FMA(KP062914667, T5N, T5K);
Chris@82 692 T6j = FNMS(KP062914667, T5K, T5N);
Chris@82 693 T66 = FNMS(KP951056516, T65, T64);
Chris@82 694 T69 = FMA(KP951056516, T68, T67);
Chris@82 695 T6a = FMA(KP939062505, T69, T66);
Chris@82 696 T6h = FNMS(KP939062505, T66, T69);
Chris@82 697 }
Chris@82 698 {
Chris@82 699 E T5R, T5U, T5Z, T62;
Chris@82 700 T5R = FNMS(KP951056516, T5Q, T5P);
Chris@82 701 T5U = FNMS(KP951056516, T5T, T5S);
Chris@82 702 T5V = FNMS(KP827271945, T5U, T5R);
Chris@82 703 T6k = FMA(KP827271945, T5R, T5U);
Chris@82 704 T5Z = FNMS(KP951056516, T5Y, T5X);
Chris@82 705 T62 = FMA(KP951056516, T61, T60);
Chris@82 706 T63 = FNMS(KP126329378, T62, T5Z);
Chris@82 707 T6g = FMA(KP126329378, T5Z, T62);
Chris@82 708 }
Chris@82 709 T6i = FMA(KP734762448, T6h, T6g);
Chris@82 710 T6l = FNMS(KP772036680, T6k, T6j);
Chris@82 711 T7X = FNMS(KP772036680, T5V, T5O);
Chris@82 712 T7W = FMA(KP734762448, T6a, T63);
Chris@82 713 T7Q = FMA(KP772036680, T6k, T6j);
Chris@82 714 T7R = FNMS(KP734762448, T6h, T6g);
Chris@82 715 T7S = FMA(KP994076283, T7R, T7Q);
Chris@82 716 T5W = FMA(KP772036680, T5V, T5O);
Chris@82 717 T6b = FNMS(KP734762448, T6a, T63);
Chris@82 718 T6c = FNMS(KP994076283, T6b, T5W);
Chris@82 719 }
Chris@82 720 {
Chris@82 721 E T6s, T6L, T6C, T6J, T6v, T6M, T6z, T6I;
Chris@82 722 {
Chris@82 723 E T6q, T6r, T6A, T6B;
Chris@82 724 T6q = FMA(KP951056516, T5Q, T5P);
Chris@82 725 T6r = FMA(KP951056516, T5T, T5S);
Chris@82 726 T6s = FMA(KP062914667, T6r, T6q);
Chris@82 727 T6L = FNMS(KP062914667, T6q, T6r);
Chris@82 728 T6A = FMA(KP951056516, T65, T64);
Chris@82 729 T6B = FNMS(KP951056516, T68, T67);
Chris@82 730 T6C = FMA(KP549754652, T6B, T6A);
Chris@82 731 T6J = FNMS(KP549754652, T6A, T6B);
Chris@82 732 }
Chris@82 733 {
Chris@82 734 E T6t, T6u, T6x, T6y;
Chris@82 735 T6t = FNMS(KP951056516, T5J, T5I);
Chris@82 736 T6u = FNMS(KP951056516, T5M, T5L);
Chris@82 737 T6v = FMA(KP634619297, T6u, T6t);
Chris@82 738 T6M = FNMS(KP634619297, T6t, T6u);
Chris@82 739 T6x = FNMS(KP951056516, T61, T60);
Chris@82 740 T6y = FMA(KP951056516, T5Y, T5X);
Chris@82 741 T6z = FNMS(KP470564281, T6y, T6x);
Chris@82 742 T6I = FMA(KP470564281, T6x, T6y);
Chris@82 743 }
Chris@82 744 T6K = FMA(KP968479752, T6J, T6I);
Chris@82 745 T6N = FNMS(KP845997307, T6M, T6L);
Chris@82 746 T7L = FNMS(KP845997307, T6v, T6s);
Chris@82 747 T7K = FNMS(KP968479752, T6C, T6z);
Chris@82 748 T7E = FMA(KP845997307, T6M, T6L);
Chris@82 749 T7F = FNMS(KP968479752, T6J, T6I);
Chris@82 750 T7G = FMA(KP906616052, T7F, T7E);
Chris@82 751 T6w = FMA(KP845997307, T6v, T6s);
Chris@82 752 T6D = FMA(KP968479752, T6C, T6z);
Chris@82 753 T6E = FMA(KP906616052, T6D, T6w);
Chris@82 754 }
Chris@82 755 ri[WS(rs, 3)] = FMA(KP998026728, T6c, T5H);
Chris@82 756 ii[WS(rs, 3)] = FNMS(KP998026728, T7S, T7P);
Chris@82 757 ri[WS(rs, 2)] = FMA(KP998026728, T6E, T6p);
Chris@82 758 ii[WS(rs, 2)] = FNMS(KP998026728, T7G, T7D);
Chris@82 759 {
Chris@82 760 E T6m, T6o, T6f, T6n, T6d, T6e;
Chris@82 761 T6m = FNMS(KP621716863, T6l, T6i);
Chris@82 762 T6o = FMA(KP614372930, T6i, T6l);
Chris@82 763 T6d = FNMS(KP249506682, T6c, T5H);
Chris@82 764 T6e = FMA(KP994076283, T6b, T5W);
Chris@82 765 T6f = FNMS(KP557913902, T6e, T6d);
Chris@82 766 T6n = FMA(KP557913902, T6e, T6d);
Chris@82 767 ri[WS(rs, 23)] = FNMS(KP943557151, T6m, T6f);
Chris@82 768 ri[WS(rs, 13)] = FMA(KP949179823, T6o, T6n);
Chris@82 769 ri[WS(rs, 8)] = FMA(KP943557151, T6m, T6f);
Chris@82 770 ri[WS(rs, 18)] = FNMS(KP949179823, T6o, T6n);
Chris@82 771 }
Chris@82 772 {
Chris@82 773 E T7Y, T80, T7V, T7Z, T7T, T7U;
Chris@82 774 T7Y = FMA(KP621716863, T7X, T7W);
Chris@82 775 T80 = FNMS(KP614372930, T7W, T7X);
Chris@82 776 T7T = FMA(KP249506682, T7S, T7P);
Chris@82 777 T7U = FNMS(KP994076283, T7R, T7Q);
Chris@82 778 T7V = FMA(KP557913902, T7U, T7T);
Chris@82 779 T7Z = FNMS(KP557913902, T7U, T7T);
Chris@82 780 ii[WS(rs, 8)] = FNMS(KP943557151, T7Y, T7V);
Chris@82 781 ii[WS(rs, 18)] = FNMS(KP949179823, T80, T7Z);
Chris@82 782 ii[WS(rs, 23)] = FMA(KP943557151, T7Y, T7V);
Chris@82 783 ii[WS(rs, 13)] = FMA(KP949179823, T80, T7Z);
Chris@82 784 }
Chris@82 785 {
Chris@82 786 E T6O, T6Q, T6H, T6P, T6F, T6G;
Chris@82 787 T6O = FMA(KP681693190, T6N, T6K);
Chris@82 788 T6Q = FNMS(KP560319534, T6K, T6N);
Chris@82 789 T6F = FNMS(KP249506682, T6E, T6p);
Chris@82 790 T6G = FNMS(KP906616052, T6D, T6w);
Chris@82 791 T6H = FNMS(KP557913902, T6G, T6F);
Chris@82 792 T6P = FMA(KP557913902, T6G, T6F);
Chris@82 793 ri[WS(rs, 22)] = FNMS(KP860541664, T6O, T6H);
Chris@82 794 ri[WS(rs, 17)] = FMA(KP949179823, T6Q, T6P);
Chris@82 795 ri[WS(rs, 7)] = FMA(KP860541664, T6O, T6H);
Chris@82 796 ri[WS(rs, 12)] = FNMS(KP949179823, T6Q, T6P);
Chris@82 797 }
Chris@82 798 {
Chris@82 799 E T7M, T7O, T7J, T7N, T7H, T7I;
Chris@82 800 T7M = FMA(KP681693190, T7L, T7K);
Chris@82 801 T7O = FNMS(KP560319534, T7K, T7L);
Chris@82 802 T7H = FMA(KP249506682, T7G, T7D);
Chris@82 803 T7I = FNMS(KP906616052, T7F, T7E);
Chris@82 804 T7J = FMA(KP557913902, T7I, T7H);
Chris@82 805 T7N = FNMS(KP557913902, T7I, T7H);
Chris@82 806 ii[WS(rs, 7)] = FMA(KP860541664, T7M, T7J);
Chris@82 807 ii[WS(rs, 17)] = FMA(KP949179823, T7O, T7N);
Chris@82 808 ii[WS(rs, 22)] = FNMS(KP860541664, T7M, T7J);
Chris@82 809 ii[WS(rs, 12)] = FNMS(KP949179823, T7O, T7N);
Chris@82 810 }
Chris@82 811 }
Chris@82 812 }
Chris@82 813 }
Chris@82 814 }
Chris@82 815
/*
 * Twiddle instruction table handed to the codelet descriptor `desc` below.
 * It holds one TW_FULL entry carrying the values 0 and 25, followed by a
 * TW_NEXT entry carrying 1 and 0.
 * NOTE(review): presumably TW_FULL asks for the complete twiddle set of a
 * size-25 transform and TW_NEXT closes the list -- confirm against the
 * tw_instr definition, which is not part of this file.
 */
Chris@82 816 static const tw_instr twinstr[] = {
Chris@82 817 {TW_FULL, 0, 25},
Chris@82 818 {TW_NEXT, 1, 0}
Chris@82 819 };
Chris@82 820
/*
 * Descriptor for the FMA build of this codelet: size 25, name "t1_25", the
 * twiddle table twinstr, and &GENUS. The group {84, 48, 316, 0} repeats the
 * operation counts quoted in the generated header comment of this branch
 * (84 additions, 48 multiplications, 316 fused multiply/add).
 * NOTE(review): the meaning of the three trailing zeros depends on the
 * ct_desc layout, which is declared outside this file.
 */
Chris@82 821 static const ct_desc desc = { 25, "t1_25", twinstr, &GENUS, {84, 48, 316, 0}, 0, 0, 0 };
Chris@82 822
/*
 * Registration entry point for the FMA build: passes the planner p, the
 * kernel t1_25 and the address of desc to X(kdft_dit_register).
 */
Chris@82 823 void X(codelet_t1_25) (planner *p) {
Chris@82 824 X(kdft_dit_register) (p, t1_25, &desc);
Chris@82 825 }
Chris@82 826 #else
Chris@82 827
Chris@82 828 /* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 25 -name t1_25 -include dft/scalar/t.h */
Chris@82 829
Chris@82 830 /*
Chris@82 831 * This function contains 400 FP additions, 280 FP multiplications,
Chris@82 832 * (or, 260 additions, 140 multiplications, 140 fused multiply/add),
Chris@82 833 * 101 stack variables, 20 constants, and 100 memory accesses
Chris@82 834 */
Chris@82 835 #include "dft/scalar/t.h"
Chris@82 836
/*
 * t1_25 -- non-FMA build of the generated size-25 twiddle kernel (see the
 * generator command line in the comment above).
 *
 * Parameters, as used by the code below:
 *   ri, ii  arrays that are read and then overwritten in place at the 25
 *           positions [0] and [WS(rs, k)], k = 1..24 (the names suggest
 *           real and imaginary parts -- confirm with the caller).
 *   W       read-only table; each loop pass reads W[0]..W[47], and input k
 *           (k = 1..24) is combined with the pair W[2k-2], W[2k-1].
 *   rs      stride value forwarded to the WS() macro.
 *   mb, me  the loop runs for m = mb, mb+1, ..., me-1.
 *   ms      amount added to ri and ii after every pass.
 *
 * NOTE(review): the per-input pattern
 *     FMA(w0, re, w1 * im)  /  FNMS(w1, re, w0 * im)
 * reads as a complex multiply by the conjugate of (w0 + i*w1), assuming
 * FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b; those macros are defined
 * outside this file, so confirm before relying on it.
 */
Chris@82 837 static void t1_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 838 {
/* Numeric constants declared through DK(); each name echoes the leading digits of its value. */
/* KP559016994 ~ sqrt(5)/4, KP951056516 ~ sin(2*pi/5), KP587785252 ~ sin(pi/5). */
Chris@82 839 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 840 DK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@82 841 DK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@82 842 DK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@82 843 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@82 844 DK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@82 845 DK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@82 846 DK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@82 847 DK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@82 848 DK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@82 849 DK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@82 850 DK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@82 851 DK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@82 852 DK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@82 853 DK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@82 854 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 855 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 856 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 857 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 858 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 859 {
Chris@82 860 INT m;
/* W is first advanced by mb * 48; every pass then steps ri/ii by ms and W by 48. */
/* MAKE_VOLATILE_STRIDE(50, rs) sits in the increment expression; its definition is outside this file. */
Chris@82 861 for (m = mb, W = W + (mb * 48); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 48, MAKE_VOLATILE_STRIDE(50, rs)) {
Chris@82 862 E T1, T6b, T2l, T6o, To, T2m, T6a, T6p, T6t, T6S, T2u, T4I, T2i, T60, T3O;
Chris@82 863 E T5D, T4r, T58, T3Z, T5C, T4q, T5b, TS, T5W, T2G, T5s, T4g, T4M, T2R, T5t;
Chris@82 864 E T4h, T4P, T1l, T5X, T33, T5w, T4j, T4W, T3e, T5v, T4k, T4T, T1P, T5Z, T3r;
Chris@82 865 E T5z, T4o, T51, T3C, T5A, T4n, T54;
/* Stage 1a: load inputs 0, 5, 20, 10, 15 (input 0 is used without a W factor) and combine them. */
Chris@82 866 {
Chris@82 867 E T6, T2o, Tb, T2p, Tc, T68, Th, T2r, Tm, T2s, Tn, T69;
Chris@82 868 T1 = ri[0];
Chris@82 869 T6b = ii[0];
Chris@82 870 {
Chris@82 871 E T3, T5, T2, T4;
Chris@82 872 T3 = ri[WS(rs, 5)];
Chris@82 873 T5 = ii[WS(rs, 5)];
Chris@82 874 T2 = W[8];
Chris@82 875 T4 = W[9];
Chris@82 876 T6 = FMA(T2, T3, T4 * T5);
Chris@82 877 T2o = FNMS(T4, T3, T2 * T5);
Chris@82 878 }
Chris@82 879 {
Chris@82 880 E T8, Ta, T7, T9;
Chris@82 881 T8 = ri[WS(rs, 20)];
Chris@82 882 Ta = ii[WS(rs, 20)];
Chris@82 883 T7 = W[38];
Chris@82 884 T9 = W[39];
Chris@82 885 Tb = FMA(T7, T8, T9 * Ta);
Chris@82 886 T2p = FNMS(T9, T8, T7 * Ta);
Chris@82 887 }
Chris@82 888 Tc = T6 + Tb;
Chris@82 889 T68 = T2o + T2p;
Chris@82 890 {
Chris@82 891 E Te, Tg, Td, Tf;
Chris@82 892 Te = ri[WS(rs, 10)];
Chris@82 893 Tg = ii[WS(rs, 10)];
Chris@82 894 Td = W[18];
Chris@82 895 Tf = W[19];
Chris@82 896 Th = FMA(Td, Te, Tf * Tg);
Chris@82 897 T2r = FNMS(Tf, Te, Td * Tg);
Chris@82 898 }
Chris@82 899 {
Chris@82 900 E Tj, Tl, Ti, Tk;
Chris@82 901 Tj = ri[WS(rs, 15)];
Chris@82 902 Tl = ii[WS(rs, 15)];
Chris@82 903 Ti = W[28];
Chris@82 904 Tk = W[29];
Chris@82 905 Tm = FMA(Ti, Tj, Tk * Tl);
Chris@82 906 T2s = FNMS(Tk, Tj, Ti * Tl);
Chris@82 907 }
Chris@82 908 Tn = Th + Tm;
Chris@82 909 T69 = T2r + T2s;
Chris@82 910 T2l = KP559016994 * (Tc - Tn);
Chris@82 911 T6o = KP559016994 * (T68 - T69);
Chris@82 912 To = Tc + Tn;
Chris@82 913 T2m = FNMS(KP250000000, To, T1);
Chris@82 914 T6a = T68 + T69;
Chris@82 915 T6p = FNMS(KP250000000, T6a, T6b);
Chris@82 916 {
Chris@82 917 E T6r, T6s, T2q, T2t;
Chris@82 918 T6r = T6 - Tb;
Chris@82 919 T6s = Th - Tm;
Chris@82 920 T6t = FMA(KP951056516, T6r, KP587785252 * T6s);
Chris@82 921 T6S = FNMS(KP587785252, T6r, KP951056516 * T6s);
Chris@82 922 T2q = T2o - T2p;
Chris@82 923 T2t = T2r - T2s;
Chris@82 924 T2u = FMA(KP951056516, T2q, KP587785252 * T2t);
Chris@82 925 T4I = FNMS(KP587785252, T2q, KP951056516 * T2t);
Chris@82 926 }
Chris@82 927 }
/* Stage 1b: load inputs 3, 8, 18, 23, 13 with their W pairs and combine them. */
Chris@82 928 {
Chris@82 929 E T1U, T3S, T3J, T3M, T3X, T3W, T3P, T3Q, T3T, T25, T2g, T2h;
Chris@82 930 {
Chris@82 931 E T1R, T1T, T1Q, T1S;
Chris@82 932 T1R = ri[WS(rs, 3)];
Chris@82 933 T1T = ii[WS(rs, 3)];
Chris@82 934 T1Q = W[4];
Chris@82 935 T1S = W[5];
Chris@82 936 T1U = FMA(T1Q, T1R, T1S * T1T);
Chris@82 937 T3S = FNMS(T1S, T1R, T1Q * T1T);
Chris@82 938 }
Chris@82 939 {
Chris@82 940 E T1Z, T3H, T2f, T3L, T24, T3I, T2a, T3K;
Chris@82 941 {
Chris@82 942 E T1W, T1Y, T1V, T1X;
Chris@82 943 T1W = ri[WS(rs, 8)];
Chris@82 944 T1Y = ii[WS(rs, 8)];
Chris@82 945 T1V = W[14];
Chris@82 946 T1X = W[15];
Chris@82 947 T1Z = FMA(T1V, T1W, T1X * T1Y);
Chris@82 948 T3H = FNMS(T1X, T1W, T1V * T1Y);
Chris@82 949 }
Chris@82 950 {
Chris@82 951 E T2c, T2e, T2b, T2d;
Chris@82 952 T2c = ri[WS(rs, 18)];
Chris@82 953 T2e = ii[WS(rs, 18)];
Chris@82 954 T2b = W[34];
Chris@82 955 T2d = W[35];
Chris@82 956 T2f = FMA(T2b, T2c, T2d * T2e);
Chris@82 957 T3L = FNMS(T2d, T2c, T2b * T2e);
Chris@82 958 }
Chris@82 959 {
Chris@82 960 E T21, T23, T20, T22;
Chris@82 961 T21 = ri[WS(rs, 23)];
Chris@82 962 T23 = ii[WS(rs, 23)];
Chris@82 963 T20 = W[44];
Chris@82 964 T22 = W[45];
Chris@82 965 T24 = FMA(T20, T21, T22 * T23);
Chris@82 966 T3I = FNMS(T22, T21, T20 * T23);
Chris@82 967 }
Chris@82 968 {
Chris@82 969 E T27, T29, T26, T28;
Chris@82 970 T27 = ri[WS(rs, 13)];
Chris@82 971 T29 = ii[WS(rs, 13)];
Chris@82 972 T26 = W[24];
Chris@82 973 T28 = W[25];
Chris@82 974 T2a = FMA(T26, T27, T28 * T29);
Chris@82 975 T3K = FNMS(T28, T27, T26 * T29);
Chris@82 976 }
Chris@82 977 T3J = T3H - T3I;
Chris@82 978 T3M = T3K - T3L;
Chris@82 979 T3X = T2a - T2f;
Chris@82 980 T3W = T1Z - T24;
Chris@82 981 T3P = T3H + T3I;
Chris@82 982 T3Q = T3K + T3L;
Chris@82 983 T3T = T3P + T3Q;
Chris@82 984 T25 = T1Z + T24;
Chris@82 985 T2g = T2a + T2f;
Chris@82 986 T2h = T25 + T2g;
Chris@82 987 }
Chris@82 988 T2i = T1U + T2h;
Chris@82 989 T60 = T3S + T3T;
Chris@82 990 {
Chris@82 991 E T3N, T57, T3G, T56, T3E, T3F;
Chris@82 992 T3N = FMA(KP951056516, T3J, KP587785252 * T3M);
Chris@82 993 T57 = FNMS(KP587785252, T3J, KP951056516 * T3M);
Chris@82 994 T3E = KP559016994 * (T25 - T2g);
Chris@82 995 T3F = FNMS(KP250000000, T2h, T1U);
Chris@82 996 T3G = T3E + T3F;
Chris@82 997 T56 = T3F - T3E;
Chris@82 998 T3O = T3G + T3N;
Chris@82 999 T5D = T56 + T57;
Chris@82 1000 T4r = T3G - T3N;
Chris@82 1001 T58 = T56 - T57;
Chris@82 1002 }
Chris@82 1003 {
Chris@82 1004 E T3Y, T59, T3V, T5a, T3R, T3U;
Chris@82 1005 T3Y = FMA(KP951056516, T3W, KP587785252 * T3X);
Chris@82 1006 T59 = FNMS(KP587785252, T3W, KP951056516 * T3X);
Chris@82 1007 T3R = KP559016994 * (T3P - T3Q);
Chris@82 1008 T3U = FNMS(KP250000000, T3T, T3S);
Chris@82 1009 T3V = T3R + T3U;
Chris@82 1010 T5a = T3U - T3R;
Chris@82 1011 T3Z = T3V - T3Y;
Chris@82 1012 T5C = T5a - T59;
Chris@82 1013 T4q = T3Y + T3V;
Chris@82 1014 T5b = T59 + T5a;
Chris@82 1015 }
Chris@82 1016 }
/* Stage 1c: load inputs 1, 6, 16, 21, 11 with their W pairs and combine them. */
Chris@82 1017 {
Chris@82 1018 E Tu, T2K, T2B, T2E, T2P, T2O, T2H, T2I, T2L, TF, TQ, TR;
Chris@82 1019 {
Chris@82 1020 E Tr, Tt, Tq, Ts;
Chris@82 1021 Tr = ri[WS(rs, 1)];
Chris@82 1022 Tt = ii[WS(rs, 1)];
Chris@82 1023 Tq = W[0];
Chris@82 1024 Ts = W[1];
Chris@82 1025 Tu = FMA(Tq, Tr, Ts * Tt);
Chris@82 1026 T2K = FNMS(Ts, Tr, Tq * Tt);
Chris@82 1027 }
Chris@82 1028 {
Chris@82 1029 E Tz, T2z, TP, T2D, TE, T2A, TK, T2C;
Chris@82 1030 {
Chris@82 1031 E Tw, Ty, Tv, Tx;
Chris@82 1032 Tw = ri[WS(rs, 6)];
Chris@82 1033 Ty = ii[WS(rs, 6)];
Chris@82 1034 Tv = W[10];
Chris@82 1035 Tx = W[11];
Chris@82 1036 Tz = FMA(Tv, Tw, Tx * Ty);
Chris@82 1037 T2z = FNMS(Tx, Tw, Tv * Ty);
Chris@82 1038 }
Chris@82 1039 {
Chris@82 1040 E TM, TO, TL, TN;
Chris@82 1041 TM = ri[WS(rs, 16)];
Chris@82 1042 TO = ii[WS(rs, 16)];
Chris@82 1043 TL = W[30];
Chris@82 1044 TN = W[31];
Chris@82 1045 TP = FMA(TL, TM, TN * TO);
Chris@82 1046 T2D = FNMS(TN, TM, TL * TO);
Chris@82 1047 }
Chris@82 1048 {
Chris@82 1049 E TB, TD, TA, TC;
Chris@82 1050 TB = ri[WS(rs, 21)];
Chris@82 1051 TD = ii[WS(rs, 21)];
Chris@82 1052 TA = W[40];
Chris@82 1053 TC = W[41];
Chris@82 1054 TE = FMA(TA, TB, TC * TD);
Chris@82 1055 T2A = FNMS(TC, TB, TA * TD);
Chris@82 1056 }
Chris@82 1057 {
Chris@82 1058 E TH, TJ, TG, TI;
Chris@82 1059 TH = ri[WS(rs, 11)];
Chris@82 1060 TJ = ii[WS(rs, 11)];
Chris@82 1061 TG = W[20];
Chris@82 1062 TI = W[21];
Chris@82 1063 TK = FMA(TG, TH, TI * TJ);
Chris@82 1064 T2C = FNMS(TI, TH, TG * TJ);
Chris@82 1065 }
Chris@82 1066 T2B = T2z - T2A;
Chris@82 1067 T2E = T2C - T2D;
Chris@82 1068 T2P = TK - TP;
Chris@82 1069 T2O = Tz - TE;
Chris@82 1070 T2H = T2z + T2A;
Chris@82 1071 T2I = T2C + T2D;
Chris@82 1072 T2L = T2H + T2I;
Chris@82 1073 TF = Tz + TE;
Chris@82 1074 TQ = TK + TP;
Chris@82 1075 TR = TF + TQ;
Chris@82 1076 }
Chris@82 1077 TS = Tu + TR;
Chris@82 1078 T5W = T2K + T2L;
Chris@82 1079 {
Chris@82 1080 E T2F, T4L, T2y, T4K, T2w, T2x;
Chris@82 1081 T2F = FMA(KP951056516, T2B, KP587785252 * T2E);
Chris@82 1082 T4L = FNMS(KP587785252, T2B, KP951056516 * T2E);
Chris@82 1083 T2w = KP559016994 * (TF - TQ);
Chris@82 1084 T2x = FNMS(KP250000000, TR, Tu);
Chris@82 1085 T2y = T2w + T2x;
Chris@82 1086 T4K = T2x - T2w;
Chris@82 1087 T2G = T2y + T2F;
Chris@82 1088 T5s = T4K + T4L;
Chris@82 1089 T4g = T2y - T2F;
Chris@82 1090 T4M = T4K - T4L;
Chris@82 1091 }
Chris@82 1092 {
Chris@82 1093 E T2Q, T4N, T2N, T4O, T2J, T2M;
Chris@82 1094 T2Q = FMA(KP951056516, T2O, KP587785252 * T2P);
Chris@82 1095 T4N = FNMS(KP587785252, T2O, KP951056516 * T2P);
Chris@82 1096 T2J = KP559016994 * (T2H - T2I);
Chris@82 1097 T2M = FNMS(KP250000000, T2L, T2K);
Chris@82 1098 T2N = T2J + T2M;
Chris@82 1099 T4O = T2M - T2J;
Chris@82 1100 T2R = T2N - T2Q;
Chris@82 1101 T5t = T4O - T4N;
Chris@82 1102 T4h = T2Q + T2N;
Chris@82 1103 T4P = T4N + T4O;
Chris@82 1104 }
Chris@82 1105 }
/* Stage 1d: load inputs 4, 9, 19, 24, 14 with their W pairs and combine them. */
Chris@82 1106 {
Chris@82 1107 E TX, T37, T2Y, T31, T3c, T3b, T34, T35, T38, T18, T1j, T1k;
Chris@82 1108 {
Chris@82 1109 E TU, TW, TT, TV;
Chris@82 1110 TU = ri[WS(rs, 4)];
Chris@82 1111 TW = ii[WS(rs, 4)];
Chris@82 1112 TT = W[6];
Chris@82 1113 TV = W[7];
Chris@82 1114 TX = FMA(TT, TU, TV * TW);
Chris@82 1115 T37 = FNMS(TV, TU, TT * TW);
Chris@82 1116 }
Chris@82 1117 {
Chris@82 1118 E T12, T2W, T1i, T30, T17, T2X, T1d, T2Z;
Chris@82 1119 {
Chris@82 1120 E TZ, T11, TY, T10;
Chris@82 1121 TZ = ri[WS(rs, 9)];
Chris@82 1122 T11 = ii[WS(rs, 9)];
Chris@82 1123 TY = W[16];
Chris@82 1124 T10 = W[17];
Chris@82 1125 T12 = FMA(TY, TZ, T10 * T11);
Chris@82 1126 T2W = FNMS(T10, TZ, TY * T11);
Chris@82 1127 }
Chris@82 1128 {
Chris@82 1129 E T1f, T1h, T1e, T1g;
Chris@82 1130 T1f = ri[WS(rs, 19)];
Chris@82 1131 T1h = ii[WS(rs, 19)];
Chris@82 1132 T1e = W[36];
Chris@82 1133 T1g = W[37];
Chris@82 1134 T1i = FMA(T1e, T1f, T1g * T1h);
Chris@82 1135 T30 = FNMS(T1g, T1f, T1e * T1h);
Chris@82 1136 }
Chris@82 1137 {
Chris@82 1138 E T14, T16, T13, T15;
Chris@82 1139 T14 = ri[WS(rs, 24)];
Chris@82 1140 T16 = ii[WS(rs, 24)];
Chris@82 1141 T13 = W[46];
Chris@82 1142 T15 = W[47];
Chris@82 1143 T17 = FMA(T13, T14, T15 * T16);
Chris@82 1144 T2X = FNMS(T15, T14, T13 * T16);
Chris@82 1145 }
Chris@82 1146 {
Chris@82 1147 E T1a, T1c, T19, T1b;
Chris@82 1148 T1a = ri[WS(rs, 14)];
Chris@82 1149 T1c = ii[WS(rs, 14)];
Chris@82 1150 T19 = W[26];
Chris@82 1151 T1b = W[27];
Chris@82 1152 T1d = FMA(T19, T1a, T1b * T1c);
Chris@82 1153 T2Z = FNMS(T1b, T1a, T19 * T1c);
Chris@82 1154 }
Chris@82 1155 T2Y = T2W - T2X;
Chris@82 1156 T31 = T2Z - T30;
Chris@82 1157 T3c = T1d - T1i;
Chris@82 1158 T3b = T12 - T17;
Chris@82 1159 T34 = T2W + T2X;
Chris@82 1160 T35 = T2Z + T30;
Chris@82 1161 T38 = T34 + T35;
Chris@82 1162 T18 = T12 + T17;
Chris@82 1163 T1j = T1d + T1i;
Chris@82 1164 T1k = T18 + T1j;
Chris@82 1165 }
Chris@82 1166 T1l = TX + T1k;
Chris@82 1167 T5X = T37 + T38;
Chris@82 1168 {
Chris@82 1169 E T32, T4V, T2V, T4U, T2T, T2U;
Chris@82 1170 T32 = FMA(KP951056516, T2Y, KP587785252 * T31);
Chris@82 1171 T4V = FNMS(KP587785252, T2Y, KP951056516 * T31);
Chris@82 1172 T2T = KP559016994 * (T18 - T1j);
Chris@82 1173 T2U = FNMS(KP250000000, T1k, TX);
Chris@82 1174 T2V = T2T + T2U;
Chris@82 1175 T4U = T2U - T2T;
Chris@82 1176 T33 = T2V + T32;
Chris@82 1177 T5w = T4U + T4V;
Chris@82 1178 T4j = T2V - T32;
Chris@82 1179 T4W = T4U - T4V;
Chris@82 1180 }
Chris@82 1181 {
Chris@82 1182 E T3d, T4R, T3a, T4S, T36, T39;
Chris@82 1183 T3d = FMA(KP951056516, T3b, KP587785252 * T3c);
Chris@82 1184 T4R = FNMS(KP587785252, T3b, KP951056516 * T3c);
Chris@82 1185 T36 = KP559016994 * (T34 - T35);
Chris@82 1186 T39 = FNMS(KP250000000, T38, T37);
Chris@82 1187 T3a = T36 + T39;
Chris@82 1188 T4S = T39 - T36;
Chris@82 1189 T3e = T3a - T3d;
Chris@82 1190 T5v = T4S - T4R;
Chris@82 1191 T4k = T3d + T3a;
Chris@82 1192 T4T = T4R + T4S;
Chris@82 1193 }
Chris@82 1194 }
/* Stage 1e: load inputs 2, 7, 17, 22, 12 with their W pairs and combine them. */
Chris@82 1195 {
Chris@82 1196 E T1r, T3v, T3m, T3p, T3A, T3z, T3s, T3t, T3w, T1C, T1N, T1O;
Chris@82 1197 {
Chris@82 1198 E T1o, T1q, T1n, T1p;
Chris@82 1199 T1o = ri[WS(rs, 2)];
Chris@82 1200 T1q = ii[WS(rs, 2)];
Chris@82 1201 T1n = W[2];
Chris@82 1202 T1p = W[3];
Chris@82 1203 T1r = FMA(T1n, T1o, T1p * T1q);
Chris@82 1204 T3v = FNMS(T1p, T1o, T1n * T1q);
Chris@82 1205 }
Chris@82 1206 {
Chris@82 1207 E T1w, T3k, T1M, T3o, T1B, T3l, T1H, T3n;
Chris@82 1208 {
Chris@82 1209 E T1t, T1v, T1s, T1u;
Chris@82 1210 T1t = ri[WS(rs, 7)];
Chris@82 1211 T1v = ii[WS(rs, 7)];
Chris@82 1212 T1s = W[12];
Chris@82 1213 T1u = W[13];
Chris@82 1214 T1w = FMA(T1s, T1t, T1u * T1v);
Chris@82 1215 T3k = FNMS(T1u, T1t, T1s * T1v);
Chris@82 1216 }
Chris@82 1217 {
Chris@82 1218 E T1J, T1L, T1I, T1K;
Chris@82 1219 T1J = ri[WS(rs, 17)];
Chris@82 1220 T1L = ii[WS(rs, 17)];
Chris@82 1221 T1I = W[32];
Chris@82 1222 T1K = W[33];
Chris@82 1223 T1M = FMA(T1I, T1J, T1K * T1L);
Chris@82 1224 T3o = FNMS(T1K, T1J, T1I * T1L);
Chris@82 1225 }
Chris@82 1226 {
Chris@82 1227 E T1y, T1A, T1x, T1z;
Chris@82 1228 T1y = ri[WS(rs, 22)];
Chris@82 1229 T1A = ii[WS(rs, 22)];
Chris@82 1230 T1x = W[42];
Chris@82 1231 T1z = W[43];
Chris@82 1232 T1B = FMA(T1x, T1y, T1z * T1A);
Chris@82 1233 T3l = FNMS(T1z, T1y, T1x * T1A);
Chris@82 1234 }
Chris@82 1235 {
Chris@82 1236 E T1E, T1G, T1D, T1F;
Chris@82 1237 T1E = ri[WS(rs, 12)];
Chris@82 1238 T1G = ii[WS(rs, 12)];
Chris@82 1239 T1D = W[22];
Chris@82 1240 T1F = W[23];
Chris@82 1241 T1H = FMA(T1D, T1E, T1F * T1G);
Chris@82 1242 T3n = FNMS(T1F, T1E, T1D * T1G);
Chris@82 1243 }
Chris@82 1244 T3m = T3k - T3l;
Chris@82 1245 T3p = T3n - T3o;
Chris@82 1246 T3A = T1H - T1M;
Chris@82 1247 T3z = T1w - T1B;
Chris@82 1248 T3s = T3k + T3l;
Chris@82 1249 T3t = T3n + T3o;
Chris@82 1250 T3w = T3s + T3t;
Chris@82 1251 T1C = T1w + T1B;
Chris@82 1252 T1N = T1H + T1M;
Chris@82 1253 T1O = T1C + T1N;
Chris@82 1254 }
Chris@82 1255 T1P = T1r + T1O;
Chris@82 1256 T5Z = T3v + T3w;
Chris@82 1257 {
Chris@82 1258 E T3q, T50, T3j, T4Z, T3h, T3i;
Chris@82 1259 T3q = FMA(KP951056516, T3m, KP587785252 * T3p);
Chris@82 1260 T50 = FNMS(KP587785252, T3m, KP951056516 * T3p);
Chris@82 1261 T3h = KP559016994 * (T1C - T1N);
Chris@82 1262 T3i = FNMS(KP250000000, T1O, T1r);
Chris@82 1263 T3j = T3h + T3i;
Chris@82 1264 T4Z = T3i - T3h;
Chris@82 1265 T3r = T3j + T3q;
Chris@82 1266 T5z = T4Z + T50;
Chris@82 1267 T4o = T3j - T3q;
Chris@82 1268 T51 = T4Z - T50;
Chris@82 1269 }
Chris@82 1270 {
Chris@82 1271 E T3B, T52, T3y, T53, T3u, T3x;
Chris@82 1272 T3B = FMA(KP951056516, T3z, KP587785252 * T3A);
Chris@82 1273 T52 = FNMS(KP587785252, T3z, KP951056516 * T3A);
Chris@82 1274 T3u = KP559016994 * (T3s - T3t);
Chris@82 1275 T3x = FNMS(KP250000000, T3w, T3v);
Chris@82 1276 T3y = T3u + T3x;
Chris@82 1277 T53 = T3x - T3u;
Chris@82 1278 T3C = T3y - T3B;
Chris@82 1279 T5A = T53 - T52;
Chris@82 1280 T4n = T3B + T3y;
Chris@82 1281 T54 = T52 + T53;
Chris@82 1282 }
Chris@82 1283 }
/* Stage 2a: write ri at positions 0, 10, 15, 20, 5. */
Chris@82 1284 {
Chris@82 1285 E T62, T64, Tp, T2k, T5T, T5U, T63, T5V;
Chris@82 1286 {
Chris@82 1287 E T5Y, T61, T1m, T2j;
Chris@82 1288 T5Y = T5W - T5X;
Chris@82 1289 T61 = T5Z - T60;
Chris@82 1290 T62 = FMA(KP951056516, T5Y, KP587785252 * T61);
Chris@82 1291 T64 = FNMS(KP587785252, T5Y, KP951056516 * T61);
Chris@82 1292 Tp = T1 + To;
Chris@82 1293 T1m = TS + T1l;
Chris@82 1294 T2j = T1P + T2i;
Chris@82 1295 T2k = T1m + T2j;
Chris@82 1296 T5T = KP559016994 * (T1m - T2j);
Chris@82 1297 T5U = FNMS(KP250000000, T2k, Tp);
Chris@82 1298 }
Chris@82 1299 ri[0] = Tp + T2k;
Chris@82 1300 T63 = T5U - T5T;
Chris@82 1301 ri[WS(rs, 10)] = T63 - T64;
Chris@82 1302 ri[WS(rs, 15)] = T63 + T64;
Chris@82 1303 T5V = T5T + T5U;
Chris@82 1304 ri[WS(rs, 20)] = T5V - T62;
Chris@82 1305 ri[WS(rs, 5)] = T5V + T62;
Chris@82 1306 }
/* Stage 2b: write ii at positions 0, 10, 15, 5, 20. */
Chris@82 1307 {
Chris@82 1308 E T6i, T6j, T6c, T67, T6d, T6e, T6k, T6f;
Chris@82 1309 {
Chris@82 1310 E T6g, T6h, T65, T66;
Chris@82 1311 T6g = TS - T1l;
Chris@82 1312 T6h = T1P - T2i;
Chris@82 1313 T6i = FMA(KP951056516, T6g, KP587785252 * T6h);
Chris@82 1314 T6j = FNMS(KP587785252, T6g, KP951056516 * T6h);
Chris@82 1315 T6c = T6a + T6b;
Chris@82 1316 T65 = T5W + T5X;
Chris@82 1317 T66 = T5Z + T60;
Chris@82 1318 T67 = T65 + T66;
Chris@82 1319 T6d = KP559016994 * (T65 - T66);
Chris@82 1320 T6e = FNMS(KP250000000, T67, T6c);
Chris@82 1321 }
Chris@82 1322 ii[0] = T67 + T6c;
Chris@82 1323 T6k = T6e - T6d;
Chris@82 1324 ii[WS(rs, 10)] = T6j + T6k;
Chris@82 1325 ii[WS(rs, 15)] = T6k - T6j;
Chris@82 1326 T6f = T6d + T6e;
Chris@82 1327 ii[WS(rs, 5)] = T6f - T6i;
Chris@82 1328 ii[WS(rs, 20)] = T6i + T6f;
Chris@82 1329 }
/* Stage 2c: write ri and ii at positions 1, 4, 6, 9, 11, 14, 16, 19, 21, 24. */
Chris@82 1330 {
Chris@82 1331 E T2v, T4f, T6u, T6G, T42, T6z, T43, T6y, T4A, T6H, T4D, T6F, T4u, T6L, T4v;
Chris@82 1332 E T6K, T48, T6v, T4b, T6n, T2n, T6q;
Chris@82 1333 T2n = T2l + T2m;
Chris@82 1334 T2v = T2n + T2u;
Chris@82 1335 T4f = T2n - T2u;
Chris@82 1336 T6q = T6o + T6p;
Chris@82 1337 T6u = T6q - T6t;
Chris@82 1338 T6G = T6t + T6q;
Chris@82 1339 {
Chris@82 1340 E T2S, T3f, T3g, T3D, T40, T41;
Chris@82 1341 T2S = FMA(KP968583161, T2G, KP248689887 * T2R);
Chris@82 1342 T3f = FMA(KP535826794, T33, KP844327925 * T3e);
Chris@82 1343 T3g = T2S + T3f;
Chris@82 1344 T3D = FMA(KP876306680, T3r, KP481753674 * T3C);
Chris@82 1345 T40 = FMA(KP728968627, T3O, KP684547105 * T3Z);
Chris@82 1346 T41 = T3D + T40;
Chris@82 1347 T42 = T3g + T41;
Chris@82 1348 T6z = T3D - T40;
Chris@82 1349 T43 = KP559016994 * (T3g - T41);
Chris@82 1350 T6y = T2S - T3f;
Chris@82 1351 }
Chris@82 1352 {
Chris@82 1353 E T4y, T4z, T6D, T4B, T4C, T6E;
Chris@82 1354 T4y = FNMS(KP844327925, T4g, KP535826794 * T4h);
Chris@82 1355 T4z = FNMS(KP637423989, T4k, KP770513242 * T4j);
Chris@82 1356 T6D = T4y + T4z;
Chris@82 1357 T4B = FMA(KP125333233, T4r, KP992114701 * T4q);
Chris@82 1358 T4C = FMA(KP904827052, T4o, KP425779291 * T4n);
Chris@82 1359 T6E = T4C + T4B;
Chris@82 1360 T4A = T4y - T4z;
Chris@82 1361 T6H = KP559016994 * (T6D + T6E);
Chris@82 1362 T4D = T4B - T4C;
Chris@82 1363 T6F = T6D - T6E;
Chris@82 1364 }
Chris@82 1365 {
Chris@82 1366 E T4i, T4l, T4m, T4p, T4s, T4t;
Chris@82 1367 T4i = FMA(KP535826794, T4g, KP844327925 * T4h);
Chris@82 1368 T4l = FMA(KP637423989, T4j, KP770513242 * T4k);
Chris@82 1369 T4m = T4i - T4l;
Chris@82 1370 T4p = FNMS(KP425779291, T4o, KP904827052 * T4n);
Chris@82 1371 T4s = FNMS(KP992114701, T4r, KP125333233 * T4q);
Chris@82 1372 T4t = T4p + T4s;
Chris@82 1373 T4u = T4m + T4t;
Chris@82 1374 T6L = T4p - T4s;
Chris@82 1375 T4v = KP559016994 * (T4m - T4t);
Chris@82 1376 T6K = T4i + T4l;
Chris@82 1377 }
Chris@82 1378 {
Chris@82 1379 E T46, T47, T6l, T49, T4a, T6m;
Chris@82 1380 T46 = FNMS(KP248689887, T2G, KP968583161 * T2R);
Chris@82 1381 T47 = FNMS(KP844327925, T33, KP535826794 * T3e);
Chris@82 1382 T6l = T46 + T47;
Chris@82 1383 T49 = FNMS(KP481753674, T3r, KP876306680 * T3C);
Chris@82 1384 T4a = FNMS(KP684547105, T3O, KP728968627 * T3Z);
Chris@82 1385 T6m = T49 + T4a;
Chris@82 1386 T48 = T46 - T47;
Chris@82 1387 T6v = KP559016994 * (T6l - T6m);
Chris@82 1388 T4b = T49 - T4a;
Chris@82 1389 T6n = T6l + T6m;
Chris@82 1390 }
Chris@82 1391 ri[WS(rs, 1)] = T2v + T42;
Chris@82 1392 ii[WS(rs, 1)] = T6n + T6u;
Chris@82 1393 ri[WS(rs, 4)] = T4f + T4u;
Chris@82 1394 ii[WS(rs, 4)] = T6F + T6G;
Chris@82 1395 {
Chris@82 1396 E T4c, T4e, T45, T4d, T44;
Chris@82 1397 T4c = FMA(KP951056516, T48, KP587785252 * T4b);
Chris@82 1398 T4e = FNMS(KP587785252, T48, KP951056516 * T4b);
Chris@82 1399 T44 = FNMS(KP250000000, T42, T2v);
Chris@82 1400 T45 = T43 + T44;
Chris@82 1401 T4d = T44 - T43;
Chris@82 1402 ri[WS(rs, 21)] = T45 - T4c;
Chris@82 1403 ri[WS(rs, 16)] = T4d + T4e;
Chris@82 1404 ri[WS(rs, 6)] = T45 + T4c;
Chris@82 1405 ri[WS(rs, 11)] = T4d - T4e;
Chris@82 1406 }
Chris@82 1407 {
Chris@82 1408 E T6A, T6B, T6x, T6C, T6w;
Chris@82 1409 T6A = FMA(KP951056516, T6y, KP587785252 * T6z);
Chris@82 1410 T6B = FNMS(KP587785252, T6y, KP951056516 * T6z);
Chris@82 1411 T6w = FNMS(KP250000000, T6n, T6u);
Chris@82 1412 T6x = T6v + T6w;
Chris@82 1413 T6C = T6w - T6v;
Chris@82 1414 ii[WS(rs, 6)] = T6x - T6A;
Chris@82 1415 ii[WS(rs, 16)] = T6C - T6B;
Chris@82 1416 ii[WS(rs, 21)] = T6A + T6x;
Chris@82 1417 ii[WS(rs, 11)] = T6B + T6C;
Chris@82 1418 }
Chris@82 1419 {
Chris@82 1420 E T4E, T4G, T4x, T4F, T4w;
Chris@82 1421 T4E = FMA(KP951056516, T4A, KP587785252 * T4D);
Chris@82 1422 T4G = FNMS(KP587785252, T4A, KP951056516 * T4D);
Chris@82 1423 T4w = FNMS(KP250000000, T4u, T4f);
Chris@82 1424 T4x = T4v + T4w;
Chris@82 1425 T4F = T4w - T4v;
Chris@82 1426 ri[WS(rs, 24)] = T4x - T4E;
Chris@82 1427 ri[WS(rs, 19)] = T4F + T4G;
Chris@82 1428 ri[WS(rs, 9)] = T4x + T4E;
Chris@82 1429 ri[WS(rs, 14)] = T4F - T4G;
Chris@82 1430 }
Chris@82 1431 {
Chris@82 1432 E T6M, T6N, T6J, T6O, T6I;
Chris@82 1433 T6M = FMA(KP951056516, T6K, KP587785252 * T6L);
Chris@82 1434 T6N = FNMS(KP587785252, T6K, KP951056516 * T6L);
Chris@82 1435 T6I = FNMS(KP250000000, T6F, T6G);
Chris@82 1436 T6J = T6H + T6I;
Chris@82 1437 T6O = T6I - T6H;
Chris@82 1438 ii[WS(rs, 9)] = T6J - T6M;
Chris@82 1439 ii[WS(rs, 19)] = T6O - T6N;
Chris@82 1440 ii[WS(rs, 24)] = T6M + T6J;
Chris@82 1441 ii[WS(rs, 14)] = T6N + T6O;
Chris@82 1442 }
Chris@82 1443 }
/* Stage 2d: write ri and ii at positions 2, 3, 7, 8, 12, 13, 17, 18, 22, 23. */
Chris@82 1444 {
Chris@82 1445 E T4J, T5r, T6U, T76, T5e, T6Z, T5f, T6Y, T5M, T77, T5P, T75, T5G, T7b, T5H;
Chris@82 1446 E T7a, T5k, T6V, T5n, T6R, T4H, T6T;
Chris@82 1447 T4H = T2m - T2l;
Chris@82 1448 T4J = T4H - T4I;
Chris@82 1449 T5r = T4H + T4I;
Chris@82 1450 T6T = T6p - T6o;
Chris@82 1451 T6U = T6S + T6T;
Chris@82 1452 T76 = T6T - T6S;
Chris@82 1453 {
Chris@82 1454 E T4Q, T4X, T4Y, T55, T5c, T5d;
Chris@82 1455 T4Q = FMA(KP876306680, T4M, KP481753674 * T4P);
Chris@82 1456 T4X = FNMS(KP425779291, T4W, KP904827052 * T4T);
Chris@82 1457 T4Y = T4Q + T4X;
Chris@82 1458 T55 = FMA(KP535826794, T51, KP844327925 * T54);
Chris@82 1459 T5c = FMA(KP062790519, T58, KP998026728 * T5b);
Chris@82 1460 T5d = T55 + T5c;
Chris@82 1461 T5e = T4Y + T5d;
Chris@82 1462 T6Z = T55 - T5c;
Chris@82 1463 T5f = KP559016994 * (T4Y - T5d);
Chris@82 1464 T6Y = T4Q - T4X;
Chris@82 1465 }
Chris@82 1466 {
Chris@82 1467 E T5K, T5L, T73, T5N, T5O, T74;
Chris@82 1468 T5K = FNMS(KP684547105, T5s, KP728968627 * T5t);
Chris@82 1469 T5L = FMA(KP125333233, T5w, KP992114701 * T5v);
Chris@82 1470 T73 = T5K - T5L;
Chris@82 1471 T5N = FNMS(KP998026728, T5z, KP062790519 * T5A);
Chris@82 1472 T5O = FMA(KP770513242, T5D, KP637423989 * T5C);
Chris@82 1473 T74 = T5N - T5O;
Chris@82 1474 T5M = T5K + T5L;
Chris@82 1475 T77 = KP559016994 * (T73 - T74);
Chris@82 1476 T5P = T5N + T5O;
Chris@82 1477 T75 = T73 + T74;
Chris@82 1478 }
Chris@82 1479 {
Chris@82 1480 E T5u, T5x, T5y, T5B, T5E, T5F;
Chris@82 1481 T5u = FMA(KP728968627, T5s, KP684547105 * T5t);
Chris@82 1482 T5x = FNMS(KP992114701, T5w, KP125333233 * T5v);
Chris@82 1483 T5y = T5u + T5x;
Chris@82 1484 T5B = FMA(KP062790519, T5z, KP998026728 * T5A);
Chris@82 1485 T5E = FNMS(KP637423989, T5D, KP770513242 * T5C);
Chris@82 1486 T5F = T5B + T5E;
Chris@82 1487 T5G = T5y + T5F;
Chris@82 1488 T7b = T5B - T5E;
Chris@82 1489 T5H = KP559016994 * (T5y - T5F);
Chris@82 1490 T7a = T5u - T5x;
Chris@82 1491 }
Chris@82 1492 {
Chris@82 1493 E T5i, T5j, T6P, T5l, T5m, T6Q;
Chris@82 1494 T5i = FNMS(KP481753674, T4M, KP876306680 * T4P);
Chris@82 1495 T5j = FMA(KP904827052, T4W, KP425779291 * T4T);
Chris@82 1496 T6P = T5i - T5j;
Chris@82 1497 T5l = FNMS(KP844327925, T51, KP535826794 * T54);
Chris@82 1498 T5m = FNMS(KP998026728, T58, KP062790519 * T5b);
Chris@82 1499 T6Q = T5l + T5m;
Chris@82 1500 T5k = T5i + T5j;
Chris@82 1501 T6V = KP559016994 * (T6P - T6Q);
Chris@82 1502 T5n = T5l - T5m;
Chris@82 1503 T6R = T6P + T6Q;
Chris@82 1504 }
Chris@82 1505 ri[WS(rs, 2)] = T4J + T5e;
Chris@82 1506 ii[WS(rs, 2)] = T6R + T6U;
Chris@82 1507 ri[WS(rs, 3)] = T5r + T5G;
Chris@82 1508 ii[WS(rs, 3)] = T75 + T76;
Chris@82 1509 {
Chris@82 1510 E T5o, T5q, T5h, T5p, T5g;
Chris@82 1511 T5o = FMA(KP951056516, T5k, KP587785252 * T5n);
Chris@82 1512 T5q = FNMS(KP587785252, T5k, KP951056516 * T5n);
Chris@82 1513 T5g = FNMS(KP250000000, T5e, T4J);
Chris@82 1514 T5h = T5f + T5g;
Chris@82 1515 T5p = T5g - T5f;
Chris@82 1516 ri[WS(rs, 22)] = T5h - T5o;
Chris@82 1517 ri[WS(rs, 17)] = T5p + T5q;
Chris@82 1518 ri[WS(rs, 7)] = T5h + T5o;
Chris@82 1519 ri[WS(rs, 12)] = T5p - T5q;
Chris@82 1520 }
Chris@82 1521 {
Chris@82 1522 E T70, T71, T6X, T72, T6W;
Chris@82 1523 T70 = FMA(KP951056516, T6Y, KP587785252 * T6Z);
Chris@82 1524 T71 = FNMS(KP587785252, T6Y, KP951056516 * T6Z);
Chris@82 1525 T6W = FNMS(KP250000000, T6R, T6U);
Chris@82 1526 T6X = T6V + T6W;
Chris@82 1527 T72 = T6W - T6V;
Chris@82 1528 ii[WS(rs, 7)] = T6X - T70;
Chris@82 1529 ii[WS(rs, 17)] = T72 - T71;
Chris@82 1530 ii[WS(rs, 22)] = T70 + T6X;
Chris@82 1531 ii[WS(rs, 12)] = T71 + T72;
Chris@82 1532 }
Chris@82 1533 {
Chris@82 1534 E T5Q, T5S, T5J, T5R, T5I;
Chris@82 1535 T5Q = FMA(KP951056516, T5M, KP587785252 * T5P);
Chris@82 1536 T5S = FNMS(KP587785252, T5M, KP951056516 * T5P);
Chris@82 1537 T5I = FNMS(KP250000000, T5G, T5r);
Chris@82 1538 T5J = T5H + T5I;
Chris@82 1539 T5R = T5I - T5H;
Chris@82 1540 ri[WS(rs, 23)] = T5J - T5Q;
Chris@82 1541 ri[WS(rs, 18)] = T5R + T5S;
Chris@82 1542 ri[WS(rs, 8)] = T5J + T5Q;
Chris@82 1543 ri[WS(rs, 13)] = T5R - T5S;
Chris@82 1544 }
Chris@82 1545 {
Chris@82 1546 E T7c, T7d, T79, T7e, T78;
Chris@82 1547 T7c = FMA(KP951056516, T7a, KP587785252 * T7b);
Chris@82 1548 T7d = FNMS(KP587785252, T7a, KP951056516 * T7b);
Chris@82 1549 T78 = FNMS(KP250000000, T75, T76);
Chris@82 1550 T79 = T77 + T78;
Chris@82 1551 T7e = T78 - T77;
Chris@82 1552 ii[WS(rs, 8)] = T79 - T7c;
Chris@82 1553 ii[WS(rs, 18)] = T7e - T7d;
Chris@82 1554 ii[WS(rs, 23)] = T7c + T79;
Chris@82 1555 ii[WS(rs, 13)] = T7d + T7e;
Chris@82 1556 }
Chris@82 1557 }
Chris@82 1558 }
Chris@82 1559 }
Chris@82 1560 }
Chris@82 1561
/*
 * Twiddle instruction table handed to the codelet descriptor `desc` below.
 * It holds one TW_FULL entry carrying the values 0 and 25, followed by a
 * TW_NEXT entry carrying 1 and 0.
 * NOTE(review): presumably TW_FULL asks for the complete twiddle set of a
 * size-25 transform (t1_25 above consumes 48 W entries per pass, i.e.
 * 24 pairs) and TW_NEXT closes the list -- confirm against the tw_instr
 * definition, which is not part of this file.
 */
Chris@82 1562 static const tw_instr twinstr[] = {
Chris@82 1563 {TW_FULL, 0, 25},
Chris@82 1564 {TW_NEXT, 1, 0}
Chris@82 1565 };
Chris@82 1566
/*
 * Descriptor for the non-FMA build of this codelet: size 25, name "t1_25",
 * the twiddle table twinstr, and &GENUS. The group {260, 140, 140, 0}
 * repeats the operation counts quoted in the generated comment of this
 * branch (260 additions, 140 multiplications, 140 fused multiply/add).
 * NOTE(review): the meaning of the three trailing zeros depends on the
 * ct_desc layout, which is declared outside this file.
 */
Chris@82 1567 static const ct_desc desc = { 25, "t1_25", twinstr, &GENUS, {260, 140, 140, 0}, 0, 0, 0 };
Chris@82 1568
/*
 * Registration entry point for the non-FMA build: passes the planner p, the
 * kernel t1_25 and the address of desc to X(kdft_dit_register).
 */
Chris@82 1569 void X(codelet_t1_25) (planner *p) {
Chris@82 1570 X(kdft_dit_register) (p, t1_25, &desc);
Chris@82 1571 }
Chris@82 1572 #endif