annotate src/fftw-3.3.8/dft/scalar/codelets/t2_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:20 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -name t2_32 -include dft/scalar/t.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 488 FP additions, 350 FP multiplications,
Chris@82 32 * (or, 236 additions, 98 multiplications, 252 fused multiply/add),
Chris@82 33 * 164 stack variables, 7 constants, and 128 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/t.h"
Chris@82 36
/*
 * t2_32: machine-generated kernel (see the "Generated by" note above:
 * gen_twiddle ... -twiddle-log3 -n 32 -name t2_32).
 *
 * For every m in [mb, me) the routine
 *   1. reads W[0..7] and derives further multiplier pairs from products
 *      of those eight values,
 *   2. reads the 32 pairs ri[WS(rs, k)], ii[WS(rs, k)] for k = 0..31 and
 *      scales every pair except k = 0 by one of the multiplier pairs,
 *   3. combines the scaled values with sums/differences and the KP*
 *      constants, and stores 32 pairs back to the same ri/ii locations.
 * All 32 loads of an iteration happen before the first store.
 *
 * Parameters:
 *   ri, ii  arrays read and overwritten at offsets WS(rs, 0..31); both
 *           pointers advance by ms after each iteration (presumably the
 *           real and imaginary parts of the data -- confirm).
 *   W       multiplier table; offset by mb * 8 before the first iteration,
 *           then advanced by 8 per iteration.
 *   rs      stride argument handed to WS().
 *   mb, me  half-open iteration range [mb, me).
 *   ms      per-iteration increment applied to ri and ii.
 *
 * NOTE(review): E, R, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE come from the included headers and are not visible
 * here. The comments below assume FMA(a, b, c) = a * b + c and
 * FNMS(a, b, c) = c - a * b -- confirm against those headers.
 */
Chris@82 37 static void t2_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* The literals below match, in order: cos(pi/16), cos(3*pi/16), tan(pi/16),
 * tan(3*pi/16), cos(pi/8), tan(pi/8) and sqrt(1/2). */
Chris@82 39 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 40 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 41 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@82 42 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@82 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 44 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@82 45 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 46 {
Chris@82 47 INT m;
/* One pass per m: W starts at W + mb * 8; each step moves ri and ii by ms
 * and W by 8. */
Chris@82 48 for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) {
/* Multiplier values that stay live into the input stage further down. */
Chris@82 49 E T2, T8, T3, T6, Te, Ti, T5, T7, TJ, Tb, TM, Tc, Ts, T23, T1w;
Chris@82 50 E T19, TA, TE, T1s, T1N, T1o, T1C, T1F, T1K, T15, T11, T2F, T31, T2J, T34;
Chris@82 51 E T3f, T3z, T3j, T3C, Tw, T3M, T3Q, T1z, T2s, T2w, T1d, T3n, T3r, T26, T2T;
Chris@82 52 E T2X, Th, TR, TP, Td, Tj, TW, Tn, TS, T1U, T2b, T29, T1R, T1V, T2g;
Chris@82 53 E T1Z, T2c;
/* Stage 1: load W[0..7] and build the remaining multiplier pairs from
 * products of them. The stored pairs are used directly further down:
 * (W[0], W[1]) on element 1, (W[2], W[3]) on element 3,
 * (W[4], W[5]) on element 9 and (W[6], W[7]) on element 27. */
Chris@82 54 {
Chris@82 55 E Tz, T1n, T10, TD, T1r, T14, T9, T1Q, Tv, T1c;
Chris@82 56 {
Chris@82 57 E T4, T18, Ta, Tr;
Chris@82 58 T2 = W[0];
Chris@82 59 T8 = W[4];
Chris@82 60 T3 = W[2];
Chris@82 61 T6 = W[3];
Chris@82 62 T4 = T2 * T3;
Chris@82 63 T18 = T3 * T8;
Chris@82 64 Ta = T2 * T6;
Chris@82 65 Tr = T2 * T8;
Chris@82 66 Te = W[6];
Chris@82 67 Tz = T3 * Te;
Chris@82 68 T1n = T8 * Te;
Chris@82 69 T10 = T2 * Te;
Chris@82 70 Ti = W[7];
Chris@82 71 TD = T3 * Ti;
Chris@82 72 T1r = T8 * Ti;
Chris@82 73 T14 = T2 * Ti;
Chris@82 74 T5 = W[1];
Chris@82 75 T7 = FMA(T5, T6, T4);
Chris@82 76 TJ = FNMS(T5, T6, T4);
Chris@82 77 T9 = T7 * T8;
Chris@82 78 T1Q = TJ * T8;
Chris@82 79 Tb = FNMS(T5, T3, Ta);
Chris@82 80 TM = FMA(T5, T3, Ta);
Chris@82 81 Tc = W[5];
Chris@82 82 Tv = T2 * Tc;
Chris@82 83 T1c = T3 * Tc;
Chris@82 84 Ts = FMA(T5, Tc, Tr);
Chris@82 85 T23 = FMA(T6, Tc, T18);
Chris@82 86 T1w = FNMS(T5, Tc, Tr);
Chris@82 87 T19 = FNMS(T6, Tc, T18);
Chris@82 88 }
Chris@82 89 TA = FMA(T6, Ti, Tz);
Chris@82 90 TE = FNMS(T6, Te, TD);
Chris@82 91 T1s = FNMS(Tc, Te, T1r);
Chris@82 92 T1N = FMA(T6, Te, TD);
Chris@82 93 T1o = FMA(Tc, Ti, T1n);
Chris@82 94 T1C = FMA(T5, Ti, T10);
Chris@82 95 T1F = FNMS(T5, Te, T14);
Chris@82 96 T1K = FNMS(T6, Ti, Tz);
Chris@82 97 T15 = FMA(T5, Te, T14);
Chris@82 98 T11 = FNMS(T5, Ti, T10);
Chris@82 99 {
Chris@82 100 E T2E, T2I, T2S, T2W;
Chris@82 101 T2E = T7 * Te;
Chris@82 102 T2F = FMA(Tb, Ti, T2E);
Chris@82 103 T31 = FNMS(Tb, Ti, T2E);
Chris@82 104 T2I = T7 * Ti;
Chris@82 105 T2J = FNMS(Tb, Te, T2I);
Chris@82 106 T34 = FMA(Tb, Te, T2I);
Chris@82 107 {
Chris@82 108 E T3e, T3i, T3L, T3P;
Chris@82 109 T3e = TJ * Te;
Chris@82 110 T3f = FNMS(TM, Ti, T3e);
Chris@82 111 T3z = FMA(TM, Ti, T3e);
Chris@82 112 T3i = TJ * Ti;
Chris@82 113 T3j = FMA(TM, Te, T3i);
Chris@82 114 T3C = FNMS(TM, Te, T3i);
Chris@82 115 T3L = Ts * Te;
Chris@82 116 T3P = Ts * Ti;
Chris@82 117 Tw = FNMS(T5, T8, Tv);
Chris@82 118 T3M = FMA(Tw, Ti, T3L);
Chris@82 119 T3Q = FNMS(Tw, Te, T3P);
Chris@82 120 }
Chris@82 121 {
Chris@82 122 E T2r, T2v, T3m, T3q;
Chris@82 123 T2r = T1w * Te;
Chris@82 124 T2v = T1w * Ti;
Chris@82 125 T1z = FMA(T5, T8, Tv);
Chris@82 126 T2s = FMA(T1z, Ti, T2r);
Chris@82 127 T2w = FNMS(T1z, Te, T2v);
Chris@82 128 T3m = T19 * Te;
Chris@82 129 T3q = T19 * Ti;
Chris@82 130 T1d = FMA(T6, T8, T1c);
Chris@82 131 T3n = FMA(T1d, Ti, T3m);
Chris@82 132 T3r = FNMS(T1d, Te, T3q);
Chris@82 133 }
Chris@82 134 T2S = T23 * Te;
Chris@82 135 T2W = T23 * Ti;
Chris@82 136 T26 = FNMS(T6, T8, T1c);
Chris@82 137 T2T = FMA(T26, Ti, T2S);
Chris@82 138 T2X = FNMS(T26, Te, T2W);
Chris@82 139 {
Chris@82 140 E TQ, TV, Tf, Tm, Tg;
Chris@82 141 Tg = T7 * Tc;
Chris@82 142 Th = FMA(Tb, T8, Tg);
Chris@82 143 TR = FNMS(Tb, T8, Tg);
Chris@82 144 TP = FMA(Tb, Tc, T9);
Chris@82 145 TQ = TP * Te;
Chris@82 146 TV = TP * Ti;
Chris@82 147 Td = FNMS(Tb, Tc, T9);
Chris@82 148 Tf = Td * Te;
Chris@82 149 Tm = Td * Ti;
Chris@82 150 Tj = FMA(Th, Ti, Tf);
Chris@82 151 TW = FNMS(TR, Te, TV);
Chris@82 152 Tn = FNMS(Th, Te, Tm);
Chris@82 153 TS = FMA(TR, Ti, TQ);
Chris@82 154 }
Chris@82 155 {
Chris@82 156 E T2a, T2f, T1S, T1Y, T1T;
Chris@82 157 T1T = TJ * Tc;
Chris@82 158 T1U = FMA(TM, T8, T1T);
Chris@82 159 T2b = FNMS(TM, T8, T1T);
Chris@82 160 T29 = FMA(TM, Tc, T1Q);
Chris@82 161 T2a = T29 * Te;
Chris@82 162 T2f = T29 * Ti;
Chris@82 163 T1R = FNMS(TM, Tc, T1Q);
Chris@82 164 T1S = T1R * Te;
Chris@82 165 T1Y = T1R * Ti;
Chris@82 166 T1V = FMA(T1U, Ti, T1S);
Chris@82 167 T2g = FNMS(T2b, Te, T2f);
Chris@82 168 T1Z = FNMS(T1U, Te, T1Y);
Chris@82 169 T2c = FMA(T2b, Ti, T2a);
Chris@82 170 }
Chris@82 171 }
Chris@82 172 }
/* Stage 2: load every input pair, scale it by its multiplier pair (a, b) as
 * re' = a * re + b * im, im' = a * im - b * re (under the FMA/FNMS
 * assumption stated in the header comment), and form the first
 * sums/differences. Stage 3 (the stores) follows inside the same block. */
Chris@82 173 {
Chris@82 174 E Tq, T46, T8H, T97, TH, T98, T4b, T8D, TZ, T7f, T4j, T6t, T1g, T7g, T4q;
Chris@82 175 E T6u, T1v, T1I, T7m, T7j, T7k, T7l, T4z, T6x, T4G, T6y, T22, T2j, T7o, T7p;
Chris@82 176 E T7q, T7r, T4O, T6A, T4V, T6B, T3G, T7L, T7I, T8n, T5E, T6P, T61, T6M, T2N;
Chris@82 177 E T7A, T7x, T8i, T55, T6I, T5s, T6F, T43, T7J, T7O, T8o, T5L, T62, T5S, T63;
Chris@82 178 E T3c, T7y, T7D, T8j, T5c, T5t, T5j, T5u;
/* Elements 0 (used unscaled) and 16. */
Chris@82 179 {
Chris@82 180 E T1, T8G, Tk, Tl, To, T8E, Tp, T8F;
Chris@82 181 T1 = ri[0];
Chris@82 182 T8G = ii[0];
Chris@82 183 Tk = ri[WS(rs, 16)];
Chris@82 184 Tl = Tj * Tk;
Chris@82 185 To = ii[WS(rs, 16)];
Chris@82 186 T8E = Tj * To;
Chris@82 187 Tp = FMA(Tn, To, Tl);
Chris@82 188 Tq = T1 + Tp;
Chris@82 189 T46 = T1 - Tp;
Chris@82 190 T8F = FNMS(Tn, Tk, T8E);
Chris@82 191 T8H = T8F + T8G;
Chris@82 192 T97 = T8G - T8F;
Chris@82 193 }
/* Elements 8 and 24. */
Chris@82 194 {
Chris@82 195 E Tt, Tu, Tx, T47, TB, TC, TF, T49;
Chris@82 196 Tt = ri[WS(rs, 8)];
Chris@82 197 Tu = Ts * Tt;
Chris@82 198 Tx = ii[WS(rs, 8)];
Chris@82 199 T47 = Ts * Tx;
Chris@82 200 TB = ri[WS(rs, 24)];
Chris@82 201 TC = TA * TB;
Chris@82 202 TF = ii[WS(rs, 24)];
Chris@82 203 T49 = TA * TF;
Chris@82 204 {
Chris@82 205 E Ty, TG, T48, T4a;
Chris@82 206 Ty = FMA(Tw, Tx, Tu);
Chris@82 207 TG = FMA(TE, TF, TC);
Chris@82 208 TH = Ty + TG;
Chris@82 209 T98 = Ty - TG;
Chris@82 210 T48 = FNMS(Tw, Tt, T47);
Chris@82 211 T4a = FNMS(TE, TB, T49);
Chris@82 212 T4b = T48 - T4a;
Chris@82 213 T8D = T48 + T4a;
Chris@82 214 }
Chris@82 215 }
/* Elements 4 and 20. */
Chris@82 216 {
Chris@82 217 E TO, T4f, TY, T4h, T4d, T4i;
Chris@82 218 {
Chris@82 219 E TK, TL, TN, T4e;
Chris@82 220 TK = ri[WS(rs, 4)];
Chris@82 221 TL = TJ * TK;
Chris@82 222 TN = ii[WS(rs, 4)];
Chris@82 223 T4e = TJ * TN;
Chris@82 224 TO = FMA(TM, TN, TL);
Chris@82 225 T4f = FNMS(TM, TK, T4e);
Chris@82 226 }
Chris@82 227 {
Chris@82 228 E TT, TU, TX, T4g;
Chris@82 229 TT = ri[WS(rs, 20)];
Chris@82 230 TU = TS * TT;
Chris@82 231 TX = ii[WS(rs, 20)];
Chris@82 232 T4g = TS * TX;
Chris@82 233 TY = FMA(TW, TX, TU);
Chris@82 234 T4h = FNMS(TW, TT, T4g);
Chris@82 235 }
Chris@82 236 TZ = TO + TY;
Chris@82 237 T7f = T4f + T4h;
Chris@82 238 T4d = TO - TY;
Chris@82 239 T4i = T4f - T4h;
Chris@82 240 T4j = T4d + T4i;
Chris@82 241 T6t = T4i - T4d;
Chris@82 242 }
/* Elements 28 and 12. */
Chris@82 243 {
Chris@82 244 E T17, T4m, T1f, T4o, T4k, T4p;
Chris@82 245 {
Chris@82 246 E T12, T13, T16, T4l;
Chris@82 247 T12 = ri[WS(rs, 28)];
Chris@82 248 T13 = T11 * T12;
Chris@82 249 T16 = ii[WS(rs, 28)];
Chris@82 250 T4l = T11 * T16;
Chris@82 251 T17 = FMA(T15, T16, T13);
Chris@82 252 T4m = FNMS(T15, T12, T4l);
Chris@82 253 }
Chris@82 254 {
Chris@82 255 E T1a, T1b, T1e, T4n;
Chris@82 256 T1a = ri[WS(rs, 12)];
Chris@82 257 T1b = T19 * T1a;
Chris@82 258 T1e = ii[WS(rs, 12)];
Chris@82 259 T4n = T19 * T1e;
Chris@82 260 T1f = FMA(T1d, T1e, T1b);
Chris@82 261 T4o = FNMS(T1d, T1a, T4n);
Chris@82 262 }
Chris@82 263 T1g = T17 + T1f;
Chris@82 264 T7g = T4m + T4o;
Chris@82 265 T4k = T17 - T1f;
Chris@82 266 T4p = T4m - T4o;
Chris@82 267 T4q = T4k - T4p;
Chris@82 268 T6u = T4k + T4p;
Chris@82 269 }
/* Elements 2, 26, 18 and 10. */
Chris@82 270 {
Chris@82 271 E T1m, T4u, T1H, T4E, T1u, T4w, T1B, T4C;
Chris@82 272 {
Chris@82 273 E T1j, T1k, T1l, T4t;
Chris@82 274 T1j = ri[WS(rs, 2)];
Chris@82 275 T1k = T7 * T1j;
Chris@82 276 T1l = ii[WS(rs, 2)];
Chris@82 277 T4t = T7 * T1l;
Chris@82 278 T1m = FMA(Tb, T1l, T1k);
Chris@82 279 T4u = FNMS(Tb, T1j, T4t);
Chris@82 280 }
Chris@82 281 {
Chris@82 282 E T1D, T1E, T1G, T4D;
Chris@82 283 T1D = ri[WS(rs, 26)];
Chris@82 284 T1E = T1C * T1D;
Chris@82 285 T1G = ii[WS(rs, 26)];
Chris@82 286 T4D = T1C * T1G;
Chris@82 287 T1H = FMA(T1F, T1G, T1E);
Chris@82 288 T4E = FNMS(T1F, T1D, T4D);
Chris@82 289 }
Chris@82 290 {
Chris@82 291 E T1p, T1q, T1t, T4v;
Chris@82 292 T1p = ri[WS(rs, 18)];
Chris@82 293 T1q = T1o * T1p;
Chris@82 294 T1t = ii[WS(rs, 18)];
Chris@82 295 T4v = T1o * T1t;
Chris@82 296 T1u = FMA(T1s, T1t, T1q);
Chris@82 297 T4w = FNMS(T1s, T1p, T4v);
Chris@82 298 }
Chris@82 299 {
Chris@82 300 E T1x, T1y, T1A, T4B;
Chris@82 301 T1x = ri[WS(rs, 10)];
Chris@82 302 T1y = T1w * T1x;
Chris@82 303 T1A = ii[WS(rs, 10)];
Chris@82 304 T4B = T1w * T1A;
Chris@82 305 T1B = FMA(T1z, T1A, T1y);
Chris@82 306 T4C = FNMS(T1z, T1x, T4B);
Chris@82 307 }
Chris@82 308 T1v = T1m + T1u;
Chris@82 309 T1I = T1B + T1H;
Chris@82 310 T7m = T1v - T1I;
Chris@82 311 T7j = T4u + T4w;
Chris@82 312 T7k = T4C + T4E;
Chris@82 313 T7l = T7j - T7k;
Chris@82 314 {
Chris@82 315 E T4x, T4y, T4A, T4F;
Chris@82 316 T4x = T4u - T4w;
Chris@82 317 T4y = T1B - T1H;
Chris@82 318 T4z = T4x - T4y;
Chris@82 319 T6x = T4x + T4y;
Chris@82 320 T4A = T1m - T1u;
Chris@82 321 T4F = T4C - T4E;
Chris@82 322 T4G = T4A + T4F;
Chris@82 323 T6y = T4A - T4F;
Chris@82 324 }
Chris@82 325 }
/* Elements 30, 22, 14 and 6. */
Chris@82 326 {
Chris@82 327 E T1P, T4J, T2i, T4T, T21, T4L, T28, T4R;
Chris@82 328 {
Chris@82 329 E T1L, T1M, T1O, T4I;
Chris@82 330 T1L = ri[WS(rs, 30)];
Chris@82 331 T1M = T1K * T1L;
Chris@82 332 T1O = ii[WS(rs, 30)];
Chris@82 333 T4I = T1K * T1O;
Chris@82 334 T1P = FMA(T1N, T1O, T1M);
Chris@82 335 T4J = FNMS(T1N, T1L, T4I);
Chris@82 336 }
Chris@82 337 {
Chris@82 338 E T2d, T2e, T2h, T4S;
Chris@82 339 T2d = ri[WS(rs, 22)];
Chris@82 340 T2e = T2c * T2d;
Chris@82 341 T2h = ii[WS(rs, 22)];
Chris@82 342 T4S = T2c * T2h;
Chris@82 343 T2i = FMA(T2g, T2h, T2e);
Chris@82 344 T4T = FNMS(T2g, T2d, T4S);
Chris@82 345 }
Chris@82 346 {
Chris@82 347 E T1W, T1X, T20, T4K;
Chris@82 348 T1W = ri[WS(rs, 14)];
Chris@82 349 T1X = T1V * T1W;
Chris@82 350 T20 = ii[WS(rs, 14)];
Chris@82 351 T4K = T1V * T20;
Chris@82 352 T21 = FMA(T1Z, T20, T1X);
Chris@82 353 T4L = FNMS(T1Z, T1W, T4K);
Chris@82 354 }
Chris@82 355 {
Chris@82 356 E T24, T25, T27, T4Q;
Chris@82 357 T24 = ri[WS(rs, 6)];
Chris@82 358 T25 = T23 * T24;
Chris@82 359 T27 = ii[WS(rs, 6)];
Chris@82 360 T4Q = T23 * T27;
Chris@82 361 T28 = FMA(T26, T27, T25);
Chris@82 362 T4R = FNMS(T26, T24, T4Q);
Chris@82 363 }
Chris@82 364 T22 = T1P + T21;
Chris@82 365 T2j = T28 + T2i;
Chris@82 366 T7o = T22 - T2j;
Chris@82 367 T7p = T4J + T4L;
Chris@82 368 T7q = T4R + T4T;
Chris@82 369 T7r = T7p - T7q;
Chris@82 370 {
Chris@82 371 E T4M, T4N, T4P, T4U;
Chris@82 372 T4M = T4J - T4L;
Chris@82 373 T4N = T28 - T2i;
Chris@82 374 T4O = T4M - T4N;
Chris@82 375 T6A = T4M + T4N;
Chris@82 376 T4P = T1P - T21;
Chris@82 377 T4U = T4R - T4T;
Chris@82 378 T4V = T4P + T4U;
Chris@82 379 T6B = T4P - T4U;
Chris@82 380 }
Chris@82 381 }
/* Elements 31, 23, 15 and 7. */
Chris@82 382 {
Chris@82 383 E T3l, T5z, T3E, T5Z, T3t, T5B, T3y, T5X;
Chris@82 384 {
Chris@82 385 E T3g, T3h, T3k, T5y;
Chris@82 386 T3g = ri[WS(rs, 31)];
Chris@82 387 T3h = T3f * T3g;
Chris@82 388 T3k = ii[WS(rs, 31)];
Chris@82 389 T5y = T3f * T3k;
Chris@82 390 T3l = FMA(T3j, T3k, T3h);
Chris@82 391 T5z = FNMS(T3j, T3g, T5y);
Chris@82 392 }
Chris@82 393 {
Chris@82 394 E T3A, T3B, T3D, T5Y;
Chris@82 395 T3A = ri[WS(rs, 23)];
Chris@82 396 T3B = T3z * T3A;
Chris@82 397 T3D = ii[WS(rs, 23)];
Chris@82 398 T5Y = T3z * T3D;
Chris@82 399 T3E = FMA(T3C, T3D, T3B);
Chris@82 400 T5Z = FNMS(T3C, T3A, T5Y);
Chris@82 401 }
Chris@82 402 {
Chris@82 403 E T3o, T3p, T3s, T5A;
Chris@82 404 T3o = ri[WS(rs, 15)];
Chris@82 405 T3p = T3n * T3o;
Chris@82 406 T3s = ii[WS(rs, 15)];
Chris@82 407 T5A = T3n * T3s;
Chris@82 408 T3t = FMA(T3r, T3s, T3p);
Chris@82 409 T5B = FNMS(T3r, T3o, T5A);
Chris@82 410 }
Chris@82 411 {
Chris@82 412 E T3v, T3w, T3x, T5W;
Chris@82 413 T3v = ri[WS(rs, 7)];
Chris@82 414 T3w = TP * T3v;
Chris@82 415 T3x = ii[WS(rs, 7)];
Chris@82 416 T5W = TP * T3x;
Chris@82 417 T3y = FMA(TR, T3x, T3w);
Chris@82 418 T5X = FNMS(TR, T3v, T5W);
Chris@82 419 }
Chris@82 420 {
Chris@82 421 E T3u, T3F, T7G, T7H;
Chris@82 422 T3u = T3l + T3t;
Chris@82 423 T3F = T3y + T3E;
Chris@82 424 T3G = T3u + T3F;
Chris@82 425 T7L = T3u - T3F;
Chris@82 426 T7G = T5z + T5B;
Chris@82 427 T7H = T5X + T5Z;
Chris@82 428 T7I = T7G - T7H;
Chris@82 429 T8n = T7G + T7H;
Chris@82 430 }
Chris@82 431 {
Chris@82 432 E T5C, T5D, T5V, T60;
Chris@82 433 T5C = T5z - T5B;
Chris@82 434 T5D = T3y - T3E;
Chris@82 435 T5E = T5C - T5D;
Chris@82 436 T6P = T5C + T5D;
Chris@82 437 T5V = T3l - T3t;
Chris@82 438 T60 = T5X - T5Z;
Chris@82 439 T61 = T5V + T60;
Chris@82 440 T6M = T5V - T60;
Chris@82 441 }
Chris@82 442 }
/* Elements 1, 25, 17 and 9 (1 and 9 use stored W pairs directly). */
Chris@82 443 {
Chris@82 444 E T2q, T50, T2L, T5q, T2y, T52, T2D, T5o;
Chris@82 445 {
Chris@82 446 E T2n, T2o, T2p, T4Z;
Chris@82 447 T2n = ri[WS(rs, 1)];
Chris@82 448 T2o = T2 * T2n;
Chris@82 449 T2p = ii[WS(rs, 1)];
Chris@82 450 T4Z = T2 * T2p;
Chris@82 451 T2q = FMA(T5, T2p, T2o);
Chris@82 452 T50 = FNMS(T5, T2n, T4Z);
Chris@82 453 }
Chris@82 454 {
Chris@82 455 E T2G, T2H, T2K, T5p;
Chris@82 456 T2G = ri[WS(rs, 25)];
Chris@82 457 T2H = T2F * T2G;
Chris@82 458 T2K = ii[WS(rs, 25)];
Chris@82 459 T5p = T2F * T2K;
Chris@82 460 T2L = FMA(T2J, T2K, T2H);
Chris@82 461 T5q = FNMS(T2J, T2G, T5p);
Chris@82 462 }
Chris@82 463 {
Chris@82 464 E T2t, T2u, T2x, T51;
Chris@82 465 T2t = ri[WS(rs, 17)];
Chris@82 466 T2u = T2s * T2t;
Chris@82 467 T2x = ii[WS(rs, 17)];
Chris@82 468 T51 = T2s * T2x;
Chris@82 469 T2y = FMA(T2w, T2x, T2u);
Chris@82 470 T52 = FNMS(T2w, T2t, T51);
Chris@82 471 }
Chris@82 472 {
Chris@82 473 E T2A, T2B, T2C, T5n;
Chris@82 474 T2A = ri[WS(rs, 9)];
Chris@82 475 T2B = T8 * T2A;
Chris@82 476 T2C = ii[WS(rs, 9)];
Chris@82 477 T5n = T8 * T2C;
Chris@82 478 T2D = FMA(Tc, T2C, T2B);
Chris@82 479 T5o = FNMS(Tc, T2A, T5n);
Chris@82 480 }
Chris@82 481 {
Chris@82 482 E T2z, T2M, T7v, T7w;
Chris@82 483 T2z = T2q + T2y;
Chris@82 484 T2M = T2D + T2L;
Chris@82 485 T2N = T2z + T2M;
Chris@82 486 T7A = T2z - T2M;
Chris@82 487 T7v = T50 + T52;
Chris@82 488 T7w = T5o + T5q;
Chris@82 489 T7x = T7v - T7w;
Chris@82 490 T8i = T7v + T7w;
Chris@82 491 }
Chris@82 492 {
Chris@82 493 E T53, T54, T5m, T5r;
Chris@82 494 T53 = T50 - T52;
Chris@82 495 T54 = T2D - T2L;
Chris@82 496 T55 = T53 - T54;
Chris@82 497 T6I = T53 + T54;
Chris@82 498 T5m = T2q - T2y;
Chris@82 499 T5r = T5o - T5q;
Chris@82 500 T5s = T5m + T5r;
Chris@82 501 T6F = T5m - T5r;
Chris@82 502 }
Chris@82 503 }
/* Elements 3, 11, 19 and 27 (3 and 27 use stored W pairs directly). */
Chris@82 504 {
Chris@82 505 E T3K, T5G, T41, T5Q, T3S, T5I, T3X, T5O;
Chris@82 506 {
Chris@82 507 E T3H, T3I, T3J, T5F;
Chris@82 508 T3H = ri[WS(rs, 3)];
Chris@82 509 T3I = T3 * T3H;
Chris@82 510 T3J = ii[WS(rs, 3)];
Chris@82 511 T5F = T3 * T3J;
Chris@82 512 T3K = FMA(T6, T3J, T3I);
Chris@82 513 T5G = FNMS(T6, T3H, T5F);
Chris@82 514 }
Chris@82 515 {
Chris@82 516 E T3Y, T3Z, T40, T5P;
Chris@82 517 T3Y = ri[WS(rs, 11)];
Chris@82 518 T3Z = Td * T3Y;
Chris@82 519 T40 = ii[WS(rs, 11)];
Chris@82 520 T5P = Td * T40;
Chris@82 521 T41 = FMA(Th, T40, T3Z);
Chris@82 522 T5Q = FNMS(Th, T3Y, T5P);
Chris@82 523 }
Chris@82 524 {
Chris@82 525 E T3N, T3O, T3R, T5H;
Chris@82 526 T3N = ri[WS(rs, 19)];
Chris@82 527 T3O = T3M * T3N;
Chris@82 528 T3R = ii[WS(rs, 19)];
Chris@82 529 T5H = T3M * T3R;
Chris@82 530 T3S = FMA(T3Q, T3R, T3O);
Chris@82 531 T5I = FNMS(T3Q, T3N, T5H);
Chris@82 532 }
Chris@82 533 {
Chris@82 534 E T3U, T3V, T3W, T5N;
Chris@82 535 T3U = ri[WS(rs, 27)];
Chris@82 536 T3V = Te * T3U;
Chris@82 537 T3W = ii[WS(rs, 27)];
Chris@82 538 T5N = Te * T3W;
Chris@82 539 T3X = FMA(Ti, T3W, T3V);
Chris@82 540 T5O = FNMS(Ti, T3U, T5N);
Chris@82 541 }
Chris@82 542 {
Chris@82 543 E T3T, T42, T7M, T7N;
Chris@82 544 T3T = T3K + T3S;
Chris@82 545 T42 = T3X + T41;
Chris@82 546 T43 = T3T + T42;
Chris@82 547 T7J = T42 - T3T;
Chris@82 548 T7M = T5G + T5I;
Chris@82 549 T7N = T5O + T5Q;
Chris@82 550 T7O = T7M - T7N;
Chris@82 551 T8o = T7M + T7N;
Chris@82 552 }
Chris@82 553 {
Chris@82 554 E T5J, T5K, T5M, T5R;
Chris@82 555 T5J = T5G - T5I;
Chris@82 556 T5K = T3K - T3S;
Chris@82 557 T5L = T5J - T5K;
Chris@82 558 T62 = T5K + T5J;
Chris@82 559 T5M = T3X - T41;
Chris@82 560 T5R = T5O - T5Q;
Chris@82 561 T5S = T5M + T5R;
Chris@82 562 T63 = T5M - T5R;
Chris@82 563 }
Chris@82 564 }
/* Elements 5, 13, 21 and 29. */
Chris@82 565 {
Chris@82 566 E T2R, T57, T3a, T5h, T2Z, T59, T36, T5f;
Chris@82 567 {
Chris@82 568 E T2O, T2P, T2Q, T56;
Chris@82 569 T2O = ri[WS(rs, 5)];
Chris@82 570 T2P = T29 * T2O;
Chris@82 571 T2Q = ii[WS(rs, 5)];
Chris@82 572 T56 = T29 * T2Q;
Chris@82 573 T2R = FMA(T2b, T2Q, T2P);
Chris@82 574 T57 = FNMS(T2b, T2O, T56);
Chris@82 575 }
Chris@82 576 {
Chris@82 577 E T37, T38, T39, T5g;
Chris@82 578 T37 = ri[WS(rs, 13)];
Chris@82 579 T38 = T1R * T37;
Chris@82 580 T39 = ii[WS(rs, 13)];
Chris@82 581 T5g = T1R * T39;
Chris@82 582 T3a = FMA(T1U, T39, T38);
Chris@82 583 T5h = FNMS(T1U, T37, T5g);
Chris@82 584 }
Chris@82 585 {
Chris@82 586 E T2U, T2V, T2Y, T58;
Chris@82 587 T2U = ri[WS(rs, 21)];
Chris@82 588 T2V = T2T * T2U;
Chris@82 589 T2Y = ii[WS(rs, 21)];
Chris@82 590 T58 = T2T * T2Y;
Chris@82 591 T2Z = FMA(T2X, T2Y, T2V);
Chris@82 592 T59 = FNMS(T2X, T2U, T58);
Chris@82 593 }
Chris@82 594 {
Chris@82 595 E T32, T33, T35, T5e;
Chris@82 596 T32 = ri[WS(rs, 29)];
Chris@82 597 T33 = T31 * T32;
Chris@82 598 T35 = ii[WS(rs, 29)];
Chris@82 599 T5e = T31 * T35;
Chris@82 600 T36 = FMA(T34, T35, T33);
Chris@82 601 T5f = FNMS(T34, T32, T5e);
Chris@82 602 }
Chris@82 603 {
Chris@82 604 E T30, T3b, T7B, T7C;
Chris@82 605 T30 = T2R + T2Z;
Chris@82 606 T3b = T36 + T3a;
Chris@82 607 T3c = T30 + T3b;
Chris@82 608 T7y = T3b - T30;
Chris@82 609 T7B = T57 + T59;
Chris@82 610 T7C = T5f + T5h;
Chris@82 611 T7D = T7B - T7C;
Chris@82 612 T8j = T7B + T7C;
Chris@82 613 }
Chris@82 614 {
Chris@82 615 E T5a, T5b, T5d, T5i;
Chris@82 616 T5a = T57 - T59;
Chris@82 617 T5b = T2R - T2Z;
Chris@82 618 T5c = T5a - T5b;
Chris@82 619 T5t = T5b + T5a;
Chris@82 620 T5d = T36 - T3a;
Chris@82 621 T5i = T5f - T5h;
Chris@82 622 T5j = T5d + T5i;
Chris@82 623 T5u = T5d - T5i;
Chris@82 624 }
Chris@82 625 }
/* Stage 3a: stores to indices 0, 16, 8, 24, 4, 20, 12 and 28. */
Chris@82 626 {
Chris@82 627 E T1i, T8c, T8z, T8A, T8J, T8O, T2l, T8N, T45, T8L, T8l, T8t, T8q, T8u, T8f;
Chris@82 628 E T8B;
Chris@82 629 {
Chris@82 630 E TI, T1h, T8x, T8y;
Chris@82 631 TI = Tq + TH;
Chris@82 632 T1h = TZ + T1g;
Chris@82 633 T1i = TI + T1h;
Chris@82 634 T8c = TI - T1h;
Chris@82 635 T8x = T8i + T8j;
Chris@82 636 T8y = T8n + T8o;
Chris@82 637 T8z = T8x - T8y;
Chris@82 638 T8A = T8x + T8y;
Chris@82 639 }
Chris@82 640 {
Chris@82 641 E T8C, T8I, T1J, T2k;
Chris@82 642 T8C = T7f + T7g;
Chris@82 643 T8I = T8D + T8H;
Chris@82 644 T8J = T8C + T8I;
Chris@82 645 T8O = T8I - T8C;
Chris@82 646 T1J = T1v + T1I;
Chris@82 647 T2k = T22 + T2j;
Chris@82 648 T2l = T1J + T2k;
Chris@82 649 T8N = T2k - T1J;
Chris@82 650 }
Chris@82 651 {
Chris@82 652 E T3d, T44, T8h, T8k;
Chris@82 653 T3d = T2N + T3c;
Chris@82 654 T44 = T3G + T43;
Chris@82 655 T45 = T3d + T44;
Chris@82 656 T8L = T44 - T3d;
Chris@82 657 T8h = T2N - T3c;
Chris@82 658 T8k = T8i - T8j;
Chris@82 659 T8l = T8h + T8k;
Chris@82 660 T8t = T8k - T8h;
Chris@82 661 }
Chris@82 662 {
Chris@82 663 E T8m, T8p, T8d, T8e;
Chris@82 664 T8m = T3G - T43;
Chris@82 665 T8p = T8n - T8o;
Chris@82 666 T8q = T8m - T8p;
Chris@82 667 T8u = T8m + T8p;
Chris@82 668 T8d = T7j + T7k;
Chris@82 669 T8e = T7p + T7q;
Chris@82 670 T8f = T8d - T8e;
Chris@82 671 T8B = T8d + T8e;
Chris@82 672 }
Chris@82 673 {
Chris@82 674 E T2m, T8K, T8w, T8M;
Chris@82 675 T2m = T1i + T2l;
Chris@82 676 ri[WS(rs, 16)] = T2m - T45;
Chris@82 677 ri[0] = T2m + T45;
Chris@82 678 T8K = T8B + T8J;
Chris@82 679 ii[0] = T8A + T8K;
Chris@82 680 ii[WS(rs, 16)] = T8K - T8A;
Chris@82 681 T8w = T1i - T2l;
Chris@82 682 ri[WS(rs, 24)] = T8w - T8z;
Chris@82 683 ri[WS(rs, 8)] = T8w + T8z;
Chris@82 684 T8M = T8J - T8B;
Chris@82 685 ii[WS(rs, 8)] = T8L + T8M;
Chris@82 686 ii[WS(rs, 24)] = T8M - T8L;
Chris@82 687 }
Chris@82 688 {
Chris@82 689 E T8g, T8r, T8P, T8Q;
Chris@82 690 T8g = T8c + T8f;
Chris@82 691 T8r = T8l + T8q;
Chris@82 692 ri[WS(rs, 20)] = FNMS(KP707106781, T8r, T8g);
Chris@82 693 ri[WS(rs, 4)] = FMA(KP707106781, T8r, T8g);
Chris@82 694 T8P = T8N + T8O;
Chris@82 695 T8Q = T8t + T8u;
Chris@82 696 ii[WS(rs, 4)] = FMA(KP707106781, T8Q, T8P);
Chris@82 697 ii[WS(rs, 20)] = FNMS(KP707106781, T8Q, T8P);
Chris@82 698 }
Chris@82 699 {
Chris@82 700 E T8s, T8v, T8R, T8S;
Chris@82 701 T8s = T8c - T8f;
Chris@82 702 T8v = T8t - T8u;
Chris@82 703 ri[WS(rs, 28)] = FNMS(KP707106781, T8v, T8s);
Chris@82 704 ri[WS(rs, 12)] = FMA(KP707106781, T8v, T8s);
Chris@82 705 T8R = T8O - T8N;
Chris@82 706 T8S = T8q - T8l;
Chris@82 707 ii[WS(rs, 12)] = FMA(KP707106781, T8S, T8R);
Chris@82 708 ii[WS(rs, 28)] = FNMS(KP707106781, T8S, T8R);
Chris@82 709 }
Chris@82 710 }
/* Stage 3b: stores to indices 22, 6, 14, 30, 18, 2, 26 and 10. */
Chris@82 711 {
Chris@82 712 E T7i, T7W, T86, T8a, T8V, T91, T7t, T8W, T7F, T7T, T7Z, T92, T83, T89, T7Q;
Chris@82 713 E T7U;
Chris@82 714 {
Chris@82 715 E T7e, T7h, T84, T85;
Chris@82 716 T7e = Tq - TH;
Chris@82 717 T7h = T7f - T7g;
Chris@82 718 T7i = T7e - T7h;
Chris@82 719 T7W = T7e + T7h;
Chris@82 720 T84 = T7L + T7O;
Chris@82 721 T85 = T7I + T7J;
Chris@82 722 T86 = FNMS(KP414213562, T85, T84);
Chris@82 723 T8a = FMA(KP414213562, T84, T85);
Chris@82 724 }
Chris@82 725 {
Chris@82 726 E T8T, T8U, T7n, T7s;
Chris@82 727 T8T = T1g - TZ;
Chris@82 728 T8U = T8H - T8D;
Chris@82 729 T8V = T8T + T8U;
Chris@82 730 T91 = T8U - T8T;
Chris@82 731 T7n = T7l - T7m;
Chris@82 732 T7s = T7o + T7r;
Chris@82 733 T7t = T7n - T7s;
Chris@82 734 T8W = T7n + T7s;
Chris@82 735 }
Chris@82 736 {
Chris@82 737 E T7z, T7E, T7X, T7Y;
Chris@82 738 T7z = T7x - T7y;
Chris@82 739 T7E = T7A - T7D;
Chris@82 740 T7F = FMA(KP414213562, T7E, T7z);
Chris@82 741 T7T = FNMS(KP414213562, T7z, T7E);
Chris@82 742 T7X = T7m + T7l;
Chris@82 743 T7Y = T7o - T7r;
Chris@82 744 T7Z = T7X + T7Y;
Chris@82 745 T92 = T7Y - T7X;
Chris@82 746 }
Chris@82 747 {
Chris@82 748 E T81, T82, T7K, T7P;
Chris@82 749 T81 = T7A + T7D;
Chris@82 750 T82 = T7x + T7y;
Chris@82 751 T83 = FMA(KP414213562, T82, T81);
Chris@82 752 T89 = FNMS(KP414213562, T81, T82);
Chris@82 753 T7K = T7I - T7J;
Chris@82 754 T7P = T7L - T7O;
Chris@82 755 T7Q = FNMS(KP414213562, T7P, T7K);
Chris@82 756 T7U = FMA(KP414213562, T7K, T7P);
Chris@82 757 }
Chris@82 758 {
Chris@82 759 E T7u, T7R, T93, T94;
Chris@82 760 T7u = FMA(KP707106781, T7t, T7i);
Chris@82 761 T7R = T7F - T7Q;
Chris@82 762 ri[WS(rs, 22)] = FNMS(KP923879532, T7R, T7u);
Chris@82 763 ri[WS(rs, 6)] = FMA(KP923879532, T7R, T7u);
Chris@82 764 T93 = FMA(KP707106781, T92, T91);
Chris@82 765 T94 = T7U - T7T;
Chris@82 766 ii[WS(rs, 6)] = FMA(KP923879532, T94, T93);
Chris@82 767 ii[WS(rs, 22)] = FNMS(KP923879532, T94, T93);
Chris@82 768 }
Chris@82 769 {
Chris@82 770 E T7S, T7V, T95, T96;
Chris@82 771 T7S = FNMS(KP707106781, T7t, T7i);
Chris@82 772 T7V = T7T + T7U;
Chris@82 773 ri[WS(rs, 14)] = FNMS(KP923879532, T7V, T7S);
Chris@82 774 ri[WS(rs, 30)] = FMA(KP923879532, T7V, T7S);
Chris@82 775 T95 = FNMS(KP707106781, T92, T91);
Chris@82 776 T96 = T7F + T7Q;
Chris@82 777 ii[WS(rs, 14)] = FNMS(KP923879532, T96, T95);
Chris@82 778 ii[WS(rs, 30)] = FMA(KP923879532, T96, T95);
Chris@82 779 }
Chris@82 780 {
Chris@82 781 E T80, T87, T8X, T8Y;
Chris@82 782 T80 = FMA(KP707106781, T7Z, T7W);
Chris@82 783 T87 = T83 + T86;
Chris@82 784 ri[WS(rs, 18)] = FNMS(KP923879532, T87, T80);
Chris@82 785 ri[WS(rs, 2)] = FMA(KP923879532, T87, T80);
Chris@82 786 T8X = FMA(KP707106781, T8W, T8V);
Chris@82 787 T8Y = T89 + T8a;
Chris@82 788 ii[WS(rs, 2)] = FMA(KP923879532, T8Y, T8X);
Chris@82 789 ii[WS(rs, 18)] = FNMS(KP923879532, T8Y, T8X);
Chris@82 790 }
Chris@82 791 {
Chris@82 792 E T88, T8b, T8Z, T90;
Chris@82 793 T88 = FNMS(KP707106781, T7Z, T7W);
Chris@82 794 T8b = T89 - T8a;
Chris@82 795 ri[WS(rs, 26)] = FNMS(KP923879532, T8b, T88);
Chris@82 796 ri[WS(rs, 10)] = FMA(KP923879532, T8b, T88);
Chris@82 797 T8Z = FNMS(KP707106781, T8W, T8V);
Chris@82 798 T90 = T86 - T83;
Chris@82 799 ii[WS(rs, 10)] = FMA(KP923879532, T90, T8Z);
Chris@82 800 ii[WS(rs, 26)] = FNMS(KP923879532, T90, T8Z);
Chris@82 801 }
Chris@82 802 }
/* Stage 3c: stores to indices 21, 5, 13, 29, 17, 1, 25 and 9. */
Chris@82 803 {
Chris@82 804 E T4s, T6c, T4X, T9c, T9b, T9h, T6f, T9i, T66, T6q, T6a, T6m, T5x, T6p, T69;
Chris@82 805 E T6j;
Chris@82 806 {
Chris@82 807 E T4c, T4r, T6d, T6e;
Chris@82 808 T4c = T46 + T4b;
Chris@82 809 T4r = T4j + T4q;
Chris@82 810 T4s = FNMS(KP707106781, T4r, T4c);
Chris@82 811 T6c = FMA(KP707106781, T4r, T4c);
Chris@82 812 {
Chris@82 813 E T4H, T4W, T99, T9a;
Chris@82 814 T4H = FNMS(KP414213562, T4G, T4z);
Chris@82 815 T4W = FMA(KP414213562, T4V, T4O);
Chris@82 816 T4X = T4H - T4W;
Chris@82 817 T9c = T4H + T4W;
Chris@82 818 T99 = T97 - T98;
Chris@82 819 T9a = T6t + T6u;
Chris@82 820 T9b = FMA(KP707106781, T9a, T99);
Chris@82 821 T9h = FNMS(KP707106781, T9a, T99);
Chris@82 822 }
Chris@82 823 T6d = FMA(KP414213562, T4z, T4G);
Chris@82 824 T6e = FNMS(KP414213562, T4O, T4V);
Chris@82 825 T6f = T6d + T6e;
Chris@82 826 T9i = T6e - T6d;
Chris@82 827 {
Chris@82 828 E T5U, T6l, T65, T6k, T5T, T64;
Chris@82 829 T5T = T5L + T5S;
Chris@82 830 T5U = FNMS(KP707106781, T5T, T5E);
Chris@82 831 T6l = FMA(KP707106781, T5T, T5E);
Chris@82 832 T64 = T62 + T63;
Chris@82 833 T65 = FNMS(KP707106781, T64, T61);
Chris@82 834 T6k = FMA(KP707106781, T64, T61);
Chris@82 835 T66 = FNMS(KP668178637, T65, T5U);
Chris@82 836 T6q = FMA(KP198912367, T6k, T6l);
Chris@82 837 T6a = FMA(KP668178637, T5U, T65);
Chris@82 838 T6m = FNMS(KP198912367, T6l, T6k);
Chris@82 839 }
Chris@82 840 {
Chris@82 841 E T5l, T6i, T5w, T6h, T5k, T5v;
Chris@82 842 T5k = T5c + T5j;
Chris@82 843 T5l = FNMS(KP707106781, T5k, T55);
Chris@82 844 T6i = FMA(KP707106781, T5k, T55);
Chris@82 845 T5v = T5t + T5u;
Chris@82 846 T5w = FNMS(KP707106781, T5v, T5s);
Chris@82 847 T6h = FMA(KP707106781, T5v, T5s);
Chris@82 848 T5x = FMA(KP668178637, T5w, T5l);
Chris@82 849 T6p = FNMS(KP198912367, T6h, T6i);
Chris@82 850 T69 = FNMS(KP668178637, T5l, T5w);
Chris@82 851 T6j = FMA(KP198912367, T6i, T6h);
Chris@82 852 }
Chris@82 853 }
Chris@82 854 {
Chris@82 855 E T4Y, T67, T9j, T9k;
Chris@82 856 T4Y = FMA(KP923879532, T4X, T4s);
Chris@82 857 T67 = T5x - T66;
Chris@82 858 ri[WS(rs, 21)] = FNMS(KP831469612, T67, T4Y);
Chris@82 859 ri[WS(rs, 5)] = FMA(KP831469612, T67, T4Y);
Chris@82 860 T9j = FMA(KP923879532, T9i, T9h);
Chris@82 861 T9k = T6a - T69;
Chris@82 862 ii[WS(rs, 5)] = FMA(KP831469612, T9k, T9j);
Chris@82 863 ii[WS(rs, 21)] = FNMS(KP831469612, T9k, T9j);
Chris@82 864 }
Chris@82 865 {
Chris@82 866 E T68, T6b, T9l, T9m;
Chris@82 867 T68 = FNMS(KP923879532, T4X, T4s);
Chris@82 868 T6b = T69 + T6a;
Chris@82 869 ri[WS(rs, 13)] = FNMS(KP831469612, T6b, T68);
Chris@82 870 ri[WS(rs, 29)] = FMA(KP831469612, T6b, T68);
Chris@82 871 T9l = FNMS(KP923879532, T9i, T9h);
Chris@82 872 T9m = T5x + T66;
Chris@82 873 ii[WS(rs, 13)] = FNMS(KP831469612, T9m, T9l);
Chris@82 874 ii[WS(rs, 29)] = FMA(KP831469612, T9m, T9l);
Chris@82 875 }
Chris@82 876 {
Chris@82 877 E T6g, T6n, T9d, T9e;
Chris@82 878 T6g = FMA(KP923879532, T6f, T6c);
Chris@82 879 T6n = T6j + T6m;
Chris@82 880 ri[WS(rs, 17)] = FNMS(KP980785280, T6n, T6g);
Chris@82 881 ri[WS(rs, 1)] = FMA(KP980785280, T6n, T6g);
Chris@82 882 T9d = FMA(KP923879532, T9c, T9b);
Chris@82 883 T9e = T6p + T6q;
Chris@82 884 ii[WS(rs, 1)] = FMA(KP980785280, T9e, T9d);
Chris@82 885 ii[WS(rs, 17)] = FNMS(KP980785280, T9e, T9d);
Chris@82 886 }
Chris@82 887 {
Chris@82 888 E T6o, T6r, T9f, T9g;
Chris@82 889 T6o = FNMS(KP923879532, T6f, T6c);
Chris@82 890 T6r = T6p - T6q;
Chris@82 891 ri[WS(rs, 25)] = FNMS(KP980785280, T6r, T6o);
Chris@82 892 ri[WS(rs, 9)] = FMA(KP980785280, T6r, T6o);
Chris@82 893 T9f = FNMS(KP923879532, T9c, T9b);
Chris@82 894 T9g = T6m - T6j;
Chris@82 895 ii[WS(rs, 9)] = FMA(KP980785280, T9g, T9f);
Chris@82 896 ii[WS(rs, 25)] = FNMS(KP980785280, T9g, T9f);
Chris@82 897 }
Chris@82 898 }
/* Stage 3d: stores to indices 19, 3, 27, 11, 23, 7, 15 and 31. */
Chris@82 899 {
Chris@82 900 E T6w, T6Y, T6D, T9w, T9p, T9v, T71, T9q, T6S, T7c, T6W, T78, T6L, T7b, T6V;
Chris@82 901 E T75;
Chris@82 902 {
Chris@82 903 E T6s, T6v, T6Z, T70;
Chris@82 904 T6s = T46 - T4b;
Chris@82 905 T6v = T6t - T6u;
Chris@82 906 T6w = FMA(KP707106781, T6v, T6s);
Chris@82 907 T6Y = FNMS(KP707106781, T6v, T6s);
Chris@82 908 {
Chris@82 909 E T6z, T6C, T9n, T9o;
Chris@82 910 T6z = FMA(KP414213562, T6y, T6x);
Chris@82 911 T6C = FNMS(KP414213562, T6B, T6A);
Chris@82 912 T6D = T6z - T6C;
Chris@82 913 T9w = T6z + T6C;
Chris@82 914 T9n = T98 + T97;
Chris@82 915 T9o = T4q - T4j;
Chris@82 916 T9p = FMA(KP707106781, T9o, T9n);
Chris@82 917 T9v = FNMS(KP707106781, T9o, T9n);
Chris@82 918 }
Chris@82 919 T6Z = FNMS(KP414213562, T6x, T6y);
Chris@82 920 T70 = FMA(KP414213562, T6A, T6B);
Chris@82 921 T71 = T6Z + T70;
Chris@82 922 T9q = T70 - T6Z;
Chris@82 923 {
Chris@82 924 E T6O, T77, T6R, T76, T6N, T6Q;
Chris@82 925 T6N = T5S - T5L;
Chris@82 926 T6O = FNMS(KP707106781, T6N, T6M);
Chris@82 927 T77 = FMA(KP707106781, T6N, T6M);
Chris@82 928 T6Q = T62 - T63;
Chris@82 929 T6R = FNMS(KP707106781, T6Q, T6P);
Chris@82 930 T76 = FMA(KP707106781, T6Q, T6P);
Chris@82 931 T6S = FNMS(KP668178637, T6R, T6O);
Chris@82 932 T7c = FMA(KP198912367, T76, T77);
Chris@82 933 T6W = FMA(KP668178637, T6O, T6R);
Chris@82 934 T78 = FNMS(KP198912367, T77, T76);
Chris@82 935 }
Chris@82 936 {
Chris@82 937 E T6H, T74, T6K, T73, T6G, T6J;
Chris@82 938 T6G = T5j - T5c;
Chris@82 939 T6H = FNMS(KP707106781, T6G, T6F);
Chris@82 940 T74 = FMA(KP707106781, T6G, T6F);
Chris@82 941 T6J = T5t - T5u;
Chris@82 942 T6K = FNMS(KP707106781, T6J, T6I);
Chris@82 943 T73 = FMA(KP707106781, T6J, T6I);
Chris@82 944 T6L = FMA(KP668178637, T6K, T6H);
Chris@82 945 T7b = FNMS(KP198912367, T73, T74);
Chris@82 946 T6V = FNMS(KP668178637, T6H, T6K);
Chris@82 947 T75 = FMA(KP198912367, T74, T73);
Chris@82 948 }
Chris@82 949 }
Chris@82 950 {
Chris@82 951 E T6E, T6T, T9r, T9s;
Chris@82 952 T6E = FMA(KP923879532, T6D, T6w);
Chris@82 953 T6T = T6L + T6S;
Chris@82 954 ri[WS(rs, 19)] = FNMS(KP831469612, T6T, T6E);
Chris@82 955 ri[WS(rs, 3)] = FMA(KP831469612, T6T, T6E);
Chris@82 956 T9r = FMA(KP923879532, T9q, T9p);
Chris@82 957 T9s = T6V + T6W;
Chris@82 958 ii[WS(rs, 3)] = FMA(KP831469612, T9s, T9r);
Chris@82 959 ii[WS(rs, 19)] = FNMS(KP831469612, T9s, T9r);
Chris@82 960 }
Chris@82 961 {
Chris@82 962 E T6U, T6X, T9t, T9u;
Chris@82 963 T6U = FNMS(KP923879532, T6D, T6w);
Chris@82 964 T6X = T6V - T6W;
Chris@82 965 ri[WS(rs, 27)] = FNMS(KP831469612, T6X, T6U);
Chris@82 966 ri[WS(rs, 11)] = FMA(KP831469612, T6X, T6U);
Chris@82 967 T9t = FNMS(KP923879532, T9q, T9p);
Chris@82 968 T9u = T6S - T6L;
Chris@82 969 ii[WS(rs, 11)] = FMA(KP831469612, T9u, T9t);
Chris@82 970 ii[WS(rs, 27)] = FNMS(KP831469612, T9u, T9t);
Chris@82 971 }
Chris@82 972 {
Chris@82 973 E T72, T79, T9x, T9y;
Chris@82 974 T72 = FNMS(KP923879532, T71, T6Y);
Chris@82 975 T79 = T75 - T78;
Chris@82 976 ri[WS(rs, 23)] = FNMS(KP980785280, T79, T72);
Chris@82 977 ri[WS(rs, 7)] = FMA(KP980785280, T79, T72);
Chris@82 978 T9x = FNMS(KP923879532, T9w, T9v);
Chris@82 979 T9y = T7c - T7b;
Chris@82 980 ii[WS(rs, 7)] = FMA(KP980785280, T9y, T9x);
Chris@82 981 ii[WS(rs, 23)] = FNMS(KP980785280, T9y, T9x);
Chris@82 982 }
Chris@82 983 {
Chris@82 984 E T7a, T7d, T9z, T9A;
Chris@82 985 T7a = FMA(KP923879532, T71, T6Y);
Chris@82 986 T7d = T7b + T7c;
Chris@82 987 ri[WS(rs, 15)] = FNMS(KP980785280, T7d, T7a);
Chris@82 988 ri[WS(rs, 31)] = FMA(KP980785280, T7d, T7a);
Chris@82 989 T9z = FMA(KP923879532, T9w, T9v);
Chris@82 990 T9A = T75 + T78;
Chris@82 991 ii[WS(rs, 15)] = FNMS(KP980785280, T9A, T9z);
Chris@82 992 ii[WS(rs, 31)] = FMA(KP980785280, T9A, T9z);
Chris@82 993 }
Chris@82 994 }
Chris@82 995 }
Chris@82 996 }
Chris@82 997 }
Chris@82 998 }
Chris@82 999
/*
 * Twiddle instruction table for the FMA build of t2_32: four TW_CEXP
 * entries whose last field is 1, 3, 9 and 27, followed by a TW_NEXT entry.
 * NOTE(review): presumably these select the stored twiddle powers
 * (w^1, w^3, w^9, w^27, in line with the generator's -twiddle-log3 option)
 * and TW_NEXT terminates the list; tw_instr is declared elsewhere, confirm
 * the field meanings there.
 */
Chris@82 1000 static const tw_instr twinstr[] = {
Chris@82 1001 {TW_CEXP, 0, 1},
Chris@82 1002 {TW_CEXP, 0, 3},
Chris@82 1003 {TW_CEXP, 0, 9},
Chris@82 1004 {TW_CEXP, 0, 27},
Chris@82 1005 {TW_NEXT, 1, 0}
Chris@82 1006 };
Chris@82 1007
/*
 * Codelet descriptor for the FMA build: size 32, name "t2_32", the twinstr
 * table above and &GENUS. The {236, 98, 252, 0} initializer repeats the
 * additions / multiplications / fused multiply-add counts quoted in this
 * variant's "This function contains ..." comment.
 * NOTE(review): meaning of the fourth count and of the three trailing
 * zeros depends on the ct_desc declaration, which is not in this file.
 */
Chris@82 1008 static const ct_desc desc = { 32, "t2_32", twinstr, &GENUS, {236, 98, 252, 0}, 0, 0, 0 };
Chris@82 1009
/*
 * Registration entry point (FMA build): hands the t2_32 kernel and its
 * descriptor to X(kdft_dit_register) for planner p. X() is a project
 * macro wrapped around the identifier; its expansion is defined elsewhere.
 */
Chris@82 1010 void X(codelet_t2_32) (planner *p) {
Chris@82 1011 X(kdft_dit_register) (p, t2_32, &desc);
Chris@82 1012 }
Chris@82 1013 #else
Chris@82 1014
Chris@82 1015 /* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -name t2_32 -include dft/scalar/t.h */
Chris@82 1016
Chris@82 1017 /*
Chris@82 1018 * This function contains 488 FP additions, 280 FP multiplications,
Chris@82 1019 * (or, 376 additions, 168 multiplications, 112 fused multiply/add),
Chris@82 1020 * 158 stack variables, 7 constants, and 128 memory accesses
Chris@82 1021 */
Chris@82 1022 #include "dft/scalar/t.h"
Chris@82 1023
/*
 * t2_32 -- non-FMA build of the generated size-32 twiddle codelet
 * (generator options: -twiddle-log3 -precompute-twiddles -n 32).
 *
 * For every m in [mb, me) the loop body
 *   - reads eight values W[0..7] and derives all further multipliers from
 *     them using products and sums only,
 *   - loads ri[WS(rs, k)] and ii[WS(rs, k)] for k = 0..31; element 0 is
 *     used as-is, every other element is combined with a multiplier pair
 *     (a, b) as a*re + b*im and a*im - b*re,
 *   - stores 32 results back to the same ri/ii locations (in place).
 *
 * Parameters:
 *   ri, ii  data arrays, read and overwritten; the multiply pattern above
 *           treats them as the real and imaginary parts of complex samples.
 *   W       twiddle table; advanced by 8 entries per iteration, starting
 *           at offset mb * 8.
 *   rs      stride passed to WS() to address element k of one transform.
 *   mb, me  first and one-past-last iteration index.
 *   ms      increment applied to ri and ii between iterations.
 *
 * NOTE(review): R, E, INT, stride, WS, DK, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE come from project headers. The description assumes
 * FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b -- confirm there.
 */
Chris@82 1024 static void t2_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 1025 {
/*
 * Trigonometric constants (name = leading digits of the value):
 * sin(pi/16), cos(pi/16), sin(3pi/16), cos(3pi/16), sin(pi/8), cos(pi/8),
 * cos(pi/4), in that order.
 */
Chris@82 1026 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@82 1027 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 1028 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@82 1029 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 1030 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@82 1031 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 1032 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 1033 {
Chris@82 1034 INT m;
/* One pass per m; ri/ii step by ms and W by 8, W first skips mb * 8 entries. */
Chris@82 1035 for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) {
Chris@82 1036 E T2, T5, T3, T6, T8, TM, TO, Td, T9, Te, Th, Tl, TD, TH, T1y;
Chris@82 1037 E T1H, T15, T1A, T11, T1F, T1n, T1p, T2q, T2I, T2u, T2K, T2V, T3b, T2Z, T3d;
Chris@82 1038 E Tu, Ty, T3l, T3n, T1t, T1v, T2f, T2h, T1a, T1e, T32, T34, T1W, T1Y, T2C;
Chris@82 1039 E Tg, TR, Tk, TS, Tm, TV, To, TT, T1M, T21, T1P, T22, T1Q, T25;
Chris@82 1040 E T1S, T23;
/*
 * Multiplier set-up: load the four stored pairs (W[0],W[1]) .. (W[6],W[7])
 * and build every other multiplier pair from their products.
 */
Chris@82 1041 {
Chris@82 1042 E Ts, T1d, Tx, T18, Tt, T1c, Tw, T19, TB, T14, TG, TZ, TC, T13, TF;
Chris@82 1043 E T10;
Chris@82 1044 {
Chris@82 1045 E T4, Tc, T7, Tb;
Chris@82 1046 T2 = W[0];
Chris@82 1047 T5 = W[1];
Chris@82 1048 T3 = W[2];
Chris@82 1049 T6 = W[3];
Chris@82 1050 T4 = T2 * T3;
Chris@82 1051 Tc = T5 * T3;
Chris@82 1052 T7 = T5 * T6;
Chris@82 1053 Tb = T2 * T6;
Chris@82 1054 T8 = T4 + T7;
Chris@82 1055 TM = T4 - T7;
Chris@82 1056 TO = Tb + Tc;
Chris@82 1057 Td = Tb - Tc;
Chris@82 1058 T9 = W[4];
Chris@82 1059 Ts = T2 * T9;
Chris@82 1060 T1d = T6 * T9;
Chris@82 1061 Tx = T5 * T9;
Chris@82 1062 T18 = T3 * T9;
Chris@82 1063 Te = W[5];
Chris@82 1064 Tt = T5 * Te;
Chris@82 1065 T1c = T3 * Te;
Chris@82 1066 Tw = T2 * Te;
Chris@82 1067 T19 = T6 * Te;
Chris@82 1068 Th = W[6];
Chris@82 1069 TB = T3 * Th;
Chris@82 1070 T14 = T5 * Th;
Chris@82 1071 TG = T6 * Th;
Chris@82 1072 TZ = T2 * Th;
Chris@82 1073 Tl = W[7];
Chris@82 1074 TC = T6 * Tl;
Chris@82 1075 T13 = T2 * Tl;
Chris@82 1076 TF = T3 * Tl;
Chris@82 1077 T10 = T5 * Tl;
Chris@82 1078 }
Chris@82 1079 TD = TB + TC;
Chris@82 1080 TH = TF - TG;
Chris@82 1081 T1y = TZ + T10;
Chris@82 1082 T1H = TF + TG;
Chris@82 1083 T15 = T13 + T14;
Chris@82 1084 T1A = T13 - T14;
Chris@82 1085 T11 = TZ - T10;
Chris@82 1086 T1F = TB - TC;
Chris@82 1087 T1n = FMA(T9, Th, Te * Tl);
Chris@82 1088 T1p = FNMS(Te, Th, T9 * Tl);
Chris@82 1089 {
Chris@82 1090 E T2o, T2p, T2s, T2t;
Chris@82 1091 T2o = T8 * Th;
Chris@82 1092 T2p = Td * Tl;
Chris@82 1093 T2q = T2o + T2p;
Chris@82 1094 T2I = T2o - T2p;
Chris@82 1095 T2s = T8 * Tl;
Chris@82 1096 T2t = Td * Th;
Chris@82 1097 T2u = T2s - T2t;
Chris@82 1098 T2K = T2s + T2t;
Chris@82 1099 }
Chris@82 1100 {
Chris@82 1101 E T2T, T2U, T2X, T2Y;
Chris@82 1102 T2T = TM * Th;
Chris@82 1103 T2U = TO * Tl;
Chris@82 1104 T2V = T2T - T2U;
Chris@82 1105 T3b = T2T + T2U;
Chris@82 1106 T2X = TM * Tl;
Chris@82 1107 T2Y = TO * Th;
Chris@82 1108 T2Z = T2X + T2Y;
Chris@82 1109 T3d = T2X - T2Y;
Chris@82 1110 Tu = Ts + Tt;
Chris@82 1111 Ty = Tw - Tx;
Chris@82 1112 T3l = FMA(Tu, Th, Ty * Tl);
Chris@82 1113 T3n = FNMS(Ty, Th, Tu * Tl);
Chris@82 1114 }
Chris@82 1115 T1t = Ts - Tt;
Chris@82 1116 T1v = Tw + Tx;
Chris@82 1117 T2f = FMA(T1t, Th, T1v * Tl);
Chris@82 1118 T2h = FNMS(T1v, Th, T1t * Tl);
Chris@82 1119 T1a = T18 - T19;
Chris@82 1120 T1e = T1c + T1d;
Chris@82 1121 T32 = FMA(T1a, Th, T1e * Tl);
Chris@82 1122 T34 = FNMS(T1e, Th, T1a * Tl);
Chris@82 1123 T1W = T18 + T19;
Chris@82 1124 T1Y = T1c - T1d;
Chris@82 1125 T2C = FMA(T1W, Th, T1Y * Tl);
Chris@82 1126 T2E = FNMS(T1Y, Th, T1W * Tl);
Chris@82 1127 {
Chris@82 1128 E Ta, Tf, Ti, Tj;
Chris@82 1129 Ta = T8 * T9;
Chris@82 1130 Tf = Td * Te;
Chris@82 1131 Tg = Ta - Tf;
Chris@82 1132 TR = Ta + Tf;
Chris@82 1133 Ti = T8 * Te;
Chris@82 1134 Tj = Td * T9;
Chris@82 1135 Tk = Ti + Tj;
Chris@82 1136 TS = Ti - Tj;
Chris@82 1137 }
Chris@82 1138 Tm = FMA(Tg, Th, Tk * Tl);
Chris@82 1139 TV = FNMS(TS, Th, TR * Tl);
Chris@82 1140 To = FNMS(Tk, Th, Tg * Tl);
Chris@82 1141 TT = FMA(TR, Th, TS * Tl);
Chris@82 1142 {
Chris@82 1143 E T1K, T1L, T1N, T1O;
Chris@82 1144 T1K = TM * T9;
Chris@82 1145 T1L = TO * Te;
Chris@82 1146 T1M = T1K - T1L;
Chris@82 1147 T21 = T1K + T1L;
Chris@82 1148 T1N = TM * Te;
Chris@82 1149 T1O = TO * T9;
Chris@82 1150 T1P = T1N + T1O;
Chris@82 1151 T22 = T1N - T1O;
Chris@82 1152 }
Chris@82 1153 T1Q = FMA(T1M, Th, T1P * Tl);
Chris@82 1154 T25 = FNMS(T22, Th, T21 * Tl);
Chris@82 1155 T1S = FNMS(T1P, Th, T1M * Tl);
Chris@82 1156 T23 = FMA(T21, Th, T22 * Tl);
Chris@82 1157 }
/*
 * Main computation: load and scale the 32 inputs in groups of four, keep
 * their sums and differences, then combine them into the 32 outputs.
 */
Chris@82 1158 {
Chris@82 1159 E TL, T6f, T8c, T8q, T3F, T5t, T7I, T7W, T2y, T6B, T6y, T7j, T4k, T5J, T4B;
Chris@82 1160 E T5G, T3h, T6H, T6O, T7o, T4L, T5N, T52, T5Q, T1i, T7V, T6i, T7D, T3K, T5u;
Chris@82 1161 E T3P, T5v, T1E, T6n, T6m, T7e, T3W, T5y, T41, T5z, T29, T6p, T6s, T7f, T47;
Chris@82 1162 E T5B, T4c, T5C, T2R, T6z, T6E, T7k, T4v, T5H, T4E, T5K, T3y, T6P, T6K, T7p;
Chris@82 1163 E T4W, T5R, T55, T5O;
/* Inputs 0, 16, 8, 24 (input 0 is not multiplied). */
Chris@82 1164 {
Chris@82 1165 E T1, T7G, Tq, T7F, TA, T3C, TJ, T3D, Tn, Tp;
Chris@82 1166 T1 = ri[0];
Chris@82 1167 T7G = ii[0];
Chris@82 1168 Tn = ri[WS(rs, 16)];
Chris@82 1169 Tp = ii[WS(rs, 16)];
Chris@82 1170 Tq = FMA(Tm, Tn, To * Tp);
Chris@82 1171 T7F = FNMS(To, Tn, Tm * Tp);
Chris@82 1172 {
Chris@82 1173 E Tv, Tz, TE, TI;
Chris@82 1174 Tv = ri[WS(rs, 8)];
Chris@82 1175 Tz = ii[WS(rs, 8)];
Chris@82 1176 TA = FMA(Tu, Tv, Ty * Tz);
Chris@82 1177 T3C = FNMS(Ty, Tv, Tu * Tz);
Chris@82 1178 TE = ri[WS(rs, 24)];
Chris@82 1179 TI = ii[WS(rs, 24)];
Chris@82 1180 TJ = FMA(TD, TE, TH * TI);
Chris@82 1181 T3D = FNMS(TH, TE, TD * TI);
Chris@82 1182 }
Chris@82 1183 {
Chris@82 1184 E Tr, TK, T8a, T8b;
Chris@82 1185 Tr = T1 + Tq;
Chris@82 1186 TK = TA + TJ;
Chris@82 1187 TL = Tr + TK;
Chris@82 1188 T6f = Tr - TK;
Chris@82 1189 T8a = T7G - T7F;
Chris@82 1190 T8b = TA - TJ;
Chris@82 1191 T8c = T8a - T8b;
Chris@82 1192 T8q = T8b + T8a;
Chris@82 1193 }
Chris@82 1194 {
Chris@82 1195 E T3B, T3E, T7E, T7H;
Chris@82 1196 T3B = T1 - Tq;
Chris@82 1197 T3E = T3C - T3D;
Chris@82 1198 T3F = T3B - T3E;
Chris@82 1199 T5t = T3B + T3E;
Chris@82 1200 T7E = T3C + T3D;
Chris@82 1201 T7H = T7F + T7G;
Chris@82 1202 T7I = T7E + T7H;
Chris@82 1203 T7W = T7H - T7E;
Chris@82 1204 }
Chris@82 1205 }
/* Inputs 1, 25, 17, 9. */
Chris@82 1206 {
Chris@82 1207 E T2e, T4g, T2w, T4z, T2j, T4h, T2n, T4y;
Chris@82 1208 {
Chris@82 1209 E T2c, T2d, T2r, T2v;
Chris@82 1210 T2c = ri[WS(rs, 1)];
Chris@82 1211 T2d = ii[WS(rs, 1)];
Chris@82 1212 T2e = FMA(T2, T2c, T5 * T2d);
Chris@82 1213 T4g = FNMS(T5, T2c, T2 * T2d);
Chris@82 1214 T2r = ri[WS(rs, 25)];
Chris@82 1215 T2v = ii[WS(rs, 25)];
Chris@82 1216 T2w = FMA(T2q, T2r, T2u * T2v);
Chris@82 1217 T4z = FNMS(T2u, T2r, T2q * T2v);
Chris@82 1218 }
Chris@82 1219 {
Chris@82 1220 E T2g, T2i, T2l, T2m;
Chris@82 1221 T2g = ri[WS(rs, 17)];
Chris@82 1222 T2i = ii[WS(rs, 17)];
Chris@82 1223 T2j = FMA(T2f, T2g, T2h * T2i);
Chris@82 1224 T4h = FNMS(T2h, T2g, T2f * T2i);
Chris@82 1225 T2l = ri[WS(rs, 9)];
Chris@82 1226 T2m = ii[WS(rs, 9)];
Chris@82 1227 T2n = FMA(T9, T2l, Te * T2m);
Chris@82 1228 T4y = FNMS(Te, T2l, T9 * T2m);
Chris@82 1229 }
Chris@82 1230 {
Chris@82 1231 E T2k, T2x, T6w, T6x;
Chris@82 1232 T2k = T2e + T2j;
Chris@82 1233 T2x = T2n + T2w;
Chris@82 1234 T2y = T2k + T2x;
Chris@82 1235 T6B = T2k - T2x;
Chris@82 1236 T6w = T4g + T4h;
Chris@82 1237 T6x = T4y + T4z;
Chris@82 1238 T6y = T6w - T6x;
Chris@82 1239 T7j = T6w + T6x;
Chris@82 1240 }
Chris@82 1241 {
Chris@82 1242 E T4i, T4j, T4x, T4A;
Chris@82 1243 T4i = T4g - T4h;
Chris@82 1244 T4j = T2n - T2w;
Chris@82 1245 T4k = T4i + T4j;
Chris@82 1246 T5J = T4i - T4j;
Chris@82 1247 T4x = T2e - T2j;
Chris@82 1248 T4A = T4y - T4z;
Chris@82 1249 T4B = T4x - T4A;
Chris@82 1250 T5G = T4x + T4A;
Chris@82 1251 }
Chris@82 1252 }
/* Inputs 31, 23, 15, 7. */
Chris@82 1253 {
Chris@82 1254 E T31, T4Y, T3f, T4J, T36, T4Z, T3a, T4I;
Chris@82 1255 {
Chris@82 1256 E T2W, T30, T3c, T3e;
Chris@82 1257 T2W = ri[WS(rs, 31)];
Chris@82 1258 T30 = ii[WS(rs, 31)];
Chris@82 1259 T31 = FMA(T2V, T2W, T2Z * T30);
Chris@82 1260 T4Y = FNMS(T2Z, T2W, T2V * T30);
Chris@82 1261 T3c = ri[WS(rs, 23)];
Chris@82 1262 T3e = ii[WS(rs, 23)];
Chris@82 1263 T3f = FMA(T3b, T3c, T3d * T3e);
Chris@82 1264 T4J = FNMS(T3d, T3c, T3b * T3e);
Chris@82 1265 }
Chris@82 1266 {
Chris@82 1267 E T33, T35, T38, T39;
Chris@82 1268 T33 = ri[WS(rs, 15)];
Chris@82 1269 T35 = ii[WS(rs, 15)];
Chris@82 1270 T36 = FMA(T32, T33, T34 * T35);
Chris@82 1271 T4Z = FNMS(T34, T33, T32 * T35);
Chris@82 1272 T38 = ri[WS(rs, 7)];
Chris@82 1273 T39 = ii[WS(rs, 7)];
Chris@82 1274 T3a = FMA(TR, T38, TS * T39);
Chris@82 1275 T4I = FNMS(TS, T38, TR * T39);
Chris@82 1276 }
Chris@82 1277 {
Chris@82 1278 E T37, T3g, T6M, T6N;
Chris@82 1279 T37 = T31 + T36;
Chris@82 1280 T3g = T3a + T3f;
Chris@82 1281 T3h = T37 + T3g;
Chris@82 1282 T6H = T37 - T3g;
Chris@82 1283 T6M = T4Y + T4Z;
Chris@82 1284 T6N = T4I + T4J;
Chris@82 1285 T6O = T6M - T6N;
Chris@82 1286 T7o = T6M + T6N;
Chris@82 1287 }
Chris@82 1288 {
Chris@82 1289 E T4H, T4K, T50, T51;
Chris@82 1290 T4H = T31 - T36;
Chris@82 1291 T4K = T4I - T4J;
Chris@82 1292 T4L = T4H - T4K;
Chris@82 1293 T5N = T4H + T4K;
Chris@82 1294 T50 = T4Y - T4Z;
Chris@82 1295 T51 = T3a - T3f;
Chris@82 1296 T52 = T50 + T51;
Chris@82 1297 T5Q = T50 - T51;
Chris@82 1298 }
Chris@82 1299 }
/* Inputs 4, 12, 20, 28. */
Chris@82 1300 {
Chris@82 1301 E TQ, T3G, T1g, T3N, TX, T3H, T17, T3M;
Chris@82 1302 {
Chris@82 1303 E TN, TP, T1b, T1f;
Chris@82 1304 TN = ri[WS(rs, 4)];
Chris@82 1305 TP = ii[WS(rs, 4)];
Chris@82 1306 TQ = FMA(TM, TN, TO * TP);
Chris@82 1307 T3G = FNMS(TO, TN, TM * TP);
Chris@82 1308 T1b = ri[WS(rs, 12)];
Chris@82 1309 T1f = ii[WS(rs, 12)];
Chris@82 1310 T1g = FMA(T1a, T1b, T1e * T1f);
Chris@82 1311 T3N = FNMS(T1e, T1b, T1a * T1f);
Chris@82 1312 }
Chris@82 1313 {
Chris@82 1314 E TU, TW, T12, T16;
Chris@82 1315 TU = ri[WS(rs, 20)];
Chris@82 1316 TW = ii[WS(rs, 20)];
Chris@82 1317 TX = FMA(TT, TU, TV * TW);
Chris@82 1318 T3H = FNMS(TV, TU, TT * TW);
Chris@82 1319 T12 = ri[WS(rs, 28)];
Chris@82 1320 T16 = ii[WS(rs, 28)];
Chris@82 1321 T17 = FMA(T11, T12, T15 * T16);
Chris@82 1322 T3M = FNMS(T15, T12, T11 * T16);
Chris@82 1323 }
Chris@82 1324 {
Chris@82 1325 E TY, T1h, T6g, T6h;
Chris@82 1326 TY = TQ + TX;
Chris@82 1327 T1h = T17 + T1g;
Chris@82 1328 T1i = TY + T1h;
Chris@82 1329 T7V = T1h - TY;
Chris@82 1330 T6g = T3G + T3H;
Chris@82 1331 T6h = T3M + T3N;
Chris@82 1332 T6i = T6g - T6h;
Chris@82 1333 T7D = T6g + T6h;
Chris@82 1334 }
Chris@82 1335 {
Chris@82 1336 E T3I, T3J, T3L, T3O;
Chris@82 1337 T3I = T3G - T3H;
Chris@82 1338 T3J = TQ - TX;
Chris@82 1339 T3K = T3I - T3J;
Chris@82 1340 T5u = T3J + T3I;
Chris@82 1341 T3L = T17 - T1g;
Chris@82 1342 T3O = T3M - T3N;
Chris@82 1343 T3P = T3L + T3O;
Chris@82 1344 T5v = T3L - T3O;
Chris@82 1345 }
Chris@82 1346 }
/* Inputs 2, 26, 18, 10. */
Chris@82 1347 {
Chris@82 1348 E T1m, T3S, T1C, T3Z, T1r, T3T, T1x, T3Y;
Chris@82 1349 {
Chris@82 1350 E T1k, T1l, T1z, T1B;
Chris@82 1351 T1k = ri[WS(rs, 2)];
Chris@82 1352 T1l = ii[WS(rs, 2)];
Chris@82 1353 T1m = FMA(T8, T1k, Td * T1l);
Chris@82 1354 T3S = FNMS(Td, T1k, T8 * T1l);
Chris@82 1355 T1z = ri[WS(rs, 26)];
Chris@82 1356 T1B = ii[WS(rs, 26)];
Chris@82 1357 T1C = FMA(T1y, T1z, T1A * T1B);
Chris@82 1358 T3Z = FNMS(T1A, T1z, T1y * T1B);
Chris@82 1359 }
Chris@82 1360 {
Chris@82 1361 E T1o, T1q, T1u, T1w;
Chris@82 1362 T1o = ri[WS(rs, 18)];
Chris@82 1363 T1q = ii[WS(rs, 18)];
Chris@82 1364 T1r = FMA(T1n, T1o, T1p * T1q);
Chris@82 1365 T3T = FNMS(T1p, T1o, T1n * T1q);
Chris@82 1366 T1u = ri[WS(rs, 10)];
Chris@82 1367 T1w = ii[WS(rs, 10)];
Chris@82 1368 T1x = FMA(T1t, T1u, T1v * T1w);
Chris@82 1369 T3Y = FNMS(T1v, T1u, T1t * T1w);
Chris@82 1370 }
Chris@82 1371 {
Chris@82 1372 E T1s, T1D, T6k, T6l;
Chris@82 1373 T1s = T1m + T1r;
Chris@82 1374 T1D = T1x + T1C;
Chris@82 1375 T1E = T1s + T1D;
Chris@82 1376 T6n = T1s - T1D;
Chris@82 1377 T6k = T3S + T3T;
Chris@82 1378 T6l = T3Y + T3Z;
Chris@82 1379 T6m = T6k - T6l;
Chris@82 1380 T7e = T6k + T6l;
Chris@82 1381 }
Chris@82 1382 {
Chris@82 1383 E T3U, T3V, T3X, T40;
Chris@82 1384 T3U = T3S - T3T;
Chris@82 1385 T3V = T1x - T1C;
Chris@82 1386 T3W = T3U + T3V;
Chris@82 1387 T5y = T3U - T3V;
Chris@82 1388 T3X = T1m - T1r;
Chris@82 1389 T40 = T3Y - T3Z;
Chris@82 1390 T41 = T3X - T40;
Chris@82 1391 T5z = T3X + T40;
Chris@82 1392 }
Chris@82 1393 }
/* Inputs 30, 22, 14, 6. */
Chris@82 1394 {
Chris@82 1395 E T1J, T43, T27, T4a, T1U, T44, T20, T49;
Chris@82 1396 {
Chris@82 1397 E T1G, T1I, T24, T26;
Chris@82 1398 T1G = ri[WS(rs, 30)];
Chris@82 1399 T1I = ii[WS(rs, 30)];
Chris@82 1400 T1J = FMA(T1F, T1G, T1H * T1I);
Chris@82 1401 T43 = FNMS(T1H, T1G, T1F * T1I);
Chris@82 1402 T24 = ri[WS(rs, 22)];
Chris@82 1403 T26 = ii[WS(rs, 22)];
Chris@82 1404 T27 = FMA(T23, T24, T25 * T26);
Chris@82 1405 T4a = FNMS(T25, T24, T23 * T26);
Chris@82 1406 }
Chris@82 1407 {
Chris@82 1408 E T1R, T1T, T1X, T1Z;
Chris@82 1409 T1R = ri[WS(rs, 14)];
Chris@82 1410 T1T = ii[WS(rs, 14)];
Chris@82 1411 T1U = FMA(T1Q, T1R, T1S * T1T);
Chris@82 1412 T44 = FNMS(T1S, T1R, T1Q * T1T);
Chris@82 1413 T1X = ri[WS(rs, 6)];
Chris@82 1414 T1Z = ii[WS(rs, 6)];
Chris@82 1415 T20 = FMA(T1W, T1X, T1Y * T1Z);
Chris@82 1416 T49 = FNMS(T1Y, T1X, T1W * T1Z);
Chris@82 1417 }
Chris@82 1418 {
Chris@82 1419 E T1V, T28, T6q, T6r;
Chris@82 1420 T1V = T1J + T1U;
Chris@82 1421 T28 = T20 + T27;
Chris@82 1422 T29 = T1V + T28;
Chris@82 1423 T6p = T1V - T28;
Chris@82 1424 T6q = T43 + T44;
Chris@82 1425 T6r = T49 + T4a;
Chris@82 1426 T6s = T6q - T6r;
Chris@82 1427 T7f = T6q + T6r;
Chris@82 1428 }
Chris@82 1429 {
Chris@82 1430 E T45, T46, T48, T4b;
Chris@82 1431 T45 = T43 - T44;
Chris@82 1432 T46 = T20 - T27;
Chris@82 1433 T47 = T45 + T46;
Chris@82 1434 T5B = T45 - T46;
Chris@82 1435 T48 = T1J - T1U;
Chris@82 1436 T4b = T49 - T4a;
Chris@82 1437 T4c = T48 - T4b;
Chris@82 1438 T5C = T48 + T4b;
Chris@82 1439 }
Chris@82 1440 }
/* Inputs 5, 21, 29, 13. */
Chris@82 1441 {
Chris@82 1442 E T2B, T4r, T2G, T4s, T4q, T4t, T2M, T4m, T2P, T4n, T4l, T4o;
Chris@82 1443 {
Chris@82 1444 E T2z, T2A, T2D, T2F;
Chris@82 1445 T2z = ri[WS(rs, 5)];
Chris@82 1446 T2A = ii[WS(rs, 5)];
Chris@82 1447 T2B = FMA(T21, T2z, T22 * T2A);
Chris@82 1448 T4r = FNMS(T22, T2z, T21 * T2A);
Chris@82 1449 T2D = ri[WS(rs, 21)];
Chris@82 1450 T2F = ii[WS(rs, 21)];
Chris@82 1451 T2G = FMA(T2C, T2D, T2E * T2F);
Chris@82 1452 T4s = FNMS(T2E, T2D, T2C * T2F);
Chris@82 1453 }
Chris@82 1454 T4q = T2B - T2G;
Chris@82 1455 T4t = T4r - T4s;
Chris@82 1456 {
Chris@82 1457 E T2J, T2L, T2N, T2O;
Chris@82 1458 T2J = ri[WS(rs, 29)];
Chris@82 1459 T2L = ii[WS(rs, 29)];
Chris@82 1460 T2M = FMA(T2I, T2J, T2K * T2L);
Chris@82 1461 T4m = FNMS(T2K, T2J, T2I * T2L);
Chris@82 1462 T2N = ri[WS(rs, 13)];
Chris@82 1463 T2O = ii[WS(rs, 13)];
Chris@82 1464 T2P = FMA(T1M, T2N, T1P * T2O);
Chris@82 1465 T4n = FNMS(T1P, T2N, T1M * T2O);
Chris@82 1466 }
Chris@82 1467 T4l = T2M - T2P;
Chris@82 1468 T4o = T4m - T4n;
Chris@82 1469 {
Chris@82 1470 E T2H, T2Q, T6C, T6D;
Chris@82 1471 T2H = T2B + T2G;
Chris@82 1472 T2Q = T2M + T2P;
Chris@82 1473 T2R = T2H + T2Q;
Chris@82 1474 T6z = T2Q - T2H;
Chris@82 1475 T6C = T4r + T4s;
Chris@82 1476 T6D = T4m + T4n;
Chris@82 1477 T6E = T6C - T6D;
Chris@82 1478 T7k = T6C + T6D;
Chris@82 1479 }
Chris@82 1480 {
Chris@82 1481 E T4p, T4u, T4C, T4D;
Chris@82 1482 T4p = T4l - T4o;
Chris@82 1483 T4u = T4q + T4t;
Chris@82 1484 T4v = KP707106781 * (T4p - T4u);
Chris@82 1485 T5H = KP707106781 * (T4u + T4p);
Chris@82 1486 T4C = T4t - T4q;
Chris@82 1487 T4D = T4l + T4o;
Chris@82 1488 T4E = KP707106781 * (T4C - T4D);
Chris@82 1489 T5K = KP707106781 * (T4C + T4D);
Chris@82 1490 }
Chris@82 1491 }
/* Inputs 3, 19, 27, 11. */
Chris@82 1492 {
Chris@82 1493 E T3k, T4M, T3p, T4N, T4O, T4P, T3t, T4S, T3w, T4T, T4R, T4U;
Chris@82 1494 {
Chris@82 1495 E T3i, T3j, T3m, T3o;
Chris@82 1496 T3i = ri[WS(rs, 3)];
Chris@82 1497 T3j = ii[WS(rs, 3)];
Chris@82 1498 T3k = FMA(T3, T3i, T6 * T3j);
Chris@82 1499 T4M = FNMS(T6, T3i, T3 * T3j);
Chris@82 1500 T3m = ri[WS(rs, 19)];
Chris@82 1501 T3o = ii[WS(rs, 19)];
Chris@82 1502 T3p = FMA(T3l, T3m, T3n * T3o);
Chris@82 1503 T4N = FNMS(T3n, T3m, T3l * T3o);
Chris@82 1504 }
Chris@82 1505 T4O = T4M - T4N;
Chris@82 1506 T4P = T3k - T3p;
Chris@82 1507 {
Chris@82 1508 E T3r, T3s, T3u, T3v;
Chris@82 1509 T3r = ri[WS(rs, 27)];
Chris@82 1510 T3s = ii[WS(rs, 27)];
Chris@82 1511 T3t = FMA(Th, T3r, Tl * T3s);
Chris@82 1512 T4S = FNMS(Tl, T3r, Th * T3s);
Chris@82 1513 T3u = ri[WS(rs, 11)];
Chris@82 1514 T3v = ii[WS(rs, 11)];
Chris@82 1515 T3w = FMA(Tg, T3u, Tk * T3v);
Chris@82 1516 T4T = FNMS(Tk, T3u, Tg * T3v);
Chris@82 1517 }
Chris@82 1518 T4R = T3t - T3w;
Chris@82 1519 T4U = T4S - T4T;
Chris@82 1520 {
Chris@82 1521 E T3q, T3x, T6I, T6J;
Chris@82 1522 T3q = T3k + T3p;
Chris@82 1523 T3x = T3t + T3w;
Chris@82 1524 T3y = T3q + T3x;
Chris@82 1525 T6P = T3x - T3q;
Chris@82 1526 T6I = T4M + T4N;
Chris@82 1527 T6J = T4S + T4T;
Chris@82 1528 T6K = T6I - T6J;
Chris@82 1529 T7p = T6I + T6J;
Chris@82 1530 }
Chris@82 1531 {
Chris@82 1532 E T4Q, T4V, T53, T54;
Chris@82 1533 T4Q = T4O - T4P;
Chris@82 1534 T4V = T4R + T4U;
Chris@82 1535 T4W = KP707106781 * (T4Q - T4V);
Chris@82 1536 T5R = KP707106781 * (T4Q + T4V);
Chris@82 1537 T53 = T4R - T4U;
Chris@82 1538 T54 = T4P + T4O;
Chris@82 1539 T55 = KP707106781 * (T53 - T54);
Chris@82 1540 T5O = KP707106781 * (T54 + T53);
Chris@82 1541 }
Chris@82 1542 }
/* Outputs 0, 8, 16, 24. */
Chris@82 1543 {
Chris@82 1544 E T2b, T7x, T7K, T7M, T3A, T7L, T7A, T7B;
Chris@82 1545 {
Chris@82 1546 E T1j, T2a, T7C, T7J;
Chris@82 1547 T1j = TL + T1i;
Chris@82 1548 T2a = T1E + T29;
Chris@82 1549 T2b = T1j + T2a;
Chris@82 1550 T7x = T1j - T2a;
Chris@82 1551 T7C = T7e + T7f;
Chris@82 1552 T7J = T7D + T7I;
Chris@82 1553 T7K = T7C + T7J;
Chris@82 1554 T7M = T7J - T7C;
Chris@82 1555 }
Chris@82 1556 {
Chris@82 1557 E T2S, T3z, T7y, T7z;
Chris@82 1558 T2S = T2y + T2R;
Chris@82 1559 T3z = T3h + T3y;
Chris@82 1560 T3A = T2S + T3z;
Chris@82 1561 T7L = T3z - T2S;
Chris@82 1562 T7y = T7j + T7k;
Chris@82 1563 T7z = T7o + T7p;
Chris@82 1564 T7A = T7y - T7z;
Chris@82 1565 T7B = T7y + T7z;
Chris@82 1566 }
Chris@82 1567 ri[WS(rs, 16)] = T2b - T3A;
Chris@82 1568 ii[WS(rs, 16)] = T7K - T7B;
Chris@82 1569 ri[0] = T2b + T3A;
Chris@82 1570 ii[0] = T7B + T7K;
Chris@82 1571 ri[WS(rs, 24)] = T7x - T7A;
Chris@82 1572 ii[WS(rs, 24)] = T7M - T7L;
Chris@82 1573 ri[WS(rs, 8)] = T7x + T7A;
Chris@82 1574 ii[WS(rs, 8)] = T7L + T7M;
Chris@82 1575 }
/* Outputs 4, 12, 20, 28. */
Chris@82 1576 {
Chris@82 1577 E T7h, T7t, T7Q, T7S, T7m, T7u, T7r, T7v;
Chris@82 1578 {
Chris@82 1579 E T7d, T7g, T7O, T7P;
Chris@82 1580 T7d = TL - T1i;
Chris@82 1581 T7g = T7e - T7f;
Chris@82 1582 T7h = T7d + T7g;
Chris@82 1583 T7t = T7d - T7g;
Chris@82 1584 T7O = T29 - T1E;
Chris@82 1585 T7P = T7I - T7D;
Chris@82 1586 T7Q = T7O + T7P;
Chris@82 1587 T7S = T7P - T7O;
Chris@82 1588 }
Chris@82 1589 {
Chris@82 1590 E T7i, T7l, T7n, T7q;
Chris@82 1591 T7i = T2y - T2R;
Chris@82 1592 T7l = T7j - T7k;
Chris@82 1593 T7m = T7i + T7l;
Chris@82 1594 T7u = T7l - T7i;
Chris@82 1595 T7n = T3h - T3y;
Chris@82 1596 T7q = T7o - T7p;
Chris@82 1597 T7r = T7n - T7q;
Chris@82 1598 T7v = T7n + T7q;
Chris@82 1599 }
Chris@82 1600 {
Chris@82 1601 E T7s, T7N, T7w, T7R;
Chris@82 1602 T7s = KP707106781 * (T7m + T7r);
Chris@82 1603 ri[WS(rs, 20)] = T7h - T7s;
Chris@82 1604 ri[WS(rs, 4)] = T7h + T7s;
Chris@82 1605 T7N = KP707106781 * (T7u + T7v);
Chris@82 1606 ii[WS(rs, 4)] = T7N + T7Q;
Chris@82 1607 ii[WS(rs, 20)] = T7Q - T7N;
Chris@82 1608 T7w = KP707106781 * (T7u - T7v);
Chris@82 1609 ri[WS(rs, 28)] = T7t - T7w;
Chris@82 1610 ri[WS(rs, 12)] = T7t + T7w;
Chris@82 1611 T7R = KP707106781 * (T7r - T7m);
Chris@82 1612 ii[WS(rs, 12)] = T7R + T7S;
Chris@82 1613 ii[WS(rs, 28)] = T7S - T7R;
Chris@82 1614 }
Chris@82 1615 }
/* Outputs 2, 6, 10, 14, 18, 22, 26, 30. */
Chris@82 1616 {
Chris@82 1617 E T6j, T7X, T83, T6X, T6u, T7U, T77, T7b, T70, T82, T6G, T6U, T74, T7a, T6R;
Chris@82 1618 E T6V;
Chris@82 1619 {
Chris@82 1620 E T6o, T6t, T6A, T6F;
Chris@82 1621 T6j = T6f - T6i;
Chris@82 1622 T7X = T7V + T7W;
Chris@82 1623 T83 = T7W - T7V;
Chris@82 1624 T6X = T6f + T6i;
Chris@82 1625 T6o = T6m - T6n;
Chris@82 1626 T6t = T6p + T6s;
Chris@82 1627 T6u = KP707106781 * (T6o - T6t);
Chris@82 1628 T7U = KP707106781 * (T6o + T6t);
Chris@82 1629 {
Chris@82 1630 E T75, T76, T6Y, T6Z;
Chris@82 1631 T75 = T6H + T6K;
Chris@82 1632 T76 = T6O + T6P;
Chris@82 1633 T77 = FNMS(KP382683432, T76, KP923879532 * T75);
Chris@82 1634 T7b = FMA(KP923879532, T76, KP382683432 * T75);
Chris@82 1635 T6Y = T6n + T6m;
Chris@82 1636 T6Z = T6p - T6s;
Chris@82 1637 T70 = KP707106781 * (T6Y + T6Z);
Chris@82 1638 T82 = KP707106781 * (T6Z - T6Y);
Chris@82 1639 }
Chris@82 1640 T6A = T6y - T6z;
Chris@82 1641 T6F = T6B - T6E;
Chris@82 1642 T6G = FMA(KP923879532, T6A, KP382683432 * T6F);
Chris@82 1643 T6U = FNMS(KP923879532, T6F, KP382683432 * T6A);
Chris@82 1644 {
Chris@82 1645 E T72, T73, T6L, T6Q;
Chris@82 1646 T72 = T6y + T6z;
Chris@82 1647 T73 = T6B + T6E;
Chris@82 1648 T74 = FMA(KP382683432, T72, KP923879532 * T73);
Chris@82 1649 T7a = FNMS(KP382683432, T73, KP923879532 * T72);
Chris@82 1650 T6L = T6H - T6K;
Chris@82 1651 T6Q = T6O - T6P;
Chris@82 1652 T6R = FNMS(KP923879532, T6Q, KP382683432 * T6L);
Chris@82 1653 T6V = FMA(KP382683432, T6Q, KP923879532 * T6L);
Chris@82 1654 }
Chris@82 1655 }
Chris@82 1656 {
Chris@82 1657 E T6v, T6S, T81, T84;
Chris@82 1658 T6v = T6j + T6u;
Chris@82 1659 T6S = T6G + T6R;
Chris@82 1660 ri[WS(rs, 22)] = T6v - T6S;
Chris@82 1661 ri[WS(rs, 6)] = T6v + T6S;
Chris@82 1662 T81 = T6U + T6V;
Chris@82 1663 T84 = T82 + T83;
Chris@82 1664 ii[WS(rs, 6)] = T81 + T84;
Chris@82 1665 ii[WS(rs, 22)] = T84 - T81;
Chris@82 1666 }
Chris@82 1667 {
Chris@82 1668 E T6T, T6W, T85, T86;
Chris@82 1669 T6T = T6j - T6u;
Chris@82 1670 T6W = T6U - T6V;
Chris@82 1671 ri[WS(rs, 30)] = T6T - T6W;
Chris@82 1672 ri[WS(rs, 14)] = T6T + T6W;
Chris@82 1673 T85 = T6R - T6G;
Chris@82 1674 T86 = T83 - T82;
Chris@82 1675 ii[WS(rs, 14)] = T85 + T86;
Chris@82 1676 ii[WS(rs, 30)] = T86 - T85;
Chris@82 1677 }
Chris@82 1678 {
Chris@82 1679 E T71, T78, T7T, T7Y;
Chris@82 1680 T71 = T6X + T70;
Chris@82 1681 T78 = T74 + T77;
Chris@82 1682 ri[WS(rs, 18)] = T71 - T78;
Chris@82 1683 ri[WS(rs, 2)] = T71 + T78;
Chris@82 1684 T7T = T7a + T7b;
Chris@82 1685 T7Y = T7U + T7X;
Chris@82 1686 ii[WS(rs, 2)] = T7T + T7Y;
Chris@82 1687 ii[WS(rs, 18)] = T7Y - T7T;
Chris@82 1688 }
Chris@82 1689 {
Chris@82 1690 E T79, T7c, T7Z, T80;
Chris@82 1691 T79 = T6X - T70;
Chris@82 1692 T7c = T7a - T7b;
Chris@82 1693 ri[WS(rs, 26)] = T79 - T7c;
Chris@82 1694 ri[WS(rs, 10)] = T79 + T7c;
Chris@82 1695 T7Z = T77 - T74;
Chris@82 1696 T80 = T7X - T7U;
Chris@82 1697 ii[WS(rs, 10)] = T7Z + T80;
Chris@82 1698 ii[WS(rs, 26)] = T80 - T7Z;
Chris@82 1699 }
Chris@82 1700 }
/* Outputs 3, 7, 11, 15, 19, 23, 27, 31. */
Chris@82 1701 {
Chris@82 1702 E T3R, T5d, T8r, T8x, T4e, T8o, T5n, T5r, T4G, T5a, T5g, T8w, T5k, T5q, T57;
Chris@82 1703 E T5b, T3Q, T8p;
Chris@82 1704 T3Q = KP707106781 * (T3K - T3P);
Chris@82 1705 T3R = T3F - T3Q;
Chris@82 1706 T5d = T3F + T3Q;
Chris@82 1707 T8p = KP707106781 * (T5v - T5u);
Chris@82 1708 T8r = T8p + T8q;
Chris@82 1709 T8x = T8q - T8p;
Chris@82 1710 {
Chris@82 1711 E T42, T4d, T5l, T5m;
Chris@82 1712 T42 = FNMS(KP923879532, T41, KP382683432 * T3W);
Chris@82 1713 T4d = FMA(KP382683432, T47, KP923879532 * T4c);
Chris@82 1714 T4e = T42 - T4d;
Chris@82 1715 T8o = T42 + T4d;
Chris@82 1716 T5l = T4L + T4W;
Chris@82 1717 T5m = T52 + T55;
Chris@82 1718 T5n = FNMS(KP555570233, T5m, KP831469612 * T5l);
Chris@82 1719 T5r = FMA(KP831469612, T5m, KP555570233 * T5l);
Chris@82 1720 }
Chris@82 1721 {
Chris@82 1722 E T4w, T4F, T5e, T5f;
Chris@82 1723 T4w = T4k - T4v;
Chris@82 1724 T4F = T4B - T4E;
Chris@82 1725 T4G = FMA(KP980785280, T4w, KP195090322 * T4F);
Chris@82 1726 T5a = FNMS(KP980785280, T4F, KP195090322 * T4w);
Chris@82 1727 T5e = FMA(KP923879532, T3W, KP382683432 * T41);
Chris@82 1728 T5f = FNMS(KP923879532, T47, KP382683432 * T4c);
Chris@82 1729 T5g = T5e + T5f;
Chris@82 1730 T8w = T5f - T5e;
Chris@82 1731 }
Chris@82 1732 {
Chris@82 1733 E T5i, T5j, T4X, T56;
Chris@82 1734 T5i = T4k + T4v;
Chris@82 1735 T5j = T4B + T4E;
Chris@82 1736 T5k = FMA(KP555570233, T5i, KP831469612 * T5j);
Chris@82 1737 T5q = FNMS(KP555570233, T5j, KP831469612 * T5i);
Chris@82 1738 T4X = T4L - T4W;
Chris@82 1739 T56 = T52 - T55;
Chris@82 1740 T57 = FNMS(KP980785280, T56, KP195090322 * T4X);
Chris@82 1741 T5b = FMA(KP195090322, T56, KP980785280 * T4X);
Chris@82 1742 }
Chris@82 1743 {
Chris@82 1744 E T4f, T58, T8v, T8y;
Chris@82 1745 T4f = T3R + T4e;
Chris@82 1746 T58 = T4G + T57;
Chris@82 1747 ri[WS(rs, 23)] = T4f - T58;
Chris@82 1748 ri[WS(rs, 7)] = T4f + T58;
Chris@82 1749 T8v = T5a + T5b;
Chris@82 1750 T8y = T8w + T8x;
Chris@82 1751 ii[WS(rs, 7)] = T8v + T8y;
Chris@82 1752 ii[WS(rs, 23)] = T8y - T8v;
Chris@82 1753 }
Chris@82 1754 {
Chris@82 1755 E T59, T5c, T8z, T8A;
Chris@82 1756 T59 = T3R - T4e;
Chris@82 1757 T5c = T5a - T5b;
Chris@82 1758 ri[WS(rs, 31)] = T59 - T5c;
Chris@82 1759 ri[WS(rs, 15)] = T59 + T5c;
Chris@82 1760 T8z = T57 - T4G;
Chris@82 1761 T8A = T8x - T8w;
Chris@82 1762 ii[WS(rs, 15)] = T8z + T8A;
Chris@82 1763 ii[WS(rs, 31)] = T8A - T8z;
Chris@82 1764 }
Chris@82 1765 {
Chris@82 1766 E T5h, T5o, T8n, T8s;
Chris@82 1767 T5h = T5d + T5g;
Chris@82 1768 T5o = T5k + T5n;
Chris@82 1769 ri[WS(rs, 19)] = T5h - T5o;
Chris@82 1770 ri[WS(rs, 3)] = T5h + T5o;
Chris@82 1771 T8n = T5q + T5r;
Chris@82 1772 T8s = T8o + T8r;
Chris@82 1773 ii[WS(rs, 3)] = T8n + T8s;
Chris@82 1774 ii[WS(rs, 19)] = T8s - T8n;
Chris@82 1775 }
Chris@82 1776 {
Chris@82 1777 E T5p, T5s, T8t, T8u;
Chris@82 1778 T5p = T5d - T5g;
Chris@82 1779 T5s = T5q - T5r;
Chris@82 1780 ri[WS(rs, 27)] = T5p - T5s;
Chris@82 1781 ri[WS(rs, 11)] = T5p + T5s;
Chris@82 1782 T8t = T5n - T5k;
Chris@82 1783 T8u = T8r - T8o;
Chris@82 1784 ii[WS(rs, 11)] = T8t + T8u;
Chris@82 1785 ii[WS(rs, 27)] = T8u - T8t;
Chris@82 1786 }
Chris@82 1787 }
/* Outputs 1, 5, 9, 13, 17, 21, 25, 29. */
Chris@82 1788 {
Chris@82 1789 E T5x, T5Z, T8d, T8j, T5E, T88, T69, T6d, T5M, T5W, T62, T8i, T66, T6c, T5T;
Chris@82 1790 E T5X, T5w, T89;
Chris@82 1791 T5w = KP707106781 * (T5u + T5v);
Chris@82 1792 T5x = T5t - T5w;
Chris@82 1793 T5Z = T5t + T5w;
Chris@82 1794 T89 = KP707106781 * (T3K + T3P);
Chris@82 1795 T8d = T89 + T8c;
Chris@82 1796 T8j = T8c - T89;
Chris@82 1797 {
Chris@82 1798 E T5A, T5D, T67, T68;
Chris@82 1799 T5A = FNMS(KP382683432, T5z, KP923879532 * T5y);
Chris@82 1800 T5D = FMA(KP923879532, T5B, KP382683432 * T5C);
Chris@82 1801 T5E = T5A - T5D;
Chris@82 1802 T88 = T5A + T5D;
Chris@82 1803 T67 = T5N + T5O;
Chris@82 1804 T68 = T5Q + T5R;
Chris@82 1805 T69 = FNMS(KP195090322, T68, KP980785280 * T67);
Chris@82 1806 T6d = FMA(KP195090322, T67, KP980785280 * T68);
Chris@82 1807 }
Chris@82 1808 {
Chris@82 1809 E T5I, T5L, T60, T61;
Chris@82 1810 T5I = T5G - T5H;
Chris@82 1811 T5L = T5J - T5K;
Chris@82 1812 T5M = FMA(KP555570233, T5I, KP831469612 * T5L);
Chris@82 1813 T5W = FNMS(KP831469612, T5I, KP555570233 * T5L);
Chris@82 1814 T60 = FMA(KP382683432, T5y, KP923879532 * T5z);
Chris@82 1815 T61 = FNMS(KP382683432, T5B, KP923879532 * T5C);
Chris@82 1816 T62 = T60 + T61;
Chris@82 1817 T8i = T61 - T60;
Chris@82 1818 }
Chris@82 1819 {
Chris@82 1820 E T64, T65, T5P, T5S;
Chris@82 1821 T64 = T5G + T5H;
Chris@82 1822 T65 = T5J + T5K;
Chris@82 1823 T66 = FMA(KP980785280, T64, KP195090322 * T65);
Chris@82 1824 T6c = FNMS(KP195090322, T64, KP980785280 * T65);
Chris@82 1825 T5P = T5N - T5O;
Chris@82 1826 T5S = T5Q - T5R;
Chris@82 1827 T5T = FNMS(KP831469612, T5S, KP555570233 * T5P);
Chris@82 1828 T5X = FMA(KP831469612, T5P, KP555570233 * T5S);
Chris@82 1829 }
Chris@82 1830 {
Chris@82 1831 E T5F, T5U, T8h, T8k;
Chris@82 1832 T5F = T5x + T5E;
Chris@82 1833 T5U = T5M + T5T;
Chris@82 1834 ri[WS(rs, 21)] = T5F - T5U;
Chris@82 1835 ri[WS(rs, 5)] = T5F + T5U;
Chris@82 1836 T8h = T5W + T5X;
Chris@82 1837 T8k = T8i + T8j;
Chris@82 1838 ii[WS(rs, 5)] = T8h + T8k;
Chris@82 1839 ii[WS(rs, 21)] = T8k - T8h;
Chris@82 1840 }
Chris@82 1841 {
Chris@82 1842 E T5V, T5Y, T8l, T8m;
Chris@82 1843 T5V = T5x - T5E;
Chris@82 1844 T5Y = T5W - T5X;
Chris@82 1845 ri[WS(rs, 29)] = T5V - T5Y;
Chris@82 1846 ri[WS(rs, 13)] = T5V + T5Y;
Chris@82 1847 T8l = T5T - T5M;
Chris@82 1848 T8m = T8j - T8i;
Chris@82 1849 ii[WS(rs, 13)] = T8l + T8m;
Chris@82 1850 ii[WS(rs, 29)] = T8m - T8l;
Chris@82 1851 }
Chris@82 1852 {
Chris@82 1853 E T63, T6a, T87, T8e;
Chris@82 1854 T63 = T5Z + T62;
Chris@82 1855 T6a = T66 + T69;
Chris@82 1856 ri[WS(rs, 17)] = T63 - T6a;
Chris@82 1857 ri[WS(rs, 1)] = T63 + T6a;
Chris@82 1858 T87 = T6c + T6d;
Chris@82 1859 T8e = T88 + T8d;
Chris@82 1860 ii[WS(rs, 1)] = T87 + T8e;
Chris@82 1861 ii[WS(rs, 17)] = T8e - T87;
Chris@82 1862 }
Chris@82 1863 {
Chris@82 1864 E T6b, T6e, T8f, T8g;
Chris@82 1865 T6b = T5Z - T62;
Chris@82 1866 T6e = T6c - T6d;
Chris@82 1867 ri[WS(rs, 25)] = T6b - T6e;
Chris@82 1868 ri[WS(rs, 9)] = T6b + T6e;
Chris@82 1869 T8f = T69 - T66;
Chris@82 1870 T8g = T8d - T88;
Chris@82 1871 ii[WS(rs, 9)] = T8f + T8g;
Chris@82 1872 ii[WS(rs, 25)] = T8g - T8f;
Chris@82 1873 }
Chris@82 1874 }
Chris@82 1875 }
Chris@82 1876 }
Chris@82 1877 }
Chris@82 1878 }
Chris@82 1879
/*
 * Twiddle instruction table for the non-FMA build of t2_32: four TW_CEXP
 * entries whose last field is 1, 3, 9 and 27, followed by a TW_NEXT entry.
 * The kernel reads eight values W[0..7] per iteration, i.e. four pairs.
 * NOTE(review): presumably each TW_CEXP entry names one stored twiddle
 * power (w^1, w^3, w^9, w^27, in line with -twiddle-log3) and TW_NEXT ends
 * the list; tw_instr is declared elsewhere, confirm the field meanings.
 */
Chris@82 1880 static const tw_instr twinstr[] = {
Chris@82 1881 {TW_CEXP, 0, 1},
Chris@82 1882 {TW_CEXP, 0, 3},
Chris@82 1883 {TW_CEXP, 0, 9},
Chris@82 1884 {TW_CEXP, 0, 27},
Chris@82 1885 {TW_NEXT, 1, 0}
Chris@82 1886 };
Chris@82 1887
/*
 * Codelet descriptor for the non-FMA build: size 32, name "t2_32", the
 * twinstr table above and &GENUS. The {376, 168, 112, 0} initializer
 * repeats the additions / multiplications / fused multiply-add counts
 * quoted in this variant's "This function contains ..." comment.
 * NOTE(review): meaning of the fourth count and of the three trailing
 * zeros depends on the ct_desc declaration, which is not in this file.
 */
Chris@82 1888 static const ct_desc desc = { 32, "t2_32", twinstr, &GENUS, {376, 168, 112, 0}, 0, 0, 0 };
Chris@82 1889
/*
 * Registration entry point (non-FMA build): hands the t2_32 kernel and its
 * descriptor to X(kdft_dit_register) for planner p. X() is a project
 * macro wrapped around the identifier; its expansion is defined elsewhere.
 */
Chris@82 1890 void X(codelet_t2_32) (planner *p) {
Chris@82 1891 X(kdft_dit_register) (p, t2_32, &desc);
Chris@82 1892 }
Chris@82 1893 #endif