annotate src/fftw-3.3.8/rdft/scalar/r2cf/hf2_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:36 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hf2_32 -include rdft/scalar/hf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 488 FP additions, 350 FP multiplications,
Chris@82 32 * (or, 236 additions, 98 multiplications, 252 fused multiply/add),
Chris@82 33 * 164 stack variables, 7 constants, and 128 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hf.h"
Chris@82 36
/*
 * hf2_32 -- machine-generated codelet (the generator command line is in the
 * "Generated by" comment earlier in this file: gen_hc2hc, -n 32, -dit,
 * -twiddle-log3).
 *
 * For every m in [mb, me) the loop body
 *   1. reads the eight values W[0..7] and derives further factor pairs from
 *      products of them,
 *   2. loads cr[WS(rs, k)] and ci[WS(rs, k)] for k = 0..31 (k = 0 is used
 *      as-is, every other k is combined with one factor pair),
 *   3. stores 32 results into cr[WS(rs, k)] and 32 into ci[WS(rs, k)],
 *      overwriting the inputs; all loads of an iteration precede its first
 *      store.
 *
 * Parameters:
 *   cr, ci  arrays updated in place; after each iteration cr moves forward
 *           by ms elements and ci moves backward by ms elements.
 *   W       read-only factor table; starts at W + (mb - 1) * 8 and advances
 *           by 8 values per iteration.
 *   rs      stride handed to WS() to index cr and ci.
 *   mb, me  half-open range [mb, me) of loop iterations.
 *   ms      per-iteration step applied to cr (+ms) and ci (-ms).
 *
 * NOTE(review): FMA, FNMS, FMS, DK, WS, E, R, INT, stride and
 * MAKE_VOLATILE_STRIDE are defined outside this file (presumably by the
 * included rdft headers). In upstream FFTW, FMA(a, b, c) is a*b + c,
 * FNMS(a, b, c) is c - a*b and FMS(a, b, c) is a*b - c -- confirm against
 * the headers in this tree.
 */
Chris@82 37 static void hf2_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/*
 * Literal constants used by the combining stages below. Numerically:
 * KP707106781 = sqrt(1/2), KP414213562 = sqrt(2) - 1 = tan(pi/8),
 * KP923879532 = cos(pi/8), KP980785280 = cos(pi/16),
 * KP831469612 = cos(3*pi/16), KP198912367 = tan(pi/16),
 * KP668178637 = tan(3*pi/16).
 */
Chris@82 39 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 40 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 41 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 42 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@82 43 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@82 44 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@82 45 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 46 {
Chris@82 47 INT m;
/*
 * One iteration per m: W is first offset to entry (mb - 1) * 8, then each
 * step advances W by 8, cr by +ms and ci by -ms.
 */
Chris@82 48 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) {
Chris@82 49 E T2, T8, T3, T6, Te, Ti, T5, T7, TJ, Tb, TM, Tc, Ts, T23, T1w;
Chris@82 50 E T19, TA, TE, T1s, T1N, T1o, T1C, T1F, T1K, T15, T11, T2F, T31, T2J, T34;
Chris@82 51 E T3f, T3z, T3j, T3C, Tw, T3M, T3Q, T1z, T2s, T2w, T1d, T3n, T3r, T26, T2T;
Chris@82 52 E T2X, Th, TR, TP, Td, Tj, TW, Tn, TS, T1U, T2b, T29, T1R, T1V, T2g;
Chris@82 53 E T1Z, T2c;
/*
 * Stage 1: factor set-up. The stored values are paired as
 * (W[0], W[1]) = (T2, T5), (W[2], W[3]) = (T3, T6),
 * (W[4], W[5]) = (T8, Tc), (W[6], W[7]) = (Te, Ti); further down these
 * pairs are applied directly to inputs 1, 3, 9 and 27 respectively.
 * Every other factor pair used in stage 2 is built in this block from
 * products of those eight values.
 */
Chris@82 54 {
Chris@82 55 E Tz, T1n, T10, TD, T1r, T14, T9, T1Q, Tv, T1c;
Chris@82 56 {
Chris@82 57 E T4, T18, Ta, Tr;
Chris@82 58 T2 = W[0];
Chris@82 59 T8 = W[4];
Chris@82 60 T3 = W[2];
Chris@82 61 T6 = W[3];
Chris@82 62 T4 = T2 * T3;
Chris@82 63 T18 = T3 * T8;
Chris@82 64 Ta = T2 * T6;
Chris@82 65 Tr = T2 * T8;
Chris@82 66 Te = W[6];
Chris@82 67 Tz = T3 * Te;
Chris@82 68 T1n = T8 * Te;
Chris@82 69 T10 = T2 * Te;
Chris@82 70 Ti = W[7];
Chris@82 71 TD = T3 * Ti;
Chris@82 72 T1r = T8 * Ti;
Chris@82 73 T14 = T2 * Ti;
Chris@82 74 T5 = W[1];
Chris@82 75 T7 = FMA(T5, T6, T4);
Chris@82 76 TJ = FNMS(T5, T6, T4);
Chris@82 77 T9 = T7 * T8;
Chris@82 78 T1Q = TJ * T8;
Chris@82 79 Tb = FNMS(T5, T3, Ta);
Chris@82 80 TM = FMA(T5, T3, Ta);
Chris@82 81 Tc = W[5];
Chris@82 82 Tv = T2 * Tc;
Chris@82 83 T1c = T3 * Tc;
Chris@82 84 Ts = FMA(T5, Tc, Tr);
Chris@82 85 T23 = FMA(T6, Tc, T18);
Chris@82 86 T1w = FNMS(T5, Tc, Tr);
Chris@82 87 T19 = FNMS(T6, Tc, T18);
Chris@82 88 }
Chris@82 89 TA = FMA(T6, Ti, Tz);
Chris@82 90 TE = FNMS(T6, Te, TD);
Chris@82 91 T1s = FNMS(Tc, Te, T1r);
Chris@82 92 T1N = FMA(T6, Te, TD);
Chris@82 93 T1o = FMA(Tc, Ti, T1n);
Chris@82 94 T1C = FMA(T5, Ti, T10);
Chris@82 95 T1F = FNMS(T5, Te, T14);
Chris@82 96 T1K = FNMS(T6, Ti, Tz);
Chris@82 97 T15 = FMA(T5, Te, T14);
Chris@82 98 T11 = FNMS(T5, Ti, T10);
Chris@82 99 {
Chris@82 100 E T2E, T2I, T2S, T2W;
Chris@82 101 T2E = T7 * Te;
Chris@82 102 T2F = FMA(Tb, Ti, T2E);
Chris@82 103 T31 = FNMS(Tb, Ti, T2E);
Chris@82 104 T2I = T7 * Ti;
Chris@82 105 T2J = FNMS(Tb, Te, T2I);
Chris@82 106 T34 = FMA(Tb, Te, T2I);
Chris@82 107 {
Chris@82 108 E T3e, T3i, T3L, T3P;
Chris@82 109 T3e = TJ * Te;
Chris@82 110 T3f = FNMS(TM, Ti, T3e);
Chris@82 111 T3z = FMA(TM, Ti, T3e);
Chris@82 112 T3i = TJ * Ti;
Chris@82 113 T3j = FMA(TM, Te, T3i);
Chris@82 114 T3C = FNMS(TM, Te, T3i);
Chris@82 115 T3L = Ts * Te;
Chris@82 116 T3P = Ts * Ti;
Chris@82 117 Tw = FNMS(T5, T8, Tv);
Chris@82 118 T3M = FMA(Tw, Ti, T3L);
Chris@82 119 T3Q = FNMS(Tw, Te, T3P);
Chris@82 120 }
Chris@82 121 {
Chris@82 122 E T2r, T2v, T3m, T3q;
Chris@82 123 T2r = T1w * Te;
Chris@82 124 T2v = T1w * Ti;
Chris@82 125 T1z = FMA(T5, T8, Tv);
Chris@82 126 T2s = FMA(T1z, Ti, T2r);
Chris@82 127 T2w = FNMS(T1z, Te, T2v);
Chris@82 128 T3m = T19 * Te;
Chris@82 129 T3q = T19 * Ti;
Chris@82 130 T1d = FMA(T6, T8, T1c);
Chris@82 131 T3n = FMA(T1d, Ti, T3m);
Chris@82 132 T3r = FNMS(T1d, Te, T3q);
Chris@82 133 }
Chris@82 134 T2S = T23 * Te;
Chris@82 135 T2W = T23 * Ti;
Chris@82 136 T26 = FNMS(T6, T8, T1c);
Chris@82 137 T2T = FMA(T26, Ti, T2S);
Chris@82 138 T2X = FNMS(T26, Te, T2W);
Chris@82 139 {
Chris@82 140 E TQ, TV, Tf, Tm, Tg;
Chris@82 141 Tg = T7 * Tc;
Chris@82 142 Th = FMA(Tb, T8, Tg);
Chris@82 143 TR = FNMS(Tb, T8, Tg);
Chris@82 144 TP = FMA(Tb, Tc, T9);
Chris@82 145 TQ = TP * Te;
Chris@82 146 TV = TP * Ti;
Chris@82 147 Td = FNMS(Tb, Tc, T9);
Chris@82 148 Tf = Td * Te;
Chris@82 149 Tm = Td * Ti;
Chris@82 150 Tj = FMA(Th, Ti, Tf);
Chris@82 151 TW = FNMS(TR, Te, TV);
Chris@82 152 Tn = FNMS(Th, Te, Tm);
Chris@82 153 TS = FMA(TR, Ti, TQ);
Chris@82 154 }
Chris@82 155 {
Chris@82 156 E T2a, T2f, T1S, T1Y, T1T;
Chris@82 157 T1T = TJ * Tc;
Chris@82 158 T1U = FMA(TM, T8, T1T);
Chris@82 159 T2b = FNMS(TM, T8, T1T);
Chris@82 160 T29 = FMA(TM, Tc, T1Q);
Chris@82 161 T2a = T29 * Te;
Chris@82 162 T2f = T29 * Ti;
Chris@82 163 T1R = FNMS(TM, Tc, T1Q);
Chris@82 164 T1S = T1R * Te;
Chris@82 165 T1Y = T1R * Ti;
Chris@82 166 T1V = FMA(T1U, Ti, T1S);
Chris@82 167 T2g = FNMS(T2b, Te, T2f);
Chris@82 168 T1Z = FNMS(T1U, Te, T1Y);
Chris@82 169 T2c = FMA(T2b, Ti, T2a);
Chris@82 170 }
Chris@82 171 }
Chris@82 172 }
/*
 * Stage 2: load every cr/ci input pair, combine it with its factor pair,
 * and form the sums and differences consumed by the store groups at the
 * end of this block.
 */
Chris@82 173 {
Chris@82 174 E Tq, T46, T8H, T98, TH, T97, T4b, T8D, TZ, T7g, T4j, T6t, T1g, T7f, T4q;
Chris@82 175 E T6u, T1v, T1I, T7j, T7k, T7l, T7m, T4z, T6y, T4G, T6x, T22, T2j, T7o, T7p;
Chris@82 176 E T7q, T7r, T4O, T6B, T4V, T6A, T3G, T7G, T7N, T8n, T5E, T6M, T61, T6P, T2N;
Chris@82 177 E T7v, T7C, T8i, T55, T6F, T5s, T6I, T43, T7O, T7J, T8o, T5L, T63, T5S, T62;
Chris@82 178 E T3c, T7D, T7y, T8j, T5c, T5t, T5j, T5u;
/* Inputs 0 (taken unmodified) and 16. */
Chris@82 179 {
Chris@82 180 E T1, T8G, Tk, Tl, To, T8E, Tp, T8F;
Chris@82 181 T1 = cr[0];
Chris@82 182 T8G = ci[0];
Chris@82 183 Tk = cr[WS(rs, 16)];
Chris@82 184 Tl = Tj * Tk;
Chris@82 185 To = ci[WS(rs, 16)];
Chris@82 186 T8E = Tj * To;
Chris@82 187 Tp = FMA(Tn, To, Tl);
Chris@82 188 Tq = T1 + Tp;
Chris@82 189 T46 = T1 - Tp;
Chris@82 190 T8F = FNMS(Tn, Tk, T8E);
Chris@82 191 T8H = T8F + T8G;
Chris@82 192 T98 = T8G - T8F;
Chris@82 193 }
/* Inputs 8 and 24. */
Chris@82 194 {
Chris@82 195 E Tt, Tu, Tx, T47, TB, TC, TF, T49;
Chris@82 196 Tt = cr[WS(rs, 8)];
Chris@82 197 Tu = Ts * Tt;
Chris@82 198 Tx = ci[WS(rs, 8)];
Chris@82 199 T47 = Ts * Tx;
Chris@82 200 TB = cr[WS(rs, 24)];
Chris@82 201 TC = TA * TB;
Chris@82 202 TF = ci[WS(rs, 24)];
Chris@82 203 T49 = TA * TF;
Chris@82 204 {
Chris@82 205 E Ty, TG, T48, T4a;
Chris@82 206 Ty = FMA(Tw, Tx, Tu);
Chris@82 207 TG = FMA(TE, TF, TC);
Chris@82 208 TH = Ty + TG;
Chris@82 209 T97 = Ty - TG;
Chris@82 210 T48 = FNMS(Tw, Tt, T47);
Chris@82 211 T4a = FNMS(TE, TB, T49);
Chris@82 212 T4b = T48 - T4a;
Chris@82 213 T8D = T48 + T4a;
Chris@82 214 }
Chris@82 215 }
/* Inputs 4 and 20. */
Chris@82 216 {
Chris@82 217 E TO, T4f, TY, T4h, T4d, T4i;
Chris@82 218 {
Chris@82 219 E TK, TL, TN, T4e;
Chris@82 220 TK = cr[WS(rs, 4)];
Chris@82 221 TL = TJ * TK;
Chris@82 222 TN = ci[WS(rs, 4)];
Chris@82 223 T4e = TJ * TN;
Chris@82 224 TO = FMA(TM, TN, TL);
Chris@82 225 T4f = FNMS(TM, TK, T4e);
Chris@82 226 }
Chris@82 227 {
Chris@82 228 E TT, TU, TX, T4g;
Chris@82 229 TT = cr[WS(rs, 20)];
Chris@82 230 TU = TS * TT;
Chris@82 231 TX = ci[WS(rs, 20)];
Chris@82 232 T4g = TS * TX;
Chris@82 233 TY = FMA(TW, TX, TU);
Chris@82 234 T4h = FNMS(TW, TT, T4g);
Chris@82 235 }
Chris@82 236 TZ = TO + TY;
Chris@82 237 T7g = T4f + T4h;
Chris@82 238 T4d = TO - TY;
Chris@82 239 T4i = T4f - T4h;
Chris@82 240 T4j = T4d - T4i;
Chris@82 241 T6t = T4d + T4i;
Chris@82 242 }
/* Inputs 28 and 12. */
Chris@82 243 {
Chris@82 244 E T17, T4m, T1f, T4o, T4k, T4p;
Chris@82 245 {
Chris@82 246 E T12, T13, T16, T4l;
Chris@82 247 T12 = cr[WS(rs, 28)];
Chris@82 248 T13 = T11 * T12;
Chris@82 249 T16 = ci[WS(rs, 28)];
Chris@82 250 T4l = T11 * T16;
Chris@82 251 T17 = FMA(T15, T16, T13);
Chris@82 252 T4m = FNMS(T15, T12, T4l);
Chris@82 253 }
Chris@82 254 {
Chris@82 255 E T1a, T1b, T1e, T4n;
Chris@82 256 T1a = cr[WS(rs, 12)];
Chris@82 257 T1b = T19 * T1a;
Chris@82 258 T1e = ci[WS(rs, 12)];
Chris@82 259 T4n = T19 * T1e;
Chris@82 260 T1f = FMA(T1d, T1e, T1b);
Chris@82 261 T4o = FNMS(T1d, T1a, T4n);
Chris@82 262 }
Chris@82 263 T1g = T17 + T1f;
Chris@82 264 T7f = T4m + T4o;
Chris@82 265 T4k = T17 - T1f;
Chris@82 266 T4p = T4m - T4o;
Chris@82 267 T4q = T4k + T4p;
Chris@82 268 T6u = T4k - T4p;
Chris@82 269 }
/* Inputs 2, 26, 18 and 10. */
Chris@82 270 {
Chris@82 271 E T1m, T4u, T1H, T4E, T1u, T4w, T1B, T4C;
Chris@82 272 {
Chris@82 273 E T1j, T1k, T1l, T4t;
Chris@82 274 T1j = cr[WS(rs, 2)];
Chris@82 275 T1k = T7 * T1j;
Chris@82 276 T1l = ci[WS(rs, 2)];
Chris@82 277 T4t = T7 * T1l;
Chris@82 278 T1m = FMA(Tb, T1l, T1k);
Chris@82 279 T4u = FNMS(Tb, T1j, T4t);
Chris@82 280 }
Chris@82 281 {
Chris@82 282 E T1D, T1E, T1G, T4D;
Chris@82 283 T1D = cr[WS(rs, 26)];
Chris@82 284 T1E = T1C * T1D;
Chris@82 285 T1G = ci[WS(rs, 26)];
Chris@82 286 T4D = T1C * T1G;
Chris@82 287 T1H = FMA(T1F, T1G, T1E);
Chris@82 288 T4E = FNMS(T1F, T1D, T4D);
Chris@82 289 }
Chris@82 290 {
Chris@82 291 E T1p, T1q, T1t, T4v;
Chris@82 292 T1p = cr[WS(rs, 18)];
Chris@82 293 T1q = T1o * T1p;
Chris@82 294 T1t = ci[WS(rs, 18)];
Chris@82 295 T4v = T1o * T1t;
Chris@82 296 T1u = FMA(T1s, T1t, T1q);
Chris@82 297 T4w = FNMS(T1s, T1p, T4v);
Chris@82 298 }
Chris@82 299 {
Chris@82 300 E T1x, T1y, T1A, T4B;
Chris@82 301 T1x = cr[WS(rs, 10)];
Chris@82 302 T1y = T1w * T1x;
Chris@82 303 T1A = ci[WS(rs, 10)];
Chris@82 304 T4B = T1w * T1A;
Chris@82 305 T1B = FMA(T1z, T1A, T1y);
Chris@82 306 T4C = FNMS(T1z, T1x, T4B);
Chris@82 307 }
Chris@82 308 T1v = T1m + T1u;
Chris@82 309 T1I = T1B + T1H;
Chris@82 310 T7j = T1v - T1I;
Chris@82 311 T7k = T4u + T4w;
Chris@82 312 T7l = T4C + T4E;
Chris@82 313 T7m = T7k - T7l;
Chris@82 314 {
Chris@82 315 E T4x, T4y, T4A, T4F;
Chris@82 316 T4x = T4u - T4w;
Chris@82 317 T4y = T1B - T1H;
Chris@82 318 T4z = T4x + T4y;
Chris@82 319 T6y = T4x - T4y;
Chris@82 320 T4A = T1m - T1u;
Chris@82 321 T4F = T4C - T4E;
Chris@82 322 T4G = T4A - T4F;
Chris@82 323 T6x = T4A + T4F;
Chris@82 324 }
Chris@82 325 }
/* Inputs 30, 22, 14 and 6. */
Chris@82 326 {
Chris@82 327 E T1P, T4J, T2i, T4T, T21, T4L, T28, T4R;
Chris@82 328 {
Chris@82 329 E T1L, T1M, T1O, T4I;
Chris@82 330 T1L = cr[WS(rs, 30)];
Chris@82 331 T1M = T1K * T1L;
Chris@82 332 T1O = ci[WS(rs, 30)];
Chris@82 333 T4I = T1K * T1O;
Chris@82 334 T1P = FMA(T1N, T1O, T1M);
Chris@82 335 T4J = FNMS(T1N, T1L, T4I);
Chris@82 336 }
Chris@82 337 {
Chris@82 338 E T2d, T2e, T2h, T4S;
Chris@82 339 T2d = cr[WS(rs, 22)];
Chris@82 340 T2e = T2c * T2d;
Chris@82 341 T2h = ci[WS(rs, 22)];
Chris@82 342 T4S = T2c * T2h;
Chris@82 343 T2i = FMA(T2g, T2h, T2e);
Chris@82 344 T4T = FNMS(T2g, T2d, T4S);
Chris@82 345 }
Chris@82 346 {
Chris@82 347 E T1W, T1X, T20, T4K;
Chris@82 348 T1W = cr[WS(rs, 14)];
Chris@82 349 T1X = T1V * T1W;
Chris@82 350 T20 = ci[WS(rs, 14)];
Chris@82 351 T4K = T1V * T20;
Chris@82 352 T21 = FMA(T1Z, T20, T1X);
Chris@82 353 T4L = FNMS(T1Z, T1W, T4K);
Chris@82 354 }
Chris@82 355 {
Chris@82 356 E T24, T25, T27, T4Q;
Chris@82 357 T24 = cr[WS(rs, 6)];
Chris@82 358 T25 = T23 * T24;
Chris@82 359 T27 = ci[WS(rs, 6)];
Chris@82 360 T4Q = T23 * T27;
Chris@82 361 T28 = FMA(T26, T27, T25);
Chris@82 362 T4R = FNMS(T26, T24, T4Q);
Chris@82 363 }
Chris@82 364 T22 = T1P + T21;
Chris@82 365 T2j = T28 + T2i;
Chris@82 366 T7o = T22 - T2j;
Chris@82 367 T7p = T4J + T4L;
Chris@82 368 T7q = T4R + T4T;
Chris@82 369 T7r = T7p - T7q;
Chris@82 370 {
Chris@82 371 E T4M, T4N, T4P, T4U;
Chris@82 372 T4M = T4J - T4L;
Chris@82 373 T4N = T28 - T2i;
Chris@82 374 T4O = T4M + T4N;
Chris@82 375 T6B = T4M - T4N;
Chris@82 376 T4P = T1P - T21;
Chris@82 377 T4U = T4R - T4T;
Chris@82 378 T4V = T4P - T4U;
Chris@82 379 T6A = T4P + T4U;
Chris@82 380 }
Chris@82 381 }
/* Inputs 31, 23, 15 and 7. */
Chris@82 382 {
Chris@82 383 E T3l, T5X, T3E, T5C, T3t, T5Z, T3y, T5A;
Chris@82 384 {
Chris@82 385 E T3g, T3h, T3k, T5W;
Chris@82 386 T3g = cr[WS(rs, 31)];
Chris@82 387 T3h = T3f * T3g;
Chris@82 388 T3k = ci[WS(rs, 31)];
Chris@82 389 T5W = T3f * T3k;
Chris@82 390 T3l = FMA(T3j, T3k, T3h);
Chris@82 391 T5X = FNMS(T3j, T3g, T5W);
Chris@82 392 }
Chris@82 393 {
Chris@82 394 E T3A, T3B, T3D, T5B;
Chris@82 395 T3A = cr[WS(rs, 23)];
Chris@82 396 T3B = T3z * T3A;
Chris@82 397 T3D = ci[WS(rs, 23)];
Chris@82 398 T5B = T3z * T3D;
Chris@82 399 T3E = FMA(T3C, T3D, T3B);
Chris@82 400 T5C = FNMS(T3C, T3A, T5B);
Chris@82 401 }
Chris@82 402 {
Chris@82 403 E T3o, T3p, T3s, T5Y;
Chris@82 404 T3o = cr[WS(rs, 15)];
Chris@82 405 T3p = T3n * T3o;
Chris@82 406 T3s = ci[WS(rs, 15)];
Chris@82 407 T5Y = T3n * T3s;
Chris@82 408 T3t = FMA(T3r, T3s, T3p);
Chris@82 409 T5Z = FNMS(T3r, T3o, T5Y);
Chris@82 410 }
Chris@82 411 {
Chris@82 412 E T3v, T3w, T3x, T5z;
Chris@82 413 T3v = cr[WS(rs, 7)];
Chris@82 414 T3w = TP * T3v;
Chris@82 415 T3x = ci[WS(rs, 7)];
Chris@82 416 T5z = TP * T3x;
Chris@82 417 T3y = FMA(TR, T3x, T3w);
Chris@82 418 T5A = FNMS(TR, T3v, T5z);
Chris@82 419 }
Chris@82 420 {
Chris@82 421 E T3u, T3F, T7L, T7M;
Chris@82 422 T3u = T3l + T3t;
Chris@82 423 T3F = T3y + T3E;
Chris@82 424 T3G = T3u + T3F;
Chris@82 425 T7G = T3u - T3F;
Chris@82 426 T7L = T5X + T5Z;
Chris@82 427 T7M = T5A + T5C;
Chris@82 428 T7N = T7L - T7M;
Chris@82 429 T8n = T7L + T7M;
Chris@82 430 }
Chris@82 431 {
Chris@82 432 E T5y, T5D, T5V, T60;
Chris@82 433 T5y = T3l - T3t;
Chris@82 434 T5D = T5A - T5C;
Chris@82 435 T5E = T5y - T5D;
Chris@82 436 T6M = T5y + T5D;
Chris@82 437 T5V = T3E - T3y;
Chris@82 438 T60 = T5X - T5Z;
Chris@82 439 T61 = T5V - T60;
Chris@82 440 T6P = T60 + T5V;
Chris@82 441 }
Chris@82 442 }
/* Inputs 1, 25, 17 and 9 (1 and 9 use stored W pairs directly). */
Chris@82 443 {
Chris@82 444 E T2q, T5n, T2L, T53, T2y, T5p, T2D, T51;
Chris@82 445 {
Chris@82 446 E T2n, T2o, T2p, T5m;
Chris@82 447 T2n = cr[WS(rs, 1)];
Chris@82 448 T2o = T2 * T2n;
Chris@82 449 T2p = ci[WS(rs, 1)];
Chris@82 450 T5m = T2 * T2p;
Chris@82 451 T2q = FMA(T5, T2p, T2o);
Chris@82 452 T5n = FNMS(T5, T2n, T5m);
Chris@82 453 }
Chris@82 454 {
Chris@82 455 E T2G, T2H, T2K, T52;
Chris@82 456 T2G = cr[WS(rs, 25)];
Chris@82 457 T2H = T2F * T2G;
Chris@82 458 T2K = ci[WS(rs, 25)];
Chris@82 459 T52 = T2F * T2K;
Chris@82 460 T2L = FMA(T2J, T2K, T2H);
Chris@82 461 T53 = FNMS(T2J, T2G, T52);
Chris@82 462 }
Chris@82 463 {
Chris@82 464 E T2t, T2u, T2x, T5o;
Chris@82 465 T2t = cr[WS(rs, 17)];
Chris@82 466 T2u = T2s * T2t;
Chris@82 467 T2x = ci[WS(rs, 17)];
Chris@82 468 T5o = T2s * T2x;
Chris@82 469 T2y = FMA(T2w, T2x, T2u);
Chris@82 470 T5p = FNMS(T2w, T2t, T5o);
Chris@82 471 }
Chris@82 472 {
Chris@82 473 E T2A, T2B, T2C, T50;
Chris@82 474 T2A = cr[WS(rs, 9)];
Chris@82 475 T2B = T8 * T2A;
Chris@82 476 T2C = ci[WS(rs, 9)];
Chris@82 477 T50 = T8 * T2C;
Chris@82 478 T2D = FMA(Tc, T2C, T2B);
Chris@82 479 T51 = FNMS(Tc, T2A, T50);
Chris@82 480 }
Chris@82 481 {
Chris@82 482 E T2z, T2M, T7A, T7B;
Chris@82 483 T2z = T2q + T2y;
Chris@82 484 T2M = T2D + T2L;
Chris@82 485 T2N = T2z + T2M;
Chris@82 486 T7v = T2z - T2M;
Chris@82 487 T7A = T5n + T5p;
Chris@82 488 T7B = T51 + T53;
Chris@82 489 T7C = T7A - T7B;
Chris@82 490 T8i = T7A + T7B;
Chris@82 491 }
Chris@82 492 {
Chris@82 493 E T4Z, T54, T5q, T5r;
Chris@82 494 T4Z = T2q - T2y;
Chris@82 495 T54 = T51 - T53;
Chris@82 496 T55 = T4Z - T54;
Chris@82 497 T6F = T4Z + T54;
Chris@82 498 T5q = T5n - T5p;
Chris@82 499 T5r = T2D - T2L;
Chris@82 500 T5s = T5q + T5r;
Chris@82 501 T6I = T5q - T5r;
Chris@82 502 }
Chris@82 503 }
/* Inputs 3, 11, 19 and 27 (3 and 27 use stored W pairs directly). */
Chris@82 504 {
Chris@82 505 E T3K, T5H, T41, T5Q, T3S, T5J, T3X, T5O;
Chris@82 506 {
Chris@82 507 E T3H, T3I, T3J, T5G;
Chris@82 508 T3H = cr[WS(rs, 3)];
Chris@82 509 T3I = T3 * T3H;
Chris@82 510 T3J = ci[WS(rs, 3)];
Chris@82 511 T5G = T3 * T3J;
Chris@82 512 T3K = FMA(T6, T3J, T3I);
Chris@82 513 T5H = FNMS(T6, T3H, T5G);
Chris@82 514 }
Chris@82 515 {
Chris@82 516 E T3Y, T3Z, T40, T5P;
Chris@82 517 T3Y = cr[WS(rs, 11)];
Chris@82 518 T3Z = Td * T3Y;
Chris@82 519 T40 = ci[WS(rs, 11)];
Chris@82 520 T5P = Td * T40;
Chris@82 521 T41 = FMA(Th, T40, T3Z);
Chris@82 522 T5Q = FNMS(Th, T3Y, T5P);
Chris@82 523 }
Chris@82 524 {
Chris@82 525 E T3N, T3O, T3R, T5I;
Chris@82 526 T3N = cr[WS(rs, 19)];
Chris@82 527 T3O = T3M * T3N;
Chris@82 528 T3R = ci[WS(rs, 19)];
Chris@82 529 T5I = T3M * T3R;
Chris@82 530 T3S = FMA(T3Q, T3R, T3O);
Chris@82 531 T5J = FNMS(T3Q, T3N, T5I);
Chris@82 532 }
Chris@82 533 {
Chris@82 534 E T3U, T3V, T3W, T5N;
Chris@82 535 T3U = cr[WS(rs, 27)];
Chris@82 536 T3V = Te * T3U;
Chris@82 537 T3W = ci[WS(rs, 27)];
Chris@82 538 T5N = Te * T3W;
Chris@82 539 T3X = FMA(Ti, T3W, T3V);
Chris@82 540 T5O = FNMS(Ti, T3U, T5N);
Chris@82 541 }
Chris@82 542 {
Chris@82 543 E T3T, T42, T7H, T7I;
Chris@82 544 T3T = T3K + T3S;
Chris@82 545 T42 = T3X + T41;
Chris@82 546 T43 = T3T + T42;
Chris@82 547 T7O = T42 - T3T;
Chris@82 548 T7H = T5O + T5Q;
Chris@82 549 T7I = T5H + T5J;
Chris@82 550 T7J = T7H - T7I;
Chris@82 551 T8o = T7I + T7H;
Chris@82 552 }
Chris@82 553 {
Chris@82 554 E T5F, T5K, T5M, T5R;
Chris@82 555 T5F = T3K - T3S;
Chris@82 556 T5K = T5H - T5J;
Chris@82 557 T5L = T5F - T5K;
Chris@82 558 T63 = T5F + T5K;
Chris@82 559 T5M = T3X - T41;
Chris@82 560 T5R = T5O - T5Q;
Chris@82 561 T5S = T5M + T5R;
Chris@82 562 T62 = T5M - T5R;
Chris@82 563 }
Chris@82 564 }
/* Inputs 5, 13, 21 and 29. */
Chris@82 565 {
Chris@82 566 E T2R, T58, T3a, T5h, T2Z, T5a, T36, T5f;
Chris@82 567 {
Chris@82 568 E T2O, T2P, T2Q, T57;
Chris@82 569 T2O = cr[WS(rs, 5)];
Chris@82 570 T2P = T29 * T2O;
Chris@82 571 T2Q = ci[WS(rs, 5)];
Chris@82 572 T57 = T29 * T2Q;
Chris@82 573 T2R = FMA(T2b, T2Q, T2P);
Chris@82 574 T58 = FNMS(T2b, T2O, T57);
Chris@82 575 }
Chris@82 576 {
Chris@82 577 E T37, T38, T39, T5g;
Chris@82 578 T37 = cr[WS(rs, 13)];
Chris@82 579 T38 = T1R * T37;
Chris@82 580 T39 = ci[WS(rs, 13)];
Chris@82 581 T5g = T1R * T39;
Chris@82 582 T3a = FMA(T1U, T39, T38);
Chris@82 583 T5h = FNMS(T1U, T37, T5g);
Chris@82 584 }
Chris@82 585 {
Chris@82 586 E T2U, T2V, T2Y, T59;
Chris@82 587 T2U = cr[WS(rs, 21)];
Chris@82 588 T2V = T2T * T2U;
Chris@82 589 T2Y = ci[WS(rs, 21)];
Chris@82 590 T59 = T2T * T2Y;
Chris@82 591 T2Z = FMA(T2X, T2Y, T2V);
Chris@82 592 T5a = FNMS(T2X, T2U, T59);
Chris@82 593 }
Chris@82 594 {
Chris@82 595 E T32, T33, T35, T5e;
Chris@82 596 T32 = cr[WS(rs, 29)];
Chris@82 597 T33 = T31 * T32;
Chris@82 598 T35 = ci[WS(rs, 29)];
Chris@82 599 T5e = T31 * T35;
Chris@82 600 T36 = FMA(T34, T35, T33);
Chris@82 601 T5f = FNMS(T34, T32, T5e);
Chris@82 602 }
Chris@82 603 {
Chris@82 604 E T30, T3b, T7w, T7x;
Chris@82 605 T30 = T2R + T2Z;
Chris@82 606 T3b = T36 + T3a;
Chris@82 607 T3c = T30 + T3b;
Chris@82 608 T7D = T30 - T3b;
Chris@82 609 T7w = T5f + T5h;
Chris@82 610 T7x = T58 + T5a;
Chris@82 611 T7y = T7w - T7x;
Chris@82 612 T8j = T7x + T7w;
Chris@82 613 }
Chris@82 614 {
Chris@82 615 E T56, T5b, T5d, T5i;
Chris@82 616 T56 = T2R - T2Z;
Chris@82 617 T5b = T58 - T5a;
Chris@82 618 T5c = T56 - T5b;
Chris@82 619 T5t = T56 + T5b;
Chris@82 620 T5d = T36 - T3a;
Chris@82 621 T5i = T5f - T5h;
Chris@82 622 T5j = T5d + T5i;
Chris@82 623 T5u = T5i - T5d;
Chris@82 624 }
Chris@82 625 }
/*
 * Store group A: writes cr[WS(rs, k)] for k = 0, 4, ..., 28 and
 * ci[WS(rs, k)] for k = 3, 7, ..., 31.
 */
Chris@82 626 {
Chris@82 627 E T1i, T8c, T8z, T8A, T8J, T8O, T2l, T8N, T45, T8L, T8l, T8t, T8q, T8u, T8f;
Chris@82 628 E T8B;
Chris@82 629 {
Chris@82 630 E TI, T1h, T8x, T8y;
Chris@82 631 TI = Tq + TH;
Chris@82 632 T1h = TZ + T1g;
Chris@82 633 T1i = TI + T1h;
Chris@82 634 T8c = TI - T1h;
Chris@82 635 T8x = T8n + T8o;
Chris@82 636 T8y = T8i + T8j;
Chris@82 637 T8z = T8x - T8y;
Chris@82 638 T8A = T8y + T8x;
Chris@82 639 }
Chris@82 640 {
Chris@82 641 E T8C, T8I, T1J, T2k;
Chris@82 642 T8C = T7g + T7f;
Chris@82 643 T8I = T8D + T8H;
Chris@82 644 T8J = T8C + T8I;
Chris@82 645 T8O = T8I - T8C;
Chris@82 646 T1J = T1v + T1I;
Chris@82 647 T2k = T22 + T2j;
Chris@82 648 T2l = T1J + T2k;
Chris@82 649 T8N = T1J - T2k;
Chris@82 650 }
Chris@82 651 {
Chris@82 652 E T3d, T44, T8h, T8k;
Chris@82 653 T3d = T2N + T3c;
Chris@82 654 T44 = T3G + T43;
Chris@82 655 T45 = T3d + T44;
Chris@82 656 T8L = T44 - T3d;
Chris@82 657 T8h = T2N - T3c;
Chris@82 658 T8k = T8i - T8j;
Chris@82 659 T8l = T8h + T8k;
Chris@82 660 T8t = T8h - T8k;
Chris@82 661 }
Chris@82 662 {
Chris@82 663 E T8m, T8p, T8d, T8e;
Chris@82 664 T8m = T3G - T43;
Chris@82 665 T8p = T8n - T8o;
Chris@82 666 T8q = T8m - T8p;
Chris@82 667 T8u = T8m + T8p;
Chris@82 668 T8d = T7p + T7q;
Chris@82 669 T8e = T7k + T7l;
Chris@82 670 T8f = T8d - T8e;
Chris@82 671 T8B = T8e + T8d;
Chris@82 672 }
Chris@82 673 {
Chris@82 674 E T2m, T8K, T8M, T8w;
Chris@82 675 T2m = T1i + T2l;
Chris@82 676 ci[WS(rs, 15)] = T2m - T45;
Chris@82 677 cr[0] = T2m + T45;
Chris@82 678 T8K = T8B + T8J;
Chris@82 679 cr[WS(rs, 16)] = T8A - T8K;
Chris@82 680 ci[WS(rs, 31)] = T8A + T8K;
Chris@82 681 T8M = T8J - T8B;
Chris@82 682 cr[WS(rs, 24)] = T8L - T8M;
Chris@82 683 ci[WS(rs, 23)] = T8L + T8M;
Chris@82 684 T8w = T1i - T2l;
Chris@82 685 cr[WS(rs, 8)] = T8w - T8z;
Chris@82 686 ci[WS(rs, 7)] = T8w + T8z;
Chris@82 687 }
Chris@82 688 {
Chris@82 689 E T8g, T8r, T8P, T8Q;
Chris@82 690 T8g = T8c - T8f;
Chris@82 691 T8r = T8l + T8q;
Chris@82 692 ci[WS(rs, 11)] = FNMS(KP707106781, T8r, T8g);
Chris@82 693 cr[WS(rs, 4)] = FMA(KP707106781, T8r, T8g);
Chris@82 694 T8P = T8N + T8O;
Chris@82 695 T8Q = T8q - T8l;
Chris@82 696 cr[WS(rs, 28)] = FMS(KP707106781, T8Q, T8P);
Chris@82 697 ci[WS(rs, 19)] = FMA(KP707106781, T8Q, T8P);
Chris@82 698 }
Chris@82 699 {
Chris@82 700 E T8R, T8S, T8s, T8v;
Chris@82 701 T8R = T8O - T8N;
Chris@82 702 T8S = T8u - T8t;
Chris@82 703 cr[WS(rs, 20)] = FMS(KP707106781, T8S, T8R);
Chris@82 704 ci[WS(rs, 27)] = FMA(KP707106781, T8S, T8R);
Chris@82 705 T8s = T8c + T8f;
Chris@82 706 T8v = T8t + T8u;
Chris@82 707 cr[WS(rs, 12)] = FNMS(KP707106781, T8v, T8s);
Chris@82 708 ci[WS(rs, 3)] = FMA(KP707106781, T8v, T8s);
Chris@82 709 }
Chris@82 710 }
/*
 * Store group B: writes cr[WS(rs, k)] for k = 3, 7, ..., 31 and
 * ci[WS(rs, k)] for k = 0, 4, ..., 28.
 */
Chris@82 711 {
Chris@82 712 E T4s, T6c, T4X, T9c, T9b, T9h, T6f, T9i, T66, T6q, T6a, T6m, T5x, T6p, T69;
Chris@82 713 E T6j;
Chris@82 714 {
Chris@82 715 E T4c, T4r, T6d, T6e;
Chris@82 716 T4c = T46 - T4b;
Chris@82 717 T4r = T4j + T4q;
Chris@82 718 T4s = FNMS(KP707106781, T4r, T4c);
Chris@82 719 T6c = FMA(KP707106781, T4r, T4c);
Chris@82 720 {
Chris@82 721 E T4H, T4W, T99, T9a;
Chris@82 722 T4H = FMA(KP414213562, T4G, T4z);
Chris@82 723 T4W = FNMS(KP414213562, T4V, T4O);
Chris@82 724 T4X = T4H - T4W;
Chris@82 725 T9c = T4H + T4W;
Chris@82 726 T99 = T97 + T98;
Chris@82 727 T9a = T6t - T6u;
Chris@82 728 T9b = FMA(KP707106781, T9a, T99);
Chris@82 729 T9h = FNMS(KP707106781, T9a, T99);
Chris@82 730 }
Chris@82 731 T6d = FNMS(KP414213562, T4z, T4G);
Chris@82 732 T6e = FMA(KP414213562, T4O, T4V);
Chris@82 733 T6f = T6d + T6e;
Chris@82 734 T9i = T6e - T6d;
Chris@82 735 {
Chris@82 736 E T5U, T6k, T65, T6l, T5T, T64;
Chris@82 737 T5T = T5L + T5S;
Chris@82 738 T5U = FNMS(KP707106781, T5T, T5E);
Chris@82 739 T6k = FMA(KP707106781, T5T, T5E);
Chris@82 740 T64 = T62 - T63;
Chris@82 741 T65 = FNMS(KP707106781, T64, T61);
Chris@82 742 T6l = FMA(KP707106781, T64, T61);
Chris@82 743 T66 = FMA(KP668178637, T65, T5U);
Chris@82 744 T6q = FMA(KP198912367, T6k, T6l);
Chris@82 745 T6a = FNMS(KP668178637, T5U, T65);
Chris@82 746 T6m = FNMS(KP198912367, T6l, T6k);
Chris@82 747 }
Chris@82 748 {
Chris@82 749 E T5l, T6h, T5w, T6i, T5k, T5v;
Chris@82 750 T5k = T5c + T5j;
Chris@82 751 T5l = FNMS(KP707106781, T5k, T55);
Chris@82 752 T6h = FMA(KP707106781, T5k, T55);
Chris@82 753 T5v = T5t + T5u;
Chris@82 754 T5w = FNMS(KP707106781, T5v, T5s);
Chris@82 755 T6i = FMA(KP707106781, T5v, T5s);
Chris@82 756 T5x = FMA(KP668178637, T5w, T5l);
Chris@82 757 T6p = FMA(KP198912367, T6h, T6i);
Chris@82 758 T69 = FNMS(KP668178637, T5l, T5w);
Chris@82 759 T6j = FNMS(KP198912367, T6i, T6h);
Chris@82 760 }
Chris@82 761 }
Chris@82 762 {
Chris@82 763 E T4Y, T67, T9j, T9k;
Chris@82 764 T4Y = FMA(KP923879532, T4X, T4s);
Chris@82 765 T67 = T5x + T66;
Chris@82 766 ci[WS(rs, 12)] = FNMS(KP831469612, T67, T4Y);
Chris@82 767 cr[WS(rs, 3)] = FMA(KP831469612, T67, T4Y);
Chris@82 768 T9j = FMA(KP923879532, T9i, T9h);
Chris@82 769 T9k = T69 - T6a;
Chris@82 770 cr[WS(rs, 19)] = FMS(KP831469612, T9k, T9j);
Chris@82 771 ci[WS(rs, 28)] = FMA(KP831469612, T9k, T9j);
Chris@82 772 }
Chris@82 773 {
Chris@82 774 E T9l, T9m, T68, T6b;
Chris@82 775 T9l = FNMS(KP923879532, T9i, T9h);
Chris@82 776 T9m = T66 - T5x;
Chris@82 777 cr[WS(rs, 27)] = FMS(KP831469612, T9m, T9l);
Chris@82 778 ci[WS(rs, 20)] = FMA(KP831469612, T9m, T9l);
Chris@82 779 T68 = FNMS(KP923879532, T4X, T4s);
Chris@82 780 T6b = T69 + T6a;
Chris@82 781 cr[WS(rs, 11)] = FMA(KP831469612, T6b, T68);
Chris@82 782 ci[WS(rs, 4)] = FNMS(KP831469612, T6b, T68);
Chris@82 783 }
Chris@82 784 {
Chris@82 785 E T6g, T6n, T9d, T9e;
Chris@82 786 T6g = FMA(KP923879532, T6f, T6c);
Chris@82 787 T6n = T6j + T6m;
Chris@82 788 cr[WS(rs, 15)] = FNMS(KP980785280, T6n, T6g);
Chris@82 789 ci[0] = FMA(KP980785280, T6n, T6g);
Chris@82 790 T9d = FMA(KP923879532, T9c, T9b);
Chris@82 791 T9e = T6q - T6p;
Chris@82 792 cr[WS(rs, 31)] = FMS(KP980785280, T9e, T9d);
Chris@82 793 ci[WS(rs, 16)] = FMA(KP980785280, T9e, T9d);
Chris@82 794 }
Chris@82 795 {
Chris@82 796 E T9f, T9g, T6o, T6r;
Chris@82 797 T9f = FNMS(KP923879532, T9c, T9b);
Chris@82 798 T9g = T6m - T6j;
Chris@82 799 cr[WS(rs, 23)] = FMS(KP980785280, T9g, T9f);
Chris@82 800 ci[WS(rs, 24)] = FMA(KP980785280, T9g, T9f);
Chris@82 801 T6o = FNMS(KP923879532, T6f, T6c);
Chris@82 802 T6r = T6p + T6q;
Chris@82 803 ci[WS(rs, 8)] = FNMS(KP980785280, T6r, T6o);
Chris@82 804 cr[WS(rs, 7)] = FMA(KP980785280, T6r, T6o);
Chris@82 805 }
Chris@82 806 }
/*
 * Store group C: writes cr[WS(rs, k)] for k = 2, 6, ..., 30 and
 * ci[WS(rs, k)] for k = 1, 5, ..., 29.
 */
Chris@82 807 {
Chris@82 808 E T7i, T7W, T86, T8a, T8V, T91, T7t, T8W, T7F, T7U, T7Z, T92, T83, T89, T7Q;
Chris@82 809 E T7T;
Chris@82 810 {
Chris@82 811 E T7e, T7h, T84, T85;
Chris@82 812 T7e = Tq - TH;
Chris@82 813 T7h = T7f - T7g;
Chris@82 814 T7i = T7e - T7h;
Chris@82 815 T7W = T7e + T7h;
Chris@82 816 T84 = T7G + T7J;
Chris@82 817 T85 = T7O - T7N;
Chris@82 818 T86 = FNMS(KP414213562, T85, T84);
Chris@82 819 T8a = FMA(KP414213562, T84, T85);
Chris@82 820 }
Chris@82 821 {
Chris@82 822 E T8T, T8U, T7n, T7s;
Chris@82 823 T8T = TZ - T1g;
Chris@82 824 T8U = T8H - T8D;
Chris@82 825 T8V = T8T + T8U;
Chris@82 826 T91 = T8U - T8T;
Chris@82 827 T7n = T7j + T7m;
Chris@82 828 T7s = T7o - T7r;
Chris@82 829 T7t = T7n + T7s;
Chris@82 830 T8W = T7n - T7s;
Chris@82 831 }
Chris@82 832 {
Chris@82 833 E T7z, T7E, T7X, T7Y;
Chris@82 834 T7z = T7v - T7y;
Chris@82 835 T7E = T7C - T7D;
Chris@82 836 T7F = FMA(KP414213562, T7E, T7z);
Chris@82 837 T7U = FNMS(KP414213562, T7z, T7E);
Chris@82 838 T7X = T7j - T7m;
Chris@82 839 T7Y = T7o + T7r;
Chris@82 840 T7Z = T7X + T7Y;
Chris@82 841 T92 = T7Y - T7X;
Chris@82 842 }
Chris@82 843 {
Chris@82 844 E T81, T82, T7K, T7P;
Chris@82 845 T81 = T7v + T7y;
Chris@82 846 T82 = T7C + T7D;
Chris@82 847 T83 = FNMS(KP414213562, T82, T81);
Chris@82 848 T89 = FMA(KP414213562, T81, T82);
Chris@82 849 T7K = T7G - T7J;
Chris@82 850 T7P = T7N + T7O;
Chris@82 851 T7Q = FNMS(KP414213562, T7P, T7K);
Chris@82 852 T7T = FMA(KP414213562, T7K, T7P);
Chris@82 853 }
Chris@82 854 {
Chris@82 855 E T7u, T7R, T93, T94;
Chris@82 856 T7u = FMA(KP707106781, T7t, T7i);
Chris@82 857 T7R = T7F + T7Q;
Chris@82 858 ci[WS(rs, 13)] = FNMS(KP923879532, T7R, T7u);
Chris@82 859 cr[WS(rs, 2)] = FMA(KP923879532, T7R, T7u);
Chris@82 860 T93 = FMA(KP707106781, T92, T91);
Chris@82 861 T94 = T7U + T7T;
Chris@82 862 cr[WS(rs, 18)] = FMS(KP923879532, T94, T93);
Chris@82 863 ci[WS(rs, 29)] = FMA(KP923879532, T94, T93);
Chris@82 864 }
Chris@82 865 {
Chris@82 866 E T95, T96, T7S, T7V;
Chris@82 867 T95 = FNMS(KP707106781, T92, T91);
Chris@82 868 T96 = T7Q - T7F;
Chris@82 869 cr[WS(rs, 26)] = FMS(KP923879532, T96, T95);
Chris@82 870 ci[WS(rs, 21)] = FMA(KP923879532, T96, T95);
Chris@82 871 T7S = FNMS(KP707106781, T7t, T7i);
Chris@82 872 T7V = T7T - T7U;
Chris@82 873 cr[WS(rs, 10)] = FNMS(KP923879532, T7V, T7S);
Chris@82 874 ci[WS(rs, 5)] = FMA(KP923879532, T7V, T7S);
Chris@82 875 }
Chris@82 876 {
Chris@82 877 E T80, T87, T8X, T8Y;
Chris@82 878 T80 = FMA(KP707106781, T7Z, T7W);
Chris@82 879 T87 = T83 + T86;
Chris@82 880 cr[WS(rs, 14)] = FNMS(KP923879532, T87, T80);
Chris@82 881 ci[WS(rs, 1)] = FMA(KP923879532, T87, T80);
Chris@82 882 T8X = FMA(KP707106781, T8W, T8V);
Chris@82 883 T8Y = T8a - T89;
Chris@82 884 cr[WS(rs, 30)] = FMS(KP923879532, T8Y, T8X);
Chris@82 885 ci[WS(rs, 17)] = FMA(KP923879532, T8Y, T8X);
Chris@82 886 }
Chris@82 887 {
Chris@82 888 E T8Z, T90, T88, T8b;
Chris@82 889 T8Z = FNMS(KP707106781, T8W, T8V);
Chris@82 890 T90 = T86 - T83;
Chris@82 891 cr[WS(rs, 22)] = FMS(KP923879532, T90, T8Z);
Chris@82 892 ci[WS(rs, 25)] = FMA(KP923879532, T90, T8Z);
Chris@82 893 T88 = FNMS(KP707106781, T7Z, T7W);
Chris@82 894 T8b = T89 + T8a;
Chris@82 895 ci[WS(rs, 9)] = FNMS(KP923879532, T8b, T88);
Chris@82 896 cr[WS(rs, 6)] = FMA(KP923879532, T8b, T88);
Chris@82 897 }
Chris@82 898 }
/*
 * Store group D: writes cr[WS(rs, k)] for k = 1, 5, ..., 29 and
 * ci[WS(rs, k)] for k = 2, 6, ..., 30.
 */
Chris@82 899 {
Chris@82 900 E T6w, T6Y, T6D, T9w, T9p, T9v, T71, T9q, T6S, T7c, T6V, T78, T6L, T7b, T6W;
Chris@82 901 E T75;
Chris@82 902 {
Chris@82 903 E T6s, T6v, T6Z, T70;
Chris@82 904 T6s = T46 + T4b;
Chris@82 905 T6v = T6t + T6u;
Chris@82 906 T6w = FMA(KP707106781, T6v, T6s);
Chris@82 907 T6Y = FNMS(KP707106781, T6v, T6s);
Chris@82 908 {
Chris@82 909 E T6z, T6C, T9n, T9o;
Chris@82 910 T6z = FMA(KP414213562, T6y, T6x);
Chris@82 911 T6C = FNMS(KP414213562, T6B, T6A);
Chris@82 912 T6D = T6z + T6C;
Chris@82 913 T9w = T6z - T6C;
Chris@82 914 T9n = T98 - T97;
Chris@82 915 T9o = T4q - T4j;
Chris@82 916 T9p = FMA(KP707106781, T9o, T9n);
Chris@82 917 T9v = FNMS(KP707106781, T9o, T9n);
Chris@82 918 }
Chris@82 919 T6Z = FMA(KP414213562, T6A, T6B);
Chris@82 920 T70 = FNMS(KP414213562, T6x, T6y);
Chris@82 921 T71 = T6Z - T70;
Chris@82 922 T9q = T70 + T6Z;
Chris@82 923 {
Chris@82 924 E T6O, T76, T6R, T77, T6N, T6Q;
Chris@82 925 T6N = T63 + T62;
Chris@82 926 T6O = FMA(KP707106781, T6N, T6M);
Chris@82 927 T76 = FNMS(KP707106781, T6N, T6M);
Chris@82 928 T6Q = T5S - T5L;
Chris@82 929 T6R = FMA(KP707106781, T6Q, T6P);
Chris@82 930 T77 = FNMS(KP707106781, T6Q, T6P);
Chris@82 931 T6S = FNMS(KP198912367, T6R, T6O);
Chris@82 932 T7c = FNMS(KP668178637, T76, T77);
Chris@82 933 T6V = FMA(KP198912367, T6O, T6R);
Chris@82 934 T78 = FMA(KP668178637, T77, T76);
Chris@82 935 }
Chris@82 936 {
Chris@82 937 E T6H, T73, T6K, T74, T6G, T6J;
Chris@82 938 T6G = T5t - T5u;
Chris@82 939 T6H = FMA(KP707106781, T6G, T6F);
Chris@82 940 T73 = FNMS(KP707106781, T6G, T6F);
Chris@82 941 T6J = T5j - T5c;
Chris@82 942 T6K = FMA(KP707106781, T6J, T6I);
Chris@82 943 T74 = FNMS(KP707106781, T6J, T6I);
Chris@82 944 T6L = FMA(KP198912367, T6K, T6H);
Chris@82 945 T7b = FMA(KP668178637, T73, T74);
Chris@82 946 T6W = FNMS(KP198912367, T6H, T6K);
Chris@82 947 T75 = FNMS(KP668178637, T74, T73);
Chris@82 948 }
Chris@82 949 }
Chris@82 950 {
Chris@82 951 E T6E, T6T, T9x, T9y;
Chris@82 952 T6E = FMA(KP923879532, T6D, T6w);
Chris@82 953 T6T = T6L + T6S;
Chris@82 954 ci[WS(rs, 14)] = FNMS(KP980785280, T6T, T6E);
Chris@82 955 cr[WS(rs, 1)] = FMA(KP980785280, T6T, T6E);
Chris@82 956 T9x = FMA(KP923879532, T9w, T9v);
Chris@82 957 T9y = T7b + T7c;
Chris@82 958 cr[WS(rs, 29)] = -(FMA(KP831469612, T9y, T9x));
Chris@82 959 ci[WS(rs, 18)] = FNMS(KP831469612, T9y, T9x);
Chris@82 960 }
Chris@82 961 {
Chris@82 962 E T9z, T9A, T6U, T6X;
Chris@82 963 T9z = FNMS(KP923879532, T9w, T9v);
Chris@82 964 T9A = T78 - T75;
Chris@82 965 cr[WS(rs, 21)] = FMS(KP831469612, T9A, T9z);
Chris@82 966 ci[WS(rs, 26)] = FMA(KP831469612, T9A, T9z);
Chris@82 967 T6U = FNMS(KP923879532, T6D, T6w);
Chris@82 968 T6X = T6V - T6W;
Chris@82 969 cr[WS(rs, 9)] = FNMS(KP980785280, T6X, T6U);
Chris@82 970 ci[WS(rs, 6)] = FMA(KP980785280, T6X, T6U);
Chris@82 971 }
Chris@82 972 {
Chris@82 973 E T72, T79, T9r, T9s;
Chris@82 974 T72 = FMA(KP923879532, T71, T6Y);
Chris@82 975 T79 = T75 + T78;
Chris@82 976 cr[WS(rs, 13)] = FNMS(KP831469612, T79, T72);
Chris@82 977 ci[WS(rs, 2)] = FMA(KP831469612, T79, T72);
Chris@82 978 T9r = FMA(KP923879532, T9q, T9p);
Chris@82 979 T9s = T6W + T6V;
Chris@82 980 cr[WS(rs, 17)] = FMS(KP980785280, T9s, T9r);
Chris@82 981 ci[WS(rs, 30)] = FMA(KP980785280, T9s, T9r);
Chris@82 982 }
Chris@82 983 {
Chris@82 984 E T9t, T9u, T7a, T7d;
Chris@82 985 T9t = FNMS(KP923879532, T9q, T9p);
Chris@82 986 T9u = T6S - T6L;
Chris@82 987 cr[WS(rs, 25)] = FMS(KP980785280, T9u, T9t);
Chris@82 988 ci[WS(rs, 22)] = FMA(KP980785280, T9u, T9t);
Chris@82 989 T7a = FNMS(KP923879532, T71, T6Y);
Chris@82 990 T7d = T7b - T7c;
Chris@82 991 ci[WS(rs, 10)] = FNMS(KP831469612, T7d, T7a);
Chris@82 992 cr[WS(rs, 5)] = FMA(KP831469612, T7d, T7a);
Chris@82 993 }
Chris@82 994 }
Chris@82 995 }
Chris@82 996 }
Chris@82 997 }
Chris@82 998 }
Chris@82 999
/*
 * Twiddle instruction table for the FMA variant of hf2_32: four TW_CEXP
 * entries whose last field is 1, 3, 9 and 27, terminated by TW_NEXT.
 * NOTE(review): tw_instr is declared elsewhere; the 1/3/9/27 values are
 * presumably the twiddle exponents requested by -twiddle-log3 -- confirm
 * against the tw_instr definition.
 */
Chris@82 1000 static const tw_instr twinstr[] = {
Chris@82 1001 {TW_CEXP, 1, 1},
Chris@82 1002 {TW_CEXP, 1, 3},
Chris@82 1003 {TW_CEXP, 1, 9},
Chris@82 1004 {TW_CEXP, 1, 27},
Chris@82 1005 {TW_NEXT, 1, 0}
Chris@82 1006 };
Chris@82 1007
/*
 * Descriptor handed to the planner for the FMA variant: 32, the codelet
 * name, its twiddle table, GENUS, and the counts {236, 98, 252, 0}, which
 * match the "236 additions, 98 multiplications, 252 fused multiply/add"
 * figures in this variant's header comment.
 * NOTE(review): hc2hc_desc is declared elsewhere; 32 is presumably the
 * transform size -- confirm against the struct definition.
 */
Chris@82 1008 static const hc2hc_desc desc = { 32, "hf2_32", twinstr, &GENUS, {236, 98, 252, 0} };
Chris@82 1009
/*
 * Registration entry point (FMA variant): passes the hf2_32 function and
 * its descriptor to X(khc2hc_register) for planner p.
 */
Chris@82 1010 void X(codelet_hf2_32) (planner *p) {
Chris@82 1011 X(khc2hc_register) (p, hf2_32, &desc);
Chris@82 1012 }
Chris@82 1013 #else
Chris@82 1014
Chris@82 1015 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hf2_32 -include rdft/scalar/hf.h */
Chris@82 1016
Chris@82 1017 /*
Chris@82 1018 * This function contains 488 FP additions, 280 FP multiplications,
Chris@82 1019 * (or, 376 additions, 168 multiplications, 112 fused multiply/add),
Chris@82 1020 * 158 stack variables, 7 constants, and 128 memory accesses
Chris@82 1021 */
Chris@82 1022 #include "rdft/scalar/hf.h"
Chris@82 1023
/*
 * hf2_32 (non-FMA variant): machine-generated size-32 codelet; see the
 * "Generated by" line above for the generator options.
 *
 * For each m in [mb, me) the loop body reads the 32 values cr[WS(rs, k)]
 * and the 32 values ci[WS(rs, k)], k = 0..31, multiplies every pair except
 * k = 0 by a factor derived from W[0..7], combines the results, and writes
 * 32 cr and 32 ci values back to the same arrays.
 *
 * Parameters, as used by the code below:
 *   cr, ci  data arrays, indexed through WS(rs, k); after each iteration
 *           cr is advanced by ms and ci is moved back by ms.
 *   W       twiddle values; offset by (mb - 1) * 8 before the first
 *           iteration and advanced by 8 after each one.
 *   rs      stride passed to WS().
 *   mb, me  half-open iteration range.
 *   ms      per-iteration pointer step for cr and ci.
 *
 * NOTE(review): FMA/FNMS/DK/WS/E/R are macros and types from project
 * headers not visible here. In this file FMA(a, b, c) and FNMS(a, b, c)
 * are presumably a * b + c and c - a * b -- confirm in the FFTW headers.
 */
Chris@82 1024 static void hf2_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 1025 {
/* Numerically: KP555570233 = sin(3*pi/16), KP831469612 = cos(3*pi/16),
 * KP980785280 = cos(pi/16), KP195090322 = sin(pi/16),
 * KP382683432 = sin(pi/8), KP923879532 = cos(pi/8),
 * KP707106781 = sqrt(1/2). */
Chris@82 1026 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@82 1027 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 1028 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 1029 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@82 1030 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@82 1031 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 1032 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 1033 {
Chris@82 1034 INT m;
Chris@82 1035 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) {
Chris@82 1036 E T2, T5, T3, T6, T8, TM, TO, Td, T9, Te, Th, Tl, TD, TH, T1y;
Chris@82 1037 E T1H, T15, T1A, T11, T1F, T1n, T1p, T2q, T2I, T2u, T2K, T2V, T3b, T2Z, T3d;
Chris@82 1038 E Tu, Ty, T3l, T3n, T1t, T1v, T2f, T2h, T1a, T1e, T32, T34, T1W, T1Y, T2C;
Chris@82 1039 E T2E, Tg, TR, Tk, TS, Tm, TV, To, TT, T1M, T21, T1P, T22, T1Q, T25;
Chris@82 1040 E T1S, T23;
/* Twiddle setup: load the eight stored values W[0..7] and build, from
 * sums and differences of their pairwise products, every factor used on
 * the inputs below. The pairs (W[0],W[1]), (W[2],W[3]), (W[4],W[5]) and
 * (W[6],W[7]) are applied unmodified to inputs 1, 3, 9 and 27. */
Chris@82 1041 {
Chris@82 1042 E Ts, T1d, Tx, T18, Tt, T1c, Tw, T19, TB, T14, TG, TZ, TC, T13, TF;
Chris@82 1043 E T10;
Chris@82 1044 {
Chris@82 1045 E T4, Tc, T7, Tb;
Chris@82 1046 T2 = W[0];
Chris@82 1047 T5 = W[1];
Chris@82 1048 T3 = W[2];
Chris@82 1049 T6 = W[3];
Chris@82 1050 T4 = T2 * T3;
Chris@82 1051 Tc = T5 * T3;
Chris@82 1052 T7 = T5 * T6;
Chris@82 1053 Tb = T2 * T6;
Chris@82 1054 T8 = T4 + T7;
Chris@82 1055 TM = T4 - T7;
Chris@82 1056 TO = Tb + Tc;
Chris@82 1057 Td = Tb - Tc;
Chris@82 1058 T9 = W[4];
Chris@82 1059 Ts = T2 * T9;
Chris@82 1060 T1d = T6 * T9;
Chris@82 1061 Tx = T5 * T9;
Chris@82 1062 T18 = T3 * T9;
Chris@82 1063 Te = W[5];
Chris@82 1064 Tt = T5 * Te;
Chris@82 1065 T1c = T3 * Te;
Chris@82 1066 Tw = T2 * Te;
Chris@82 1067 T19 = T6 * Te;
Chris@82 1068 Th = W[6];
Chris@82 1069 TB = T3 * Th;
Chris@82 1070 T14 = T5 * Th;
Chris@82 1071 TG = T6 * Th;
Chris@82 1072 TZ = T2 * Th;
Chris@82 1073 Tl = W[7];
Chris@82 1074 TC = T6 * Tl;
Chris@82 1075 T13 = T2 * Tl;
Chris@82 1076 TF = T3 * Tl;
Chris@82 1077 T10 = T5 * Tl;
Chris@82 1078 }
Chris@82 1079 TD = TB + TC;
Chris@82 1080 TH = TF - TG;
Chris@82 1081 T1y = TZ + T10;
Chris@82 1082 T1H = TF + TG;
Chris@82 1083 T15 = T13 + T14;
Chris@82 1084 T1A = T13 - T14;
Chris@82 1085 T11 = TZ - T10;
Chris@82 1086 T1F = TB - TC;
Chris@82 1087 T1n = FMA(T9, Th, Te * Tl);
Chris@82 1088 T1p = FNMS(Te, Th, T9 * Tl);
Chris@82 1089 {
Chris@82 1090 E T2o, T2p, T2s, T2t;
Chris@82 1091 T2o = T8 * Th;
Chris@82 1092 T2p = Td * Tl;
Chris@82 1093 T2q = T2o + T2p;
Chris@82 1094 T2I = T2o - T2p;
Chris@82 1095 T2s = T8 * Tl;
Chris@82 1096 T2t = Td * Th;
Chris@82 1097 T2u = T2s - T2t;
Chris@82 1098 T2K = T2s + T2t;
Chris@82 1099 }
Chris@82 1100 {
Chris@82 1101 E T2T, T2U, T2X, T2Y;
Chris@82 1102 T2T = TM * Th;
Chris@82 1103 T2U = TO * Tl;
Chris@82 1104 T2V = T2T - T2U;
Chris@82 1105 T3b = T2T + T2U;
Chris@82 1106 T2X = TM * Tl;
Chris@82 1107 T2Y = TO * Th;
Chris@82 1108 T2Z = T2X + T2Y;
Chris@82 1109 T3d = T2X - T2Y;
Chris@82 1110 Tu = Ts + Tt;
Chris@82 1111 Ty = Tw - Tx;
Chris@82 1112 T3l = FMA(Tu, Th, Ty * Tl);
Chris@82 1113 T3n = FNMS(Ty, Th, Tu * Tl);
Chris@82 1114 }
Chris@82 1115 T1t = Ts - Tt;
Chris@82 1116 T1v = Tw + Tx;
Chris@82 1117 T2f = FMA(T1t, Th, T1v * Tl);
Chris@82 1118 T2h = FNMS(T1v, Th, T1t * Tl);
Chris@82 1119 T1a = T18 - T19;
Chris@82 1120 T1e = T1c + T1d;
Chris@82 1121 T32 = FMA(T1a, Th, T1e * Tl);
Chris@82 1122 T34 = FNMS(T1e, Th, T1a * Tl);
Chris@82 1123 T1W = T18 + T19;
Chris@82 1124 T1Y = T1c - T1d;
Chris@82 1125 T2C = FMA(T1W, Th, T1Y * Tl);
Chris@82 1126 T2E = FNMS(T1Y, Th, T1W * Tl);
Chris@82 1127 {
Chris@82 1128 E Ta, Tf, Ti, Tj;
Chris@82 1129 Ta = T8 * T9;
Chris@82 1130 Tf = Td * Te;
Chris@82 1131 Tg = Ta - Tf;
Chris@82 1132 TR = Ta + Tf;
Chris@82 1133 Ti = T8 * Te;
Chris@82 1134 Tj = Td * T9;
Chris@82 1135 Tk = Ti + Tj;
Chris@82 1136 TS = Ti - Tj;
Chris@82 1137 }
Chris@82 1138 Tm = FMA(Tg, Th, Tk * Tl);
Chris@82 1139 TV = FNMS(TS, Th, TR * Tl);
Chris@82 1140 To = FNMS(Tk, Th, Tg * Tl);
Chris@82 1141 TT = FMA(TR, Th, TS * Tl);
Chris@82 1142 {
Chris@82 1143 E T1K, T1L, T1N, T1O;
Chris@82 1144 T1K = TM * T9;
Chris@82 1145 T1L = TO * Te;
Chris@82 1146 T1M = T1K - T1L;
Chris@82 1147 T21 = T1K + T1L;
Chris@82 1148 T1N = TM * Te;
Chris@82 1149 T1O = TO * T9;
Chris@82 1150 T1P = T1N + T1O;
Chris@82 1151 T22 = T1N - T1O;
Chris@82 1152 }
Chris@82 1153 T1Q = FMA(T1M, Th, T1P * Tl);
Chris@82 1154 T25 = FNMS(T22, Th, T21 * Tl);
Chris@82 1155 T1S = FNMS(T1P, Th, T1M * Tl);
Chris@82 1156 T23 = FMA(T21, Th, T22 * Tl);
Chris@82 1157 }
/* Main body: eight load stages (four inputs each) followed by five
 * store stages. The variables declared here carry values between them. */
Chris@82 1158 {
Chris@82 1159 E TL, T6f, T8c, T8q, T3F, T5t, T7I, T7W, T2y, T6B, T6y, T7j, T4k, T5G, T4B;
Chris@82 1160 E T5J, T3h, T6H, T6O, T7o, T4L, T5Q, T52, T5N, T1i, T7V, T6i, T7D, T3K, T5u;
Chris@82 1161 E T3P, T5v, T1E, T6k, T6n, T7f, T3W, T5z, T41, T5y, T29, T6p, T6s, T7e, T47;
Chris@82 1162 E T5C, T4c, T5B, T2R, T6z, T6E, T7k, T4v, T5K, T4E, T5H, T3y, T6P, T6K, T7p;
Chris@82 1163 E T4W, T5O, T55, T5R;
/* Load stage: inputs 0, 16, 8 and 24 (input 0 is used without a factor). */
Chris@82 1164 {
Chris@82 1165 E T1, T7G, Tq, T7F, TA, T3C, TJ, T3D, Tn, Tp;
Chris@82 1166 T1 = cr[0];
Chris@82 1167 T7G = ci[0];
Chris@82 1168 Tn = cr[WS(rs, 16)];
Chris@82 1169 Tp = ci[WS(rs, 16)];
Chris@82 1170 Tq = FMA(Tm, Tn, To * Tp);
Chris@82 1171 T7F = FNMS(To, Tn, Tm * Tp);
Chris@82 1172 {
Chris@82 1173 E Tv, Tz, TE, TI;
Chris@82 1174 Tv = cr[WS(rs, 8)];
Chris@82 1175 Tz = ci[WS(rs, 8)];
Chris@82 1176 TA = FMA(Tu, Tv, Ty * Tz);
Chris@82 1177 T3C = FNMS(Ty, Tv, Tu * Tz);
Chris@82 1178 TE = cr[WS(rs, 24)];
Chris@82 1179 TI = ci[WS(rs, 24)];
Chris@82 1180 TJ = FMA(TD, TE, TH * TI);
Chris@82 1181 T3D = FNMS(TH, TE, TD * TI);
Chris@82 1182 }
Chris@82 1183 {
Chris@82 1184 E Tr, TK, T8a, T8b;
Chris@82 1185 Tr = T1 + Tq;
Chris@82 1186 TK = TA + TJ;
Chris@82 1187 TL = Tr + TK;
Chris@82 1188 T6f = Tr - TK;
Chris@82 1189 T8a = TA - TJ;
Chris@82 1190 T8b = T7G - T7F;
Chris@82 1191 T8c = T8a + T8b;
Chris@82 1192 T8q = T8b - T8a;
Chris@82 1193 }
Chris@82 1194 {
Chris@82 1195 E T3B, T3E, T7E, T7H;
Chris@82 1196 T3B = T1 - Tq;
Chris@82 1197 T3E = T3C - T3D;
Chris@82 1198 T3F = T3B + T3E;
Chris@82 1199 T5t = T3B - T3E;
Chris@82 1200 T7E = T3C + T3D;
Chris@82 1201 T7H = T7F + T7G;
Chris@82 1202 T7I = T7E + T7H;
Chris@82 1203 T7W = T7H - T7E;
Chris@82 1204 }
Chris@82 1205 }
/* Load stage: inputs 1, 25, 17 and 9. */
Chris@82 1206 {
Chris@82 1207 E T2e, T4x, T2w, T4i, T2j, T4y, T2n, T4h;
Chris@82 1208 {
Chris@82 1209 E T2c, T2d, T2r, T2v;
Chris@82 1210 T2c = cr[WS(rs, 1)];
Chris@82 1211 T2d = ci[WS(rs, 1)];
Chris@82 1212 T2e = FMA(T2, T2c, T5 * T2d);
Chris@82 1213 T4x = FNMS(T5, T2c, T2 * T2d);
Chris@82 1214 T2r = cr[WS(rs, 25)];
Chris@82 1215 T2v = ci[WS(rs, 25)];
Chris@82 1216 T2w = FMA(T2q, T2r, T2u * T2v);
Chris@82 1217 T4i = FNMS(T2u, T2r, T2q * T2v);
Chris@82 1218 }
Chris@82 1219 {
Chris@82 1220 E T2g, T2i, T2l, T2m;
Chris@82 1221 T2g = cr[WS(rs, 17)];
Chris@82 1222 T2i = ci[WS(rs, 17)];
Chris@82 1223 T2j = FMA(T2f, T2g, T2h * T2i);
Chris@82 1224 T4y = FNMS(T2h, T2g, T2f * T2i);
Chris@82 1225 T2l = cr[WS(rs, 9)];
Chris@82 1226 T2m = ci[WS(rs, 9)];
Chris@82 1227 T2n = FMA(T9, T2l, Te * T2m);
Chris@82 1228 T4h = FNMS(Te, T2l, T9 * T2m);
Chris@82 1229 }
Chris@82 1230 {
Chris@82 1231 E T2k, T2x, T6w, T6x;
Chris@82 1232 T2k = T2e + T2j;
Chris@82 1233 T2x = T2n + T2w;
Chris@82 1234 T2y = T2k + T2x;
Chris@82 1235 T6B = T2k - T2x;
Chris@82 1236 T6w = T4x + T4y;
Chris@82 1237 T6x = T4h + T4i;
Chris@82 1238 T6y = T6w - T6x;
Chris@82 1239 T7j = T6w + T6x;
Chris@82 1240 }
Chris@82 1241 {
Chris@82 1242 E T4g, T4j, T4z, T4A;
Chris@82 1243 T4g = T2e - T2j;
Chris@82 1244 T4j = T4h - T4i;
Chris@82 1245 T4k = T4g + T4j;
Chris@82 1246 T5G = T4g - T4j;
Chris@82 1247 T4z = T4x - T4y;
Chris@82 1248 T4A = T2n - T2w;
Chris@82 1249 T4B = T4z - T4A;
Chris@82 1250 T5J = T4z + T4A;
Chris@82 1251 }
Chris@82 1252 }
/* Load stage: inputs 31, 23, 15 and 7. */
Chris@82 1253 {
Chris@82 1254 E T31, T4H, T3f, T50, T36, T4I, T3a, T4Z;
Chris@82 1255 {
Chris@82 1256 E T2W, T30, T3c, T3e;
Chris@82 1257 T2W = cr[WS(rs, 31)];
Chris@82 1258 T30 = ci[WS(rs, 31)];
Chris@82 1259 T31 = FMA(T2V, T2W, T2Z * T30);
Chris@82 1260 T4H = FNMS(T2Z, T2W, T2V * T30);
Chris@82 1261 T3c = cr[WS(rs, 23)];
Chris@82 1262 T3e = ci[WS(rs, 23)];
Chris@82 1263 T3f = FMA(T3b, T3c, T3d * T3e);
Chris@82 1264 T50 = FNMS(T3d, T3c, T3b * T3e);
Chris@82 1265 }
Chris@82 1266 {
Chris@82 1267 E T33, T35, T38, T39;
Chris@82 1268 T33 = cr[WS(rs, 15)];
Chris@82 1269 T35 = ci[WS(rs, 15)];
Chris@82 1270 T36 = FMA(T32, T33, T34 * T35);
Chris@82 1271 T4I = FNMS(T34, T33, T32 * T35);
Chris@82 1272 T38 = cr[WS(rs, 7)];
Chris@82 1273 T39 = ci[WS(rs, 7)];
Chris@82 1274 T3a = FMA(TR, T38, TS * T39);
Chris@82 1275 T4Z = FNMS(TS, T38, TR * T39);
Chris@82 1276 }
Chris@82 1277 {
Chris@82 1278 E T37, T3g, T6M, T6N;
Chris@82 1279 T37 = T31 + T36;
Chris@82 1280 T3g = T3a + T3f;
Chris@82 1281 T3h = T37 + T3g;
Chris@82 1282 T6H = T37 - T3g;
Chris@82 1283 T6M = T4H + T4I;
Chris@82 1284 T6N = T4Z + T50;
Chris@82 1285 T6O = T6M - T6N;
Chris@82 1286 T7o = T6M + T6N;
Chris@82 1287 }
Chris@82 1288 {
Chris@82 1289 E T4J, T4K, T4Y, T51;
Chris@82 1290 T4J = T4H - T4I;
Chris@82 1291 T4K = T3a - T3f;
Chris@82 1292 T4L = T4J - T4K;
Chris@82 1293 T5Q = T4J + T4K;
Chris@82 1294 T4Y = T31 - T36;
Chris@82 1295 T51 = T4Z - T50;
Chris@82 1296 T52 = T4Y + T51;
Chris@82 1297 T5N = T4Y - T51;
Chris@82 1298 }
Chris@82 1299 }
/* Load stage: inputs 4, 12, 20 and 28. */
Chris@82 1300 {
Chris@82 1301 E TQ, T3H, T1g, T3N, TX, T3I, T17, T3M;
Chris@82 1302 {
Chris@82 1303 E TN, TP, T1b, T1f;
Chris@82 1304 TN = cr[WS(rs, 4)];
Chris@82 1305 TP = ci[WS(rs, 4)];
Chris@82 1306 TQ = FMA(TM, TN, TO * TP);
Chris@82 1307 T3H = FNMS(TO, TN, TM * TP);
Chris@82 1308 T1b = cr[WS(rs, 12)];
Chris@82 1309 T1f = ci[WS(rs, 12)];
Chris@82 1310 T1g = FMA(T1a, T1b, T1e * T1f);
Chris@82 1311 T3N = FNMS(T1e, T1b, T1a * T1f);
Chris@82 1312 }
Chris@82 1313 {
Chris@82 1314 E TU, TW, T12, T16;
Chris@82 1315 TU = cr[WS(rs, 20)];
Chris@82 1316 TW = ci[WS(rs, 20)];
Chris@82 1317 TX = FMA(TT, TU, TV * TW);
Chris@82 1318 T3I = FNMS(TV, TU, TT * TW);
Chris@82 1319 T12 = cr[WS(rs, 28)];
Chris@82 1320 T16 = ci[WS(rs, 28)];
Chris@82 1321 T17 = FMA(T11, T12, T15 * T16);
Chris@82 1322 T3M = FNMS(T15, T12, T11 * T16);
Chris@82 1323 }
Chris@82 1324 {
Chris@82 1325 E TY, T1h, T6g, T6h;
Chris@82 1326 TY = TQ + TX;
Chris@82 1327 T1h = T17 + T1g;
Chris@82 1328 T1i = TY + T1h;
Chris@82 1329 T7V = TY - T1h;
Chris@82 1330 T6g = T3M + T3N;
Chris@82 1331 T6h = T3H + T3I;
Chris@82 1332 T6i = T6g - T6h;
Chris@82 1333 T7D = T6h + T6g;
Chris@82 1334 }
Chris@82 1335 {
Chris@82 1336 E T3G, T3J, T3L, T3O;
Chris@82 1337 T3G = TQ - TX;
Chris@82 1338 T3J = T3H - T3I;
Chris@82 1339 T3K = T3G + T3J;
Chris@82 1340 T5u = T3G - T3J;
Chris@82 1341 T3L = T17 - T1g;
Chris@82 1342 T3O = T3M - T3N;
Chris@82 1343 T3P = T3L - T3O;
Chris@82 1344 T5v = T3L + T3O;
Chris@82 1345 }
Chris@82 1346 }
/* Load stage: inputs 2, 26, 18 and 10. */
Chris@82 1347 {
Chris@82 1348 E T1m, T3X, T1C, T3U, T1r, T3Y, T1x, T3T;
Chris@82 1349 {
Chris@82 1350 E T1k, T1l, T1z, T1B;
Chris@82 1351 T1k = cr[WS(rs, 2)];
Chris@82 1352 T1l = ci[WS(rs, 2)];
Chris@82 1353 T1m = FMA(T8, T1k, Td * T1l);
Chris@82 1354 T3X = FNMS(Td, T1k, T8 * T1l);
Chris@82 1355 T1z = cr[WS(rs, 26)];
Chris@82 1356 T1B = ci[WS(rs, 26)];
Chris@82 1357 T1C = FMA(T1y, T1z, T1A * T1B);
Chris@82 1358 T3U = FNMS(T1A, T1z, T1y * T1B);
Chris@82 1359 }
Chris@82 1360 {
Chris@82 1361 E T1o, T1q, T1u, T1w;
Chris@82 1362 T1o = cr[WS(rs, 18)];
Chris@82 1363 T1q = ci[WS(rs, 18)];
Chris@82 1364 T1r = FMA(T1n, T1o, T1p * T1q);
Chris@82 1365 T3Y = FNMS(T1p, T1o, T1n * T1q);
Chris@82 1366 T1u = cr[WS(rs, 10)];
Chris@82 1367 T1w = ci[WS(rs, 10)];
Chris@82 1368 T1x = FMA(T1t, T1u, T1v * T1w);
Chris@82 1369 T3T = FNMS(T1v, T1u, T1t * T1w);
Chris@82 1370 }
Chris@82 1371 {
Chris@82 1372 E T1s, T1D, T6l, T6m;
Chris@82 1373 T1s = T1m + T1r;
Chris@82 1374 T1D = T1x + T1C;
Chris@82 1375 T1E = T1s + T1D;
Chris@82 1376 T6k = T1s - T1D;
Chris@82 1377 T6l = T3X + T3Y;
Chris@82 1378 T6m = T3T + T3U;
Chris@82 1379 T6n = T6l - T6m;
Chris@82 1380 T7f = T6l + T6m;
Chris@82 1381 }
Chris@82 1382 {
Chris@82 1383 E T3S, T3V, T3Z, T40;
Chris@82 1384 T3S = T1m - T1r;
Chris@82 1385 T3V = T3T - T3U;
Chris@82 1386 T3W = T3S + T3V;
Chris@82 1387 T5z = T3S - T3V;
Chris@82 1388 T3Z = T3X - T3Y;
Chris@82 1389 T40 = T1x - T1C;
Chris@82 1390 T41 = T3Z - T40;
Chris@82 1391 T5y = T3Z + T40;
Chris@82 1392 }
Chris@82 1393 }
/* Load stage: inputs 30, 22, 14 and 6. */
Chris@82 1394 {
Chris@82 1395 E T1J, T43, T27, T4a, T1U, T44, T20, T49;
Chris@82 1396 {
Chris@82 1397 E T1G, T1I, T24, T26;
Chris@82 1398 T1G = cr[WS(rs, 30)];
Chris@82 1399 T1I = ci[WS(rs, 30)];
Chris@82 1400 T1J = FMA(T1F, T1G, T1H * T1I);
Chris@82 1401 T43 = FNMS(T1H, T1G, T1F * T1I);
Chris@82 1402 T24 = cr[WS(rs, 22)];
Chris@82 1403 T26 = ci[WS(rs, 22)];
Chris@82 1404 T27 = FMA(T23, T24, T25 * T26);
Chris@82 1405 T4a = FNMS(T25, T24, T23 * T26);
Chris@82 1406 }
Chris@82 1407 {
Chris@82 1408 E T1R, T1T, T1X, T1Z;
Chris@82 1409 T1R = cr[WS(rs, 14)];
Chris@82 1410 T1T = ci[WS(rs, 14)];
Chris@82 1411 T1U = FMA(T1Q, T1R, T1S * T1T);
Chris@82 1412 T44 = FNMS(T1S, T1R, T1Q * T1T);
Chris@82 1413 T1X = cr[WS(rs, 6)];
Chris@82 1414 T1Z = ci[WS(rs, 6)];
Chris@82 1415 T20 = FMA(T1W, T1X, T1Y * T1Z);
Chris@82 1416 T49 = FNMS(T1Y, T1X, T1W * T1Z);
Chris@82 1417 }
Chris@82 1418 {
Chris@82 1419 E T1V, T28, T6q, T6r;
Chris@82 1420 T1V = T1J + T1U;
Chris@82 1421 T28 = T20 + T27;
Chris@82 1422 T29 = T1V + T28;
Chris@82 1423 T6p = T1V - T28;
Chris@82 1424 T6q = T43 + T44;
Chris@82 1425 T6r = T49 + T4a;
Chris@82 1426 T6s = T6q - T6r;
Chris@82 1427 T7e = T6q + T6r;
Chris@82 1428 }
Chris@82 1429 {
Chris@82 1430 E T45, T46, T48, T4b;
Chris@82 1431 T45 = T43 - T44;
Chris@82 1432 T46 = T20 - T27;
Chris@82 1433 T47 = T45 - T46;
Chris@82 1434 T5C = T45 + T46;
Chris@82 1435 T48 = T1J - T1U;
Chris@82 1436 T4b = T49 - T4a;
Chris@82 1437 T4c = T48 + T4b;
Chris@82 1438 T5B = T48 - T4b;
Chris@82 1439 }
Chris@82 1440 }
/* Load stage: inputs 5, 21, 29 and 13; four of the results are scaled
 * by KP707106781 before being kept. */
Chris@82 1441 {
Chris@82 1442 E T2B, T4m, T2G, T4n, T4l, T4o, T2M, T4q, T2P, T4r, T4s, T4t;
Chris@82 1443 {
Chris@82 1444 E T2z, T2A, T2D, T2F;
Chris@82 1445 T2z = cr[WS(rs, 5)];
Chris@82 1446 T2A = ci[WS(rs, 5)];
Chris@82 1447 T2B = FMA(T21, T2z, T22 * T2A);
Chris@82 1448 T4m = FNMS(T22, T2z, T21 * T2A);
Chris@82 1449 T2D = cr[WS(rs, 21)];
Chris@82 1450 T2F = ci[WS(rs, 21)];
Chris@82 1451 T2G = FMA(T2C, T2D, T2E * T2F);
Chris@82 1452 T4n = FNMS(T2E, T2D, T2C * T2F);
Chris@82 1453 }
Chris@82 1454 T4l = T2B - T2G;
Chris@82 1455 T4o = T4m - T4n;
Chris@82 1456 {
Chris@82 1457 E T2J, T2L, T2N, T2O;
Chris@82 1458 T2J = cr[WS(rs, 29)];
Chris@82 1459 T2L = ci[WS(rs, 29)];
Chris@82 1460 T2M = FMA(T2I, T2J, T2K * T2L);
Chris@82 1461 T4q = FNMS(T2K, T2J, T2I * T2L);
Chris@82 1462 T2N = cr[WS(rs, 13)];
Chris@82 1463 T2O = ci[WS(rs, 13)];
Chris@82 1464 T2P = FMA(T1M, T2N, T1P * T2O);
Chris@82 1465 T4r = FNMS(T1P, T2N, T1M * T2O);
Chris@82 1466 }
Chris@82 1467 T4s = T4q - T4r;
Chris@82 1468 T4t = T2M - T2P;
Chris@82 1469 {
Chris@82 1470 E T2H, T2Q, T6C, T6D;
Chris@82 1471 T2H = T2B + T2G;
Chris@82 1472 T2Q = T2M + T2P;
Chris@82 1473 T2R = T2H + T2Q;
Chris@82 1474 T6z = T2H - T2Q;
Chris@82 1475 T6C = T4q + T4r;
Chris@82 1476 T6D = T4m + T4n;
Chris@82 1477 T6E = T6C - T6D;
Chris@82 1478 T7k = T6D + T6C;
Chris@82 1479 }
Chris@82 1480 {
Chris@82 1481 E T4p, T4u, T4C, T4D;
Chris@82 1482 T4p = T4l + T4o;
Chris@82 1483 T4u = T4s - T4t;
Chris@82 1484 T4v = KP707106781 * (T4p - T4u);
Chris@82 1485 T5K = KP707106781 * (T4p + T4u);
Chris@82 1486 T4C = T4t + T4s;
Chris@82 1487 T4D = T4l - T4o;
Chris@82 1488 T4E = KP707106781 * (T4C - T4D);
Chris@82 1489 T5H = KP707106781 * (T4D + T4C);
Chris@82 1490 }
Chris@82 1491 }
/* Load stage: inputs 3, 19, 27 and 11; four of the results are scaled
 * by KP707106781 before being kept. */
Chris@82 1492 {
Chris@82 1493 E T3k, T4S, T3p, T4T, T4R, T4U, T3t, T4N, T3w, T4O, T4M, T4P;
Chris@82 1494 {
Chris@82 1495 E T3i, T3j, T3m, T3o;
Chris@82 1496 T3i = cr[WS(rs, 3)];
Chris@82 1497 T3j = ci[WS(rs, 3)];
Chris@82 1498 T3k = FMA(T3, T3i, T6 * T3j);
Chris@82 1499 T4S = FNMS(T6, T3i, T3 * T3j);
Chris@82 1500 T3m = cr[WS(rs, 19)];
Chris@82 1501 T3o = ci[WS(rs, 19)];
Chris@82 1502 T3p = FMA(T3l, T3m, T3n * T3o);
Chris@82 1503 T4T = FNMS(T3n, T3m, T3l * T3o);
Chris@82 1504 }
Chris@82 1505 T4R = T3k - T3p;
Chris@82 1506 T4U = T4S - T4T;
Chris@82 1507 {
Chris@82 1508 E T3r, T3s, T3u, T3v;
Chris@82 1509 T3r = cr[WS(rs, 27)];
Chris@82 1510 T3s = ci[WS(rs, 27)];
Chris@82 1511 T3t = FMA(Th, T3r, Tl * T3s);
Chris@82 1512 T4N = FNMS(Tl, T3r, Th * T3s);
Chris@82 1513 T3u = cr[WS(rs, 11)];
Chris@82 1514 T3v = ci[WS(rs, 11)];
Chris@82 1515 T3w = FMA(Tg, T3u, Tk * T3v);
Chris@82 1516 T4O = FNMS(Tk, T3u, Tg * T3v);
Chris@82 1517 }
Chris@82 1518 T4M = T3t - T3w;
Chris@82 1519 T4P = T4N - T4O;
Chris@82 1520 {
Chris@82 1521 E T3q, T3x, T6I, T6J;
Chris@82 1522 T3q = T3k + T3p;
Chris@82 1523 T3x = T3t + T3w;
Chris@82 1524 T3y = T3q + T3x;
Chris@82 1525 T6P = T3q - T3x;
Chris@82 1526 T6I = T4N + T4O;
Chris@82 1527 T6J = T4S + T4T;
Chris@82 1528 T6K = T6I - T6J;
Chris@82 1529 T7p = T6J + T6I;
Chris@82 1530 }
Chris@82 1531 {
Chris@82 1532 E T4Q, T4V, T53, T54;
Chris@82 1533 T4Q = T4M + T4P;
Chris@82 1534 T4V = T4R - T4U;
Chris@82 1535 T4W = KP707106781 * (T4Q - T4V);
Chris@82 1536 T5O = KP707106781 * (T4V + T4Q);
Chris@82 1537 T53 = T4R + T4U;
Chris@82 1538 T54 = T4P - T4M;
Chris@82 1539 T55 = KP707106781 * (T53 - T54);
Chris@82 1540 T5R = KP707106781 * (T53 + T54);
Chris@82 1541 }
Chris@82 1542 }
/* Store stage: cr[0], cr[8], cr[16], cr[24] and ci[7], ci[15], ci[23],
 * ci[31] (indices passed through WS(rs, .)). Only sums and differences
 * are needed here, no constant multiplications. */
Chris@82 1543 {
Chris@82 1544 E T2b, T7x, T7K, T7M, T3A, T7L, T7A, T7B;
Chris@82 1545 {
Chris@82 1546 E T1j, T2a, T7C, T7J;
Chris@82 1547 T1j = TL + T1i;
Chris@82 1548 T2a = T1E + T29;
Chris@82 1549 T2b = T1j + T2a;
Chris@82 1550 T7x = T1j - T2a;
Chris@82 1551 T7C = T7f + T7e;
Chris@82 1552 T7J = T7D + T7I;
Chris@82 1553 T7K = T7C + T7J;
Chris@82 1554 T7M = T7J - T7C;
Chris@82 1555 }
Chris@82 1556 {
Chris@82 1557 E T2S, T3z, T7y, T7z;
Chris@82 1558 T2S = T2y + T2R;
Chris@82 1559 T3z = T3h + T3y;
Chris@82 1560 T3A = T2S + T3z;
Chris@82 1561 T7L = T3z - T2S;
Chris@82 1562 T7y = T7o + T7p;
Chris@82 1563 T7z = T7j + T7k;
Chris@82 1564 T7A = T7y - T7z;
Chris@82 1565 T7B = T7z + T7y;
Chris@82 1566 }
Chris@82 1567 ci[WS(rs, 15)] = T2b - T3A;
Chris@82 1568 cr[WS(rs, 24)] = T7L - T7M;
Chris@82 1569 ci[WS(rs, 23)] = T7L + T7M;
Chris@82 1570 cr[0] = T2b + T3A;
Chris@82 1571 cr[WS(rs, 8)] = T7x - T7A;
Chris@82 1572 cr[WS(rs, 16)] = T7B - T7K;
Chris@82 1573 ci[WS(rs, 31)] = T7B + T7K;
Chris@82 1574 ci[WS(rs, 7)] = T7x + T7A;
Chris@82 1575 }
/* Store stage: cr[3], cr[7], cr[11], cr[15], cr[19], cr[23], cr[27],
 * cr[31] and ci[0], ci[4], ci[8], ci[12], ci[16], ci[20], ci[24], ci[28]. */
Chris@82 1576 {
Chris@82 1577 E T5x, T5Z, T8d, T8j, T5E, T88, T69, T6d, T5M, T5W, T62, T8i, T66, T6c, T5T;
Chris@82 1578 E T5X, T5w, T89;
Chris@82 1579 T5w = KP707106781 * (T5u + T5v);
Chris@82 1580 T5x = T5t - T5w;
Chris@82 1581 T5Z = T5t + T5w;
Chris@82 1582 T89 = KP707106781 * (T3K - T3P);
Chris@82 1583 T8d = T89 + T8c;
Chris@82 1584 T8j = T8c - T89;
Chris@82 1585 {
Chris@82 1586 E T5A, T5D, T67, T68;
Chris@82 1587 T5A = FMA(KP923879532, T5y, KP382683432 * T5z);
Chris@82 1588 T5D = FNMS(KP923879532, T5C, KP382683432 * T5B);
Chris@82 1589 T5E = T5A + T5D;
Chris@82 1590 T88 = T5A - T5D;
Chris@82 1591 T67 = T5N + T5O;
Chris@82 1592 T68 = T5Q + T5R;
Chris@82 1593 T69 = FNMS(KP980785280, T68, KP195090322 * T67);
Chris@82 1594 T6d = FMA(KP980785280, T67, KP195090322 * T68);
Chris@82 1595 }
Chris@82 1596 {
Chris@82 1597 E T5I, T5L, T60, T61;
Chris@82 1598 T5I = T5G - T5H;
Chris@82 1599 T5L = T5J - T5K;
Chris@82 1600 T5M = FMA(KP831469612, T5I, KP555570233 * T5L);
Chris@82 1601 T5W = FNMS(KP831469612, T5L, KP555570233 * T5I);
Chris@82 1602 T60 = FNMS(KP382683432, T5y, KP923879532 * T5z);
Chris@82 1603 T61 = FMA(KP382683432, T5C, KP923879532 * T5B);
Chris@82 1604 T62 = T60 + T61;
Chris@82 1605 T8i = T61 - T60;
Chris@82 1606 }
Chris@82 1607 {
Chris@82 1608 E T64, T65, T5P, T5S;
Chris@82 1609 T64 = T5G + T5H;
Chris@82 1610 T65 = T5J + T5K;
Chris@82 1611 T66 = FMA(KP195090322, T64, KP980785280 * T65);
Chris@82 1612 T6c = FNMS(KP195090322, T65, KP980785280 * T64);
Chris@82 1613 T5P = T5N - T5O;
Chris@82 1614 T5S = T5Q - T5R;
Chris@82 1615 T5T = FNMS(KP555570233, T5S, KP831469612 * T5P);
Chris@82 1616 T5X = FMA(KP555570233, T5P, KP831469612 * T5S);
Chris@82 1617 }
Chris@82 1618 {
Chris@82 1619 E T5F, T5U, T8h, T8k;
Chris@82 1620 T5F = T5x + T5E;
Chris@82 1621 T5U = T5M + T5T;
Chris@82 1622 ci[WS(rs, 12)] = T5F - T5U;
Chris@82 1623 cr[WS(rs, 3)] = T5F + T5U;
Chris@82 1624 T8h = T5X - T5W;
Chris@82 1625 T8k = T8i + T8j;
Chris@82 1626 cr[WS(rs, 19)] = T8h - T8k;
Chris@82 1627 ci[WS(rs, 28)] = T8h + T8k;
Chris@82 1628 }
Chris@82 1629 {
Chris@82 1630 E T8l, T8m, T5V, T5Y;
Chris@82 1631 T8l = T5T - T5M;
Chris@82 1632 T8m = T8j - T8i;
Chris@82 1633 cr[WS(rs, 27)] = T8l - T8m;
Chris@82 1634 ci[WS(rs, 20)] = T8l + T8m;
Chris@82 1635 T5V = T5x - T5E;
Chris@82 1636 T5Y = T5W + T5X;
Chris@82 1637 cr[WS(rs, 11)] = T5V - T5Y;
Chris@82 1638 ci[WS(rs, 4)] = T5V + T5Y;
Chris@82 1639 }
Chris@82 1640 {
Chris@82 1641 E T63, T6a, T87, T8e;
Chris@82 1642 T63 = T5Z - T62;
Chris@82 1643 T6a = T66 + T69;
Chris@82 1644 ci[WS(rs, 8)] = T63 - T6a;
Chris@82 1645 cr[WS(rs, 7)] = T63 + T6a;
Chris@82 1646 T87 = T69 - T66;
Chris@82 1647 T8e = T88 + T8d;
Chris@82 1648 cr[WS(rs, 31)] = T87 - T8e;
Chris@82 1649 ci[WS(rs, 16)] = T87 + T8e;
Chris@82 1650 }
Chris@82 1651 {
Chris@82 1652 E T8f, T8g, T6b, T6e;
Chris@82 1653 T8f = T6d - T6c;
Chris@82 1654 T8g = T8d - T88;
Chris@82 1655 cr[WS(rs, 23)] = T8f - T8g;
Chris@82 1656 ci[WS(rs, 24)] = T8f + T8g;
Chris@82 1657 T6b = T5Z + T62;
Chris@82 1658 T6e = T6c + T6d;
Chris@82 1659 cr[WS(rs, 15)] = T6b - T6e;
Chris@82 1660 ci[0] = T6b + T6e;
Chris@82 1661 }
Chris@82 1662 }
/* Store stage: cr[4], cr[12], cr[20], cr[28] and ci[3], ci[11], ci[19],
 * ci[27]; the only constant used is KP707106781. */
Chris@82 1663 {
Chris@82 1664 E T7h, T7t, T7Q, T7S, T7m, T7u, T7r, T7v;
Chris@82 1665 {
Chris@82 1666 E T7d, T7g, T7O, T7P;
Chris@82 1667 T7d = TL - T1i;
Chris@82 1668 T7g = T7e - T7f;
Chris@82 1669 T7h = T7d - T7g;
Chris@82 1670 T7t = T7d + T7g;
Chris@82 1671 T7O = T1E - T29;
Chris@82 1672 T7P = T7I - T7D;
Chris@82 1673 T7Q = T7O + T7P;
Chris@82 1674 T7S = T7P - T7O;
Chris@82 1675 }
Chris@82 1676 {
Chris@82 1677 E T7i, T7l, T7n, T7q;
Chris@82 1678 T7i = T2y - T2R;
Chris@82 1679 T7l = T7j - T7k;
Chris@82 1680 T7m = T7i + T7l;
Chris@82 1681 T7u = T7i - T7l;
Chris@82 1682 T7n = T3h - T3y;
Chris@82 1683 T7q = T7o - T7p;
Chris@82 1684 T7r = T7n - T7q;
Chris@82 1685 T7v = T7n + T7q;
Chris@82 1686 }
Chris@82 1687 {
Chris@82 1688 E T7s, T7R, T7w, T7N;
Chris@82 1689 T7s = KP707106781 * (T7m + T7r);
Chris@82 1690 ci[WS(rs, 11)] = T7h - T7s;
Chris@82 1691 cr[WS(rs, 4)] = T7h + T7s;
Chris@82 1692 T7R = KP707106781 * (T7v - T7u);
Chris@82 1693 cr[WS(rs, 20)] = T7R - T7S;
Chris@82 1694 ci[WS(rs, 27)] = T7R + T7S;
Chris@82 1695 T7w = KP707106781 * (T7u + T7v);
Chris@82 1696 cr[WS(rs, 12)] = T7t - T7w;
Chris@82 1697 ci[WS(rs, 3)] = T7t + T7w;
Chris@82 1698 T7N = KP707106781 * (T7r - T7m);
Chris@82 1699 cr[WS(rs, 28)] = T7N - T7Q;
Chris@82 1700 ci[WS(rs, 19)] = T7N + T7Q;
Chris@82 1701 }
Chris@82 1702 }
/* Store stage: cr[2], cr[6], cr[10], cr[14], cr[18], cr[22], cr[26],
 * cr[30] and ci[1], ci[5], ci[9], ci[13], ci[17], ci[21], ci[25], ci[29]. */
Chris@82 1703 {
Chris@82 1704 E T6j, T7X, T83, T6X, T6u, T7U, T77, T7b, T70, T82, T6G, T6U, T74, T7a, T6R;
Chris@82 1705 E T6V;
Chris@82 1706 {
Chris@82 1707 E T6o, T6t, T6A, T6F;
Chris@82 1708 T6j = T6f - T6i;
Chris@82 1709 T7X = T7V + T7W;
Chris@82 1710 T83 = T7W - T7V;
Chris@82 1711 T6X = T6f + T6i;
Chris@82 1712 T6o = T6k + T6n;
Chris@82 1713 T6t = T6p - T6s;
Chris@82 1714 T6u = KP707106781 * (T6o + T6t);
Chris@82 1715 T7U = KP707106781 * (T6o - T6t);
Chris@82 1716 {
Chris@82 1717 E T75, T76, T6Y, T6Z;
Chris@82 1718 T75 = T6O + T6P;
Chris@82 1719 T76 = T6H + T6K;
Chris@82 1720 T77 = FMA(KP382683432, T75, KP923879532 * T76);
Chris@82 1721 T7b = FNMS(KP923879532, T75, KP382683432 * T76);
Chris@82 1722 T6Y = T6k - T6n;
Chris@82 1723 T6Z = T6p + T6s;
Chris@82 1724 T70 = KP707106781 * (T6Y + T6Z);
Chris@82 1725 T82 = KP707106781 * (T6Z - T6Y);
Chris@82 1726 }
Chris@82 1727 T6A = T6y - T6z;
Chris@82 1728 T6F = T6B - T6E;
Chris@82 1729 T6G = FMA(KP382683432, T6A, KP923879532 * T6F);
Chris@82 1730 T6U = FNMS(KP923879532, T6A, KP382683432 * T6F);
Chris@82 1731 {
Chris@82 1732 E T72, T73, T6L, T6Q;
Chris@82 1733 T72 = T6B + T6E;
Chris@82 1734 T73 = T6y + T6z;
Chris@82 1735 T74 = FNMS(KP382683432, T73, KP923879532 * T72);
Chris@82 1736 T7a = FMA(KP923879532, T73, KP382683432 * T72);
Chris@82 1737 T6L = T6H - T6K;
Chris@82 1738 T6Q = T6O - T6P;
Chris@82 1739 T6R = FNMS(KP382683432, T6Q, KP923879532 * T6L);
Chris@82 1740 T6V = FMA(KP923879532, T6Q, KP382683432 * T6L);
Chris@82 1741 }
Chris@82 1742 }
Chris@82 1743 {
Chris@82 1744 E T6v, T6S, T81, T84;
Chris@82 1745 T6v = T6j + T6u;
Chris@82 1746 T6S = T6G + T6R;
Chris@82 1747 ci[WS(rs, 13)] = T6v - T6S;
Chris@82 1748 cr[WS(rs, 2)] = T6v + T6S;
Chris@82 1749 T81 = T6V - T6U;
Chris@82 1750 T84 = T82 + T83;
Chris@82 1751 cr[WS(rs, 18)] = T81 - T84;
Chris@82 1752 ci[WS(rs, 29)] = T81 + T84;
Chris@82 1753 }
Chris@82 1754 {
Chris@82 1755 E T85, T86, T6T, T6W;
Chris@82 1756 T85 = T6R - T6G;
Chris@82 1757 T86 = T83 - T82;
Chris@82 1758 cr[WS(rs, 26)] = T85 - T86;
Chris@82 1759 ci[WS(rs, 21)] = T85 + T86;
Chris@82 1760 T6T = T6j - T6u;
Chris@82 1761 T6W = T6U + T6V;
Chris@82 1762 cr[WS(rs, 10)] = T6T - T6W;
Chris@82 1763 ci[WS(rs, 5)] = T6T + T6W;
Chris@82 1764 }
Chris@82 1765 {
Chris@82 1766 E T71, T78, T7T, T7Y;
Chris@82 1767 T71 = T6X + T70;
Chris@82 1768 T78 = T74 + T77;
Chris@82 1769 cr[WS(rs, 14)] = T71 - T78;
Chris@82 1770 ci[WS(rs, 1)] = T71 + T78;
Chris@82 1771 T7T = T7b - T7a;
Chris@82 1772 T7Y = T7U + T7X;
Chris@82 1773 cr[WS(rs, 30)] = T7T - T7Y;
Chris@82 1774 ci[WS(rs, 17)] = T7T + T7Y;
Chris@82 1775 }
Chris@82 1776 {
Chris@82 1777 E T7Z, T80, T79, T7c;
Chris@82 1778 T7Z = T77 - T74;
Chris@82 1779 T80 = T7X - T7U;
Chris@82 1780 cr[WS(rs, 22)] = T7Z - T80;
Chris@82 1781 ci[WS(rs, 25)] = T7Z + T80;
Chris@82 1782 T79 = T6X - T70;
Chris@82 1783 T7c = T7a + T7b;
Chris@82 1784 ci[WS(rs, 9)] = T79 - T7c;
Chris@82 1785 cr[WS(rs, 6)] = T79 + T7c;
Chris@82 1786 }
Chris@82 1787 }
/* Store stage: cr[1], cr[5], cr[9], cr[13], cr[17], cr[21], cr[25],
 * cr[29] and ci[2], ci[6], ci[10], ci[14], ci[18], ci[22], ci[26], ci[30]. */
Chris@82 1788 {
Chris@82 1789 E T3R, T5d, T8r, T8x, T4e, T8o, T5n, T5r, T4G, T5a, T5g, T8w, T5k, T5q, T57;
Chris@82 1790 E T5b, T3Q, T8p;
Chris@82 1791 T3Q = KP707106781 * (T3K + T3P);
Chris@82 1792 T3R = T3F - T3Q;
Chris@82 1793 T5d = T3F + T3Q;
Chris@82 1794 T8p = KP707106781 * (T5v - T5u);
Chris@82 1795 T8r = T8p + T8q;
Chris@82 1796 T8x = T8q - T8p;
Chris@82 1797 {
Chris@82 1798 E T42, T4d, T5l, T5m;
Chris@82 1799 T42 = FNMS(KP923879532, T41, KP382683432 * T3W);
Chris@82 1800 T4d = FMA(KP923879532, T47, KP382683432 * T4c);
Chris@82 1801 T4e = T42 + T4d;
Chris@82 1802 T8o = T4d - T42;
Chris@82 1803 T5l = T52 + T55;
Chris@82 1804 T5m = T4L + T4W;
Chris@82 1805 T5n = FNMS(KP195090322, T5m, KP980785280 * T5l);
Chris@82 1806 T5r = FMA(KP980785280, T5m, KP195090322 * T5l);
Chris@82 1807 }
Chris@82 1808 {
Chris@82 1809 E T4w, T4F, T5e, T5f;
Chris@82 1810 T4w = T4k - T4v;
Chris@82 1811 T4F = T4B - T4E;
Chris@82 1812 T4G = FNMS(KP555570233, T4F, KP831469612 * T4w);
Chris@82 1813 T5a = FMA(KP831469612, T4F, KP555570233 * T4w);
Chris@82 1814 T5e = FMA(KP382683432, T41, KP923879532 * T3W);
Chris@82 1815 T5f = FNMS(KP382683432, T47, KP923879532 * T4c);
Chris@82 1816 T5g = T5e + T5f;
Chris@82 1817 T8w = T5e - T5f;
Chris@82 1818 }
Chris@82 1819 {
Chris@82 1820 E T5i, T5j, T4X, T56;
Chris@82 1821 T5i = T4B + T4E;
Chris@82 1822 T5j = T4k + T4v;
Chris@82 1823 T5k = FMA(KP195090322, T5i, KP980785280 * T5j);
Chris@82 1824 T5q = FNMS(KP980785280, T5i, KP195090322 * T5j);
Chris@82 1825 T4X = T4L - T4W;
Chris@82 1826 T56 = T52 - T55;
Chris@82 1827 T57 = FMA(KP555570233, T4X, KP831469612 * T56);
Chris@82 1828 T5b = FNMS(KP831469612, T4X, KP555570233 * T56);
Chris@82 1829 }
Chris@82 1830 {
Chris@82 1831 E T4f, T58, T8v, T8y;
Chris@82 1832 T4f = T3R + T4e;
Chris@82 1833 T58 = T4G + T57;
Chris@82 1834 cr[WS(rs, 13)] = T4f - T58;
Chris@82 1835 ci[WS(rs, 2)] = T4f + T58;
Chris@82 1836 T8v = T5b - T5a;
Chris@82 1837 T8y = T8w + T8x;
Chris@82 1838 cr[WS(rs, 29)] = T8v - T8y;
Chris@82 1839 ci[WS(rs, 18)] = T8v + T8y;
Chris@82 1840 }
Chris@82 1841 {
Chris@82 1842 E T8z, T8A, T59, T5c;
Chris@82 1843 T8z = T57 - T4G;
Chris@82 1844 T8A = T8x - T8w;
Chris@82 1845 cr[WS(rs, 21)] = T8z - T8A;
Chris@82 1846 ci[WS(rs, 26)] = T8z + T8A;
Chris@82 1847 T59 = T3R - T4e;
Chris@82 1848 T5c = T5a + T5b;
Chris@82 1849 ci[WS(rs, 10)] = T59 - T5c;
Chris@82 1850 cr[WS(rs, 5)] = T59 + T5c;
Chris@82 1851 }
Chris@82 1852 {
Chris@82 1853 E T5h, T5o, T8n, T8s;
Chris@82 1854 T5h = T5d + T5g;
Chris@82 1855 T5o = T5k + T5n;
Chris@82 1856 ci[WS(rs, 14)] = T5h - T5o;
Chris@82 1857 cr[WS(rs, 1)] = T5h + T5o;
Chris@82 1858 T8n = T5r - T5q;
Chris@82 1859 T8s = T8o + T8r;
Chris@82 1860 cr[WS(rs, 17)] = T8n - T8s;
Chris@82 1861 ci[WS(rs, 30)] = T8n + T8s;
Chris@82 1862 }
Chris@82 1863 {
Chris@82 1864 E T8t, T8u, T5p, T5s;
Chris@82 1865 T8t = T5n - T5k;
Chris@82 1866 T8u = T8r - T8o;
Chris@82 1867 cr[WS(rs, 25)] = T8t - T8u;
Chris@82 1868 ci[WS(rs, 22)] = T8t + T8u;
Chris@82 1869 T5p = T5d - T5g;
Chris@82 1870 T5s = T5q + T5r;
Chris@82 1871 cr[WS(rs, 9)] = T5p - T5s;
Chris@82 1872 ci[WS(rs, 6)] = T5p + T5s;
Chris@82 1873 }
Chris@82 1874 }
Chris@82 1875 }
Chris@82 1876 }
Chris@82 1877 }
Chris@82 1878 }
Chris@82 1879
/*
 * Twiddle instruction table for the non-FMA variant of hf2_32: four
 * TW_CEXP entries whose last field is 1, 3, 9 and 27, terminated by
 * TW_NEXT. hf2_32 consumes eight W values (four pairs) per iteration and
 * applies them unmodified to inputs 1, 3, 9 and 27.
 * NOTE(review): tw_instr is declared elsewhere; the 1/3/9/27 values are
 * presumably the twiddle exponents -- confirm against its definition.
 */
Chris@82 1880 static const tw_instr twinstr[] = {
Chris@82 1881 {TW_CEXP, 1, 1},
Chris@82 1882 {TW_CEXP, 1, 3},
Chris@82 1883 {TW_CEXP, 1, 9},
Chris@82 1884 {TW_CEXP, 1, 27},
Chris@82 1885 {TW_NEXT, 1, 0}
Chris@82 1886 };
Chris@82 1887
/*
 * Descriptor handed to the planner for the non-FMA variant: 32, the
 * codelet name, its twiddle table, GENUS, and the counts
 * {376, 168, 112, 0}, which match the "376 additions, 168
 * multiplications, 112 fused multiply/add" figures in this variant's
 * header comment.
 * NOTE(review): hc2hc_desc is declared elsewhere; 32 is presumably the
 * transform size -- confirm against the struct definition.
 */
Chris@82 1888 static const hc2hc_desc desc = { 32, "hf2_32", twinstr, &GENUS, {376, 168, 112, 0} };
Chris@82 1889
/*
 * Registration entry point (non-FMA variant): passes the hf2_32 function
 * and its descriptor to X(khc2hc_register) for planner p.
 */
Chris@82 1890 void X(codelet_hf2_32) (planner *p) {
Chris@82 1891 X(khc2hc_register) (p, hf2_32, &desc);
Chris@82 1892 }
Chris@82 1893 #endif