annotate src/fftw-3.3.5/rdft/scalar/r2cf/hc2cf2_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:48:34 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hc2cf2_32 -include hc2cf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 488 FP additions, 350 FP multiplications,
Chris@42 32 * (or, 236 additions, 98 multiplications, 252 fused multiply/add),
Chris@42 33 * 181 stack variables, 7 constants, and 128 memory accesses
Chris@42 34 */
Chris@42 35 #include "hc2cf.h"
Chris@42 36
/*
 * hc2cf2_32: machine-generated codelet body (HAVE_FMA variant); the
 * generator invocation and operation counts are quoted in the comments
 * immediately above.
 *
 * Rp, Ip, Rm, Im  data arrays; each iteration loads elements at offsets
 *                 WS(rs, 0) .. WS(rs, 15) from all four and stores the
 *                 results back into the same four arrays.
 * W               coefficient table; set to W + (mb - 1) * 8 before the
 *                 first iteration and advanced by 8 per iteration. Only
 *                 W[0] .. W[7] are loaded; the remaining multipliers are
 *                 computed from those eight values.
 * rs              stride handed to WS() for element addressing.
 * mb, me          the loop runs for m = mb while m < me, stepping by 1.
 * ms              per-iteration step: Rp and Ip advance by +ms, Rm and Im
 *                 by -ms.
 *
 * NOTE(review): the body is generated output and is sensitive to statement
 * order; regenerate rather than hand-edit.
 */
Chris@42 37 static void hc2cf2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
Chris@42 39 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 40 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 41 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@42 42 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@42 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 44 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@42 45 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 46 {
Chris@42 47 INT m;
Chris@42 48 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(128, rs)) {
Chris@42 49 E T9A, T9z;
Chris@42 50 {
Chris@42 51 E T2, T8, T3, T6, Te, Tr, T18, T4, Ta, Tz, T1n, T10, Ti, T5, Tc;
Chris@42 52 T2 = W[0];
Chris@42 53 T8 = W[4];
Chris@42 54 T3 = W[2];
Chris@42 55 T6 = W[3];
Chris@42 56 Te = W[6];
Chris@42 57 Tr = T2 * T8;
Chris@42 58 T18 = T3 * T8;
Chris@42 59 T4 = T2 * T3;
Chris@42 60 Ta = T2 * T6;
Chris@42 61 Tz = T3 * Te;
Chris@42 62 T1n = T8 * Te;
Chris@42 63 T10 = T2 * Te;
Chris@42 64 Ti = W[7];
Chris@42 65 T5 = W[1];
Chris@42 66 Tc = W[5];
Chris@42 67 {
Chris@42 68 E T34, T31, T2X, T2T, Tq, T46, T8H, T97, TH, T98, T4b, T8D, TZ, T7f, T4j;
Chris@42 69 E T6t, T1g, T7g, T4q, T6u, T1J, T7m, T6y, T4z, T7l, T8d, T6x, T4G, T2k, T7o;
Chris@42 70 E T7r, T8e, T6B, T4O, T6A, T4V, T7L, T3G, T6P, T61, T6M, T5E, T8n, T7J, T5s;
Chris@42 71 E T6I, T2N, T7A, T55, T6F, T7x, T8i, T5L, T62, T43, T7G, T5S, T63, T7O, T8o;
Chris@42 72 E T2U, T2R, T2V, T58, T3a, T5h, T2Y, T32, T35;
Chris@42 73 {
Chris@42 74 E T1K, T23, T1N, T26, T2b, T1U, T3C, T3j, T3z, T3f, T1R, T29, TR, Th, T2J;
Chris@42 75 E T2F, Td, TP, T3r, T3n, T2w, T2s, T3Q, T3M, T1Z, T1V, T2g, T2c;
Chris@42 76 {
Chris@42 77 E T11, T1C, TM, Tb, TJ, T7, T1o, T19, T1w, T1F, T15, T1s, T1d, T1z, TW;
Chris@42 78 E TS, Ty, T48, TG, T4a;
Chris@42 79 {
Chris@42 80 E T1, TA, Ts, TE, Tw, Tn, Tj, T8G, Tk, To, T14;
Chris@42 81 T1 = Rp[0];
Chris@42 82 TA = FMA(T6, Ti, Tz);
Chris@42 83 T1K = FNMS(T6, Ti, Tz);
Chris@42 84 T14 = T2 * Ti;
Chris@42 85 {
Chris@42 86 E T1r, TD, T1c, Tv;
Chris@42 87 T1r = T8 * Ti;
Chris@42 88 TD = T3 * Ti;
Chris@42 89 T11 = FNMS(T5, Ti, T10);
Chris@42 90 T1C = FMA(T5, Ti, T10);
Chris@42 91 TM = FMA(T5, T3, Ta);
Chris@42 92 Tb = FNMS(T5, T3, Ta);
Chris@42 93 TJ = FNMS(T5, T6, T4);
Chris@42 94 T7 = FMA(T5, T6, T4);
Chris@42 95 T1o = FMA(Tc, Ti, T1n);
Chris@42 96 T23 = FMA(T6, Tc, T18);
Chris@42 97 T19 = FNMS(T6, Tc, T18);
Chris@42 98 T1w = FNMS(T5, Tc, Tr);
Chris@42 99 Ts = FMA(T5, Tc, Tr);
Chris@42 100 T1c = T3 * Tc;
Chris@42 101 Tv = T2 * Tc;
Chris@42 102 T1F = FNMS(T5, Te, T14);
Chris@42 103 T15 = FMA(T5, Te, T14);
Chris@42 104 T1s = FNMS(Tc, Te, T1r);
Chris@42 105 T1N = FMA(T6, Te, TD);
Chris@42 106 TE = FNMS(T6, Te, TD);
Chris@42 107 {
Chris@42 108 E T1T, T3i, T3e, T1Q;
Chris@42 109 T1T = TJ * Tc;
Chris@42 110 T3i = TJ * Ti;
Chris@42 111 T3e = TJ * Te;
Chris@42 112 T1Q = TJ * T8;
Chris@42 113 {
Chris@42 114 E Tg, T2I, T2E, T9;
Chris@42 115 Tg = T7 * Tc;
Chris@42 116 T2I = T7 * Ti;
Chris@42 117 T2E = T7 * Te;
Chris@42 118 T9 = T7 * T8;
Chris@42 119 {
Chris@42 120 E T3q, T3m, T2v, T2r;
Chris@42 121 T3q = T19 * Ti;
Chris@42 122 T3m = T19 * Te;
Chris@42 123 T2v = T1w * Ti;
Chris@42 124 T2r = T1w * Te;
Chris@42 125 {
Chris@42 126 E T2W, T2S, T3P, T3L;
Chris@42 127 T2W = T23 * Ti;
Chris@42 128 T2S = T23 * Te;
Chris@42 129 T3P = Ts * Ti;
Chris@42 130 T3L = Ts * Te;
Chris@42 131 T26 = FNMS(T6, T8, T1c);
Chris@42 132 T1d = FMA(T6, T8, T1c);
Chris@42 133 T1z = FMA(T5, T8, Tv);
Chris@42 134 Tw = FNMS(T5, T8, Tv);
Chris@42 135 T2b = FNMS(TM, T8, T1T);
Chris@42 136 T1U = FMA(TM, T8, T1T);
Chris@42 137 T3C = FNMS(TM, Te, T3i);
Chris@42 138 T3j = FMA(TM, Te, T3i);
Chris@42 139 T3z = FMA(TM, Ti, T3e);
Chris@42 140 T3f = FNMS(TM, Ti, T3e);
Chris@42 141 T1R = FNMS(TM, Tc, T1Q);
Chris@42 142 T29 = FMA(TM, Tc, T1Q);
Chris@42 143 TR = FNMS(Tb, T8, Tg);
Chris@42 144 Th = FMA(Tb, T8, Tg);
Chris@42 145 T34 = FMA(Tb, Te, T2I);
Chris@42 146 T2J = FNMS(Tb, Te, T2I);
Chris@42 147 T31 = FNMS(Tb, Ti, T2E);
Chris@42 148 T2F = FMA(Tb, Ti, T2E);
Chris@42 149 Td = FNMS(Tb, Tc, T9);
Chris@42 150 TP = FMA(Tb, Tc, T9);
Chris@42 151 T2X = FNMS(T26, Te, T2W);
Chris@42 152 T2T = FMA(T26, Ti, T2S);
Chris@42 153 T3r = FNMS(T1d, Te, T3q);
Chris@42 154 T3n = FMA(T1d, Ti, T3m);
Chris@42 155 T2w = FNMS(T1z, Te, T2v);
Chris@42 156 T2s = FMA(T1z, Ti, T2r);
Chris@42 157 T3Q = FNMS(Tw, Te, T3P);
Chris@42 158 T3M = FMA(Tw, Ti, T3L);
Chris@42 159 {
Chris@42 160 E T1Y, T1S, T2f, T2a;
Chris@42 161 T1Y = T1R * Ti;
Chris@42 162 T1S = T1R * Te;
Chris@42 163 T2f = T29 * Ti;
Chris@42 164 T2a = T29 * Te;
Chris@42 165 {
Chris@42 166 E Tm, Tf, TV, TQ;
Chris@42 167 Tm = Td * Ti;
Chris@42 168 Tf = Td * Te;
Chris@42 169 TV = TP * Ti;
Chris@42 170 TQ = TP * Te;
Chris@42 171 T1Z = FNMS(T1U, Te, T1Y);
Chris@42 172 T1V = FMA(T1U, Ti, T1S);
Chris@42 173 T2g = FNMS(T2b, Te, T2f);
Chris@42 174 T2c = FMA(T2b, Ti, T2a);
Chris@42 175 Tn = FNMS(Th, Te, Tm);
Chris@42 176 Tj = FMA(Th, Ti, Tf);
Chris@42 177 TW = FNMS(TR, Te, TV);
Chris@42 178 TS = FMA(TR, Ti, TQ);
Chris@42 179 T8G = Rm[0];
Chris@42 180 }
Chris@42 181 }
Chris@42 182 }
Chris@42 183 }
Chris@42 184 }
Chris@42 185 }
Chris@42 186 }
Chris@42 187 Tk = Rp[WS(rs, 8)];
Chris@42 188 To = Rm[WS(rs, 8)];
Chris@42 189 {
Chris@42 190 E Tt, Tx, Tu, T47, TB, TF, TC, T49;
Chris@42 191 {
Chris@42 192 E Tl, T8E, Tp, T8F;
Chris@42 193 Tt = Rp[WS(rs, 4)];
Chris@42 194 Tx = Rm[WS(rs, 4)];
Chris@42 195 Tl = Tj * Tk;
Chris@42 196 T8E = Tj * To;
Chris@42 197 Tu = Ts * Tt;
Chris@42 198 T47 = Ts * Tx;
Chris@42 199 Tp = FMA(Tn, To, Tl);
Chris@42 200 T8F = FNMS(Tn, Tk, T8E);
Chris@42 201 TB = Rp[WS(rs, 12)];
Chris@42 202 TF = Rm[WS(rs, 12)];
Chris@42 203 Tq = T1 + Tp;
Chris@42 204 T46 = T1 - Tp;
Chris@42 205 T8H = T8F + T8G;
Chris@42 206 T97 = T8G - T8F;
Chris@42 207 TC = TA * TB;
Chris@42 208 T49 = TA * TF;
Chris@42 209 }
Chris@42 210 Ty = FMA(Tw, Tx, Tu);
Chris@42 211 T48 = FNMS(Tw, Tt, T47);
Chris@42 212 TG = FMA(TE, TF, TC);
Chris@42 213 T4a = FNMS(TE, TB, T49);
Chris@42 214 }
Chris@42 215 }
Chris@42 216 {
Chris@42 217 E TT, TX, TO, T4f, TU, T4g;
Chris@42 218 {
Chris@42 219 E TK, TN, TL, T4e;
Chris@42 220 TK = Rp[WS(rs, 2)];
Chris@42 221 TN = Rm[WS(rs, 2)];
Chris@42 222 TH = Ty + TG;
Chris@42 223 T98 = Ty - TG;
Chris@42 224 T4b = T48 - T4a;
Chris@42 225 T8D = T48 + T4a;
Chris@42 226 TL = TJ * TK;
Chris@42 227 T4e = TJ * TN;
Chris@42 228 TT = Rp[WS(rs, 10)];
Chris@42 229 TX = Rm[WS(rs, 10)];
Chris@42 230 TO = FMA(TM, TN, TL);
Chris@42 231 T4f = FNMS(TM, TK, T4e);
Chris@42 232 TU = TS * TT;
Chris@42 233 T4g = TS * TX;
Chris@42 234 }
Chris@42 235 {
Chris@42 236 E T17, T4m, T1a, T1e, T4d, T4i;
Chris@42 237 {
Chris@42 238 E T12, T16, TY, T4h, T13, T4l;
Chris@42 239 T12 = Rp[WS(rs, 14)];
Chris@42 240 T16 = Rm[WS(rs, 14)];
Chris@42 241 TY = FMA(TW, TX, TU);
Chris@42 242 T4h = FNMS(TW, TT, T4g);
Chris@42 243 T13 = T11 * T12;
Chris@42 244 T4l = T11 * T16;
Chris@42 245 TZ = TO + TY;
Chris@42 246 T4d = TO - TY;
Chris@42 247 T7f = T4f + T4h;
Chris@42 248 T4i = T4f - T4h;
Chris@42 249 T17 = FMA(T15, T16, T13);
Chris@42 250 T4m = FNMS(T15, T12, T4l);
Chris@42 251 }
Chris@42 252 T4j = T4d + T4i;
Chris@42 253 T6t = T4i - T4d;
Chris@42 254 T1a = Rp[WS(rs, 6)];
Chris@42 255 T1e = Rm[WS(rs, 6)];
Chris@42 256 {
Chris@42 257 E T1m, T4B, T1H, T4x, T1x, T1A, T1u, T4D, T1y, T4u;
Chris@42 258 {
Chris@42 259 E T1D, T1G, T1E, T4w;
Chris@42 260 {
Chris@42 261 E T1f, T4o, T4k, T4p;
Chris@42 262 {
Chris@42 263 E T1j, T1l, T1b, T4n, T1k, T4A;
Chris@42 264 T1j = Rp[WS(rs, 1)];
Chris@42 265 T1l = Rm[WS(rs, 1)];
Chris@42 266 T1b = T19 * T1a;
Chris@42 267 T4n = T19 * T1e;
Chris@42 268 T1k = T7 * T1j;
Chris@42 269 T4A = T7 * T1l;
Chris@42 270 T1f = FMA(T1d, T1e, T1b);
Chris@42 271 T4o = FNMS(T1d, T1a, T4n);
Chris@42 272 T1m = FMA(Tb, T1l, T1k);
Chris@42 273 T4B = FNMS(Tb, T1j, T4A);
Chris@42 274 }
Chris@42 275 T1g = T17 + T1f;
Chris@42 276 T4k = T17 - T1f;
Chris@42 277 T7g = T4m + T4o;
Chris@42 278 T4p = T4m - T4o;
Chris@42 279 T1D = Rp[WS(rs, 13)];
Chris@42 280 T1G = Rm[WS(rs, 13)];
Chris@42 281 T4q = T4k - T4p;
Chris@42 282 T6u = T4k + T4p;
Chris@42 283 T1E = T1C * T1D;
Chris@42 284 T4w = T1C * T1G;
Chris@42 285 }
Chris@42 286 {
Chris@42 287 E T1p, T1t, T1q, T4C;
Chris@42 288 T1p = Rp[WS(rs, 9)];
Chris@42 289 T1t = Rm[WS(rs, 9)];
Chris@42 290 T1H = FMA(T1F, T1G, T1E);
Chris@42 291 T4x = FNMS(T1F, T1D, T4w);
Chris@42 292 T1q = T1o * T1p;
Chris@42 293 T4C = T1o * T1t;
Chris@42 294 T1x = Rp[WS(rs, 5)];
Chris@42 295 T1A = Rm[WS(rs, 5)];
Chris@42 296 T1u = FMA(T1s, T1t, T1q);
Chris@42 297 T4D = FNMS(T1s, T1p, T4C);
Chris@42 298 T1y = T1w * T1x;
Chris@42 299 T4u = T1w * T1A;
Chris@42 300 }
Chris@42 301 }
Chris@42 302 {
Chris@42 303 E T4t, T1v, T7j, T4E, T1B, T4v;
Chris@42 304 T4t = T1m - T1u;
Chris@42 305 T1v = T1m + T1u;
Chris@42 306 T7j = T4B + T4D;
Chris@42 307 T4E = T4B - T4D;
Chris@42 308 T1B = FMA(T1z, T1A, T1y);
Chris@42 309 T4v = FNMS(T1z, T1x, T4u);
Chris@42 310 {
Chris@42 311 E T4F, T1I, T4y, T7k;
Chris@42 312 T4F = T1B - T1H;
Chris@42 313 T1I = T1B + T1H;
Chris@42 314 T4y = T4v - T4x;
Chris@42 315 T7k = T4v + T4x;
Chris@42 316 T1J = T1v + T1I;
Chris@42 317 T7m = T1v - T1I;
Chris@42 318 T6y = T4t - T4y;
Chris@42 319 T4z = T4t + T4y;
Chris@42 320 T7l = T7j - T7k;
Chris@42 321 T8d = T7j + T7k;
Chris@42 322 T6x = T4E + T4F;
Chris@42 323 T4G = T4E - T4F;
Chris@42 324 }
Chris@42 325 }
Chris@42 326 }
Chris@42 327 }
Chris@42 328 }
Chris@42 329 }
Chris@42 330 {
Chris@42 331 E T5C, T3u, T5y, T7H, T5Z, T3F, T60, T5A, T4T, T4U;
Chris@42 332 {
Chris@42 333 E T1P, T4Q, T2i, T4M, T21, T4S, T28, T4K;
Chris@42 334 {
Chris@42 335 E T1L, T1O, T1W, T20;
Chris@42 336 T1L = Rp[WS(rs, 15)];
Chris@42 337 T1O = Rm[WS(rs, 15)];
Chris@42 338 {
Chris@42 339 E T2d, T2h, T1M, T4P, T2e, T4L;
Chris@42 340 T2d = Rp[WS(rs, 11)];
Chris@42 341 T2h = Rm[WS(rs, 11)];
Chris@42 342 T1M = T1K * T1L;
Chris@42 343 T4P = T1K * T1O;
Chris@42 344 T2e = T2c * T2d;
Chris@42 345 T4L = T2c * T2h;
Chris@42 346 T1P = FMA(T1N, T1O, T1M);
Chris@42 347 T4Q = FNMS(T1N, T1L, T4P);
Chris@42 348 T2i = FMA(T2g, T2h, T2e);
Chris@42 349 T4M = FNMS(T2g, T2d, T4L);
Chris@42 350 }
Chris@42 351 T1W = Rp[WS(rs, 7)];
Chris@42 352 T20 = Rm[WS(rs, 7)];
Chris@42 353 {
Chris@42 354 E T24, T27, T1X, T4R, T25, T4J;
Chris@42 355 T24 = Rp[WS(rs, 3)];
Chris@42 356 T27 = Rm[WS(rs, 3)];
Chris@42 357 T1X = T1V * T1W;
Chris@42 358 T4R = T1V * T20;
Chris@42 359 T25 = T23 * T24;
Chris@42 360 T4J = T23 * T27;
Chris@42 361 T21 = FMA(T1Z, T20, T1X);
Chris@42 362 T4S = FNMS(T1Z, T1W, T4R);
Chris@42 363 T28 = FMA(T26, T27, T25);
Chris@42 364 T4K = FNMS(T26, T24, T4J);
Chris@42 365 }
Chris@42 366 }
Chris@42 367 {
Chris@42 368 E T4I, T22, T7p, T2j, T7q, T4N;
Chris@42 369 T4I = T1P - T21;
Chris@42 370 T22 = T1P + T21;
Chris@42 371 T7p = T4Q + T4S;
Chris@42 372 T4T = T4Q - T4S;
Chris@42 373 T4U = T28 - T2i;
Chris@42 374 T2j = T28 + T2i;
Chris@42 375 T7q = T4K + T4M;
Chris@42 376 T4N = T4K - T4M;
Chris@42 377 T2k = T22 + T2j;
Chris@42 378 T7o = T22 - T2j;
Chris@42 379 T7r = T7p - T7q;
Chris@42 380 T8e = T7p + T7q;
Chris@42 381 T6B = T4I - T4N;
Chris@42 382 T4O = T4I + T4N;
Chris@42 383 }
Chris@42 384 }
Chris@42 385 {
Chris@42 386 E T3l, T5W, T3E, T3v, T3t, T3w, T3x, T5Y, T3A, T3B, T3D, T3y, T5z;
Chris@42 387 {
Chris@42 388 E T3g, T3k, T3h, T5V;
Chris@42 389 T3g = Ip[WS(rs, 15)];
Chris@42 390 T3k = Im[WS(rs, 15)];
Chris@42 391 T3A = Ip[WS(rs, 11)];
Chris@42 392 T6A = T4T + T4U;
Chris@42 393 T4V = T4T - T4U;
Chris@42 394 T3h = T3f * T3g;
Chris@42 395 T5V = T3f * T3k;
Chris@42 396 T3B = T3z * T3A;
Chris@42 397 T3D = Im[WS(rs, 11)];
Chris@42 398 T3l = FMA(T3j, T3k, T3h);
Chris@42 399 T5W = FNMS(T3j, T3g, T5V);
Chris@42 400 }
Chris@42 401 {
Chris@42 402 E T3o, T5B, T3s, T3p, T5X;
Chris@42 403 T3o = Ip[WS(rs, 7)];
Chris@42 404 T3E = FMA(T3C, T3D, T3B);
Chris@42 405 T5B = T3z * T3D;
Chris@42 406 T3s = Im[WS(rs, 7)];
Chris@42 407 T3p = T3n * T3o;
Chris@42 408 T3v = Ip[WS(rs, 3)];
Chris@42 409 T5C = FNMS(T3C, T3A, T5B);
Chris@42 410 T5X = T3n * T3s;
Chris@42 411 T3t = FMA(T3r, T3s, T3p);
Chris@42 412 T3w = TP * T3v;
Chris@42 413 T3x = Im[WS(rs, 3)];
Chris@42 414 T5Y = FNMS(T3r, T3o, T5X);
Chris@42 415 }
Chris@42 416 T3u = T3l + T3t;
Chris@42 417 T5y = T3l - T3t;
Chris@42 418 T3y = FMA(TR, T3x, T3w);
Chris@42 419 T5z = TP * T3x;
Chris@42 420 T7H = T5W + T5Y;
Chris@42 421 T5Z = T5W - T5Y;
Chris@42 422 T3F = T3y + T3E;
Chris@42 423 T60 = T3E - T3y;
Chris@42 424 T5A = FNMS(TR, T3v, T5z);
Chris@42 425 }
Chris@42 426 {
Chris@42 427 E T2t, T2q, T2u, T5n, T2L, T53, T2x, T2A, T2C;
Chris@42 428 {
Chris@42 429 E T2n, T2o, T2p, T2G, T2K, T5D, T7I, T5m, T2H, T52;
Chris@42 430 T2n = Ip[0];
Chris@42 431 T7L = T3u - T3F;
Chris@42 432 T3G = T3u + T3F;
Chris@42 433 T5D = T5A - T5C;
Chris@42 434 T7I = T5A + T5C;
Chris@42 435 T6P = T60 - T5Z;
Chris@42 436 T61 = T5Z + T60;
Chris@42 437 T6M = T5y - T5D;
Chris@42 438 T5E = T5y + T5D;
Chris@42 439 T8n = T7H + T7I;
Chris@42 440 T7J = T7H - T7I;
Chris@42 441 T2o = T2 * T2n;
Chris@42 442 T2p = Im[0];
Chris@42 443 T2G = Ip[WS(rs, 12)];
Chris@42 444 T2K = Im[WS(rs, 12)];
Chris@42 445 T2t = Ip[WS(rs, 8)];
Chris@42 446 T2q = FMA(T5, T2p, T2o);
Chris@42 447 T5m = T2 * T2p;
Chris@42 448 T2H = T2F * T2G;
Chris@42 449 T52 = T2F * T2K;
Chris@42 450 T2u = T2s * T2t;
Chris@42 451 T5n = FNMS(T5, T2n, T5m);
Chris@42 452 T2L = FMA(T2J, T2K, T2H);
Chris@42 453 T53 = FNMS(T2J, T2G, T52);
Chris@42 454 T2x = Im[WS(rs, 8)];
Chris@42 455 T2A = Ip[WS(rs, 4)];
Chris@42 456 T2C = Im[WS(rs, 4)];
Chris@42 457 }
Chris@42 458 {
Chris@42 459 E T3N, T3K, T3O, T5H, T41, T5Q, T3R, T3U, T3W;
Chris@42 460 {
Chris@42 461 E T3H, T3I, T3J, T3Y, T40, T5G, T3Z, T5P;
Chris@42 462 {
Chris@42 463 E T2z, T4Z, T5p, T2D, T51, T7v, T5q;
Chris@42 464 T3H = Ip[WS(rs, 1)];
Chris@42 465 {
Chris@42 466 E T2y, T5o, T2B, T50;
Chris@42 467 T2y = FMA(T2w, T2x, T2u);
Chris@42 468 T5o = T2s * T2x;
Chris@42 469 T2B = T8 * T2A;
Chris@42 470 T50 = T8 * T2C;
Chris@42 471 T2z = T2q + T2y;
Chris@42 472 T4Z = T2q - T2y;
Chris@42 473 T5p = FNMS(T2w, T2t, T5o);
Chris@42 474 T2D = FMA(Tc, T2C, T2B);
Chris@42 475 T51 = FNMS(Tc, T2A, T50);
Chris@42 476 T3I = T3 * T3H;
Chris@42 477 }
Chris@42 478 T7v = T5n + T5p;
Chris@42 479 T5q = T5n - T5p;
Chris@42 480 {
Chris@42 481 E T2M, T5r, T7w, T54;
Chris@42 482 T2M = T2D + T2L;
Chris@42 483 T5r = T2D - T2L;
Chris@42 484 T7w = T51 + T53;
Chris@42 485 T54 = T51 - T53;
Chris@42 486 T5s = T5q - T5r;
Chris@42 487 T6I = T5q + T5r;
Chris@42 488 T2N = T2z + T2M;
Chris@42 489 T7A = T2z - T2M;
Chris@42 490 T55 = T4Z + T54;
Chris@42 491 T6F = T4Z - T54;
Chris@42 492 T7x = T7v - T7w;
Chris@42 493 T8i = T7v + T7w;
Chris@42 494 T3J = Im[WS(rs, 1)];
Chris@42 495 }
Chris@42 496 }
Chris@42 497 T3Y = Ip[WS(rs, 5)];
Chris@42 498 T40 = Im[WS(rs, 5)];
Chris@42 499 T3N = Ip[WS(rs, 9)];
Chris@42 500 T3K = FMA(T6, T3J, T3I);
Chris@42 501 T5G = T3 * T3J;
Chris@42 502 T3Z = Td * T3Y;
Chris@42 503 T5P = Td * T40;
Chris@42 504 T3O = T3M * T3N;
Chris@42 505 T5H = FNMS(T6, T3H, T5G);
Chris@42 506 T41 = FMA(Th, T40, T3Z);
Chris@42 507 T5Q = FNMS(Th, T3Y, T5P);
Chris@42 508 T3R = Im[WS(rs, 9)];
Chris@42 509 T3U = Ip[WS(rs, 13)];
Chris@42 510 T3W = Im[WS(rs, 13)];
Chris@42 511 }
Chris@42 512 {
Chris@42 513 E T2O, T2P, T2Q, T37, T39, T57, T38, T5g;
Chris@42 514 {
Chris@42 515 E T3T, T5F, T5J, T3X, T5O, T7M, T5K;
Chris@42 516 T2O = Ip[WS(rs, 2)];
Chris@42 517 {
Chris@42 518 E T3S, T5I, T3V, T5N;
Chris@42 519 T3S = FMA(T3Q, T3R, T3O);
Chris@42 520 T5I = T3M * T3R;
Chris@42 521 T3V = Te * T3U;
Chris@42 522 T5N = Te * T3W;
Chris@42 523 T3T = T3K + T3S;
Chris@42 524 T5F = T3K - T3S;
Chris@42 525 T5J = FNMS(T3Q, T3N, T5I);
Chris@42 526 T3X = FMA(Ti, T3W, T3V);
Chris@42 527 T5O = FNMS(Ti, T3U, T5N);
Chris@42 528 T2P = T29 * T2O;
Chris@42 529 }
Chris@42 530 T7M = T5H + T5J;
Chris@42 531 T5K = T5H - T5J;
Chris@42 532 {
Chris@42 533 E T42, T5M, T7N, T5R;
Chris@42 534 T42 = T3X + T41;
Chris@42 535 T5M = T3X - T41;
Chris@42 536 T7N = T5O + T5Q;
Chris@42 537 T5R = T5O - T5Q;
Chris@42 538 T5L = T5F + T5K;
Chris@42 539 T62 = T5K - T5F;
Chris@42 540 T43 = T3T + T42;
Chris@42 541 T7G = T42 - T3T;
Chris@42 542 T5S = T5M - T5R;
Chris@42 543 T63 = T5M + T5R;
Chris@42 544 T7O = T7M - T7N;
Chris@42 545 T8o = T7M + T7N;
Chris@42 546 T2Q = Im[WS(rs, 2)];
Chris@42 547 }
Chris@42 548 }
Chris@42 549 T37 = Ip[WS(rs, 6)];
Chris@42 550 T39 = Im[WS(rs, 6)];
Chris@42 551 T2U = Ip[WS(rs, 10)];
Chris@42 552 T2R = FMA(T2b, T2Q, T2P);
Chris@42 553 T57 = T29 * T2Q;
Chris@42 554 T38 = T1R * T37;
Chris@42 555 T5g = T1R * T39;
Chris@42 556 T2V = T2T * T2U;
Chris@42 557 T58 = FNMS(T2b, T2O, T57);
Chris@42 558 T3a = FMA(T1U, T39, T38);
Chris@42 559 T5h = FNMS(T1U, T37, T5g);
Chris@42 560 T2Y = Im[WS(rs, 10)];
Chris@42 561 T32 = Ip[WS(rs, 14)];
Chris@42 562 T35 = Im[WS(rs, 14)];
Chris@42 563 }
Chris@42 564 }
Chris@42 565 }
Chris@42 566 }
Chris@42 567 }
Chris@42 568 {
Chris@42 569 E T5c, T5t, T5j, T5u, T88, T90, T8Z, T8b;
Chris@42 570 {
Chris@42 571 E T7e, T8T, T7y, T7D, T7h, T8U, T8S, T8R;
Chris@42 572 {
Chris@42 573 E T8c, T1i, T8A, T8z, T8O, T8J, T8N, T2l, T8L, T45, T8t, T8l, T8u, T8q, T3c;
Chris@42 574 E T8k, T8p, T8w, T2m;
Chris@42 575 {
Chris@42 576 E T8x, T8y, T8j, T8C, T8I;
Chris@42 577 {
Chris@42 578 E TI, T30, T56, T5a, T36, T5f, T1h, T7B, T5b;
Chris@42 579 TI = Tq + TH;
Chris@42 580 T7e = Tq - TH;
Chris@42 581 {
Chris@42 582 E T2Z, T59, T33, T5e;
Chris@42 583 T2Z = FMA(T2X, T2Y, T2V);
Chris@42 584 T59 = T2T * T2Y;
Chris@42 585 T33 = T31 * T32;
Chris@42 586 T5e = T31 * T35;
Chris@42 587 T30 = T2R + T2Z;
Chris@42 588 T56 = T2R - T2Z;
Chris@42 589 T5a = FNMS(T2X, T2U, T59);
Chris@42 590 T36 = FMA(T34, T35, T33);
Chris@42 591 T5f = FNMS(T34, T32, T5e);
Chris@42 592 T1h = TZ + T1g;
Chris@42 593 T8T = T1g - TZ;
Chris@42 594 }
Chris@42 595 T7B = T58 + T5a;
Chris@42 596 T5b = T58 - T5a;
Chris@42 597 {
Chris@42 598 E T3b, T5d, T7C, T5i;
Chris@42 599 T3b = T36 + T3a;
Chris@42 600 T5d = T36 - T3a;
Chris@42 601 T7C = T5f + T5h;
Chris@42 602 T5i = T5f - T5h;
Chris@42 603 T5c = T56 + T5b;
Chris@42 604 T5t = T5b - T56;
Chris@42 605 T3c = T30 + T3b;
Chris@42 606 T7y = T3b - T30;
Chris@42 607 T5j = T5d - T5i;
Chris@42 608 T5u = T5d + T5i;
Chris@42 609 T7D = T7B - T7C;
Chris@42 610 T8j = T7B + T7C;
Chris@42 611 T8c = TI - T1h;
Chris@42 612 T1i = TI + T1h;
Chris@42 613 }
Chris@42 614 }
Chris@42 615 T8k = T8i - T8j;
Chris@42 616 T8x = T8i + T8j;
Chris@42 617 T8y = T8n + T8o;
Chris@42 618 T8p = T8n - T8o;
Chris@42 619 T7h = T7f - T7g;
Chris@42 620 T8C = T7f + T7g;
Chris@42 621 T8I = T8D + T8H;
Chris@42 622 T8U = T8H - T8D;
Chris@42 623 T8A = T8x + T8y;
Chris@42 624 T8z = T8x - T8y;
Chris@42 625 T8O = T8I - T8C;
Chris@42 626 T8J = T8C + T8I;
Chris@42 627 }
Chris@42 628 {
Chris@42 629 E T8h, T8m, T3d, T44;
Chris@42 630 T8h = T2N - T3c;
Chris@42 631 T3d = T2N + T3c;
Chris@42 632 T44 = T3G + T43;
Chris@42 633 T8m = T3G - T43;
Chris@42 634 T8N = T2k - T1J;
Chris@42 635 T2l = T1J + T2k;
Chris@42 636 T8L = T44 - T3d;
Chris@42 637 T45 = T3d + T44;
Chris@42 638 T8t = T8k - T8h;
Chris@42 639 T8l = T8h + T8k;
Chris@42 640 T8u = T8m + T8p;
Chris@42 641 T8q = T8m - T8p;
Chris@42 642 }
Chris@42 643 T8w = T1i - T2l;
Chris@42 644 T2m = T1i + T2l;
Chris@42 645 {
Chris@42 646 E T8s, T8P, T8Q, T8v;
Chris@42 647 {
Chris@42 648 E T8r, T8M, T8K, T8g, T8B, T8f;
Chris@42 649 T8S = T8q - T8l;
Chris@42 650 T8r = T8l + T8q;
Chris@42 651 T8B = T8d + T8e;
Chris@42 652 T8f = T8d - T8e;
Chris@42 653 Rp[0] = T2m + T45;
Chris@42 654 Rm[WS(rs, 15)] = T2m - T45;
Chris@42 655 Rp[WS(rs, 8)] = T8w + T8z;
Chris@42 656 Rm[WS(rs, 7)] = T8w - T8z;
Chris@42 657 T8M = T8J - T8B;
Chris@42 658 T8K = T8B + T8J;
Chris@42 659 T8g = T8c + T8f;
Chris@42 660 T8s = T8c - T8f;
Chris@42 661 T8R = T8O - T8N;
Chris@42 662 T8P = T8N + T8O;
Chris@42 663 Ip[WS(rs, 8)] = T8L + T8M;
Chris@42 664 Im[WS(rs, 7)] = T8L - T8M;
Chris@42 665 Ip[0] = T8A + T8K;
Chris@42 666 Im[WS(rs, 15)] = T8A - T8K;
Chris@42 667 Rp[WS(rs, 4)] = FMA(KP707106781, T8r, T8g);
Chris@42 668 Rm[WS(rs, 11)] = FNMS(KP707106781, T8r, T8g);
Chris@42 669 T8Q = T8t + T8u;
Chris@42 670 T8v = T8t - T8u;
Chris@42 671 }
Chris@42 672 Ip[WS(rs, 4)] = FMA(KP707106781, T8Q, T8P);
Chris@42 673 Im[WS(rs, 11)] = FMS(KP707106781, T8Q, T8P);
Chris@42 674 Rp[WS(rs, 12)] = FMA(KP707106781, T8v, T8s);
Chris@42 675 Rm[WS(rs, 3)] = FNMS(KP707106781, T8v, T8s);
Chris@42 676 }
Chris@42 677 }
Chris@42 678 {
Chris@42 679 E T7P, T7W, T7i, T7K, T8a, T86, T91, T8V, T8W, T7t, T7T, T7F, T92, T7Z, T89;
Chris@42 680 E T83;
Chris@42 681 {
Chris@42 682 E T7X, T7n, T7s, T7Y, T84, T85;
Chris@42 683 T7P = T7L - T7O;
Chris@42 684 T84 = T7L + T7O;
Chris@42 685 Ip[WS(rs, 12)] = FMA(KP707106781, T8S, T8R);
Chris@42 686 Im[WS(rs, 3)] = FMS(KP707106781, T8S, T8R);
Chris@42 687 T7W = T7e + T7h;
Chris@42 688 T7i = T7e - T7h;
Chris@42 689 T85 = T7J + T7G;
Chris@42 690 T7K = T7G - T7J;
Chris@42 691 T7X = T7m + T7l;
Chris@42 692 T7n = T7l - T7m;
Chris@42 693 T8a = FMA(KP414213562, T84, T85);
Chris@42 694 T86 = FNMS(KP414213562, T85, T84);
Chris@42 695 T91 = T8U - T8T;
Chris@42 696 T8V = T8T + T8U;
Chris@42 697 T7s = T7o + T7r;
Chris@42 698 T7Y = T7o - T7r;
Chris@42 699 {
Chris@42 700 E T82, T81, T7z, T7E;
Chris@42 701 T82 = T7x + T7y;
Chris@42 702 T7z = T7x - T7y;
Chris@42 703 T7E = T7A - T7D;
Chris@42 704 T81 = T7A + T7D;
Chris@42 705 T8W = T7n + T7s;
Chris@42 706 T7t = T7n - T7s;
Chris@42 707 T7T = FNMS(KP414213562, T7z, T7E);
Chris@42 708 T7F = FMA(KP414213562, T7E, T7z);
Chris@42 709 T92 = T7Y - T7X;
Chris@42 710 T7Z = T7X + T7Y;
Chris@42 711 T89 = FNMS(KP414213562, T81, T82);
Chris@42 712 T83 = FMA(KP414213562, T82, T81);
Chris@42 713 }
Chris@42 714 }
Chris@42 715 {
Chris@42 716 E T7S, T7u, T93, T95, T7U, T7Q;
Chris@42 717 T7S = FNMS(KP707106781, T7t, T7i);
Chris@42 718 T7u = FMA(KP707106781, T7t, T7i);
Chris@42 719 T93 = FMA(KP707106781, T92, T91);
Chris@42 720 T95 = FNMS(KP707106781, T92, T91);
Chris@42 721 T7U = FNMS(KP414213562, T7K, T7P);
Chris@42 722 T7Q = FMA(KP414213562, T7P, T7K);
Chris@42 723 {
Chris@42 724 E T80, T87, T8X, T8Y;
Chris@42 725 T88 = FNMS(KP707106781, T7Z, T7W);
Chris@42 726 T80 = FMA(KP707106781, T7Z, T7W);
Chris@42 727 {
Chris@42 728 E T7V, T94, T96, T7R;
Chris@42 729 T7V = T7T + T7U;
Chris@42 730 T94 = T7U - T7T;
Chris@42 731 T96 = T7Q - T7F;
Chris@42 732 T7R = T7F + T7Q;
Chris@42 733 Rm[WS(rs, 1)] = FMA(KP923879532, T7V, T7S);
Chris@42 734 Rp[WS(rs, 14)] = FNMS(KP923879532, T7V, T7S);
Chris@42 735 Ip[WS(rs, 6)] = FMA(KP923879532, T94, T93);
Chris@42 736 Im[WS(rs, 9)] = FMS(KP923879532, T94, T93);
Chris@42 737 Ip[WS(rs, 14)] = FMA(KP923879532, T96, T95);
Chris@42 738 Im[WS(rs, 1)] = FMS(KP923879532, T96, T95);
Chris@42 739 Rp[WS(rs, 6)] = FMA(KP923879532, T7R, T7u);
Chris@42 740 Rm[WS(rs, 9)] = FNMS(KP923879532, T7R, T7u);
Chris@42 741 T87 = T83 + T86;
Chris@42 742 T90 = T86 - T83;
Chris@42 743 }
Chris@42 744 T8Z = FNMS(KP707106781, T8W, T8V);
Chris@42 745 T8X = FMA(KP707106781, T8W, T8V);
Chris@42 746 T8Y = T89 + T8a;
Chris@42 747 T8b = T89 - T8a;
Chris@42 748 Rp[WS(rs, 2)] = FMA(KP923879532, T87, T80);
Chris@42 749 Rm[WS(rs, 13)] = FNMS(KP923879532, T87, T80);
Chris@42 750 Ip[WS(rs, 2)] = FMA(KP923879532, T8Y, T8X);
Chris@42 751 Im[WS(rs, 13)] = FMS(KP923879532, T8Y, T8X);
Chris@42 752 }
Chris@42 753 }
Chris@42 754 }
Chris@42 755 }
Chris@42 756 {
Chris@42 757 E T6s, T9o, T9n, T6v, T6Q, T6N, T6J, T6G, T9k, T9j;
Chris@42 758 {
Chris@42 759 E T6c, T4s, T9i, T4X, T9h, T9b, T9c, T6f, T5U, T6k, T64, T5k, T5v;
Chris@42 760 {
Chris@42 761 E T6d, T6e, T99, T9a, T5T;
Chris@42 762 {
Chris@42 763 E T4c, T4r, T4H, T4W;
Chris@42 764 T6s = T46 - T4b;
Chris@42 765 T4c = T46 + T4b;
Chris@42 766 Rp[WS(rs, 10)] = FMA(KP923879532, T8b, T88);
Chris@42 767 Rm[WS(rs, 5)] = FNMS(KP923879532, T8b, T88);
Chris@42 768 Ip[WS(rs, 10)] = FMA(KP923879532, T90, T8Z);
Chris@42 769 Im[WS(rs, 5)] = FMS(KP923879532, T90, T8Z);
Chris@42 770 T4r = T4j + T4q;
Chris@42 771 T9o = T4q - T4j;
Chris@42 772 T6d = FNMS(KP414213562, T4z, T4G);
Chris@42 773 T4H = FMA(KP414213562, T4G, T4z);
Chris@42 774 T4W = FNMS(KP414213562, T4V, T4O);
Chris@42 775 T6e = FMA(KP414213562, T4O, T4V);
Chris@42 776 T9n = T98 + T97;
Chris@42 777 T99 = T97 - T98;
Chris@42 778 T6c = FNMS(KP707106781, T4r, T4c);
Chris@42 779 T4s = FMA(KP707106781, T4r, T4c);
Chris@42 780 T9i = T4W - T4H;
Chris@42 781 T4X = T4H + T4W;
Chris@42 782 T9a = T6t + T6u;
Chris@42 783 T6v = T6t - T6u;
Chris@42 784 }
Chris@42 785 T6Q = T5S - T5L;
Chris@42 786 T5T = T5L + T5S;
Chris@42 787 T9h = FNMS(KP707106781, T9a, T99);
Chris@42 788 T9b = FMA(KP707106781, T9a, T99);
Chris@42 789 T9c = T6d + T6e;
Chris@42 790 T6f = T6d - T6e;
Chris@42 791 T5U = FMA(KP707106781, T5T, T5E);
Chris@42 792 T6k = FNMS(KP707106781, T5T, T5E);
Chris@42 793 T64 = T62 + T63;
Chris@42 794 T6N = T63 - T62;
Chris@42 795 T6J = T5c - T5j;
Chris@42 796 T5k = T5c + T5j;
Chris@42 797 T5v = T5t + T5u;
Chris@42 798 T6G = T5u - T5t;
Chris@42 799 }
Chris@42 800 {
Chris@42 801 E T6m, T6q, T6j, T6p, T9f, T9g;
Chris@42 802 {
Chris@42 803 E T68, T4Y, T6a, T66, T69, T5x, T9d, T6l, T65, T9e, T6b, T67;
Chris@42 804 T68 = FNMS(KP923879532, T4X, T4s);
Chris@42 805 T4Y = FMA(KP923879532, T4X, T4s);
Chris@42 806 T6l = FNMS(KP707106781, T64, T61);
Chris@42 807 T65 = FMA(KP707106781, T64, T61);
Chris@42 808 {
Chris@42 809 E T6h, T5l, T6i, T5w;
Chris@42 810 T6h = FNMS(KP707106781, T5k, T55);
Chris@42 811 T5l = FMA(KP707106781, T5k, T55);
Chris@42 812 T6i = FNMS(KP707106781, T5v, T5s);
Chris@42 813 T5w = FMA(KP707106781, T5v, T5s);
Chris@42 814 T6m = FMA(KP668178637, T6l, T6k);
Chris@42 815 T6q = FNMS(KP668178637, T6k, T6l);
Chris@42 816 T6a = FMA(KP198912367, T5U, T65);
Chris@42 817 T66 = FNMS(KP198912367, T65, T5U);
Chris@42 818 T6j = FNMS(KP668178637, T6i, T6h);
Chris@42 819 T6p = FMA(KP668178637, T6h, T6i);
Chris@42 820 T69 = FNMS(KP198912367, T5l, T5w);
Chris@42 821 T5x = FMA(KP198912367, T5w, T5l);
Chris@42 822 }
Chris@42 823 T9d = FMA(KP923879532, T9c, T9b);
Chris@42 824 T9f = FNMS(KP923879532, T9c, T9b);
Chris@42 825 T9e = T69 + T6a;
Chris@42 826 T6b = T69 - T6a;
Chris@42 827 T9g = T66 - T5x;
Chris@42 828 T67 = T5x + T66;
Chris@42 829 Ip[WS(rs, 1)] = FMA(KP980785280, T9e, T9d);
Chris@42 830 Im[WS(rs, 14)] = FMS(KP980785280, T9e, T9d);
Chris@42 831 Rp[WS(rs, 1)] = FMA(KP980785280, T67, T4Y);
Chris@42 832 Rm[WS(rs, 14)] = FNMS(KP980785280, T67, T4Y);
Chris@42 833 Rp[WS(rs, 9)] = FMA(KP980785280, T6b, T68);
Chris@42 834 Rm[WS(rs, 6)] = FNMS(KP980785280, T6b, T68);
Chris@42 835 }
Chris@42 836 {
Chris@42 837 E T6o, T9l, T9m, T6r, T6g, T6n;
Chris@42 838 T6o = FMA(KP923879532, T6f, T6c);
Chris@42 839 T6g = FNMS(KP923879532, T6f, T6c);
Chris@42 840 T6n = T6j + T6m;
Chris@42 841 T9k = T6m - T6j;
Chris@42 842 T9j = FMA(KP923879532, T9i, T9h);
Chris@42 843 T9l = FNMS(KP923879532, T9i, T9h);
Chris@42 844 Ip[WS(rs, 9)] = FMA(KP980785280, T9g, T9f);
Chris@42 845 Im[WS(rs, 6)] = FMS(KP980785280, T9g, T9f);
Chris@42 846 Rm[WS(rs, 2)] = FMA(KP831469612, T6n, T6g);
Chris@42 847 Rp[WS(rs, 13)] = FNMS(KP831469612, T6n, T6g);
Chris@42 848 T9m = T6p + T6q;
Chris@42 849 T6r = T6p - T6q;
Chris@42 850 Ip[WS(rs, 13)] = FNMS(KP831469612, T9m, T9l);
Chris@42 851 Im[WS(rs, 2)] = -(FMA(KP831469612, T9m, T9l));
Chris@42 852 Rp[WS(rs, 5)] = FMA(KP831469612, T6r, T6o);
Chris@42 853 Rm[WS(rs, 10)] = FNMS(KP831469612, T6r, T6o);
Chris@42 854 }
Chris@42 855 }
Chris@42 856 }
Chris@42 857 {
Chris@42 858 E T6Y, T6w, T9w, T6D, T9v, T9p, T9q, T71, T6H, T74, T78, T7c, T6W, T6S;
Chris@42 859 {
Chris@42 860 E T6Z, T6z, T6C, T70;
Chris@42 861 T6Z = FNMS(KP414213562, T6x, T6y);
Chris@42 862 T6z = FMA(KP414213562, T6y, T6x);
Chris@42 863 Ip[WS(rs, 5)] = FMA(KP831469612, T9k, T9j);
Chris@42 864 Im[WS(rs, 10)] = FMS(KP831469612, T9k, T9j);
Chris@42 865 T6Y = FNMS(KP707106781, T6v, T6s);
Chris@42 866 T6w = FMA(KP707106781, T6v, T6s);
Chris@42 867 T6C = FNMS(KP414213562, T6B, T6A);
Chris@42 868 T70 = FMA(KP414213562, T6A, T6B);
Chris@42 869 T9w = T6z + T6C;
Chris@42 870 T6D = T6z - T6C;
Chris@42 871 T9v = FNMS(KP707106781, T9o, T9n);
Chris@42 872 T9p = FMA(KP707106781, T9o, T9n);
Chris@42 873 {
Chris@42 874 E T77, T6O, T76, T6R;
Chris@42 875 T9q = T70 - T6Z;
Chris@42 876 T71 = T6Z + T70;
Chris@42 877 T77 = FMA(KP707106781, T6N, T6M);
Chris@42 878 T6O = FNMS(KP707106781, T6N, T6M);
Chris@42 879 T76 = FMA(KP707106781, T6Q, T6P);
Chris@42 880 T6R = FNMS(KP707106781, T6Q, T6P);
Chris@42 881 T6H = FNMS(KP707106781, T6G, T6F);
Chris@42 882 T74 = FMA(KP707106781, T6G, T6F);
Chris@42 883 T78 = FMA(KP198912367, T77, T76);
Chris@42 884 T7c = FNMS(KP198912367, T76, T77);
Chris@42 885 T6W = FNMS(KP668178637, T6O, T6R);
Chris@42 886 T6S = FMA(KP668178637, T6R, T6O);
Chris@42 887 }
Chris@42 888 }
Chris@42 889 {
Chris@42 890 E T6U, T6E, T9r, T9t, T73, T6K;
Chris@42 891 T6U = FNMS(KP923879532, T6D, T6w);
Chris@42 892 T6E = FMA(KP923879532, T6D, T6w);
Chris@42 893 T9r = FMA(KP923879532, T9q, T9p);
Chris@42 894 T9t = FNMS(KP923879532, T9q, T9p);
Chris@42 895 T73 = FMA(KP707106781, T6J, T6I);
Chris@42 896 T6K = FNMS(KP707106781, T6J, T6I);
Chris@42 897 {
Chris@42 898 E T7a, T9x, T9y, T7d;
Chris@42 899 {
Chris@42 900 E T72, T7b, T6V, T6L, T79, T75;
Chris@42 901 T7a = FMA(KP923879532, T71, T6Y);
Chris@42 902 T72 = FNMS(KP923879532, T71, T6Y);
Chris@42 903 T75 = FMA(KP198912367, T74, T73);
Chris@42 904 T7b = FNMS(KP198912367, T73, T74);
Chris@42 905 T6V = FNMS(KP668178637, T6H, T6K);
Chris@42 906 T6L = FMA(KP668178637, T6K, T6H);
Chris@42 907 T79 = T75 + T78;
Chris@42 908 T9A = T78 - T75;
Chris@42 909 T9z = FMA(KP923879532, T9w, T9v);
Chris@42 910 T9x = FNMS(KP923879532, T9w, T9v);
Chris@42 911 {
Chris@42 912 E T6X, T9s, T9u, T6T;
Chris@42 913 T6X = T6V + T6W;
Chris@42 914 T9s = T6V - T6W;
Chris@42 915 T9u = T6S - T6L;
Chris@42 916 T6T = T6L + T6S;
Chris@42 917 Rp[WS(rs, 7)] = FMA(KP980785280, T79, T72);
Chris@42 918 Rm[WS(rs, 8)] = FNMS(KP980785280, T79, T72);
Chris@42 919 Rp[WS(rs, 11)] = FMA(KP831469612, T6X, T6U);
Chris@42 920 Rm[WS(rs, 4)] = FNMS(KP831469612, T6X, T6U);
Chris@42 921 Ip[WS(rs, 3)] = FMA(KP831469612, T9s, T9r);
Chris@42 922 Im[WS(rs, 12)] = FMS(KP831469612, T9s, T9r);
Chris@42 923 Ip[WS(rs, 11)] = FMA(KP831469612, T9u, T9t);
Chris@42 924 Im[WS(rs, 4)] = FMS(KP831469612, T9u, T9t);
Chris@42 925 Rp[WS(rs, 3)] = FMA(KP831469612, T6T, T6E);
Chris@42 926 Rm[WS(rs, 12)] = FNMS(KP831469612, T6T, T6E);
Chris@42 927 T9y = T7c - T7b;
Chris@42 928 T7d = T7b + T7c;
Chris@42 929 }
Chris@42 930 }
Chris@42 931 Ip[WS(rs, 7)] = FMA(KP980785280, T9y, T9x);
Chris@42 932 Im[WS(rs, 8)] = FMS(KP980785280, T9y, T9x);
Chris@42 933 Rm[0] = FMA(KP980785280, T7d, T7a);
Chris@42 934 Rp[WS(rs, 15)] = FNMS(KP980785280, T7d, T7a);
Chris@42 935 }
Chris@42 936 }
Chris@42 937 }
Chris@42 938 }
Chris@42 939 }
Chris@42 940 }
Chris@42 941 }
Chris@42 942 Ip[WS(rs, 15)] = FMA(KP980785280, T9A, T9z);
Chris@42 943 Im[0] = FMS(KP980785280, T9A, T9z);
Chris@42 944 }
Chris@42 945 }
Chris@42 946 }
Chris@42 947
/*
 * Twiddle instruction table for the HAVE_FMA build of this codelet; it is
 * referenced from `desc` below. It holds four TW_CEXP entries whose last
 * field is 1, 3, 9 and 27, followed by a closing TW_NEXT entry.
 * NOTE(review): presumably the last field is the twiddle exponent (powers of
 * three, in line with the generator's -twiddle-log3 flag) -- confirm against
 * the tw_instr definition.
 */
Chris@42 948 static const tw_instr twinstr[] = {
Chris@42 949 {TW_CEXP, 1, 1},
Chris@42 950 {TW_CEXP, 1, 3},
Chris@42 951 {TW_CEXP, 1, 9},
Chris@42 952 {TW_CEXP, 1, 27},
Chris@42 953 {TW_NEXT, 1, 0}
Chris@42 954 };
Chris@42 955
/*
 * Codelet descriptor for the HAVE_FMA build: 32 (the generator's -n value),
 * the name string, the twiddle table `twinstr`, &GENUS, and the counts
 * {236, 98, 252, 0}. The first three counts equal the "236 additions,
 * 98 multiplications, 252 fused multiply/add" figures in this variant's
 * header comment. NOTE(review): the meaning of the trailing 0 is not visible
 * in this file.
 */
Chris@42 956 static const hc2c_desc desc = { 32, "hc2cf2_32", twinstr, &GENUS, {236, 98, 252, 0} };
Chris@42 957
/*
 * Registration hook (HAVE_FMA build): passes the hc2cf2_32 kernel and its
 * descriptor `desc` to the planner `p` through X(khc2c_register), together
 * with HC2C_VIA_RDFT. X() is a macro defined outside this view (presumably a
 * name-prefixing macro -- confirm).
 */
Chris@42 958 void X(codelet_hc2cf2_32) (planner *p) {
Chris@42 959 X(khc2c_register) (p, hc2cf2_32, &desc, HC2C_VIA_RDFT);
Chris@42 960 }
Chris@42 961 #else /* HAVE_FMA */
Chris@42 962
Chris@42 963 /* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hc2cf2_32 -include hc2cf.h */
Chris@42 964
Chris@42 965 /*
Chris@42 966 * This function contains 488 FP additions, 280 FP multiplications,
Chris@42 967 * (or, 376 additions, 168 multiplications, 112 fused multiply/add),
Chris@42 968 * 158 stack variables, 7 constants, and 128 memory accesses
Chris@42 969 */
Chris@42 970 #include "hc2cf.h"
Chris@42 971
/*
 * hc2cf2_32 -- machine-generated codelet, non-FMA variant (this is the #else
 * branch of HAVE_FMA; see the "Generated by" command line above: gen_hc2c,
 * -n 32, -dit, -twiddle-log3, -precompute-twiddles).
 *
 * For each m in [mb, me) it reads 16 strided elements (indices WS(rs, 0..15))
 * from each of Rp, Ip, Rm and Im, combines them with factors derived from
 * W[0..7], and stores 16 results back into each of the same four arrays,
 * i.e. the transform is done in place (64 loads + 64 stores per iteration).
 *
 * Parameters:
 *   Rp, Ip - arrays advanced by +ms per iteration; read and written.
 *   Rm, Im - arrays advanced by -ms per iteration; read and written.
 *   W      - factor table; eight values are consumed per iteration.
 *   rs     - stride argument handed to WS() for element addressing.
 *   mb, me - half-open iteration range for m.
 *   ms     - per-iteration pointer step.
 *
 * NOTE(review): the names suggest Rp/Ip and Rm/Im are the real/imaginary
 * parts of the two halves of a halfcomplex array and W holds twiddle
 * factors -- confirm against hc2cf.h / codelet-rdft.h.
 */
Chris@42 972 static void hc2cf2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 973 {
/*
 * Numeric constants: sin(pi/16), cos(pi/16), sin(3*pi/16), cos(3*pi/16),
 * sin(pi/8), cos(pi/8) and sqrt(1/2), in declaration order.
 */
Chris@42 974 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@42 975 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 976 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@42 977 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 978 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@42 979 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 980 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 981 {
Chris@42 982 INT m;
/*
 * W is first offset by (mb - 1) * 8; each iteration then steps Rp/Ip by +ms,
 * Rm/Im by -ms and W by +8.
 */
Chris@42 983 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(128, rs)) {
Chris@42 984 E T2, T5, T3, T6, T8, TM, TO, Td, T9, Te, Th, Tl, TD, TH, T1y;
Chris@42 985 E T1H, T15, T1A, T11, T1F, T1n, T1p, T2q, T2I, T2u, T2K, T2V, T3b, T2Z, T3d;
Chris@42 986 E Tu, Ty, T3l, T3n, T1t, T1v, T2f, T2h, T1a, T1e, T32, T34, T1W, T1Y, T2C;
Chris@42 987 E T2E, Tg, TR, Tk, TS, Tm, TV, To, TT, T1M, T21, T1P, T22, T1Q, T25;
Chris@42 988 E T1S, T23;
/*
 * Factor set-up: only W[0..7] are loaded. Every other factor used further
 * down (T8/Td, TM/TO, Tu/Ty, TD/TH, ...) is built here from sums and
 * differences of products of those eight values instead of being read from W.
 */
Chris@42 989 {
Chris@42 990 E Ts, T1d, Tx, T18, Tt, T1c, Tw, T19, TB, T14, TG, TZ, TC, T13, TF;
Chris@42 991 E T10;
Chris@42 992 {
Chris@42 993 E T4, Tc, T7, Tb;
Chris@42 994 T2 = W[0];
Chris@42 995 T5 = W[1];
Chris@42 996 T3 = W[2];
Chris@42 997 T6 = W[3];
Chris@42 998 T4 = T2 * T3;
Chris@42 999 Tc = T5 * T3;
Chris@42 1000 T7 = T5 * T6;
Chris@42 1001 Tb = T2 * T6;
Chris@42 1002 T8 = T4 + T7;
Chris@42 1003 TM = T4 - T7;
Chris@42 1004 TO = Tb + Tc;
Chris@42 1005 Td = Tb - Tc;
Chris@42 1006 T9 = W[4];
Chris@42 1007 Ts = T2 * T9;
Chris@42 1008 T1d = T6 * T9;
Chris@42 1009 Tx = T5 * T9;
Chris@42 1010 T18 = T3 * T9;
Chris@42 1011 Te = W[5];
Chris@42 1012 Tt = T5 * Te;
Chris@42 1013 T1c = T3 * Te;
Chris@42 1014 Tw = T2 * Te;
Chris@42 1015 T19 = T6 * Te;
Chris@42 1016 Th = W[6];
Chris@42 1017 TB = T3 * Th;
Chris@42 1018 T14 = T5 * Th;
Chris@42 1019 TG = T6 * Th;
Chris@42 1020 TZ = T2 * Th;
Chris@42 1021 Tl = W[7];
Chris@42 1022 TC = T6 * Tl;
Chris@42 1023 T13 = T2 * Tl;
Chris@42 1024 TF = T3 * Tl;
Chris@42 1025 T10 = T5 * Tl;
Chris@42 1026 }
Chris@42 1027 TD = TB + TC;
Chris@42 1028 TH = TF - TG;
Chris@42 1029 T1y = TZ + T10;
Chris@42 1030 T1H = TF + TG;
Chris@42 1031 T15 = T13 + T14;
Chris@42 1032 T1A = T13 - T14;
Chris@42 1033 T11 = TZ - T10;
Chris@42 1034 T1F = TB - TC;
Chris@42 1035 T1n = FMA(T9, Th, Te * Tl);
Chris@42 1036 T1p = FNMS(Te, Th, T9 * Tl);
Chris@42 1037 {
Chris@42 1038 E T2o, T2p, T2s, T2t;
Chris@42 1039 T2o = T8 * Th;
Chris@42 1040 T2p = Td * Tl;
Chris@42 1041 T2q = T2o + T2p;
Chris@42 1042 T2I = T2o - T2p;
Chris@42 1043 T2s = T8 * Tl;
Chris@42 1044 T2t = Td * Th;
Chris@42 1045 T2u = T2s - T2t;
Chris@42 1046 T2K = T2s + T2t;
Chris@42 1047 }
Chris@42 1048 {
Chris@42 1049 E T2T, T2U, T2X, T2Y;
Chris@42 1050 T2T = TM * Th;
Chris@42 1051 T2U = TO * Tl;
Chris@42 1052 T2V = T2T - T2U;
Chris@42 1053 T3b = T2T + T2U;
Chris@42 1054 T2X = TM * Tl;
Chris@42 1055 T2Y = TO * Th;
Chris@42 1056 T2Z = T2X + T2Y;
Chris@42 1057 T3d = T2X - T2Y;
Chris@42 1058 Tu = Ts + Tt;
Chris@42 1059 Ty = Tw - Tx;
Chris@42 1060 T3l = FMA(Tu, Th, Ty * Tl);
Chris@42 1061 T3n = FNMS(Ty, Th, Tu * Tl);
Chris@42 1062 }
Chris@42 1063 T1t = Ts - Tt;
Chris@42 1064 T1v = Tw + Tx;
Chris@42 1065 T2f = FMA(T1t, Th, T1v * Tl);
Chris@42 1066 T2h = FNMS(T1v, Th, T1t * Tl);
Chris@42 1067 T1a = T18 - T19;
Chris@42 1068 T1e = T1c + T1d;
Chris@42 1069 T32 = FMA(T1a, Th, T1e * Tl);
Chris@42 1070 T34 = FNMS(T1e, Th, T1a * Tl);
Chris@42 1071 T1W = T18 + T19;
Chris@42 1072 T1Y = T1c - T1d;
Chris@42 1073 T2C = FMA(T1W, Th, T1Y * Tl);
Chris@42 1074 T2E = FNMS(T1Y, Th, T1W * Tl);
Chris@42 1075 {
Chris@42 1076 E Ta, Tf, Ti, Tj;
Chris@42 1077 Ta = T8 * T9;
Chris@42 1078 Tf = Td * Te;
Chris@42 1079 Tg = Ta - Tf;
Chris@42 1080 TR = Ta + Tf;
Chris@42 1081 Ti = T8 * Te;
Chris@42 1082 Tj = Td * T9;
Chris@42 1083 Tk = Ti + Tj;
Chris@42 1084 TS = Ti - Tj;
Chris@42 1085 }
Chris@42 1086 Tm = FMA(Tg, Th, Tk * Tl);
Chris@42 1087 TV = FNMS(TS, Th, TR * Tl);
Chris@42 1088 To = FNMS(Tk, Th, Tg * Tl);
Chris@42 1089 TT = FMA(TR, Th, TS * Tl);
Chris@42 1090 {
Chris@42 1091 E T1K, T1L, T1N, T1O;
Chris@42 1092 T1K = TM * T9;
Chris@42 1093 T1L = TO * Te;
Chris@42 1094 T1M = T1K - T1L;
Chris@42 1095 T21 = T1K + T1L;
Chris@42 1096 T1N = TM * Te;
Chris@42 1097 T1O = TO * T9;
Chris@42 1098 T1P = T1N + T1O;
Chris@42 1099 T22 = T1N - T1O;
Chris@42 1100 }
Chris@42 1101 T1Q = FMA(T1M, Th, T1P * Tl);
Chris@42 1102 T25 = FNMS(T22, Th, T21 * Tl);
Chris@42 1103 T1S = FNMS(T1P, Th, T1M * Tl);
Chris@42 1104 T23 = FMA(T21, Th, T22 * Tl);
Chris@42 1105 }
/*
 * Input stage: each sub-block below loads pairs (x from Rp or Ip, y from Rm
 * or Im, same index), applies a factor pair (a, b) to each pair and forms
 * sums and differences of the results. Index 0 of Rp/Rm is used without a
 * factor. NOTE(review): reading the FMA/FNMS calls as a*x + b*y and
 * a*y - b*x assumes FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b; the
 * macros are defined outside this view.
 */
Chris@42 1106 {
Chris@42 1107 E TL, T6f, T8c, T8q, T3F, T5t, T7I, T7W, T2y, T6B, T6y, T7j, T4k, T5J, T4B;
Chris@42 1108 E T5G, T3h, T6H, T6O, T7o, T4L, T5N, T52, T5Q, T1i, T7V, T6i, T7D, T3K, T5u;
Chris@42 1109 E T3P, T5v, T1E, T6n, T6m, T7e, T3W, T5y, T41, T5z, T29, T6p, T6s, T7f, T47;
Chris@42 1110 E T5B, T4c, T5C, T2R, T6z, T6E, T7k, T4v, T5H, T4E, T5K, T3y, T6P, T6K, T7p;
Chris@42 1111 E T4W, T5R, T55, T5O;
/* Rp/Rm at indices 0, 8, 4, 12. */
Chris@42 1112 {
Chris@42 1113 E T1, T7G, Tq, T7F, TA, T3C, TJ, T3D, Tn, Tp;
Chris@42 1114 T1 = Rp[0];
Chris@42 1115 T7G = Rm[0];
Chris@42 1116 Tn = Rp[WS(rs, 8)];
Chris@42 1117 Tp = Rm[WS(rs, 8)];
Chris@42 1118 Tq = FMA(Tm, Tn, To * Tp);
Chris@42 1119 T7F = FNMS(To, Tn, Tm * Tp);
Chris@42 1120 {
Chris@42 1121 E Tv, Tz, TE, TI;
Chris@42 1122 Tv = Rp[WS(rs, 4)];
Chris@42 1123 Tz = Rm[WS(rs, 4)];
Chris@42 1124 TA = FMA(Tu, Tv, Ty * Tz);
Chris@42 1125 T3C = FNMS(Ty, Tv, Tu * Tz);
Chris@42 1126 TE = Rp[WS(rs, 12)];
Chris@42 1127 TI = Rm[WS(rs, 12)];
Chris@42 1128 TJ = FMA(TD, TE, TH * TI);
Chris@42 1129 T3D = FNMS(TH, TE, TD * TI);
Chris@42 1130 }
Chris@42 1131 {
Chris@42 1132 E Tr, TK, T8a, T8b;
Chris@42 1133 Tr = T1 + Tq;
Chris@42 1134 TK = TA + TJ;
Chris@42 1135 TL = Tr + TK;
Chris@42 1136 T6f = Tr - TK;
Chris@42 1137 T8a = T7G - T7F;
Chris@42 1138 T8b = TA - TJ;
Chris@42 1139 T8c = T8a - T8b;
Chris@42 1140 T8q = T8b + T8a;
Chris@42 1141 }
Chris@42 1142 {
Chris@42 1143 E T3B, T3E, T7E, T7H;
Chris@42 1144 T3B = T1 - Tq;
Chris@42 1145 T3E = T3C - T3D;
Chris@42 1146 T3F = T3B - T3E;
Chris@42 1147 T5t = T3B + T3E;
Chris@42 1148 T7E = T3C + T3D;
Chris@42 1149 T7H = T7F + T7G;
Chris@42 1150 T7I = T7E + T7H;
Chris@42 1151 T7W = T7H - T7E;
Chris@42 1152 }
Chris@42 1153 }
/* Ip/Im at indices 0, 12, 8, 4. */
Chris@42 1154 {
Chris@42 1155 E T2e, T4g, T2w, T4z, T2j, T4h, T2n, T4y;
Chris@42 1156 {
Chris@42 1157 E T2c, T2d, T2r, T2v;
Chris@42 1158 T2c = Ip[0];
Chris@42 1159 T2d = Im[0];
Chris@42 1160 T2e = FMA(T2, T2c, T5 * T2d);
Chris@42 1161 T4g = FNMS(T5, T2c, T2 * T2d);
Chris@42 1162 T2r = Ip[WS(rs, 12)];
Chris@42 1163 T2v = Im[WS(rs, 12)];
Chris@42 1164 T2w = FMA(T2q, T2r, T2u * T2v);
Chris@42 1165 T4z = FNMS(T2u, T2r, T2q * T2v);
Chris@42 1166 }
Chris@42 1167 {
Chris@42 1168 E T2g, T2i, T2l, T2m;
Chris@42 1169 T2g = Ip[WS(rs, 8)];
Chris@42 1170 T2i = Im[WS(rs, 8)];
Chris@42 1171 T2j = FMA(T2f, T2g, T2h * T2i);
Chris@42 1172 T4h = FNMS(T2h, T2g, T2f * T2i);
Chris@42 1173 T2l = Ip[WS(rs, 4)];
Chris@42 1174 T2m = Im[WS(rs, 4)];
Chris@42 1175 T2n = FMA(T9, T2l, Te * T2m);
Chris@42 1176 T4y = FNMS(Te, T2l, T9 * T2m);
Chris@42 1177 }
Chris@42 1178 {
Chris@42 1179 E T2k, T2x, T6w, T6x;
Chris@42 1180 T2k = T2e + T2j;
Chris@42 1181 T2x = T2n + T2w;
Chris@42 1182 T2y = T2k + T2x;
Chris@42 1183 T6B = T2k - T2x;
Chris@42 1184 T6w = T4g + T4h;
Chris@42 1185 T6x = T4y + T4z;
Chris@42 1186 T6y = T6w - T6x;
Chris@42 1187 T7j = T6w + T6x;
Chris@42 1188 }
Chris@42 1189 {
Chris@42 1190 E T4i, T4j, T4x, T4A;
Chris@42 1191 T4i = T4g - T4h;
Chris@42 1192 T4j = T2n - T2w;
Chris@42 1193 T4k = T4i + T4j;
Chris@42 1194 T5J = T4i - T4j;
Chris@42 1195 T4x = T2e - T2j;
Chris@42 1196 T4A = T4y - T4z;
Chris@42 1197 T4B = T4x - T4A;
Chris@42 1198 T5G = T4x + T4A;
Chris@42 1199 }
Chris@42 1200 }
/* Ip/Im at indices 15, 11, 7, 3. */
Chris@42 1201 {
Chris@42 1202 E T31, T4Y, T3f, T4J, T36, T4Z, T3a, T4I;
Chris@42 1203 {
Chris@42 1204 E T2W, T30, T3c, T3e;
Chris@42 1205 T2W = Ip[WS(rs, 15)];
Chris@42 1206 T30 = Im[WS(rs, 15)];
Chris@42 1207 T31 = FMA(T2V, T2W, T2Z * T30);
Chris@42 1208 T4Y = FNMS(T2Z, T2W, T2V * T30);
Chris@42 1209 T3c = Ip[WS(rs, 11)];
Chris@42 1210 T3e = Im[WS(rs, 11)];
Chris@42 1211 T3f = FMA(T3b, T3c, T3d * T3e);
Chris@42 1212 T4J = FNMS(T3d, T3c, T3b * T3e);
Chris@42 1213 }
Chris@42 1214 {
Chris@42 1215 E T33, T35, T38, T39;
Chris@42 1216 T33 = Ip[WS(rs, 7)];
Chris@42 1217 T35 = Im[WS(rs, 7)];
Chris@42 1218 T36 = FMA(T32, T33, T34 * T35);
Chris@42 1219 T4Z = FNMS(T34, T33, T32 * T35);
Chris@42 1220 T38 = Ip[WS(rs, 3)];
Chris@42 1221 T39 = Im[WS(rs, 3)];
Chris@42 1222 T3a = FMA(TR, T38, TS * T39);
Chris@42 1223 T4I = FNMS(TS, T38, TR * T39);
Chris@42 1224 }
Chris@42 1225 {
Chris@42 1226 E T37, T3g, T6M, T6N;
Chris@42 1227 T37 = T31 + T36;
Chris@42 1228 T3g = T3a + T3f;
Chris@42 1229 T3h = T37 + T3g;
Chris@42 1230 T6H = T37 - T3g;
Chris@42 1231 T6M = T4Y + T4Z;
Chris@42 1232 T6N = T4I + T4J;
Chris@42 1233 T6O = T6M - T6N;
Chris@42 1234 T7o = T6M + T6N;
Chris@42 1235 }
Chris@42 1236 {
Chris@42 1237 E T4H, T4K, T50, T51;
Chris@42 1238 T4H = T31 - T36;
Chris@42 1239 T4K = T4I - T4J;
Chris@42 1240 T4L = T4H - T4K;
Chris@42 1241 T5N = T4H + T4K;
Chris@42 1242 T50 = T4Y - T4Z;
Chris@42 1243 T51 = T3a - T3f;
Chris@42 1244 T52 = T50 + T51;
Chris@42 1245 T5Q = T50 - T51;
Chris@42 1246 }
Chris@42 1247 }
/* Rp/Rm at indices 2, 6, 10, 14. */
Chris@42 1248 {
Chris@42 1249 E TQ, T3G, T1g, T3N, TX, T3H, T17, T3M;
Chris@42 1250 {
Chris@42 1251 E TN, TP, T1b, T1f;
Chris@42 1252 TN = Rp[WS(rs, 2)];
Chris@42 1253 TP = Rm[WS(rs, 2)];
Chris@42 1254 TQ = FMA(TM, TN, TO * TP);
Chris@42 1255 T3G = FNMS(TO, TN, TM * TP);
Chris@42 1256 T1b = Rp[WS(rs, 6)];
Chris@42 1257 T1f = Rm[WS(rs, 6)];
Chris@42 1258 T1g = FMA(T1a, T1b, T1e * T1f);
Chris@42 1259 T3N = FNMS(T1e, T1b, T1a * T1f);
Chris@42 1260 }
Chris@42 1261 {
Chris@42 1262 E TU, TW, T12, T16;
Chris@42 1263 TU = Rp[WS(rs, 10)];
Chris@42 1264 TW = Rm[WS(rs, 10)];
Chris@42 1265 TX = FMA(TT, TU, TV * TW);
Chris@42 1266 T3H = FNMS(TV, TU, TT * TW);
Chris@42 1267 T12 = Rp[WS(rs, 14)];
Chris@42 1268 T16 = Rm[WS(rs, 14)];
Chris@42 1269 T17 = FMA(T11, T12, T15 * T16);
Chris@42 1270 T3M = FNMS(T15, T12, T11 * T16);
Chris@42 1271 }
Chris@42 1272 {
Chris@42 1273 E TY, T1h, T6g, T6h;
Chris@42 1274 TY = TQ + TX;
Chris@42 1275 T1h = T17 + T1g;
Chris@42 1276 T1i = TY + T1h;
Chris@42 1277 T7V = T1h - TY;
Chris@42 1278 T6g = T3G + T3H;
Chris@42 1279 T6h = T3M + T3N;
Chris@42 1280 T6i = T6g - T6h;
Chris@42 1281 T7D = T6g + T6h;
Chris@42 1282 }
Chris@42 1283 {
Chris@42 1284 E T3I, T3J, T3L, T3O;
Chris@42 1285 T3I = T3G - T3H;
Chris@42 1286 T3J = TQ - TX;
Chris@42 1287 T3K = T3I - T3J;
Chris@42 1288 T5u = T3J + T3I;
Chris@42 1289 T3L = T17 - T1g;
Chris@42 1290 T3O = T3M - T3N;
Chris@42 1291 T3P = T3L + T3O;
Chris@42 1292 T5v = T3L - T3O;
Chris@42 1293 }
Chris@42 1294 }
/* Rp/Rm at indices 1, 13, 9, 5. */
Chris@42 1295 {
Chris@42 1296 E T1m, T3S, T1C, T3Z, T1r, T3T, T1x, T3Y;
Chris@42 1297 {
Chris@42 1298 E T1k, T1l, T1z, T1B;
Chris@42 1299 T1k = Rp[WS(rs, 1)];
Chris@42 1300 T1l = Rm[WS(rs, 1)];
Chris@42 1301 T1m = FMA(T8, T1k, Td * T1l);
Chris@42 1302 T3S = FNMS(Td, T1k, T8 * T1l);
Chris@42 1303 T1z = Rp[WS(rs, 13)];
Chris@42 1304 T1B = Rm[WS(rs, 13)];
Chris@42 1305 T1C = FMA(T1y, T1z, T1A * T1B);
Chris@42 1306 T3Z = FNMS(T1A, T1z, T1y * T1B);
Chris@42 1307 }
Chris@42 1308 {
Chris@42 1309 E T1o, T1q, T1u, T1w;
Chris@42 1310 T1o = Rp[WS(rs, 9)];
Chris@42 1311 T1q = Rm[WS(rs, 9)];
Chris@42 1312 T1r = FMA(T1n, T1o, T1p * T1q);
Chris@42 1313 T3T = FNMS(T1p, T1o, T1n * T1q);
Chris@42 1314 T1u = Rp[WS(rs, 5)];
Chris@42 1315 T1w = Rm[WS(rs, 5)];
Chris@42 1316 T1x = FMA(T1t, T1u, T1v * T1w);
Chris@42 1317 T3Y = FNMS(T1v, T1u, T1t * T1w);
Chris@42 1318 }
Chris@42 1319 {
Chris@42 1320 E T1s, T1D, T6k, T6l;
Chris@42 1321 T1s = T1m + T1r;
Chris@42 1322 T1D = T1x + T1C;
Chris@42 1323 T1E = T1s + T1D;
Chris@42 1324 T6n = T1s - T1D;
Chris@42 1325 T6k = T3S + T3T;
Chris@42 1326 T6l = T3Y + T3Z;
Chris@42 1327 T6m = T6k - T6l;
Chris@42 1328 T7e = T6k + T6l;
Chris@42 1329 }
Chris@42 1330 {
Chris@42 1331 E T3U, T3V, T3X, T40;
Chris@42 1332 T3U = T3S - T3T;
Chris@42 1333 T3V = T1x - T1C;
Chris@42 1334 T3W = T3U + T3V;
Chris@42 1335 T5y = T3U - T3V;
Chris@42 1336 T3X = T1m - T1r;
Chris@42 1337 T40 = T3Y - T3Z;
Chris@42 1338 T41 = T3X - T40;
Chris@42 1339 T5z = T3X + T40;
Chris@42 1340 }
Chris@42 1341 }
/* Rp/Rm at indices 15, 11, 7, 3. */
Chris@42 1342 {
Chris@42 1343 E T1J, T43, T27, T4a, T1U, T44, T20, T49;
Chris@42 1344 {
Chris@42 1345 E T1G, T1I, T24, T26;
Chris@42 1346 T1G = Rp[WS(rs, 15)];
Chris@42 1347 T1I = Rm[WS(rs, 15)];
Chris@42 1348 T1J = FMA(T1F, T1G, T1H * T1I);
Chris@42 1349 T43 = FNMS(T1H, T1G, T1F * T1I);
Chris@42 1350 T24 = Rp[WS(rs, 11)];
Chris@42 1351 T26 = Rm[WS(rs, 11)];
Chris@42 1352 T27 = FMA(T23, T24, T25 * T26);
Chris@42 1353 T4a = FNMS(T25, T24, T23 * T26);
Chris@42 1354 }
Chris@42 1355 {
Chris@42 1356 E T1R, T1T, T1X, T1Z;
Chris@42 1357 T1R = Rp[WS(rs, 7)];
Chris@42 1358 T1T = Rm[WS(rs, 7)];
Chris@42 1359 T1U = FMA(T1Q, T1R, T1S * T1T);
Chris@42 1360 T44 = FNMS(T1S, T1R, T1Q * T1T);
Chris@42 1361 T1X = Rp[WS(rs, 3)];
Chris@42 1362 T1Z = Rm[WS(rs, 3)];
Chris@42 1363 T20 = FMA(T1W, T1X, T1Y * T1Z);
Chris@42 1364 T49 = FNMS(T1Y, T1X, T1W * T1Z);
Chris@42 1365 }
Chris@42 1366 {
Chris@42 1367 E T1V, T28, T6q, T6r;
Chris@42 1368 T1V = T1J + T1U;
Chris@42 1369 T28 = T20 + T27;
Chris@42 1370 T29 = T1V + T28;
Chris@42 1371 T6p = T1V - T28;
Chris@42 1372 T6q = T43 + T44;
Chris@42 1373 T6r = T49 + T4a;
Chris@42 1374 T6s = T6q - T6r;
Chris@42 1375 T7f = T6q + T6r;
Chris@42 1376 }
Chris@42 1377 {
Chris@42 1378 E T45, T46, T48, T4b;
Chris@42 1379 T45 = T43 - T44;
Chris@42 1380 T46 = T20 - T27;
Chris@42 1381 T47 = T45 + T46;
Chris@42 1382 T5B = T45 - T46;
Chris@42 1383 T48 = T1J - T1U;
Chris@42 1384 T4b = T49 - T4a;
Chris@42 1385 T4c = T48 - T4b;
Chris@42 1386 T5C = T48 + T4b;
Chris@42 1387 }
Chris@42 1388 }
/* Ip/Im at indices 2, 10, 14, 6. */
Chris@42 1389 {
Chris@42 1390 E T2B, T4r, T2G, T4s, T4q, T4t, T2M, T4m, T2P, T4n, T4l, T4o;
Chris@42 1391 {
Chris@42 1392 E T2z, T2A, T2D, T2F;
Chris@42 1393 T2z = Ip[WS(rs, 2)];
Chris@42 1394 T2A = Im[WS(rs, 2)];
Chris@42 1395 T2B = FMA(T21, T2z, T22 * T2A);
Chris@42 1396 T4r = FNMS(T22, T2z, T21 * T2A);
Chris@42 1397 T2D = Ip[WS(rs, 10)];
Chris@42 1398 T2F = Im[WS(rs, 10)];
Chris@42 1399 T2G = FMA(T2C, T2D, T2E * T2F);
Chris@42 1400 T4s = FNMS(T2E, T2D, T2C * T2F);
Chris@42 1401 }
Chris@42 1402 T4q = T2B - T2G;
Chris@42 1403 T4t = T4r - T4s;
Chris@42 1404 {
Chris@42 1405 E T2J, T2L, T2N, T2O;
Chris@42 1406 T2J = Ip[WS(rs, 14)];
Chris@42 1407 T2L = Im[WS(rs, 14)];
Chris@42 1408 T2M = FMA(T2I, T2J, T2K * T2L);
Chris@42 1409 T4m = FNMS(T2K, T2J, T2I * T2L);
Chris@42 1410 T2N = Ip[WS(rs, 6)];
Chris@42 1411 T2O = Im[WS(rs, 6)];
Chris@42 1412 T2P = FMA(T1M, T2N, T1P * T2O);
Chris@42 1413 T4n = FNMS(T1P, T2N, T1M * T2O);
Chris@42 1414 }
Chris@42 1415 T4l = T2M - T2P;
Chris@42 1416 T4o = T4m - T4n;
Chris@42 1417 {
Chris@42 1418 E T2H, T2Q, T6C, T6D;
Chris@42 1419 T2H = T2B + T2G;
Chris@42 1420 T2Q = T2M + T2P;
Chris@42 1421 T2R = T2H + T2Q;
Chris@42 1422 T6z = T2Q - T2H;
Chris@42 1423 T6C = T4r + T4s;
Chris@42 1424 T6D = T4m + T4n;
Chris@42 1425 T6E = T6C - T6D;
Chris@42 1426 T7k = T6C + T6D;
Chris@42 1427 }
Chris@42 1428 {
Chris@42 1429 E T4p, T4u, T4C, T4D;
Chris@42 1430 T4p = T4l - T4o;
Chris@42 1431 T4u = T4q + T4t;
Chris@42 1432 T4v = KP707106781 * (T4p - T4u);
Chris@42 1433 T5H = KP707106781 * (T4u + T4p);
Chris@42 1434 T4C = T4t - T4q;
Chris@42 1435 T4D = T4l + T4o;
Chris@42 1436 T4E = KP707106781 * (T4C - T4D);
Chris@42 1437 T5K = KP707106781 * (T4C + T4D);
Chris@42 1438 }
Chris@42 1439 }
/* Ip/Im at indices 1, 9, 13, 5. */
Chris@42 1440 {
Chris@42 1441 E T3k, T4M, T3p, T4N, T4O, T4P, T3t, T4S, T3w, T4T, T4R, T4U;
Chris@42 1442 {
Chris@42 1443 E T3i, T3j, T3m, T3o;
Chris@42 1444 T3i = Ip[WS(rs, 1)];
Chris@42 1445 T3j = Im[WS(rs, 1)];
Chris@42 1446 T3k = FMA(T3, T3i, T6 * T3j);
Chris@42 1447 T4M = FNMS(T6, T3i, T3 * T3j);
Chris@42 1448 T3m = Ip[WS(rs, 9)];
Chris@42 1449 T3o = Im[WS(rs, 9)];
Chris@42 1450 T3p = FMA(T3l, T3m, T3n * T3o);
Chris@42 1451 T4N = FNMS(T3n, T3m, T3l * T3o);
Chris@42 1452 }
Chris@42 1453 T4O = T4M - T4N;
Chris@42 1454 T4P = T3k - T3p;
Chris@42 1455 {
Chris@42 1456 E T3r, T3s, T3u, T3v;
Chris@42 1457 T3r = Ip[WS(rs, 13)];
Chris@42 1458 T3s = Im[WS(rs, 13)];
Chris@42 1459 T3t = FMA(Th, T3r, Tl * T3s);
Chris@42 1460 T4S = FNMS(Tl, T3r, Th * T3s);
Chris@42 1461 T3u = Ip[WS(rs, 5)];
Chris@42 1462 T3v = Im[WS(rs, 5)];
Chris@42 1463 T3w = FMA(Tg, T3u, Tk * T3v);
Chris@42 1464 T4T = FNMS(Tk, T3u, Tg * T3v);
Chris@42 1465 }
Chris@42 1466 T4R = T3t - T3w;
Chris@42 1467 T4U = T4S - T4T;
Chris@42 1468 {
Chris@42 1469 E T3q, T3x, T6I, T6J;
Chris@42 1470 T3q = T3k + T3p;
Chris@42 1471 T3x = T3t + T3w;
Chris@42 1472 T3y = T3q + T3x;
Chris@42 1473 T6P = T3x - T3q;
Chris@42 1474 T6I = T4M + T4N;
Chris@42 1475 T6J = T4S + T4T;
Chris@42 1476 T6K = T6I - T6J;
Chris@42 1477 T7p = T6I + T6J;
Chris@42 1478 }
Chris@42 1479 {
Chris@42 1480 E T4Q, T4V, T53, T54;
Chris@42 1481 T4Q = T4O - T4P;
Chris@42 1482 T4V = T4R + T4U;
Chris@42 1483 T4W = KP707106781 * (T4Q - T4V);
Chris@42 1484 T5R = KP707106781 * (T4Q + T4V);
Chris@42 1485 T53 = T4R - T4U;
Chris@42 1486 T54 = T4P + T4O;
Chris@42 1487 T55 = KP707106781 * (T53 - T54);
Chris@42 1488 T5O = KP707106781 * (T54 + T53);
Chris@42 1489 }
Chris@42 1490 }
/*
 * Output stage: the partial sums are combined and stored. Results are
 * written in pairs: index k of Rp/Ip receives a sum and index 15 - k of
 * Rm/Im the matching difference. This block stores k = 0 and k = 8.
 */
Chris@42 1491 {
Chris@42 1492 E T2b, T7x, T7K, T7M, T3A, T7L, T7A, T7B;
Chris@42 1493 {
Chris@42 1494 E T1j, T2a, T7C, T7J;
Chris@42 1495 T1j = TL + T1i;
Chris@42 1496 T2a = T1E + T29;
Chris@42 1497 T2b = T1j + T2a;
Chris@42 1498 T7x = T1j - T2a;
Chris@42 1499 T7C = T7e + T7f;
Chris@42 1500 T7J = T7D + T7I;
Chris@42 1501 T7K = T7C + T7J;
Chris@42 1502 T7M = T7J - T7C;
Chris@42 1503 }
Chris@42 1504 {
Chris@42 1505 E T2S, T3z, T7y, T7z;
Chris@42 1506 T2S = T2y + T2R;
Chris@42 1507 T3z = T3h + T3y;
Chris@42 1508 T3A = T2S + T3z;
Chris@42 1509 T7L = T3z - T2S;
Chris@42 1510 T7y = T7j + T7k;
Chris@42 1511 T7z = T7o + T7p;
Chris@42 1512 T7A = T7y - T7z;
Chris@42 1513 T7B = T7y + T7z;
Chris@42 1514 }
Chris@42 1515 Rm[WS(rs, 15)] = T2b - T3A;
Chris@42 1516 Im[WS(rs, 15)] = T7B - T7K;
Chris@42 1517 Rp[0] = T2b + T3A;
Chris@42 1518 Ip[0] = T7B + T7K;
Chris@42 1519 Rm[WS(rs, 7)] = T7x - T7A;
Chris@42 1520 Im[WS(rs, 7)] = T7L - T7M;
Chris@42 1521 Rp[WS(rs, 8)] = T7x + T7A;
Chris@42 1522 Ip[WS(rs, 8)] = T7L + T7M;
Chris@42 1523 }
/* Stores k = 4 and k = 12 (with Rm/Im indices 11 and 3). */
Chris@42 1524 {
Chris@42 1525 E T7h, T7t, T7Q, T7S, T7m, T7u, T7r, T7v;
Chris@42 1526 {
Chris@42 1527 E T7d, T7g, T7O, T7P;
Chris@42 1528 T7d = TL - T1i;
Chris@42 1529 T7g = T7e - T7f;
Chris@42 1530 T7h = T7d + T7g;
Chris@42 1531 T7t = T7d - T7g;
Chris@42 1532 T7O = T29 - T1E;
Chris@42 1533 T7P = T7I - T7D;
Chris@42 1534 T7Q = T7O + T7P;
Chris@42 1535 T7S = T7P - T7O;
Chris@42 1536 }
Chris@42 1537 {
Chris@42 1538 E T7i, T7l, T7n, T7q;
Chris@42 1539 T7i = T2y - T2R;
Chris@42 1540 T7l = T7j - T7k;
Chris@42 1541 T7m = T7i + T7l;
Chris@42 1542 T7u = T7l - T7i;
Chris@42 1543 T7n = T3h - T3y;
Chris@42 1544 T7q = T7o - T7p;
Chris@42 1545 T7r = T7n - T7q;
Chris@42 1546 T7v = T7n + T7q;
Chris@42 1547 }
Chris@42 1548 {
Chris@42 1549 E T7s, T7N, T7w, T7R;
Chris@42 1550 T7s = KP707106781 * (T7m + T7r);
Chris@42 1551 Rm[WS(rs, 11)] = T7h - T7s;
Chris@42 1552 Rp[WS(rs, 4)] = T7h + T7s;
Chris@42 1553 T7N = KP707106781 * (T7u + T7v);
Chris@42 1554 Im[WS(rs, 11)] = T7N - T7Q;
Chris@42 1555 Ip[WS(rs, 4)] = T7N + T7Q;
Chris@42 1556 T7w = KP707106781 * (T7u - T7v);
Chris@42 1557 Rm[WS(rs, 3)] = T7t - T7w;
Chris@42 1558 Rp[WS(rs, 12)] = T7t + T7w;
Chris@42 1559 T7R = KP707106781 * (T7r - T7m);
Chris@42 1560 Im[WS(rs, 3)] = T7R - T7S;
Chris@42 1561 Ip[WS(rs, 12)] = T7R + T7S;
Chris@42 1562 }
Chris@42 1563 }
/* Stores k = 6, 14, 2 and 10 (with Rm/Im indices 9, 1, 13 and 5). */
Chris@42 1564 {
Chris@42 1565 E T6j, T7X, T83, T6X, T6u, T7U, T77, T7b, T70, T82, T6G, T6U, T74, T7a, T6R;
Chris@42 1566 E T6V;
Chris@42 1567 {
Chris@42 1568 E T6o, T6t, T6A, T6F;
Chris@42 1569 T6j = T6f - T6i;
Chris@42 1570 T7X = T7V + T7W;
Chris@42 1571 T83 = T7W - T7V;
Chris@42 1572 T6X = T6f + T6i;
Chris@42 1573 T6o = T6m - T6n;
Chris@42 1574 T6t = T6p + T6s;
Chris@42 1575 T6u = KP707106781 * (T6o - T6t);
Chris@42 1576 T7U = KP707106781 * (T6o + T6t);
Chris@42 1577 {
Chris@42 1578 E T75, T76, T6Y, T6Z;
Chris@42 1579 T75 = T6H + T6K;
Chris@42 1580 T76 = T6O + T6P;
Chris@42 1581 T77 = FNMS(KP382683432, T76, KP923879532 * T75);
Chris@42 1582 T7b = FMA(KP923879532, T76, KP382683432 * T75);
Chris@42 1583 T6Y = T6n + T6m;
Chris@42 1584 T6Z = T6p - T6s;
Chris@42 1585 T70 = KP707106781 * (T6Y + T6Z);
Chris@42 1586 T82 = KP707106781 * (T6Z - T6Y);
Chris@42 1587 }
Chris@42 1588 T6A = T6y - T6z;
Chris@42 1589 T6F = T6B - T6E;
Chris@42 1590 T6G = FMA(KP923879532, T6A, KP382683432 * T6F);
Chris@42 1591 T6U = FNMS(KP923879532, T6F, KP382683432 * T6A);
Chris@42 1592 {
Chris@42 1593 E T72, T73, T6L, T6Q;
Chris@42 1594 T72 = T6y + T6z;
Chris@42 1595 T73 = T6B + T6E;
Chris@42 1596 T74 = FMA(KP382683432, T72, KP923879532 * T73);
Chris@42 1597 T7a = FNMS(KP382683432, T73, KP923879532 * T72);
Chris@42 1598 T6L = T6H - T6K;
Chris@42 1599 T6Q = T6O - T6P;
Chris@42 1600 T6R = FNMS(KP923879532, T6Q, KP382683432 * T6L);
Chris@42 1601 T6V = FMA(KP382683432, T6Q, KP923879532 * T6L);
Chris@42 1602 }
Chris@42 1603 }
Chris@42 1604 {
Chris@42 1605 E T6v, T6S, T81, T84;
Chris@42 1606 T6v = T6j + T6u;
Chris@42 1607 T6S = T6G + T6R;
Chris@42 1608 Rm[WS(rs, 9)] = T6v - T6S;
Chris@42 1609 Rp[WS(rs, 6)] = T6v + T6S;
Chris@42 1610 T81 = T6U + T6V;
Chris@42 1611 T84 = T82 + T83;
Chris@42 1612 Im[WS(rs, 9)] = T81 - T84;
Chris@42 1613 Ip[WS(rs, 6)] = T81 + T84;
Chris@42 1614 }
Chris@42 1615 {
Chris@42 1616 E T6T, T6W, T85, T86;
Chris@42 1617 T6T = T6j - T6u;
Chris@42 1618 T6W = T6U - T6V;
Chris@42 1619 Rm[WS(rs, 1)] = T6T - T6W;
Chris@42 1620 Rp[WS(rs, 14)] = T6T + T6W;
Chris@42 1621 T85 = T6R - T6G;
Chris@42 1622 T86 = T83 - T82;
Chris@42 1623 Im[WS(rs, 1)] = T85 - T86;
Chris@42 1624 Ip[WS(rs, 14)] = T85 + T86;
Chris@42 1625 }
Chris@42 1626 {
Chris@42 1627 E T71, T78, T7T, T7Y;
Chris@42 1628 T71 = T6X + T70;
Chris@42 1629 T78 = T74 + T77;
Chris@42 1630 Rm[WS(rs, 13)] = T71 - T78;
Chris@42 1631 Rp[WS(rs, 2)] = T71 + T78;
Chris@42 1632 T7T = T7a + T7b;
Chris@42 1633 T7Y = T7U + T7X;
Chris@42 1634 Im[WS(rs, 13)] = T7T - T7Y;
Chris@42 1635 Ip[WS(rs, 2)] = T7T + T7Y;
Chris@42 1636 }
Chris@42 1637 {
Chris@42 1638 E T79, T7c, T7Z, T80;
Chris@42 1639 T79 = T6X - T70;
Chris@42 1640 T7c = T7a - T7b;
Chris@42 1641 Rm[WS(rs, 5)] = T79 - T7c;
Chris@42 1642 Rp[WS(rs, 10)] = T79 + T7c;
Chris@42 1643 T7Z = T77 - T74;
Chris@42 1644 T80 = T7X - T7U;
Chris@42 1645 Im[WS(rs, 5)] = T7Z - T80;
Chris@42 1646 Ip[WS(rs, 10)] = T7Z + T80;
Chris@42 1647 }
Chris@42 1648 }
/* Stores k = 7, 15, 3 and 11 (with Rm/Im indices 8, 0, 12 and 4). */
Chris@42 1649 {
Chris@42 1650 E T3R, T5d, T8r, T8x, T4e, T8o, T5n, T5r, T4G, T5a, T5g, T8w, T5k, T5q, T57;
Chris@42 1651 E T5b, T3Q, T8p;
Chris@42 1652 T3Q = KP707106781 * (T3K - T3P);
Chris@42 1653 T3R = T3F - T3Q;
Chris@42 1654 T5d = T3F + T3Q;
Chris@42 1655 T8p = KP707106781 * (T5v - T5u);
Chris@42 1656 T8r = T8p + T8q;
Chris@42 1657 T8x = T8q - T8p;
Chris@42 1658 {
Chris@42 1659 E T42, T4d, T5l, T5m;
Chris@42 1660 T42 = FNMS(KP923879532, T41, KP382683432 * T3W);
Chris@42 1661 T4d = FMA(KP382683432, T47, KP923879532 * T4c);
Chris@42 1662 T4e = T42 - T4d;
Chris@42 1663 T8o = T42 + T4d;
Chris@42 1664 T5l = T4L + T4W;
Chris@42 1665 T5m = T52 + T55;
Chris@42 1666 T5n = FNMS(KP555570233, T5m, KP831469612 * T5l);
Chris@42 1667 T5r = FMA(KP831469612, T5m, KP555570233 * T5l);
Chris@42 1668 }
Chris@42 1669 {
Chris@42 1670 E T4w, T4F, T5e, T5f;
Chris@42 1671 T4w = T4k - T4v;
Chris@42 1672 T4F = T4B - T4E;
Chris@42 1673 T4G = FMA(KP980785280, T4w, KP195090322 * T4F);
Chris@42 1674 T5a = FNMS(KP980785280, T4F, KP195090322 * T4w);
Chris@42 1675 T5e = FMA(KP923879532, T3W, KP382683432 * T41);
Chris@42 1676 T5f = FNMS(KP923879532, T47, KP382683432 * T4c);
Chris@42 1677 T5g = T5e + T5f;
Chris@42 1678 T8w = T5f - T5e;
Chris@42 1679 }
Chris@42 1680 {
Chris@42 1681 E T5i, T5j, T4X, T56;
Chris@42 1682 T5i = T4k + T4v;
Chris@42 1683 T5j = T4B + T4E;
Chris@42 1684 T5k = FMA(KP555570233, T5i, KP831469612 * T5j);
Chris@42 1685 T5q = FNMS(KP555570233, T5j, KP831469612 * T5i);
Chris@42 1686 T4X = T4L - T4W;
Chris@42 1687 T56 = T52 - T55;
Chris@42 1688 T57 = FNMS(KP980785280, T56, KP195090322 * T4X);
Chris@42 1689 T5b = FMA(KP195090322, T56, KP980785280 * T4X);
Chris@42 1690 }
Chris@42 1691 {
Chris@42 1692 E T4f, T58, T8v, T8y;
Chris@42 1693 T4f = T3R + T4e;
Chris@42 1694 T58 = T4G + T57;
Chris@42 1695 Rm[WS(rs, 8)] = T4f - T58;
Chris@42 1696 Rp[WS(rs, 7)] = T4f + T58;
Chris@42 1697 T8v = T5a + T5b;
Chris@42 1698 T8y = T8w + T8x;
Chris@42 1699 Im[WS(rs, 8)] = T8v - T8y;
Chris@42 1700 Ip[WS(rs, 7)] = T8v + T8y;
Chris@42 1701 }
Chris@42 1702 {
Chris@42 1703 E T59, T5c, T8z, T8A;
Chris@42 1704 T59 = T3R - T4e;
Chris@42 1705 T5c = T5a - T5b;
Chris@42 1706 Rm[0] = T59 - T5c;
Chris@42 1707 Rp[WS(rs, 15)] = T59 + T5c;
Chris@42 1708 T8z = T57 - T4G;
Chris@42 1709 T8A = T8x - T8w;
Chris@42 1710 Im[0] = T8z - T8A;
Chris@42 1711 Ip[WS(rs, 15)] = T8z + T8A;
Chris@42 1712 }
Chris@42 1713 {
Chris@42 1714 E T5h, T5o, T8n, T8s;
Chris@42 1715 T5h = T5d + T5g;
Chris@42 1716 T5o = T5k + T5n;
Chris@42 1717 Rm[WS(rs, 12)] = T5h - T5o;
Chris@42 1718 Rp[WS(rs, 3)] = T5h + T5o;
Chris@42 1719 T8n = T5q + T5r;
Chris@42 1720 T8s = T8o + T8r;
Chris@42 1721 Im[WS(rs, 12)] = T8n - T8s;
Chris@42 1722 Ip[WS(rs, 3)] = T8n + T8s;
Chris@42 1723 }
Chris@42 1724 {
Chris@42 1725 E T5p, T5s, T8t, T8u;
Chris@42 1726 T5p = T5d - T5g;
Chris@42 1727 T5s = T5q - T5r;
Chris@42 1728 Rm[WS(rs, 4)] = T5p - T5s;
Chris@42 1729 Rp[WS(rs, 11)] = T5p + T5s;
Chris@42 1730 T8t = T5n - T5k;
Chris@42 1731 T8u = T8r - T8o;
Chris@42 1732 Im[WS(rs, 4)] = T8t - T8u;
Chris@42 1733 Ip[WS(rs, 11)] = T8t + T8u;
Chris@42 1734 }
Chris@42 1735 }
/* Stores k = 5, 13, 1 and 9 (with Rm/Im indices 10, 2, 14 and 6). */
Chris@42 1736 {
Chris@42 1737 E T5x, T5Z, T8d, T8j, T5E, T88, T69, T6d, T5M, T5W, T62, T8i, T66, T6c, T5T;
Chris@42 1738 E T5X, T5w, T89;
Chris@42 1739 T5w = KP707106781 * (T5u + T5v);
Chris@42 1740 T5x = T5t - T5w;
Chris@42 1741 T5Z = T5t + T5w;
Chris@42 1742 T89 = KP707106781 * (T3K + T3P);
Chris@42 1743 T8d = T89 + T8c;
Chris@42 1744 T8j = T8c - T89;
Chris@42 1745 {
Chris@42 1746 E T5A, T5D, T67, T68;
Chris@42 1747 T5A = FNMS(KP382683432, T5z, KP923879532 * T5y);
Chris@42 1748 T5D = FMA(KP923879532, T5B, KP382683432 * T5C);
Chris@42 1749 T5E = T5A - T5D;
Chris@42 1750 T88 = T5A + T5D;
Chris@42 1751 T67 = T5N + T5O;
Chris@42 1752 T68 = T5Q + T5R;
Chris@42 1753 T69 = FNMS(KP195090322, T68, KP980785280 * T67);
Chris@42 1754 T6d = FMA(KP195090322, T67, KP980785280 * T68);
Chris@42 1755 }
Chris@42 1756 {
Chris@42 1757 E T5I, T5L, T60, T61;
Chris@42 1758 T5I = T5G - T5H;
Chris@42 1759 T5L = T5J - T5K;
Chris@42 1760 T5M = FMA(KP555570233, T5I, KP831469612 * T5L);
Chris@42 1761 T5W = FNMS(KP831469612, T5I, KP555570233 * T5L);
Chris@42 1762 T60 = FMA(KP382683432, T5y, KP923879532 * T5z);
Chris@42 1763 T61 = FNMS(KP382683432, T5B, KP923879532 * T5C);
Chris@42 1764 T62 = T60 + T61;
Chris@42 1765 T8i = T61 - T60;
Chris@42 1766 }
Chris@42 1767 {
Chris@42 1768 E T64, T65, T5P, T5S;
Chris@42 1769 T64 = T5G + T5H;
Chris@42 1770 T65 = T5J + T5K;
Chris@42 1771 T66 = FMA(KP980785280, T64, KP195090322 * T65);
Chris@42 1772 T6c = FNMS(KP195090322, T64, KP980785280 * T65);
Chris@42 1773 T5P = T5N - T5O;
Chris@42 1774 T5S = T5Q - T5R;
Chris@42 1775 T5T = FNMS(KP831469612, T5S, KP555570233 * T5P);
Chris@42 1776 T5X = FMA(KP831469612, T5P, KP555570233 * T5S);
Chris@42 1777 }
Chris@42 1778 {
Chris@42 1779 E T5F, T5U, T8h, T8k;
Chris@42 1780 T5F = T5x + T5E;
Chris@42 1781 T5U = T5M + T5T;
Chris@42 1782 Rm[WS(rs, 10)] = T5F - T5U;
Chris@42 1783 Rp[WS(rs, 5)] = T5F + T5U;
Chris@42 1784 T8h = T5W + T5X;
Chris@42 1785 T8k = T8i + T8j;
Chris@42 1786 Im[WS(rs, 10)] = T8h - T8k;
Chris@42 1787 Ip[WS(rs, 5)] = T8h + T8k;
Chris@42 1788 }
Chris@42 1789 {
Chris@42 1790 E T5V, T5Y, T8l, T8m;
Chris@42 1791 T5V = T5x - T5E;
Chris@42 1792 T5Y = T5W - T5X;
Chris@42 1793 Rm[WS(rs, 2)] = T5V - T5Y;
Chris@42 1794 Rp[WS(rs, 13)] = T5V + T5Y;
Chris@42 1795 T8l = T5T - T5M;
Chris@42 1796 T8m = T8j - T8i;
Chris@42 1797 Im[WS(rs, 2)] = T8l - T8m;
Chris@42 1798 Ip[WS(rs, 13)] = T8l + T8m;
Chris@42 1799 }
Chris@42 1800 {
Chris@42 1801 E T63, T6a, T87, T8e;
Chris@42 1802 T63 = T5Z + T62;
Chris@42 1803 T6a = T66 + T69;
Chris@42 1804 Rm[WS(rs, 14)] = T63 - T6a;
Chris@42 1805 Rp[WS(rs, 1)] = T63 + T6a;
Chris@42 1806 T87 = T6c + T6d;
Chris@42 1807 T8e = T88 + T8d;
Chris@42 1808 Im[WS(rs, 14)] = T87 - T8e;
Chris@42 1809 Ip[WS(rs, 1)] = T87 + T8e;
Chris@42 1810 }
Chris@42 1811 {
Chris@42 1812 E T6b, T6e, T8f, T8g;
Chris@42 1813 T6b = T5Z - T62;
Chris@42 1814 T6e = T6c - T6d;
Chris@42 1815 Rm[WS(rs, 6)] = T6b - T6e;
Chris@42 1816 Rp[WS(rs, 9)] = T6b + T6e;
Chris@42 1817 T8f = T69 - T66;
Chris@42 1818 T8g = T8d - T88;
Chris@42 1819 Im[WS(rs, 6)] = T8f - T8g;
Chris@42 1820 Ip[WS(rs, 9)] = T8f + T8g;
Chris@42 1821 }
Chris@42 1822 }
Chris@42 1823 }
Chris@42 1824 }
Chris@42 1825 }
Chris@42 1826 }
Chris@42 1827
/*
 * Twiddle instruction table for the non-FMA build; it is referenced from
 * `desc` below. It holds four TW_CEXP entries whose last field is 1, 3, 9
 * and 27, followed by a closing TW_NEXT entry. The kernel above consumes
 * eight W values per iteration (W[0..7]).
 * NOTE(review): presumably each TW_CEXP entry supplies one pair of those
 * values and the last field is the twiddle exponent -- confirm against the
 * tw_instr definition.
 */
Chris@42 1828 static const tw_instr twinstr[] = {
Chris@42 1829 {TW_CEXP, 1, 1},
Chris@42 1830 {TW_CEXP, 1, 3},
Chris@42 1831 {TW_CEXP, 1, 9},
Chris@42 1832 {TW_CEXP, 1, 27},
Chris@42 1833 {TW_NEXT, 1, 0}
Chris@42 1834 };
Chris@42 1835
/*
 * Codelet descriptor for the non-FMA build: 32 (the generator's -n value),
 * the name string, the twiddle table `twinstr`, &GENUS, and the counts
 * {376, 168, 112, 0}. The first three counts equal the "376 additions,
 * 168 multiplications, 112 fused multiply/add" figures in this variant's
 * header comment. NOTE(review): the meaning of the trailing 0 is not visible
 * in this file.
 */
Chris@42 1836 static const hc2c_desc desc = { 32, "hc2cf2_32", twinstr, &GENUS, {376, 168, 112, 0} };
Chris@42 1837
/*
 * Registration hook (non-FMA build): passes the hc2cf2_32 kernel and its
 * descriptor `desc` to the planner `p` through X(khc2c_register), together
 * with HC2C_VIA_RDFT. X() is a macro defined outside this view (presumably a
 * name-prefixing macro -- confirm).
 */
Chris@42 1838 void X(codelet_hc2cf2_32) (planner *p) {
Chris@42 1839 X(khc2c_register) (p, hc2cf2_32, &desc, HC2C_VIA_RDFT);
Chris@42 1840 }
Chris@42 1841 #endif /* HAVE_FMA */