annotate src/fftw-3.3.5/rdft/scalar/r2cf/hc2cfdft2_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:48:58 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hc2cfdft2_32 -include hc2cf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 552 FP additions, 414 FP multiplications,
Chris@42 32 * (or, 300 additions, 162 multiplications, 252 fused multiply/add),
Chris@42 33 * 196 stack variables, 8 constants, and 128 memory accesses
Chris@42 34 */
Chris@42 35 #include "hc2cf.h"
Chris@42 36
Chris@42 37 static void hc2cfdft2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
Chris@42 39 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 40 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 41 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@42 42 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@42 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 44 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@42 45 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 46 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 47 {
Chris@42 48 INT m;
Chris@42 49 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(128, rs)) {
Chris@42 50 E Tax, TaA;
Chris@42 51 {
Chris@42 52 E T1, Th, T2, T5, Ti, Ty, T1t, T3, Tb, Tj, TY, TK, Tl, T4, Tk;
Chris@42 53 T1 = W[0];
Chris@42 54 Th = W[4];
Chris@42 55 T2 = W[2];
Chris@42 56 T5 = W[3];
Chris@42 57 Ti = W[6];
Chris@42 58 Ty = T1 * Th;
Chris@42 59 T1t = T2 * Th;
Chris@42 60 T3 = T1 * T2;
Chris@42 61 Tb = T1 * T5;
Chris@42 62 Tj = Th * Ti;
Chris@42 63 TY = T2 * Ti;
Chris@42 64 TK = T1 * Ti;
Chris@42 65 Tl = W[7];
Chris@42 66 T4 = W[1];
Chris@42 67 Tk = W[5];
Chris@42 68 {
Chris@42 69 E T3j, T7Z, T5b, T93, T6B, T8V, T4d, T8J, T8r, T6e, T8l, T1T, T8C, T54, T8i;
Chris@42 70 E T5O, T94, T31, T8K, T6w, T8U, T3Y, T80, T5g, T8B, T69, T8h, T1s, T8q, T4T;
Chris@42 71 E T8k, T5J, Tx, T8a, T5y, T8d, T4s, T5Y, T8v, T8E, T2k, T82, T6l, T3z, T83;
Chris@42 72 E T5m, T8X, T8O, T2F, T86, T6q, T3M, T85, T5r, T8Y, T8R, TW, T8e, T8x, T4B;
Chris@42 73 E T5D, T8b, T63, T8w;
Chris@42 74 {
Chris@42 75 E TL, T2l, T1c, Tc, T1a, T6, Tm, T2v, Tz, T2q, TR, Ts, T2A, TF, T1H;
Chris@42 76 E T1g, T1d, T1F, T34, T3F, T3B, T32, T3w, T3s, T4p, T4l, T2f, T29, T4K, T4S;
Chris@42 77 E T5G, T5I;
Chris@42 78 {
Chris@42 79 E TZ, T2R, T2H, T15, T2W, T2M, T4I, T4E, T3V, T3S, T4Q, T4M, T1n, T1h, T4X;
Chris@42 80 E T53, T5L, T5N, T5d, T5f;
Chris@42 81 {
Chris@42 82 E T1u, T1A, T51, T4Y, T28, T25, T44, T40, T1O, T1I, T3b, T35, T4b, T3i, T45;
Chris@42 83 E T38, T39, T58, T49, T3e, T41;
Chris@42 84 {
Chris@42 85 E T3g, T3h, T36, T37, TQ;
Chris@42 86 T3g = Ip[0];
Chris@42 87 TZ = FNMS(T5, Tl, TY);
Chris@42 88 T2R = FMA(T5, Tl, TY);
Chris@42 89 TQ = T1 * Tl;
Chris@42 90 {
Chris@42 91 E T14, Tr, T1z, TE;
Chris@42 92 T14 = T2 * Tl;
Chris@42 93 Tr = Th * Tl;
Chris@42 94 TL = FMA(T4, Tl, TK);
Chris@42 95 T2l = FNMS(T4, Tl, TK);
Chris@42 96 T1c = FMA(T4, T2, Tb);
Chris@42 97 Tc = FNMS(T4, T2, Tb);
Chris@42 98 T1a = FNMS(T4, T5, T3);
Chris@42 99 T6 = FMA(T4, T5, T3);
Chris@42 100 Tm = FMA(Tk, Tl, Tj);
Chris@42 101 T2v = FNMS(T5, Tk, T1t);
Chris@42 102 T1u = FMA(T5, Tk, T1t);
Chris@42 103 Tz = FNMS(T4, Tk, Ty);
Chris@42 104 T2H = FMA(T4, Tk, Ty);
Chris@42 105 T1z = T2 * Tk;
Chris@42 106 TE = T1 * Tk;
Chris@42 107 T2q = FMA(T4, Ti, TQ);
Chris@42 108 TR = FNMS(T4, Ti, TQ);
Chris@42 109 T15 = FMA(T5, Ti, T14);
Chris@42 110 T2W = FNMS(T5, Ti, T14);
Chris@42 111 Ts = FNMS(Tk, Ti, Tr);
Chris@42 112 {
Chris@42 113 E T1f, T4H, T4D, T1b;
Chris@42 114 T1f = T1a * Tk;
Chris@42 115 T4H = T1a * Tl;
Chris@42 116 T4D = T1a * Ti;
Chris@42 117 T1b = T1a * Th;
Chris@42 118 {
Chris@42 119 E T27, T3E, T3A, T24;
Chris@42 120 T27 = T6 * Tk;
Chris@42 121 T3E = T6 * Tl;
Chris@42 122 T3A = T6 * Ti;
Chris@42 123 T24 = T6 * Th;
Chris@42 124 {
Chris@42 125 E T3v, T3r, T4P, T4L;
Chris@42 126 T3v = T1u * Tl;
Chris@42 127 T3r = T1u * Ti;
Chris@42 128 T4P = T2v * Tl;
Chris@42 129 T4L = T2v * Ti;
Chris@42 130 {
Chris@42 131 E T4o, T4k, T43, T3Z;
Chris@42 132 T4o = T2H * Tl;
Chris@42 133 T4k = T2H * Ti;
Chris@42 134 T43 = Tz * Tl;
Chris@42 135 T3Z = Tz * Ti;
Chris@42 136 T1A = FNMS(T5, Th, T1z);
Chris@42 137 T2A = FMA(T5, Th, T1z);
Chris@42 138 T2M = FNMS(T4, Th, TE);
Chris@42 139 TF = FMA(T4, Th, TE);
Chris@42 140 T1H = FNMS(T1c, Th, T1f);
Chris@42 141 T1g = FMA(T1c, Th, T1f);
Chris@42 142 T51 = FNMS(T1c, Ti, T4H);
Chris@42 143 T4I = FMA(T1c, Ti, T4H);
Chris@42 144 T4Y = FMA(T1c, Tl, T4D);
Chris@42 145 T4E = FNMS(T1c, Tl, T4D);
Chris@42 146 T1d = FNMS(T1c, Tk, T1b);
Chris@42 147 T1F = FMA(T1c, Tk, T1b);
Chris@42 148 T34 = FMA(Tc, Th, T27);
Chris@42 149 T28 = FNMS(Tc, Th, T27);
Chris@42 150 T3V = FNMS(Tc, Ti, T3E);
Chris@42 151 T3F = FMA(Tc, Ti, T3E);
Chris@42 152 T3S = FMA(Tc, Tl, T3A);
Chris@42 153 T3B = FNMS(Tc, Tl, T3A);
Chris@42 154 T25 = FMA(Tc, Tk, T24);
Chris@42 155 T32 = FNMS(Tc, Tk, T24);
Chris@42 156 T3w = FNMS(T1A, Ti, T3v);
Chris@42 157 T3s = FMA(T1A, Tl, T3r);
Chris@42 158 T4Q = FNMS(T2A, Ti, T4P);
Chris@42 159 T4M = FMA(T2A, Tl, T4L);
Chris@42 160 T4p = FNMS(T2M, Ti, T4o);
Chris@42 161 T4l = FMA(T2M, Tl, T4k);
Chris@42 162 T44 = FNMS(TF, Ti, T43);
Chris@42 163 T40 = FMA(TF, Tl, T3Z);
Chris@42 164 {
Chris@42 165 E T1m, T1e, T1N, T1G;
Chris@42 166 T1m = T1d * Tl;
Chris@42 167 T1e = T1d * Ti;
Chris@42 168 T1N = T1F * Tl;
Chris@42 169 T1G = T1F * Ti;
Chris@42 170 {
Chris@42 171 E T2e, T26, T3a, T33;
Chris@42 172 T2e = T25 * Tl;
Chris@42 173 T26 = T25 * Ti;
Chris@42 174 T3a = T32 * Tl;
Chris@42 175 T33 = T32 * Ti;
Chris@42 176 T1n = FNMS(T1g, Ti, T1m);
Chris@42 177 T1h = FMA(T1g, Tl, T1e);
Chris@42 178 T1O = FNMS(T1H, Ti, T1N);
Chris@42 179 T1I = FMA(T1H, Tl, T1G);
Chris@42 180 T2f = FNMS(T28, Ti, T2e);
Chris@42 181 T29 = FMA(T28, Tl, T26);
Chris@42 182 T3b = FNMS(T34, Ti, T3a);
Chris@42 183 T35 = FMA(T34, Tl, T33);
Chris@42 184 T3h = Im[0];
Chris@42 185 }
Chris@42 186 }
Chris@42 187 }
Chris@42 188 }
Chris@42 189 }
Chris@42 190 }
Chris@42 191 }
Chris@42 192 T36 = Ip[WS(rs, 8)];
Chris@42 193 T37 = Im[WS(rs, 8)];
Chris@42 194 {
Chris@42 195 E T47, T48, T3c, T3d;
Chris@42 196 T47 = Rm[0];
Chris@42 197 T4b = T3g + T3h;
Chris@42 198 T3i = T3g - T3h;
Chris@42 199 T45 = T36 + T37;
Chris@42 200 T38 = T36 - T37;
Chris@42 201 T48 = Rp[0];
Chris@42 202 T3c = Rp[WS(rs, 8)];
Chris@42 203 T3d = Rm[WS(rs, 8)];
Chris@42 204 T39 = T35 * T38;
Chris@42 205 T58 = T48 + T47;
Chris@42 206 T49 = T47 - T48;
Chris@42 207 T3e = T3c + T3d;
Chris@42 208 T41 = T3d - T3c;
Chris@42 209 }
Chris@42 210 }
Chris@42 211 {
Chris@42 212 E T4W, T1x, T1y, T6a, T4U, T1D, T1P, T4V, T5K, T52, T1L, T1Q;
Chris@42 213 {
Chris@42 214 E T1B, T1C, T1J, T1K;
Chris@42 215 {
Chris@42 216 E T1v, T6A, T4c, T5a, T6y, T46, T1w, T6z, T4a;
Chris@42 217 T1v = Ip[WS(rs, 3)];
Chris@42 218 T6z = T4 * T49;
Chris@42 219 T4a = T1 * T49;
Chris@42 220 {
Chris@42 221 E T3f, T59, T6x, T42;
Chris@42 222 T3f = FNMS(T3b, T3e, T39);
Chris@42 223 T59 = T35 * T3e;
Chris@42 224 T6x = T44 * T41;
Chris@42 225 T42 = T40 * T41;
Chris@42 226 T6A = FMA(T1, T4b, T6z);
Chris@42 227 T4c = FNMS(T4, T4b, T4a);
Chris@42 228 T3j = T3f + T3i;
Chris@42 229 T7Z = T3i - T3f;
Chris@42 230 T5a = FMA(T3b, T38, T59);
Chris@42 231 T6y = FMA(T40, T45, T6x);
Chris@42 232 T46 = FNMS(T44, T45, T42);
Chris@42 233 T1w = Im[WS(rs, 3)];
Chris@42 234 }
Chris@42 235 T5b = T58 + T5a;
Chris@42 236 T93 = T58 - T5a;
Chris@42 237 T6B = T6y + T6A;
Chris@42 238 T8V = T6A - T6y;
Chris@42 239 T4d = T46 + T4c;
Chris@42 240 T8J = T4c - T46;
Chris@42 241 T4W = T1v + T1w;
Chris@42 242 T1x = T1v - T1w;
Chris@42 243 }
Chris@42 244 T1B = Rp[WS(rs, 3)];
Chris@42 245 T1C = Rm[WS(rs, 3)];
Chris@42 246 T1y = T1u * T1x;
Chris@42 247 T6a = T25 * T4W;
Chris@42 248 T1J = Ip[WS(rs, 11)];
Chris@42 249 T4U = T1B - T1C;
Chris@42 250 T1D = T1B + T1C;
Chris@42 251 T1K = Im[WS(rs, 11)];
Chris@42 252 T1P = Rp[WS(rs, 11)];
Chris@42 253 T4V = T25 * T4U;
Chris@42 254 T5K = T1u * T1D;
Chris@42 255 T52 = T1J + T1K;
Chris@42 256 T1L = T1J - T1K;
Chris@42 257 T1Q = Rm[WS(rs, 11)];
Chris@42 258 }
Chris@42 259 {
Chris@42 260 E T1E, T6c, T1M, T4Z, T1R, T6b;
Chris@42 261 T1E = FNMS(T1A, T1D, T1y);
Chris@42 262 T6c = T4Y * T52;
Chris@42 263 T1M = T1I * T1L;
Chris@42 264 T4Z = T1P - T1Q;
Chris@42 265 T1R = T1P + T1Q;
Chris@42 266 T6b = FNMS(T28, T4U, T6a);
Chris@42 267 {
Chris@42 268 E T5M, T6d, T50, T1S;
Chris@42 269 T4X = FMA(T28, T4W, T4V);
Chris@42 270 T6d = FNMS(T51, T4Z, T6c);
Chris@42 271 T50 = T4Y * T4Z;
Chris@42 272 T1S = FNMS(T1O, T1R, T1M);
Chris@42 273 T5M = T1I * T1R;
Chris@42 274 T8r = T6d - T6b;
Chris@42 275 T6e = T6b + T6d;
Chris@42 276 T8l = T1E - T1S;
Chris@42 277 T1T = T1E + T1S;
Chris@42 278 T53 = FMA(T51, T52, T50);
Chris@42 279 T5L = FMA(T1A, T1x, T5K);
Chris@42 280 T5N = FMA(T1O, T1L, T5M);
Chris@42 281 }
Chris@42 282 }
Chris@42 283 }
Chris@42 284 }
Chris@42 285 {
Chris@42 286 E T3Q, T2K, T2P, T2L, T6s, T3P, T5c, T3W, T2U, T2X, T2Y, T2V;
Chris@42 287 {
Chris@42 288 E T2I, T2J, T2N, T2O, T2S, T3O, T2T;
Chris@42 289 T2I = Ip[WS(rs, 4)];
Chris@42 290 T8C = T53 - T4X;
Chris@42 291 T54 = T4X + T53;
Chris@42 292 T8i = T5L - T5N;
Chris@42 293 T5O = T5L + T5N;
Chris@42 294 T2J = Im[WS(rs, 4)];
Chris@42 295 T2N = Rp[WS(rs, 4)];
Chris@42 296 T2O = Rm[WS(rs, 4)];
Chris@42 297 T2S = Ip[WS(rs, 12)];
Chris@42 298 T3Q = T2I + T2J;
Chris@42 299 T2K = T2I - T2J;
Chris@42 300 T3O = T2O - T2N;
Chris@42 301 T2P = T2N + T2O;
Chris@42 302 T2T = Im[WS(rs, 12)];
Chris@42 303 T2L = T2H * T2K;
Chris@42 304 T6s = Tk * T3O;
Chris@42 305 T3P = Th * T3O;
Chris@42 306 T5c = T2H * T2P;
Chris@42 307 T3W = T2S + T2T;
Chris@42 308 T2U = T2S - T2T;
Chris@42 309 T2X = Rp[WS(rs, 12)];
Chris@42 310 T2Y = Rm[WS(rs, 12)];
Chris@42 311 T2V = T2R * T2U;
Chris@42 312 }
Chris@42 313 {
Chris@42 314 E T2Q, T6t, T3T, T2Z, T3R, T6u, T3U;
Chris@42 315 T2Q = FNMS(T2M, T2P, T2L);
Chris@42 316 T6t = FMA(Th, T3Q, T6s);
Chris@42 317 T3T = T2Y - T2X;
Chris@42 318 T2Z = T2X + T2Y;
Chris@42 319 T3R = FNMS(Tk, T3Q, T3P);
Chris@42 320 T5d = FMA(T2M, T2K, T5c);
Chris@42 321 T6u = T3V * T3T;
Chris@42 322 T3U = T3S * T3T;
Chris@42 323 {
Chris@42 324 E T30, T5e, T6v, T3X;
Chris@42 325 T30 = FNMS(T2W, T2Z, T2V);
Chris@42 326 T5e = T2R * T2Z;
Chris@42 327 T6v = FMA(T3S, T3W, T6u);
Chris@42 328 T3X = FNMS(T3V, T3W, T3U);
Chris@42 329 T94 = T2Q - T30;
Chris@42 330 T31 = T2Q + T30;
Chris@42 331 T8K = T6t - T6v;
Chris@42 332 T6w = T6t + T6v;
Chris@42 333 T8U = T3R - T3X;
Chris@42 334 T3Y = T3R + T3X;
Chris@42 335 T5f = FMA(T2W, T2U, T5e);
Chris@42 336 }
Chris@42 337 }
Chris@42 338 }
Chris@42 339 {
Chris@42 340 E T4J, T12, T65, T13, T4F, T18, T1o, T4G, T5F, T4R, T1k, T1p;
Chris@42 341 {
Chris@42 342 E T16, T17, T10, T11, T1i, T1j;
Chris@42 343 T10 = Ip[WS(rs, 15)];
Chris@42 344 T11 = Im[WS(rs, 15)];
Chris@42 345 T16 = Rp[WS(rs, 15)];
Chris@42 346 T80 = T5d - T5f;
Chris@42 347 T5g = T5d + T5f;
Chris@42 348 T4J = T10 + T11;
Chris@42 349 T12 = T10 - T11;
Chris@42 350 T17 = Rm[WS(rs, 15)];
Chris@42 351 T1i = Ip[WS(rs, 7)];
Chris@42 352 T65 = T4E * T4J;
Chris@42 353 T13 = TZ * T12;
Chris@42 354 T4F = T16 - T17;
Chris@42 355 T18 = T16 + T17;
Chris@42 356 T1j = Im[WS(rs, 7)];
Chris@42 357 T1o = Rp[WS(rs, 7)];
Chris@42 358 T4G = T4E * T4F;
Chris@42 359 T5F = TZ * T18;
Chris@42 360 T4R = T1i + T1j;
Chris@42 361 T1k = T1i - T1j;
Chris@42 362 T1p = Rm[WS(rs, 7)];
Chris@42 363 }
Chris@42 364 {
Chris@42 365 E T19, T67, T1l, T4N, T1q, T66;
Chris@42 366 T19 = FNMS(T15, T18, T13);
Chris@42 367 T67 = T4M * T4R;
Chris@42 368 T1l = T1h * T1k;
Chris@42 369 T4N = T1o - T1p;
Chris@42 370 T1q = T1o + T1p;
Chris@42 371 T66 = FNMS(T4I, T4F, T65);
Chris@42 372 {
Chris@42 373 E T5H, T68, T4O, T1r;
Chris@42 374 T4K = FMA(T4I, T4J, T4G);
Chris@42 375 T68 = FNMS(T4Q, T4N, T67);
Chris@42 376 T4O = T4M * T4N;
Chris@42 377 T1r = FNMS(T1n, T1q, T1l);
Chris@42 378 T5H = T1h * T1q;
Chris@42 379 T8B = T66 - T68;
Chris@42 380 T69 = T66 + T68;
Chris@42 381 T8h = T19 - T1r;
Chris@42 382 T1s = T19 + T1r;
Chris@42 383 T4S = FMA(T4Q, T4R, T4O);
Chris@42 384 T5G = FMA(T15, T12, T5F);
Chris@42 385 T5I = FMA(T1n, T1k, T5H);
Chris@42 386 }
Chris@42 387 }
Chris@42 388 }
Chris@42 389 }
Chris@42 390 {
Chris@42 391 E T2c, T3x, T2d, T23, T5j, T3q, T2i, T3t, T6i, T8t, T5V, T5X;
Chris@42 392 {
Chris@42 393 E Tn, T4i, T9, T4g, Tf, T5U, Ta, T4h, T5u, To, Tt, Tu;
Chris@42 394 {
Chris@42 395 E T7, T8, Td, Te;
Chris@42 396 T7 = Ip[WS(rs, 1)];
Chris@42 397 T8q = T4S - T4K;
Chris@42 398 T4T = T4K + T4S;
Chris@42 399 T8k = T5G - T5I;
Chris@42 400 T5J = T5G + T5I;
Chris@42 401 T8 = Im[WS(rs, 1)];
Chris@42 402 Td = Rp[WS(rs, 1)];
Chris@42 403 Te = Rm[WS(rs, 1)];
Chris@42 404 Tn = Ip[WS(rs, 9)];
Chris@42 405 T4i = T7 + T8;
Chris@42 406 T9 = T7 - T8;
Chris@42 407 T4g = Td - Te;
Chris@42 408 Tf = Td + Te;
Chris@42 409 T5U = T2 * T4i;
Chris@42 410 Ta = T6 * T9;
Chris@42 411 T4h = T2 * T4g;
Chris@42 412 T5u = T6 * Tf;
Chris@42 413 To = Im[WS(rs, 9)];
Chris@42 414 Tt = Rp[WS(rs, 9)];
Chris@42 415 Tu = Rm[WS(rs, 9)];
Chris@42 416 }
Chris@42 417 {
Chris@42 418 E Tg, T4q, Tp, T4m, Tv, T5W, Tq, T4n, T5w;
Chris@42 419 Tg = FNMS(Tc, Tf, Ta);
Chris@42 420 T4q = Tn + To;
Chris@42 421 Tp = Tn - To;
Chris@42 422 T4m = Tt - Tu;
Chris@42 423 Tv = Tt + Tu;
Chris@42 424 T5W = T4l * T4q;
Chris@42 425 Tq = Tm * Tp;
Chris@42 426 T4n = T4l * T4m;
Chris@42 427 T5w = Tm * Tv;
Chris@42 428 {
Chris@42 429 E T5v, Tw, T4j, T5x, T4r;
Chris@42 430 T5v = FMA(Tc, T9, T5u);
Chris@42 431 Tw = FNMS(Ts, Tv, Tq);
Chris@42 432 T4j = FMA(T5, T4i, T4h);
Chris@42 433 T5x = FMA(Ts, Tp, T5w);
Chris@42 434 T4r = FMA(T4p, T4q, T4n);
Chris@42 435 Tx = Tg + Tw;
Chris@42 436 T8a = Tg - Tw;
Chris@42 437 T5y = T5v + T5x;
Chris@42 438 T8d = T5v - T5x;
Chris@42 439 T4s = T4j + T4r;
Chris@42 440 T8t = T4r - T4j;
Chris@42 441 T5V = FNMS(T5, T4g, T5U);
Chris@42 442 T5X = FNMS(T4p, T4m, T5W);
Chris@42 443 }
Chris@42 444 }
Chris@42 445 }
Chris@42 446 {
Chris@42 447 E T3p, T1Y, T1Z, T22, T2g, T6h, T3o, T5i, T2h;
Chris@42 448 {
Chris@42 449 E T20, T21, T1W, T1X, T8u, T2a, T2b, T3n;
Chris@42 450 T1W = Ip[WS(rs, 2)];
Chris@42 451 T1X = Im[WS(rs, 2)];
Chris@42 452 T8u = T5V - T5X;
Chris@42 453 T5Y = T5V + T5X;
Chris@42 454 T20 = Rp[WS(rs, 2)];
Chris@42 455 T3p = T1W + T1X;
Chris@42 456 T1Y = T1W - T1X;
Chris@42 457 T8v = T8t - T8u;
Chris@42 458 T8E = T8u + T8t;
Chris@42 459 T21 = Rm[WS(rs, 2)];
Chris@42 460 T1Z = T1a * T1Y;
Chris@42 461 T2a = Ip[WS(rs, 10)];
Chris@42 462 T2b = Im[WS(rs, 10)];
Chris@42 463 T3n = T21 - T20;
Chris@42 464 T22 = T20 + T21;
Chris@42 465 T2g = Rp[WS(rs, 10)];
Chris@42 466 T2c = T2a - T2b;
Chris@42 467 T3x = T2a + T2b;
Chris@42 468 T6h = T1H * T3n;
Chris@42 469 T3o = T1F * T3n;
Chris@42 470 T5i = T1a * T22;
Chris@42 471 T2d = T29 * T2c;
Chris@42 472 T2h = Rm[WS(rs, 10)];
Chris@42 473 }
Chris@42 474 T23 = FNMS(T1c, T22, T1Z);
Chris@42 475 T5j = FMA(T1c, T1Y, T5i);
Chris@42 476 T3q = FNMS(T1H, T3p, T3o);
Chris@42 477 T2i = T2g + T2h;
Chris@42 478 T3t = T2h - T2g;
Chris@42 479 T6i = FMA(T1F, T3p, T6h);
Chris@42 480 }
Chris@42 481 {
Chris@42 482 E T2y, T3K, T2z, T2u, T5o, T3H, T2D, T3I, T6n;
Chris@42 483 {
Chris@42 484 E T3G, T2o, T2p, T2t, T6m, T3D, T5n, T2B, T2C;
Chris@42 485 {
Chris@42 486 E T2r, T2s, T2m, T2n, T3C, T2w, T2x;
Chris@42 487 {
Chris@42 488 E T8N, T8M, T6j, T3u, T2j;
Chris@42 489 T2m = Ip[WS(rs, 14)];
Chris@42 490 T6j = T3w * T3t;
Chris@42 491 T3u = T3s * T3t;
Chris@42 492 T2j = FNMS(T2f, T2i, T2d);
Chris@42 493 {
Chris@42 494 E T5k, T6k, T3y, T5l;
Chris@42 495 T5k = T29 * T2i;
Chris@42 496 T6k = FMA(T3s, T3x, T6j);
Chris@42 497 T3y = FNMS(T3w, T3x, T3u);
Chris@42 498 T2k = T23 + T2j;
Chris@42 499 T82 = T23 - T2j;
Chris@42 500 T5l = FMA(T2f, T2c, T5k);
Chris@42 501 T6l = T6i + T6k;
Chris@42 502 T8N = T6i - T6k;
Chris@42 503 T3z = T3q + T3y;
Chris@42 504 T8M = T3q - T3y;
Chris@42 505 T83 = T5j - T5l;
Chris@42 506 T5m = T5j + T5l;
Chris@42 507 T2n = Im[WS(rs, 14)];
Chris@42 508 }
Chris@42 509 T8X = T8M + T8N;
Chris@42 510 T8O = T8M - T8N;
Chris@42 511 }
Chris@42 512 T2r = Rp[WS(rs, 14)];
Chris@42 513 T3G = T2m + T2n;
Chris@42 514 T2o = T2m - T2n;
Chris@42 515 T2s = Rm[WS(rs, 14)];
Chris@42 516 T2w = Ip[WS(rs, 6)];
Chris@42 517 T2x = Im[WS(rs, 6)];
Chris@42 518 T2p = T2l * T2o;
Chris@42 519 T3C = T2s - T2r;
Chris@42 520 T2t = T2r + T2s;
Chris@42 521 T2y = T2w - T2x;
Chris@42 522 T3K = T2w + T2x;
Chris@42 523 T6m = T3F * T3C;
Chris@42 524 T3D = T3B * T3C;
Chris@42 525 T5n = T2l * T2t;
Chris@42 526 T2z = T2v * T2y;
Chris@42 527 T2B = Rp[WS(rs, 6)];
Chris@42 528 T2C = Rm[WS(rs, 6)];
Chris@42 529 }
Chris@42 530 T2u = FNMS(T2q, T2t, T2p);
Chris@42 531 T5o = FMA(T2q, T2o, T5n);
Chris@42 532 T3H = FNMS(T3F, T3G, T3D);
Chris@42 533 T2D = T2B + T2C;
Chris@42 534 T3I = T2C - T2B;
Chris@42 535 T6n = FMA(T3B, T3G, T6m);
Chris@42 536 }
Chris@42 537 {
Chris@42 538 E T4v, TC, T5Z, TD, T4t, TI, TS, T4u, T5z, T4z, TO, TT;
Chris@42 539 {
Chris@42 540 E TG, TH, TA, TB, TM, TN;
Chris@42 541 {
Chris@42 542 E T8Q, T8P, T6o, T3J, T2E;
Chris@42 543 TA = Ip[WS(rs, 5)];
Chris@42 544 T6o = T1g * T3I;
Chris@42 545 T3J = T1d * T3I;
Chris@42 546 T2E = FNMS(T2A, T2D, T2z);
Chris@42 547 {
Chris@42 548 E T5p, T6p, T3L, T5q;
Chris@42 549 T5p = T2v * T2D;
Chris@42 550 T6p = FMA(T1d, T3K, T6o);
Chris@42 551 T3L = FNMS(T1g, T3K, T3J);
Chris@42 552 T2F = T2u + T2E;
Chris@42 553 T86 = T2u - T2E;
Chris@42 554 T5q = FMA(T2A, T2y, T5p);
Chris@42 555 T6q = T6n + T6p;
Chris@42 556 T8Q = T6n - T6p;
Chris@42 557 T3M = T3H + T3L;
Chris@42 558 T8P = T3H - T3L;
Chris@42 559 T85 = T5o - T5q;
Chris@42 560 T5r = T5o + T5q;
Chris@42 561 TB = Im[WS(rs, 5)];
Chris@42 562 }
Chris@42 563 T8Y = T8Q - T8P;
Chris@42 564 T8R = T8P + T8Q;
Chris@42 565 }
Chris@42 566 TG = Rp[WS(rs, 5)];
Chris@42 567 T4v = TA + TB;
Chris@42 568 TC = TA - TB;
Chris@42 569 TH = Rm[WS(rs, 5)];
Chris@42 570 TM = Ip[WS(rs, 13)];
Chris@42 571 T5Z = T32 * T4v;
Chris@42 572 TD = Tz * TC;
Chris@42 573 T4t = TG - TH;
Chris@42 574 TI = TG + TH;
Chris@42 575 TN = Im[WS(rs, 13)];
Chris@42 576 TS = Rp[WS(rs, 13)];
Chris@42 577 T4u = T32 * T4t;
Chris@42 578 T5z = Tz * TI;
Chris@42 579 T4z = TM + TN;
Chris@42 580 TO = TM - TN;
Chris@42 581 TT = Rm[WS(rs, 13)];
Chris@42 582 }
Chris@42 583 {
Chris@42 584 E TJ, T61, TP, T4x, TU;
Chris@42 585 TJ = FNMS(TF, TI, TD);
Chris@42 586 T61 = Ti * T4z;
Chris@42 587 TP = TL * TO;
Chris@42 588 T4x = TS - TT;
Chris@42 589 TU = TS + TT;
Chris@42 590 {
Chris@42 591 E T5A, T60, T5C, T62;
Chris@42 592 T5A = FMA(TF, TC, T5z);
Chris@42 593 {
Chris@42 594 E T4w, T4y, TV, T5B, T4A;
Chris@42 595 T4w = FMA(T34, T4v, T4u);
Chris@42 596 T4y = Ti * T4x;
Chris@42 597 TV = FNMS(TR, TU, TP);
Chris@42 598 T5B = TL * TU;
Chris@42 599 T60 = FNMS(T34, T4t, T5Z);
Chris@42 600 T4A = FMA(Tl, T4z, T4y);
Chris@42 601 TW = TJ + TV;
Chris@42 602 T8e = TJ - TV;
Chris@42 603 T5C = FMA(TR, TO, T5B);
Chris@42 604 T8x = T4w - T4A;
Chris@42 605 T4B = T4w + T4A;
Chris@42 606 T62 = FNMS(Tl, T4x, T61);
Chris@42 607 }
Chris@42 608 T5D = T5A + T5C;
Chris@42 609 T8b = T5A - T5C;
Chris@42 610 T63 = T60 + T62;
Chris@42 611 T8w = T62 - T60;
Chris@42 612 }
Chris@42 613 }
Chris@42 614 }
Chris@42 615 }
Chris@42 616 }
Chris@42 617 }
Chris@42 618 {
Chris@42 619 E T74, T78, T8F, T8y, T7s, T72, T75, T77, T7r, T71, T7f, T7d, T7c, T7g, T7m;
Chris@42 620 E T7k, T7j, T7n, T6V, T6Y, T7T, T7W;
Chris@42 621 {
Chris@42 622 E T6S, T1V, T6I, T3l, T6H, T5Q, T6R, T5t, T56, T6g, T6N, T4f, T6M, T6W, T6D;
Chris@42 623 E T6O;
Chris@42 624 {
Chris@42 625 E T2G, T3k, T5E, T5P, TX, T1U, T5h, T5s;
Chris@42 626 T74 = Tx - TW;
Chris@42 627 TX = Tx + TW;
Chris@42 628 T1U = T1s + T1T;
Chris@42 629 T78 = T1s - T1T;
Chris@42 630 T8F = T8w - T8x;
Chris@42 631 T8y = T8w + T8x;
Chris@42 632 T7s = T2k - T2F;
Chris@42 633 T2G = T2k + T2F;
Chris@42 634 T6S = TX - T1U;
Chris@42 635 T1V = TX + T1U;
Chris@42 636 T3k = T31 + T3j;
Chris@42 637 T72 = T3j - T31;
Chris@42 638 T75 = T5y - T5D;
Chris@42 639 T5E = T5y + T5D;
Chris@42 640 T5P = T5J + T5O;
Chris@42 641 T77 = T5J - T5O;
Chris@42 642 T7r = T5b - T5g;
Chris@42 643 T5h = T5b + T5g;
Chris@42 644 T6I = T3k - T2G;
Chris@42 645 T3l = T2G + T3k;
Chris@42 646 T6H = T5P - T5E;
Chris@42 647 T5Q = T5E + T5P;
Chris@42 648 T5s = T5m + T5r;
Chris@42 649 T71 = T5r - T5m;
Chris@42 650 {
Chris@42 651 E T64, T6L, T6f, T4C, T55;
Chris@42 652 T7f = T4B - T4s;
Chris@42 653 T4C = T4s + T4B;
Chris@42 654 T55 = T4T + T54;
Chris@42 655 T7d = T54 - T4T;
Chris@42 656 T7c = T63 - T5Y;
Chris@42 657 T64 = T5Y + T63;
Chris@42 658 T6R = T5h - T5s;
Chris@42 659 T5t = T5h + T5s;
Chris@42 660 T6L = T4C - T55;
Chris@42 661 T56 = T4C + T55;
Chris@42 662 T7g = T69 - T6e;
Chris@42 663 T6f = T69 + T6e;
Chris@42 664 {
Chris@42 665 E T6r, T6C, T3N, T4e, T6K;
Chris@42 666 T7m = T3z - T3M;
Chris@42 667 T3N = T3z + T3M;
Chris@42 668 T4e = T3Y + T4d;
Chris@42 669 T7k = T4d - T3Y;
Chris@42 670 T6K = T6f - T64;
Chris@42 671 T6g = T64 + T6f;
Chris@42 672 T7j = T6q - T6l;
Chris@42 673 T6r = T6l + T6q;
Chris@42 674 T6N = T4e - T3N;
Chris@42 675 T4f = T3N + T4e;
Chris@42 676 T7n = T6B - T6w;
Chris@42 677 T6C = T6w + T6B;
Chris@42 678 T6M = T6K + T6L;
Chris@42 679 T6W = T6K - T6L;
Chris@42 680 T6D = T6r + T6C;
Chris@42 681 T6O = T6C - T6r;
Chris@42 682 }
Chris@42 683 }
Chris@42 684 }
Chris@42 685 {
Chris@42 686 E T5T, T6X, T6P, T6E;
Chris@42 687 {
Chris@42 688 E T5S, T5R, T6F, T6G, T3m, T57;
Chris@42 689 T5T = T3l - T1V;
Chris@42 690 T3m = T1V + T3l;
Chris@42 691 T57 = T4f - T56;
Chris@42 692 T5S = T56 + T4f;
Chris@42 693 T6X = T6N + T6O;
Chris@42 694 T6P = T6N - T6O;
Chris@42 695 T5R = T5t - T5Q;
Chris@42 696 T6F = T5t + T5Q;
Chris@42 697 Im[WS(rs, 15)] = KP500000000 * (T57 - T3m);
Chris@42 698 Ip[0] = KP500000000 * (T3m + T57);
Chris@42 699 T6G = T6g + T6D;
Chris@42 700 T6E = T6g - T6D;
Chris@42 701 Rp[0] = KP500000000 * (T6F + T6G);
Chris@42 702 Rm[WS(rs, 15)] = KP500000000 * (T6F - T6G);
Chris@42 703 Rp[WS(rs, 8)] = KP500000000 * (T5R + T5S);
Chris@42 704 Rm[WS(rs, 7)] = KP500000000 * (T5R - T5S);
Chris@42 705 }
Chris@42 706 {
Chris@42 707 E T6U, T6T, T6Z, T70, T6J, T6Q;
Chris@42 708 T6V = T6I - T6H;
Chris@42 709 T6J = T6H + T6I;
Chris@42 710 T6Q = T6M + T6P;
Chris@42 711 T6U = T6P - T6M;
Chris@42 712 T6T = T6R - T6S;
Chris@42 713 T6Z = T6R + T6S;
Chris@42 714 Im[WS(rs, 7)] = KP500000000 * (T6E - T5T);
Chris@42 715 Ip[WS(rs, 8)] = KP500000000 * (T5T + T6E);
Chris@42 716 Im[WS(rs, 11)] = -(KP500000000 * (FNMS(KP707106781, T6Q, T6J)));
Chris@42 717 Ip[WS(rs, 4)] = KP500000000 * (FMA(KP707106781, T6Q, T6J));
Chris@42 718 T70 = T6W + T6X;
Chris@42 719 T6Y = T6W - T6X;
Chris@42 720 Rp[WS(rs, 4)] = KP500000000 * (FMA(KP707106781, T70, T6Z));
Chris@42 721 Rm[WS(rs, 11)] = KP500000000 * (FNMS(KP707106781, T70, T6Z));
Chris@42 722 Rp[WS(rs, 12)] = KP500000000 * (FMA(KP707106781, T6U, T6T));
Chris@42 723 Rm[WS(rs, 3)] = KP500000000 * (FNMS(KP707106781, T6U, T6T));
Chris@42 724 }
Chris@42 725 }
Chris@42 726 }
Chris@42 727 {
Chris@42 728 E T7F, T73, T7P, T7t, T7G, T7w, T7Q, T7a, T7L, T7l, T7K, T7U, T7A, T7i, T7u;
Chris@42 729 E T76;
Chris@42 730 Im[WS(rs, 3)] = -(KP500000000 * (FNMS(KP707106781, T6Y, T6V)));
Chris@42 731 Ip[WS(rs, 12)] = KP500000000 * (FMA(KP707106781, T6Y, T6V));
Chris@42 732 T7F = T72 - T71;
Chris@42 733 T73 = T71 + T72;
Chris@42 734 T7P = T7r - T7s;
Chris@42 735 T7t = T7r + T7s;
Chris@42 736 T7u = T75 + T74;
Chris@42 737 T76 = T74 - T75;
Chris@42 738 {
Chris@42 739 E T7I, T7e, T7v, T79, T7J, T7h;
Chris@42 740 T7v = T77 - T78;
Chris@42 741 T79 = T77 + T78;
Chris@42 742 T7I = T7c - T7d;
Chris@42 743 T7e = T7c + T7d;
Chris@42 744 T7G = T7v - T7u;
Chris@42 745 T7w = T7u + T7v;
Chris@42 746 T7Q = T76 - T79;
Chris@42 747 T7a = T76 + T79;
Chris@42 748 T7J = T7g - T7f;
Chris@42 749 T7h = T7f + T7g;
Chris@42 750 T7L = T7k - T7j;
Chris@42 751 T7l = T7j + T7k;
Chris@42 752 T7K = FMA(KP414213562, T7J, T7I);
Chris@42 753 T7U = FNMS(KP414213562, T7I, T7J);
Chris@42 754 T7A = FNMS(KP414213562, T7e, T7h);
Chris@42 755 T7i = FMA(KP414213562, T7h, T7e);
Chris@42 756 }
Chris@42 757 {
Chris@42 758 E T7z, T7b, T7D, T7x, T7M, T7o;
Chris@42 759 T7z = FNMS(KP707106781, T7a, T73);
Chris@42 760 T7b = FMA(KP707106781, T7a, T73);
Chris@42 761 T7D = FMA(KP707106781, T7w, T7t);
Chris@42 762 T7x = FNMS(KP707106781, T7w, T7t);
Chris@42 763 T7M = T7n - T7m;
Chris@42 764 T7o = T7m + T7n;
Chris@42 765 {
Chris@42 766 E T7S, T7R, T7X, T7Y;
Chris@42 767 {
Chris@42 768 E T7H, T7V, T7B, T7p, T7O, T7N;
Chris@42 769 T7T = FMA(KP707106781, T7G, T7F);
Chris@42 770 T7H = FNMS(KP707106781, T7G, T7F);
Chris@42 771 T7N = FMA(KP414213562, T7M, T7L);
Chris@42 772 T7V = FNMS(KP414213562, T7L, T7M);
Chris@42 773 T7B = FMA(KP414213562, T7l, T7o);
Chris@42 774 T7p = FNMS(KP414213562, T7o, T7l);
Chris@42 775 T7O = T7K - T7N;
Chris@42 776 T7S = T7K + T7N;
Chris@42 777 T7R = FMA(KP707106781, T7Q, T7P);
Chris@42 778 T7X = FNMS(KP707106781, T7Q, T7P);
Chris@42 779 {
Chris@42 780 E T7C, T7E, T7y, T7q;
Chris@42 781 T7C = T7A - T7B;
Chris@42 782 T7E = T7A + T7B;
Chris@42 783 T7y = T7p - T7i;
Chris@42 784 T7q = T7i + T7p;
Chris@42 785 Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP923879532, T7O, T7H)));
Chris@42 786 Ip[WS(rs, 14)] = KP500000000 * (FMA(KP923879532, T7O, T7H));
Chris@42 787 Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP923879532, T7C, T7z)));
Chris@42 788 Ip[WS(rs, 10)] = KP500000000 * (FMA(KP923879532, T7C, T7z));
Chris@42 789 Rp[WS(rs, 2)] = KP500000000 * (FMA(KP923879532, T7E, T7D));
Chris@42 790 Rm[WS(rs, 13)] = KP500000000 * (FNMS(KP923879532, T7E, T7D));
Chris@42 791 Rp[WS(rs, 10)] = KP500000000 * (FMA(KP923879532, T7y, T7x));
Chris@42 792 Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP923879532, T7y, T7x));
Chris@42 793 Im[WS(rs, 13)] = -(KP500000000 * (FNMS(KP923879532, T7q, T7b)));
Chris@42 794 Ip[WS(rs, 2)] = KP500000000 * (FMA(KP923879532, T7q, T7b));
Chris@42 795 T7Y = T7U + T7V;
Chris@42 796 T7W = T7U - T7V;
Chris@42 797 }
Chris@42 798 }
Chris@42 799 Rm[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, T7Y, T7X));
Chris@42 800 Rp[WS(rs, 14)] = KP500000000 * (FNMS(KP923879532, T7Y, T7X));
Chris@42 801 Rp[WS(rs, 6)] = KP500000000 * (FMA(KP923879532, T7S, T7R));
Chris@42 802 Rm[WS(rs, 9)] = KP500000000 * (FNMS(KP923879532, T7S, T7R));
Chris@42 803 }
Chris@42 804 }
Chris@42 805 }
Chris@42 806 {
Chris@42 807 E Ta7, Tat, T9l, T89, T9H, Taj, T9v, T99, T9m, T9c, T9w, T8o, Tao, Tay, Tae;
Chris@42 808 E Ta3, T9q, T9A, T9g, T8I, T8Z, T8W, Tak, Taa, Tau, T9O, T9r, T8T, Tar, Taz;
Chris@42 809 E Taf, T9W;
Chris@42 810 {
Chris@42 811 E T9M, T9L, T9J, T9I, T8s, T8G, T8D, Ta0, Tam, T9Z, Ta1, T8z, Ta9, T9K;
Chris@42 812 {
Chris@42 813 E T9F, T81, Ta5, T95, T96, T97, Ta6, T88, T84, T87;
Chris@42 814 T9F = T80 + T7Z;
Chris@42 815 T81 = T7Z - T80;
Chris@42 816 Ta5 = T93 - T94;
Chris@42 817 T95 = T93 + T94;
Chris@42 818 T96 = T83 + T82;
Chris@42 819 T84 = T82 - T83;
Chris@42 820 Im[WS(rs, 9)] = -(KP500000000 * (FNMS(KP923879532, T7W, T7T)));
Chris@42 821 Ip[WS(rs, 6)] = KP500000000 * (FMA(KP923879532, T7W, T7T));
Chris@42 822 T87 = T85 + T86;
Chris@42 823 T97 = T85 - T86;
Chris@42 824 Ta6 = T84 - T87;
Chris@42 825 T88 = T84 + T87;
Chris@42 826 {
Chris@42 827 E T8j, T9a, T8g, T8m;
Chris@42 828 {
Chris@42 829 E T8c, T9G, T98, T8f;
Chris@42 830 T9M = T8a + T8b;
Chris@42 831 T8c = T8a - T8b;
Chris@42 832 Ta7 = FMA(KP707106781, Ta6, Ta5);
Chris@42 833 Tat = FNMS(KP707106781, Ta6, Ta5);
Chris@42 834 T9l = FNMS(KP707106781, T88, T81);
Chris@42 835 T89 = FMA(KP707106781, T88, T81);
Chris@42 836 T9G = T97 - T96;
Chris@42 837 T98 = T96 + T97;
Chris@42 838 T8f = T8d + T8e;
Chris@42 839 T9L = T8d - T8e;
Chris@42 840 T9J = T8h + T8i;
Chris@42 841 T8j = T8h - T8i;
Chris@42 842 T9H = FMA(KP707106781, T9G, T9F);
Chris@42 843 Taj = FNMS(KP707106781, T9G, T9F);
Chris@42 844 T9v = FNMS(KP707106781, T98, T95);
Chris@42 845 T99 = FMA(KP707106781, T98, T95);
Chris@42 846 T9a = FMA(KP414213562, T8c, T8f);
Chris@42 847 T8g = FNMS(KP414213562, T8f, T8c);
Chris@42 848 T8m = T8k + T8l;
Chris@42 849 T9I = T8k - T8l;
Chris@42 850 }
Chris@42 851 {
Chris@42 852 E T9X, T9Y, T9b, T8n;
Chris@42 853 T8s = T8q + T8r;
Chris@42 854 T9X = T8r - T8q;
Chris@42 855 T9Y = T8F - T8E;
Chris@42 856 T8G = T8E + T8F;
Chris@42 857 T8D = T8B + T8C;
Chris@42 858 Ta0 = T8B - T8C;
Chris@42 859 T9b = FNMS(KP414213562, T8j, T8m);
Chris@42 860 T8n = FMA(KP414213562, T8m, T8j);
Chris@42 861 Tam = FMA(KP707106781, T9Y, T9X);
Chris@42 862 T9Z = FNMS(KP707106781, T9Y, T9X);
Chris@42 863 T9m = T9b - T9a;
Chris@42 864 T9c = T9a + T9b;
Chris@42 865 T9w = T8g - T8n;
Chris@42 866 T8o = T8g + T8n;
Chris@42 867 Ta1 = T8y - T8v;
Chris@42 868 T8z = T8v + T8y;
Chris@42 869 }
Chris@42 870 }
Chris@42 871 }
Chris@42 872 {
Chris@42 873 E T9o, T8A, Tan, Ta2, T9p, T8H;
Chris@42 874 Tan = FMA(KP707106781, Ta1, Ta0);
Chris@42 875 Ta2 = FNMS(KP707106781, Ta1, Ta0);
Chris@42 876 T9o = FNMS(KP707106781, T8z, T8s);
Chris@42 877 T8A = FMA(KP707106781, T8z, T8s);
Chris@42 878 Tao = FMA(KP198912367, Tan, Tam);
Chris@42 879 Tay = FNMS(KP198912367, Tam, Tan);
Chris@42 880 Tae = FMA(KP668178637, T9Z, Ta2);
Chris@42 881 Ta3 = FNMS(KP668178637, Ta2, T9Z);
Chris@42 882 T9p = FNMS(KP707106781, T8G, T8D);
Chris@42 883 T8H = FMA(KP707106781, T8G, T8D);
Chris@42 884 Ta9 = FNMS(KP414213562, T9I, T9J);
Chris@42 885 T9K = FMA(KP414213562, T9J, T9I);
Chris@42 886 T9q = FNMS(KP668178637, T9p, T9o);
Chris@42 887 T9A = FMA(KP668178637, T9o, T9p);
Chris@42 888 T9g = FNMS(KP198912367, T8A, T8H);
Chris@42 889 T8I = FMA(KP198912367, T8H, T8A);
Chris@42 890 }
Chris@42 891 {
Chris@42 892 E T8L, T9T, Tap, T9S, T9U, T8S, Taq, T9V;
Chris@42 893 {
Chris@42 894 E T9Q, T9R, Ta8, T9N;
Chris@42 895 T8L = T8J - T8K;
Chris@42 896 T9Q = T8K + T8J;
Chris@42 897 T9R = T8X - T8Y;
Chris@42 898 T8Z = T8X + T8Y;
Chris@42 899 T8W = T8U + T8V;
Chris@42 900 T9T = T8V - T8U;
Chris@42 901 Ta8 = FMA(KP414213562, T9L, T9M);
Chris@42 902 T9N = FNMS(KP414213562, T9M, T9L);
Chris@42 903 Tap = FMA(KP707106781, T9R, T9Q);
Chris@42 904 T9S = FNMS(KP707106781, T9R, T9Q);
Chris@42 905 Tak = Ta8 + Ta9;
Chris@42 906 Taa = Ta8 - Ta9;
Chris@42 907 Tau = T9N + T9K;
Chris@42 908 T9O = T9K - T9N;
Chris@42 909 T9U = T8R - T8O;
Chris@42 910 T8S = T8O + T8R;
Chris@42 911 }
Chris@42 912 Taq = FMA(KP707106781, T9U, T9T);
Chris@42 913 T9V = FNMS(KP707106781, T9U, T9T);
Chris@42 914 T9r = FNMS(KP707106781, T8S, T8L);
Chris@42 915 T8T = FMA(KP707106781, T8S, T8L);
Chris@42 916 Tar = FMA(KP198912367, Taq, Tap);
Chris@42 917 Taz = FNMS(KP198912367, Tap, Taq);
Chris@42 918 Taf = FMA(KP668178637, T9S, T9V);
Chris@42 919 T9W = FNMS(KP668178637, T9V, T9S);
Chris@42 920 }
Chris@42 921 }
Chris@42 922 {
Chris@42 923 E T9z, T9C, Tad, Tag;
Chris@42 924 {
Chris@42 925 E T9f, T8p, T9j, T9d, T9s, T90;
Chris@42 926 T9f = FNMS(KP923879532, T8o, T89);
Chris@42 927 T8p = FMA(KP923879532, T8o, T89);
Chris@42 928 T9j = FMA(KP923879532, T9c, T99);
Chris@42 929 T9d = FNMS(KP923879532, T9c, T99);
Chris@42 930 T9s = FNMS(KP707106781, T8Z, T8W);
Chris@42 931 T90 = FMA(KP707106781, T8Z, T8W);
Chris@42 932 {
Chris@42 933 E T9y, T9x, T9D, T9E;
Chris@42 934 {
Chris@42 935 E T9n, T9B, T9h, T91, T9u, T9t;
Chris@42 936 T9z = FMA(KP923879532, T9m, T9l);
Chris@42 937 T9n = FNMS(KP923879532, T9m, T9l);
Chris@42 938 T9t = FMA(KP668178637, T9s, T9r);
Chris@42 939 T9B = FNMS(KP668178637, T9r, T9s);
Chris@42 940 T9h = FMA(KP198912367, T8T, T90);
Chris@42 941 T91 = FNMS(KP198912367, T90, T8T);
Chris@42 942 T9u = T9q + T9t;
Chris@42 943 T9y = T9t - T9q;
Chris@42 944 T9x = FMA(KP923879532, T9w, T9v);
Chris@42 945 T9D = FNMS(KP923879532, T9w, T9v);
Chris@42 946 {
Chris@42 947 E T9i, T9k, T9e, T92;
Chris@42 948 T9i = T9g - T9h;
Chris@42 949 T9k = T9g + T9h;
Chris@42 950 T9e = T91 - T8I;
Chris@42 951 T92 = T8I + T91;
Chris@42 952 Im[WS(rs, 2)] = -(KP500000000 * (FMA(KP831469612, T9u, T9n)));
Chris@42 953 Ip[WS(rs, 13)] = KP500000000 * (FNMS(KP831469612, T9u, T9n));
Chris@42 954 Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP980785280, T9i, T9f)));
Chris@42 955 Ip[WS(rs, 9)] = KP500000000 * (FMA(KP980785280, T9i, T9f));
Chris@42 956 Rp[WS(rs, 1)] = KP500000000 * (FMA(KP980785280, T9k, T9j));
Chris@42 957 Rm[WS(rs, 14)] = KP500000000 * (FNMS(KP980785280, T9k, T9j));
Chris@42 958 Rp[WS(rs, 9)] = KP500000000 * (FMA(KP980785280, T9e, T9d));
Chris@42 959 Rm[WS(rs, 6)] = KP500000000 * (FNMS(KP980785280, T9e, T9d));
Chris@42 960 Im[WS(rs, 14)] = -(KP500000000 * (FNMS(KP980785280, T92, T8p)));
Chris@42 961 Ip[WS(rs, 1)] = KP500000000 * (FMA(KP980785280, T92, T8p));
Chris@42 962 T9E = T9A + T9B;
Chris@42 963 T9C = T9A - T9B;
Chris@42 964 }
Chris@42 965 }
Chris@42 966 Rm[WS(rs, 2)] = KP500000000 * (FMA(KP831469612, T9E, T9D));
Chris@42 967 Rp[WS(rs, 13)] = KP500000000 * (FNMS(KP831469612, T9E, T9D));
Chris@42 968 Rp[WS(rs, 5)] = KP500000000 * (FMA(KP831469612, T9y, T9x));
Chris@42 969 Rm[WS(rs, 10)] = KP500000000 * (FNMS(KP831469612, T9y, T9x));
Chris@42 970 }
Chris@42 971 }
Chris@42 972 {
Chris@42 973 E Tac, Tab, Tah, Tai, T9P, Ta4;
Chris@42 974 Tad = FNMS(KP923879532, T9O, T9H);
Chris@42 975 T9P = FMA(KP923879532, T9O, T9H);
Chris@42 976 Ta4 = T9W - Ta3;
Chris@42 977 Tac = Ta3 + T9W;
Chris@42 978 Tab = FNMS(KP923879532, Taa, Ta7);
Chris@42 979 Tah = FMA(KP923879532, Taa, Ta7);
Chris@42 980 Im[WS(rs, 10)] = -(KP500000000 * (FNMS(KP831469612, T9C, T9z)));
Chris@42 981 Ip[WS(rs, 5)] = KP500000000 * (FMA(KP831469612, T9C, T9z));
Chris@42 982 Im[WS(rs, 12)] = -(KP500000000 * (FNMS(KP831469612, Ta4, T9P)));
Chris@42 983 Ip[WS(rs, 3)] = KP500000000 * (FMA(KP831469612, Ta4, T9P));
Chris@42 984 Tai = Tae + Taf;
Chris@42 985 Tag = Tae - Taf;
Chris@42 986 Rp[WS(rs, 3)] = KP500000000 * (FMA(KP831469612, Tai, Tah));
Chris@42 987 Rm[WS(rs, 12)] = KP500000000 * (FNMS(KP831469612, Tai, Tah));
Chris@42 988 Rp[WS(rs, 11)] = KP500000000 * (FMA(KP831469612, Tac, Tab));
Chris@42 989 Rm[WS(rs, 4)] = KP500000000 * (FNMS(KP831469612, Tac, Tab));
Chris@42 990 }
Chris@42 991 {
Chris@42 992 E Taw, Tav, TaB, TaC, Tal, Tas;
Chris@42 993 Tax = FNMS(KP923879532, Tak, Taj);
Chris@42 994 Tal = FMA(KP923879532, Tak, Taj);
Chris@42 995 Tas = Tao - Tar;
Chris@42 996 Taw = Tao + Tar;
Chris@42 997 Tav = FNMS(KP923879532, Tau, Tat);
Chris@42 998 TaB = FMA(KP923879532, Tau, Tat);
Chris@42 999 Im[WS(rs, 4)] = -(KP500000000 * (FNMS(KP831469612, Tag, Tad)));
Chris@42 1000 Ip[WS(rs, 11)] = KP500000000 * (FMA(KP831469612, Tag, Tad));
Chris@42 1001 Im[0] = -(KP500000000 * (FNMS(KP980785280, Tas, Tal)));
Chris@42 1002 Ip[WS(rs, 15)] = KP500000000 * (FMA(KP980785280, Tas, Tal));
Chris@42 1003 TaC = Tay + Taz;
Chris@42 1004 TaA = Tay - Taz;
Chris@42 1005 Rm[0] = KP500000000 * (FMA(KP980785280, TaC, TaB));
Chris@42 1006 Rp[WS(rs, 15)] = KP500000000 * (FNMS(KP980785280, TaC, TaB));
Chris@42 1007 Rp[WS(rs, 7)] = KP500000000 * (FMA(KP980785280, Taw, Tav));
Chris@42 1008 Rm[WS(rs, 8)] = KP500000000 * (FNMS(KP980785280, Taw, Tav));
Chris@42 1009 }
Chris@42 1010 }
Chris@42 1011 }
Chris@42 1012 }
Chris@42 1013 }
Chris@42 1014 }
Chris@42 1015 Im[WS(rs, 8)] = -(KP500000000 * (FNMS(KP980785280, TaA, Tax)));
Chris@42 1016 Ip[WS(rs, 7)] = KP500000000 * (FMA(KP980785280, TaA, Tax));
Chris@42 1017 }
Chris@42 1018 }
Chris@42 1019 }
Chris@42 1020
/*
 * Twiddle-factor instruction table used by the descriptor of this codelet.
 * It holds four TW_CEXP entries whose last fields are 1, 3, 9 and 27,
 * followed by a single TW_NEXT entry that closes the list.
 *
 * NOTE(review): the 1/3/9/27 sequence presumably reflects the
 * -twiddle-log3 option shown in the "Generated by" comment of this variant.
 * Four entries would also be consistent with the eight values W[0]..W[7]
 * that the non-FMA kernel further down reads per iteration (it advances W
 * by 8), if each entry stands for one complex factor.  Confirm both points
 * against the tw_instr declaration, which is not visible in this file.
 */
Chris@42 1021 static const tw_instr twinstr[] = {
Chris@42 1022 {TW_CEXP, 1, 1},
Chris@42 1023 {TW_CEXP, 1, 3},
Chris@42 1024 {TW_CEXP, 1, 9},
Chris@42 1025 {TW_CEXP, 1, 27},
Chris@42 1026 {TW_NEXT, 1, 0} /* list terminator entry */
Chris@42 1027 };
Chris@42 1028
/*
 * Descriptor passed to the registration call for this codelet.  In order it
 * carries: the literal 32, the codelet name string, the twiddle instruction
 * table, the address of GENUS, and the tuple {300, 162, 252, 0}.
 *
 * The first three tuple values equal the operation counts quoted in this
 * variant's header comment (300 additions, 162 multiplications, 252 fused
 * multiply/adds).
 * NOTE(review): the leading 32 presumably is the transform size (cf. the
 * "-n 32" generator option), and the meaning of the trailing 0 is not shown
 * here -- confirm both against the hc2c_desc declaration.
 */
Chris@42 1029 static const hc2c_desc desc = { 32, "hc2cfdft2_32", twinstr, &GENUS, {300, 162, 252, 0} };
Chris@42 1030
/*
 * Registration entry point for this codelet.
 *
 * p: planner handle, forwarded unchanged to the registration routine.
 *
 * The body makes one call to X(khc2c_register), passing the planner, the
 * hc2cfdft2_32 kernel function, the address of its descriptor and the
 * HC2C_VIA_DFT constant.  Nothing is returned.
 * NOTE(review): X() appears to be a name-decorating macro provided by the
 * included headers; its expansion is not visible in this file.
 */
Chris@42 1031 void X(codelet_hc2cfdft2_32) (planner *p) {
Chris@42 1032 X(khc2c_register) (p, hc2cfdft2_32, &desc, HC2C_VIA_DFT);
Chris@42 1033 }
Chris@42 1034 #else /* HAVE_FMA */
Chris@42 1035
Chris@42 1036 /* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hc2cfdft2_32 -include hc2cf.h */
Chris@42 1037
Chris@42 1038 /*
Chris@42 1039 * This function contains 552 FP additions, 300 FP multiplications,
Chris@42 1040 * (or, 440 additions, 188 multiplications, 112 fused multiply/add),
Chris@42 1041 * 166 stack variables, 9 constants, and 128 memory accesses
Chris@42 1042 */
Chris@42 1043 #include "hc2cf.h"
Chris@42 1044
Chris@42 1045 static void hc2cfdft2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 1046 {
Chris@42 1047 DK(KP277785116, +0.277785116509801112371415406974266437187468595);
Chris@42 1048 DK(KP415734806, +0.415734806151272618539394188808952878369280406);
Chris@42 1049 DK(KP097545161, +0.097545161008064133924142434238511120463845809);
Chris@42 1050 DK(KP490392640, +0.490392640201615224563091118067119518486966865);
Chris@42 1051 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 1052 DK(KP191341716, +0.191341716182544885864229992015199433380672281);
Chris@42 1053 DK(KP461939766, +0.461939766255643378064091594698394143411208313);
Chris@42 1054 DK(KP353553390, +0.353553390593273762200422181052424519642417969);
Chris@42 1055 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 1056 {
Chris@42 1057 INT m;
Chris@42 1058 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(128, rs)) {
Chris@42 1059 E T1, T4, T2, T5, T7, T1b, T1d, Td, Ti, Tk, Tj, Tl, TL, TR, T2h;
Chris@42 1060 E T2O, T16, T2l, T10, T2K, Tm, Tq, T3s, T3K, T3w, T3M, T4e, T4u, T4i, T4w;
Chris@42 1061 E Ty, TE, T3h, T3j, T2q, T2u, T4l, T4n, T1v, T1B, T3E, T3G, T2B, T2F, T3Y;
Chris@42 1062 E T40, T1f, T1G, T1i, T1H, T1j, T1M, T1n, T1I, T23, T2U, T26, T2V, T27, T30;
Chris@42 1063 E T2b, T2W;
Chris@42 1064 {
Chris@42 1065 E Tw, T1A, TD, T1t, Tx, T1z, TC, T1u, TJ, T15, TQ, TY, TK, T14, TP;
Chris@42 1066 E TZ;
Chris@42 1067 {
Chris@42 1068 E T3, Tc, T6, Tb;
Chris@42 1069 T1 = W[0];
Chris@42 1070 T4 = W[1];
Chris@42 1071 T2 = W[2];
Chris@42 1072 T5 = W[3];
Chris@42 1073 T3 = T1 * T2;
Chris@42 1074 Tc = T4 * T2;
Chris@42 1075 T6 = T4 * T5;
Chris@42 1076 Tb = T1 * T5;
Chris@42 1077 T7 = T3 + T6;
Chris@42 1078 T1b = T3 - T6;
Chris@42 1079 T1d = Tb + Tc;
Chris@42 1080 Td = Tb - Tc;
Chris@42 1081 Ti = W[4];
Chris@42 1082 Tw = T1 * Ti;
Chris@42 1083 T1A = T5 * Ti;
Chris@42 1084 TD = T4 * Ti;
Chris@42 1085 T1t = T2 * Ti;
Chris@42 1086 Tk = W[5];
Chris@42 1087 Tx = T4 * Tk;
Chris@42 1088 T1z = T2 * Tk;
Chris@42 1089 TC = T1 * Tk;
Chris@42 1090 T1u = T5 * Tk;
Chris@42 1091 Tj = W[6];
Chris@42 1092 TJ = T1 * Tj;
Chris@42 1093 T15 = T5 * Tj;
Chris@42 1094 TQ = T4 * Tj;
Chris@42 1095 TY = T2 * Tj;
Chris@42 1096 Tl = W[7];
Chris@42 1097 TK = T4 * Tl;
Chris@42 1098 T14 = T2 * Tl;
Chris@42 1099 TP = T1 * Tl;
Chris@42 1100 TZ = T5 * Tl;
Chris@42 1101 }
Chris@42 1102 TL = TJ + TK;
Chris@42 1103 TR = TP - TQ;
Chris@42 1104 T2h = TJ - TK;
Chris@42 1105 T2O = T14 - T15;
Chris@42 1106 T16 = T14 + T15;
Chris@42 1107 T2l = TP + TQ;
Chris@42 1108 T10 = TY - TZ;
Chris@42 1109 T2K = TY + TZ;
Chris@42 1110 Tm = FMA(Ti, Tj, Tk * Tl);
Chris@42 1111 Tq = FNMS(Tk, Tj, Ti * Tl);
Chris@42 1112 {
Chris@42 1113 E T3q, T3r, T3u, T3v;
Chris@42 1114 T3q = T7 * Tj;
Chris@42 1115 T3r = Td * Tl;
Chris@42 1116 T3s = T3q + T3r;
Chris@42 1117 T3K = T3q - T3r;
Chris@42 1118 T3u = T7 * Tl;
Chris@42 1119 T3v = Td * Tj;
Chris@42 1120 T3w = T3u - T3v;
Chris@42 1121 T3M = T3u + T3v;
Chris@42 1122 }
Chris@42 1123 {
Chris@42 1124 E T4c, T4d, T4g, T4h;
Chris@42 1125 T4c = T1b * Tj;
Chris@42 1126 T4d = T1d * Tl;
Chris@42 1127 T4e = T4c - T4d;
Chris@42 1128 T4u = T4c + T4d;
Chris@42 1129 T4g = T1b * Tl;
Chris@42 1130 T4h = T1d * Tj;
Chris@42 1131 T4i = T4g + T4h;
Chris@42 1132 T4w = T4g - T4h;
Chris@42 1133 Ty = Tw - Tx;
Chris@42 1134 TE = TC + TD;
Chris@42 1135 T3h = FMA(Ty, Tj, TE * Tl);
Chris@42 1136 T3j = FNMS(TE, Tj, Ty * Tl);
Chris@42 1137 }
Chris@42 1138 T2q = T1t - T1u;
Chris@42 1139 T2u = T1z + T1A;
Chris@42 1140 T4l = FMA(T2q, Tj, T2u * Tl);
Chris@42 1141 T4n = FNMS(T2u, Tj, T2q * Tl);
Chris@42 1142 T1v = T1t + T1u;
Chris@42 1143 T1B = T1z - T1A;
Chris@42 1144 T3E = FMA(T1v, Tj, T1B * Tl);
Chris@42 1145 T3G = FNMS(T1B, Tj, T1v * Tl);
Chris@42 1146 T2B = Tw + Tx;
Chris@42 1147 T2F = TC - TD;
Chris@42 1148 T3Y = FMA(T2B, Tj, T2F * Tl);
Chris@42 1149 T40 = FNMS(T2F, Tj, T2B * Tl);
Chris@42 1150 {
Chris@42 1151 E T1c, T1e, T1g, T1h;
Chris@42 1152 T1c = T1b * Ti;
Chris@42 1153 T1e = T1d * Tk;
Chris@42 1154 T1f = T1c - T1e;
Chris@42 1155 T1G = T1c + T1e;
Chris@42 1156 T1g = T1b * Tk;
Chris@42 1157 T1h = T1d * Ti;
Chris@42 1158 T1i = T1g + T1h;
Chris@42 1159 T1H = T1g - T1h;
Chris@42 1160 }
Chris@42 1161 T1j = FMA(T1f, Tj, T1i * Tl);
Chris@42 1162 T1M = FNMS(T1H, Tj, T1G * Tl);
Chris@42 1163 T1n = FNMS(T1i, Tj, T1f * Tl);
Chris@42 1164 T1I = FMA(T1G, Tj, T1H * Tl);
Chris@42 1165 {
Chris@42 1166 E T21, T22, T24, T25;
Chris@42 1167 T21 = T7 * Ti;
Chris@42 1168 T22 = Td * Tk;
Chris@42 1169 T23 = T21 + T22;
Chris@42 1170 T2U = T21 - T22;
Chris@42 1171 T24 = T7 * Tk;
Chris@42 1172 T25 = Td * Ti;
Chris@42 1173 T26 = T24 - T25;
Chris@42 1174 T2V = T24 + T25;
Chris@42 1175 }
Chris@42 1176 T27 = FMA(T23, Tj, T26 * Tl);
Chris@42 1177 T30 = FNMS(T2V, Tj, T2U * Tl);
Chris@42 1178 T2b = FNMS(T26, Tj, T23 * Tl);
Chris@42 1179 T2W = FMA(T2U, Tj, T2V * Tl);
Chris@42 1180 }
Chris@42 1181 {
Chris@42 1182 E T38, T7l, T7S, T8Y, T7Z, T91, T3A, T6k, T4F, T83, T5C, T6n, T2T, T84, T4I;
Chris@42 1183 E T7m, T2g, T4M, T4P, T2z, T3T, T6m, T7O, T7V, T7j, T87, T5v, T6j, T7L, T7U;
Chris@42 1184 E T7g, T86, Tv, TW, T61, T4U, T4X, T62, T4b, T6c, T7v, T7C, T5g, T6f, T74;
Chris@42 1185 E T8G, T7s, T7B, T71, T8F, T1s, T1R, T65, T51, T54, T64, T4A, T6g, T7G, T8U;
Chris@42 1186 E T5n, T6d, T7b, T8J, T7z, T8R, T78, T8I;
Chris@42 1187 {
Chris@42 1188 E T2E, T2I, T3p, T5w, T37, T4D, T3g, T5A, T2N, T2R, T3y, T5x, T2Z, T33, T3l;
Chris@42 1189 E T5z;
Chris@42 1190 {
Chris@42 1191 E T2C, T2D, T3o, T2G, T2H, T3n;
Chris@42 1192 T2C = Ip[WS(rs, 4)];
Chris@42 1193 T2D = Im[WS(rs, 4)];
Chris@42 1194 T3o = T2C + T2D;
Chris@42 1195 T2G = Rp[WS(rs, 4)];
Chris@42 1196 T2H = Rm[WS(rs, 4)];
Chris@42 1197 T3n = T2G - T2H;
Chris@42 1198 T2E = T2C - T2D;
Chris@42 1199 T2I = T2G + T2H;
Chris@42 1200 T3p = FMA(Ti, T3n, Tk * T3o);
Chris@42 1201 T5w = FNMS(Tk, T3n, Ti * T3o);
Chris@42 1202 }
Chris@42 1203 {
Chris@42 1204 E T35, T36, T3f, T3c, T3d, T3e;
Chris@42 1205 T35 = Ip[0];
Chris@42 1206 T36 = Im[0];
Chris@42 1207 T3f = T35 + T36;
Chris@42 1208 T3c = Rm[0];
Chris@42 1209 T3d = Rp[0];
Chris@42 1210 T3e = T3c - T3d;
Chris@42 1211 T37 = T35 - T36;
Chris@42 1212 T4D = T3d + T3c;
Chris@42 1213 T3g = FNMS(T4, T3f, T1 * T3e);
Chris@42 1214 T5A = FMA(T4, T3e, T1 * T3f);
Chris@42 1215 }
Chris@42 1216 {
Chris@42 1217 E T2L, T2M, T3x, T2P, T2Q, T3t;
Chris@42 1218 T2L = Ip[WS(rs, 12)];
Chris@42 1219 T2M = Im[WS(rs, 12)];
Chris@42 1220 T3x = T2L + T2M;
Chris@42 1221 T2P = Rp[WS(rs, 12)];
Chris@42 1222 T2Q = Rm[WS(rs, 12)];
Chris@42 1223 T3t = T2P - T2Q;
Chris@42 1224 T2N = T2L - T2M;
Chris@42 1225 T2R = T2P + T2Q;
Chris@42 1226 T3y = FMA(T3s, T3t, T3w * T3x);
Chris@42 1227 T5x = FNMS(T3w, T3t, T3s * T3x);
Chris@42 1228 }
Chris@42 1229 {
Chris@42 1230 E T2X, T2Y, T3k, T31, T32, T3i;
Chris@42 1231 T2X = Ip[WS(rs, 8)];
Chris@42 1232 T2Y = Im[WS(rs, 8)];
Chris@42 1233 T3k = T2X + T2Y;
Chris@42 1234 T31 = Rp[WS(rs, 8)];
Chris@42 1235 T32 = Rm[WS(rs, 8)];
Chris@42 1236 T3i = T31 - T32;
Chris@42 1237 T2Z = T2X - T2Y;
Chris@42 1238 T33 = T31 + T32;
Chris@42 1239 T3l = FMA(T3h, T3i, T3j * T3k);
Chris@42 1240 T5z = FNMS(T3j, T3i, T3h * T3k);
Chris@42 1241 }
Chris@42 1242 {
Chris@42 1243 E T34, T7Q, T7R, T4E, T5y, T5B;
Chris@42 1244 T34 = FNMS(T30, T33, T2W * T2Z);
Chris@42 1245 T38 = T34 + T37;
Chris@42 1246 T7l = T37 - T34;
Chris@42 1247 T7Q = T3l + T3g;
Chris@42 1248 T7R = T5w - T5x;
Chris@42 1249 T7S = T7Q - T7R;
Chris@42 1250 T8Y = T7R + T7Q;
Chris@42 1251 {
Chris@42 1252 E T7X, T7Y, T3m, T3z;
Chris@42 1253 T7X = T3y - T3p;
Chris@42 1254 T7Y = T5A - T5z;
Chris@42 1255 T7Z = T7X + T7Y;
Chris@42 1256 T91 = T7Y - T7X;
Chris@42 1257 T3m = T3g - T3l;
Chris@42 1258 T3z = T3p + T3y;
Chris@42 1259 T3A = T3m - T3z;
Chris@42 1260 T6k = T3z + T3m;
Chris@42 1261 }
Chris@42 1262 T4E = FMA(T2W, T33, T30 * T2Z);
Chris@42 1263 T4F = T4D + T4E;
Chris@42 1264 T83 = T4D - T4E;
Chris@42 1265 T5y = T5w + T5x;
Chris@42 1266 T5B = T5z + T5A;
Chris@42 1267 T5C = T5y + T5B;
Chris@42 1268 T6n = T5B - T5y;
Chris@42 1269 {
Chris@42 1270 E T2J, T2S, T4G, T4H;
Chris@42 1271 T2J = FNMS(T2F, T2I, T2B * T2E);
Chris@42 1272 T2S = FNMS(T2O, T2R, T2K * T2N);
Chris@42 1273 T2T = T2J + T2S;
Chris@42 1274 T84 = T2J - T2S;
Chris@42 1275 T4G = FMA(T2B, T2I, T2F * T2E);
Chris@42 1276 T4H = FMA(T2K, T2R, T2O * T2N);
Chris@42 1277 T4I = T4G + T4H;
Chris@42 1278 T7m = T4G - T4H;
Chris@42 1279 }
Chris@42 1280 }
Chris@42 1281 }
Chris@42 1282 {
Chris@42 1283 E T20, T5p, T3D, T4K, T2y, T5t, T3R, T4O, T2f, T5q, T3I, T4L, T2p, T5s, T3O;
Chris@42 1284 E T4N;
Chris@42 1285 {
Chris@42 1286 E T1W, T3C, T1Z, T3B;
Chris@42 1287 {
Chris@42 1288 E T1U, T1V, T1X, T1Y;
Chris@42 1289 T1U = Ip[WS(rs, 2)];
Chris@42 1290 T1V = Im[WS(rs, 2)];
Chris@42 1291 T1W = T1U - T1V;
Chris@42 1292 T3C = T1U + T1V;
Chris@42 1293 T1X = Rp[WS(rs, 2)];
Chris@42 1294 T1Y = Rm[WS(rs, 2)];
Chris@42 1295 T1Z = T1X + T1Y;
Chris@42 1296 T3B = T1X - T1Y;
Chris@42 1297 }
Chris@42 1298 T20 = FNMS(T1d, T1Z, T1b * T1W);
Chris@42 1299 T5p = FNMS(T1H, T3B, T1G * T3C);
Chris@42 1300 T3D = FMA(T1G, T3B, T1H * T3C);
Chris@42 1301 T4K = FMA(T1b, T1Z, T1d * T1W);
Chris@42 1302 }
Chris@42 1303 {
Chris@42 1304 E T2t, T3Q, T2x, T3P;
Chris@42 1305 {
Chris@42 1306 E T2r, T2s, T2v, T2w;
Chris@42 1307 T2r = Ip[WS(rs, 6)];
Chris@42 1308 T2s = Im[WS(rs, 6)];
Chris@42 1309 T2t = T2r - T2s;
Chris@42 1310 T3Q = T2r + T2s;
Chris@42 1311 T2v = Rp[WS(rs, 6)];
Chris@42 1312 T2w = Rm[WS(rs, 6)];
Chris@42 1313 T2x = T2v + T2w;
Chris@42 1314 T3P = T2v - T2w;
Chris@42 1315 }
Chris@42 1316 T2y = FNMS(T2u, T2x, T2q * T2t);
Chris@42 1317 T5t = FNMS(T1i, T3P, T1f * T3Q);
Chris@42 1318 T3R = FMA(T1f, T3P, T1i * T3Q);
Chris@42 1319 T4O = FMA(T2q, T2x, T2u * T2t);
Chris@42 1320 }
Chris@42 1321 {
Chris@42 1322 E T2a, T3H, T2e, T3F;
Chris@42 1323 {
Chris@42 1324 E T28, T29, T2c, T2d;
Chris@42 1325 T28 = Ip[WS(rs, 10)];
Chris@42 1326 T29 = Im[WS(rs, 10)];
Chris@42 1327 T2a = T28 - T29;
Chris@42 1328 T3H = T28 + T29;
Chris@42 1329 T2c = Rp[WS(rs, 10)];
Chris@42 1330 T2d = Rm[WS(rs, 10)];
Chris@42 1331 T2e = T2c + T2d;
Chris@42 1332 T3F = T2c - T2d;
Chris@42 1333 }
Chris@42 1334 T2f = FNMS(T2b, T2e, T27 * T2a);
Chris@42 1335 T5q = FNMS(T3G, T3F, T3E * T3H);
Chris@42 1336 T3I = FMA(T3E, T3F, T3G * T3H);
Chris@42 1337 T4L = FMA(T27, T2e, T2b * T2a);
Chris@42 1338 }
Chris@42 1339 {
Chris@42 1340 E T2k, T3N, T2o, T3L;
Chris@42 1341 {
Chris@42 1342 E T2i, T2j, T2m, T2n;
Chris@42 1343 T2i = Ip[WS(rs, 14)];
Chris@42 1344 T2j = Im[WS(rs, 14)];
Chris@42 1345 T2k = T2i - T2j;
Chris@42 1346 T3N = T2i + T2j;
Chris@42 1347 T2m = Rp[WS(rs, 14)];
Chris@42 1348 T2n = Rm[WS(rs, 14)];
Chris@42 1349 T2o = T2m + T2n;
Chris@42 1350 T3L = T2m - T2n;
Chris@42 1351 }
Chris@42 1352 T2p = FNMS(T2l, T2o, T2h * T2k);
Chris@42 1353 T5s = FNMS(T3M, T3L, T3K * T3N);
Chris@42 1354 T3O = FMA(T3K, T3L, T3M * T3N);
Chris@42 1355 T4N = FMA(T2h, T2o, T2l * T2k);
Chris@42 1356 }
Chris@42 1357 {
Chris@42 1358 E T3J, T3S, T5r, T5u;
Chris@42 1359 T2g = T20 + T2f;
Chris@42 1360 T4M = T4K + T4L;
Chris@42 1361 T4P = T4N + T4O;
Chris@42 1362 T2z = T2p + T2y;
Chris@42 1363 T3J = T3D + T3I;
Chris@42 1364 T3S = T3O + T3R;
Chris@42 1365 T3T = T3J + T3S;
Chris@42 1366 T6m = T3S - T3J;
Chris@42 1367 {
Chris@42 1368 E T7M, T7N, T7h, T7i;
Chris@42 1369 T7M = T5s - T5t;
Chris@42 1370 T7N = T3R - T3O;
Chris@42 1371 T7O = T7M + T7N;
Chris@42 1372 T7V = T7M - T7N;
Chris@42 1373 T7h = T4N - T4O;
Chris@42 1374 T7i = T2p - T2y;
Chris@42 1375 T7j = T7h + T7i;
Chris@42 1376 T87 = T7h - T7i;
Chris@42 1377 }
Chris@42 1378 T5r = T5p + T5q;
Chris@42 1379 T5u = T5s + T5t;
Chris@42 1380 T5v = T5r + T5u;
Chris@42 1381 T6j = T5u - T5r;
Chris@42 1382 {
Chris@42 1383 E T7J, T7K, T7e, T7f;
Chris@42 1384 T7J = T3I - T3D;
Chris@42 1385 T7K = T5p - T5q;
Chris@42 1386 T7L = T7J - T7K;
Chris@42 1387 T7U = T7K + T7J;
Chris@42 1388 T7e = T20 - T2f;
Chris@42 1389 T7f = T4K - T4L;
Chris@42 1390 T7g = T7e - T7f;
Chris@42 1391 T86 = T7f + T7e;
Chris@42 1392 }
Chris@42 1393 }
Chris@42 1394 }
Chris@42 1395 {
Chris@42 1396 E Th, T5a, T3X, T4S, TV, T5e, T49, T4W, Tu, T5b, T42, T4T, TI, T5d, T46;
Chris@42 1397 E T4V;
Chris@42 1398 {
Chris@42 1399 E Ta, T3W, Tg, T3V;
Chris@42 1400 {
Chris@42 1401 E T8, T9, Te, Tf;
Chris@42 1402 T8 = Ip[WS(rs, 1)];
Chris@42 1403 T9 = Im[WS(rs, 1)];
Chris@42 1404 Ta = T8 - T9;
Chris@42 1405 T3W = T8 + T9;
Chris@42 1406 Te = Rp[WS(rs, 1)];
Chris@42 1407 Tf = Rm[WS(rs, 1)];
Chris@42 1408 Tg = Te + Tf;
Chris@42 1409 T3V = Te - Tf;
Chris@42 1410 }
Chris@42 1411 Th = FNMS(Td, Tg, T7 * Ta);
Chris@42 1412 T5a = FNMS(T5, T3V, T2 * T3W);
Chris@42 1413 T3X = FMA(T2, T3V, T5 * T3W);
Chris@42 1414 T4S = FMA(T7, Tg, Td * Ta);
Chris@42 1415 }
Chris@42 1416 {
Chris@42 1417 E TO, T48, TU, T47;
Chris@42 1418 {
Chris@42 1419 E TM, TN, TS, TT;
Chris@42 1420 TM = Ip[WS(rs, 13)];
Chris@42 1421 TN = Im[WS(rs, 13)];
Chris@42 1422 TO = TM - TN;
Chris@42 1423 T48 = TM + TN;
Chris@42 1424 TS = Rp[WS(rs, 13)];
Chris@42 1425 TT = Rm[WS(rs, 13)];
Chris@42 1426 TU = TS + TT;
Chris@42 1427 T47 = TS - TT;
Chris@42 1428 }
Chris@42 1429 TV = FNMS(TR, TU, TL * TO);
Chris@42 1430 T5e = FNMS(Tl, T47, Tj * T48);
Chris@42 1431 T49 = FMA(Tj, T47, Tl * T48);
Chris@42 1432 T4W = FMA(TL, TU, TR * TO);
Chris@42 1433 }
Chris@42 1434 {
Chris@42 1435 E Tp, T41, Tt, T3Z;
Chris@42 1436 {
Chris@42 1437 E Tn, To, Tr, Ts;
Chris@42 1438 Tn = Ip[WS(rs, 9)];
Chris@42 1439 To = Im[WS(rs, 9)];
Chris@42 1440 Tp = Tn - To;
Chris@42 1441 T41 = Tn + To;
Chris@42 1442 Tr = Rp[WS(rs, 9)];
Chris@42 1443 Ts = Rm[WS(rs, 9)];
Chris@42 1444 Tt = Tr + Ts;
Chris@42 1445 T3Z = Tr - Ts;
Chris@42 1446 }
Chris@42 1447 Tu = FNMS(Tq, Tt, Tm * Tp);
Chris@42 1448 T5b = FNMS(T40, T3Z, T3Y * T41);
Chris@42 1449 T42 = FMA(T3Y, T3Z, T40 * T41);
Chris@42 1450 T4T = FMA(Tm, Tt, Tq * Tp);
Chris@42 1451 }
Chris@42 1452 {
Chris@42 1453 E TB, T45, TH, T44;
Chris@42 1454 {
Chris@42 1455 E Tz, TA, TF, TG;
Chris@42 1456 Tz = Ip[WS(rs, 5)];
Chris@42 1457 TA = Im[WS(rs, 5)];
Chris@42 1458 TB = Tz - TA;
Chris@42 1459 T45 = Tz + TA;
Chris@42 1460 TF = Rp[WS(rs, 5)];
Chris@42 1461 TG = Rm[WS(rs, 5)];
Chris@42 1462 TH = TF + TG;
Chris@42 1463 T44 = TF - TG;
Chris@42 1464 }
Chris@42 1465 TI = FNMS(TE, TH, Ty * TB);
Chris@42 1466 T5d = FNMS(T2V, T44, T2U * T45);
Chris@42 1467 T46 = FMA(T2U, T44, T2V * T45);
Chris@42 1468 T4V = FMA(Ty, TH, TE * TB);
Chris@42 1469 }
Chris@42 1470 Tv = Th + Tu;
Chris@42 1471 TW = TI + TV;
Chris@42 1472 T61 = Tv - TW;
Chris@42 1473 T4U = T4S + T4T;
Chris@42 1474 T4X = T4V + T4W;
Chris@42 1475 T62 = T4U - T4X;
Chris@42 1476 {
Chris@42 1477 E T43, T4a, T7t, T7u;
Chris@42 1478 T43 = T3X + T42;
Chris@42 1479 T4a = T46 + T49;
Chris@42 1480 T4b = T43 + T4a;
Chris@42 1481 T6c = T4a - T43;
Chris@42 1482 T7t = T5e - T5d;
Chris@42 1483 T7u = T46 - T49;
Chris@42 1484 T7v = T7t + T7u;
Chris@42 1485 T7C = T7t - T7u;
Chris@42 1486 }
Chris@42 1487 {
Chris@42 1488 E T5c, T5f, T72, T73;
Chris@42 1489 T5c = T5a + T5b;
Chris@42 1490 T5f = T5d + T5e;
Chris@42 1491 T5g = T5c + T5f;
Chris@42 1492 T6f = T5f - T5c;
Chris@42 1493 T72 = T4S - T4T;
Chris@42 1494 T73 = TI - TV;
Chris@42 1495 T74 = T72 + T73;
Chris@42 1496 T8G = T72 - T73;
Chris@42 1497 }
Chris@42 1498 {
Chris@42 1499 E T7q, T7r, T6Z, T70;
Chris@42 1500 T7q = T42 - T3X;
Chris@42 1501 T7r = T5a - T5b;
Chris@42 1502 T7s = T7q - T7r;
Chris@42 1503 T7B = T7r + T7q;
Chris@42 1504 T6Z = Th - Tu;
Chris@42 1505 T70 = T4V - T4W;
Chris@42 1506 T71 = T6Z - T70;
Chris@42 1507 T8F = T6Z + T70;
Chris@42 1508 }
Chris@42 1509 }
Chris@42 1510 {
Chris@42 1511 E T1a, T5h, T4k, T4Z, T1Q, T5l, T4y, T53, T1r, T5i, T4p, T50, T1F, T5k, T4t;
Chris@42 1512 E T52;
Chris@42 1513 {
Chris@42 1514 E T13, T4j, T19, T4f;
Chris@42 1515 {
Chris@42 1516 E T11, T12, T17, T18;
Chris@42 1517 T11 = Ip[WS(rs, 15)];
Chris@42 1518 T12 = Im[WS(rs, 15)];
Chris@42 1519 T13 = T11 - T12;
Chris@42 1520 T4j = T11 + T12;
Chris@42 1521 T17 = Rp[WS(rs, 15)];
Chris@42 1522 T18 = Rm[WS(rs, 15)];
Chris@42 1523 T19 = T17 + T18;
Chris@42 1524 T4f = T17 - T18;
Chris@42 1525 }
Chris@42 1526 T1a = FNMS(T16, T19, T10 * T13);
Chris@42 1527 T5h = FNMS(T4i, T4f, T4e * T4j);
Chris@42 1528 T4k = FMA(T4e, T4f, T4i * T4j);
Chris@42 1529 T4Z = FMA(T10, T19, T16 * T13);
Chris@42 1530 }
Chris@42 1531 {
Chris@42 1532 E T1L, T4x, T1P, T4v;
Chris@42 1533 {
Chris@42 1534 E T1J, T1K, T1N, T1O;
Chris@42 1535 T1J = Ip[WS(rs, 11)];
Chris@42 1536 T1K = Im[WS(rs, 11)];
Chris@42 1537 T1L = T1J - T1K;
Chris@42 1538 T4x = T1J + T1K;
Chris@42 1539 T1N = Rp[WS(rs, 11)];
Chris@42 1540 T1O = Rm[WS(rs, 11)];
Chris@42 1541 T1P = T1N + T1O;
Chris@42 1542 T4v = T1N - T1O;
Chris@42 1543 }
Chris@42 1544 T1Q = FNMS(T1M, T1P, T1I * T1L);
Chris@42 1545 T5l = FNMS(T4w, T4v, T4u * T4x);
Chris@42 1546 T4y = FMA(T4u, T4v, T4w * T4x);
Chris@42 1547 T53 = FMA(T1I, T1P, T1M * T1L);
Chris@42 1548 }
Chris@42 1549 {
Chris@42 1550 E T1m, T4o, T1q, T4m;
Chris@42 1551 {
Chris@42 1552 E T1k, T1l, T1o, T1p;
Chris@42 1553 T1k = Ip[WS(rs, 7)];
Chris@42 1554 T1l = Im[WS(rs, 7)];
Chris@42 1555 T1m = T1k - T1l;
Chris@42 1556 T4o = T1k + T1l;
Chris@42 1557 T1o = Rp[WS(rs, 7)];
Chris@42 1558 T1p = Rm[WS(rs, 7)];
Chris@42 1559 T1q = T1o + T1p;
Chris@42 1560 T4m = T1o - T1p;
Chris@42 1561 }
Chris@42 1562 T1r = FNMS(T1n, T1q, T1j * T1m);
Chris@42 1563 T5i = FNMS(T4n, T4m, T4l * T4o);
Chris@42 1564 T4p = FMA(T4l, T4m, T4n * T4o);
Chris@42 1565 T50 = FMA(T1j, T1q, T1n * T1m);
Chris@42 1566 }
Chris@42 1567 {
Chris@42 1568 E T1y, T4s, T1E, T4r;
Chris@42 1569 {
Chris@42 1570 E T1w, T1x, T1C, T1D;
Chris@42 1571 T1w = Ip[WS(rs, 3)];
Chris@42 1572 T1x = Im[WS(rs, 3)];
Chris@42 1573 T1y = T1w - T1x;
Chris@42 1574 T4s = T1w + T1x;
Chris@42 1575 T1C = Rp[WS(rs, 3)];
Chris@42 1576 T1D = Rm[WS(rs, 3)];
Chris@42 1577 T1E = T1C + T1D;
Chris@42 1578 T4r = T1C - T1D;
Chris@42 1579 }
Chris@42 1580 T1F = FNMS(T1B, T1E, T1v * T1y);
Chris@42 1581 T5k = FNMS(T26, T4r, T23 * T4s);
Chris@42 1582 T4t = FMA(T23, T4r, T26 * T4s);
Chris@42 1583 T52 = FMA(T1v, T1E, T1B * T1y);
Chris@42 1584 }
Chris@42 1585 T1s = T1a + T1r;
Chris@42 1586 T1R = T1F + T1Q;
Chris@42 1587 T65 = T1s - T1R;
Chris@42 1588 T51 = T4Z + T50;
Chris@42 1589 T54 = T52 + T53;
Chris@42 1590 T64 = T51 - T54;
Chris@42 1591 {
Chris@42 1592 E T4q, T4z, T7E, T7F;
Chris@42 1593 T4q = T4k + T4p;
Chris@42 1594 T4z = T4t + T4y;
Chris@42 1595 T4A = T4q + T4z;
Chris@42 1596 T6g = T4z - T4q;
Chris@42 1597 T7E = T5h - T5i;
Chris@42 1598 T7F = T4y - T4t;
Chris@42 1599 T7G = T7E + T7F;
Chris@42 1600 T8U = T7E - T7F;
Chris@42 1601 }
Chris@42 1602 {
Chris@42 1603 E T5j, T5m, T79, T7a;
Chris@42 1604 T5j = T5h + T5i;
Chris@42 1605 T5m = T5k + T5l;
Chris@42 1606 T5n = T5j + T5m;
Chris@42 1607 T6d = T5j - T5m;
Chris@42 1608 T79 = T4Z - T50;
Chris@42 1609 T7a = T1F - T1Q;
Chris@42 1610 T7b = T79 + T7a;
Chris@42 1611 T8J = T79 - T7a;
Chris@42 1612 }
Chris@42 1613 {
Chris@42 1614 E T7x, T7y, T76, T77;
Chris@42 1615 T7x = T4p - T4k;
Chris@42 1616 T7y = T5k - T5l;
Chris@42 1617 T7z = T7x - T7y;
Chris@42 1618 T8R = T7x + T7y;
Chris@42 1619 T76 = T1a - T1r;
Chris@42 1620 T77 = T52 - T53;
Chris@42 1621 T78 = T76 - T77;
Chris@42 1622 T8I = T76 + T77;
Chris@42 1623 }
Chris@42 1624 }
Chris@42 1625 {
Chris@42 1626 E T1T, T5S, T5M, T5W, T5P, T5X, T3a, T5I, T4C, T58, T56, T5H, T5E, T5G, T4R;
Chris@42 1627 E T5R;
Chris@42 1628 {
Chris@42 1629 E TX, T1S, T5K, T5L;
Chris@42 1630 TX = Tv + TW;
Chris@42 1631 T1S = T1s + T1R;
Chris@42 1632 T1T = TX + T1S;
Chris@42 1633 T5S = TX - T1S;
Chris@42 1634 T5K = T5n - T5g;
Chris@42 1635 T5L = T4b - T4A;
Chris@42 1636 T5M = T5K + T5L;
Chris@42 1637 T5W = T5K - T5L;
Chris@42 1638 }
Chris@42 1639 {
Chris@42 1640 E T5N, T5O, T2A, T39;
Chris@42 1641 T5N = T3T + T3A;
Chris@42 1642 T5O = T5C - T5v;
Chris@42 1643 T5P = T5N - T5O;
Chris@42 1644 T5X = T5N + T5O;
Chris@42 1645 T2A = T2g + T2z;
Chris@42 1646 T39 = T2T + T38;
Chris@42 1647 T3a = T2A + T39;
Chris@42 1648 T5I = T39 - T2A;
Chris@42 1649 }
Chris@42 1650 {
Chris@42 1651 E T3U, T4B, T4Y, T55;
Chris@42 1652 T3U = T3A - T3T;
Chris@42 1653 T4B = T4b + T4A;
Chris@42 1654 T4C = T3U - T4B;
Chris@42 1655 T58 = T4B + T3U;
Chris@42 1656 T4Y = T4U + T4X;
Chris@42 1657 T55 = T51 + T54;
Chris@42 1658 T56 = T4Y + T55;
Chris@42 1659 T5H = T55 - T4Y;
Chris@42 1660 }
Chris@42 1661 {
Chris@42 1662 E T5o, T5D, T4J, T4Q;
Chris@42 1663 T5o = T5g + T5n;
Chris@42 1664 T5D = T5v + T5C;
Chris@42 1665 T5E = T5o - T5D;
Chris@42 1666 T5G = T5o + T5D;
Chris@42 1667 T4J = T4F + T4I;
Chris@42 1668 T4Q = T4M + T4P;
Chris@42 1669 T4R = T4J + T4Q;
Chris@42 1670 T5R = T4J - T4Q;
Chris@42 1671 }
Chris@42 1672 {
Chris@42 1673 E T3b, T5F, T57, T59;
Chris@42 1674 T3b = T1T + T3a;
Chris@42 1675 Ip[0] = KP500000000 * (T3b + T4C);
Chris@42 1676 Im[WS(rs, 15)] = KP500000000 * (T4C - T3b);
Chris@42 1677 T5F = T4R + T56;
Chris@42 1678 Rm[WS(rs, 15)] = KP500000000 * (T5F - T5G);
Chris@42 1679 Rp[0] = KP500000000 * (T5F + T5G);
Chris@42 1680 T57 = T4R - T56;
Chris@42 1681 Rm[WS(rs, 7)] = KP500000000 * (T57 - T58);
Chris@42 1682 Rp[WS(rs, 8)] = KP500000000 * (T57 + T58);
Chris@42 1683 T59 = T3a - T1T;
Chris@42 1684 Ip[WS(rs, 8)] = KP500000000 * (T59 + T5E);
Chris@42 1685 Im[WS(rs, 7)] = KP500000000 * (T5E - T59);
Chris@42 1686 }
Chris@42 1687 {
Chris@42 1688 E T5J, T5Q, T5Z, T60;
Chris@42 1689 T5J = KP500000000 * (T5H + T5I);
Chris@42 1690 T5Q = KP353553390 * (T5M + T5P);
Chris@42 1691 Ip[WS(rs, 4)] = T5J + T5Q;
Chris@42 1692 Im[WS(rs, 11)] = T5Q - T5J;
Chris@42 1693 T5Z = KP500000000 * (T5R + T5S);
Chris@42 1694 T60 = KP353553390 * (T5W + T5X);
Chris@42 1695 Rm[WS(rs, 11)] = T5Z - T60;
Chris@42 1696 Rp[WS(rs, 4)] = T5Z + T60;
Chris@42 1697 }
Chris@42 1698 {
Chris@42 1699 E T5T, T5U, T5V, T5Y;
Chris@42 1700 T5T = KP500000000 * (T5R - T5S);
Chris@42 1701 T5U = KP353553390 * (T5P - T5M);
Chris@42 1702 Rm[WS(rs, 3)] = T5T - T5U;
Chris@42 1703 Rp[WS(rs, 12)] = T5T + T5U;
Chris@42 1704 T5V = KP500000000 * (T5I - T5H);
Chris@42 1705 T5Y = KP353553390 * (T5W - T5X);
Chris@42 1706 Ip[WS(rs, 12)] = T5V + T5Y;
Chris@42 1707 Im[WS(rs, 3)] = T5Y - T5V;
Chris@42 1708 }
Chris@42 1709 }
Chris@42 1710 {
Chris@42 1711 E T67, T6Q, T6K, T6U, T6N, T6V, T6a, T6G, T6i, T6A, T6t, T6P, T6w, T6F, T6p;
Chris@42 1712 E T6B;
Chris@42 1713 {
Chris@42 1714 E T63, T66, T6I, T6J;
Chris@42 1715 T63 = T61 - T62;
Chris@42 1716 T66 = T64 + T65;
Chris@42 1717 T67 = KP353553390 * (T63 + T66);
Chris@42 1718 T6Q = KP353553390 * (T63 - T66);
Chris@42 1719 T6I = T6d - T6c;
Chris@42 1720 T6J = T6g - T6f;
Chris@42 1721 T6K = FMA(KP461939766, T6I, KP191341716 * T6J);
Chris@42 1722 T6U = FNMS(KP461939766, T6J, KP191341716 * T6I);
Chris@42 1723 }
Chris@42 1724 {
Chris@42 1725 E T6L, T6M, T68, T69;
Chris@42 1726 T6L = T6k - T6j;
Chris@42 1727 T6M = T6n - T6m;
Chris@42 1728 T6N = FNMS(KP461939766, T6M, KP191341716 * T6L);
Chris@42 1729 T6V = FMA(KP461939766, T6L, KP191341716 * T6M);
Chris@42 1730 T68 = T4P - T4M;
Chris@42 1731 T69 = T38 - T2T;
Chris@42 1732 T6a = KP500000000 * (T68 + T69);
Chris@42 1733 T6G = KP500000000 * (T69 - T68);
Chris@42 1734 }
Chris@42 1735 {
Chris@42 1736 E T6e, T6h, T6r, T6s;
Chris@42 1737 T6e = T6c + T6d;
Chris@42 1738 T6h = T6f + T6g;
Chris@42 1739 T6i = FMA(KP191341716, T6e, KP461939766 * T6h);
Chris@42 1740 T6A = FNMS(KP191341716, T6h, KP461939766 * T6e);
Chris@42 1741 T6r = T4F - T4I;
Chris@42 1742 T6s = T2g - T2z;
Chris@42 1743 T6t = KP500000000 * (T6r + T6s);
Chris@42 1744 T6P = KP500000000 * (T6r - T6s);
Chris@42 1745 }
Chris@42 1746 {
Chris@42 1747 E T6u, T6v, T6l, T6o;
Chris@42 1748 T6u = T62 + T61;
Chris@42 1749 T6v = T64 - T65;
Chris@42 1750 T6w = KP353553390 * (T6u + T6v);
Chris@42 1751 T6F = KP353553390 * (T6v - T6u);
Chris@42 1752 T6l = T6j + T6k;
Chris@42 1753 T6o = T6m + T6n;
Chris@42 1754 T6p = FNMS(KP191341716, T6o, KP461939766 * T6l);
Chris@42 1755 T6B = FMA(KP191341716, T6l, KP461939766 * T6o);
Chris@42 1756 }
Chris@42 1757 {
Chris@42 1758 E T6b, T6q, T6D, T6E;
Chris@42 1759 T6b = T67 + T6a;
Chris@42 1760 T6q = T6i + T6p;
Chris@42 1761 Ip[WS(rs, 2)] = T6b + T6q;
Chris@42 1762 Im[WS(rs, 13)] = T6q - T6b;
Chris@42 1763 T6D = T6t + T6w;
Chris@42 1764 T6E = T6A + T6B;
Chris@42 1765 Rm[WS(rs, 13)] = T6D - T6E;
Chris@42 1766 Rp[WS(rs, 2)] = T6D + T6E;
Chris@42 1767 }
Chris@42 1768 {
Chris@42 1769 E T6x, T6y, T6z, T6C;
Chris@42 1770 T6x = T6t - T6w;
Chris@42 1771 T6y = T6p - T6i;
Chris@42 1772 Rm[WS(rs, 5)] = T6x - T6y;
Chris@42 1773 Rp[WS(rs, 10)] = T6x + T6y;
Chris@42 1774 T6z = T6a - T67;
Chris@42 1775 T6C = T6A - T6B;
Chris@42 1776 Ip[WS(rs, 10)] = T6z + T6C;
Chris@42 1777 Im[WS(rs, 5)] = T6C - T6z;
Chris@42 1778 }
Chris@42 1779 {
Chris@42 1780 E T6H, T6O, T6X, T6Y;
Chris@42 1781 T6H = T6F + T6G;
Chris@42 1782 T6O = T6K + T6N;
Chris@42 1783 Ip[WS(rs, 6)] = T6H + T6O;
Chris@42 1784 Im[WS(rs, 9)] = T6O - T6H;
Chris@42 1785 T6X = T6P + T6Q;
Chris@42 1786 T6Y = T6U + T6V;
Chris@42 1787 Rm[WS(rs, 9)] = T6X - T6Y;
Chris@42 1788 Rp[WS(rs, 6)] = T6X + T6Y;
Chris@42 1789 }
Chris@42 1790 {
Chris@42 1791 E T6R, T6S, T6T, T6W;
Chris@42 1792 T6R = T6P - T6Q;
Chris@42 1793 T6S = T6N - T6K;
Chris@42 1794 Rm[WS(rs, 1)] = T6R - T6S;
Chris@42 1795 Rp[WS(rs, 14)] = T6R + T6S;
Chris@42 1796 T6T = T6G - T6F;
Chris@42 1797 T6W = T6U - T6V;
Chris@42 1798 Ip[WS(rs, 14)] = T6T + T6W;
Chris@42 1799 Im[WS(rs, 1)] = T6W - T6T;
Chris@42 1800 }
Chris@42 1801 }
Chris@42 1802 {
Chris@42 1803 E T7d, T8w, T7o, T8m, T8c, T8l, T89, T8v, T81, T8B, T8h, T8t, T7I, T8A, T8g;
Chris@42 1804 E T8q;
Chris@42 1805 {
Chris@42 1806 E T75, T7c, T85, T88;
Chris@42 1807 T75 = FNMS(KP191341716, T74, KP461939766 * T71);
Chris@42 1808 T7c = FMA(KP461939766, T78, KP191341716 * T7b);
Chris@42 1809 T7d = T75 + T7c;
Chris@42 1810 T8w = T75 - T7c;
Chris@42 1811 {
Chris@42 1812 E T7k, T7n, T8a, T8b;
Chris@42 1813 T7k = KP353553390 * (T7g + T7j);
Chris@42 1814 T7n = KP500000000 * (T7l - T7m);
Chris@42 1815 T7o = T7k + T7n;
Chris@42 1816 T8m = T7n - T7k;
Chris@42 1817 T8a = FMA(KP191341716, T71, KP461939766 * T74);
Chris@42 1818 T8b = FNMS(KP191341716, T78, KP461939766 * T7b);
Chris@42 1819 T8c = T8a + T8b;
Chris@42 1820 T8l = T8b - T8a;
Chris@42 1821 }
Chris@42 1822 T85 = KP500000000 * (T83 + T84);
Chris@42 1823 T88 = KP353553390 * (T86 + T87);
Chris@42 1824 T89 = T85 + T88;
Chris@42 1825 T8v = T85 - T88;
Chris@42 1826 {
Chris@42 1827 E T7T, T8r, T80, T8s, T7P, T7W;
Chris@42 1828 T7P = KP707106781 * (T7L + T7O);
Chris@42 1829 T7T = T7P + T7S;
Chris@42 1830 T8r = T7S - T7P;
Chris@42 1831 T7W = KP707106781 * (T7U + T7V);
Chris@42 1832 T80 = T7W + T7Z;
Chris@42 1833 T8s = T7Z - T7W;
Chris@42 1834 T81 = FNMS(KP097545161, T80, KP490392640 * T7T);
Chris@42 1835 T8B = FMA(KP415734806, T8r, KP277785116 * T8s);
Chris@42 1836 T8h = FMA(KP097545161, T7T, KP490392640 * T80);
Chris@42 1837 T8t = FNMS(KP415734806, T8s, KP277785116 * T8r);
Chris@42 1838 }
Chris@42 1839 {
Chris@42 1840 E T7A, T8o, T7H, T8p, T7w, T7D;
Chris@42 1841 T7w = KP707106781 * (T7s + T7v);
Chris@42 1842 T7A = T7w + T7z;
Chris@42 1843 T8o = T7z - T7w;
Chris@42 1844 T7D = KP707106781 * (T7B + T7C);
Chris@42 1845 T7H = T7D + T7G;
Chris@42 1846 T8p = T7G - T7D;
Chris@42 1847 T7I = FMA(KP490392640, T7A, KP097545161 * T7H);
Chris@42 1848 T8A = FNMS(KP415734806, T8o, KP277785116 * T8p);
Chris@42 1849 T8g = FNMS(KP097545161, T7A, KP490392640 * T7H);
Chris@42 1850 T8q = FMA(KP277785116, T8o, KP415734806 * T8p);
Chris@42 1851 }
Chris@42 1852 }
Chris@42 1853 {
Chris@42 1854 E T7p, T82, T8j, T8k;
Chris@42 1855 T7p = T7d + T7o;
Chris@42 1856 T82 = T7I + T81;
Chris@42 1857 Ip[WS(rs, 1)] = T7p + T82;
Chris@42 1858 Im[WS(rs, 14)] = T82 - T7p;
Chris@42 1859 T8j = T89 + T8c;
Chris@42 1860 T8k = T8g + T8h;
Chris@42 1861 Rm[WS(rs, 14)] = T8j - T8k;
Chris@42 1862 Rp[WS(rs, 1)] = T8j + T8k;
Chris@42 1863 }
Chris@42 1864 {
Chris@42 1865 E T8d, T8e, T8f, T8i;
Chris@42 1866 T8d = T89 - T8c;
Chris@42 1867 T8e = T81 - T7I;
Chris@42 1868 Rm[WS(rs, 6)] = T8d - T8e;
Chris@42 1869 Rp[WS(rs, 9)] = T8d + T8e;
Chris@42 1870 T8f = T7o - T7d;
Chris@42 1871 T8i = T8g - T8h;
Chris@42 1872 Ip[WS(rs, 9)] = T8f + T8i;
Chris@42 1873 Im[WS(rs, 6)] = T8i - T8f;
Chris@42 1874 }
Chris@42 1875 {
Chris@42 1876 E T8n, T8u, T8D, T8E;
Chris@42 1877 T8n = T8l + T8m;
Chris@42 1878 T8u = T8q + T8t;
Chris@42 1879 Ip[WS(rs, 5)] = T8n + T8u;
Chris@42 1880 Im[WS(rs, 10)] = T8u - T8n;
Chris@42 1881 T8D = T8v + T8w;
Chris@42 1882 T8E = T8A + T8B;
Chris@42 1883 Rm[WS(rs, 10)] = T8D - T8E;
Chris@42 1884 Rp[WS(rs, 5)] = T8D + T8E;
Chris@42 1885 }
Chris@42 1886 {
Chris@42 1887 E T8x, T8y, T8z, T8C;
Chris@42 1888 T8x = T8v - T8w;
Chris@42 1889 T8y = T8t - T8q;
Chris@42 1890 Rm[WS(rs, 2)] = T8x - T8y;
Chris@42 1891 Rp[WS(rs, 13)] = T8x + T8y;
Chris@42 1892 T8z = T8m - T8l;
Chris@42 1893 T8C = T8A - T8B;
Chris@42 1894 Ip[WS(rs, 13)] = T8z + T8C;
Chris@42 1895 Im[WS(rs, 2)] = T8C - T8z;
Chris@42 1896 }
Chris@42 1897 }
Chris@42 1898 {
Chris@42 1899 E T8L, T9u, T8O, T9k, T9a, T9j, T97, T9t, T93, T9z, T9f, T9r, T8W, T9y, T9e;
Chris@42 1900 E T9o;
Chris@42 1901 {
Chris@42 1902 E T8H, T8K, T95, T96;
Chris@42 1903 T8H = FNMS(KP461939766, T8G, KP191341716 * T8F);
Chris@42 1904 T8K = FMA(KP191341716, T8I, KP461939766 * T8J);
Chris@42 1905 T8L = T8H + T8K;
Chris@42 1906 T9u = T8H - T8K;
Chris@42 1907 {
Chris@42 1908 E T8M, T8N, T98, T99;
Chris@42 1909 T8M = KP353553390 * (T87 - T86);
Chris@42 1910 T8N = KP500000000 * (T7m + T7l);
Chris@42 1911 T8O = T8M + T8N;
Chris@42 1912 T9k = T8N - T8M;
Chris@42 1913 T98 = FMA(KP461939766, T8F, KP191341716 * T8G);
Chris@42 1914 T99 = FNMS(KP461939766, T8I, KP191341716 * T8J);
Chris@42 1915 T9a = T98 + T99;
Chris@42 1916 T9j = T99 - T98;
Chris@42 1917 }
Chris@42 1918 T95 = KP500000000 * (T83 - T84);
Chris@42 1919 T96 = KP353553390 * (T7g - T7j);
Chris@42 1920 T97 = T95 + T96;
Chris@42 1921 T9t = T95 - T96;
Chris@42 1922 {
Chris@42 1923 E T8Z, T9p, T92, T9q, T8X, T90;
Chris@42 1924 T8X = KP707106781 * (T7V - T7U);
Chris@42 1925 T8Z = T8X + T8Y;
Chris@42 1926 T9p = T8Y - T8X;
Chris@42 1927 T90 = KP707106781 * (T7L - T7O);
Chris@42 1928 T92 = T90 + T91;
Chris@42 1929 T9q = T91 - T90;
Chris@42 1930 T93 = FNMS(KP277785116, T92, KP415734806 * T8Z);
Chris@42 1931 T9z = FMA(KP490392640, T9p, KP097545161 * T9q);
Chris@42 1932 T9f = FMA(KP277785116, T8Z, KP415734806 * T92);
Chris@42 1933 T9r = FNMS(KP490392640, T9q, KP097545161 * T9p);
Chris@42 1934 }
Chris@42 1935 {
Chris@42 1936 E T8S, T9m, T8V, T9n, T8Q, T8T;
Chris@42 1937 T8Q = KP707106781 * (T7C - T7B);
Chris@42 1938 T8S = T8Q + T8R;
Chris@42 1939 T9m = T8R - T8Q;
Chris@42 1940 T8T = KP707106781 * (T7s - T7v);
Chris@42 1941 T8V = T8T + T8U;
Chris@42 1942 T9n = T8U - T8T;
Chris@42 1943 T8W = FMA(KP415734806, T8S, KP277785116 * T8V);
Chris@42 1944 T9y = FNMS(KP490392640, T9m, KP097545161 * T9n);
Chris@42 1945 T9e = FNMS(KP277785116, T8S, KP415734806 * T8V);
Chris@42 1946 T9o = FMA(KP097545161, T9m, KP490392640 * T9n);
Chris@42 1947 }
Chris@42 1948 }
Chris@42 1949 {
Chris@42 1950 E T8P, T94, T9h, T9i;
Chris@42 1951 T8P = T8L + T8O;
Chris@42 1952 T94 = T8W + T93;
Chris@42 1953 Ip[WS(rs, 3)] = T8P + T94;
Chris@42 1954 Im[WS(rs, 12)] = T94 - T8P;
Chris@42 1955 T9h = T97 + T9a;
Chris@42 1956 T9i = T9e + T9f;
Chris@42 1957 Rm[WS(rs, 12)] = T9h - T9i;
Chris@42 1958 Rp[WS(rs, 3)] = T9h + T9i;
Chris@42 1959 }
Chris@42 1960 {
Chris@42 1961 E T9b, T9c, T9d, T9g;
Chris@42 1962 T9b = T97 - T9a;
Chris@42 1963 T9c = T93 - T8W;
Chris@42 1964 Rm[WS(rs, 4)] = T9b - T9c;
Chris@42 1965 Rp[WS(rs, 11)] = T9b + T9c;
Chris@42 1966 T9d = T8O - T8L;
Chris@42 1967 T9g = T9e - T9f;
Chris@42 1968 Ip[WS(rs, 11)] = T9d + T9g;
Chris@42 1969 Im[WS(rs, 4)] = T9g - T9d;
Chris@42 1970 }
Chris@42 1971 {
Chris@42 1972 E T9l, T9s, T9B, T9C;
Chris@42 1973 T9l = T9j + T9k;
Chris@42 1974 T9s = T9o + T9r;
Chris@42 1975 Ip[WS(rs, 7)] = T9l + T9s;
Chris@42 1976 Im[WS(rs, 8)] = T9s - T9l;
Chris@42 1977 T9B = T9t + T9u;
Chris@42 1978 T9C = T9y + T9z;
Chris@42 1979 Rm[WS(rs, 8)] = T9B - T9C;
Chris@42 1980 Rp[WS(rs, 7)] = T9B + T9C;
Chris@42 1981 }
Chris@42 1982 {
Chris@42 1983 E T9v, T9w, T9x, T9A;
Chris@42 1984 T9v = T9t - T9u;
Chris@42 1985 T9w = T9r - T9o;
Chris@42 1986 Rm[0] = T9v - T9w;
Chris@42 1987 Rp[WS(rs, 15)] = T9v + T9w;
Chris@42 1988 T9x = T9k - T9j;
Chris@42 1989 T9A = T9y - T9z;
Chris@42 1990 Ip[WS(rs, 15)] = T9x + T9A;
Chris@42 1991 Im[0] = T9A - T9x;
Chris@42 1992 }
Chris@42 1993 }
Chris@42 1994 }
Chris@42 1995 }
Chris@42 1996 }
Chris@42 1997 }
Chris@42 1998
/*
 * Twiddle instruction table for this codelet; `desc` below points at it.
 * It holds four TW_CEXP entries whose last field is 1, 3, 9 and 27, with a
 * TW_NEXT entry as the final element.
 * NOTE(review): the generator command line in this file's header uses
 * -twiddle-log3 with -n 32, so these are presumably the only twiddle powers
 * stored, the remaining ones being derived inside the codelet -- confirm
 * against the tw_instr / TW_CEXP definitions.
 */
Chris@42 1999 static const tw_instr twinstr[] = {
Chris@42 2000 {TW_CEXP, 1, 1},
Chris@42 2001 {TW_CEXP, 1, 3},
Chris@42 2002 {TW_CEXP, 1, 9},
Chris@42 2003 {TW_CEXP, 1, 27},
Chris@42 2004 {TW_NEXT, 1, 0}
Chris@42 2005 };
Chris@42 2006
/*
 * Descriptor passed to the registration call below. Its initializer lists,
 * in order: 32, the codelet name string, the twiddle table `twinstr`,
 * &GENUS, and a four-number aggregate.
 * NOTE(review): 32 presumably is the transform size (the generator was run
 * with -n 32) and {440, 188, 112, 0} presumably an operation count
 * (adds, muls, fmas, other) -- confirm against the hc2c_desc declaration.
 */
Chris@42 2007 static const hc2c_desc desc = { 32, "hc2cfdft2_32", twinstr, &GENUS, {440, 188, 112, 0} };
Chris@42 2008
/*
 * Registration entry point for this codelet.
 *
 * p: planner forwarded unchanged to X(khc2c_register).
 *
 * Passes the hc2cfdft2_32 function and the address of `desc` to
 * X(khc2c_register), together with the HC2C_VIA_DFT constant. Returns nothing.
 */
Chris@42 2009 void X(codelet_hc2cfdft2_32) (planner *p) {
Chris@42 2010 X(khc2c_register) (p, hc2cfdft2_32, &desc, HC2C_VIA_DFT);
Chris@42 2011 }
Chris@42 2012 #endif /* HAVE_FMA */