annotate src/fftw-3.3.8/rdft/scalar/r2cf/hc2cfdft_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:12 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2cdft.native -fma -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hc2cfdft_32 -include rdft/scalar/hc2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 498 FP additions, 324 FP multiplications,
Chris@82 32 * (or, 300 additions, 126 multiplications, 198 fused multiply/add),
Chris@82 33 * 113 stack variables, 8 constants, and 128 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cf.h"
Chris@82 36
Chris@82 37 static void hc2cfdft_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
Chris@82 39 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 40 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 41 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@82 42 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@82 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 44 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@82 45 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 46 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 47 {
Chris@82 48 INT m;
Chris@82 49 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) {
Chris@82 50 E T3B, T89, T61, T8l, T2F, T8t, T4B, T7p, T1n, T7L, T5e, T7I, T4u, T82, T5E;
Chris@82 51 E T7R, T3m, T8k, T5W, T8a, T2r, T8u, T4G, T7q, T12, T7K, T59, T7H, T4h, T81;
Chris@82 52 E T5z, T7Q, Tl, T7D, T4Y, T7A, T3Q, T5o, T7V, T84, T1K, T7t, T4M, T7s, T2V;
Chris@82 53 E T8n, T5L, T8e, T25, T7w, T4R, T7v, T38, T8o, T5Q, T8h, TG, T7E, T53, T7B;
Chris@82 54 E T43, T5t, T7Y, T85;
Chris@82 55 {
Chris@82 56 E T2E, T3z, T4y, T3y, T5Z, T3t, T3x, T2v, T2A, T3r, T3q, T5X, T3n, T3p, T2w;
Chris@82 57 E T4z, T3s, T3A;
Chris@82 58 {
Chris@82 59 E T2C, T2D, T3u, T3v, T3w;
Chris@82 60 T2C = Ip[0];
Chris@82 61 T2D = Im[0];
Chris@82 62 T2E = T2C - T2D;
Chris@82 63 T3z = T2C + T2D;
Chris@82 64 T3u = Rm[0];
Chris@82 65 T3v = Rp[0];
Chris@82 66 T3w = T3u - T3v;
Chris@82 67 T4y = T3v + T3u;
Chris@82 68 T3y = W[1];
Chris@82 69 T5Z = T3y * T3w;
Chris@82 70 T3t = W[0];
Chris@82 71 T3x = T3t * T3w;
Chris@82 72 {
Chris@82 73 E T2t, T2u, T3o, T2y, T2z, T2s;
Chris@82 74 T2t = Ip[WS(rs, 8)];
Chris@82 75 T2u = Im[WS(rs, 8)];
Chris@82 76 T2v = T2t - T2u;
Chris@82 77 T2y = Rp[WS(rs, 8)];
Chris@82 78 T2z = Rm[WS(rs, 8)];
Chris@82 79 T2A = T2y + T2z;
Chris@82 80 T3o = T2z - T2y;
Chris@82 81 T3r = T2t + T2u;
Chris@82 82 T3q = W[33];
Chris@82 83 T5X = T3q * T3o;
Chris@82 84 T3n = W[32];
Chris@82 85 T3p = T3n * T3o;
Chris@82 86 T2s = W[30];
Chris@82 87 T2w = T2s * T2v;
Chris@82 88 T4z = T2s * T2A;
Chris@82 89 }
Chris@82 90 }
Chris@82 91 T3s = FNMS(T3q, T3r, T3p);
Chris@82 92 T3A = FNMS(T3y, T3z, T3x);
Chris@82 93 T3B = T3s + T3A;
Chris@82 94 T89 = T3A - T3s;
Chris@82 95 {
Chris@82 96 E T5Y, T60, T2B, T4A, T2x;
Chris@82 97 T5Y = FMA(T3n, T3r, T5X);
Chris@82 98 T60 = FMA(T3t, T3z, T5Z);
Chris@82 99 T61 = T5Y + T60;
Chris@82 100 T8l = T60 - T5Y;
Chris@82 101 T2x = W[31];
Chris@82 102 T2B = FNMS(T2x, T2A, T2w);
Chris@82 103 T4A = FMA(T2x, T2v, T4z);
Chris@82 104 T2F = T2B + T2E;
Chris@82 105 T8t = T4y - T4A;
Chris@82 106 T4B = T4y + T4A;
Chris@82 107 T7p = T2E - T2B;
Chris@82 108 }
Chris@82 109 }
Chris@82 110 {
Chris@82 111 E T16, T4m, T1b, T4j, T17, T5a, T4k, T5A, T1g, T4s, T1l, T4p, T1h, T5c, T4q;
Chris@82 112 E T5C;
Chris@82 113 {
Chris@82 114 E T13, T4i, T1d, T4o;
Chris@82 115 {
Chris@82 116 E T14, T15, T19, T1a;
Chris@82 117 T14 = Ip[WS(rs, 3)];
Chris@82 118 T15 = Im[WS(rs, 3)];
Chris@82 119 T16 = T14 - T15;
Chris@82 120 T4m = T14 + T15;
Chris@82 121 T19 = Rp[WS(rs, 3)];
Chris@82 122 T1a = Rm[WS(rs, 3)];
Chris@82 123 T1b = T19 + T1a;
Chris@82 124 T4j = T19 - T1a;
Chris@82 125 }
Chris@82 126 T13 = W[10];
Chris@82 127 T17 = T13 * T16;
Chris@82 128 T5a = T13 * T1b;
Chris@82 129 T4i = W[12];
Chris@82 130 T4k = T4i * T4j;
Chris@82 131 T5A = T4i * T4m;
Chris@82 132 {
Chris@82 133 E T1e, T1f, T1j, T1k;
Chris@82 134 T1e = Ip[WS(rs, 11)];
Chris@82 135 T1f = Im[WS(rs, 11)];
Chris@82 136 T1g = T1e - T1f;
Chris@82 137 T4s = T1e + T1f;
Chris@82 138 T1j = Rp[WS(rs, 11)];
Chris@82 139 T1k = Rm[WS(rs, 11)];
Chris@82 140 T1l = T1j + T1k;
Chris@82 141 T4p = T1j - T1k;
Chris@82 142 }
Chris@82 143 T1d = W[42];
Chris@82 144 T1h = T1d * T1g;
Chris@82 145 T5c = T1d * T1l;
Chris@82 146 T4o = W[44];
Chris@82 147 T4q = T4o * T4p;
Chris@82 148 T5C = T4o * T4s;
Chris@82 149 }
Chris@82 150 {
Chris@82 151 E T1c, T5b, T1m, T5d, T18, T1i;
Chris@82 152 T18 = W[11];
Chris@82 153 T1c = FNMS(T18, T1b, T17);
Chris@82 154 T5b = FMA(T18, T16, T5a);
Chris@82 155 T1i = W[43];
Chris@82 156 T1m = FNMS(T1i, T1l, T1h);
Chris@82 157 T5d = FMA(T1i, T1g, T5c);
Chris@82 158 T1n = T1c + T1m;
Chris@82 159 T7L = T1c - T1m;
Chris@82 160 T5e = T5b + T5d;
Chris@82 161 T7I = T5b - T5d;
Chris@82 162 }
Chris@82 163 {
Chris@82 164 E T4n, T5B, T4t, T5D, T4l, T4r;
Chris@82 165 T4l = W[13];
Chris@82 166 T4n = FMA(T4l, T4m, T4k);
Chris@82 167 T5B = FNMS(T4l, T4j, T5A);
Chris@82 168 T4r = W[45];
Chris@82 169 T4t = FMA(T4r, T4s, T4q);
Chris@82 170 T5D = FNMS(T4r, T4p, T5C);
Chris@82 171 T4u = T4n + T4t;
Chris@82 172 T82 = T4t - T4n;
Chris@82 173 T5E = T5B + T5D;
Chris@82 174 T7R = T5D - T5B;
Chris@82 175 }
Chris@82 176 }
Chris@82 177 {
Chris@82 178 E T2a, T2f, T3e, T3d, T5S, T3a, T3c, T2b, T4C, T2k, T2p, T3k, T3j, T5U, T3g;
Chris@82 179 E T3i, T2l, T4E;
Chris@82 180 {
Chris@82 181 E T28, T29, T3b, T2d, T2e, T27;
Chris@82 182 T28 = Ip[WS(rs, 4)];
Chris@82 183 T29 = Im[WS(rs, 4)];
Chris@82 184 T2a = T28 - T29;
Chris@82 185 T2d = Rp[WS(rs, 4)];
Chris@82 186 T2e = Rm[WS(rs, 4)];
Chris@82 187 T2f = T2d + T2e;
Chris@82 188 T3b = T2e - T2d;
Chris@82 189 T3e = T28 + T29;
Chris@82 190 T3d = W[17];
Chris@82 191 T5S = T3d * T3b;
Chris@82 192 T3a = W[16];
Chris@82 193 T3c = T3a * T3b;
Chris@82 194 T27 = W[14];
Chris@82 195 T2b = T27 * T2a;
Chris@82 196 T4C = T27 * T2f;
Chris@82 197 }
Chris@82 198 {
Chris@82 199 E T2i, T2j, T3h, T2n, T2o, T2h;
Chris@82 200 T2i = Ip[WS(rs, 12)];
Chris@82 201 T2j = Im[WS(rs, 12)];
Chris@82 202 T2k = T2i - T2j;
Chris@82 203 T2n = Rp[WS(rs, 12)];
Chris@82 204 T2o = Rm[WS(rs, 12)];
Chris@82 205 T2p = T2n + T2o;
Chris@82 206 T3h = T2o - T2n;
Chris@82 207 T3k = T2i + T2j;
Chris@82 208 T3j = W[49];
Chris@82 209 T5U = T3j * T3h;
Chris@82 210 T3g = W[48];
Chris@82 211 T3i = T3g * T3h;
Chris@82 212 T2h = W[46];
Chris@82 213 T2l = T2h * T2k;
Chris@82 214 T4E = T2h * T2p;
Chris@82 215 }
Chris@82 216 {
Chris@82 217 E T3f, T3l, T5T, T5V;
Chris@82 218 T3f = FNMS(T3d, T3e, T3c);
Chris@82 219 T3l = FNMS(T3j, T3k, T3i);
Chris@82 220 T3m = T3f + T3l;
Chris@82 221 T8k = T3f - T3l;
Chris@82 222 T5T = FMA(T3a, T3e, T5S);
Chris@82 223 T5V = FMA(T3g, T3k, T5U);
Chris@82 224 T5W = T5T + T5V;
Chris@82 225 T8a = T5T - T5V;
Chris@82 226 {
Chris@82 227 E T2g, T4D, T2q, T4F, T2c, T2m;
Chris@82 228 T2c = W[15];
Chris@82 229 T2g = FNMS(T2c, T2f, T2b);
Chris@82 230 T4D = FMA(T2c, T2a, T4C);
Chris@82 231 T2m = W[47];
Chris@82 232 T2q = FNMS(T2m, T2p, T2l);
Chris@82 233 T4F = FMA(T2m, T2k, T4E);
Chris@82 234 T2r = T2g + T2q;
Chris@82 235 T8u = T2g - T2q;
Chris@82 236 T4G = T4D + T4F;
Chris@82 237 T7q = T4D - T4F;
Chris@82 238 }
Chris@82 239 }
Chris@82 240 }
Chris@82 241 {
Chris@82 242 E TL, T49, TQ, T46, TM, T55, T47, T5v, TV, T4f, T10, T4c, TW, T57, T4d;
Chris@82 243 E T5x;
Chris@82 244 {
Chris@82 245 E TI, T45, TS, T4b;
Chris@82 246 {
Chris@82 247 E TJ, TK, TO, TP;
Chris@82 248 TJ = Ip[WS(rs, 15)];
Chris@82 249 TK = Im[WS(rs, 15)];
Chris@82 250 TL = TJ - TK;
Chris@82 251 T49 = TJ + TK;
Chris@82 252 TO = Rp[WS(rs, 15)];
Chris@82 253 TP = Rm[WS(rs, 15)];
Chris@82 254 TQ = TO + TP;
Chris@82 255 T46 = TO - TP;
Chris@82 256 }
Chris@82 257 TI = W[58];
Chris@82 258 TM = TI * TL;
Chris@82 259 T55 = TI * TQ;
Chris@82 260 T45 = W[60];
Chris@82 261 T47 = T45 * T46;
Chris@82 262 T5v = T45 * T49;
Chris@82 263 {
Chris@82 264 E TT, TU, TY, TZ;
Chris@82 265 TT = Ip[WS(rs, 7)];
Chris@82 266 TU = Im[WS(rs, 7)];
Chris@82 267 TV = TT - TU;
Chris@82 268 T4f = TT + TU;
Chris@82 269 TY = Rp[WS(rs, 7)];
Chris@82 270 TZ = Rm[WS(rs, 7)];
Chris@82 271 T10 = TY + TZ;
Chris@82 272 T4c = TY - TZ;
Chris@82 273 }
Chris@82 274 TS = W[26];
Chris@82 275 TW = TS * TV;
Chris@82 276 T57 = TS * T10;
Chris@82 277 T4b = W[28];
Chris@82 278 T4d = T4b * T4c;
Chris@82 279 T5x = T4b * T4f;
Chris@82 280 }
Chris@82 281 {
Chris@82 282 E TR, T56, T11, T58, TN, TX;
Chris@82 283 TN = W[59];
Chris@82 284 TR = FNMS(TN, TQ, TM);
Chris@82 285 T56 = FMA(TN, TL, T55);
Chris@82 286 TX = W[27];
Chris@82 287 T11 = FNMS(TX, T10, TW);
Chris@82 288 T58 = FMA(TX, TV, T57);
Chris@82 289 T12 = TR + T11;
Chris@82 290 T7K = T56 - T58;
Chris@82 291 T59 = T56 + T58;
Chris@82 292 T7H = TR - T11;
Chris@82 293 }
Chris@82 294 {
Chris@82 295 E T4a, T5w, T4g, T5y, T48, T4e;
Chris@82 296 T48 = W[61];
Chris@82 297 T4a = FMA(T48, T49, T47);
Chris@82 298 T5w = FNMS(T48, T46, T5v);
Chris@82 299 T4e = W[29];
Chris@82 300 T4g = FMA(T4e, T4f, T4d);
Chris@82 301 T5y = FNMS(T4e, T4c, T5x);
Chris@82 302 T4h = T4a + T4g;
Chris@82 303 T81 = T5w - T5y;
Chris@82 304 T5z = T5w + T5y;
Chris@82 305 T7Q = T4g - T4a;
Chris@82 306 }
Chris@82 307 }
Chris@82 308 {
Chris@82 309 E T4, T3I, T9, T3F, T5, T4U, T3G, T5k, Te, T3O, Tj, T3L, Tf, T4W, T3M;
Chris@82 310 E T5m;
Chris@82 311 {
Chris@82 312 E T1, T3E, Tb, T3K;
Chris@82 313 {
Chris@82 314 E T2, T3, T7, T8;
Chris@82 315 T2 = Ip[WS(rs, 1)];
Chris@82 316 T3 = Im[WS(rs, 1)];
Chris@82 317 T4 = T2 - T3;
Chris@82 318 T3I = T2 + T3;
Chris@82 319 T7 = Rp[WS(rs, 1)];
Chris@82 320 T8 = Rm[WS(rs, 1)];
Chris@82 321 T9 = T7 + T8;
Chris@82 322 T3F = T7 - T8;
Chris@82 323 }
Chris@82 324 T1 = W[2];
Chris@82 325 T5 = T1 * T4;
Chris@82 326 T4U = T1 * T9;
Chris@82 327 T3E = W[4];
Chris@82 328 T3G = T3E * T3F;
Chris@82 329 T5k = T3E * T3I;
Chris@82 330 {
Chris@82 331 E Tc, Td, Th, Ti;
Chris@82 332 Tc = Ip[WS(rs, 9)];
Chris@82 333 Td = Im[WS(rs, 9)];
Chris@82 334 Te = Tc - Td;
Chris@82 335 T3O = Tc + Td;
Chris@82 336 Th = Rp[WS(rs, 9)];
Chris@82 337 Ti = Rm[WS(rs, 9)];
Chris@82 338 Tj = Th + Ti;
Chris@82 339 T3L = Th - Ti;
Chris@82 340 }
Chris@82 341 Tb = W[34];
Chris@82 342 Tf = Tb * Te;
Chris@82 343 T4W = Tb * Tj;
Chris@82 344 T3K = W[36];
Chris@82 345 T3M = T3K * T3L;
Chris@82 346 T5m = T3K * T3O;
Chris@82 347 }
Chris@82 348 {
Chris@82 349 E Ta, T4V, Tk, T4X, T6, Tg;
Chris@82 350 T6 = W[3];
Chris@82 351 Ta = FNMS(T6, T9, T5);
Chris@82 352 T4V = FMA(T6, T4, T4U);
Chris@82 353 Tg = W[35];
Chris@82 354 Tk = FNMS(Tg, Tj, Tf);
Chris@82 355 T4X = FMA(Tg, Te, T4W);
Chris@82 356 Tl = Ta + Tk;
Chris@82 357 T7D = T4V - T4X;
Chris@82 358 T4Y = T4V + T4X;
Chris@82 359 T7A = Ta - Tk;
Chris@82 360 }
Chris@82 361 {
Chris@82 362 E T3J, T5l, T3P, T5n, T3H, T3N, T7T, T7U;
Chris@82 363 T3H = W[5];
Chris@82 364 T3J = FMA(T3H, T3I, T3G);
Chris@82 365 T5l = FNMS(T3H, T3F, T5k);
Chris@82 366 T3N = W[37];
Chris@82 367 T3P = FMA(T3N, T3O, T3M);
Chris@82 368 T5n = FNMS(T3N, T3L, T5m);
Chris@82 369 T3Q = T3J + T3P;
Chris@82 370 T5o = T5l + T5n;
Chris@82 371 T7T = T3P - T3J;
Chris@82 372 T7U = T5l - T5n;
Chris@82 373 T7V = T7T - T7U;
Chris@82 374 T84 = T7U + T7T;
Chris@82 375 }
Chris@82 376 }
Chris@82 377 {
Chris@82 378 E T1t, T1y, T2N, T2M, T5H, T2J, T2L, T1u, T4I, T1D, T1I, T2T, T2S, T5J, T2P;
Chris@82 379 E T2R, T1E, T4K;
Chris@82 380 {
Chris@82 381 E T1r, T1s, T2K, T1w, T1x, T1q;
Chris@82 382 T1r = Ip[WS(rs, 2)];
Chris@82 383 T1s = Im[WS(rs, 2)];
Chris@82 384 T1t = T1r - T1s;
Chris@82 385 T1w = Rp[WS(rs, 2)];
Chris@82 386 T1x = Rm[WS(rs, 2)];
Chris@82 387 T1y = T1w + T1x;
Chris@82 388 T2K = T1x - T1w;
Chris@82 389 T2N = T1r + T1s;
Chris@82 390 T2M = W[9];
Chris@82 391 T5H = T2M * T2K;
Chris@82 392 T2J = W[8];
Chris@82 393 T2L = T2J * T2K;
Chris@82 394 T1q = W[6];
Chris@82 395 T1u = T1q * T1t;
Chris@82 396 T4I = T1q * T1y;
Chris@82 397 }
Chris@82 398 {
Chris@82 399 E T1B, T1C, T2Q, T1G, T1H, T1A;
Chris@82 400 T1B = Ip[WS(rs, 10)];
Chris@82 401 T1C = Im[WS(rs, 10)];
Chris@82 402 T1D = T1B - T1C;
Chris@82 403 T1G = Rp[WS(rs, 10)];
Chris@82 404 T1H = Rm[WS(rs, 10)];
Chris@82 405 T1I = T1G + T1H;
Chris@82 406 T2Q = T1H - T1G;
Chris@82 407 T2T = T1B + T1C;
Chris@82 408 T2S = W[41];
Chris@82 409 T5J = T2S * T2Q;
Chris@82 410 T2P = W[40];
Chris@82 411 T2R = T2P * T2Q;
Chris@82 412 T1A = W[38];
Chris@82 413 T1E = T1A * T1D;
Chris@82 414 T4K = T1A * T1I;
Chris@82 415 }
Chris@82 416 {
Chris@82 417 E T1z, T4J, T1J, T4L, T1v, T1F;
Chris@82 418 T1v = W[7];
Chris@82 419 T1z = FNMS(T1v, T1y, T1u);
Chris@82 420 T4J = FMA(T1v, T1t, T4I);
Chris@82 421 T1F = W[39];
Chris@82 422 T1J = FNMS(T1F, T1I, T1E);
Chris@82 423 T4L = FMA(T1F, T1D, T4K);
Chris@82 424 T1K = T1z + T1J;
Chris@82 425 T7t = T4J - T4L;
Chris@82 426 T4M = T4J + T4L;
Chris@82 427 T7s = T1z - T1J;
Chris@82 428 }
Chris@82 429 {
Chris@82 430 E T2O, T2U, T8c, T5I, T5K, T8d;
Chris@82 431 T2O = FNMS(T2M, T2N, T2L);
Chris@82 432 T2U = FNMS(T2S, T2T, T2R);
Chris@82 433 T8c = T2O - T2U;
Chris@82 434 T5I = FMA(T2J, T2N, T5H);
Chris@82 435 T5K = FMA(T2P, T2T, T5J);
Chris@82 436 T8d = T5I - T5K;
Chris@82 437 T2V = T2O + T2U;
Chris@82 438 T8n = T8c + T8d;
Chris@82 439 T5L = T5I + T5K;
Chris@82 440 T8e = T8c - T8d;
Chris@82 441 }
Chris@82 442 }
Chris@82 443 {
Chris@82 444 E T1O, T1T, T30, T2Z, T5M, T2W, T2Y, T1P, T4N, T1Y, T23, T36, T35, T5O, T32;
Chris@82 445 E T34, T1Z, T4P;
Chris@82 446 {
Chris@82 447 E T1M, T1N, T2X, T1R, T1S, T1L;
Chris@82 448 T1M = Ip[WS(rs, 14)];
Chris@82 449 T1N = Im[WS(rs, 14)];
Chris@82 450 T1O = T1M - T1N;
Chris@82 451 T1R = Rp[WS(rs, 14)];
Chris@82 452 T1S = Rm[WS(rs, 14)];
Chris@82 453 T1T = T1R + T1S;
Chris@82 454 T2X = T1S - T1R;
Chris@82 455 T30 = T1M + T1N;
Chris@82 456 T2Z = W[57];
Chris@82 457 T5M = T2Z * T2X;
Chris@82 458 T2W = W[56];
Chris@82 459 T2Y = T2W * T2X;
Chris@82 460 T1L = W[54];
Chris@82 461 T1P = T1L * T1O;
Chris@82 462 T4N = T1L * T1T;
Chris@82 463 }
Chris@82 464 {
Chris@82 465 E T1W, T1X, T33, T21, T22, T1V;
Chris@82 466 T1W = Ip[WS(rs, 6)];
Chris@82 467 T1X = Im[WS(rs, 6)];
Chris@82 468 T1Y = T1W - T1X;
Chris@82 469 T21 = Rp[WS(rs, 6)];
Chris@82 470 T22 = Rm[WS(rs, 6)];
Chris@82 471 T23 = T21 + T22;
Chris@82 472 T33 = T22 - T21;
Chris@82 473 T36 = T1W + T1X;
Chris@82 474 T35 = W[25];
Chris@82 475 T5O = T35 * T33;
Chris@82 476 T32 = W[24];
Chris@82 477 T34 = T32 * T33;
Chris@82 478 T1V = W[22];
Chris@82 479 T1Z = T1V * T1Y;
Chris@82 480 T4P = T1V * T23;
Chris@82 481 }
Chris@82 482 {
Chris@82 483 E T1U, T4O, T24, T4Q, T1Q, T20;
Chris@82 484 T1Q = W[55];
Chris@82 485 T1U = FNMS(T1Q, T1T, T1P);
Chris@82 486 T4O = FMA(T1Q, T1O, T4N);
Chris@82 487 T20 = W[23];
Chris@82 488 T24 = FNMS(T20, T23, T1Z);
Chris@82 489 T4Q = FMA(T20, T1Y, T4P);
Chris@82 490 T25 = T1U + T24;
Chris@82 491 T7w = T1U - T24;
Chris@82 492 T4R = T4O + T4Q;
Chris@82 493 T7v = T4O - T4Q;
Chris@82 494 }
Chris@82 495 {
Chris@82 496 E T31, T37, T8f, T5N, T5P, T8g;
Chris@82 497 T31 = FNMS(T2Z, T30, T2Y);
Chris@82 498 T37 = FNMS(T35, T36, T34);
Chris@82 499 T8f = T31 - T37;
Chris@82 500 T5N = FMA(T2W, T30, T5M);
Chris@82 501 T5P = FMA(T32, T36, T5O);
Chris@82 502 T8g = T5N - T5P;
Chris@82 503 T38 = T31 + T37;
Chris@82 504 T8o = T8g - T8f;
Chris@82 505 T5Q = T5N + T5P;
Chris@82 506 T8h = T8f + T8g;
Chris@82 507 }
Chris@82 508 }
Chris@82 509 {
Chris@82 510 E Tp, T3V, Tu, T3S, Tq, T4Z, T3T, T5p, Tz, T41, TE, T3Y, TA, T51, T3Z;
Chris@82 511 E T5r;
Chris@82 512 {
Chris@82 513 E Tm, T3R, Tw, T3X;
Chris@82 514 {
Chris@82 515 E Tn, To, Ts, Tt;
Chris@82 516 Tn = Ip[WS(rs, 5)];
Chris@82 517 To = Im[WS(rs, 5)];
Chris@82 518 Tp = Tn - To;
Chris@82 519 T3V = Tn + To;
Chris@82 520 Ts = Rp[WS(rs, 5)];
Chris@82 521 Tt = Rm[WS(rs, 5)];
Chris@82 522 Tu = Ts + Tt;
Chris@82 523 T3S = Ts - Tt;
Chris@82 524 }
Chris@82 525 Tm = W[18];
Chris@82 526 Tq = Tm * Tp;
Chris@82 527 T4Z = Tm * Tu;
Chris@82 528 T3R = W[20];
Chris@82 529 T3T = T3R * T3S;
Chris@82 530 T5p = T3R * T3V;
Chris@82 531 {
Chris@82 532 E Tx, Ty, TC, TD;
Chris@82 533 Tx = Ip[WS(rs, 13)];
Chris@82 534 Ty = Im[WS(rs, 13)];
Chris@82 535 Tz = Tx - Ty;
Chris@82 536 T41 = Tx + Ty;
Chris@82 537 TC = Rp[WS(rs, 13)];
Chris@82 538 TD = Rm[WS(rs, 13)];
Chris@82 539 TE = TC + TD;
Chris@82 540 T3Y = TC - TD;
Chris@82 541 }
Chris@82 542 Tw = W[50];
Chris@82 543 TA = Tw * Tz;
Chris@82 544 T51 = Tw * TE;
Chris@82 545 T3X = W[52];
Chris@82 546 T3Z = T3X * T3Y;
Chris@82 547 T5r = T3X * T41;
Chris@82 548 }
Chris@82 549 {
Chris@82 550 E Tv, T50, TF, T52, Tr, TB;
Chris@82 551 Tr = W[19];
Chris@82 552 Tv = FNMS(Tr, Tu, Tq);
Chris@82 553 T50 = FMA(Tr, Tp, T4Z);
Chris@82 554 TB = W[51];
Chris@82 555 TF = FNMS(TB, TE, TA);
Chris@82 556 T52 = FMA(TB, Tz, T51);
Chris@82 557 TG = Tv + TF;
Chris@82 558 T7E = Tv - TF;
Chris@82 559 T53 = T50 + T52;
Chris@82 560 T7B = T50 - T52;
Chris@82 561 }
Chris@82 562 {
Chris@82 563 E T3W, T5q, T42, T5s, T3U, T40, T7W, T7X;
Chris@82 564 T3U = W[21];
Chris@82 565 T3W = FMA(T3U, T3V, T3T);
Chris@82 566 T5q = FNMS(T3U, T3S, T5p);
Chris@82 567 T40 = W[53];
Chris@82 568 T42 = FMA(T40, T41, T3Z);
Chris@82 569 T5s = FNMS(T40, T3Y, T5r);
Chris@82 570 T43 = T3W + T42;
Chris@82 571 T5t = T5q + T5s;
Chris@82 572 T7W = T5s - T5q;
Chris@82 573 T7X = T3W - T42;
Chris@82 574 T7Y = T7W + T7X;
Chris@82 575 T85 = T7W - T7X;
Chris@82 576 }
Chris@82 577 }
Chris@82 578 {
Chris@82 579 E T1p, T6i, T2H, T68, T5g, T67, T4T, T6h, T4w, T6m, T5G, T6c, T3D, T6n, T63;
Chris@82 580 E T6f;
Chris@82 581 {
Chris@82 582 E TH, T1o, T4H, T4S;
Chris@82 583 TH = Tl + TG;
Chris@82 584 T1o = T12 + T1n;
Chris@82 585 T1p = TH + T1o;
Chris@82 586 T6i = TH - T1o;
Chris@82 587 {
Chris@82 588 E T26, T2G, T54, T5f;
Chris@82 589 T26 = T1K + T25;
Chris@82 590 T2G = T2r + T2F;
Chris@82 591 T2H = T26 + T2G;
Chris@82 592 T68 = T2G - T26;
Chris@82 593 T54 = T4Y + T53;
Chris@82 594 T5f = T59 + T5e;
Chris@82 595 T5g = T54 + T5f;
Chris@82 596 T67 = T5f - T54;
Chris@82 597 }
Chris@82 598 T4H = T4B + T4G;
Chris@82 599 T4S = T4M + T4R;
Chris@82 600 T4T = T4H + T4S;
Chris@82 601 T6h = T4H - T4S;
Chris@82 602 {
Chris@82 603 E T44, T4v, T6b, T5u, T5F, T6a;
Chris@82 604 T44 = T3Q + T43;
Chris@82 605 T4v = T4h + T4u;
Chris@82 606 T6b = T44 - T4v;
Chris@82 607 T5u = T5o + T5t;
Chris@82 608 T5F = T5z + T5E;
Chris@82 609 T6a = T5F - T5u;
Chris@82 610 T4w = T44 + T4v;
Chris@82 611 T6m = T6a - T6b;
Chris@82 612 T5G = T5u + T5F;
Chris@82 613 T6c = T6a + T6b;
Chris@82 614 }
Chris@82 615 {
Chris@82 616 E T39, T3C, T6d, T5R, T62, T6e;
Chris@82 617 T39 = T2V + T38;
Chris@82 618 T3C = T3m + T3B;
Chris@82 619 T6d = T3C - T39;
Chris@82 620 T5R = T5L + T5Q;
Chris@82 621 T62 = T5W + T61;
Chris@82 622 T6e = T62 - T5R;
Chris@82 623 T3D = T39 + T3C;
Chris@82 624 T6n = T6d + T6e;
Chris@82 625 T63 = T5R + T62;
Chris@82 626 T6f = T6d - T6e;
Chris@82 627 }
Chris@82 628 }
Chris@82 629 {
Chris@82 630 E T2I, T4x, T65, T66;
Chris@82 631 T2I = T1p + T2H;
Chris@82 632 T4x = T3D - T4w;
Chris@82 633 Ip[0] = KP500000000 * (T2I + T4x);
Chris@82 634 Im[WS(rs, 15)] = KP500000000 * (T4x - T2I);
Chris@82 635 T65 = T4T + T5g;
Chris@82 636 T66 = T5G + T63;
Chris@82 637 Rm[WS(rs, 15)] = KP500000000 * (T65 - T66);
Chris@82 638 Rp[0] = KP500000000 * (T65 + T66);
Chris@82 639 }
Chris@82 640 {
Chris@82 641 E T5h, T5i, T5j, T64;
Chris@82 642 T5h = T4T - T5g;
Chris@82 643 T5i = T4w + T3D;
Chris@82 644 Rm[WS(rs, 7)] = KP500000000 * (T5h - T5i);
Chris@82 645 Rp[WS(rs, 8)] = KP500000000 * (T5h + T5i);
Chris@82 646 T5j = T2H - T1p;
Chris@82 647 T64 = T5G - T63;
Chris@82 648 Ip[WS(rs, 8)] = KP500000000 * (T5j + T64);
Chris@82 649 Im[WS(rs, 7)] = KP500000000 * (T64 - T5j);
Chris@82 650 }
Chris@82 651 {
Chris@82 652 E T69, T6g, T6p, T6q;
Chris@82 653 T69 = T67 + T68;
Chris@82 654 T6g = T6c + T6f;
Chris@82 655 Ip[WS(rs, 4)] = KP500000000 * (FMA(KP707106781, T6g, T69));
Chris@82 656 Im[WS(rs, 11)] = -(KP500000000 * (FNMS(KP707106781, T6g, T69)));
Chris@82 657 T6p = T6h + T6i;
Chris@82 658 T6q = T6m + T6n;
Chris@82 659 Rm[WS(rs, 11)] = KP500000000 * (FNMS(KP707106781, T6q, T6p));
Chris@82 660 Rp[WS(rs, 4)] = KP500000000 * (FMA(KP707106781, T6q, T6p));
Chris@82 661 }
Chris@82 662 {
Chris@82 663 E T6j, T6k, T6l, T6o;
Chris@82 664 T6j = T6h - T6i;
Chris@82 665 T6k = T6f - T6c;
Chris@82 666 Rm[WS(rs, 3)] = KP500000000 * (FNMS(KP707106781, T6k, T6j));
Chris@82 667 Rp[WS(rs, 12)] = KP500000000 * (FMA(KP707106781, T6k, T6j));
Chris@82 668 T6l = T68 - T67;
Chris@82 669 T6o = T6m - T6n;
Chris@82 670 Ip[WS(rs, 12)] = KP500000000 * (FMA(KP707106781, T6o, T6l));
Chris@82 671 Im[WS(rs, 3)] = -(KP500000000 * (FNMS(KP707106781, T6o, T6l)));
Chris@82 672 }
Chris@82 673 }
Chris@82 674 {
Chris@82 675 E T6t, T75, T6T, T7f, T6A, T7g, T6W, T76, T6I, T7k, T70, T7a, T6P, T7l, T71;
Chris@82 676 E T7d;
Chris@82 677 {
Chris@82 678 E T6r, T6s, T6R, T6S;
Chris@82 679 T6r = T4R - T4M;
Chris@82 680 T6s = T2F - T2r;
Chris@82 681 T6t = T6r + T6s;
Chris@82 682 T75 = T6s - T6r;
Chris@82 683 T6R = T4B - T4G;
Chris@82 684 T6S = T1K - T25;
Chris@82 685 T6T = T6R + T6S;
Chris@82 686 T7f = T6R - T6S;
Chris@82 687 }
Chris@82 688 {
Chris@82 689 E T6w, T6U, T6z, T6V;
Chris@82 690 {
Chris@82 691 E T6u, T6v, T6x, T6y;
Chris@82 692 T6u = Tl - TG;
Chris@82 693 T6v = T4Y - T53;
Chris@82 694 T6w = T6u - T6v;
Chris@82 695 T6U = T6v + T6u;
Chris@82 696 T6x = T59 - T5e;
Chris@82 697 T6y = T12 - T1n;
Chris@82 698 T6z = T6x + T6y;
Chris@82 699 T6V = T6x - T6y;
Chris@82 700 }
Chris@82 701 T6A = T6w + T6z;
Chris@82 702 T7g = T6w - T6z;
Chris@82 703 T6W = T6U + T6V;
Chris@82 704 T76 = T6V - T6U;
Chris@82 705 }
Chris@82 706 {
Chris@82 707 E T6E, T78, T6H, T79;
Chris@82 708 {
Chris@82 709 E T6C, T6D, T6F, T6G;
Chris@82 710 T6C = T5t - T5o;
Chris@82 711 T6D = T4u - T4h;
Chris@82 712 T6E = T6C + T6D;
Chris@82 713 T78 = T6C - T6D;
Chris@82 714 T6F = T43 - T3Q;
Chris@82 715 T6G = T5z - T5E;
Chris@82 716 T6H = T6F + T6G;
Chris@82 717 T79 = T6G - T6F;
Chris@82 718 }
Chris@82 719 T6I = FMA(KP414213562, T6H, T6E);
Chris@82 720 T7k = FNMS(KP414213562, T78, T79);
Chris@82 721 T70 = FNMS(KP414213562, T6E, T6H);
Chris@82 722 T7a = FMA(KP414213562, T79, T78);
Chris@82 723 }
Chris@82 724 {
Chris@82 725 E T6L, T7b, T6O, T7c;
Chris@82 726 {
Chris@82 727 E T6J, T6K, T6M, T6N;
Chris@82 728 T6J = T5Q - T5L;
Chris@82 729 T6K = T3B - T3m;
Chris@82 730 T6L = T6J + T6K;
Chris@82 731 T7b = T6K - T6J;
Chris@82 732 T6M = T2V - T38;
Chris@82 733 T6N = T61 - T5W;
Chris@82 734 T6O = T6M + T6N;
Chris@82 735 T7c = T6N - T6M;
Chris@82 736 }
Chris@82 737 T6P = FNMS(KP414213562, T6O, T6L);
Chris@82 738 T7l = FNMS(KP414213562, T7b, T7c);
Chris@82 739 T71 = FMA(KP414213562, T6L, T6O);
Chris@82 740 T7d = FMA(KP414213562, T7c, T7b);
Chris@82 741 }
Chris@82 742 {
Chris@82 743 E T6B, T6Q, T73, T74;
Chris@82 744 T6B = FMA(KP707106781, T6A, T6t);
Chris@82 745 T6Q = T6I + T6P;
Chris@82 746 Ip[WS(rs, 2)] = KP500000000 * (FMA(KP923879532, T6Q, T6B));
Chris@82 747 Im[WS(rs, 13)] = -(KP500000000 * (FNMS(KP923879532, T6Q, T6B)));
Chris@82 748 T73 = FMA(KP707106781, T6W, T6T);
Chris@82 749 T74 = T70 + T71;
Chris@82 750 Rm[WS(rs, 13)] = KP500000000 * (FNMS(KP923879532, T74, T73));
Chris@82 751 Rp[WS(rs, 2)] = KP500000000 * (FMA(KP923879532, T74, T73));
Chris@82 752 }
Chris@82 753 {
Chris@82 754 E T6X, T6Y, T6Z, T72;
Chris@82 755 T6X = FNMS(KP707106781, T6W, T6T);
Chris@82 756 T6Y = T6P - T6I;
Chris@82 757 Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP923879532, T6Y, T6X));
Chris@82 758 Rp[WS(rs, 10)] = KP500000000 * (FMA(KP923879532, T6Y, T6X));
Chris@82 759 T6Z = FNMS(KP707106781, T6A, T6t);
Chris@82 760 T72 = T70 - T71;
Chris@82 761 Ip[WS(rs, 10)] = KP500000000 * (FMA(KP923879532, T72, T6Z));
Chris@82 762 Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP923879532, T72, T6Z)));
Chris@82 763 }
Chris@82 764 {
Chris@82 765 E T77, T7e, T7n, T7o;
Chris@82 766 T77 = FNMS(KP707106781, T76, T75);
Chris@82 767 T7e = T7a - T7d;
Chris@82 768 Ip[WS(rs, 14)] = KP500000000 * (FMA(KP923879532, T7e, T77));
Chris@82 769 Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP923879532, T7e, T77)));
Chris@82 770 T7n = FNMS(KP707106781, T7g, T7f);
Chris@82 771 T7o = T7k + T7l;
Chris@82 772 Rp[WS(rs, 14)] = KP500000000 * (FNMS(KP923879532, T7o, T7n));
Chris@82 773 Rm[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, T7o, T7n));
Chris@82 774 }
Chris@82 775 {
Chris@82 776 E T7h, T7i, T7j, T7m;
Chris@82 777 T7h = FMA(KP707106781, T7g, T7f);
Chris@82 778 T7i = T7a + T7d;
Chris@82 779 Rm[WS(rs, 9)] = KP500000000 * (FNMS(KP923879532, T7i, T7h));
Chris@82 780 Rp[WS(rs, 6)] = KP500000000 * (FMA(KP923879532, T7i, T7h));
Chris@82 781 T7j = FMA(KP707106781, T76, T75);
Chris@82 782 T7m = T7k - T7l;
Chris@82 783 Ip[WS(rs, 6)] = KP500000000 * (FMA(KP923879532, T7m, T7j));
Chris@82 784 Im[WS(rs, 9)] = -(KP500000000 * (FNMS(KP923879532, T7m, T7j)));
Chris@82 785 }
Chris@82 786 }
Chris@82 787 {
Chris@82 788 E T7z, T9T, T8L, T9x, T8z, T9J, T8V, T97, T7O, T8W, T8C, T8M, T9t, T9Y, T9E;
Chris@82 789 E T9O, T88, T90, T8G, T8Q, T9e, T9U, T9A, T9K, T9m, T9Z, T9F, T9R, T8r, T91;
Chris@82 790 E T8H, T8T;
Chris@82 791 {
Chris@82 792 E T7r, T9v, T7y, T9w, T7u, T7x;
Chris@82 793 T7r = T7p - T7q;
Chris@82 794 T9v = T8t - T8u;
Chris@82 795 T7u = T7s - T7t;
Chris@82 796 T7x = T7v + T7w;
Chris@82 797 T7y = T7u + T7x;
Chris@82 798 T9w = T7u - T7x;
Chris@82 799 T7z = FMA(KP707106781, T7y, T7r);
Chris@82 800 T9T = FNMS(KP707106781, T9w, T9v);
Chris@82 801 T8L = FNMS(KP707106781, T7y, T7r);
Chris@82 802 T9x = FMA(KP707106781, T9w, T9v);
Chris@82 803 }
Chris@82 804 {
Chris@82 805 E T8v, T95, T8y, T96, T8w, T8x;
Chris@82 806 T8v = T8t + T8u;
Chris@82 807 T95 = T7q + T7p;
Chris@82 808 T8w = T7t + T7s;
Chris@82 809 T8x = T7v - T7w;
Chris@82 810 T8y = T8w + T8x;
Chris@82 811 T96 = T8x - T8w;
Chris@82 812 T8z = FMA(KP707106781, T8y, T8v);
Chris@82 813 T9J = FNMS(KP707106781, T96, T95);
Chris@82 814 T8V = FNMS(KP707106781, T8y, T8v);
Chris@82 815 T97 = FMA(KP707106781, T96, T95);
Chris@82 816 }
Chris@82 817 {
Chris@82 818 E T7G, T8A, T7N, T8B;
Chris@82 819 {
Chris@82 820 E T7C, T7F, T7J, T7M;
Chris@82 821 T7C = T7A - T7B;
Chris@82 822 T7F = T7D + T7E;
Chris@82 823 T7G = FNMS(KP414213562, T7F, T7C);
Chris@82 824 T8A = FMA(KP414213562, T7C, T7F);
Chris@82 825 T7J = T7H - T7I;
Chris@82 826 T7M = T7K + T7L;
Chris@82 827 T7N = FMA(KP414213562, T7M, T7J);
Chris@82 828 T8B = FNMS(KP414213562, T7J, T7M);
Chris@82 829 }
Chris@82 830 T7O = T7G + T7N;
Chris@82 831 T8W = T7G - T7N;
Chris@82 832 T8C = T8A + T8B;
Chris@82 833 T8M = T8B - T8A;
Chris@82 834 }
Chris@82 835 {
Chris@82 836 E T9p, T9M, T9s, T9N;
Chris@82 837 {
Chris@82 838 E T9n, T9o, T9q, T9r;
Chris@82 839 T9n = T7R - T7Q;
Chris@82 840 T9o = T85 - T84;
Chris@82 841 T9p = FNMS(KP707106781, T9o, T9n);
Chris@82 842 T9M = FMA(KP707106781, T9o, T9n);
Chris@82 843 T9q = T81 - T82;
Chris@82 844 T9r = T7Y - T7V;
Chris@82 845 T9s = FNMS(KP707106781, T9r, T9q);
Chris@82 846 T9N = FMA(KP707106781, T9r, T9q);
Chris@82 847 }
Chris@82 848 T9t = FNMS(KP668178637, T9s, T9p);
Chris@82 849 T9Y = FNMS(KP198912367, T9M, T9N);
Chris@82 850 T9E = FMA(KP668178637, T9p, T9s);
Chris@82 851 T9O = FMA(KP198912367, T9N, T9M);
Chris@82 852 }
Chris@82 853 {
Chris@82 854 E T80, T8O, T87, T8P;
Chris@82 855 {
Chris@82 856 E T7S, T7Z, T83, T86;
Chris@82 857 T7S = T7Q + T7R;
Chris@82 858 T7Z = T7V + T7Y;
Chris@82 859 T80 = FMA(KP707106781, T7Z, T7S);
Chris@82 860 T8O = FNMS(KP707106781, T7Z, T7S);
Chris@82 861 T83 = T81 + T82;
Chris@82 862 T86 = T84 + T85;
Chris@82 863 T87 = FMA(KP707106781, T86, T83);
Chris@82 864 T8P = FNMS(KP707106781, T86, T83);
Chris@82 865 }
Chris@82 866 T88 = FMA(KP198912367, T87, T80);
Chris@82 867 T90 = FMA(KP668178637, T8O, T8P);
Chris@82 868 T8G = FNMS(KP198912367, T80, T87);
Chris@82 869 T8Q = FNMS(KP668178637, T8P, T8O);
Chris@82 870 }
Chris@82 871 {
Chris@82 872 E T9a, T9z, T9d, T9y;
Chris@82 873 {
Chris@82 874 E T98, T99, T9b, T9c;
Chris@82 875 T98 = T7K - T7L;
Chris@82 876 T99 = T7H + T7I;
Chris@82 877 T9a = FMA(KP414213562, T99, T98);
Chris@82 878 T9z = FNMS(KP414213562, T98, T99);
Chris@82 879 T9b = T7D - T7E;
Chris@82 880 T9c = T7A + T7B;
Chris@82 881 T9d = FNMS(KP414213562, T9c, T9b);
Chris@82 882 T9y = FMA(KP414213562, T9b, T9c);
Chris@82 883 }
Chris@82 884 T9e = T9a - T9d;
Chris@82 885 T9U = T9d + T9a;
Chris@82 886 T9A = T9y - T9z;
Chris@82 887 T9K = T9y + T9z;
Chris@82 888 }
Chris@82 889 {
Chris@82 890 E T9i, T9P, T9l, T9Q;
Chris@82 891 {
Chris@82 892 E T9g, T9h, T9j, T9k;
Chris@82 893 T9g = T8a + T89;
Chris@82 894 T9h = T8n - T8o;
Chris@82 895 T9i = FNMS(KP707106781, T9h, T9g);
Chris@82 896 T9P = FMA(KP707106781, T9h, T9g);
Chris@82 897 T9j = T8l - T8k;
Chris@82 898 T9k = T8h - T8e;
Chris@82 899 T9l = FNMS(KP707106781, T9k, T9j);
Chris@82 900 T9Q = FMA(KP707106781, T9k, T9j);
Chris@82 901 }
Chris@82 902 T9m = FNMS(KP668178637, T9l, T9i);
Chris@82 903 T9Z = FNMS(KP198912367, T9P, T9Q);
Chris@82 904 T9F = FMA(KP668178637, T9i, T9l);
Chris@82 905 T9R = FMA(KP198912367, T9Q, T9P);
Chris@82 906 }
Chris@82 907 {
Chris@82 908 E T8j, T8R, T8q, T8S;
Chris@82 909 {
Chris@82 910 E T8b, T8i, T8m, T8p;
Chris@82 911 T8b = T89 - T8a;
Chris@82 912 T8i = T8e + T8h;
Chris@82 913 T8j = FMA(KP707106781, T8i, T8b);
Chris@82 914 T8R = FNMS(KP707106781, T8i, T8b);
Chris@82 915 T8m = T8k + T8l;
Chris@82 916 T8p = T8n + T8o;
Chris@82 917 T8q = FMA(KP707106781, T8p, T8m);
Chris@82 918 T8S = FNMS(KP707106781, T8p, T8m);
Chris@82 919 }
Chris@82 920 T8r = FNMS(KP198912367, T8q, T8j);
Chris@82 921 T91 = FNMS(KP668178637, T8R, T8S);
Chris@82 922 T8H = FMA(KP198912367, T8j, T8q);
Chris@82 923 T8T = FMA(KP668178637, T8S, T8R);
Chris@82 924 }
Chris@82 925 {
Chris@82 926 E T7P, T8s, T8J, T8K;
Chris@82 927 T7P = FMA(KP923879532, T7O, T7z);
Chris@82 928 T8s = T88 + T8r;
Chris@82 929 Ip[WS(rs, 1)] = KP500000000 * (FMA(KP980785280, T8s, T7P));
Chris@82 930 Im[WS(rs, 14)] = -(KP500000000 * (FNMS(KP980785280, T8s, T7P)));
Chris@82 931 T8J = FMA(KP923879532, T8C, T8z);
Chris@82 932 T8K = T8G + T8H;
Chris@82 933 Rm[WS(rs, 14)] = KP500000000 * (FNMS(KP980785280, T8K, T8J));
Chris@82 934 Rp[WS(rs, 1)] = KP500000000 * (FMA(KP980785280, T8K, T8J));
Chris@82 935 }
Chris@82 936 {
Chris@82 937 E T8D, T8E, T8F, T8I;
Chris@82 938 T8D = FNMS(KP923879532, T8C, T8z);
Chris@82 939 T8E = T8r - T88;
Chris@82 940 Rm[WS(rs, 6)] = KP500000000 * (FNMS(KP980785280, T8E, T8D));
Chris@82 941 Rp[WS(rs, 9)] = KP500000000 * (FMA(KP980785280, T8E, T8D));
Chris@82 942 T8F = FNMS(KP923879532, T7O, T7z);
Chris@82 943 T8I = T8G - T8H;
Chris@82 944 Ip[WS(rs, 9)] = KP500000000 * (FMA(KP980785280, T8I, T8F));
Chris@82 945 Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP980785280, T8I, T8F)));
Chris@82 946 }
Chris@82 947 {
Chris@82 948 E T8N, T8U, T93, T94;
Chris@82 949 T8N = FNMS(KP923879532, T8M, T8L);
Chris@82 950 T8U = T8Q + T8T;
Chris@82 951 Ip[WS(rs, 13)] = KP500000000 * (FNMS(KP831469612, T8U, T8N));
Chris@82 952 Im[WS(rs, 2)] = -(KP500000000 * (FMA(KP831469612, T8U, T8N)));
Chris@82 953 T93 = FNMS(KP923879532, T8W, T8V);
Chris@82 954 T94 = T90 + T91;
Chris@82 955 Rp[WS(rs, 13)] = KP500000000 * (FNMS(KP831469612, T94, T93));
Chris@82 956 Rm[WS(rs, 2)] = KP500000000 * (FMA(KP831469612, T94, T93));
Chris@82 957 }
Chris@82 958 {
Chris@82 959 E T8X, T8Y, T8Z, T92;
Chris@82 960 T8X = FMA(KP923879532, T8W, T8V);
Chris@82 961 T8Y = T8T - T8Q;
Chris@82 962 Rm[WS(rs, 10)] = KP500000000 * (FNMS(KP831469612, T8Y, T8X));
Chris@82 963 Rp[WS(rs, 5)] = KP500000000 * (FMA(KP831469612, T8Y, T8X));
Chris@82 964 T8Z = FMA(KP923879532, T8M, T8L);
Chris@82 965 T92 = T90 - T91;
Chris@82 966 Ip[WS(rs, 5)] = KP500000000 * (FMA(KP831469612, T92, T8Z));
Chris@82 967 Im[WS(rs, 10)] = -(KP500000000 * (FNMS(KP831469612, T92, T8Z)));
Chris@82 968 }
Chris@82 969 {
Chris@82 970 E T9f, T9u, T9H, T9I;
Chris@82 971 T9f = FMA(KP923879532, T9e, T97);
Chris@82 972 T9u = T9m - T9t;
Chris@82 973 Ip[WS(rs, 3)] = KP500000000 * (FMA(KP831469612, T9u, T9f));
Chris@82 974 Im[WS(rs, 12)] = -(KP500000000 * (FNMS(KP831469612, T9u, T9f)));
Chris@82 975 T9H = FMA(KP923879532, T9A, T9x);
Chris@82 976 T9I = T9E + T9F;
Chris@82 977 Rm[WS(rs, 12)] = KP500000000 * (FNMS(KP831469612, T9I, T9H));
Chris@82 978 Rp[WS(rs, 3)] = KP500000000 * (FMA(KP831469612, T9I, T9H));
Chris@82 979 }
Chris@82 980 {
Chris@82 981 E T9B, T9C, T9D, T9G;
Chris@82 982 T9B = FNMS(KP923879532, T9A, T9x);
Chris@82 983 T9C = T9t + T9m;
Chris@82 984 Rm[WS(rs, 4)] = KP500000000 * (FNMS(KP831469612, T9C, T9B));
Chris@82 985 Rp[WS(rs, 11)] = KP500000000 * (FMA(KP831469612, T9C, T9B));
Chris@82 986 T9D = FNMS(KP923879532, T9e, T97);
Chris@82 987 T9G = T9E - T9F;
Chris@82 988 Ip[WS(rs, 11)] = KP500000000 * (FMA(KP831469612, T9G, T9D));
Chris@82 989 Im[WS(rs, 4)] = -(KP500000000 * (FNMS(KP831469612, T9G, T9D)));
Chris@82 990 }
Chris@82 991 {
Chris@82 992 E T9L, T9S, Ta1, Ta2;
Chris@82 993 T9L = FMA(KP923879532, T9K, T9J);
Chris@82 994 T9S = T9O - T9R;
Chris@82 995 Ip[WS(rs, 15)] = KP500000000 * (FMA(KP980785280, T9S, T9L));
Chris@82 996 Im[0] = -(KP500000000 * (FNMS(KP980785280, T9S, T9L)));
Chris@82 997 Ta1 = FMA(KP923879532, T9U, T9T);
Chris@82 998 Ta2 = T9Y + T9Z;
Chris@82 999 Rp[WS(rs, 15)] = KP500000000 * (FNMS(KP980785280, Ta2, Ta1));
Chris@82 1000 Rm[0] = KP500000000 * (FMA(KP980785280, Ta2, Ta1));
Chris@82 1001 }
Chris@82 1002 {
Chris@82 1003 E T9V, T9W, T9X, Ta0;
Chris@82 1004 T9V = FNMS(KP923879532, T9U, T9T);
Chris@82 1005 T9W = T9O + T9R;
Chris@82 1006 Rm[WS(rs, 8)] = KP500000000 * (FNMS(KP980785280, T9W, T9V));
Chris@82 1007 Rp[WS(rs, 7)] = KP500000000 * (FMA(KP980785280, T9W, T9V));
Chris@82 1008 T9X = FNMS(KP923879532, T9K, T9J);
Chris@82 1009 Ta0 = T9Y - T9Z;
Chris@82 1010 Ip[WS(rs, 7)] = KP500000000 * (FMA(KP980785280, Ta0, T9X));
Chris@82 1011 Im[WS(rs, 8)] = -(KP500000000 * (FNMS(KP980785280, Ta0, T9X)));
Chris@82 1012 }
Chris@82 1013 }
Chris@82 1014 }
Chris@82 1015 }
Chris@82 1016 }
Chris@82 1017
/*
 * Twiddle-factor instruction table for this codelet; the descriptor `desc`
 * defined just below stores a pointer to it.  It holds two entries: a
 * TW_FULL entry with arguments (1, 32) followed by a TW_NEXT entry with
 * arguments (1, 0).
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared outside this
 * file.  Presumably TW_FULL requests the complete twiddle set for size 32
 * and TW_NEXT ends the list -- confirm against their declarations.  For
 * reference, the kernel variant in the #else branch of this file indexes W
 * up to W[61] and advances W by 62 per loop iteration.
 */
Chris@82 1018 static const tw_instr twinstr[] = {
Chris@82 1019 {TW_FULL, 1, 32},
Chris@82 1020 {TW_NEXT, 1, 0}
Chris@82 1021 };
Chris@82 1022
/*
 * Codelet descriptor for the FMA build of hc2cfdft_32 (this definition sits
 * in the ARCH_PREFERS_FMA / ISA_EXTENSION_PREFERS_FMA branch of the file).
 * Initializers, in order: 32 (matches the generator option "-n 32"), the
 * codelet name string, the twiddle instruction table `twinstr`, the address
 * of GENUS, and four counts {300, 126, 198, 0}.  The first three counts
 * equal the additions, multiplications and fused multiply/adds reported in
 * the generated header comment of this variant.
 * NOTE(review): GENUS is not defined in the visible part of this file, and
 * the meaning of the fourth count (0) cannot be determined from here --
 * check the hc2c_desc declaration.
 */
Chris@82 1023 static const hc2c_desc desc = { 32, "hc2cfdft_32", twinstr, &GENUS, {300, 126, 198, 0} };
Chris@82 1024
/*
 * Registration entry point for this codelet: passes the hc2cfdft_32 kernel
 * and its descriptor `desc` to X(khc2c_register) for planner `p`, together
 * with the HC2C_VIA_DFT tag.  Returns nothing.
 * NOTE(review): X() is a macro defined outside this file, presumably the
 * library's symbol-prefixing macro -- confirm in the included headers.
 */
Chris@82 1025 void X(codelet_hc2cfdft_32) (planner *p) {
Chris@82 1026 X(khc2c_register) (p, hc2cfdft_32, &desc, HC2C_VIA_DFT);
Chris@82 1027 }
Chris@82 1028 #else
Chris@82 1029
Chris@82 1030 /* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hc2cfdft_32 -include rdft/scalar/hc2cf.h */
Chris@82 1031
Chris@82 1032 /*
Chris@82 1033 * This function contains 498 FP additions, 228 FP multiplications,
Chris@82 1034 * (or, 404 additions, 134 multiplications, 94 fused multiply/add),
Chris@82 1035 * 106 stack variables, 9 constants, and 128 memory accesses
Chris@82 1036 */
Chris@82 1037 #include "rdft/scalar/hc2cf.h"
Chris@82 1038
Chris@82 1039 static void hc2cfdft_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 1040 {
Chris@82 1041 DK(KP277785116, +0.277785116509801112371415406974266437187468595);
Chris@82 1042 DK(KP415734806, +0.415734806151272618539394188808952878369280406);
Chris@82 1043 DK(KP097545161, +0.097545161008064133924142434238511120463845809);
Chris@82 1044 DK(KP490392640, +0.490392640201615224563091118067119518486966865);
Chris@82 1045 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 1046 DK(KP191341716, +0.191341716182544885864229992015199433380672281);
Chris@82 1047 DK(KP461939766, +0.461939766255643378064091594698394143411208313);
Chris@82 1048 DK(KP353553390, +0.353553390593273762200422181052424519642417969);
Chris@82 1049 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 1050 {
Chris@82 1051 INT m;
Chris@82 1052 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) {
Chris@82 1053 E T2S, T5K, T52, T5N, T7p, T8r, T7i, T8o, T2q, T7t, T45, T6L, T2d, T7u, T48;
Chris@82 1054 E T6M, T1A, T4c, T4f, T1T, T3f, T5M, T7e, T7l, T6J, T7x, T4V, T5J, T7b, T7k;
Chris@82 1055 E T6G, T7w, Tj, TC, T5r, T4k, T4n, T5s, T3D, T5C, T6V, T72, T4G, T5F, T6u;
Chris@82 1056 E T86, T6S, T71, T6r, T85, TW, T1f, T5v, T4r, T4u, T5u, T40, T5G, T76, T8k;
Chris@82 1057 E T4N, T5D, T6B, T89, T6Z, T8h, T6y, T88;
Chris@82 1058 {
Chris@82 1059 E T1Y, T22, T2L, T4W, T2p, T43, T2A, T50, T27, T2b, T2Q, T4X, T2h, T2l, T2F;
Chris@82 1060 E T4Z;
Chris@82 1061 {
Chris@82 1062 E T1W, T1X, T2K, T20, T21, T2I, T2H, T2J;
Chris@82 1063 T1W = Ip[WS(rs, 4)];
Chris@82 1064 T1X = Im[WS(rs, 4)];
Chris@82 1065 T2K = T1W + T1X;
Chris@82 1066 T20 = Rp[WS(rs, 4)];
Chris@82 1067 T21 = Rm[WS(rs, 4)];
Chris@82 1068 T2I = T20 - T21;
Chris@82 1069 T1Y = T1W - T1X;
Chris@82 1070 T22 = T20 + T21;
Chris@82 1071 T2H = W[16];
Chris@82 1072 T2J = W[17];
Chris@82 1073 T2L = FMA(T2H, T2I, T2J * T2K);
Chris@82 1074 T4W = FNMS(T2J, T2I, T2H * T2K);
Chris@82 1075 }
Chris@82 1076 {
Chris@82 1077 E T2n, T2o, T2z, T2v, T2w, T2x, T2u, T2y;
Chris@82 1078 T2n = Ip[0];
Chris@82 1079 T2o = Im[0];
Chris@82 1080 T2z = T2n + T2o;
Chris@82 1081 T2v = Rm[0];
Chris@82 1082 T2w = Rp[0];
Chris@82 1083 T2x = T2v - T2w;
Chris@82 1084 T2p = T2n - T2o;
Chris@82 1085 T43 = T2w + T2v;
Chris@82 1086 T2u = W[0];
Chris@82 1087 T2y = W[1];
Chris@82 1088 T2A = FNMS(T2y, T2z, T2u * T2x);
Chris@82 1089 T50 = FMA(T2y, T2x, T2u * T2z);
Chris@82 1090 }
Chris@82 1091 {
Chris@82 1092 E T25, T26, T2P, T29, T2a, T2N, T2M, T2O;
Chris@82 1093 T25 = Ip[WS(rs, 12)];
Chris@82 1094 T26 = Im[WS(rs, 12)];
Chris@82 1095 T2P = T25 + T26;
Chris@82 1096 T29 = Rp[WS(rs, 12)];
Chris@82 1097 T2a = Rm[WS(rs, 12)];
Chris@82 1098 T2N = T29 - T2a;
Chris@82 1099 T27 = T25 - T26;
Chris@82 1100 T2b = T29 + T2a;
Chris@82 1101 T2M = W[48];
Chris@82 1102 T2O = W[49];
Chris@82 1103 T2Q = FMA(T2M, T2N, T2O * T2P);
Chris@82 1104 T4X = FNMS(T2O, T2N, T2M * T2P);
Chris@82 1105 }
Chris@82 1106 {
Chris@82 1107 E T2f, T2g, T2E, T2j, T2k, T2C, T2B, T2D;
Chris@82 1108 T2f = Ip[WS(rs, 8)];
Chris@82 1109 T2g = Im[WS(rs, 8)];
Chris@82 1110 T2E = T2f + T2g;
Chris@82 1111 T2j = Rp[WS(rs, 8)];
Chris@82 1112 T2k = Rm[WS(rs, 8)];
Chris@82 1113 T2C = T2j - T2k;
Chris@82 1114 T2h = T2f - T2g;
Chris@82 1115 T2l = T2j + T2k;
Chris@82 1116 T2B = W[32];
Chris@82 1117 T2D = W[33];
Chris@82 1118 T2F = FMA(T2B, T2C, T2D * T2E);
Chris@82 1119 T4Z = FNMS(T2D, T2C, T2B * T2E);
Chris@82 1120 }
Chris@82 1121 {
Chris@82 1122 E T2G, T2R, T7g, T7h;
Chris@82 1123 T2G = T2A - T2F;
Chris@82 1124 T2R = T2L + T2Q;
Chris@82 1125 T2S = T2G - T2R;
Chris@82 1126 T5K = T2R + T2G;
Chris@82 1127 {
Chris@82 1128 E T4Y, T51, T7n, T7o;
Chris@82 1129 T4Y = T4W + T4X;
Chris@82 1130 T51 = T4Z + T50;
Chris@82 1131 T52 = T4Y + T51;
Chris@82 1132 T5N = T51 - T4Y;
Chris@82 1133 T7n = T2Q - T2L;
Chris@82 1134 T7o = T50 - T4Z;
Chris@82 1135 T7p = T7n + T7o;
Chris@82 1136 T8r = T7o - T7n;
Chris@82 1137 }
Chris@82 1138 T7g = T2F + T2A;
Chris@82 1139 T7h = T4W - T4X;
Chris@82 1140 T7i = T7g - T7h;
Chris@82 1141 T8o = T7h + T7g;
Chris@82 1142 {
Chris@82 1143 E T2m, T44, T2e, T2i;
Chris@82 1144 T2e = W[30];
Chris@82 1145 T2i = W[31];
Chris@82 1146 T2m = FNMS(T2i, T2l, T2e * T2h);
Chris@82 1147 T44 = FMA(T2e, T2l, T2i * T2h);
Chris@82 1148 T2q = T2m + T2p;
Chris@82 1149 T7t = T43 - T44;
Chris@82 1150 T45 = T43 + T44;
Chris@82 1151 T6L = T2p - T2m;
Chris@82 1152 }
Chris@82 1153 {
Chris@82 1154 E T23, T46, T2c, T47;
Chris@82 1155 {
Chris@82 1156 E T1V, T1Z, T24, T28;
Chris@82 1157 T1V = W[14];
Chris@82 1158 T1Z = W[15];
Chris@82 1159 T23 = FNMS(T1Z, T22, T1V * T1Y);
Chris@82 1160 T46 = FMA(T1V, T22, T1Z * T1Y);
Chris@82 1161 T24 = W[46];
Chris@82 1162 T28 = W[47];
Chris@82 1163 T2c = FNMS(T28, T2b, T24 * T27);
Chris@82 1164 T47 = FMA(T24, T2b, T28 * T27);
Chris@82 1165 }
Chris@82 1166 T2d = T23 + T2c;
Chris@82 1167 T7u = T23 - T2c;
Chris@82 1168 T48 = T46 + T47;
Chris@82 1169 T6M = T46 - T47;
Chris@82 1170 }
Chris@82 1171 }
Chris@82 1172 }
Chris@82 1173 {
Chris@82 1174 E T1q, T4a, T2X, T4P, T1S, T4e, T3d, T4T, T1z, T4b, T32, T4Q, T1J, T4d, T38;
Chris@82 1175 E T4S;
Chris@82 1176 {
Chris@82 1177 E T1l, T2W, T1p, T2U;
Chris@82 1178 {
Chris@82 1179 E T1j, T1k, T1n, T1o;
Chris@82 1180 T1j = Ip[WS(rs, 2)];
Chris@82 1181 T1k = Im[WS(rs, 2)];
Chris@82 1182 T1l = T1j - T1k;
Chris@82 1183 T2W = T1j + T1k;
Chris@82 1184 T1n = Rp[WS(rs, 2)];
Chris@82 1185 T1o = Rm[WS(rs, 2)];
Chris@82 1186 T1p = T1n + T1o;
Chris@82 1187 T2U = T1n - T1o;
Chris@82 1188 }
Chris@82 1189 {
Chris@82 1190 E T1i, T1m, T2T, T2V;
Chris@82 1191 T1i = W[6];
Chris@82 1192 T1m = W[7];
Chris@82 1193 T1q = FNMS(T1m, T1p, T1i * T1l);
Chris@82 1194 T4a = FMA(T1i, T1p, T1m * T1l);
Chris@82 1195 T2T = W[8];
Chris@82 1196 T2V = W[9];
Chris@82 1197 T2X = FMA(T2T, T2U, T2V * T2W);
Chris@82 1198 T4P = FNMS(T2V, T2U, T2T * T2W);
Chris@82 1199 }
Chris@82 1200 }
Chris@82 1201 {
Chris@82 1202 E T1N, T3c, T1R, T3a;
Chris@82 1203 {
Chris@82 1204 E T1L, T1M, T1P, T1Q;
Chris@82 1205 T1L = Ip[WS(rs, 6)];
Chris@82 1206 T1M = Im[WS(rs, 6)];
Chris@82 1207 T1N = T1L - T1M;
Chris@82 1208 T3c = T1L + T1M;
Chris@82 1209 T1P = Rp[WS(rs, 6)];
Chris@82 1210 T1Q = Rm[WS(rs, 6)];
Chris@82 1211 T1R = T1P + T1Q;
Chris@82 1212 T3a = T1P - T1Q;
Chris@82 1213 }
Chris@82 1214 {
Chris@82 1215 E T1K, T1O, T39, T3b;
Chris@82 1216 T1K = W[22];
Chris@82 1217 T1O = W[23];
Chris@82 1218 T1S = FNMS(T1O, T1R, T1K * T1N);
Chris@82 1219 T4e = FMA(T1K, T1R, T1O * T1N);
Chris@82 1220 T39 = W[24];
Chris@82 1221 T3b = W[25];
Chris@82 1222 T3d = FMA(T39, T3a, T3b * T3c);
Chris@82 1223 T4T = FNMS(T3b, T3a, T39 * T3c);
Chris@82 1224 }
Chris@82 1225 }
Chris@82 1226 {
Chris@82 1227 E T1u, T31, T1y, T2Z;
Chris@82 1228 {
Chris@82 1229 E T1s, T1t, T1w, T1x;
Chris@82 1230 T1s = Ip[WS(rs, 10)];
Chris@82 1231 T1t = Im[WS(rs, 10)];
Chris@82 1232 T1u = T1s - T1t;
Chris@82 1233 T31 = T1s + T1t;
Chris@82 1234 T1w = Rp[WS(rs, 10)];
Chris@82 1235 T1x = Rm[WS(rs, 10)];
Chris@82 1236 T1y = T1w + T1x;
Chris@82 1237 T2Z = T1w - T1x;
Chris@82 1238 }
Chris@82 1239 {
Chris@82 1240 E T1r, T1v, T2Y, T30;
Chris@82 1241 T1r = W[38];
Chris@82 1242 T1v = W[39];
Chris@82 1243 T1z = FNMS(T1v, T1y, T1r * T1u);
Chris@82 1244 T4b = FMA(T1r, T1y, T1v * T1u);
Chris@82 1245 T2Y = W[40];
Chris@82 1246 T30 = W[41];
Chris@82 1247 T32 = FMA(T2Y, T2Z, T30 * T31);
Chris@82 1248 T4Q = FNMS(T30, T2Z, T2Y * T31);
Chris@82 1249 }
Chris@82 1250 }
Chris@82 1251 {
Chris@82 1252 E T1E, T37, T1I, T35;
Chris@82 1253 {
Chris@82 1254 E T1C, T1D, T1G, T1H;
Chris@82 1255 T1C = Ip[WS(rs, 14)];
Chris@82 1256 T1D = Im[WS(rs, 14)];
Chris@82 1257 T1E = T1C - T1D;
Chris@82 1258 T37 = T1C + T1D;
Chris@82 1259 T1G = Rp[WS(rs, 14)];
Chris@82 1260 T1H = Rm[WS(rs, 14)];
Chris@82 1261 T1I = T1G + T1H;
Chris@82 1262 T35 = T1G - T1H;
Chris@82 1263 }
Chris@82 1264 {
Chris@82 1265 E T1B, T1F, T34, T36;
Chris@82 1266 T1B = W[54];
Chris@82 1267 T1F = W[55];
Chris@82 1268 T1J = FNMS(T1F, T1I, T1B * T1E);
Chris@82 1269 T4d = FMA(T1B, T1I, T1F * T1E);
Chris@82 1270 T34 = W[56];
Chris@82 1271 T36 = W[57];
Chris@82 1272 T38 = FMA(T34, T35, T36 * T37);
Chris@82 1273 T4S = FNMS(T36, T35, T34 * T37);
Chris@82 1274 }
Chris@82 1275 }
Chris@82 1276 {
Chris@82 1277 E T33, T3e, T4R, T4U;
Chris@82 1278 T1A = T1q + T1z;
Chris@82 1279 T4c = T4a + T4b;
Chris@82 1280 T4f = T4d + T4e;
Chris@82 1281 T1T = T1J + T1S;
Chris@82 1282 T33 = T2X + T32;
Chris@82 1283 T3e = T38 + T3d;
Chris@82 1284 T3f = T33 + T3e;
Chris@82 1285 T5M = T3e - T33;
Chris@82 1286 {
Chris@82 1287 E T7c, T7d, T6H, T6I;
Chris@82 1288 T7c = T4S - T4T;
Chris@82 1289 T7d = T3d - T38;
Chris@82 1290 T7e = T7c + T7d;
Chris@82 1291 T7l = T7c - T7d;
Chris@82 1292 T6H = T4d - T4e;
Chris@82 1293 T6I = T1J - T1S;
Chris@82 1294 T6J = T6H + T6I;
Chris@82 1295 T7x = T6H - T6I;
Chris@82 1296 }
Chris@82 1297 T4R = T4P + T4Q;
Chris@82 1298 T4U = T4S + T4T;
Chris@82 1299 T4V = T4R + T4U;
Chris@82 1300 T5J = T4U - T4R;
Chris@82 1301 {
Chris@82 1302 E T79, T7a, T6E, T6F;
Chris@82 1303 T79 = T32 - T2X;
Chris@82 1304 T7a = T4P - T4Q;
Chris@82 1305 T7b = T79 - T7a;
Chris@82 1306 T7k = T7a + T79;
Chris@82 1307 T6E = T1q - T1z;
Chris@82 1308 T6F = T4a - T4b;
Chris@82 1309 T6G = T6E - T6F;
Chris@82 1310 T7w = T6F + T6E;
Chris@82 1311 }
Chris@82 1312 }
Chris@82 1313 }
Chris@82 1314 {
Chris@82 1315 E T9, T4i, T3l, T4A, TB, T4m, T3B, T4E, Ti, T4j, T3q, T4B, Ts, T4l, T3w;
Chris@82 1316 E T4D;
Chris@82 1317 {
Chris@82 1318 E T4, T3k, T8, T3i;
Chris@82 1319 {
Chris@82 1320 E T2, T3, T6, T7;
Chris@82 1321 T2 = Ip[WS(rs, 1)];
Chris@82 1322 T3 = Im[WS(rs, 1)];
Chris@82 1323 T4 = T2 - T3;
Chris@82 1324 T3k = T2 + T3;
Chris@82 1325 T6 = Rp[WS(rs, 1)];
Chris@82 1326 T7 = Rm[WS(rs, 1)];
Chris@82 1327 T8 = T6 + T7;
Chris@82 1328 T3i = T6 - T7;
Chris@82 1329 }
Chris@82 1330 {
Chris@82 1331 E T1, T5, T3h, T3j;
Chris@82 1332 T1 = W[2];
Chris@82 1333 T5 = W[3];
Chris@82 1334 T9 = FNMS(T5, T8, T1 * T4);
Chris@82 1335 T4i = FMA(T1, T8, T5 * T4);
Chris@82 1336 T3h = W[4];
Chris@82 1337 T3j = W[5];
Chris@82 1338 T3l = FMA(T3h, T3i, T3j * T3k);
Chris@82 1339 T4A = FNMS(T3j, T3i, T3h * T3k);
Chris@82 1340 }
Chris@82 1341 }
Chris@82 1342 {
Chris@82 1343 E Tw, T3A, TA, T3y;
Chris@82 1344 {
Chris@82 1345 E Tu, Tv, Ty, Tz;
Chris@82 1346 Tu = Ip[WS(rs, 13)];
Chris@82 1347 Tv = Im[WS(rs, 13)];
Chris@82 1348 Tw = Tu - Tv;
Chris@82 1349 T3A = Tu + Tv;
Chris@82 1350 Ty = Rp[WS(rs, 13)];
Chris@82 1351 Tz = Rm[WS(rs, 13)];
Chris@82 1352 TA = Ty + Tz;
Chris@82 1353 T3y = Ty - Tz;
Chris@82 1354 }
Chris@82 1355 {
Chris@82 1356 E Tt, Tx, T3x, T3z;
Chris@82 1357 Tt = W[50];
Chris@82 1358 Tx = W[51];
Chris@82 1359 TB = FNMS(Tx, TA, Tt * Tw);
Chris@82 1360 T4m = FMA(Tt, TA, Tx * Tw);
Chris@82 1361 T3x = W[52];
Chris@82 1362 T3z = W[53];
Chris@82 1363 T3B = FMA(T3x, T3y, T3z * T3A);
Chris@82 1364 T4E = FNMS(T3z, T3y, T3x * T3A);
Chris@82 1365 }
Chris@82 1366 }
Chris@82 1367 {
Chris@82 1368 E Td, T3p, Th, T3n;
Chris@82 1369 {
Chris@82 1370 E Tb, Tc, Tf, Tg;
Chris@82 1371 Tb = Ip[WS(rs, 9)];
Chris@82 1372 Tc = Im[WS(rs, 9)];
Chris@82 1373 Td = Tb - Tc;
Chris@82 1374 T3p = Tb + Tc;
Chris@82 1375 Tf = Rp[WS(rs, 9)];
Chris@82 1376 Tg = Rm[WS(rs, 9)];
Chris@82 1377 Th = Tf + Tg;
Chris@82 1378 T3n = Tf - Tg;
Chris@82 1379 }
Chris@82 1380 {
Chris@82 1381 E Ta, Te, T3m, T3o;
Chris@82 1382 Ta = W[34];
Chris@82 1383 Te = W[35];
Chris@82 1384 Ti = FNMS(Te, Th, Ta * Td);
Chris@82 1385 T4j = FMA(Ta, Th, Te * Td);
Chris@82 1386 T3m = W[36];
Chris@82 1387 T3o = W[37];
Chris@82 1388 T3q = FMA(T3m, T3n, T3o * T3p);
Chris@82 1389 T4B = FNMS(T3o, T3n, T3m * T3p);
Chris@82 1390 }
Chris@82 1391 }
Chris@82 1392 {
Chris@82 1393 E Tn, T3v, Tr, T3t;
Chris@82 1394 {
Chris@82 1395 E Tl, Tm, Tp, Tq;
Chris@82 1396 Tl = Ip[WS(rs, 5)];
Chris@82 1397 Tm = Im[WS(rs, 5)];
Chris@82 1398 Tn = Tl - Tm;
Chris@82 1399 T3v = Tl + Tm;
Chris@82 1400 Tp = Rp[WS(rs, 5)];
Chris@82 1401 Tq = Rm[WS(rs, 5)];
Chris@82 1402 Tr = Tp + Tq;
Chris@82 1403 T3t = Tp - Tq;
Chris@82 1404 }
Chris@82 1405 {
Chris@82 1406 E Tk, To, T3s, T3u;
Chris@82 1407 Tk = W[18];
Chris@82 1408 To = W[19];
Chris@82 1409 Ts = FNMS(To, Tr, Tk * Tn);
Chris@82 1410 T4l = FMA(Tk, Tr, To * Tn);
Chris@82 1411 T3s = W[20];
Chris@82 1412 T3u = W[21];
Chris@82 1413 T3w = FMA(T3s, T3t, T3u * T3v);
Chris@82 1414 T4D = FNMS(T3u, T3t, T3s * T3v);
Chris@82 1415 }
Chris@82 1416 }
Chris@82 1417 Tj = T9 + Ti;
Chris@82 1418 TC = Ts + TB;
Chris@82 1419 T5r = Tj - TC;
Chris@82 1420 T4k = T4i + T4j;
Chris@82 1421 T4n = T4l + T4m;
Chris@82 1422 T5s = T4k - T4n;
Chris@82 1423 {
Chris@82 1424 E T3r, T3C, T6T, T6U;
Chris@82 1425 T3r = T3l + T3q;
Chris@82 1426 T3C = T3w + T3B;
Chris@82 1427 T3D = T3r + T3C;
Chris@82 1428 T5C = T3C - T3r;
Chris@82 1429 T6T = T4E - T4D;
Chris@82 1430 T6U = T3w - T3B;
Chris@82 1431 T6V = T6T + T6U;
Chris@82 1432 T72 = T6T - T6U;
Chris@82 1433 }
Chris@82 1434 {
Chris@82 1435 E T4C, T4F, T6s, T6t;
Chris@82 1436 T4C = T4A + T4B;
Chris@82 1437 T4F = T4D + T4E;
Chris@82 1438 T4G = T4C + T4F;
Chris@82 1439 T5F = T4F - T4C;
Chris@82 1440 T6s = T4i - T4j;
Chris@82 1441 T6t = Ts - TB;
Chris@82 1442 T6u = T6s + T6t;
Chris@82 1443 T86 = T6s - T6t;
Chris@82 1444 }
Chris@82 1445 {
Chris@82 1446 E T6Q, T6R, T6p, T6q;
Chris@82 1447 T6Q = T3q - T3l;
Chris@82 1448 T6R = T4A - T4B;
Chris@82 1449 T6S = T6Q - T6R;
Chris@82 1450 T71 = T6R + T6Q;
Chris@82 1451 T6p = T9 - Ti;
Chris@82 1452 T6q = T4l - T4m;
Chris@82 1453 T6r = T6p - T6q;
Chris@82 1454 T85 = T6p + T6q;
Chris@82 1455 }
Chris@82 1456 }
Chris@82 1457 {
Chris@82 1458 E TM, T4p, T3I, T4H, T1e, T4t, T3Y, T4L, TV, T4q, T3N, T4I, T15, T4s, T3T;
Chris@82 1459 E T4K;
Chris@82 1460 {
Chris@82 1461 E TH, T3H, TL, T3F;
Chris@82 1462 {
Chris@82 1463 E TF, TG, TJ, TK;
Chris@82 1464 TF = Ip[WS(rs, 15)];
Chris@82 1465 TG = Im[WS(rs, 15)];
Chris@82 1466 TH = TF - TG;
Chris@82 1467 T3H = TF + TG;
Chris@82 1468 TJ = Rp[WS(rs, 15)];
Chris@82 1469 TK = Rm[WS(rs, 15)];
Chris@82 1470 TL = TJ + TK;
Chris@82 1471 T3F = TJ - TK;
Chris@82 1472 }
Chris@82 1473 {
Chris@82 1474 E TE, TI, T3E, T3G;
Chris@82 1475 TE = W[58];
Chris@82 1476 TI = W[59];
Chris@82 1477 TM = FNMS(TI, TL, TE * TH);
Chris@82 1478 T4p = FMA(TE, TL, TI * TH);
Chris@82 1479 T3E = W[60];
Chris@82 1480 T3G = W[61];
Chris@82 1481 T3I = FMA(T3E, T3F, T3G * T3H);
Chris@82 1482 T4H = FNMS(T3G, T3F, T3E * T3H);
Chris@82 1483 }
Chris@82 1484 }
Chris@82 1485 {
Chris@82 1486 E T19, T3X, T1d, T3V;
Chris@82 1487 {
Chris@82 1488 E T17, T18, T1b, T1c;
Chris@82 1489 T17 = Ip[WS(rs, 11)];
Chris@82 1490 T18 = Im[WS(rs, 11)];
Chris@82 1491 T19 = T17 - T18;
Chris@82 1492 T3X = T17 + T18;
Chris@82 1493 T1b = Rp[WS(rs, 11)];
Chris@82 1494 T1c = Rm[WS(rs, 11)];
Chris@82 1495 T1d = T1b + T1c;
Chris@82 1496 T3V = T1b - T1c;
Chris@82 1497 }
Chris@82 1498 {
Chris@82 1499 E T16, T1a, T3U, T3W;
Chris@82 1500 T16 = W[42];
Chris@82 1501 T1a = W[43];
Chris@82 1502 T1e = FNMS(T1a, T1d, T16 * T19);
Chris@82 1503 T4t = FMA(T16, T1d, T1a * T19);
Chris@82 1504 T3U = W[44];
Chris@82 1505 T3W = W[45];
Chris@82 1506 T3Y = FMA(T3U, T3V, T3W * T3X);
Chris@82 1507 T4L = FNMS(T3W, T3V, T3U * T3X);
Chris@82 1508 }
Chris@82 1509 }
Chris@82 1510 {
Chris@82 1511 E TQ, T3M, TU, T3K;
Chris@82 1512 {
Chris@82 1513 E TO, TP, TS, TT;
Chris@82 1514 TO = Ip[WS(rs, 7)];
Chris@82 1515 TP = Im[WS(rs, 7)];
Chris@82 1516 TQ = TO - TP;
Chris@82 1517 T3M = TO + TP;
Chris@82 1518 TS = Rp[WS(rs, 7)];
Chris@82 1519 TT = Rm[WS(rs, 7)];
Chris@82 1520 TU = TS + TT;
Chris@82 1521 T3K = TS - TT;
Chris@82 1522 }
Chris@82 1523 {
Chris@82 1524 E TN, TR, T3J, T3L;
Chris@82 1525 TN = W[26];
Chris@82 1526 TR = W[27];
Chris@82 1527 TV = FNMS(TR, TU, TN * TQ);
Chris@82 1528 T4q = FMA(TN, TU, TR * TQ);
Chris@82 1529 T3J = W[28];
Chris@82 1530 T3L = W[29];
Chris@82 1531 T3N = FMA(T3J, T3K, T3L * T3M);
Chris@82 1532 T4I = FNMS(T3L, T3K, T3J * T3M);
Chris@82 1533 }
Chris@82 1534 }
Chris@82 1535 {
Chris@82 1536 E T10, T3S, T14, T3Q;
Chris@82 1537 {
Chris@82 1538 E TY, TZ, T12, T13;
Chris@82 1539 TY = Ip[WS(rs, 3)];
Chris@82 1540 TZ = Im[WS(rs, 3)];
Chris@82 1541 T10 = TY - TZ;
Chris@82 1542 T3S = TY + TZ;
Chris@82 1543 T12 = Rp[WS(rs, 3)];
Chris@82 1544 T13 = Rm[WS(rs, 3)];
Chris@82 1545 T14 = T12 + T13;
Chris@82 1546 T3Q = T12 - T13;
Chris@82 1547 }
Chris@82 1548 {
Chris@82 1549 E TX, T11, T3P, T3R;
Chris@82 1550 TX = W[10];
Chris@82 1551 T11 = W[11];
Chris@82 1552 T15 = FNMS(T11, T14, TX * T10);
Chris@82 1553 T4s = FMA(TX, T14, T11 * T10);
Chris@82 1554 T3P = W[12];
Chris@82 1555 T3R = W[13];
Chris@82 1556 T3T = FMA(T3P, T3Q, T3R * T3S);
Chris@82 1557 T4K = FNMS(T3R, T3Q, T3P * T3S);
Chris@82 1558 }
Chris@82 1559 }
Chris@82 1560 TW = TM + TV;
Chris@82 1561 T1f = T15 + T1e;
Chris@82 1562 T5v = TW - T1f;
Chris@82 1563 T4r = T4p + T4q;
Chris@82 1564 T4u = T4s + T4t;
Chris@82 1565 T5u = T4r - T4u;
Chris@82 1566 {
Chris@82 1567 E T3O, T3Z, T74, T75;
Chris@82 1568 T3O = T3I + T3N;
Chris@82 1569 T3Z = T3T + T3Y;
Chris@82 1570 T40 = T3O + T3Z;
Chris@82 1571 T5G = T3Z - T3O;
Chris@82 1572 T74 = T4H - T4I;
Chris@82 1573 T75 = T3Y - T3T;
Chris@82 1574 T76 = T74 + T75;
Chris@82 1575 T8k = T74 - T75;
Chris@82 1576 }
Chris@82 1577 {
Chris@82 1578 E T4J, T4M, T6z, T6A;
Chris@82 1579 T4J = T4H + T4I;
Chris@82 1580 T4M = T4K + T4L;
Chris@82 1581 T4N = T4J + T4M;
Chris@82 1582 T5D = T4J - T4M;
Chris@82 1583 T6z = T4p - T4q;
Chris@82 1584 T6A = T15 - T1e;
Chris@82 1585 T6B = T6z + T6A;
Chris@82 1586 T89 = T6z - T6A;
Chris@82 1587 }
Chris@82 1588 {
Chris@82 1589 E T6X, T6Y, T6w, T6x;
Chris@82 1590 T6X = T3N - T3I;
Chris@82 1591 T6Y = T4K - T4L;
Chris@82 1592 T6Z = T6X - T6Y;
Chris@82 1593 T8h = T6X + T6Y;
Chris@82 1594 T6w = TM - TV;
Chris@82 1595 T6x = T4s - T4t;
Chris@82 1596 T6y = T6w - T6x;
Chris@82 1597 T88 = T6w + T6x;
Chris@82 1598 }
Chris@82 1599 }
Chris@82 1600 {
Chris@82 1601 E T1h, T5i, T5c, T5m, T5f, T5n, T2s, T58, T42, T4y, T4w, T57, T54, T56, T4h;
Chris@82 1602 E T5h;
Chris@82 1603 {
Chris@82 1604 E TD, T1g, T5a, T5b;
Chris@82 1605 TD = Tj + TC;
Chris@82 1606 T1g = TW + T1f;
Chris@82 1607 T1h = TD + T1g;
Chris@82 1608 T5i = TD - T1g;
Chris@82 1609 T5a = T4N - T4G;
Chris@82 1610 T5b = T3D - T40;
Chris@82 1611 T5c = T5a + T5b;
Chris@82 1612 T5m = T5a - T5b;
Chris@82 1613 }
Chris@82 1614 {
Chris@82 1615 E T5d, T5e, T1U, T2r;
Chris@82 1616 T5d = T3f + T2S;
Chris@82 1617 T5e = T52 - T4V;
Chris@82 1618 T5f = T5d - T5e;
Chris@82 1619 T5n = T5d + T5e;
Chris@82 1620 T1U = T1A + T1T;
Chris@82 1621 T2r = T2d + T2q;
Chris@82 1622 T2s = T1U + T2r;
Chris@82 1623 T58 = T2r - T1U;
Chris@82 1624 }
Chris@82 1625 {
Chris@82 1626 E T3g, T41, T4o, T4v;
Chris@82 1627 T3g = T2S - T3f;
Chris@82 1628 T41 = T3D + T40;
Chris@82 1629 T42 = T3g - T41;
Chris@82 1630 T4y = T41 + T3g;
Chris@82 1631 T4o = T4k + T4n;
Chris@82 1632 T4v = T4r + T4u;
Chris@82 1633 T4w = T4o + T4v;
Chris@82 1634 T57 = T4v - T4o;
Chris@82 1635 }
Chris@82 1636 {
Chris@82 1637 E T4O, T53, T49, T4g;
Chris@82 1638 T4O = T4G + T4N;
Chris@82 1639 T53 = T4V + T52;
Chris@82 1640 T54 = T4O - T53;
Chris@82 1641 T56 = T4O + T53;
Chris@82 1642 T49 = T45 + T48;
Chris@82 1643 T4g = T4c + T4f;
Chris@82 1644 T4h = T49 + T4g;
Chris@82 1645 T5h = T49 - T4g;
Chris@82 1646 }
Chris@82 1647 {
Chris@82 1648 E T2t, T55, T4x, T4z;
Chris@82 1649 T2t = T1h + T2s;
Chris@82 1650 Ip[0] = KP500000000 * (T2t + T42);
Chris@82 1651 Im[WS(rs, 15)] = KP500000000 * (T42 - T2t);
Chris@82 1652 T55 = T4h + T4w;
Chris@82 1653 Rm[WS(rs, 15)] = KP500000000 * (T55 - T56);
Chris@82 1654 Rp[0] = KP500000000 * (T55 + T56);
Chris@82 1655 T4x = T4h - T4w;
Chris@82 1656 Rm[WS(rs, 7)] = KP500000000 * (T4x - T4y);
Chris@82 1657 Rp[WS(rs, 8)] = KP500000000 * (T4x + T4y);
Chris@82 1658 T4z = T2s - T1h;
Chris@82 1659 Ip[WS(rs, 8)] = KP500000000 * (T4z + T54);
Chris@82 1660 Im[WS(rs, 7)] = KP500000000 * (T54 - T4z);
Chris@82 1661 }
Chris@82 1662 {
Chris@82 1663 E T59, T5g, T5p, T5q;
Chris@82 1664 T59 = KP500000000 * (T57 + T58);
Chris@82 1665 T5g = KP353553390 * (T5c + T5f);
Chris@82 1666 Ip[WS(rs, 4)] = T59 + T5g;
Chris@82 1667 Im[WS(rs, 11)] = T5g - T59;
Chris@82 1668 T5p = KP500000000 * (T5h + T5i);
Chris@82 1669 T5q = KP353553390 * (T5m + T5n);
Chris@82 1670 Rm[WS(rs, 11)] = T5p - T5q;
Chris@82 1671 Rp[WS(rs, 4)] = T5p + T5q;
Chris@82 1672 }
Chris@82 1673 {
Chris@82 1674 E T5j, T5k, T5l, T5o;
Chris@82 1675 T5j = KP500000000 * (T5h - T5i);
Chris@82 1676 T5k = KP353553390 * (T5f - T5c);
Chris@82 1677 Rm[WS(rs, 3)] = T5j - T5k;
Chris@82 1678 Rp[WS(rs, 12)] = T5j + T5k;
Chris@82 1679 T5l = KP500000000 * (T58 - T57);
Chris@82 1680 T5o = KP353553390 * (T5m - T5n);
Chris@82 1681 Ip[WS(rs, 12)] = T5l + T5o;
Chris@82 1682 Im[WS(rs, 3)] = T5o - T5l;
Chris@82 1683 }
Chris@82 1684 }
Chris@82 1685 {
Chris@82 1686 E T5x, T6g, T6a, T6k, T6d, T6l, T5A, T66, T5I, T60, T5T, T6f, T5W, T65, T5P;
Chris@82 1687 E T61;
Chris@82 1688 {
Chris@82 1689 E T5t, T5w, T68, T69;
Chris@82 1690 T5t = T5r - T5s;
Chris@82 1691 T5w = T5u + T5v;
Chris@82 1692 T5x = KP353553390 * (T5t + T5w);
Chris@82 1693 T6g = KP353553390 * (T5t - T5w);
Chris@82 1694 T68 = T5D - T5C;
Chris@82 1695 T69 = T5G - T5F;
Chris@82 1696 T6a = FMA(KP461939766, T68, KP191341716 * T69);
Chris@82 1697 T6k = FNMS(KP461939766, T69, KP191341716 * T68);
Chris@82 1698 }
Chris@82 1699 {
Chris@82 1700 E T6b, T6c, T5y, T5z;
Chris@82 1701 T6b = T5K - T5J;
Chris@82 1702 T6c = T5N - T5M;
Chris@82 1703 T6d = FNMS(KP461939766, T6c, KP191341716 * T6b);
Chris@82 1704 T6l = FMA(KP461939766, T6b, KP191341716 * T6c);
Chris@82 1705 T5y = T4f - T4c;
Chris@82 1706 T5z = T2q - T2d;
Chris@82 1707 T5A = KP500000000 * (T5y + T5z);
Chris@82 1708 T66 = KP500000000 * (T5z - T5y);
Chris@82 1709 }
Chris@82 1710 {
Chris@82 1711 E T5E, T5H, T5R, T5S;
Chris@82 1712 T5E = T5C + T5D;
Chris@82 1713 T5H = T5F + T5G;
Chris@82 1714 T5I = FMA(KP191341716, T5E, KP461939766 * T5H);
Chris@82 1715 T60 = FNMS(KP191341716, T5H, KP461939766 * T5E);
Chris@82 1716 T5R = T45 - T48;
Chris@82 1717 T5S = T1A - T1T;
Chris@82 1718 T5T = KP500000000 * (T5R + T5S);
Chris@82 1719 T6f = KP500000000 * (T5R - T5S);
Chris@82 1720 }
Chris@82 1721 {
Chris@82 1722 E T5U, T5V, T5L, T5O;
Chris@82 1723 T5U = T5s + T5r;
Chris@82 1724 T5V = T5u - T5v;
Chris@82 1725 T5W = KP353553390 * (T5U + T5V);
Chris@82 1726 T65 = KP353553390 * (T5V - T5U);
Chris@82 1727 T5L = T5J + T5K;
Chris@82 1728 T5O = T5M + T5N;
Chris@82 1729 T5P = FNMS(KP191341716, T5O, KP461939766 * T5L);
Chris@82 1730 T61 = FMA(KP191341716, T5L, KP461939766 * T5O);
Chris@82 1731 }
Chris@82 1732 {
Chris@82 1733 E T5B, T5Q, T63, T64;
Chris@82 1734 T5B = T5x + T5A;
Chris@82 1735 T5Q = T5I + T5P;
Chris@82 1736 Ip[WS(rs, 2)] = T5B + T5Q;
Chris@82 1737 Im[WS(rs, 13)] = T5Q - T5B;
Chris@82 1738 T63 = T5T + T5W;
Chris@82 1739 T64 = T60 + T61;
Chris@82 1740 Rm[WS(rs, 13)] = T63 - T64;
Chris@82 1741 Rp[WS(rs, 2)] = T63 + T64;
Chris@82 1742 }
Chris@82 1743 {
Chris@82 1744 E T5X, T5Y, T5Z, T62;
Chris@82 1745 T5X = T5T - T5W;
Chris@82 1746 T5Y = T5P - T5I;
Chris@82 1747 Rm[WS(rs, 5)] = T5X - T5Y;
Chris@82 1748 Rp[WS(rs, 10)] = T5X + T5Y;
Chris@82 1749 T5Z = T5A - T5x;
Chris@82 1750 T62 = T60 - T61;
Chris@82 1751 Ip[WS(rs, 10)] = T5Z + T62;
Chris@82 1752 Im[WS(rs, 5)] = T62 - T5Z;
Chris@82 1753 }
Chris@82 1754 {
Chris@82 1755 E T67, T6e, T6n, T6o;
Chris@82 1756 T67 = T65 + T66;
Chris@82 1757 T6e = T6a + T6d;
Chris@82 1758 Ip[WS(rs, 6)] = T67 + T6e;
Chris@82 1759 Im[WS(rs, 9)] = T6e - T67;
Chris@82 1760 T6n = T6f + T6g;
Chris@82 1761 T6o = T6k + T6l;
Chris@82 1762 Rm[WS(rs, 9)] = T6n - T6o;
Chris@82 1763 Rp[WS(rs, 6)] = T6n + T6o;
Chris@82 1764 }
Chris@82 1765 {
Chris@82 1766 E T6h, T6i, T6j, T6m;
Chris@82 1767 T6h = T6f - T6g;
Chris@82 1768 T6i = T6d - T6a;
Chris@82 1769 Rm[WS(rs, 1)] = T6h - T6i;
Chris@82 1770 Rp[WS(rs, 14)] = T6h + T6i;
Chris@82 1771 T6j = T66 - T65;
Chris@82 1772 T6m = T6k - T6l;
Chris@82 1773 Ip[WS(rs, 14)] = T6j + T6m;
Chris@82 1774 Im[WS(rs, 1)] = T6m - T6j;
Chris@82 1775 }
Chris@82 1776 }
Chris@82 1777 {
Chris@82 1778 E T6D, T7W, T6O, T7M, T7C, T7L, T7z, T7V, T7r, T81, T7H, T7T, T78, T80, T7G;
Chris@82 1779 E T7Q;
Chris@82 1780 {
Chris@82 1781 E T6v, T6C, T7v, T7y;
Chris@82 1782 T6v = FNMS(KP191341716, T6u, KP461939766 * T6r);
Chris@82 1783 T6C = FMA(KP461939766, T6y, KP191341716 * T6B);
Chris@82 1784 T6D = T6v + T6C;
Chris@82 1785 T7W = T6v - T6C;
Chris@82 1786 {
Chris@82 1787 E T6K, T6N, T7A, T7B;
Chris@82 1788 T6K = KP353553390 * (T6G + T6J);
Chris@82 1789 T6N = KP500000000 * (T6L - T6M);
Chris@82 1790 T6O = T6K + T6N;
Chris@82 1791 T7M = T6N - T6K;
Chris@82 1792 T7A = FMA(KP191341716, T6r, KP461939766 * T6u);
Chris@82 1793 T7B = FNMS(KP191341716, T6y, KP461939766 * T6B);
Chris@82 1794 T7C = T7A + T7B;
Chris@82 1795 T7L = T7B - T7A;
Chris@82 1796 }
Chris@82 1797 T7v = KP500000000 * (T7t + T7u);
Chris@82 1798 T7y = KP353553390 * (T7w + T7x);
Chris@82 1799 T7z = T7v + T7y;
Chris@82 1800 T7V = T7v - T7y;
Chris@82 1801 {
Chris@82 1802 E T7j, T7R, T7q, T7S, T7f, T7m;
Chris@82 1803 T7f = KP707106781 * (T7b + T7e);
Chris@82 1804 T7j = T7f + T7i;
Chris@82 1805 T7R = T7i - T7f;
Chris@82 1806 T7m = KP707106781 * (T7k + T7l);
Chris@82 1807 T7q = T7m + T7p;
Chris@82 1808 T7S = T7p - T7m;
Chris@82 1809 T7r = FNMS(KP097545161, T7q, KP490392640 * T7j);
Chris@82 1810 T81 = FMA(KP415734806, T7R, KP277785116 * T7S);
Chris@82 1811 T7H = FMA(KP097545161, T7j, KP490392640 * T7q);
Chris@82 1812 T7T = FNMS(KP415734806, T7S, KP277785116 * T7R);
Chris@82 1813 }
Chris@82 1814 {
Chris@82 1815 E T70, T7O, T77, T7P, T6W, T73;
Chris@82 1816 T6W = KP707106781 * (T6S + T6V);
Chris@82 1817 T70 = T6W + T6Z;
Chris@82 1818 T7O = T6Z - T6W;
Chris@82 1819 T73 = KP707106781 * (T71 + T72);
Chris@82 1820 T77 = T73 + T76;
Chris@82 1821 T7P = T76 - T73;
Chris@82 1822 T78 = FMA(KP490392640, T70, KP097545161 * T77);
Chris@82 1823 T80 = FNMS(KP415734806, T7O, KP277785116 * T7P);
Chris@82 1824 T7G = FNMS(KP097545161, T70, KP490392640 * T77);
Chris@82 1825 T7Q = FMA(KP277785116, T7O, KP415734806 * T7P);
Chris@82 1826 }
Chris@82 1827 }
Chris@82 1828 {
Chris@82 1829 E T6P, T7s, T7J, T7K;
Chris@82 1830 T6P = T6D + T6O;
Chris@82 1831 T7s = T78 + T7r;
Chris@82 1832 Ip[WS(rs, 1)] = T6P + T7s;
Chris@82 1833 Im[WS(rs, 14)] = T7s - T6P;
Chris@82 1834 T7J = T7z + T7C;
Chris@82 1835 T7K = T7G + T7H;
Chris@82 1836 Rm[WS(rs, 14)] = T7J - T7K;
Chris@82 1837 Rp[WS(rs, 1)] = T7J + T7K;
Chris@82 1838 }
Chris@82 1839 {
Chris@82 1840 E T7D, T7E, T7F, T7I;
Chris@82 1841 T7D = T7z - T7C;
Chris@82 1842 T7E = T7r - T78;
Chris@82 1843 Rm[WS(rs, 6)] = T7D - T7E;
Chris@82 1844 Rp[WS(rs, 9)] = T7D + T7E;
Chris@82 1845 T7F = T6O - T6D;
Chris@82 1846 T7I = T7G - T7H;
Chris@82 1847 Ip[WS(rs, 9)] = T7F + T7I;
Chris@82 1848 Im[WS(rs, 6)] = T7I - T7F;
Chris@82 1849 }
Chris@82 1850 {
Chris@82 1851 E T7N, T7U, T83, T84;
Chris@82 1852 T7N = T7L + T7M;
Chris@82 1853 T7U = T7Q + T7T;
Chris@82 1854 Ip[WS(rs, 5)] = T7N + T7U;
Chris@82 1855 Im[WS(rs, 10)] = T7U - T7N;
Chris@82 1856 T83 = T7V + T7W;
Chris@82 1857 T84 = T80 + T81;
Chris@82 1858 Rm[WS(rs, 10)] = T83 - T84;
Chris@82 1859 Rp[WS(rs, 5)] = T83 + T84;
Chris@82 1860 }
Chris@82 1861 {
Chris@82 1862 E T7X, T7Y, T7Z, T82;
Chris@82 1863 T7X = T7V - T7W;
Chris@82 1864 T7Y = T7T - T7Q;
Chris@82 1865 Rm[WS(rs, 2)] = T7X - T7Y;
Chris@82 1866 Rp[WS(rs, 13)] = T7X + T7Y;
Chris@82 1867 T7Z = T7M - T7L;
Chris@82 1868 T82 = T80 - T81;
Chris@82 1869 Ip[WS(rs, 13)] = T7Z + T82;
Chris@82 1870 Im[WS(rs, 2)] = T82 - T7Z;
Chris@82 1871 }
Chris@82 1872 }
Chris@82 1873 {
Chris@82 1874 E T8b, T8U, T8e, T8K, T8A, T8J, T8x, T8T, T8t, T8Z, T8F, T8R, T8m, T8Y, T8E;
Chris@82 1875 E T8O;
Chris@82 1876 {
Chris@82 1877 E T87, T8a, T8v, T8w;
Chris@82 1878 T87 = FNMS(KP461939766, T86, KP191341716 * T85);
Chris@82 1879 T8a = FMA(KP191341716, T88, KP461939766 * T89);
Chris@82 1880 T8b = T87 + T8a;
Chris@82 1881 T8U = T87 - T8a;
Chris@82 1882 {
Chris@82 1883 E T8c, T8d, T8y, T8z;
Chris@82 1884 T8c = KP353553390 * (T7x - T7w);
Chris@82 1885 T8d = KP500000000 * (T6M + T6L);
Chris@82 1886 T8e = T8c + T8d;
Chris@82 1887 T8K = T8d - T8c;
Chris@82 1888 T8y = FMA(KP461939766, T85, KP191341716 * T86);
Chris@82 1889 T8z = FNMS(KP461939766, T88, KP191341716 * T89);
Chris@82 1890 T8A = T8y + T8z;
Chris@82 1891 T8J = T8z - T8y;
Chris@82 1892 }
Chris@82 1893 T8v = KP500000000 * (T7t - T7u);
Chris@82 1894 T8w = KP353553390 * (T6G - T6J);
Chris@82 1895 T8x = T8v + T8w;
Chris@82 1896 T8T = T8v - T8w;
Chris@82 1897 {
Chris@82 1898 E T8p, T8P, T8s, T8Q, T8n, T8q;
Chris@82 1899 T8n = KP707106781 * (T7l - T7k);
Chris@82 1900 T8p = T8n + T8o;
Chris@82 1901 T8P = T8o - T8n;
Chris@82 1902 T8q = KP707106781 * (T7b - T7e);
Chris@82 1903 T8s = T8q + T8r;
Chris@82 1904 T8Q = T8r - T8q;
Chris@82 1905 T8t = FNMS(KP277785116, T8s, KP415734806 * T8p);
Chris@82 1906 T8Z = FMA(KP490392640, T8P, KP097545161 * T8Q);
Chris@82 1907 T8F = FMA(KP277785116, T8p, KP415734806 * T8s);
Chris@82 1908 T8R = FNMS(KP490392640, T8Q, KP097545161 * T8P);
Chris@82 1909 }
Chris@82 1910 {
Chris@82 1911 E T8i, T8M, T8l, T8N, T8g, T8j;
Chris@82 1912 T8g = KP707106781 * (T72 - T71);
Chris@82 1913 T8i = T8g + T8h;
Chris@82 1914 T8M = T8h - T8g;
Chris@82 1915 T8j = KP707106781 * (T6S - T6V);
Chris@82 1916 T8l = T8j + T8k;
Chris@82 1917 T8N = T8k - T8j;
Chris@82 1918 T8m = FMA(KP415734806, T8i, KP277785116 * T8l);
Chris@82 1919 T8Y = FNMS(KP490392640, T8M, KP097545161 * T8N);
Chris@82 1920 T8E = FNMS(KP277785116, T8i, KP415734806 * T8l);
Chris@82 1921 T8O = FMA(KP097545161, T8M, KP490392640 * T8N);
Chris@82 1922 }
Chris@82 1923 }
Chris@82 1924 {
Chris@82 1925 E T8f, T8u, T8H, T8I;
Chris@82 1926 T8f = T8b + T8e;
Chris@82 1927 T8u = T8m + T8t;
Chris@82 1928 Ip[WS(rs, 3)] = T8f + T8u;
Chris@82 1929 Im[WS(rs, 12)] = T8u - T8f;
Chris@82 1930 T8H = T8x + T8A;
Chris@82 1931 T8I = T8E + T8F;
Chris@82 1932 Rm[WS(rs, 12)] = T8H - T8I;
Chris@82 1933 Rp[WS(rs, 3)] = T8H + T8I;
Chris@82 1934 }
Chris@82 1935 {
Chris@82 1936 E T8B, T8C, T8D, T8G;
Chris@82 1937 T8B = T8x - T8A;
Chris@82 1938 T8C = T8t - T8m;
Chris@82 1939 Rm[WS(rs, 4)] = T8B - T8C;
Chris@82 1940 Rp[WS(rs, 11)] = T8B + T8C;
Chris@82 1941 T8D = T8e - T8b;
Chris@82 1942 T8G = T8E - T8F;
Chris@82 1943 Ip[WS(rs, 11)] = T8D + T8G;
Chris@82 1944 Im[WS(rs, 4)] = T8G - T8D;
Chris@82 1945 }
Chris@82 1946 {
Chris@82 1947 E T8L, T8S, T91, T92;
Chris@82 1948 T8L = T8J + T8K;
Chris@82 1949 T8S = T8O + T8R;
Chris@82 1950 Ip[WS(rs, 7)] = T8L + T8S;
Chris@82 1951 Im[WS(rs, 8)] = T8S - T8L;
Chris@82 1952 T91 = T8T + T8U;
Chris@82 1953 T92 = T8Y + T8Z;
Chris@82 1954 Rm[WS(rs, 8)] = T91 - T92;
Chris@82 1955 Rp[WS(rs, 7)] = T91 + T92;
Chris@82 1956 }
Chris@82 1957 {
Chris@82 1958 E T8V, T8W, T8X, T90;
Chris@82 1959 T8V = T8T - T8U;
Chris@82 1960 T8W = T8R - T8O;
Chris@82 1961 Rm[0] = T8V - T8W;
Chris@82 1962 Rp[WS(rs, 15)] = T8V + T8W;
Chris@82 1963 T8X = T8K - T8J;
Chris@82 1964 T90 = T8Y - T8Z;
Chris@82 1965 Ip[WS(rs, 15)] = T8X + T90;
Chris@82 1966 Im[0] = T90 - T8X;
Chris@82 1967 }
Chris@82 1968 }
Chris@82 1969 }
Chris@82 1970 }
Chris@82 1971 }
Chris@82 1972
Chris@82 1973 static const tw_instr twinstr[] = { /* twiddle-instruction table; referenced by the hc2c_desc initializer for this codelet */
Chris@82 1974 {TW_FULL, 1, 32}, /* NOTE(review): presumably requests the full twiddle set for size 32 -- confirm against the tw_instr definition */
Chris@82 1975 {TW_NEXT, 1, 0} /* NOTE(review): looks like the end-of-list entry -- confirm */
Chris@82 1976 };
Chris@82 1977
Chris@82 1978 static const hc2c_desc desc = { 32, "hc2cfdft_32", twinstr, &GENUS, {404, 134, 94, 0} }; /* codelet descriptor: the value 32, the name string, the twinstr table, &GENUS, and four counts. NOTE(review): the counts are presumably {adds, muls, fused multiply/adds, other} for this non-FMA variant -- confirm against the hc2c_desc/opcnt declarations */
Chris@82 1979
Chris@82 1980 void X(codelet_hc2cfdft_32) (planner *p) { /* entry point taking the planner p; its only action is the registration call below */
Chris@82 1981 X(khc2c_register) (p, hc2cfdft_32, &desc, HC2C_VIA_DFT); /* passes p, the hc2cfdft_32 codelet function, its descriptor, and the HC2C_VIA_DFT constant */
Chris@82 1982 }
Chris@82 1983 #endif