annotate src/fftw-3.3.5/rdft/scalar/r2cf/hc2cfdft_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:48:44 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hc2cfdft_32 -include hc2cf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 498 FP additions, 324 FP multiplications,
Chris@42 32 * (or, 300 additions, 126 multiplications, 198 fused multiply/add),
Chris@42 33 * 172 stack variables, 8 constants, and 128 memory accesses
Chris@42 34 */
Chris@42 35 #include "hc2cf.h"
Chris@42 36
Chris@42 37 static void hc2cfdft_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
Chris@42 39 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 40 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 41 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@42 42 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@42 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 44 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@42 45 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 46 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 47 {
Chris@42 48 INT m;
Chris@42 49 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) {
Chris@42 50 E T9X, Ta0;
Chris@42 51 {
Chris@42 52 E T3B, T89, T61, T8l, T2F, T7p, T8t, T4B, T7I, T5e, T7L, T1n, T7R, T5E, T82;
Chris@42 53 E T4u, T3m, T8k, T5W, T8a, T2r, T8u, T4G, T7q, T59, T7K, T7H, T12, T5z, T81;
Chris@42 54 E T7Q, T4h, T4Y, T7D, T7A, Tl, T5o, T3Q, T84, T7V, T2V, T4M, T7t, T7s, T1K;
Chris@42 55 E T5L, T8e, T8n, T38, T7v, T4R, T7w, T25, T5Q, T8h, T8o, T3V, T3S, T5p, T3T;
Chris@42 56 E T41, Tz, T3Y, TE, TA, T51, T5r, T3Z, Tv, T50, TB, T3U, T40;
Chris@42 57 {
Chris@42 58 E T49, T46, T5v, T47, T4f, TV, T4c, T10, TW, T57, T5x, T4d, TR, T56, TX;
Chris@42 59 E T48, T4e;
Chris@42 60 {
Chris@42 61 E T4m, T4j, T5A, T4k, T4s, T1g, T4p, T1l, T1h, T5c, T5C, T4q, T1c, T5b, T1i;
Chris@42 62 E T4l, T4r;
Chris@42 63 {
Chris@42 64 E T2E, T4y, T2B, T4A;
Chris@42 65 {
Chris@42 66 E T3y, T3z, T3t, T5Z, T3x, T2v, T3r, T3q, T3n, T2A, T3o, T2s;
Chris@42 67 {
Chris@42 68 E T2C, T2D, T3w, T3u, T3v;
Chris@42 69 T2C = Ip[0];
Chris@42 70 T2D = Im[0];
Chris@42 71 T3u = Rm[0];
Chris@42 72 T3v = Rp[0];
Chris@42 73 T3y = W[1];
Chris@42 74 T3z = T2C + T2D;
Chris@42 75 T2E = T2C - T2D;
Chris@42 76 T4y = T3v + T3u;
Chris@42 77 T3w = T3u - T3v;
Chris@42 78 T3t = W[0];
Chris@42 79 {
Chris@42 80 E T2y, T2z, T2t, T2u;
Chris@42 81 T2t = Ip[WS(rs, 8)];
Chris@42 82 T2u = Im[WS(rs, 8)];
Chris@42 83 T5Z = T3y * T3w;
Chris@42 84 T3x = T3t * T3w;
Chris@42 85 T2y = Rp[WS(rs, 8)];
Chris@42 86 T2v = T2t - T2u;
Chris@42 87 T3r = T2t + T2u;
Chris@42 88 T2z = Rm[WS(rs, 8)];
Chris@42 89 T3q = W[33];
Chris@42 90 T3n = W[32];
Chris@42 91 T2A = T2y + T2z;
Chris@42 92 T3o = T2z - T2y;
Chris@42 93 T2s = W[30];
Chris@42 94 }
Chris@42 95 }
Chris@42 96 {
Chris@42 97 E T3A, T5X, T4z, T2w, T3s, T3p, T5Y, T60, T2x;
Chris@42 98 T3A = FNMS(T3y, T3z, T3x);
Chris@42 99 T3p = T3n * T3o;
Chris@42 100 T5X = T3q * T3o;
Chris@42 101 T4z = T2s * T2A;
Chris@42 102 T2w = T2s * T2v;
Chris@42 103 T3s = FNMS(T3q, T3r, T3p);
Chris@42 104 T5Y = FMA(T3n, T3r, T5X);
Chris@42 105 T60 = FMA(T3t, T3z, T5Z);
Chris@42 106 T2x = W[31];
Chris@42 107 T3B = T3s + T3A;
Chris@42 108 T89 = T3A - T3s;
Chris@42 109 T61 = T5Y + T60;
Chris@42 110 T8l = T60 - T5Y;
Chris@42 111 T2B = FNMS(T2x, T2A, T2w);
Chris@42 112 T4A = FMA(T2x, T2v, T4z);
Chris@42 113 }
Chris@42 114 }
Chris@42 115 {
Chris@42 116 E T16, T1b, T17, T5a, T1d, T4o, T18;
Chris@42 117 {
Chris@42 118 E T19, T1a, T13, T4i, T14, T15;
Chris@42 119 T14 = Ip[WS(rs, 3)];
Chris@42 120 T15 = Im[WS(rs, 3)];
Chris@42 121 T2F = T2B + T2E;
Chris@42 122 T7p = T2E - T2B;
Chris@42 123 T8t = T4y - T4A;
Chris@42 124 T4B = T4y + T4A;
Chris@42 125 T4m = T14 + T15;
Chris@42 126 T16 = T14 - T15;
Chris@42 127 T19 = Rp[WS(rs, 3)];
Chris@42 128 T1a = Rm[WS(rs, 3)];
Chris@42 129 T13 = W[10];
Chris@42 130 T4i = W[12];
Chris@42 131 {
Chris@42 132 E T1e, T1f, T1j, T1k;
Chris@42 133 T1e = Ip[WS(rs, 11)];
Chris@42 134 T4j = T19 - T1a;
Chris@42 135 T1b = T19 + T1a;
Chris@42 136 T17 = T13 * T16;
Chris@42 137 T5A = T4i * T4m;
Chris@42 138 T4k = T4i * T4j;
Chris@42 139 T5a = T13 * T1b;
Chris@42 140 T1f = Im[WS(rs, 11)];
Chris@42 141 T1j = Rp[WS(rs, 11)];
Chris@42 142 T1k = Rm[WS(rs, 11)];
Chris@42 143 T1d = W[42];
Chris@42 144 T4s = T1e + T1f;
Chris@42 145 T1g = T1e - T1f;
Chris@42 146 T4p = T1j - T1k;
Chris@42 147 T1l = T1j + T1k;
Chris@42 148 T4o = W[44];
Chris@42 149 T1h = T1d * T1g;
Chris@42 150 }
Chris@42 151 }
Chris@42 152 T18 = W[11];
Chris@42 153 T5c = T1d * T1l;
Chris@42 154 T5C = T4o * T4s;
Chris@42 155 T4q = T4o * T4p;
Chris@42 156 T1c = FNMS(T18, T1b, T17);
Chris@42 157 T5b = FMA(T18, T16, T5a);
Chris@42 158 T1i = W[43];
Chris@42 159 T4l = W[13];
Chris@42 160 T4r = W[45];
Chris@42 161 }
Chris@42 162 }
Chris@42 163 {
Chris@42 164 E T4D, T2g, T2q, T4F;
Chris@42 165 {
Chris@42 166 E T3d, T3e, T2a, T2f, T3a, T5S, T3c, T4C, T2b, T3j, T2k, T3k, T2p, T3h, T3g;
Chris@42 167 E T2h, T5U, T3b, T27;
Chris@42 168 {
Chris@42 169 E T28, T29, T2d, T2e, T5d, T1m;
Chris@42 170 T28 = Ip[WS(rs, 4)];
Chris@42 171 T5d = FMA(T1i, T1g, T5c);
Chris@42 172 T1m = FNMS(T1i, T1l, T1h);
Chris@42 173 {
Chris@42 174 E T5B, T4n, T5D, T4t;
Chris@42 175 T5B = FNMS(T4l, T4j, T5A);
Chris@42 176 T4n = FMA(T4l, T4m, T4k);
Chris@42 177 T5D = FNMS(T4r, T4p, T5C);
Chris@42 178 T4t = FMA(T4r, T4s, T4q);
Chris@42 179 T7I = T5b - T5d;
Chris@42 180 T5e = T5b + T5d;
Chris@42 181 T7L = T1c - T1m;
Chris@42 182 T1n = T1c + T1m;
Chris@42 183 T7R = T5D - T5B;
Chris@42 184 T5E = T5B + T5D;
Chris@42 185 T82 = T4t - T4n;
Chris@42 186 T4u = T4n + T4t;
Chris@42 187 T29 = Im[WS(rs, 4)];
Chris@42 188 }
Chris@42 189 T2d = Rp[WS(rs, 4)];
Chris@42 190 T2e = Rm[WS(rs, 4)];
Chris@42 191 T3d = W[17];
Chris@42 192 T3e = T28 + T29;
Chris@42 193 T2a = T28 - T29;
Chris@42 194 T3b = T2e - T2d;
Chris@42 195 T2f = T2d + T2e;
Chris@42 196 T3a = W[16];
Chris@42 197 T27 = W[14];
Chris@42 198 T5S = T3d * T3b;
Chris@42 199 }
Chris@42 200 {
Chris@42 201 E T2i, T2j, T2n, T2o;
Chris@42 202 T2i = Ip[WS(rs, 12)];
Chris@42 203 T3c = T3a * T3b;
Chris@42 204 T4C = T27 * T2f;
Chris@42 205 T2b = T27 * T2a;
Chris@42 206 T2j = Im[WS(rs, 12)];
Chris@42 207 T2n = Rp[WS(rs, 12)];
Chris@42 208 T2o = Rm[WS(rs, 12)];
Chris@42 209 T3j = W[49];
Chris@42 210 T2k = T2i - T2j;
Chris@42 211 T3k = T2i + T2j;
Chris@42 212 T2p = T2n + T2o;
Chris@42 213 T3h = T2o - T2n;
Chris@42 214 T3g = W[48];
Chris@42 215 T2h = W[46];
Chris@42 216 T5U = T3j * T3h;
Chris@42 217 }
Chris@42 218 {
Chris@42 219 E T3f, T3i, T4E, T2l;
Chris@42 220 T3f = FNMS(T3d, T3e, T3c);
Chris@42 221 T3i = T3g * T3h;
Chris@42 222 T4E = T2h * T2p;
Chris@42 223 T2l = T2h * T2k;
Chris@42 224 {
Chris@42 225 E T5T, T3l, T5V, T2c, T2m;
Chris@42 226 T5T = FMA(T3a, T3e, T5S);
Chris@42 227 T3l = FNMS(T3j, T3k, T3i);
Chris@42 228 T5V = FMA(T3g, T3k, T5U);
Chris@42 229 T2c = W[15];
Chris@42 230 T2m = W[47];
Chris@42 231 T3m = T3f + T3l;
Chris@42 232 T8k = T3f - T3l;
Chris@42 233 T5W = T5T + T5V;
Chris@42 234 T8a = T5T - T5V;
Chris@42 235 T4D = FMA(T2c, T2a, T4C);
Chris@42 236 T2g = FNMS(T2c, T2f, T2b);
Chris@42 237 T2q = FNMS(T2m, T2p, T2l);
Chris@42 238 T4F = FMA(T2m, T2k, T4E);
Chris@42 239 }
Chris@42 240 }
Chris@42 241 }
Chris@42 242 {
Chris@42 243 E TL, TQ, TM, T55, TS, T4b, TN;
Chris@42 244 {
Chris@42 245 E TO, TP, TI, T45, TJ, TK;
Chris@42 246 TJ = Ip[WS(rs, 15)];
Chris@42 247 TK = Im[WS(rs, 15)];
Chris@42 248 T2r = T2g + T2q;
Chris@42 249 T8u = T2g - T2q;
Chris@42 250 T4G = T4D + T4F;
Chris@42 251 T7q = T4D - T4F;
Chris@42 252 T49 = TJ + TK;
Chris@42 253 TL = TJ - TK;
Chris@42 254 TO = Rp[WS(rs, 15)];
Chris@42 255 TP = Rm[WS(rs, 15)];
Chris@42 256 TI = W[58];
Chris@42 257 T45 = W[60];
Chris@42 258 {
Chris@42 259 E TT, TU, TY, TZ;
Chris@42 260 TT = Ip[WS(rs, 7)];
Chris@42 261 T46 = TO - TP;
Chris@42 262 TQ = TO + TP;
Chris@42 263 TM = TI * TL;
Chris@42 264 T5v = T45 * T49;
Chris@42 265 T47 = T45 * T46;
Chris@42 266 T55 = TI * TQ;
Chris@42 267 TU = Im[WS(rs, 7)];
Chris@42 268 TY = Rp[WS(rs, 7)];
Chris@42 269 TZ = Rm[WS(rs, 7)];
Chris@42 270 TS = W[26];
Chris@42 271 T4f = TT + TU;
Chris@42 272 TV = TT - TU;
Chris@42 273 T4c = TY - TZ;
Chris@42 274 T10 = TY + TZ;
Chris@42 275 T4b = W[28];
Chris@42 276 TW = TS * TV;
Chris@42 277 }
Chris@42 278 }
Chris@42 279 TN = W[59];
Chris@42 280 T57 = TS * T10;
Chris@42 281 T5x = T4b * T4f;
Chris@42 282 T4d = T4b * T4c;
Chris@42 283 TR = FNMS(TN, TQ, TM);
Chris@42 284 T56 = FMA(TN, TL, T55);
Chris@42 285 TX = W[27];
Chris@42 286 T48 = W[61];
Chris@42 287 T4e = W[29];
Chris@42 288 }
Chris@42 289 }
Chris@42 290 }
Chris@42 291 {
Chris@42 292 E T8c, T8d, T8f, T8g;
Chris@42 293 {
Chris@42 294 E T3I, T3F, T5k, T3G, T3O, Te, T3L, Tj, Tf, T4W, T5m, T3M, Ta, T4V, Tg;
Chris@42 295 E T3H, T3N;
Chris@42 296 {
Chris@42 297 E T4, T9, T5, T4U, Tb, T3K, T1, T3E, T6;
Chris@42 298 {
Chris@42 299 E T2, T3, T7, T8, T58, T11;
Chris@42 300 T2 = Ip[WS(rs, 1)];
Chris@42 301 T58 = FMA(TX, TV, T57);
Chris@42 302 T11 = FNMS(TX, T10, TW);
Chris@42 303 {
Chris@42 304 E T5w, T4a, T5y, T4g;
Chris@42 305 T5w = FNMS(T48, T46, T5v);
Chris@42 306 T4a = FMA(T48, T49, T47);
Chris@42 307 T5y = FNMS(T4e, T4c, T5x);
Chris@42 308 T4g = FMA(T4e, T4f, T4d);
Chris@42 309 T59 = T56 + T58;
Chris@42 310 T7K = T56 - T58;
Chris@42 311 T7H = TR - T11;
Chris@42 312 T12 = TR + T11;
Chris@42 313 T5z = T5w + T5y;
Chris@42 314 T81 = T5w - T5y;
Chris@42 315 T7Q = T4g - T4a;
Chris@42 316 T4h = T4a + T4g;
Chris@42 317 T3 = Im[WS(rs, 1)];
Chris@42 318 }
Chris@42 319 T7 = Rp[WS(rs, 1)];
Chris@42 320 T8 = Rm[WS(rs, 1)];
Chris@42 321 T1 = W[2];
Chris@42 322 T3I = T2 + T3;
Chris@42 323 T4 = T2 - T3;
Chris@42 324 T3F = T7 - T8;
Chris@42 325 T9 = T7 + T8;
Chris@42 326 T3E = W[4];
Chris@42 327 T5 = T1 * T4;
Chris@42 328 }
Chris@42 329 {
Chris@42 330 E Tc, Td, Th, Ti;
Chris@42 331 Tc = Ip[WS(rs, 9)];
Chris@42 332 T4U = T1 * T9;
Chris@42 333 T5k = T3E * T3I;
Chris@42 334 T3G = T3E * T3F;
Chris@42 335 Td = Im[WS(rs, 9)];
Chris@42 336 Th = Rp[WS(rs, 9)];
Chris@42 337 Ti = Rm[WS(rs, 9)];
Chris@42 338 Tb = W[34];
Chris@42 339 T3O = Tc + Td;
Chris@42 340 Te = Tc - Td;
Chris@42 341 T3L = Th - Ti;
Chris@42 342 Tj = Th + Ti;
Chris@42 343 T3K = W[36];
Chris@42 344 Tf = Tb * Te;
Chris@42 345 }
Chris@42 346 T6 = W[3];
Chris@42 347 T4W = Tb * Tj;
Chris@42 348 T5m = T3K * T3O;
Chris@42 349 T3M = T3K * T3L;
Chris@42 350 Ta = FNMS(T6, T9, T5);
Chris@42 351 T4V = FMA(T6, T4, T4U);
Chris@42 352 Tg = W[35];
Chris@42 353 T3H = W[5];
Chris@42 354 T3N = W[37];
Chris@42 355 }
Chris@42 356 {
Chris@42 357 E T1t, T2N, T2M, T2J, T1y, T2L, T5H, T4I, T1u, T2S, T1D, T2T, T1I, T2Q, T2P;
Chris@42 358 E T1A, T5J;
Chris@42 359 {
Chris@42 360 E T2K, T1q, T1w, T1x;
Chris@42 361 {
Chris@42 362 E T1r, T7U, T7T, T1s, T4X, Tk;
Chris@42 363 T1r = Ip[WS(rs, 2)];
Chris@42 364 T4X = FMA(Tg, Te, T4W);
Chris@42 365 Tk = FNMS(Tg, Tj, Tf);
Chris@42 366 {
Chris@42 367 E T5l, T3J, T5n, T3P;
Chris@42 368 T5l = FNMS(T3H, T3F, T5k);
Chris@42 369 T3J = FMA(T3H, T3I, T3G);
Chris@42 370 T5n = FNMS(T3N, T3L, T5m);
Chris@42 371 T3P = FMA(T3N, T3O, T3M);
Chris@42 372 T4Y = T4V + T4X;
Chris@42 373 T7D = T4V - T4X;
Chris@42 374 T7A = Ta - Tk;
Chris@42 375 Tl = Ta + Tk;
Chris@42 376 T7U = T5l - T5n;
Chris@42 377 T5o = T5l + T5n;
Chris@42 378 T7T = T3P - T3J;
Chris@42 379 T3Q = T3J + T3P;
Chris@42 380 T1s = Im[WS(rs, 2)];
Chris@42 381 }
Chris@42 382 T1w = Rp[WS(rs, 2)];
Chris@42 383 T84 = T7U + T7T;
Chris@42 384 T7V = T7T - T7U;
Chris@42 385 T1t = T1r - T1s;
Chris@42 386 T2N = T1r + T1s;
Chris@42 387 T1x = Rm[WS(rs, 2)];
Chris@42 388 }
Chris@42 389 T2M = W[9];
Chris@42 390 T2J = W[8];
Chris@42 391 T1y = T1w + T1x;
Chris@42 392 T2K = T1x - T1w;
Chris@42 393 T1q = W[6];
Chris@42 394 {
Chris@42 395 E T1B, T1C, T1G, T1H;
Chris@42 396 T1B = Ip[WS(rs, 10)];
Chris@42 397 T2L = T2J * T2K;
Chris@42 398 T5H = T2M * T2K;
Chris@42 399 T4I = T1q * T1y;
Chris@42 400 T1u = T1q * T1t;
Chris@42 401 T1C = Im[WS(rs, 10)];
Chris@42 402 T1G = Rp[WS(rs, 10)];
Chris@42 403 T1H = Rm[WS(rs, 10)];
Chris@42 404 T2S = W[41];
Chris@42 405 T1D = T1B - T1C;
Chris@42 406 T2T = T1B + T1C;
Chris@42 407 T1I = T1G + T1H;
Chris@42 408 T2Q = T1H - T1G;
Chris@42 409 T2P = W[40];
Chris@42 410 T1A = W[38];
Chris@42 411 T5J = T2S * T2Q;
Chris@42 412 }
Chris@42 413 }
Chris@42 414 {
Chris@42 415 E T2R, T4K, T1E, T1z, T4J, T1F, T1v, T2O, T2U;
Chris@42 416 T1v = W[7];
Chris@42 417 T2R = T2P * T2Q;
Chris@42 418 T4K = T1A * T1I;
Chris@42 419 T1E = T1A * T1D;
Chris@42 420 T1z = FNMS(T1v, T1y, T1u);
Chris@42 421 T4J = FMA(T1v, T1t, T4I);
Chris@42 422 T1F = W[39];
Chris@42 423 T2O = FNMS(T2M, T2N, T2L);
Chris@42 424 T2U = FNMS(T2S, T2T, T2R);
Chris@42 425 {
Chris@42 426 E T5I, T4L, T1J, T5K;
Chris@42 427 T5I = FMA(T2J, T2N, T5H);
Chris@42 428 T4L = FMA(T1F, T1D, T4K);
Chris@42 429 T1J = FNMS(T1F, T1I, T1E);
Chris@42 430 T8c = T2O - T2U;
Chris@42 431 T2V = T2O + T2U;
Chris@42 432 T5K = FMA(T2P, T2T, T5J);
Chris@42 433 T4M = T4J + T4L;
Chris@42 434 T7t = T4J - T4L;
Chris@42 435 T7s = T1z - T1J;
Chris@42 436 T1K = T1z + T1J;
Chris@42 437 T8d = T5I - T5K;
Chris@42 438 T5L = T5I + T5K;
Chris@42 439 }
Chris@42 440 }
Chris@42 441 }
Chris@42 442 }
Chris@42 443 {
Chris@42 444 E T2Z, T30, T1O, T1T, T2W, T5M, T2Y, T4N, T1P, T35, T1Y, T36, T23, T33, T32;
Chris@42 445 E T1V, T5O, T2X, T1L;
Chris@42 446 {
Chris@42 447 E T1M, T1N, T1R, T1S;
Chris@42 448 T1M = Ip[WS(rs, 14)];
Chris@42 449 T8e = T8c - T8d;
Chris@42 450 T8n = T8c + T8d;
Chris@42 451 T1N = Im[WS(rs, 14)];
Chris@42 452 T1R = Rp[WS(rs, 14)];
Chris@42 453 T1S = Rm[WS(rs, 14)];
Chris@42 454 T2Z = W[57];
Chris@42 455 T30 = T1M + T1N;
Chris@42 456 T1O = T1M - T1N;
Chris@42 457 T2X = T1S - T1R;
Chris@42 458 T1T = T1R + T1S;
Chris@42 459 T2W = W[56];
Chris@42 460 T1L = W[54];
Chris@42 461 T5M = T2Z * T2X;
Chris@42 462 }
Chris@42 463 {
Chris@42 464 E T1W, T1X, T21, T22;
Chris@42 465 T1W = Ip[WS(rs, 6)];
Chris@42 466 T2Y = T2W * T2X;
Chris@42 467 T4N = T1L * T1T;
Chris@42 468 T1P = T1L * T1O;
Chris@42 469 T1X = Im[WS(rs, 6)];
Chris@42 470 T21 = Rp[WS(rs, 6)];
Chris@42 471 T22 = Rm[WS(rs, 6)];
Chris@42 472 T35 = W[25];
Chris@42 473 T1Y = T1W - T1X;
Chris@42 474 T36 = T1W + T1X;
Chris@42 475 T23 = T21 + T22;
Chris@42 476 T33 = T22 - T21;
Chris@42 477 T32 = W[24];
Chris@42 478 T1V = W[22];
Chris@42 479 T5O = T35 * T33;
Chris@42 480 }
Chris@42 481 {
Chris@42 482 E T34, T4P, T1Z, T1U, T4O, T20, T1Q, T31, T37;
Chris@42 483 T1Q = W[55];
Chris@42 484 T34 = T32 * T33;
Chris@42 485 T4P = T1V * T23;
Chris@42 486 T1Z = T1V * T1Y;
Chris@42 487 T1U = FNMS(T1Q, T1T, T1P);
Chris@42 488 T4O = FMA(T1Q, T1O, T4N);
Chris@42 489 T20 = W[23];
Chris@42 490 T31 = FNMS(T2Z, T30, T2Y);
Chris@42 491 T37 = FNMS(T35, T36, T34);
Chris@42 492 {
Chris@42 493 E T5N, T4Q, T24, T5P;
Chris@42 494 T5N = FMA(T2W, T30, T5M);
Chris@42 495 T4Q = FMA(T20, T1Y, T4P);
Chris@42 496 T24 = FNMS(T20, T23, T1Z);
Chris@42 497 T8f = T31 - T37;
Chris@42 498 T38 = T31 + T37;
Chris@42 499 T5P = FMA(T32, T36, T5O);
Chris@42 500 T7v = T4O - T4Q;
Chris@42 501 T4R = T4O + T4Q;
Chris@42 502 T7w = T1U - T24;
Chris@42 503 T25 = T1U + T24;
Chris@42 504 T8g = T5N - T5P;
Chris@42 505 T5Q = T5N + T5P;
Chris@42 506 }
Chris@42 507 }
Chris@42 508 }
Chris@42 509 {
Chris@42 510 E Tp, Tu, Tq, T4Z, Tw, T3X, Tm, T3R, Tr;
Chris@42 511 {
Chris@42 512 E Tn, To, Ts, Tt;
Chris@42 513 Tn = Ip[WS(rs, 5)];
Chris@42 514 T8h = T8f + T8g;
Chris@42 515 T8o = T8g - T8f;
Chris@42 516 To = Im[WS(rs, 5)];
Chris@42 517 Ts = Rp[WS(rs, 5)];
Chris@42 518 Tt = Rm[WS(rs, 5)];
Chris@42 519 Tm = W[18];
Chris@42 520 T3V = Tn + To;
Chris@42 521 Tp = Tn - To;
Chris@42 522 T3S = Ts - Tt;
Chris@42 523 Tu = Ts + Tt;
Chris@42 524 T3R = W[20];
Chris@42 525 Tq = Tm * Tp;
Chris@42 526 }
Chris@42 527 {
Chris@42 528 E Tx, Ty, TC, TD;
Chris@42 529 Tx = Ip[WS(rs, 13)];
Chris@42 530 T4Z = Tm * Tu;
Chris@42 531 T5p = T3R * T3V;
Chris@42 532 T3T = T3R * T3S;
Chris@42 533 Ty = Im[WS(rs, 13)];
Chris@42 534 TC = Rp[WS(rs, 13)];
Chris@42 535 TD = Rm[WS(rs, 13)];
Chris@42 536 Tw = W[50];
Chris@42 537 T41 = Tx + Ty;
Chris@42 538 Tz = Tx - Ty;
Chris@42 539 T3Y = TC - TD;
Chris@42 540 TE = TC + TD;
Chris@42 541 T3X = W[52];
Chris@42 542 TA = Tw * Tz;
Chris@42 543 }
Chris@42 544 Tr = W[19];
Chris@42 545 T51 = Tw * TE;
Chris@42 546 T5r = T3X * T41;
Chris@42 547 T3Z = T3X * T3Y;
Chris@42 548 Tv = FNMS(Tr, Tu, Tq);
Chris@42 549 T50 = FMA(Tr, Tp, T4Z);
Chris@42 550 TB = W[51];
Chris@42 551 T3U = W[21];
Chris@42 552 T40 = W[53];
Chris@42 553 }
Chris@42 554 }
Chris@42 555 }
Chris@42 556 {
Chris@42 557 E T6y, T7B, T7E, T6u, T6S, T85, T7Y, T6s, T6v, T6x, T6R, T6r, T6F, T6D, T6C;
Chris@42 558 E T6G, T6M, T6K, T6J, T6N, T6l, T6o, T7j, T7m;
Chris@42 559 {
Chris@42 560 E T6i, T1p, T68, T2H, T67, T5g, T6h, T4T, T4w, T5G, T6d, T3D, T6c, T6m, T63;
Chris@42 561 E T6e;
Chris@42 562 {
Chris@42 563 E T5t, T43, T26, T2G, T54, T5f, T4H, T4S;
Chris@42 564 {
Chris@42 565 E T1o, T53, T7W, T7X, TH, T52, TF, T5q;
Chris@42 566 T6y = T12 - T1n;
Chris@42 567 T1o = T12 + T1n;
Chris@42 568 T52 = FMA(TB, Tz, T51);
Chris@42 569 TF = FNMS(TB, TE, TA);
Chris@42 570 T5q = FNMS(T3U, T3S, T5p);
Chris@42 571 {
Chris@42 572 E T3W, T5s, T42, TG;
Chris@42 573 T3W = FMA(T3U, T3V, T3T);
Chris@42 574 T5s = FNMS(T40, T3Y, T5r);
Chris@42 575 T42 = FMA(T40, T41, T3Z);
Chris@42 576 T7B = T50 - T52;
Chris@42 577 T53 = T50 + T52;
Chris@42 578 T7E = Tv - TF;
Chris@42 579 TG = Tv + TF;
Chris@42 580 T7W = T5s - T5q;
Chris@42 581 T5t = T5q + T5s;
Chris@42 582 T7X = T3W - T42;
Chris@42 583 T43 = T3W + T42;
Chris@42 584 TH = Tl + TG;
Chris@42 585 T6u = Tl - TG;
Chris@42 586 }
Chris@42 587 T6S = T1K - T25;
Chris@42 588 T26 = T1K + T25;
Chris@42 589 T85 = T7W - T7X;
Chris@42 590 T7Y = T7W + T7X;
Chris@42 591 T6i = TH - T1o;
Chris@42 592 T1p = TH + T1o;
Chris@42 593 T2G = T2r + T2F;
Chris@42 594 T6s = T2F - T2r;
Chris@42 595 T6v = T4Y - T53;
Chris@42 596 T54 = T4Y + T53;
Chris@42 597 T5f = T59 + T5e;
Chris@42 598 T6x = T59 - T5e;
Chris@42 599 }
Chris@42 600 T6R = T4B - T4G;
Chris@42 601 T4H = T4B + T4G;
Chris@42 602 T68 = T2G - T26;
Chris@42 603 T2H = T26 + T2G;
Chris@42 604 T67 = T5f - T54;
Chris@42 605 T5g = T54 + T5f;
Chris@42 606 T4S = T4M + T4R;
Chris@42 607 T6r = T4R - T4M;
Chris@42 608 {
Chris@42 609 E T5u, T6b, T5F, T44, T4v;
Chris@42 610 T6F = T43 - T3Q;
Chris@42 611 T44 = T3Q + T43;
Chris@42 612 T4v = T4h + T4u;
Chris@42 613 T6D = T4u - T4h;
Chris@42 614 T6C = T5t - T5o;
Chris@42 615 T5u = T5o + T5t;
Chris@42 616 T6h = T4H - T4S;
Chris@42 617 T4T = T4H + T4S;
Chris@42 618 T6b = T44 - T4v;
Chris@42 619 T4w = T44 + T4v;
Chris@42 620 T6G = T5z - T5E;
Chris@42 621 T5F = T5z + T5E;
Chris@42 622 {
Chris@42 623 E T5R, T62, T39, T3C, T6a;
Chris@42 624 T6M = T2V - T38;
Chris@42 625 T39 = T2V + T38;
Chris@42 626 T3C = T3m + T3B;
Chris@42 627 T6K = T3B - T3m;
Chris@42 628 T6a = T5F - T5u;
Chris@42 629 T5G = T5u + T5F;
Chris@42 630 T6J = T5Q - T5L;
Chris@42 631 T5R = T5L + T5Q;
Chris@42 632 T6d = T3C - T39;
Chris@42 633 T3D = T39 + T3C;
Chris@42 634 T6N = T61 - T5W;
Chris@42 635 T62 = T5W + T61;
Chris@42 636 T6c = T6a + T6b;
Chris@42 637 T6m = T6a - T6b;
Chris@42 638 T63 = T5R + T62;
Chris@42 639 T6e = T62 - T5R;
Chris@42 640 }
Chris@42 641 }
Chris@42 642 }
Chris@42 643 {
Chris@42 644 E T5j, T6n, T6f, T64;
Chris@42 645 {
Chris@42 646 E T5i, T5h, T65, T66, T2I, T4x;
Chris@42 647 T5j = T2H - T1p;
Chris@42 648 T2I = T1p + T2H;
Chris@42 649 T4x = T3D - T4w;
Chris@42 650 T5i = T4w + T3D;
Chris@42 651 T6n = T6d + T6e;
Chris@42 652 T6f = T6d - T6e;
Chris@42 653 T5h = T4T - T5g;
Chris@42 654 T65 = T4T + T5g;
Chris@42 655 Im[WS(rs, 15)] = KP500000000 * (T4x - T2I);
Chris@42 656 Ip[0] = KP500000000 * (T2I + T4x);
Chris@42 657 T66 = T5G + T63;
Chris@42 658 T64 = T5G - T63;
Chris@42 659 Rp[0] = KP500000000 * (T65 + T66);
Chris@42 660 Rm[WS(rs, 15)] = KP500000000 * (T65 - T66);
Chris@42 661 Rp[WS(rs, 8)] = KP500000000 * (T5h + T5i);
Chris@42 662 Rm[WS(rs, 7)] = KP500000000 * (T5h - T5i);
Chris@42 663 }
Chris@42 664 {
Chris@42 665 E T6k, T6j, T6p, T6q, T69, T6g;
Chris@42 666 T6l = T68 - T67;
Chris@42 667 T69 = T67 + T68;
Chris@42 668 T6g = T6c + T6f;
Chris@42 669 T6k = T6f - T6c;
Chris@42 670 T6j = T6h - T6i;
Chris@42 671 T6p = T6h + T6i;
Chris@42 672 Im[WS(rs, 7)] = KP500000000 * (T64 - T5j);
Chris@42 673 Ip[WS(rs, 8)] = KP500000000 * (T5j + T64);
Chris@42 674 Im[WS(rs, 11)] = -(KP500000000 * (FNMS(KP707106781, T6g, T69)));
Chris@42 675 Ip[WS(rs, 4)] = KP500000000 * (FMA(KP707106781, T6g, T69));
Chris@42 676 T6q = T6m + T6n;
Chris@42 677 T6o = T6m - T6n;
Chris@42 678 Rp[WS(rs, 4)] = KP500000000 * (FMA(KP707106781, T6q, T6p));
Chris@42 679 Rm[WS(rs, 11)] = KP500000000 * (FNMS(KP707106781, T6q, T6p));
Chris@42 680 Rp[WS(rs, 12)] = KP500000000 * (FMA(KP707106781, T6k, T6j));
Chris@42 681 Rm[WS(rs, 3)] = KP500000000 * (FNMS(KP707106781, T6k, T6j));
Chris@42 682 }
Chris@42 683 }
Chris@42 684 }
Chris@42 685 {
Chris@42 686 E T75, T6t, T7f, T6T, T76, T6W, T7g, T6A, T7b, T6L, T7a, T7k, T70, T6I, T6U;
Chris@42 687 E T6w;
Chris@42 688 Im[WS(rs, 3)] = -(KP500000000 * (FNMS(KP707106781, T6o, T6l)));
Chris@42 689 Ip[WS(rs, 12)] = KP500000000 * (FMA(KP707106781, T6o, T6l));
Chris@42 690 T75 = T6s - T6r;
Chris@42 691 T6t = T6r + T6s;
Chris@42 692 T7f = T6R - T6S;
Chris@42 693 T6T = T6R + T6S;
Chris@42 694 T6U = T6v + T6u;
Chris@42 695 T6w = T6u - T6v;
Chris@42 696 {
Chris@42 697 E T78, T6E, T6V, T6z, T79, T6H;
Chris@42 698 T6V = T6x - T6y;
Chris@42 699 T6z = T6x + T6y;
Chris@42 700 T78 = T6C - T6D;
Chris@42 701 T6E = T6C + T6D;
Chris@42 702 T76 = T6V - T6U;
Chris@42 703 T6W = T6U + T6V;
Chris@42 704 T7g = T6w - T6z;
Chris@42 705 T6A = T6w + T6z;
Chris@42 706 T79 = T6G - T6F;
Chris@42 707 T6H = T6F + T6G;
Chris@42 708 T7b = T6K - T6J;
Chris@42 709 T6L = T6J + T6K;
Chris@42 710 T7a = FMA(KP414213562, T79, T78);
Chris@42 711 T7k = FNMS(KP414213562, T78, T79);
Chris@42 712 T70 = FNMS(KP414213562, T6E, T6H);
Chris@42 713 T6I = FMA(KP414213562, T6H, T6E);
Chris@42 714 }
Chris@42 715 {
Chris@42 716 E T6Z, T6B, T73, T6X, T7c, T6O;
Chris@42 717 T6Z = FNMS(KP707106781, T6A, T6t);
Chris@42 718 T6B = FMA(KP707106781, T6A, T6t);
Chris@42 719 T73 = FMA(KP707106781, T6W, T6T);
Chris@42 720 T6X = FNMS(KP707106781, T6W, T6T);
Chris@42 721 T7c = T6N - T6M;
Chris@42 722 T6O = T6M + T6N;
Chris@42 723 {
Chris@42 724 E T7i, T7h, T7n, T7o;
Chris@42 725 {
Chris@42 726 E T77, T7l, T71, T6P, T7e, T7d;
Chris@42 727 T7j = FMA(KP707106781, T76, T75);
Chris@42 728 T77 = FNMS(KP707106781, T76, T75);
Chris@42 729 T7d = FMA(KP414213562, T7c, T7b);
Chris@42 730 T7l = FNMS(KP414213562, T7b, T7c);
Chris@42 731 T71 = FMA(KP414213562, T6L, T6O);
Chris@42 732 T6P = FNMS(KP414213562, T6O, T6L);
Chris@42 733 T7e = T7a - T7d;
Chris@42 734 T7i = T7a + T7d;
Chris@42 735 T7h = FMA(KP707106781, T7g, T7f);
Chris@42 736 T7n = FNMS(KP707106781, T7g, T7f);
Chris@42 737 {
Chris@42 738 E T72, T74, T6Y, T6Q;
Chris@42 739 T72 = T70 - T71;
Chris@42 740 T74 = T70 + T71;
Chris@42 741 T6Y = T6P - T6I;
Chris@42 742 T6Q = T6I + T6P;
Chris@42 743 Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP923879532, T7e, T77)));
Chris@42 744 Ip[WS(rs, 14)] = KP500000000 * (FMA(KP923879532, T7e, T77));
Chris@42 745 Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP923879532, T72, T6Z)));
Chris@42 746 Ip[WS(rs, 10)] = KP500000000 * (FMA(KP923879532, T72, T6Z));
Chris@42 747 Rp[WS(rs, 2)] = KP500000000 * (FMA(KP923879532, T74, T73));
Chris@42 748 Rm[WS(rs, 13)] = KP500000000 * (FNMS(KP923879532, T74, T73));
Chris@42 749 Rp[WS(rs, 10)] = KP500000000 * (FMA(KP923879532, T6Y, T6X));
Chris@42 750 Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP923879532, T6Y, T6X));
Chris@42 751 Im[WS(rs, 13)] = -(KP500000000 * (FNMS(KP923879532, T6Q, T6B)));
Chris@42 752 Ip[WS(rs, 2)] = KP500000000 * (FMA(KP923879532, T6Q, T6B));
Chris@42 753 T7o = T7k + T7l;
Chris@42 754 T7m = T7k - T7l;
Chris@42 755 }
Chris@42 756 }
Chris@42 757 Rm[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, T7o, T7n));
Chris@42 758 Rp[WS(rs, 14)] = KP500000000 * (FNMS(KP923879532, T7o, T7n));
Chris@42 759 Rp[WS(rs, 6)] = KP500000000 * (FMA(KP923879532, T7i, T7h));
Chris@42 760 Rm[WS(rs, 9)] = KP500000000 * (FNMS(KP923879532, T7i, T7h));
Chris@42 761 }
Chris@42 762 }
Chris@42 763 }
Chris@42 764 {
Chris@42 765 E T9x, T9T, T8L, T7z, T97, T9J, T8V, T8z, T8M, T8C, T8W, T7O, T9O, T9Y, T9E;
Chris@42 766 E T9t, T8Q, T90, T8G, T88, T8p, T8m, T9K, T9A, T9U, T9e, T8R, T8j, T9R, T9Z;
Chris@42 767 E T9F, T9m;
Chris@42 768 {
Chris@42 769 E T9c, T9b, T99, T98, T7S, T86, T83, T9q, T9M, T9p, T9r, T7Z, T9z, T9a;
Chris@42 770 {
Chris@42 771 E T95, T7r, T9v, T8v, T8w, T8x, T9w, T7y, T7u, T7x;
Chris@42 772 T95 = T7q + T7p;
Chris@42 773 T7r = T7p - T7q;
Chris@42 774 T9v = T8t - T8u;
Chris@42 775 T8v = T8t + T8u;
Chris@42 776 T8w = T7t + T7s;
Chris@42 777 T7u = T7s - T7t;
Chris@42 778 Im[WS(rs, 9)] = -(KP500000000 * (FNMS(KP923879532, T7m, T7j)));
Chris@42 779 Ip[WS(rs, 6)] = KP500000000 * (FMA(KP923879532, T7m, T7j));
Chris@42 780 T7x = T7v + T7w;
Chris@42 781 T8x = T7v - T7w;
Chris@42 782 T9w = T7u - T7x;
Chris@42 783 T7y = T7u + T7x;
Chris@42 784 {
Chris@42 785 E T7J, T8A, T7G, T7M;
Chris@42 786 {
Chris@42 787 E T7C, T96, T8y, T7F;
Chris@42 788 T9c = T7A + T7B;
Chris@42 789 T7C = T7A - T7B;
Chris@42 790 T9x = FMA(KP707106781, T9w, T9v);
Chris@42 791 T9T = FNMS(KP707106781, T9w, T9v);
Chris@42 792 T8L = FNMS(KP707106781, T7y, T7r);
Chris@42 793 T7z = FMA(KP707106781, T7y, T7r);
Chris@42 794 T96 = T8x - T8w;
Chris@42 795 T8y = T8w + T8x;
Chris@42 796 T7F = T7D + T7E;
Chris@42 797 T9b = T7D - T7E;
Chris@42 798 T99 = T7H + T7I;
Chris@42 799 T7J = T7H - T7I;
Chris@42 800 T97 = FMA(KP707106781, T96, T95);
Chris@42 801 T9J = FNMS(KP707106781, T96, T95);
Chris@42 802 T8V = FNMS(KP707106781, T8y, T8v);
Chris@42 803 T8z = FMA(KP707106781, T8y, T8v);
Chris@42 804 T8A = FMA(KP414213562, T7C, T7F);
Chris@42 805 T7G = FNMS(KP414213562, T7F, T7C);
Chris@42 806 T7M = T7K + T7L;
Chris@42 807 T98 = T7K - T7L;
Chris@42 808 }
Chris@42 809 {
Chris@42 810 E T9n, T9o, T8B, T7N;
Chris@42 811 T7S = T7Q + T7R;
Chris@42 812 T9n = T7R - T7Q;
Chris@42 813 T9o = T85 - T84;
Chris@42 814 T86 = T84 + T85;
Chris@42 815 T83 = T81 + T82;
Chris@42 816 T9q = T81 - T82;
Chris@42 817 T8B = FNMS(KP414213562, T7J, T7M);
Chris@42 818 T7N = FMA(KP414213562, T7M, T7J);
Chris@42 819 T9M = FMA(KP707106781, T9o, T9n);
Chris@42 820 T9p = FNMS(KP707106781, T9o, T9n);
Chris@42 821 T8M = T8B - T8A;
Chris@42 822 T8C = T8A + T8B;
Chris@42 823 T8W = T7G - T7N;
Chris@42 824 T7O = T7G + T7N;
Chris@42 825 T9r = T7Y - T7V;
Chris@42 826 T7Z = T7V + T7Y;
Chris@42 827 }
Chris@42 828 }
Chris@42 829 }
Chris@42 830 {
Chris@42 831 E T8O, T80, T9N, T9s, T8P, T87;
Chris@42 832 T9N = FMA(KP707106781, T9r, T9q);
Chris@42 833 T9s = FNMS(KP707106781, T9r, T9q);
Chris@42 834 T8O = FNMS(KP707106781, T7Z, T7S);
Chris@42 835 T80 = FMA(KP707106781, T7Z, T7S);
Chris@42 836 T9O = FMA(KP198912367, T9N, T9M);
Chris@42 837 T9Y = FNMS(KP198912367, T9M, T9N);
Chris@42 838 T9E = FMA(KP668178637, T9p, T9s);
Chris@42 839 T9t = FNMS(KP668178637, T9s, T9p);
Chris@42 840 T8P = FNMS(KP707106781, T86, T83);
Chris@42 841 T87 = FMA(KP707106781, T86, T83);
Chris@42 842 T9z = FNMS(KP414213562, T98, T99);
Chris@42 843 T9a = FMA(KP414213562, T99, T98);
Chris@42 844 T8Q = FNMS(KP668178637, T8P, T8O);
Chris@42 845 T90 = FMA(KP668178637, T8O, T8P);
Chris@42 846 T8G = FNMS(KP198912367, T80, T87);
Chris@42 847 T88 = FMA(KP198912367, T87, T80);
Chris@42 848 }
Chris@42 849 {
Chris@42 850 E T8b, T9j, T9P, T9i, T9k, T8i, T9Q, T9l;
Chris@42 851 {
Chris@42 852 E T9g, T9h, T9y, T9d;
Chris@42 853 T8b = T89 - T8a;
Chris@42 854 T9g = T8a + T89;
Chris@42 855 T9h = T8n - T8o;
Chris@42 856 T8p = T8n + T8o;
Chris@42 857 T8m = T8k + T8l;
Chris@42 858 T9j = T8l - T8k;
Chris@42 859 T9y = FMA(KP414213562, T9b, T9c);
Chris@42 860 T9d = FNMS(KP414213562, T9c, T9b);
Chris@42 861 T9P = FMA(KP707106781, T9h, T9g);
Chris@42 862 T9i = FNMS(KP707106781, T9h, T9g);
Chris@42 863 T9K = T9y + T9z;
Chris@42 864 T9A = T9y - T9z;
Chris@42 865 T9U = T9d + T9a;
Chris@42 866 T9e = T9a - T9d;
Chris@42 867 T9k = T8h - T8e;
Chris@42 868 T8i = T8e + T8h;
Chris@42 869 }
Chris@42 870 T9Q = FMA(KP707106781, T9k, T9j);
Chris@42 871 T9l = FNMS(KP707106781, T9k, T9j);
Chris@42 872 T8R = FNMS(KP707106781, T8i, T8b);
Chris@42 873 T8j = FMA(KP707106781, T8i, T8b);
Chris@42 874 T9R = FMA(KP198912367, T9Q, T9P);
Chris@42 875 T9Z = FNMS(KP198912367, T9P, T9Q);
Chris@42 876 T9F = FMA(KP668178637, T9i, T9l);
Chris@42 877 T9m = FNMS(KP668178637, T9l, T9i);
Chris@42 878 }
Chris@42 879 }
Chris@42 880 {
Chris@42 881 E T8Z, T92, T9D, T9G;
Chris@42 882 {
Chris@42 883 E T8F, T7P, T8J, T8D, T8S, T8q;
Chris@42 884 T8F = FNMS(KP923879532, T7O, T7z);
Chris@42 885 T7P = FMA(KP923879532, T7O, T7z);
Chris@42 886 T8J = FMA(KP923879532, T8C, T8z);
Chris@42 887 T8D = FNMS(KP923879532, T8C, T8z);
Chris@42 888 T8S = FNMS(KP707106781, T8p, T8m);
Chris@42 889 T8q = FMA(KP707106781, T8p, T8m);
Chris@42 890 {
Chris@42 891 E T8Y, T8X, T93, T94;
Chris@42 892 {
Chris@42 893 E T8N, T91, T8H, T8r, T8U, T8T;
Chris@42 894 T8Z = FMA(KP923879532, T8M, T8L);
Chris@42 895 T8N = FNMS(KP923879532, T8M, T8L);
Chris@42 896 T8T = FMA(KP668178637, T8S, T8R);
Chris@42 897 T91 = FNMS(KP668178637, T8R, T8S);
Chris@42 898 T8H = FMA(KP198912367, T8j, T8q);
Chris@42 899 T8r = FNMS(KP198912367, T8q, T8j);
Chris@42 900 T8U = T8Q + T8T;
Chris@42 901 T8Y = T8T - T8Q;
Chris@42 902 T8X = FMA(KP923879532, T8W, T8V);
Chris@42 903 T93 = FNMS(KP923879532, T8W, T8V);
Chris@42 904 {
Chris@42 905 E T8I, T8K, T8E, T8s;
Chris@42 906 T8I = T8G - T8H;
Chris@42 907 T8K = T8G + T8H;
Chris@42 908 T8E = T8r - T88;
Chris@42 909 T8s = T88 + T8r;
Chris@42 910 Im[WS(rs, 2)] = -(KP500000000 * (FMA(KP831469612, T8U, T8N)));
Chris@42 911 Ip[WS(rs, 13)] = KP500000000 * (FNMS(KP831469612, T8U, T8N));
Chris@42 912 Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP980785280, T8I, T8F)));
Chris@42 913 Ip[WS(rs, 9)] = KP500000000 * (FMA(KP980785280, T8I, T8F));
Chris@42 914 Rp[WS(rs, 1)] = KP500000000 * (FMA(KP980785280, T8K, T8J));
Chris@42 915 Rm[WS(rs, 14)] = KP500000000 * (FNMS(KP980785280, T8K, T8J));
Chris@42 916 Rp[WS(rs, 9)] = KP500000000 * (FMA(KP980785280, T8E, T8D));
Chris@42 917 Rm[WS(rs, 6)] = KP500000000 * (FNMS(KP980785280, T8E, T8D));
Chris@42 918 Im[WS(rs, 14)] = -(KP500000000 * (FNMS(KP980785280, T8s, T7P)));
Chris@42 919 Ip[WS(rs, 1)] = KP500000000 * (FMA(KP980785280, T8s, T7P));
Chris@42 920 T94 = T90 + T91;
Chris@42 921 T92 = T90 - T91;
Chris@42 922 }
Chris@42 923 }
Chris@42 924 Rm[WS(rs, 2)] = KP500000000 * (FMA(KP831469612, T94, T93));
Chris@42 925 Rp[WS(rs, 13)] = KP500000000 * (FNMS(KP831469612, T94, T93));
Chris@42 926 Rp[WS(rs, 5)] = KP500000000 * (FMA(KP831469612, T8Y, T8X));
Chris@42 927 Rm[WS(rs, 10)] = KP500000000 * (FNMS(KP831469612, T8Y, T8X));
Chris@42 928 }
Chris@42 929 }
Chris@42 930 {
Chris@42 931 E T9C, T9B, T9H, T9I, T9f, T9u;
Chris@42 932 T9D = FNMS(KP923879532, T9e, T97);
Chris@42 933 T9f = FMA(KP923879532, T9e, T97);
Chris@42 934 T9u = T9m - T9t;
Chris@42 935 T9C = T9t + T9m;
Chris@42 936 T9B = FNMS(KP923879532, T9A, T9x);
Chris@42 937 T9H = FMA(KP923879532, T9A, T9x);
Chris@42 938 Im[WS(rs, 10)] = -(KP500000000 * (FNMS(KP831469612, T92, T8Z)));
Chris@42 939 Ip[WS(rs, 5)] = KP500000000 * (FMA(KP831469612, T92, T8Z));
Chris@42 940 Im[WS(rs, 12)] = -(KP500000000 * (FNMS(KP831469612, T9u, T9f)));
Chris@42 941 Ip[WS(rs, 3)] = KP500000000 * (FMA(KP831469612, T9u, T9f));
Chris@42 942 T9I = T9E + T9F;
Chris@42 943 T9G = T9E - T9F;
Chris@42 944 Rp[WS(rs, 3)] = KP500000000 * (FMA(KP831469612, T9I, T9H));
Chris@42 945 Rm[WS(rs, 12)] = KP500000000 * (FNMS(KP831469612, T9I, T9H));
Chris@42 946 Rp[WS(rs, 11)] = KP500000000 * (FMA(KP831469612, T9C, T9B));
Chris@42 947 Rm[WS(rs, 4)] = KP500000000 * (FNMS(KP831469612, T9C, T9B));
Chris@42 948 }
Chris@42 949 {
Chris@42 950 E T9W, T9V, Ta1, Ta2, T9L, T9S;
Chris@42 951 T9X = FNMS(KP923879532, T9K, T9J);
Chris@42 952 T9L = FMA(KP923879532, T9K, T9J);
Chris@42 953 T9S = T9O - T9R;
Chris@42 954 T9W = T9O + T9R;
Chris@42 955 T9V = FNMS(KP923879532, T9U, T9T);
Chris@42 956 Ta1 = FMA(KP923879532, T9U, T9T);
Chris@42 957 Im[WS(rs, 4)] = -(KP500000000 * (FNMS(KP831469612, T9G, T9D)));
Chris@42 958 Ip[WS(rs, 11)] = KP500000000 * (FMA(KP831469612, T9G, T9D));
Chris@42 959 Im[0] = -(KP500000000 * (FNMS(KP980785280, T9S, T9L)));
Chris@42 960 Ip[WS(rs, 15)] = KP500000000 * (FMA(KP980785280, T9S, T9L));
Chris@42 961 Ta2 = T9Y + T9Z;
Chris@42 962 Ta0 = T9Y - T9Z;
Chris@42 963 Rm[0] = KP500000000 * (FMA(KP980785280, Ta2, Ta1));
Chris@42 964 Rp[WS(rs, 15)] = KP500000000 * (FNMS(KP980785280, Ta2, Ta1));
Chris@42 965 Rp[WS(rs, 7)] = KP500000000 * (FMA(KP980785280, T9W, T9V));
Chris@42 966 Rm[WS(rs, 8)] = KP500000000 * (FNMS(KP980785280, T9W, T9V));
Chris@42 967 }
Chris@42 968 }
Chris@42 969 }
Chris@42 970 }
Chris@42 971 }
Chris@42 972 Im[WS(rs, 8)] = -(KP500000000 * (FNMS(KP980785280, Ta0, T9X)));
Chris@42 973 Ip[WS(rs, 7)] = KP500000000 * (FMA(KP980785280, Ta0, T9X));
Chris@42 974 }
Chris@42 975 }
Chris@42 976 }
Chris@42 977
/*
 * Twiddle instruction table for this codelet; `desc` below stores a pointer
 * to it.  It has two entries: {TW_FULL, 1, 32} followed by {TW_NEXT, 1, 0}.
 * NOTE(review): presumably TW_FULL asks for the complete twiddle set of a
 * size-32 transform and TW_NEXT terminates the list -- confirm against the
 * tw_instr declaration, which is not part of this file.  For reference, the
 * non-FMA hc2cfdft_32 body later in this file indexes W up to W[61] and
 * advances W by 62 per loop iteration.
 */
Chris@42 978 static const tw_instr twinstr[] = {
Chris@42 979 {TW_FULL, 1, 32},
Chris@42 980 {TW_NEXT, 1, 0}
Chris@42 981 };
Chris@42 982
/*
 * Descriptor handed to X(khc2c_register) by the registration function below.
 * Fields, in order: 32, the codelet name string, the twiddle instruction
 * table, &GENUS, and the operation counts {300, 126, 198, 0}.  The first
 * three counts equal the "300 additions, 126 multiplications, 198 fused
 * multiply/add" figures quoted in the header comment of this (HAVE_FMA)
 * variant.
 * NOTE(review): the leading 32 is presumably the transform size and the
 * trailing 0 a fourth op-count slot -- confirm against hc2c_desc.
 */
Chris@42 983 static const hc2c_desc desc = { 32, "hc2cfdft_32", twinstr, &GENUS, {300, 126, 198, 0} };
Chris@42 984
/*
 * Registration entry point for the hc2cfdft_32 codelet (HAVE_FMA variant).
 *
 * p: planner to register with; it is passed straight through to
 *    X(khc2c_register) together with the kernel function hc2cfdft_32, the
 *    address of its descriptor `desc`, and the HC2C_VIA_DFT constant.
 *
 * Returns nothing; the only action is the single X(khc2c_register) call.
 */
Chris@42 985 void X(codelet_hc2cfdft_32) (planner *p) {
Chris@42 986 X(khc2c_register) (p, hc2cfdft_32, &desc, HC2C_VIA_DFT);
Chris@42 987 }
Chris@42 988 #else /* HAVE_FMA */
Chris@42 989
Chris@42 990 /* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hc2cfdft_32 -include hc2cf.h */
Chris@42 991
Chris@42 992 /*
Chris@42 993 * This function contains 498 FP additions, 228 FP multiplications,
Chris@42 994 * (or, 404 additions, 134 multiplications, 94 fused multiply/add),
Chris@42 995 * 106 stack variables, 9 constants, and 128 memory accesses
Chris@42 996 */
Chris@42 997 #include "hc2cf.h"
Chris@42 998
Chris@42 999 static void hc2cfdft_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 1000 {
Chris@42 1001 DK(KP277785116, +0.277785116509801112371415406974266437187468595);
Chris@42 1002 DK(KP415734806, +0.415734806151272618539394188808952878369280406);
Chris@42 1003 DK(KP097545161, +0.097545161008064133924142434238511120463845809);
Chris@42 1004 DK(KP490392640, +0.490392640201615224563091118067119518486966865);
Chris@42 1005 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 1006 DK(KP191341716, +0.191341716182544885864229992015199433380672281);
Chris@42 1007 DK(KP461939766, +0.461939766255643378064091594698394143411208313);
Chris@42 1008 DK(KP353553390, +0.353553390593273762200422181052424519642417969);
Chris@42 1009 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 1010 {
Chris@42 1011 INT m;
Chris@42 1012 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) {
Chris@42 1013 E T2S, T5K, T52, T5N, T7p, T8r, T7i, T8o, T2q, T7t, T45, T6L, T2d, T7u, T48;
Chris@42 1014 E T6M, T1A, T4c, T4f, T1T, T3f, T5M, T7e, T7l, T6J, T7x, T4V, T5J, T7b, T7k;
Chris@42 1015 E T6G, T7w, Tj, TC, T5r, T4k, T4n, T5s, T3D, T5C, T6V, T72, T4G, T5F, T6u;
Chris@42 1016 E T86, T6S, T71, T6r, T85, TW, T1f, T5v, T4r, T4u, T5u, T40, T5G, T76, T8k;
Chris@42 1017 E T4N, T5D, T6B, T89, T6Z, T8h, T6y, T88;
Chris@42 1018 {
Chris@42 1019 E T1Y, T22, T2L, T4W, T2p, T43, T2A, T50, T27, T2b, T2Q, T4X, T2h, T2l, T2F;
Chris@42 1020 E T4Z;
Chris@42 1021 {
Chris@42 1022 E T1W, T1X, T2K, T20, T21, T2I, T2H, T2J;
Chris@42 1023 T1W = Ip[WS(rs, 4)];
Chris@42 1024 T1X = Im[WS(rs, 4)];
Chris@42 1025 T2K = T1W + T1X;
Chris@42 1026 T20 = Rp[WS(rs, 4)];
Chris@42 1027 T21 = Rm[WS(rs, 4)];
Chris@42 1028 T2I = T20 - T21;
Chris@42 1029 T1Y = T1W - T1X;
Chris@42 1030 T22 = T20 + T21;
Chris@42 1031 T2H = W[16];
Chris@42 1032 T2J = W[17];
Chris@42 1033 T2L = FMA(T2H, T2I, T2J * T2K);
Chris@42 1034 T4W = FNMS(T2J, T2I, T2H * T2K);
Chris@42 1035 }
Chris@42 1036 {
Chris@42 1037 E T2n, T2o, T2z, T2v, T2w, T2x, T2u, T2y;
Chris@42 1038 T2n = Ip[0];
Chris@42 1039 T2o = Im[0];
Chris@42 1040 T2z = T2n + T2o;
Chris@42 1041 T2v = Rm[0];
Chris@42 1042 T2w = Rp[0];
Chris@42 1043 T2x = T2v - T2w;
Chris@42 1044 T2p = T2n - T2o;
Chris@42 1045 T43 = T2w + T2v;
Chris@42 1046 T2u = W[0];
Chris@42 1047 T2y = W[1];
Chris@42 1048 T2A = FNMS(T2y, T2z, T2u * T2x);
Chris@42 1049 T50 = FMA(T2y, T2x, T2u * T2z);
Chris@42 1050 }
Chris@42 1051 {
Chris@42 1052 E T25, T26, T2P, T29, T2a, T2N, T2M, T2O;
Chris@42 1053 T25 = Ip[WS(rs, 12)];
Chris@42 1054 T26 = Im[WS(rs, 12)];
Chris@42 1055 T2P = T25 + T26;
Chris@42 1056 T29 = Rp[WS(rs, 12)];
Chris@42 1057 T2a = Rm[WS(rs, 12)];
Chris@42 1058 T2N = T29 - T2a;
Chris@42 1059 T27 = T25 - T26;
Chris@42 1060 T2b = T29 + T2a;
Chris@42 1061 T2M = W[48];
Chris@42 1062 T2O = W[49];
Chris@42 1063 T2Q = FMA(T2M, T2N, T2O * T2P);
Chris@42 1064 T4X = FNMS(T2O, T2N, T2M * T2P);
Chris@42 1065 }
Chris@42 1066 {
Chris@42 1067 E T2f, T2g, T2E, T2j, T2k, T2C, T2B, T2D;
Chris@42 1068 T2f = Ip[WS(rs, 8)];
Chris@42 1069 T2g = Im[WS(rs, 8)];
Chris@42 1070 T2E = T2f + T2g;
Chris@42 1071 T2j = Rp[WS(rs, 8)];
Chris@42 1072 T2k = Rm[WS(rs, 8)];
Chris@42 1073 T2C = T2j - T2k;
Chris@42 1074 T2h = T2f - T2g;
Chris@42 1075 T2l = T2j + T2k;
Chris@42 1076 T2B = W[32];
Chris@42 1077 T2D = W[33];
Chris@42 1078 T2F = FMA(T2B, T2C, T2D * T2E);
Chris@42 1079 T4Z = FNMS(T2D, T2C, T2B * T2E);
Chris@42 1080 }
Chris@42 1081 {
Chris@42 1082 E T2G, T2R, T7g, T7h;
Chris@42 1083 T2G = T2A - T2F;
Chris@42 1084 T2R = T2L + T2Q;
Chris@42 1085 T2S = T2G - T2R;
Chris@42 1086 T5K = T2R + T2G;
Chris@42 1087 {
Chris@42 1088 E T4Y, T51, T7n, T7o;
Chris@42 1089 T4Y = T4W + T4X;
Chris@42 1090 T51 = T4Z + T50;
Chris@42 1091 T52 = T4Y + T51;
Chris@42 1092 T5N = T51 - T4Y;
Chris@42 1093 T7n = T2Q - T2L;
Chris@42 1094 T7o = T50 - T4Z;
Chris@42 1095 T7p = T7n + T7o;
Chris@42 1096 T8r = T7o - T7n;
Chris@42 1097 }
Chris@42 1098 T7g = T2F + T2A;
Chris@42 1099 T7h = T4W - T4X;
Chris@42 1100 T7i = T7g - T7h;
Chris@42 1101 T8o = T7h + T7g;
Chris@42 1102 {
Chris@42 1103 E T2m, T44, T2e, T2i;
Chris@42 1104 T2e = W[30];
Chris@42 1105 T2i = W[31];
Chris@42 1106 T2m = FNMS(T2i, T2l, T2e * T2h);
Chris@42 1107 T44 = FMA(T2e, T2l, T2i * T2h);
Chris@42 1108 T2q = T2m + T2p;
Chris@42 1109 T7t = T43 - T44;
Chris@42 1110 T45 = T43 + T44;
Chris@42 1111 T6L = T2p - T2m;
Chris@42 1112 }
Chris@42 1113 {
Chris@42 1114 E T23, T46, T2c, T47;
Chris@42 1115 {
Chris@42 1116 E T1V, T1Z, T24, T28;
Chris@42 1117 T1V = W[14];
Chris@42 1118 T1Z = W[15];
Chris@42 1119 T23 = FNMS(T1Z, T22, T1V * T1Y);
Chris@42 1120 T46 = FMA(T1V, T22, T1Z * T1Y);
Chris@42 1121 T24 = W[46];
Chris@42 1122 T28 = W[47];
Chris@42 1123 T2c = FNMS(T28, T2b, T24 * T27);
Chris@42 1124 T47 = FMA(T24, T2b, T28 * T27);
Chris@42 1125 }
Chris@42 1126 T2d = T23 + T2c;
Chris@42 1127 T7u = T23 - T2c;
Chris@42 1128 T48 = T46 + T47;
Chris@42 1129 T6M = T46 - T47;
Chris@42 1130 }
Chris@42 1131 }
Chris@42 1132 }
Chris@42 1133 {
Chris@42 1134 E T1q, T4a, T2X, T4P, T1S, T4e, T3d, T4T, T1z, T4b, T32, T4Q, T1J, T4d, T38;
Chris@42 1135 E T4S;
Chris@42 1136 {
Chris@42 1137 E T1l, T2W, T1p, T2U;
Chris@42 1138 {
Chris@42 1139 E T1j, T1k, T1n, T1o;
Chris@42 1140 T1j = Ip[WS(rs, 2)];
Chris@42 1141 T1k = Im[WS(rs, 2)];
Chris@42 1142 T1l = T1j - T1k;
Chris@42 1143 T2W = T1j + T1k;
Chris@42 1144 T1n = Rp[WS(rs, 2)];
Chris@42 1145 T1o = Rm[WS(rs, 2)];
Chris@42 1146 T1p = T1n + T1o;
Chris@42 1147 T2U = T1n - T1o;
Chris@42 1148 }
Chris@42 1149 {
Chris@42 1150 E T1i, T1m, T2T, T2V;
Chris@42 1151 T1i = W[6];
Chris@42 1152 T1m = W[7];
Chris@42 1153 T1q = FNMS(T1m, T1p, T1i * T1l);
Chris@42 1154 T4a = FMA(T1i, T1p, T1m * T1l);
Chris@42 1155 T2T = W[8];
Chris@42 1156 T2V = W[9];
Chris@42 1157 T2X = FMA(T2T, T2U, T2V * T2W);
Chris@42 1158 T4P = FNMS(T2V, T2U, T2T * T2W);
Chris@42 1159 }
Chris@42 1160 }
Chris@42 1161 {
Chris@42 1162 E T1N, T3c, T1R, T3a;
Chris@42 1163 {
Chris@42 1164 E T1L, T1M, T1P, T1Q;
Chris@42 1165 T1L = Ip[WS(rs, 6)];
Chris@42 1166 T1M = Im[WS(rs, 6)];
Chris@42 1167 T1N = T1L - T1M;
Chris@42 1168 T3c = T1L + T1M;
Chris@42 1169 T1P = Rp[WS(rs, 6)];
Chris@42 1170 T1Q = Rm[WS(rs, 6)];
Chris@42 1171 T1R = T1P + T1Q;
Chris@42 1172 T3a = T1P - T1Q;
Chris@42 1173 }
Chris@42 1174 {
Chris@42 1175 E T1K, T1O, T39, T3b;
Chris@42 1176 T1K = W[22];
Chris@42 1177 T1O = W[23];
Chris@42 1178 T1S = FNMS(T1O, T1R, T1K * T1N);
Chris@42 1179 T4e = FMA(T1K, T1R, T1O * T1N);
Chris@42 1180 T39 = W[24];
Chris@42 1181 T3b = W[25];
Chris@42 1182 T3d = FMA(T39, T3a, T3b * T3c);
Chris@42 1183 T4T = FNMS(T3b, T3a, T39 * T3c);
Chris@42 1184 }
Chris@42 1185 }
Chris@42 1186 {
Chris@42 1187 E T1u, T31, T1y, T2Z;
Chris@42 1188 {
Chris@42 1189 E T1s, T1t, T1w, T1x;
Chris@42 1190 T1s = Ip[WS(rs, 10)];
Chris@42 1191 T1t = Im[WS(rs, 10)];
Chris@42 1192 T1u = T1s - T1t;
Chris@42 1193 T31 = T1s + T1t;
Chris@42 1194 T1w = Rp[WS(rs, 10)];
Chris@42 1195 T1x = Rm[WS(rs, 10)];
Chris@42 1196 T1y = T1w + T1x;
Chris@42 1197 T2Z = T1w - T1x;
Chris@42 1198 }
Chris@42 1199 {
Chris@42 1200 E T1r, T1v, T2Y, T30;
Chris@42 1201 T1r = W[38];
Chris@42 1202 T1v = W[39];
Chris@42 1203 T1z = FNMS(T1v, T1y, T1r * T1u);
Chris@42 1204 T4b = FMA(T1r, T1y, T1v * T1u);
Chris@42 1205 T2Y = W[40];
Chris@42 1206 T30 = W[41];
Chris@42 1207 T32 = FMA(T2Y, T2Z, T30 * T31);
Chris@42 1208 T4Q = FNMS(T30, T2Z, T2Y * T31);
Chris@42 1209 }
Chris@42 1210 }
Chris@42 1211 {
Chris@42 1212 E T1E, T37, T1I, T35;
Chris@42 1213 {
Chris@42 1214 E T1C, T1D, T1G, T1H;
Chris@42 1215 T1C = Ip[WS(rs, 14)];
Chris@42 1216 T1D = Im[WS(rs, 14)];
Chris@42 1217 T1E = T1C - T1D;
Chris@42 1218 T37 = T1C + T1D;
Chris@42 1219 T1G = Rp[WS(rs, 14)];
Chris@42 1220 T1H = Rm[WS(rs, 14)];
Chris@42 1221 T1I = T1G + T1H;
Chris@42 1222 T35 = T1G - T1H;
Chris@42 1223 }
Chris@42 1224 {
Chris@42 1225 E T1B, T1F, T34, T36;
Chris@42 1226 T1B = W[54];
Chris@42 1227 T1F = W[55];
Chris@42 1228 T1J = FNMS(T1F, T1I, T1B * T1E);
Chris@42 1229 T4d = FMA(T1B, T1I, T1F * T1E);
Chris@42 1230 T34 = W[56];
Chris@42 1231 T36 = W[57];
Chris@42 1232 T38 = FMA(T34, T35, T36 * T37);
Chris@42 1233 T4S = FNMS(T36, T35, T34 * T37);
Chris@42 1234 }
Chris@42 1235 }
Chris@42 1236 {
Chris@42 1237 E T33, T3e, T4R, T4U;
Chris@42 1238 T1A = T1q + T1z;
Chris@42 1239 T4c = T4a + T4b;
Chris@42 1240 T4f = T4d + T4e;
Chris@42 1241 T1T = T1J + T1S;
Chris@42 1242 T33 = T2X + T32;
Chris@42 1243 T3e = T38 + T3d;
Chris@42 1244 T3f = T33 + T3e;
Chris@42 1245 T5M = T3e - T33;
Chris@42 1246 {
Chris@42 1247 E T7c, T7d, T6H, T6I;
Chris@42 1248 T7c = T4S - T4T;
Chris@42 1249 T7d = T3d - T38;
Chris@42 1250 T7e = T7c + T7d;
Chris@42 1251 T7l = T7c - T7d;
Chris@42 1252 T6H = T4d - T4e;
Chris@42 1253 T6I = T1J - T1S;
Chris@42 1254 T6J = T6H + T6I;
Chris@42 1255 T7x = T6H - T6I;
Chris@42 1256 }
Chris@42 1257 T4R = T4P + T4Q;
Chris@42 1258 T4U = T4S + T4T;
Chris@42 1259 T4V = T4R + T4U;
Chris@42 1260 T5J = T4U - T4R;
Chris@42 1261 {
Chris@42 1262 E T79, T7a, T6E, T6F;
Chris@42 1263 T79 = T32 - T2X;
Chris@42 1264 T7a = T4P - T4Q;
Chris@42 1265 T7b = T79 - T7a;
Chris@42 1266 T7k = T7a + T79;
Chris@42 1267 T6E = T1q - T1z;
Chris@42 1268 T6F = T4a - T4b;
Chris@42 1269 T6G = T6E - T6F;
Chris@42 1270 T7w = T6F + T6E;
Chris@42 1271 }
Chris@42 1272 }
Chris@42 1273 }
Chris@42 1274 {
Chris@42 1275 E T9, T4i, T3l, T4A, TB, T4m, T3B, T4E, Ti, T4j, T3q, T4B, Ts, T4l, T3w;
Chris@42 1276 E T4D;
Chris@42 1277 {
Chris@42 1278 E T4, T3k, T8, T3i;
Chris@42 1279 {
Chris@42 1280 E T2, T3, T6, T7;
Chris@42 1281 T2 = Ip[WS(rs, 1)];
Chris@42 1282 T3 = Im[WS(rs, 1)];
Chris@42 1283 T4 = T2 - T3;
Chris@42 1284 T3k = T2 + T3;
Chris@42 1285 T6 = Rp[WS(rs, 1)];
Chris@42 1286 T7 = Rm[WS(rs, 1)];
Chris@42 1287 T8 = T6 + T7;
Chris@42 1288 T3i = T6 - T7;
Chris@42 1289 }
Chris@42 1290 {
Chris@42 1291 E T1, T5, T3h, T3j;
Chris@42 1292 T1 = W[2];
Chris@42 1293 T5 = W[3];
Chris@42 1294 T9 = FNMS(T5, T8, T1 * T4);
Chris@42 1295 T4i = FMA(T1, T8, T5 * T4);
Chris@42 1296 T3h = W[4];
Chris@42 1297 T3j = W[5];
Chris@42 1298 T3l = FMA(T3h, T3i, T3j * T3k);
Chris@42 1299 T4A = FNMS(T3j, T3i, T3h * T3k);
Chris@42 1300 }
Chris@42 1301 }
Chris@42 1302 {
Chris@42 1303 E Tw, T3A, TA, T3y;
Chris@42 1304 {
Chris@42 1305 E Tu, Tv, Ty, Tz;
Chris@42 1306 Tu = Ip[WS(rs, 13)];
Chris@42 1307 Tv = Im[WS(rs, 13)];
Chris@42 1308 Tw = Tu - Tv;
Chris@42 1309 T3A = Tu + Tv;
Chris@42 1310 Ty = Rp[WS(rs, 13)];
Chris@42 1311 Tz = Rm[WS(rs, 13)];
Chris@42 1312 TA = Ty + Tz;
Chris@42 1313 T3y = Ty - Tz;
Chris@42 1314 }
Chris@42 1315 {
Chris@42 1316 E Tt, Tx, T3x, T3z;
Chris@42 1317 Tt = W[50];
Chris@42 1318 Tx = W[51];
Chris@42 1319 TB = FNMS(Tx, TA, Tt * Tw);
Chris@42 1320 T4m = FMA(Tt, TA, Tx * Tw);
Chris@42 1321 T3x = W[52];
Chris@42 1322 T3z = W[53];
Chris@42 1323 T3B = FMA(T3x, T3y, T3z * T3A);
Chris@42 1324 T4E = FNMS(T3z, T3y, T3x * T3A);
Chris@42 1325 }
Chris@42 1326 }
Chris@42 1327 {
Chris@42 1328 E Td, T3p, Th, T3n;
Chris@42 1329 {
Chris@42 1330 E Tb, Tc, Tf, Tg;
Chris@42 1331 Tb = Ip[WS(rs, 9)];
Chris@42 1332 Tc = Im[WS(rs, 9)];
Chris@42 1333 Td = Tb - Tc;
Chris@42 1334 T3p = Tb + Tc;
Chris@42 1335 Tf = Rp[WS(rs, 9)];
Chris@42 1336 Tg = Rm[WS(rs, 9)];
Chris@42 1337 Th = Tf + Tg;
Chris@42 1338 T3n = Tf - Tg;
Chris@42 1339 }
Chris@42 1340 {
Chris@42 1341 E Ta, Te, T3m, T3o;
Chris@42 1342 Ta = W[34];
Chris@42 1343 Te = W[35];
Chris@42 1344 Ti = FNMS(Te, Th, Ta * Td);
Chris@42 1345 T4j = FMA(Ta, Th, Te * Td);
Chris@42 1346 T3m = W[36];
Chris@42 1347 T3o = W[37];
Chris@42 1348 T3q = FMA(T3m, T3n, T3o * T3p);
Chris@42 1349 T4B = FNMS(T3o, T3n, T3m * T3p);
Chris@42 1350 }
Chris@42 1351 }
Chris@42 1352 {
Chris@42 1353 E Tn, T3v, Tr, T3t;
Chris@42 1354 {
Chris@42 1355 E Tl, Tm, Tp, Tq;
Chris@42 1356 Tl = Ip[WS(rs, 5)];
Chris@42 1357 Tm = Im[WS(rs, 5)];
Chris@42 1358 Tn = Tl - Tm;
Chris@42 1359 T3v = Tl + Tm;
Chris@42 1360 Tp = Rp[WS(rs, 5)];
Chris@42 1361 Tq = Rm[WS(rs, 5)];
Chris@42 1362 Tr = Tp + Tq;
Chris@42 1363 T3t = Tp - Tq;
Chris@42 1364 }
Chris@42 1365 {
Chris@42 1366 E Tk, To, T3s, T3u;
Chris@42 1367 Tk = W[18];
Chris@42 1368 To = W[19];
Chris@42 1369 Ts = FNMS(To, Tr, Tk * Tn);
Chris@42 1370 T4l = FMA(Tk, Tr, To * Tn);
Chris@42 1371 T3s = W[20];
Chris@42 1372 T3u = W[21];
Chris@42 1373 T3w = FMA(T3s, T3t, T3u * T3v);
Chris@42 1374 T4D = FNMS(T3u, T3t, T3s * T3v);
Chris@42 1375 }
Chris@42 1376 }
Chris@42 1377 Tj = T9 + Ti;
Chris@42 1378 TC = Ts + TB;
Chris@42 1379 T5r = Tj - TC;
Chris@42 1380 T4k = T4i + T4j;
Chris@42 1381 T4n = T4l + T4m;
Chris@42 1382 T5s = T4k - T4n;
Chris@42 1383 {
Chris@42 1384 E T3r, T3C, T6T, T6U;
Chris@42 1385 T3r = T3l + T3q;
Chris@42 1386 T3C = T3w + T3B;
Chris@42 1387 T3D = T3r + T3C;
Chris@42 1388 T5C = T3C - T3r;
Chris@42 1389 T6T = T4E - T4D;
Chris@42 1390 T6U = T3w - T3B;
Chris@42 1391 T6V = T6T + T6U;
Chris@42 1392 T72 = T6T - T6U;
Chris@42 1393 }
Chris@42 1394 {
Chris@42 1395 E T4C, T4F, T6s, T6t;
Chris@42 1396 T4C = T4A + T4B;
Chris@42 1397 T4F = T4D + T4E;
Chris@42 1398 T4G = T4C + T4F;
Chris@42 1399 T5F = T4F - T4C;
Chris@42 1400 T6s = T4i - T4j;
Chris@42 1401 T6t = Ts - TB;
Chris@42 1402 T6u = T6s + T6t;
Chris@42 1403 T86 = T6s - T6t;
Chris@42 1404 }
Chris@42 1405 {
Chris@42 1406 E T6Q, T6R, T6p, T6q;
Chris@42 1407 T6Q = T3q - T3l;
Chris@42 1408 T6R = T4A - T4B;
Chris@42 1409 T6S = T6Q - T6R;
Chris@42 1410 T71 = T6R + T6Q;
Chris@42 1411 T6p = T9 - Ti;
Chris@42 1412 T6q = T4l - T4m;
Chris@42 1413 T6r = T6p - T6q;
Chris@42 1414 T85 = T6p + T6q;
Chris@42 1415 }
Chris@42 1416 }
Chris@42 1417 {
Chris@42 1418 E TM, T4p, T3I, T4H, T1e, T4t, T3Y, T4L, TV, T4q, T3N, T4I, T15, T4s, T3T;
Chris@42 1419 E T4K;
Chris@42 1420 {
Chris@42 1421 E TH, T3H, TL, T3F;
Chris@42 1422 {
Chris@42 1423 E TF, TG, TJ, TK;
Chris@42 1424 TF = Ip[WS(rs, 15)];
Chris@42 1425 TG = Im[WS(rs, 15)];
Chris@42 1426 TH = TF - TG;
Chris@42 1427 T3H = TF + TG;
Chris@42 1428 TJ = Rp[WS(rs, 15)];
Chris@42 1429 TK = Rm[WS(rs, 15)];
Chris@42 1430 TL = TJ + TK;
Chris@42 1431 T3F = TJ - TK;
Chris@42 1432 }
Chris@42 1433 {
Chris@42 1434 E TE, TI, T3E, T3G;
Chris@42 1435 TE = W[58];
Chris@42 1436 TI = W[59];
Chris@42 1437 TM = FNMS(TI, TL, TE * TH);
Chris@42 1438 T4p = FMA(TE, TL, TI * TH);
Chris@42 1439 T3E = W[60];
Chris@42 1440 T3G = W[61];
Chris@42 1441 T3I = FMA(T3E, T3F, T3G * T3H);
Chris@42 1442 T4H = FNMS(T3G, T3F, T3E * T3H);
Chris@42 1443 }
Chris@42 1444 }
Chris@42 1445 {
Chris@42 1446 E T19, T3X, T1d, T3V;
Chris@42 1447 {
Chris@42 1448 E T17, T18, T1b, T1c;
Chris@42 1449 T17 = Ip[WS(rs, 11)];
Chris@42 1450 T18 = Im[WS(rs, 11)];
Chris@42 1451 T19 = T17 - T18;
Chris@42 1452 T3X = T17 + T18;
Chris@42 1453 T1b = Rp[WS(rs, 11)];
Chris@42 1454 T1c = Rm[WS(rs, 11)];
Chris@42 1455 T1d = T1b + T1c;
Chris@42 1456 T3V = T1b - T1c;
Chris@42 1457 }
Chris@42 1458 {
Chris@42 1459 E T16, T1a, T3U, T3W;
Chris@42 1460 T16 = W[42];
Chris@42 1461 T1a = W[43];
Chris@42 1462 T1e = FNMS(T1a, T1d, T16 * T19);
Chris@42 1463 T4t = FMA(T16, T1d, T1a * T19);
Chris@42 1464 T3U = W[44];
Chris@42 1465 T3W = W[45];
Chris@42 1466 T3Y = FMA(T3U, T3V, T3W * T3X);
Chris@42 1467 T4L = FNMS(T3W, T3V, T3U * T3X);
Chris@42 1468 }
Chris@42 1469 }
Chris@42 1470 {
Chris@42 1471 E TQ, T3M, TU, T3K;
Chris@42 1472 {
Chris@42 1473 E TO, TP, TS, TT;
Chris@42 1474 TO = Ip[WS(rs, 7)];
Chris@42 1475 TP = Im[WS(rs, 7)];
Chris@42 1476 TQ = TO - TP;
Chris@42 1477 T3M = TO + TP;
Chris@42 1478 TS = Rp[WS(rs, 7)];
Chris@42 1479 TT = Rm[WS(rs, 7)];
Chris@42 1480 TU = TS + TT;
Chris@42 1481 T3K = TS - TT;
Chris@42 1482 }
Chris@42 1483 {
Chris@42 1484 E TN, TR, T3J, T3L;
Chris@42 1485 TN = W[26];
Chris@42 1486 TR = W[27];
Chris@42 1487 TV = FNMS(TR, TU, TN * TQ);
Chris@42 1488 T4q = FMA(TN, TU, TR * TQ);
Chris@42 1489 T3J = W[28];
Chris@42 1490 T3L = W[29];
Chris@42 1491 T3N = FMA(T3J, T3K, T3L * T3M);
Chris@42 1492 T4I = FNMS(T3L, T3K, T3J * T3M);
Chris@42 1493 }
Chris@42 1494 }
Chris@42 1495 {
Chris@42 1496 E T10, T3S, T14, T3Q;
Chris@42 1497 {
Chris@42 1498 E TY, TZ, T12, T13;
Chris@42 1499 TY = Ip[WS(rs, 3)];
Chris@42 1500 TZ = Im[WS(rs, 3)];
Chris@42 1501 T10 = TY - TZ;
Chris@42 1502 T3S = TY + TZ;
Chris@42 1503 T12 = Rp[WS(rs, 3)];
Chris@42 1504 T13 = Rm[WS(rs, 3)];
Chris@42 1505 T14 = T12 + T13;
Chris@42 1506 T3Q = T12 - T13;
Chris@42 1507 }
Chris@42 1508 {
Chris@42 1509 E TX, T11, T3P, T3R;
Chris@42 1510 TX = W[10];
Chris@42 1511 T11 = W[11];
Chris@42 1512 T15 = FNMS(T11, T14, TX * T10);
Chris@42 1513 T4s = FMA(TX, T14, T11 * T10);
Chris@42 1514 T3P = W[12];
Chris@42 1515 T3R = W[13];
Chris@42 1516 T3T = FMA(T3P, T3Q, T3R * T3S);
Chris@42 1517 T4K = FNMS(T3R, T3Q, T3P * T3S);
Chris@42 1518 }
Chris@42 1519 }
Chris@42 1520 TW = TM + TV;
Chris@42 1521 T1f = T15 + T1e;
Chris@42 1522 T5v = TW - T1f;
Chris@42 1523 T4r = T4p + T4q;
Chris@42 1524 T4u = T4s + T4t;
Chris@42 1525 T5u = T4r - T4u;
Chris@42 1526 {
Chris@42 1527 E T3O, T3Z, T74, T75;
Chris@42 1528 T3O = T3I + T3N;
Chris@42 1529 T3Z = T3T + T3Y;
Chris@42 1530 T40 = T3O + T3Z;
Chris@42 1531 T5G = T3Z - T3O;
Chris@42 1532 T74 = T4H - T4I;
Chris@42 1533 T75 = T3Y - T3T;
Chris@42 1534 T76 = T74 + T75;
Chris@42 1535 T8k = T74 - T75;
Chris@42 1536 }
Chris@42 1537 {
Chris@42 1538 E T4J, T4M, T6z, T6A;
Chris@42 1539 T4J = T4H + T4I;
Chris@42 1540 T4M = T4K + T4L;
Chris@42 1541 T4N = T4J + T4M;
Chris@42 1542 T5D = T4J - T4M;
Chris@42 1543 T6z = T4p - T4q;
Chris@42 1544 T6A = T15 - T1e;
Chris@42 1545 T6B = T6z + T6A;
Chris@42 1546 T89 = T6z - T6A;
Chris@42 1547 }
Chris@42 1548 {
Chris@42 1549 E T6X, T6Y, T6w, T6x;
Chris@42 1550 T6X = T3N - T3I;
Chris@42 1551 T6Y = T4K - T4L;
Chris@42 1552 T6Z = T6X - T6Y;
Chris@42 1553 T8h = T6X + T6Y;
Chris@42 1554 T6w = TM - TV;
Chris@42 1555 T6x = T4s - T4t;
Chris@42 1556 T6y = T6w - T6x;
Chris@42 1557 T88 = T6w + T6x;
Chris@42 1558 }
Chris@42 1559 }
Chris@42 1560 {
Chris@42 1561 E T1h, T5i, T5c, T5m, T5f, T5n, T2s, T58, T42, T4y, T4w, T57, T54, T56, T4h;
Chris@42 1562 E T5h;
Chris@42 1563 {
Chris@42 1564 E TD, T1g, T5a, T5b;
Chris@42 1565 TD = Tj + TC;
Chris@42 1566 T1g = TW + T1f;
Chris@42 1567 T1h = TD + T1g;
Chris@42 1568 T5i = TD - T1g;
Chris@42 1569 T5a = T4N - T4G;
Chris@42 1570 T5b = T3D - T40;
Chris@42 1571 T5c = T5a + T5b;
Chris@42 1572 T5m = T5a - T5b;
Chris@42 1573 }
Chris@42 1574 {
Chris@42 1575 E T5d, T5e, T1U, T2r;
Chris@42 1576 T5d = T3f + T2S;
Chris@42 1577 T5e = T52 - T4V;
Chris@42 1578 T5f = T5d - T5e;
Chris@42 1579 T5n = T5d + T5e;
Chris@42 1580 T1U = T1A + T1T;
Chris@42 1581 T2r = T2d + T2q;
Chris@42 1582 T2s = T1U + T2r;
Chris@42 1583 T58 = T2r - T1U;
Chris@42 1584 }
Chris@42 1585 {
Chris@42 1586 E T3g, T41, T4o, T4v;
Chris@42 1587 T3g = T2S - T3f;
Chris@42 1588 T41 = T3D + T40;
Chris@42 1589 T42 = T3g - T41;
Chris@42 1590 T4y = T41 + T3g;
Chris@42 1591 T4o = T4k + T4n;
Chris@42 1592 T4v = T4r + T4u;
Chris@42 1593 T4w = T4o + T4v;
Chris@42 1594 T57 = T4v - T4o;
Chris@42 1595 }
Chris@42 1596 {
Chris@42 1597 E T4O, T53, T49, T4g;
Chris@42 1598 T4O = T4G + T4N;
Chris@42 1599 T53 = T4V + T52;
Chris@42 1600 T54 = T4O - T53;
Chris@42 1601 T56 = T4O + T53;
Chris@42 1602 T49 = T45 + T48;
Chris@42 1603 T4g = T4c + T4f;
Chris@42 1604 T4h = T49 + T4g;
Chris@42 1605 T5h = T49 - T4g;
Chris@42 1606 }
Chris@42 1607 {
Chris@42 1608 E T2t, T55, T4x, T4z;
Chris@42 1609 T2t = T1h + T2s;
Chris@42 1610 Ip[0] = KP500000000 * (T2t + T42);
Chris@42 1611 Im[WS(rs, 15)] = KP500000000 * (T42 - T2t);
Chris@42 1612 T55 = T4h + T4w;
Chris@42 1613 Rm[WS(rs, 15)] = KP500000000 * (T55 - T56);
Chris@42 1614 Rp[0] = KP500000000 * (T55 + T56);
Chris@42 1615 T4x = T4h - T4w;
Chris@42 1616 Rm[WS(rs, 7)] = KP500000000 * (T4x - T4y);
Chris@42 1617 Rp[WS(rs, 8)] = KP500000000 * (T4x + T4y);
Chris@42 1618 T4z = T2s - T1h;
Chris@42 1619 Ip[WS(rs, 8)] = KP500000000 * (T4z + T54);
Chris@42 1620 Im[WS(rs, 7)] = KP500000000 * (T54 - T4z);
Chris@42 1621 }
Chris@42 1622 {
Chris@42 1623 E T59, T5g, T5p, T5q;
Chris@42 1624 T59 = KP500000000 * (T57 + T58);
Chris@42 1625 T5g = KP353553390 * (T5c + T5f);
Chris@42 1626 Ip[WS(rs, 4)] = T59 + T5g;
Chris@42 1627 Im[WS(rs, 11)] = T5g - T59;
Chris@42 1628 T5p = KP500000000 * (T5h + T5i);
Chris@42 1629 T5q = KP353553390 * (T5m + T5n);
Chris@42 1630 Rm[WS(rs, 11)] = T5p - T5q;
Chris@42 1631 Rp[WS(rs, 4)] = T5p + T5q;
Chris@42 1632 }
Chris@42 1633 {
Chris@42 1634 E T5j, T5k, T5l, T5o;
Chris@42 1635 T5j = KP500000000 * (T5h - T5i);
Chris@42 1636 T5k = KP353553390 * (T5f - T5c);
Chris@42 1637 Rm[WS(rs, 3)] = T5j - T5k;
Chris@42 1638 Rp[WS(rs, 12)] = T5j + T5k;
Chris@42 1639 T5l = KP500000000 * (T58 - T57);
Chris@42 1640 T5o = KP353553390 * (T5m - T5n);
Chris@42 1641 Ip[WS(rs, 12)] = T5l + T5o;
Chris@42 1642 Im[WS(rs, 3)] = T5o - T5l;
Chris@42 1643 }
Chris@42 1644 }
Chris@42 1645 {
Chris@42 1646 E T5x, T6g, T6a, T6k, T6d, T6l, T5A, T66, T5I, T60, T5T, T6f, T5W, T65, T5P;
Chris@42 1647 E T61;
Chris@42 1648 {
Chris@42 1649 E T5t, T5w, T68, T69;
Chris@42 1650 T5t = T5r - T5s;
Chris@42 1651 T5w = T5u + T5v;
Chris@42 1652 T5x = KP353553390 * (T5t + T5w);
Chris@42 1653 T6g = KP353553390 * (T5t - T5w);
Chris@42 1654 T68 = T5D - T5C;
Chris@42 1655 T69 = T5G - T5F;
Chris@42 1656 T6a = FMA(KP461939766, T68, KP191341716 * T69);
Chris@42 1657 T6k = FNMS(KP461939766, T69, KP191341716 * T68);
Chris@42 1658 }
Chris@42 1659 {
Chris@42 1660 E T6b, T6c, T5y, T5z;
Chris@42 1661 T6b = T5K - T5J;
Chris@42 1662 T6c = T5N - T5M;
Chris@42 1663 T6d = FNMS(KP461939766, T6c, KP191341716 * T6b);
Chris@42 1664 T6l = FMA(KP461939766, T6b, KP191341716 * T6c);
Chris@42 1665 T5y = T4f - T4c;
Chris@42 1666 T5z = T2q - T2d;
Chris@42 1667 T5A = KP500000000 * (T5y + T5z);
Chris@42 1668 T66 = KP500000000 * (T5z - T5y);
Chris@42 1669 }
Chris@42 1670 {
Chris@42 1671 E T5E, T5H, T5R, T5S;
Chris@42 1672 T5E = T5C + T5D;
Chris@42 1673 T5H = T5F + T5G;
Chris@42 1674 T5I = FMA(KP191341716, T5E, KP461939766 * T5H);
Chris@42 1675 T60 = FNMS(KP191341716, T5H, KP461939766 * T5E);
Chris@42 1676 T5R = T45 - T48;
Chris@42 1677 T5S = T1A - T1T;
Chris@42 1678 T5T = KP500000000 * (T5R + T5S);
Chris@42 1679 T6f = KP500000000 * (T5R - T5S);
Chris@42 1680 }
Chris@42 1681 {
Chris@42 1682 E T5U, T5V, T5L, T5O;
Chris@42 1683 T5U = T5s + T5r;
Chris@42 1684 T5V = T5u - T5v;
Chris@42 1685 T5W = KP353553390 * (T5U + T5V);
Chris@42 1686 T65 = KP353553390 * (T5V - T5U);
Chris@42 1687 T5L = T5J + T5K;
Chris@42 1688 T5O = T5M + T5N;
Chris@42 1689 T5P = FNMS(KP191341716, T5O, KP461939766 * T5L);
Chris@42 1690 T61 = FMA(KP191341716, T5L, KP461939766 * T5O);
Chris@42 1691 }
Chris@42 1692 {
Chris@42 1693 E T5B, T5Q, T63, T64;
Chris@42 1694 T5B = T5x + T5A;
Chris@42 1695 T5Q = T5I + T5P;
Chris@42 1696 Ip[WS(rs, 2)] = T5B + T5Q;
Chris@42 1697 Im[WS(rs, 13)] = T5Q - T5B;
Chris@42 1698 T63 = T5T + T5W;
Chris@42 1699 T64 = T60 + T61;
Chris@42 1700 Rm[WS(rs, 13)] = T63 - T64;
Chris@42 1701 Rp[WS(rs, 2)] = T63 + T64;
Chris@42 1702 }
Chris@42 1703 {
Chris@42 1704 E T5X, T5Y, T5Z, T62;
Chris@42 1705 T5X = T5T - T5W;
Chris@42 1706 T5Y = T5P - T5I;
Chris@42 1707 Rm[WS(rs, 5)] = T5X - T5Y;
Chris@42 1708 Rp[WS(rs, 10)] = T5X + T5Y;
Chris@42 1709 T5Z = T5A - T5x;
Chris@42 1710 T62 = T60 - T61;
Chris@42 1711 Ip[WS(rs, 10)] = T5Z + T62;
Chris@42 1712 Im[WS(rs, 5)] = T62 - T5Z;
Chris@42 1713 }
Chris@42 1714 {
Chris@42 1715 E T67, T6e, T6n, T6o;
Chris@42 1716 T67 = T65 + T66;
Chris@42 1717 T6e = T6a + T6d;
Chris@42 1718 Ip[WS(rs, 6)] = T67 + T6e;
Chris@42 1719 Im[WS(rs, 9)] = T6e - T67;
Chris@42 1720 T6n = T6f + T6g;
Chris@42 1721 T6o = T6k + T6l;
Chris@42 1722 Rm[WS(rs, 9)] = T6n - T6o;
Chris@42 1723 Rp[WS(rs, 6)] = T6n + T6o;
Chris@42 1724 }
Chris@42 1725 {
Chris@42 1726 E T6h, T6i, T6j, T6m;
Chris@42 1727 T6h = T6f - T6g;
Chris@42 1728 T6i = T6d - T6a;
Chris@42 1729 Rm[WS(rs, 1)] = T6h - T6i;
Chris@42 1730 Rp[WS(rs, 14)] = T6h + T6i;
Chris@42 1731 T6j = T66 - T65;
Chris@42 1732 T6m = T6k - T6l;
Chris@42 1733 Ip[WS(rs, 14)] = T6j + T6m;
Chris@42 1734 Im[WS(rs, 1)] = T6m - T6j;
Chris@42 1735 }
Chris@42 1736 }
Chris@42 1737 {
Chris@42 1738 E T6D, T7W, T6O, T7M, T7C, T7L, T7z, T7V, T7r, T81, T7H, T7T, T78, T80, T7G;
Chris@42 1739 E T7Q;
Chris@42 1740 {
Chris@42 1741 E T6v, T6C, T7v, T7y;
Chris@42 1742 T6v = FNMS(KP191341716, T6u, KP461939766 * T6r);
Chris@42 1743 T6C = FMA(KP461939766, T6y, KP191341716 * T6B);
Chris@42 1744 T6D = T6v + T6C;
Chris@42 1745 T7W = T6v - T6C;
Chris@42 1746 {
Chris@42 1747 E T6K, T6N, T7A, T7B;
Chris@42 1748 T6K = KP353553390 * (T6G + T6J);
Chris@42 1749 T6N = KP500000000 * (T6L - T6M);
Chris@42 1750 T6O = T6K + T6N;
Chris@42 1751 T7M = T6N - T6K;
Chris@42 1752 T7A = FMA(KP191341716, T6r, KP461939766 * T6u);
Chris@42 1753 T7B = FNMS(KP191341716, T6y, KP461939766 * T6B);
Chris@42 1754 T7C = T7A + T7B;
Chris@42 1755 T7L = T7B - T7A;
Chris@42 1756 }
Chris@42 1757 T7v = KP500000000 * (T7t + T7u);
Chris@42 1758 T7y = KP353553390 * (T7w + T7x);
Chris@42 1759 T7z = T7v + T7y;
Chris@42 1760 T7V = T7v - T7y;
Chris@42 1761 {
Chris@42 1762 E T7j, T7R, T7q, T7S, T7f, T7m;
Chris@42 1763 T7f = KP707106781 * (T7b + T7e);
Chris@42 1764 T7j = T7f + T7i;
Chris@42 1765 T7R = T7i - T7f;
Chris@42 1766 T7m = KP707106781 * (T7k + T7l);
Chris@42 1767 T7q = T7m + T7p;
Chris@42 1768 T7S = T7p - T7m;
Chris@42 1769 T7r = FNMS(KP097545161, T7q, KP490392640 * T7j);
Chris@42 1770 T81 = FMA(KP415734806, T7R, KP277785116 * T7S);
Chris@42 1771 T7H = FMA(KP097545161, T7j, KP490392640 * T7q);
Chris@42 1772 T7T = FNMS(KP415734806, T7S, KP277785116 * T7R);
Chris@42 1773 }
Chris@42 1774 {
Chris@42 1775 E T70, T7O, T77, T7P, T6W, T73;
Chris@42 1776 T6W = KP707106781 * (T6S + T6V);
Chris@42 1777 T70 = T6W + T6Z;
Chris@42 1778 T7O = T6Z - T6W;
Chris@42 1779 T73 = KP707106781 * (T71 + T72);
Chris@42 1780 T77 = T73 + T76;
Chris@42 1781 T7P = T76 - T73;
Chris@42 1782 T78 = FMA(KP490392640, T70, KP097545161 * T77);
Chris@42 1783 T80 = FNMS(KP415734806, T7O, KP277785116 * T7P);
Chris@42 1784 T7G = FNMS(KP097545161, T70, KP490392640 * T77);
Chris@42 1785 T7Q = FMA(KP277785116, T7O, KP415734806 * T7P);
Chris@42 1786 }
Chris@42 1787 }
Chris@42 1788 {
Chris@42 1789 E T6P, T7s, T7J, T7K;
Chris@42 1790 T6P = T6D + T6O;
Chris@42 1791 T7s = T78 + T7r;
Chris@42 1792 Ip[WS(rs, 1)] = T6P + T7s;
Chris@42 1793 Im[WS(rs, 14)] = T7s - T6P;
Chris@42 1794 T7J = T7z + T7C;
Chris@42 1795 T7K = T7G + T7H;
Chris@42 1796 Rm[WS(rs, 14)] = T7J - T7K;
Chris@42 1797 Rp[WS(rs, 1)] = T7J + T7K;
Chris@42 1798 }
Chris@42 1799 {
Chris@42 1800 E T7D, T7E, T7F, T7I;
Chris@42 1801 T7D = T7z - T7C;
Chris@42 1802 T7E = T7r - T78;
Chris@42 1803 Rm[WS(rs, 6)] = T7D - T7E;
Chris@42 1804 Rp[WS(rs, 9)] = T7D + T7E;
Chris@42 1805 T7F = T6O - T6D;
Chris@42 1806 T7I = T7G - T7H;
Chris@42 1807 Ip[WS(rs, 9)] = T7F + T7I;
Chris@42 1808 Im[WS(rs, 6)] = T7I - T7F;
Chris@42 1809 }
Chris@42 1810 {
Chris@42 1811 E T7N, T7U, T83, T84;
Chris@42 1812 T7N = T7L + T7M;
Chris@42 1813 T7U = T7Q + T7T;
Chris@42 1814 Ip[WS(rs, 5)] = T7N + T7U;
Chris@42 1815 Im[WS(rs, 10)] = T7U - T7N;
Chris@42 1816 T83 = T7V + T7W;
Chris@42 1817 T84 = T80 + T81;
Chris@42 1818 Rm[WS(rs, 10)] = T83 - T84;
Chris@42 1819 Rp[WS(rs, 5)] = T83 + T84;
Chris@42 1820 }
Chris@42 1821 {
Chris@42 1822 E T7X, T7Y, T7Z, T82;
Chris@42 1823 T7X = T7V - T7W;
Chris@42 1824 T7Y = T7T - T7Q;
Chris@42 1825 Rm[WS(rs, 2)] = T7X - T7Y;
Chris@42 1826 Rp[WS(rs, 13)] = T7X + T7Y;
Chris@42 1827 T7Z = T7M - T7L;
Chris@42 1828 T82 = T80 - T81;
Chris@42 1829 Ip[WS(rs, 13)] = T7Z + T82;
Chris@42 1830 Im[WS(rs, 2)] = T82 - T7Z;
Chris@42 1831 }
Chris@42 1832 }
Chris@42 1833 {
Chris@42 1834 E T8b, T8U, T8e, T8K, T8A, T8J, T8x, T8T, T8t, T8Z, T8F, T8R, T8m, T8Y, T8E;
Chris@42 1835 E T8O;
Chris@42 1836 {
Chris@42 1837 E T87, T8a, T8v, T8w;
Chris@42 1838 T87 = FNMS(KP461939766, T86, KP191341716 * T85);
Chris@42 1839 T8a = FMA(KP191341716, T88, KP461939766 * T89);
Chris@42 1840 T8b = T87 + T8a;
Chris@42 1841 T8U = T87 - T8a;
Chris@42 1842 {
Chris@42 1843 E T8c, T8d, T8y, T8z;
Chris@42 1844 T8c = KP353553390 * (T7x - T7w);
Chris@42 1845 T8d = KP500000000 * (T6M + T6L);
Chris@42 1846 T8e = T8c + T8d;
Chris@42 1847 T8K = T8d - T8c;
Chris@42 1848 T8y = FMA(KP461939766, T85, KP191341716 * T86);
Chris@42 1849 T8z = FNMS(KP461939766, T88, KP191341716 * T89);
Chris@42 1850 T8A = T8y + T8z;
Chris@42 1851 T8J = T8z - T8y;
Chris@42 1852 }
Chris@42 1853 T8v = KP500000000 * (T7t - T7u);
Chris@42 1854 T8w = KP353553390 * (T6G - T6J);
Chris@42 1855 T8x = T8v + T8w;
Chris@42 1856 T8T = T8v - T8w;
Chris@42 1857 {
Chris@42 1858 E T8p, T8P, T8s, T8Q, T8n, T8q;
Chris@42 1859 T8n = KP707106781 * (T7l - T7k);
Chris@42 1860 T8p = T8n + T8o;
Chris@42 1861 T8P = T8o - T8n;
Chris@42 1862 T8q = KP707106781 * (T7b - T7e);
Chris@42 1863 T8s = T8q + T8r;
Chris@42 1864 T8Q = T8r - T8q;
Chris@42 1865 T8t = FNMS(KP277785116, T8s, KP415734806 * T8p);
Chris@42 1866 T8Z = FMA(KP490392640, T8P, KP097545161 * T8Q);
Chris@42 1867 T8F = FMA(KP277785116, T8p, KP415734806 * T8s);
Chris@42 1868 T8R = FNMS(KP490392640, T8Q, KP097545161 * T8P);
Chris@42 1869 }
Chris@42 1870 {
Chris@42 1871 E T8i, T8M, T8l, T8N, T8g, T8j;
Chris@42 1872 T8g = KP707106781 * (T72 - T71);
Chris@42 1873 T8i = T8g + T8h;
Chris@42 1874 T8M = T8h - T8g;
Chris@42 1875 T8j = KP707106781 * (T6S - T6V);
Chris@42 1876 T8l = T8j + T8k;
Chris@42 1877 T8N = T8k - T8j;
Chris@42 1878 T8m = FMA(KP415734806, T8i, KP277785116 * T8l);
Chris@42 1879 T8Y = FNMS(KP490392640, T8M, KP097545161 * T8N);
Chris@42 1880 T8E = FNMS(KP277785116, T8i, KP415734806 * T8l);
Chris@42 1881 T8O = FMA(KP097545161, T8M, KP490392640 * T8N);
Chris@42 1882 }
Chris@42 1883 }
Chris@42 1884 {
Chris@42 1885 E T8f, T8u, T8H, T8I;
Chris@42 1886 T8f = T8b + T8e;
Chris@42 1887 T8u = T8m + T8t;
Chris@42 1888 Ip[WS(rs, 3)] = T8f + T8u;
Chris@42 1889 Im[WS(rs, 12)] = T8u - T8f;
Chris@42 1890 T8H = T8x + T8A;
Chris@42 1891 T8I = T8E + T8F;
Chris@42 1892 Rm[WS(rs, 12)] = T8H - T8I;
Chris@42 1893 Rp[WS(rs, 3)] = T8H + T8I;
Chris@42 1894 }
Chris@42 1895 {
Chris@42 1896 E T8B, T8C, T8D, T8G;
Chris@42 1897 T8B = T8x - T8A;
Chris@42 1898 T8C = T8t - T8m;
Chris@42 1899 Rm[WS(rs, 4)] = T8B - T8C;
Chris@42 1900 Rp[WS(rs, 11)] = T8B + T8C;
Chris@42 1901 T8D = T8e - T8b;
Chris@42 1902 T8G = T8E - T8F;
Chris@42 1903 Ip[WS(rs, 11)] = T8D + T8G;
Chris@42 1904 Im[WS(rs, 4)] = T8G - T8D;
Chris@42 1905 }
Chris@42 1906 {
Chris@42 1907 E T8L, T8S, T91, T92;
Chris@42 1908 T8L = T8J + T8K;
Chris@42 1909 T8S = T8O + T8R;
Chris@42 1910 Ip[WS(rs, 7)] = T8L + T8S;
Chris@42 1911 Im[WS(rs, 8)] = T8S - T8L;
Chris@42 1912 T91 = T8T + T8U;
Chris@42 1913 T92 = T8Y + T8Z;
Chris@42 1914 Rm[WS(rs, 8)] = T91 - T92;
Chris@42 1915 Rp[WS(rs, 7)] = T91 + T92;
Chris@42 1916 }
Chris@42 1917 {
Chris@42 1918 E T8V, T8W, T8X, T90;
Chris@42 1919 T8V = T8T - T8U;
Chris@42 1920 T8W = T8R - T8O;
Chris@42 1921 Rm[0] = T8V - T8W;
Chris@42 1922 Rp[WS(rs, 15)] = T8V + T8W;
Chris@42 1923 T8X = T8K - T8J;
Chris@42 1924 T90 = T8Y - T8Z;
Chris@42 1925 Ip[WS(rs, 15)] = T8X + T90;
Chris@42 1926 Im[0] = T90 - T8X;
Chris@42 1927 }
Chris@42 1928 }
Chris@42 1929 }
Chris@42 1930 }
Chris@42 1931 }
Chris@42 1932
/*
 * Twiddle-instruction table for this codelet; it is referenced from the
 * `desc` initializer below.  It holds two entries: a TW_FULL entry carrying
 * the values (1, 32) and a TW_NEXT entry carrying (1, 0).
 * NOTE(review): presumably TW_FULL requests the complete twiddle set for
 * radix 32 and TW_NEXT terminates the list -- confirm against the tw_instr
 * declaration in the FFTW kernel headers (not visible here).
 */
Chris@42 1933 static const tw_instr twinstr[] = {
Chris@42 1934 {TW_FULL, 1, 32},
Chris@42 1935 {TW_NEXT, 1, 0}
Chris@42 1936 };
Chris@42 1937
/*
 * Descriptor passed to X(khc2c_register) for this codelet.  Fields, in
 * order: the integer 32, the name string "hc2cfdft_32", the twiddle table
 * `twinstr`, the address of GENUS, and a four-number tuple.
 * NOTE(review): the leading 32 is presumably the radix, and {404, 134, 94, 0}
 * presumably the operation counts (adds, muls, fused multiply-adds, other)
 * emitted by the generator -- confirm against the hc2c_desc declaration and
 * the generator's header comment for this variant.
 */
Chris@42 1938 static const hc2c_desc desc = { 32, "hc2cfdft_32", twinstr, &GENUS, {404, 134, 94, 0} };
Chris@42 1939
/*
 * Externally visible entry point for this codelet.  It makes a single call
 * to X(khc2c_register), passing the planner `p`, the hc2cfdft_32 function,
 * the address of `desc`, and the HC2C_VIA_DFT constant.  Returns nothing.
 * NOTE(review): X(...) is presumably the library's symbol-prefixing macro
 * and HC2C_VIA_DFT the codelet-kind tag -- both come from project headers
 * not visible here; confirm there.
 */
Chris@42 1940 void X(codelet_hc2cfdft_32) (planner *p) {
Chris@42 1941 X(khc2c_register) (p, hc2cfdft_32, &desc, HC2C_VIA_DFT);
Chris@42 1942 }
Chris@42 1943 #endif /* HAVE_FMA */