annotate src/fftw-3.3.8/rdft/scalar/r2cf/hc2cf_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:59 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2c.native -fma -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hc2cf_32 -include rdft/scalar/hc2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 434 FP additions, 260 FP multiplications,
Chris@82 32 * (or, 236 additions, 62 multiplications, 198 fused multiply/add),
Chris@82 33 * 102 stack variables, 7 constants, and 128 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cf.h"
Chris@82 36
/*
 * hc2cf_32 -- variant compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For every m in [mb, me) one loop iteration:
 *   1. loads 16 values from each of Rp, Rm, Ip and Im, at offsets
 *      WS(rs, 0) .. WS(rs, 15);
 *   2. combines every loaded (Rp, Rm) or (Ip, Im) pair, except the pair at
 *      Rp[0] / Rm[0], with two consecutive coefficients W[2j], W[2j + 1];
 *      all 62 entries W[0] .. W[61] are used exactly once per iteration;
 *   3. runs a fixed network of additions, subtractions and constant
 *      multiplications on the results;
 *   4. stores 16 values back into each of Rp, Rm, Ip and Im at the same set
 *      of offsets.
 * Within an iteration every load comes before the first store, so the four
 * arrays are overwritten in place.
 *
 * Parameters:
 *   Rp, Ip - data pointers, advanced by +ms after each iteration
 *   Rm, Im - data pointers, advanced by -ms after each iteration
 *   W      - coefficient table; the iteration for index m reads the 62
 *            entries starting at (W as passed in) + (m - 1) * 62
 *   rs     - stride handed to WS() to address the 16 elements per array
 *   mb, me - half-open range of iteration indices
 *   ms     - pointer step applied between iterations
 *
 * NOTE(review): FMA, FNMS and FMS are project macros that are not visible in
 * this file. The comments below assume FMA(a, b, c) = a * b + c,
 * FNMS(a, b, c) = c - a * b and FMS(a, b, c) = a * b - c -- confirm against
 * the project headers.
 * This code is machine generated (see the DO NOT EDIT note at the top of the
 * file); the T... temporaries are generator-assigned names.
 */
Chris@82 37 static void hc2cf_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* Numeric constants used by the arithmetic below, declared through DK(). */
Chris@82 39 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 40 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 41 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@82 42 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@82 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 44 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@82 45 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 46 {
Chris@82 47 INT m;
/*
 * W is first moved to the 62-entry group for index mb, then steps by 62 per
 * iteration. Rp / Ip walk forward by ms while Rm / Im walk backward by ms.
 * MAKE_VOLATILE_STRIDE is a project macro (not visible here).
 */
Chris@82 48 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) {
/* Values that survive from the load stage into the output stages. */
Chris@82 49 E T8, T8x, T3w, T87, Tl, T8y, T3B, T83, Tz, T6F, T3J, T5T, TM, T6G, T3Q;
Chris@82 50 E T5U, T11, T1e, T6M, T6J, T6K, T6L, T3Z, T5Y, T46, T5X, T1s, T1F, T6O, T6P;
Chris@82 51 E T6Q, T6R, T4e, T61, T4l, T60, T32, T7b, T79, T7N, T54, T6c, T5r, T6f, T29;
Chris@82 52 E T70, T6X, T7I, T4v, T65, T4S, T68, T3t, T76, T7e, T7O, T5b, T5s, T5i, T5t;
Chris@82 53 E T2A, T6Y, T73, T7J, T4C, T4T, T4J, T4U;
/*
 * Load stage, Rp / Rm offsets 0 and 8. Rp[0] and Rm[0] are used as loaded;
 * the pair at offset 8 is combined with W[30], W[31].
 * Pattern used for every coefficient pair (wr = W[2j], wi = W[2j + 1],
 * x from the p-array, y from the m-array), under the macro assumption in the
 * header: first result = wr * x + wi * y, second result = wr * y - wi * x.
 */
Chris@82 54 {
Chris@82 55 E T1, T86, T3, T6, T4, T84, T2, T7, T85, T5;
Chris@82 56 T1 = Rp[0];
Chris@82 57 T86 = Rm[0];
Chris@82 58 T3 = Rp[WS(rs, 8)];
Chris@82 59 T6 = Rm[WS(rs, 8)];
Chris@82 60 T2 = W[30];
Chris@82 61 T4 = T2 * T3;
Chris@82 62 T84 = T2 * T6;
Chris@82 63 T5 = W[31];
Chris@82 64 T7 = FMA(T5, T6, T4);
Chris@82 65 T85 = FNMS(T5, T3, T84);
Chris@82 66 T8 = T1 + T7;
Chris@82 67 T8x = T86 - T85;
Chris@82 68 T3w = T1 - T7;
Chris@82 69 T87 = T85 + T86;
Chris@82 70 }
/* Rp / Rm offsets 4 (W[14], W[15]) and 12 (W[46], W[47]). */
Chris@82 71 {
Chris@82 72 E Ta, Td, Tb, T3x, Tg, Tj, Th, T3z, T9, Tf;
Chris@82 73 Ta = Rp[WS(rs, 4)];
Chris@82 74 Td = Rm[WS(rs, 4)];
Chris@82 75 T9 = W[14];
Chris@82 76 Tb = T9 * Ta;
Chris@82 77 T3x = T9 * Td;
Chris@82 78 Tg = Rp[WS(rs, 12)];
Chris@82 79 Tj = Rm[WS(rs, 12)];
Chris@82 80 Tf = W[46];
Chris@82 81 Th = Tf * Tg;
Chris@82 82 T3z = Tf * Tj;
Chris@82 83 {
Chris@82 84 E Te, T3y, Tk, T3A, Tc, Ti;
Chris@82 85 Tc = W[15];
Chris@82 86 Te = FMA(Tc, Td, Tb);
Chris@82 87 T3y = FNMS(Tc, Ta, T3x);
Chris@82 88 Ti = W[47];
Chris@82 89 Tk = FMA(Ti, Tj, Th);
Chris@82 90 T3A = FNMS(Ti, Tg, T3z);
Chris@82 91 Tl = Te + Tk;
Chris@82 92 T8y = Te - Tk;
Chris@82 93 T3B = T3y - T3A;
Chris@82 94 T83 = T3y + T3A;
Chris@82 95 }
Chris@82 96 }
/* Rp / Rm offsets 2 (W[6], W[7]) and 10 (W[38], W[39]). */
Chris@82 97 {
Chris@82 98 E Ts, T3F, Ty, T3H, T3D, T3I;
Chris@82 99 {
Chris@82 100 E To, Tr, Tp, T3E, Tn, Tq;
Chris@82 101 To = Rp[WS(rs, 2)];
Chris@82 102 Tr = Rm[WS(rs, 2)];
Chris@82 103 Tn = W[6];
Chris@82 104 Tp = Tn * To;
Chris@82 105 T3E = Tn * Tr;
Chris@82 106 Tq = W[7];
Chris@82 107 Ts = FMA(Tq, Tr, Tp);
Chris@82 108 T3F = FNMS(Tq, To, T3E);
Chris@82 109 }
Chris@82 110 {
Chris@82 111 E Tu, Tx, Tv, T3G, Tt, Tw;
Chris@82 112 Tu = Rp[WS(rs, 10)];
Chris@82 113 Tx = Rm[WS(rs, 10)];
Chris@82 114 Tt = W[38];
Chris@82 115 Tv = Tt * Tu;
Chris@82 116 T3G = Tt * Tx;
Chris@82 117 Tw = W[39];
Chris@82 118 Ty = FMA(Tw, Tx, Tv);
Chris@82 119 T3H = FNMS(Tw, Tu, T3G);
Chris@82 120 }
Chris@82 121 Tz = Ts + Ty;
Chris@82 122 T6F = T3F + T3H;
Chris@82 123 T3D = Ts - Ty;
Chris@82 124 T3I = T3F - T3H;
Chris@82 125 T3J = T3D + T3I;
Chris@82 126 T5T = T3I - T3D;
Chris@82 127 }
/* Rp / Rm offsets 14 (W[54], W[55]) and 6 (W[22], W[23]). */
Chris@82 128 {
Chris@82 129 E TF, T3M, TL, T3O, T3K, T3P;
Chris@82 130 {
Chris@82 131 E TB, TE, TC, T3L, TA, TD;
Chris@82 132 TB = Rp[WS(rs, 14)];
Chris@82 133 TE = Rm[WS(rs, 14)];
Chris@82 134 TA = W[54];
Chris@82 135 TC = TA * TB;
Chris@82 136 T3L = TA * TE;
Chris@82 137 TD = W[55];
Chris@82 138 TF = FMA(TD, TE, TC);
Chris@82 139 T3M = FNMS(TD, TB, T3L);
Chris@82 140 }
Chris@82 141 {
Chris@82 142 E TH, TK, TI, T3N, TG, TJ;
Chris@82 143 TH = Rp[WS(rs, 6)];
Chris@82 144 TK = Rm[WS(rs, 6)];
Chris@82 145 TG = W[22];
Chris@82 146 TI = TG * TH;
Chris@82 147 T3N = TG * TK;
Chris@82 148 TJ = W[23];
Chris@82 149 TL = FMA(TJ, TK, TI);
Chris@82 150 T3O = FNMS(TJ, TH, T3N);
Chris@82 151 }
Chris@82 152 TM = TF + TL;
Chris@82 153 T6G = T3M + T3O;
Chris@82 154 T3K = TF - TL;
Chris@82 155 T3P = T3M - T3O;
Chris@82 156 T3Q = T3K - T3P;
Chris@82 157 T5U = T3K + T3P;
Chris@82 158 }
/*
 * Rp / Rm offsets 1 (W[2], W[3]), 13 (W[50], W[51]), 9 (W[34], W[35]) and
 * 5 (W[18], W[19]).
 */
Chris@82 159 {
Chris@82 160 E TU, T41, T1d, T3X, T10, T43, T17, T3V;
Chris@82 161 {
Chris@82 162 E TQ, TT, TR, T40, TP, TS;
Chris@82 163 TQ = Rp[WS(rs, 1)];
Chris@82 164 TT = Rm[WS(rs, 1)];
Chris@82 165 TP = W[2];
Chris@82 166 TR = TP * TQ;
Chris@82 167 T40 = TP * TT;
Chris@82 168 TS = W[3];
Chris@82 169 TU = FMA(TS, TT, TR);
Chris@82 170 T41 = FNMS(TS, TQ, T40);
Chris@82 171 }
Chris@82 172 {
Chris@82 173 E T19, T1c, T1a, T3W, T18, T1b;
Chris@82 174 T19 = Rp[WS(rs, 13)];
Chris@82 175 T1c = Rm[WS(rs, 13)];
Chris@82 176 T18 = W[50];
Chris@82 177 T1a = T18 * T19;
Chris@82 178 T3W = T18 * T1c;
Chris@82 179 T1b = W[51];
Chris@82 180 T1d = FMA(T1b, T1c, T1a);
Chris@82 181 T3X = FNMS(T1b, T19, T3W);
Chris@82 182 }
Chris@82 183 {
Chris@82 184 E TW, TZ, TX, T42, TV, TY;
Chris@82 185 TW = Rp[WS(rs, 9)];
Chris@82 186 TZ = Rm[WS(rs, 9)];
Chris@82 187 TV = W[34];
Chris@82 188 TX = TV * TW;
Chris@82 189 T42 = TV * TZ;
Chris@82 190 TY = W[35];
Chris@82 191 T10 = FMA(TY, TZ, TX);
Chris@82 192 T43 = FNMS(TY, TW, T42);
Chris@82 193 }
Chris@82 194 {
Chris@82 195 E T13, T16, T14, T3U, T12, T15;
Chris@82 196 T13 = Rp[WS(rs, 5)];
Chris@82 197 T16 = Rm[WS(rs, 5)];
Chris@82 198 T12 = W[18];
Chris@82 199 T14 = T12 * T13;
Chris@82 200 T3U = T12 * T16;
Chris@82 201 T15 = W[19];
Chris@82 202 T17 = FMA(T15, T16, T14);
Chris@82 203 T3V = FNMS(T15, T13, T3U);
Chris@82 204 }
Chris@82 205 T11 = TU + T10;
Chris@82 206 T1e = T17 + T1d;
Chris@82 207 T6M = T11 - T1e;
Chris@82 208 T6J = T41 + T43;
Chris@82 209 T6K = T3V + T3X;
Chris@82 210 T6L = T6J - T6K;
Chris@82 211 {
Chris@82 212 E T3T, T3Y, T44, T45;
Chris@82 213 T3T = TU - T10;
Chris@82 214 T3Y = T3V - T3X;
Chris@82 215 T3Z = T3T + T3Y;
Chris@82 216 T5Y = T3T - T3Y;
Chris@82 217 T44 = T41 - T43;
Chris@82 218 T45 = T17 - T1d;
Chris@82 219 T46 = T44 - T45;
Chris@82 220 T5X = T44 + T45;
Chris@82 221 }
Chris@82 222 }
/*
 * Rp / Rm offsets 15 (W[58], W[59]), 11 (W[42], W[43]), 7 (W[26], W[27]) and
 * 3 (W[10], W[11]).
 */
Chris@82 223 {
Chris@82 224 E T1l, T4g, T1E, T4c, T1r, T4i, T1y, T4a;
Chris@82 225 {
Chris@82 226 E T1h, T1k, T1i, T4f, T1g, T1j;
Chris@82 227 T1h = Rp[WS(rs, 15)];
Chris@82 228 T1k = Rm[WS(rs, 15)];
Chris@82 229 T1g = W[58];
Chris@82 230 T1i = T1g * T1h;
Chris@82 231 T4f = T1g * T1k;
Chris@82 232 T1j = W[59];
Chris@82 233 T1l = FMA(T1j, T1k, T1i);
Chris@82 234 T4g = FNMS(T1j, T1h, T4f);
Chris@82 235 }
Chris@82 236 {
Chris@82 237 E T1A, T1D, T1B, T4b, T1z, T1C;
Chris@82 238 T1A = Rp[WS(rs, 11)];
Chris@82 239 T1D = Rm[WS(rs, 11)];
Chris@82 240 T1z = W[42];
Chris@82 241 T1B = T1z * T1A;
Chris@82 242 T4b = T1z * T1D;
Chris@82 243 T1C = W[43];
Chris@82 244 T1E = FMA(T1C, T1D, T1B);
Chris@82 245 T4c = FNMS(T1C, T1A, T4b);
Chris@82 246 }
Chris@82 247 {
Chris@82 248 E T1n, T1q, T1o, T4h, T1m, T1p;
Chris@82 249 T1n = Rp[WS(rs, 7)];
Chris@82 250 T1q = Rm[WS(rs, 7)];
Chris@82 251 T1m = W[26];
Chris@82 252 T1o = T1m * T1n;
Chris@82 253 T4h = T1m * T1q;
Chris@82 254 T1p = W[27];
Chris@82 255 T1r = FMA(T1p, T1q, T1o);
Chris@82 256 T4i = FNMS(T1p, T1n, T4h);
Chris@82 257 }
Chris@82 258 {
Chris@82 259 E T1u, T1x, T1v, T49, T1t, T1w;
Chris@82 260 T1u = Rp[WS(rs, 3)];
Chris@82 261 T1x = Rm[WS(rs, 3)];
Chris@82 262 T1t = W[10];
Chris@82 263 T1v = T1t * T1u;
Chris@82 264 T49 = T1t * T1x;
Chris@82 265 T1w = W[11];
Chris@82 266 T1y = FMA(T1w, T1x, T1v);
Chris@82 267 T4a = FNMS(T1w, T1u, T49);
Chris@82 268 }
Chris@82 269 T1s = T1l + T1r;
Chris@82 270 T1F = T1y + T1E;
Chris@82 271 T6O = T1s - T1F;
Chris@82 272 T6P = T4g + T4i;
Chris@82 273 T6Q = T4a + T4c;
Chris@82 274 T6R = T6P - T6Q;
Chris@82 275 {
Chris@82 276 E T48, T4d, T4j, T4k;
Chris@82 277 T48 = T1l - T1r;
Chris@82 278 T4d = T4a - T4c;
Chris@82 279 T4e = T48 + T4d;
Chris@82 280 T61 = T48 - T4d;
Chris@82 281 T4j = T4g - T4i;
Chris@82 282 T4k = T1y - T1E;
Chris@82 283 T4l = T4j - T4k;
Chris@82 284 T60 = T4j + T4k;
Chris@82 285 }
Chris@82 286 }
/*
 * Ip / Im offsets 15 (W[60], W[61]), 11 (W[44], W[45]), 7 (W[28], W[29]) and
 * 3 (W[12], W[13]).
 */
Chris@82 287 {
Chris@82 288 E T2H, T5m, T30, T52, T2N, T5o, T2U, T50;
Chris@82 289 {
Chris@82 290 E T2D, T2G, T2E, T5l, T2C, T2F;
Chris@82 291 T2D = Ip[WS(rs, 15)];
Chris@82 292 T2G = Im[WS(rs, 15)];
Chris@82 293 T2C = W[60];
Chris@82 294 T2E = T2C * T2D;
Chris@82 295 T5l = T2C * T2G;
Chris@82 296 T2F = W[61];
Chris@82 297 T2H = FMA(T2F, T2G, T2E);
Chris@82 298 T5m = FNMS(T2F, T2D, T5l);
Chris@82 299 }
Chris@82 300 {
Chris@82 301 E T2W, T2Z, T2X, T51, T2V, T2Y;
Chris@82 302 T2W = Ip[WS(rs, 11)];
Chris@82 303 T2Z = Im[WS(rs, 11)];
Chris@82 304 T2V = W[44];
Chris@82 305 T2X = T2V * T2W;
Chris@82 306 T51 = T2V * T2Z;
Chris@82 307 T2Y = W[45];
Chris@82 308 T30 = FMA(T2Y, T2Z, T2X);
Chris@82 309 T52 = FNMS(T2Y, T2W, T51);
Chris@82 310 }
Chris@82 311 {
Chris@82 312 E T2J, T2M, T2K, T5n, T2I, T2L;
Chris@82 313 T2J = Ip[WS(rs, 7)];
Chris@82 314 T2M = Im[WS(rs, 7)];
Chris@82 315 T2I = W[28];
Chris@82 316 T2K = T2I * T2J;
Chris@82 317 T5n = T2I * T2M;
Chris@82 318 T2L = W[29];
Chris@82 319 T2N = FMA(T2L, T2M, T2K);
Chris@82 320 T5o = FNMS(T2L, T2J, T5n);
Chris@82 321 }
Chris@82 322 {
Chris@82 323 E T2Q, T2T, T2R, T4Z, T2P, T2S;
Chris@82 324 T2Q = Ip[WS(rs, 3)];
Chris@82 325 T2T = Im[WS(rs, 3)];
Chris@82 326 T2P = W[12];
Chris@82 327 T2R = T2P * T2Q;
Chris@82 328 T4Z = T2P * T2T;
Chris@82 329 T2S = W[13];
Chris@82 330 T2U = FMA(T2S, T2T, T2R);
Chris@82 331 T50 = FNMS(T2S, T2Q, T4Z);
Chris@82 332 }
Chris@82 333 {
Chris@82 334 E T2O, T31, T77, T78;
Chris@82 335 T2O = T2H + T2N;
Chris@82 336 T31 = T2U + T30;
Chris@82 337 T32 = T2O + T31;
Chris@82 338 T7b = T2O - T31;
Chris@82 339 T77 = T5m + T5o;
Chris@82 340 T78 = T50 + T52;
Chris@82 341 T79 = T77 - T78;
Chris@82 342 T7N = T77 + T78;
Chris@82 343 }
Chris@82 344 {
Chris@82 345 E T4Y, T53, T5p, T5q;
Chris@82 346 T4Y = T2H - T2N;
Chris@82 347 T53 = T50 - T52;
Chris@82 348 T54 = T4Y + T53;
Chris@82 349 T6c = T4Y - T53;
Chris@82 350 T5p = T5m - T5o;
Chris@82 351 T5q = T30 - T2U;
Chris@82 352 T5r = T5p + T5q;
Chris@82 353 T6f = T5q - T5p;
Chris@82 354 }
Chris@82 355 }
/*
 * Ip / Im offsets 0 (W[0], W[1]), 12 (W[48], W[49]), 8 (W[32], W[33]) and
 * 4 (W[16], W[17]).
 */
Chris@82 356 {
Chris@82 357 E T1O, T4N, T27, T4t, T1U, T4P, T21, T4r;
Chris@82 358 {
Chris@82 359 E T1K, T1N, T1L, T4M, T1J, T1M;
Chris@82 360 T1K = Ip[0];
Chris@82 361 T1N = Im[0];
Chris@82 362 T1J = W[0];
Chris@82 363 T1L = T1J * T1K;
Chris@82 364 T4M = T1J * T1N;
Chris@82 365 T1M = W[1];
Chris@82 366 T1O = FMA(T1M, T1N, T1L);
Chris@82 367 T4N = FNMS(T1M, T1K, T4M);
Chris@82 368 }
Chris@82 369 {
Chris@82 370 E T23, T26, T24, T4s, T22, T25;
Chris@82 371 T23 = Ip[WS(rs, 12)];
Chris@82 372 T26 = Im[WS(rs, 12)];
Chris@82 373 T22 = W[48];
Chris@82 374 T24 = T22 * T23;
Chris@82 375 T4s = T22 * T26;
Chris@82 376 T25 = W[49];
Chris@82 377 T27 = FMA(T25, T26, T24);
Chris@82 378 T4t = FNMS(T25, T23, T4s);
Chris@82 379 }
Chris@82 380 {
Chris@82 381 E T1Q, T1T, T1R, T4O, T1P, T1S;
Chris@82 382 T1Q = Ip[WS(rs, 8)];
Chris@82 383 T1T = Im[WS(rs, 8)];
Chris@82 384 T1P = W[32];
Chris@82 385 T1R = T1P * T1Q;
Chris@82 386 T4O = T1P * T1T;
Chris@82 387 T1S = W[33];
Chris@82 388 T1U = FMA(T1S, T1T, T1R);
Chris@82 389 T4P = FNMS(T1S, T1Q, T4O);
Chris@82 390 }
Chris@82 391 {
Chris@82 392 E T1X, T20, T1Y, T4q, T1W, T1Z;
Chris@82 393 T1X = Ip[WS(rs, 4)];
Chris@82 394 T20 = Im[WS(rs, 4)];
Chris@82 395 T1W = W[16];
Chris@82 396 T1Y = T1W * T1X;
Chris@82 397 T4q = T1W * T20;
Chris@82 398 T1Z = W[17];
Chris@82 399 T21 = FMA(T1Z, T20, T1Y);
Chris@82 400 T4r = FNMS(T1Z, T1X, T4q);
Chris@82 401 }
Chris@82 402 {
Chris@82 403 E T1V, T28, T6V, T6W;
Chris@82 404 T1V = T1O + T1U;
Chris@82 405 T28 = T21 + T27;
Chris@82 406 T29 = T1V + T28;
Chris@82 407 T70 = T1V - T28;
Chris@82 408 T6V = T4N + T4P;
Chris@82 409 T6W = T4r + T4t;
Chris@82 410 T6X = T6V - T6W;
Chris@82 411 T7I = T6V + T6W;
Chris@82 412 }
Chris@82 413 {
Chris@82 414 E T4p, T4u, T4Q, T4R;
Chris@82 415 T4p = T1O - T1U;
Chris@82 416 T4u = T4r - T4t;
Chris@82 417 T4v = T4p + T4u;
Chris@82 418 T65 = T4p - T4u;
Chris@82 419 T4Q = T4N - T4P;
Chris@82 420 T4R = T21 - T27;
Chris@82 421 T4S = T4Q - T4R;
Chris@82 422 T68 = T4Q + T4R;
Chris@82 423 }
Chris@82 424 }
/*
 * Ip / Im offsets 1 (W[4], W[5]), 5 (W[20], W[21]), 9 (W[36], W[37]) and
 * 13 (W[52], W[53]).
 */
Chris@82 425 {
Chris@82 426 E T38, T57, T3r, T5g, T3e, T59, T3l, T5e;
Chris@82 427 {
Chris@82 428 E T34, T37, T35, T56, T33, T36;
Chris@82 429 T34 = Ip[WS(rs, 1)];
Chris@82 430 T37 = Im[WS(rs, 1)];
Chris@82 431 T33 = W[4];
Chris@82 432 T35 = T33 * T34;
Chris@82 433 T56 = T33 * T37;
Chris@82 434 T36 = W[5];
Chris@82 435 T38 = FMA(T36, T37, T35);
Chris@82 436 T57 = FNMS(T36, T34, T56);
Chris@82 437 }
Chris@82 438 {
Chris@82 439 E T3n, T3q, T3o, T5f, T3m, T3p;
Chris@82 440 T3n = Ip[WS(rs, 5)];
Chris@82 441 T3q = Im[WS(rs, 5)];
Chris@82 442 T3m = W[20];
Chris@82 443 T3o = T3m * T3n;
Chris@82 444 T5f = T3m * T3q;
Chris@82 445 T3p = W[21];
Chris@82 446 T3r = FMA(T3p, T3q, T3o);
Chris@82 447 T5g = FNMS(T3p, T3n, T5f);
Chris@82 448 }
Chris@82 449 {
Chris@82 450 E T3a, T3d, T3b, T58, T39, T3c;
Chris@82 451 T3a = Ip[WS(rs, 9)];
Chris@82 452 T3d = Im[WS(rs, 9)];
Chris@82 453 T39 = W[36];
Chris@82 454 T3b = T39 * T3a;
Chris@82 455 T58 = T39 * T3d;
Chris@82 456 T3c = W[37];
Chris@82 457 T3e = FMA(T3c, T3d, T3b);
Chris@82 458 T59 = FNMS(T3c, T3a, T58);
Chris@82 459 }
Chris@82 460 {
Chris@82 461 E T3h, T3k, T3i, T5d, T3g, T3j;
Chris@82 462 T3h = Ip[WS(rs, 13)];
Chris@82 463 T3k = Im[WS(rs, 13)];
Chris@82 464 T3g = W[52];
Chris@82 465 T3i = T3g * T3h;
Chris@82 466 T5d = T3g * T3k;
Chris@82 467 T3j = W[53];
Chris@82 468 T3l = FMA(T3j, T3k, T3i);
Chris@82 469 T5e = FNMS(T3j, T3h, T5d);
Chris@82 470 }
Chris@82 471 {
Chris@82 472 E T3f, T3s, T7c, T7d;
Chris@82 473 T3f = T38 + T3e;
Chris@82 474 T3s = T3l + T3r;
Chris@82 475 T3t = T3f + T3s;
Chris@82 476 T76 = T3s - T3f;
Chris@82 477 T7c = T57 + T59;
Chris@82 478 T7d = T5e + T5g;
Chris@82 479 T7e = T7c - T7d;
Chris@82 480 T7O = T7c + T7d;
Chris@82 481 }
Chris@82 482 {
Chris@82 483 E T55, T5a, T5c, T5h;
Chris@82 484 T55 = T38 - T3e;
Chris@82 485 T5a = T57 - T59;
Chris@82 486 T5b = T55 + T5a;
Chris@82 487 T5s = T5a - T55;
Chris@82 488 T5c = T3l - T3r;
Chris@82 489 T5h = T5e - T5g;
Chris@82 490 T5i = T5c - T5h;
Chris@82 491 T5t = T5c + T5h;
Chris@82 492 }
Chris@82 493 }
/*
 * Ip / Im offsets 2 (W[8], W[9]), 6 (W[24], W[25]), 10 (W[40], W[41]) and
 * 14 (W[56], W[57]). This is the last group of loads.
 */
Chris@82 494 {
Chris@82 495 E T2f, T4y, T2y, T4H, T2l, T4A, T2s, T4F;
Chris@82 496 {
Chris@82 497 E T2b, T2e, T2c, T4x, T2a, T2d;
Chris@82 498 T2b = Ip[WS(rs, 2)];
Chris@82 499 T2e = Im[WS(rs, 2)];
Chris@82 500 T2a = W[8];
Chris@82 501 T2c = T2a * T2b;
Chris@82 502 T4x = T2a * T2e;
Chris@82 503 T2d = W[9];
Chris@82 504 T2f = FMA(T2d, T2e, T2c);
Chris@82 505 T4y = FNMS(T2d, T2b, T4x);
Chris@82 506 }
Chris@82 507 {
Chris@82 508 E T2u, T2x, T2v, T4G, T2t, T2w;
Chris@82 509 T2u = Ip[WS(rs, 6)];
Chris@82 510 T2x = Im[WS(rs, 6)];
Chris@82 511 T2t = W[24];
Chris@82 512 T2v = T2t * T2u;
Chris@82 513 T4G = T2t * T2x;
Chris@82 514 T2w = W[25];
Chris@82 515 T2y = FMA(T2w, T2x, T2v);
Chris@82 516 T4H = FNMS(T2w, T2u, T4G);
Chris@82 517 }
Chris@82 518 {
Chris@82 519 E T2h, T2k, T2i, T4z, T2g, T2j;
Chris@82 520 T2h = Ip[WS(rs, 10)];
Chris@82 521 T2k = Im[WS(rs, 10)];
Chris@82 522 T2g = W[40];
Chris@82 523 T2i = T2g * T2h;
Chris@82 524 T4z = T2g * T2k;
Chris@82 525 T2j = W[41];
Chris@82 526 T2l = FMA(T2j, T2k, T2i);
Chris@82 527 T4A = FNMS(T2j, T2h, T4z);
Chris@82 528 }
Chris@82 529 {
Chris@82 530 E T2o, T2r, T2p, T4E, T2n, T2q;
Chris@82 531 T2o = Ip[WS(rs, 14)];
Chris@82 532 T2r = Im[WS(rs, 14)];
Chris@82 533 T2n = W[56];
Chris@82 534 T2p = T2n * T2o;
Chris@82 535 T4E = T2n * T2r;
Chris@82 536 T2q = W[57];
Chris@82 537 T2s = FMA(T2q, T2r, T2p);
Chris@82 538 T4F = FNMS(T2q, T2o, T4E);
Chris@82 539 }
Chris@82 540 {
Chris@82 541 E T2m, T2z, T71, T72;
Chris@82 542 T2m = T2f + T2l;
Chris@82 543 T2z = T2s + T2y;
Chris@82 544 T2A = T2m + T2z;
Chris@82 545 T6Y = T2z - T2m;
Chris@82 546 T71 = T4y + T4A;
Chris@82 547 T72 = T4F + T4H;
Chris@82 548 T73 = T71 - T72;
Chris@82 549 T7J = T71 + T72;
Chris@82 550 }
Chris@82 551 {
Chris@82 552 E T4w, T4B, T4D, T4I;
Chris@82 553 T4w = T2f - T2l;
Chris@82 554 T4B = T4y - T4A;
Chris@82 555 T4C = T4w + T4B;
Chris@82 556 T4T = T4B - T4w;
Chris@82 557 T4D = T2s - T2y;
Chris@82 558 T4I = T4F - T4H;
Chris@82 559 T4J = T4D - T4I;
Chris@82 560 T4U = T4D + T4I;
Chris@82 561 }
Chris@82 562 }
/*
 * Output stage 1: stores Rp / Ip at offsets 0, 8, 4, 12 and Rm / Im at
 * offsets 15, 7, 11, 3. Only KP707106781 is used as a multiplier here.
 */
Chris@82 563 {
Chris@82 564 E TO, T7C, T7Z, T80, T89, T8e, T1H, T8d, T3v, T8b, T7L, T7T, T7Q, T7U, T7F;
Chris@82 565 E T81;
Chris@82 566 {
Chris@82 567 E Tm, TN, T7X, T7Y;
Chris@82 568 Tm = T8 + Tl;
Chris@82 569 TN = Tz + TM;
Chris@82 570 TO = Tm + TN;
Chris@82 571 T7C = Tm - TN;
Chris@82 572 T7X = T7I + T7J;
Chris@82 573 T7Y = T7N + T7O;
Chris@82 574 T7Z = T7X - T7Y;
Chris@82 575 T80 = T7X + T7Y;
Chris@82 576 }
Chris@82 577 {
Chris@82 578 E T82, T88, T1f, T1G;
Chris@82 579 T82 = T6F + T6G;
Chris@82 580 T88 = T83 + T87;
Chris@82 581 T89 = T82 + T88;
Chris@82 582 T8e = T88 - T82;
Chris@82 583 T1f = T11 + T1e;
Chris@82 584 T1G = T1s + T1F;
Chris@82 585 T1H = T1f + T1G;
Chris@82 586 T8d = T1G - T1f;
Chris@82 587 }
Chris@82 588 {
Chris@82 589 E T2B, T3u, T7H, T7K;
Chris@82 590 T2B = T29 + T2A;
Chris@82 591 T3u = T32 + T3t;
Chris@82 592 T3v = T2B + T3u;
Chris@82 593 T8b = T3u - T2B;
Chris@82 594 T7H = T29 - T2A;
Chris@82 595 T7K = T7I - T7J;
Chris@82 596 T7L = T7H + T7K;
Chris@82 597 T7T = T7K - T7H;
Chris@82 598 }
Chris@82 599 {
Chris@82 600 E T7M, T7P, T7D, T7E;
Chris@82 601 T7M = T32 - T3t;
Chris@82 602 T7P = T7N - T7O;
Chris@82 603 T7Q = T7M - T7P;
Chris@82 604 T7U = T7M + T7P;
Chris@82 605 T7D = T6J + T6K;
Chris@82 606 T7E = T6P + T6Q;
Chris@82 607 T7F = T7D - T7E;
Chris@82 608 T81 = T7D + T7E;
Chris@82 609 }
Chris@82 610 {
Chris@82 611 E T1I, T8a, T7W, T8c;
Chris@82 612 T1I = TO + T1H;
Chris@82 613 Rm[WS(rs, 15)] = T1I - T3v;
Chris@82 614 Rp[0] = T1I + T3v;
Chris@82 615 T8a = T81 + T89;
Chris@82 616 Im[WS(rs, 15)] = T80 - T8a;
Chris@82 617 Ip[0] = T80 + T8a;
Chris@82 618 T7W = TO - T1H;
Chris@82 619 Rm[WS(rs, 7)] = T7W - T7Z;
Chris@82 620 Rp[WS(rs, 8)] = T7W + T7Z;
Chris@82 621 T8c = T89 - T81;
Chris@82 622 Im[WS(rs, 7)] = T8b - T8c;
Chris@82 623 Ip[WS(rs, 8)] = T8b + T8c;
Chris@82 624 }
Chris@82 625 {
Chris@82 626 E T7G, T7R, T8f, T8g;
Chris@82 627 T7G = T7C + T7F;
Chris@82 628 T7R = T7L + T7Q;
Chris@82 629 Rm[WS(rs, 11)] = FNMS(KP707106781, T7R, T7G);
Chris@82 630 Rp[WS(rs, 4)] = FMA(KP707106781, T7R, T7G);
Chris@82 631 T8f = T8d + T8e;
Chris@82 632 T8g = T7T + T7U;
Chris@82 633 Im[WS(rs, 11)] = FMS(KP707106781, T8g, T8f);
Chris@82 634 Ip[WS(rs, 4)] = FMA(KP707106781, T8g, T8f);
Chris@82 635 }
Chris@82 636 {
Chris@82 637 E T7S, T7V, T8h, T8i;
Chris@82 638 T7S = T7C - T7F;
Chris@82 639 T7V = T7T - T7U;
Chris@82 640 Rm[WS(rs, 3)] = FNMS(KP707106781, T7V, T7S);
Chris@82 641 Rp[WS(rs, 12)] = FMA(KP707106781, T7V, T7S);
Chris@82 642 T8h = T8e - T8d;
Chris@82 643 T8i = T7Q - T7L;
Chris@82 644 Im[WS(rs, 3)] = FMS(KP707106781, T8i, T8h);
Chris@82 645 Ip[WS(rs, 12)] = FMA(KP707106781, T8i, T8h);
Chris@82 646 }
Chris@82 647 }
/*
 * Output stage 2: stores Rp / Ip at offsets 6, 14, 2, 10 and Rm / Im at
 * offsets 9, 1, 13, 5. Multipliers used: KP414213562, KP707106781 and
 * KP923879532.
 */
Chris@82 648 {
Chris@82 649 E T6I, T7m, T7w, T7A, T8l, T8r, T6T, T8m, T75, T7j, T7p, T8s, T7t, T7z, T7g;
Chris@82 650 E T7k;
Chris@82 651 {
Chris@82 652 E T6E, T6H, T7u, T7v;
Chris@82 653 T6E = T8 - Tl;
Chris@82 654 T6H = T6F - T6G;
Chris@82 655 T6I = T6E - T6H;
Chris@82 656 T7m = T6E + T6H;
Chris@82 657 T7u = T7b + T7e;
Chris@82 658 T7v = T79 + T76;
Chris@82 659 T7w = FNMS(KP414213562, T7v, T7u);
Chris@82 660 T7A = FMA(KP414213562, T7u, T7v);
Chris@82 661 }
Chris@82 662 {
Chris@82 663 E T8j, T8k, T6N, T6S;
Chris@82 664 T8j = TM - Tz;
Chris@82 665 T8k = T87 - T83;
Chris@82 666 T8l = T8j + T8k;
Chris@82 667 T8r = T8k - T8j;
Chris@82 668 T6N = T6L - T6M;
Chris@82 669 T6S = T6O + T6R;
Chris@82 670 T6T = T6N - T6S;
Chris@82 671 T8m = T6N + T6S;
Chris@82 672 }
Chris@82 673 {
Chris@82 674 E T6Z, T74, T7n, T7o;
Chris@82 675 T6Z = T6X - T6Y;
Chris@82 676 T74 = T70 - T73;
Chris@82 677 T75 = FMA(KP414213562, T74, T6Z);
Chris@82 678 T7j = FNMS(KP414213562, T6Z, T74);
Chris@82 679 T7n = T6M + T6L;
Chris@82 680 T7o = T6O - T6R;
Chris@82 681 T7p = T7n + T7o;
Chris@82 682 T8s = T7o - T7n;
Chris@82 683 }
Chris@82 684 {
Chris@82 685 E T7r, T7s, T7a, T7f;
Chris@82 686 T7r = T70 + T73;
Chris@82 687 T7s = T6X + T6Y;
Chris@82 688 T7t = FMA(KP414213562, T7s, T7r);
Chris@82 689 T7z = FNMS(KP414213562, T7r, T7s);
Chris@82 690 T7a = T76 - T79;
Chris@82 691 T7f = T7b - T7e;
Chris@82 692 T7g = FMA(KP414213562, T7f, T7a);
Chris@82 693 T7k = FNMS(KP414213562, T7a, T7f);
Chris@82 694 }
Chris@82 695 {
Chris@82 696 E T6U, T7h, T8t, T8u;
Chris@82 697 T6U = FMA(KP707106781, T6T, T6I);
Chris@82 698 T7h = T75 + T7g;
Chris@82 699 Rm[WS(rs, 9)] = FNMS(KP923879532, T7h, T6U);
Chris@82 700 Rp[WS(rs, 6)] = FMA(KP923879532, T7h, T6U);
Chris@82 701 T8t = FMA(KP707106781, T8s, T8r);
Chris@82 702 T8u = T7k - T7j;
Chris@82 703 Im[WS(rs, 9)] = FMS(KP923879532, T8u, T8t);
Chris@82 704 Ip[WS(rs, 6)] = FMA(KP923879532, T8u, T8t);
Chris@82 705 }
Chris@82 706 {
Chris@82 707 E T7i, T7l, T8v, T8w;
Chris@82 708 T7i = FNMS(KP707106781, T6T, T6I);
Chris@82 709 T7l = T7j + T7k;
Chris@82 710 Rp[WS(rs, 14)] = FNMS(KP923879532, T7l, T7i);
Chris@82 711 Rm[WS(rs, 1)] = FMA(KP923879532, T7l, T7i);
Chris@82 712 T8v = FNMS(KP707106781, T8s, T8r);
Chris@82 713 T8w = T7g - T75;
Chris@82 714 Im[WS(rs, 1)] = FMS(KP923879532, T8w, T8v);
Chris@82 715 Ip[WS(rs, 14)] = FMA(KP923879532, T8w, T8v);
Chris@82 716 }
Chris@82 717 {
Chris@82 718 E T7q, T7x, T8n, T8o;
Chris@82 719 T7q = FMA(KP707106781, T7p, T7m);
Chris@82 720 T7x = T7t + T7w;
Chris@82 721 Rm[WS(rs, 13)] = FNMS(KP923879532, T7x, T7q);
Chris@82 722 Rp[WS(rs, 2)] = FMA(KP923879532, T7x, T7q);
Chris@82 723 T8n = FMA(KP707106781, T8m, T8l);
Chris@82 724 T8o = T7z + T7A;
Chris@82 725 Im[WS(rs, 13)] = FMS(KP923879532, T8o, T8n);
Chris@82 726 Ip[WS(rs, 2)] = FMA(KP923879532, T8o, T8n);
Chris@82 727 }
Chris@82 728 {
Chris@82 729 E T7y, T7B, T8p, T8q;
Chris@82 730 T7y = FNMS(KP707106781, T7p, T7m);
Chris@82 731 T7B = T7z - T7A;
Chris@82 732 Rm[WS(rs, 5)] = FNMS(KP923879532, T7B, T7y);
Chris@82 733 Rp[WS(rs, 10)] = FMA(KP923879532, T7B, T7y);
Chris@82 734 T8p = FNMS(KP707106781, T8m, T8l);
Chris@82 735 T8q = T7w - T7t;
Chris@82 736 Im[WS(rs, 5)] = FMS(KP923879532, T8q, T8p);
Chris@82 737 Ip[WS(rs, 10)] = FMA(KP923879532, T8q, T8p);
Chris@82 738 }
Chris@82 739 }
/*
 * Output stage 3: stores Rp / Ip at offsets 1, 9, 13, 5 and Rm / Im at
 * offsets 14, 6, 2, 10.
 */
Chris@82 740 {
Chris@82 741 E T3S, T5C, T4n, T8I, T8B, T8H, T5F, T8C, T5w, T5Q, T5A, T5M, T4X, T5P, T5z;
Chris@82 742 E T5J;
Chris@82 743 {
Chris@82 744 E T3C, T3R, T5D, T5E;
Chris@82 745 T3C = T3w + T3B;
Chris@82 746 T3R = T3J + T3Q;
Chris@82 747 T3S = FMA(KP707106781, T3R, T3C);
Chris@82 748 T5C = FNMS(KP707106781, T3R, T3C);
Chris@82 749 {
Chris@82 750 E T47, T4m, T8z, T8A;
Chris@82 751 T47 = FMA(KP414213562, T46, T3Z);
Chris@82 752 T4m = FNMS(KP414213562, T4l, T4e);
Chris@82 753 T4n = T47 + T4m;
Chris@82 754 T8I = T4m - T47;
Chris@82 755 T8z = T8x - T8y;
Chris@82 756 T8A = T5T + T5U;
Chris@82 757 T8B = FMA(KP707106781, T8A, T8z);
Chris@82 758 T8H = FNMS(KP707106781, T8A, T8z);
Chris@82 759 }
Chris@82 760 T5D = FNMS(KP414213562, T3Z, T46);
Chris@82 761 T5E = FMA(KP414213562, T4e, T4l);
Chris@82 762 T5F = T5D - T5E;
Chris@82 763 T8C = T5D + T5E;
Chris@82 764 {
Chris@82 765 E T5k, T5K, T5v, T5L, T5j, T5u;
Chris@82 766 T5j = T5b + T5i;
Chris@82 767 T5k = FMA(KP707106781, T5j, T54);
Chris@82 768 T5K = FNMS(KP707106781, T5j, T54);
Chris@82 769 T5u = T5s + T5t;
Chris@82 770 T5v = FMA(KP707106781, T5u, T5r);
Chris@82 771 T5L = FNMS(KP707106781, T5u, T5r);
Chris@82 772 T5w = FNMS(KP198912367, T5v, T5k);
Chris@82 773 T5Q = FNMS(KP668178637, T5K, T5L);
Chris@82 774 T5A = FMA(KP198912367, T5k, T5v);
Chris@82 775 T5M = FMA(KP668178637, T5L, T5K);
Chris@82 776 }
Chris@82 777 {
Chris@82 778 E T4L, T5H, T4W, T5I, T4K, T4V;
Chris@82 779 T4K = T4C + T4J;
Chris@82 780 T4L = FMA(KP707106781, T4K, T4v);
Chris@82 781 T5H = FNMS(KP707106781, T4K, T4v);
Chris@82 782 T4V = T4T + T4U;
Chris@82 783 T4W = FMA(KP707106781, T4V, T4S);
Chris@82 784 T5I = FNMS(KP707106781, T4V, T4S);
Chris@82 785 T4X = FMA(KP198912367, T4W, T4L);
Chris@82 786 T5P = FMA(KP668178637, T5H, T5I);
Chris@82 787 T5z = FNMS(KP198912367, T4L, T4W);
Chris@82 788 T5J = FNMS(KP668178637, T5I, T5H);
Chris@82 789 }
Chris@82 790 }
Chris@82 791 {
Chris@82 792 E T4o, T5x, T8D, T8E;
Chris@82 793 T4o = FMA(KP923879532, T4n, T3S);
Chris@82 794 T5x = T4X + T5w;
Chris@82 795 Rm[WS(rs, 14)] = FNMS(KP980785280, T5x, T4o);
Chris@82 796 Rp[WS(rs, 1)] = FMA(KP980785280, T5x, T4o);
Chris@82 797 T8D = FMA(KP923879532, T8C, T8B);
Chris@82 798 T8E = T5z + T5A;
Chris@82 799 Im[WS(rs, 14)] = FMS(KP980785280, T8E, T8D);
Chris@82 800 Ip[WS(rs, 1)] = FMA(KP980785280, T8E, T8D);
Chris@82 801 }
Chris@82 802 {
Chris@82 803 E T5y, T5B, T8F, T8G;
Chris@82 804 T5y = FNMS(KP923879532, T4n, T3S);
Chris@82 805 T5B = T5z - T5A;
Chris@82 806 Rm[WS(rs, 6)] = FNMS(KP980785280, T5B, T5y);
Chris@82 807 Rp[WS(rs, 9)] = FMA(KP980785280, T5B, T5y);
Chris@82 808 T8F = FNMS(KP923879532, T8C, T8B);
Chris@82 809 T8G = T5w - T4X;
Chris@82 810 Im[WS(rs, 6)] = FMS(KP980785280, T8G, T8F);
Chris@82 811 Ip[WS(rs, 9)] = FMA(KP980785280, T8G, T8F);
Chris@82 812 }
Chris@82 813 {
Chris@82 814 E T5G, T5N, T8L, T8M;
Chris@82 815 T5G = FNMS(KP923879532, T5F, T5C);
Chris@82 816 T5N = T5J + T5M;
Chris@82 817 Rp[WS(rs, 13)] = FNMS(KP831469612, T5N, T5G);
Chris@82 818 Rm[WS(rs, 2)] = FMA(KP831469612, T5N, T5G);
Chris@82 819 T8L = FNMS(KP923879532, T8I, T8H);
Chris@82 820 T8M = T5P + T5Q;
/* Unlike the other Im stores, this one negates an FMA result instead of using FMS. */
Chris@82 821 Im[WS(rs, 2)] = -(FMA(KP831469612, T8M, T8L));
Chris@82 822 Ip[WS(rs, 13)] = FNMS(KP831469612, T8M, T8L);
Chris@82 823 }
Chris@82 824 {
Chris@82 825 E T5O, T5R, T8J, T8K;
Chris@82 826 T5O = FMA(KP923879532, T5F, T5C);
Chris@82 827 T5R = T5P - T5Q;
Chris@82 828 Rm[WS(rs, 10)] = FNMS(KP831469612, T5R, T5O);
Chris@82 829 Rp[WS(rs, 5)] = FMA(KP831469612, T5R, T5O);
Chris@82 830 T8J = FMA(KP923879532, T8I, T8H);
Chris@82 831 T8K = T5M - T5J;
Chris@82 832 Im[WS(rs, 10)] = FMS(KP831469612, T8K, T8J);
Chris@82 833 Ip[WS(rs, 5)] = FMA(KP831469612, T8K, T8J);
Chris@82 834 }
Chris@82 835 }
/*
 * Output stage 4: stores Rp / Ip at offsets 3, 11, 7, 15 and Rm / Im at
 * offsets 12, 4, 8, 0.
 */
Chris@82 836 {
Chris@82 837 E T5W, T6o, T63, T8W, T8P, T8V, T6r, T8Q, T6i, T6C, T6m, T6y, T6b, T6B, T6l;
Chris@82 838 E T6v;
Chris@82 839 {
Chris@82 840 E T5S, T5V, T6p, T6q;
Chris@82 841 T5S = T3w - T3B;
Chris@82 842 T5V = T5T - T5U;
Chris@82 843 T5W = FMA(KP707106781, T5V, T5S);
Chris@82 844 T6o = FNMS(KP707106781, T5V, T5S);
Chris@82 845 {
Chris@82 846 E T5Z, T62, T8N, T8O;
Chris@82 847 T5Z = FMA(KP414213562, T5Y, T5X);
Chris@82 848 T62 = FNMS(KP414213562, T61, T60);
Chris@82 849 T63 = T5Z - T62;
Chris@82 850 T8W = T5Z + T62;
Chris@82 851 T8N = T8y + T8x;
Chris@82 852 T8O = T3Q - T3J;
Chris@82 853 T8P = FMA(KP707106781, T8O, T8N);
Chris@82 854 T8V = FNMS(KP707106781, T8O, T8N);
Chris@82 855 }
Chris@82 856 T6p = FNMS(KP414213562, T5X, T5Y);
Chris@82 857 T6q = FMA(KP414213562, T60, T61);
Chris@82 858 T6r = T6p + T6q;
Chris@82 859 T8Q = T6q - T6p;
Chris@82 860 {
Chris@82 861 E T6e, T6x, T6h, T6w, T6d, T6g;
Chris@82 862 T6d = T5t - T5s;
Chris@82 863 T6e = FNMS(KP707106781, T6d, T6c);
Chris@82 864 T6x = FMA(KP707106781, T6d, T6c);
Chris@82 865 T6g = T5i - T5b;
Chris@82 866 T6h = FNMS(KP707106781, T6g, T6f);
Chris@82 867 T6w = FMA(KP707106781, T6g, T6f);
Chris@82 868 T6i = FMA(KP668178637, T6h, T6e);
Chris@82 869 T6C = FNMS(KP198912367, T6w, T6x);
Chris@82 870 T6m = FNMS(KP668178637, T6e, T6h);
Chris@82 871 T6y = FMA(KP198912367, T6x, T6w);
Chris@82 872 }
Chris@82 873 {
Chris@82 874 E T67, T6u, T6a, T6t, T66, T69;
Chris@82 875 T66 = T4U - T4T;
Chris@82 876 T67 = FNMS(KP707106781, T66, T65);
Chris@82 877 T6u = FMA(KP707106781, T66, T65);
Chris@82 878 T69 = T4C - T4J;
Chris@82 879 T6a = FNMS(KP707106781, T69, T68);
Chris@82 880 T6t = FMA(KP707106781, T69, T68);
Chris@82 881 T6b = FMA(KP668178637, T6a, T67);
Chris@82 882 T6B = FNMS(KP198912367, T6t, T6u);
Chris@82 883 T6l = FNMS(KP668178637, T67, T6a);
Chris@82 884 T6v = FMA(KP198912367, T6u, T6t);
Chris@82 885 }
Chris@82 886 }
Chris@82 887 {
Chris@82 888 E T64, T6j, T8R, T8S;
Chris@82 889 T64 = FMA(KP923879532, T63, T5W);
Chris@82 890 T6j = T6b + T6i;
Chris@82 891 Rm[WS(rs, 12)] = FNMS(KP831469612, T6j, T64);
Chris@82 892 Rp[WS(rs, 3)] = FMA(KP831469612, T6j, T64);
Chris@82 893 T8R = FMA(KP923879532, T8Q, T8P);
Chris@82 894 T8S = T6l - T6m;
Chris@82 895 Im[WS(rs, 12)] = FMS(KP831469612, T8S, T8R);
Chris@82 896 Ip[WS(rs, 3)] = FMA(KP831469612, T8S, T8R);
Chris@82 897 }
Chris@82 898 {
Chris@82 899 E T6k, T6n, T8T, T8U;
Chris@82 900 T6k = FNMS(KP923879532, T63, T5W);
Chris@82 901 T6n = T6l + T6m;
Chris@82 902 Rm[WS(rs, 4)] = FNMS(KP831469612, T6n, T6k);
Chris@82 903 Rp[WS(rs, 11)] = FMA(KP831469612, T6n, T6k);
Chris@82 904 T8T = FNMS(KP923879532, T8Q, T8P);
Chris@82 905 T8U = T6i - T6b;
Chris@82 906 Im[WS(rs, 4)] = FMS(KP831469612, T8U, T8T);
Chris@82 907 Ip[WS(rs, 11)] = FMA(KP831469612, T8U, T8T);
Chris@82 908 }
Chris@82 909 {
Chris@82 910 E T6s, T6z, T8X, T8Y;
Chris@82 911 T6s = FNMS(KP923879532, T6r, T6o);
Chris@82 912 T6z = T6v + T6y;
Chris@82 913 Rm[WS(rs, 8)] = FNMS(KP980785280, T6z, T6s);
Chris@82 914 Rp[WS(rs, 7)] = FMA(KP980785280, T6z, T6s);
Chris@82 915 T8X = FNMS(KP923879532, T8W, T8V);
Chris@82 916 T8Y = T6C - T6B;
Chris@82 917 Im[WS(rs, 8)] = FMS(KP980785280, T8Y, T8X);
Chris@82 918 Ip[WS(rs, 7)] = FMA(KP980785280, T8Y, T8X);
Chris@82 919 }
Chris@82 920 {
Chris@82 921 E T6A, T6D, T8Z, T90;
Chris@82 922 T6A = FMA(KP923879532, T6r, T6o);
Chris@82 923 T6D = T6B + T6C;
Chris@82 924 Rp[WS(rs, 15)] = FNMS(KP980785280, T6D, T6A);
Chris@82 925 Rm[0] = FMA(KP980785280, T6D, T6A);
Chris@82 926 T8Z = FMA(KP923879532, T8W, T8V);
Chris@82 927 T90 = T6y - T6v;
Chris@82 928 Im[0] = FMS(KP980785280, T90, T8Z);
Chris@82 929 Ip[WS(rs, 15)] = FMA(KP980785280, T90, T8Z);
Chris@82 930 }
Chris@82 931 }
Chris@82 932 }
Chris@82 933 }
Chris@82 934 }
Chris@82 935
/*
 * Twiddle instruction table referenced from desc: one TW_FULL entry with
 * arguments (1, 32) followed by a TW_NEXT entry with arguments (1, 0).
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared in project
 * headers not visible here; presumably this requests the 62-entry (31 pairs)
 * coefficient table that hc2cf_32 indexes as W[0..61] -- confirm.
 */
Chris@82 936 static const tw_instr twinstr[] = {
Chris@82 937 {TW_FULL, 1, 32},
Chris@82 938 {TW_NEXT, 1, 0}
Chris@82 939 };
Chris@82 940
/*
 * Descriptor handed to X(khc2c_register) together with hc2cf_32: the value
 * 32, the name string, the twinstr table, &GENUS, and the tuple
 * {236, 62, 198, 0}. The first three tuple entries equal the addition,
 * multiplication and fused multiply/add counts quoted in the generator
 * comment for this FMA variant.
 * NOTE(review): the meaning of the fourth entry and of GENUS comes from
 * project headers not visible here.
 */
Chris@82 941 static const hc2c_desc desc = { 32, "hc2cf_32", twinstr, &GENUS, {236, 62, 198, 0} };
Chris@82 942
/*
 * Registration entry point (FMA build).  Passes the kernel function
 * hc2cf_32, its descriptor and the HC2C_VIA_RDFT constant to
 * X(khc2c_register) for planner `p`.  X() is a macro defined outside this
 * file (presumably the library's symbol-prefixing macro -- confirm).
 */
Chris@82 943 void X(codelet_hc2cf_32) (planner *p) {
Chris@82 944 X(khc2c_register) (p, hc2cf_32, &desc, HC2C_VIA_RDFT);
Chris@82 945 }
Chris@82 946 #else
Chris@82 947
Chris@82 948 /* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hc2cf_32 -include rdft/scalar/hc2cf.h */
Chris@82 949
Chris@82 950 /*
Chris@82 951 * This function contains 434 FP additions, 208 FP multiplications,
Chris@82 952 * (or, 340 additions, 114 multiplications, 94 fused multiply/add),
Chris@82 953 * 96 stack variables, 7 constants, and 128 memory accesses
Chris@82 954 */
Chris@82 955 #include "rdft/scalar/hc2cf.h"
Chris@82 956
/*
 * hc2cf_32, non-FMA variant (compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined).  Machine-generated by genfft with
 * "-n 32 -dit"; do not hand-edit the arithmetic.
 *
 * Parameters:
 *   Rp, Ip, Rm, Im  Four arrays updated in place.  In every iteration each
 *                   array is read at WS(rs, k) for k = 0..15 and then
 *                   written at the same 16 positions (64 loads, 64 stores).
 *   W               Twiddle table.  Offset by (mb - 1) * 62 on entry to the
 *                   loop and advanced by 62 per iteration; entries
 *                   W[0]..W[61] are read each iteration.
 *   rs              Stride argument forwarded to WS().
 *   mb, me          Loop runs for m = mb while m < me.
 *   ms              Per-iteration pointer step: Rp and Ip move by +ms,
 *                   Rm and Im move by -ms.
 *
 * Structure of one iteration:
 *   1. Load stage: (Rp[k], Rm[k]) for k = 1..15 is combined with
 *      (W[4k-2], W[4k-1]); (Ip[k], Im[k]) for k = 0..15 is combined with
 *      (W[4k], W[4k+1]); Rp[0] and Rm[0] are used unmodified.
 *      Assuming FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b (macros
 *      defined outside this file -- confirm), each combination yields
 *      w0*x + w1*y and w0*y - w1*x.
 *   2. Butterfly stages on the temporaries.
 *   3. Store stage: results are written as sum/difference pairs to
 *      Rp[k] / Rm[15-k] and Ip[k] / Im[15-k].
 * All loads of an iteration precede its first store.
 */
Chris@82 957 static void hc2cf_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 958 {
/* Constants (values as declared): sin/cos(pi/16), sin/cos(3*pi/16), sin/cos(pi/8), and sqrt(1/2). */
Chris@82 959 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@82 960 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 961 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@82 962 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 963 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@82 964 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 965 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 966 {
Chris@82 967 INT m;
/* One pass per m in [mb, me); the pointer updates live in the for-increment. MAKE_VOLATILE_STRIDE is a macro defined outside this file. */
Chris@82 968 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) {
/* Temporaries that survive from the load stage into the butterfly/store stages. */
Chris@82 969 E Tj, T5F, T7C, T7Q, T35, T4T, T78, T7m, T1Q, T61, T5Y, T6J, T3K, T59, T41;
Chris@82 970 E T56, T2B, T67, T6e, T6O, T4b, T5d, T4s, T5g, TG, T7l, T5I, T73, T3a, T4U;
Chris@82 971 E T3f, T4V, T14, T5N, T5M, T6E, T3m, T4Y, T3r, T4Z, T1r, T5P, T5S, T6F, T3x;
Chris@82 972 E T51, T3C, T52, T2d, T5Z, T64, T6K, T3V, T57, T44, T5a, T2Y, T6f, T6a, T6P;
Chris@82 973 E T4m, T5h, T4v, T5e;
/* Load group: Rp/Rm at indices 0, 8, 4, 12 (index 0 is taken without a twiddle). */
Chris@82 974 {
Chris@82 975 E T1, T76, T6, T75, Tc, T32, Th, T33;
Chris@82 976 T1 = Rp[0];
Chris@82 977 T76 = Rm[0];
Chris@82 978 {
Chris@82 979 E T3, T5, T2, T4;
Chris@82 980 T3 = Rp[WS(rs, 8)];
Chris@82 981 T5 = Rm[WS(rs, 8)];
Chris@82 982 T2 = W[30];
Chris@82 983 T4 = W[31];
Chris@82 984 T6 = FMA(T2, T3, T4 * T5);
Chris@82 985 T75 = FNMS(T4, T3, T2 * T5);
Chris@82 986 }
Chris@82 987 {
Chris@82 988 E T9, Tb, T8, Ta;
Chris@82 989 T9 = Rp[WS(rs, 4)];
Chris@82 990 Tb = Rm[WS(rs, 4)];
Chris@82 991 T8 = W[14];
Chris@82 992 Ta = W[15];
Chris@82 993 Tc = FMA(T8, T9, Ta * Tb);
Chris@82 994 T32 = FNMS(Ta, T9, T8 * Tb);
Chris@82 995 }
Chris@82 996 {
Chris@82 997 E Te, Tg, Td, Tf;
Chris@82 998 Te = Rp[WS(rs, 12)];
Chris@82 999 Tg = Rm[WS(rs, 12)];
Chris@82 1000 Td = W[46];
Chris@82 1001 Tf = W[47];
Chris@82 1002 Th = FMA(Td, Te, Tf * Tg);
Chris@82 1003 T33 = FNMS(Tf, Te, Td * Tg);
Chris@82 1004 }
Chris@82 1005 {
Chris@82 1006 E T7, Ti, T7A, T7B;
Chris@82 1007 T7 = T1 + T6;
Chris@82 1008 Ti = Tc + Th;
Chris@82 1009 Tj = T7 + Ti;
Chris@82 1010 T5F = T7 - Ti;
Chris@82 1011 T7A = T76 - T75;
Chris@82 1012 T7B = Tc - Th;
Chris@82 1013 T7C = T7A - T7B;
Chris@82 1014 T7Q = T7B + T7A;
Chris@82 1015 }
Chris@82 1016 {
Chris@82 1017 E T31, T34, T74, T77;
Chris@82 1018 T31 = T1 - T6;
Chris@82 1019 T34 = T32 - T33;
Chris@82 1020 T35 = T31 - T34;
Chris@82 1021 T4T = T31 + T34;
Chris@82 1022 T74 = T32 + T33;
Chris@82 1023 T77 = T75 + T76;
Chris@82 1024 T78 = T74 + T77;
Chris@82 1025 T7m = T77 - T74;
Chris@82 1026 }
Chris@82 1027 }
/* Load group: Ip/Im at indices 0, 12, 8, 4. */
Chris@82 1028 {
Chris@82 1029 E T1y, T3G, T1O, T3Z, T1D, T3H, T1J, T3Y;
Chris@82 1030 {
Chris@82 1031 E T1v, T1x, T1u, T1w;
Chris@82 1032 T1v = Ip[0];
Chris@82 1033 T1x = Im[0];
Chris@82 1034 T1u = W[0];
Chris@82 1035 T1w = W[1];
Chris@82 1036 T1y = FMA(T1u, T1v, T1w * T1x);
Chris@82 1037 T3G = FNMS(T1w, T1v, T1u * T1x);
Chris@82 1038 }
Chris@82 1039 {
Chris@82 1040 E T1L, T1N, T1K, T1M;
Chris@82 1041 T1L = Ip[WS(rs, 12)];
Chris@82 1042 T1N = Im[WS(rs, 12)];
Chris@82 1043 T1K = W[48];
Chris@82 1044 T1M = W[49];
Chris@82 1045 T1O = FMA(T1K, T1L, T1M * T1N);
Chris@82 1046 T3Z = FNMS(T1M, T1L, T1K * T1N);
Chris@82 1047 }
Chris@82 1048 {
Chris@82 1049 E T1A, T1C, T1z, T1B;
Chris@82 1050 T1A = Ip[WS(rs, 8)];
Chris@82 1051 T1C = Im[WS(rs, 8)];
Chris@82 1052 T1z = W[32];
Chris@82 1053 T1B = W[33];
Chris@82 1054 T1D = FMA(T1z, T1A, T1B * T1C);
Chris@82 1055 T3H = FNMS(T1B, T1A, T1z * T1C);
Chris@82 1056 }
Chris@82 1057 {
Chris@82 1058 E T1G, T1I, T1F, T1H;
Chris@82 1059 T1G = Ip[WS(rs, 4)];
Chris@82 1060 T1I = Im[WS(rs, 4)];
Chris@82 1061 T1F = W[16];
Chris@82 1062 T1H = W[17];
Chris@82 1063 T1J = FMA(T1F, T1G, T1H * T1I);
Chris@82 1064 T3Y = FNMS(T1H, T1G, T1F * T1I);
Chris@82 1065 }
Chris@82 1066 {
Chris@82 1067 E T1E, T1P, T5W, T5X;
Chris@82 1068 T1E = T1y + T1D;
Chris@82 1069 T1P = T1J + T1O;
Chris@82 1070 T1Q = T1E + T1P;
Chris@82 1071 T61 = T1E - T1P;
Chris@82 1072 T5W = T3G + T3H;
Chris@82 1073 T5X = T3Y + T3Z;
Chris@82 1074 T5Y = T5W - T5X;
Chris@82 1075 T6J = T5W + T5X;
Chris@82 1076 }
Chris@82 1077 {
Chris@82 1078 E T3I, T3J, T3X, T40;
Chris@82 1079 T3I = T3G - T3H;
Chris@82 1080 T3J = T1J - T1O;
Chris@82 1081 T3K = T3I + T3J;
Chris@82 1082 T59 = T3I - T3J;
Chris@82 1083 T3X = T1y - T1D;
Chris@82 1084 T40 = T3Y - T3Z;
Chris@82 1085 T41 = T3X - T40;
Chris@82 1086 T56 = T3X + T40;
Chris@82 1087 }
Chris@82 1088 }
/* Load group: Ip/Im at indices 15, 11, 7, 3. */
Chris@82 1089 {
Chris@82 1090 E T2j, T4o, T2z, T49, T2o, T4p, T2u, T48;
Chris@82 1091 {
Chris@82 1092 E T2g, T2i, T2f, T2h;
Chris@82 1093 T2g = Ip[WS(rs, 15)];
Chris@82 1094 T2i = Im[WS(rs, 15)];
Chris@82 1095 T2f = W[60];
Chris@82 1096 T2h = W[61];
Chris@82 1097 T2j = FMA(T2f, T2g, T2h * T2i);
Chris@82 1098 T4o = FNMS(T2h, T2g, T2f * T2i);
Chris@82 1099 }
Chris@82 1100 {
Chris@82 1101 E T2w, T2y, T2v, T2x;
Chris@82 1102 T2w = Ip[WS(rs, 11)];
Chris@82 1103 T2y = Im[WS(rs, 11)];
Chris@82 1104 T2v = W[44];
Chris@82 1105 T2x = W[45];
Chris@82 1106 T2z = FMA(T2v, T2w, T2x * T2y);
Chris@82 1107 T49 = FNMS(T2x, T2w, T2v * T2y);
Chris@82 1108 }
Chris@82 1109 {
Chris@82 1110 E T2l, T2n, T2k, T2m;
Chris@82 1111 T2l = Ip[WS(rs, 7)];
Chris@82 1112 T2n = Im[WS(rs, 7)];
Chris@82 1113 T2k = W[28];
Chris@82 1114 T2m = W[29];
Chris@82 1115 T2o = FMA(T2k, T2l, T2m * T2n);
Chris@82 1116 T4p = FNMS(T2m, T2l, T2k * T2n);
Chris@82 1117 }
Chris@82 1118 {
Chris@82 1119 E T2r, T2t, T2q, T2s;
Chris@82 1120 T2r = Ip[WS(rs, 3)];
Chris@82 1121 T2t = Im[WS(rs, 3)];
Chris@82 1122 T2q = W[12];
Chris@82 1123 T2s = W[13];
Chris@82 1124 T2u = FMA(T2q, T2r, T2s * T2t);
Chris@82 1125 T48 = FNMS(T2s, T2r, T2q * T2t);
Chris@82 1126 }
Chris@82 1127 {
Chris@82 1128 E T2p, T2A, T6c, T6d;
Chris@82 1129 T2p = T2j + T2o;
Chris@82 1130 T2A = T2u + T2z;
Chris@82 1131 T2B = T2p + T2A;
Chris@82 1132 T67 = T2p - T2A;
Chris@82 1133 T6c = T4o + T4p;
Chris@82 1134 T6d = T48 + T49;
Chris@82 1135 T6e = T6c - T6d;
Chris@82 1136 T6O = T6c + T6d;
Chris@82 1137 }
Chris@82 1138 {
Chris@82 1139 E T47, T4a, T4q, T4r;
Chris@82 1140 T47 = T2j - T2o;
Chris@82 1141 T4a = T48 - T49;
Chris@82 1142 T4b = T47 - T4a;
Chris@82 1143 T5d = T47 + T4a;
Chris@82 1144 T4q = T4o - T4p;
Chris@82 1145 T4r = T2u - T2z;
Chris@82 1146 T4s = T4q + T4r;
Chris@82 1147 T5g = T4q - T4r;
Chris@82 1148 }
Chris@82 1149 }
/* Load group: Rp/Rm at indices 2, 6, 10, 14. */
Chris@82 1150 {
Chris@82 1151 E To, T36, TE, T3d, Tt, T37, Tz, T3c;
Chris@82 1152 {
Chris@82 1153 E Tl, Tn, Tk, Tm;
Chris@82 1154 Tl = Rp[WS(rs, 2)];
Chris@82 1155 Tn = Rm[WS(rs, 2)];
Chris@82 1156 Tk = W[6];
Chris@82 1157 Tm = W[7];
Chris@82 1158 To = FMA(Tk, Tl, Tm * Tn);
Chris@82 1159 T36 = FNMS(Tm, Tl, Tk * Tn);
Chris@82 1160 }
Chris@82 1161 {
Chris@82 1162 E TB, TD, TA, TC;
Chris@82 1163 TB = Rp[WS(rs, 6)];
Chris@82 1164 TD = Rm[WS(rs, 6)];
Chris@82 1165 TA = W[22];
Chris@82 1166 TC = W[23];
Chris@82 1167 TE = FMA(TA, TB, TC * TD);
Chris@82 1168 T3d = FNMS(TC, TB, TA * TD);
Chris@82 1169 }
Chris@82 1170 {
Chris@82 1171 E Tq, Ts, Tp, Tr;
Chris@82 1172 Tq = Rp[WS(rs, 10)];
Chris@82 1173 Ts = Rm[WS(rs, 10)];
Chris@82 1174 Tp = W[38];
Chris@82 1175 Tr = W[39];
Chris@82 1176 Tt = FMA(Tp, Tq, Tr * Ts);
Chris@82 1177 T37 = FNMS(Tr, Tq, Tp * Ts);
Chris@82 1178 }
Chris@82 1179 {
Chris@82 1180 E Tw, Ty, Tv, Tx;
Chris@82 1181 Tw = Rp[WS(rs, 14)];
Chris@82 1182 Ty = Rm[WS(rs, 14)];
Chris@82 1183 Tv = W[54];
Chris@82 1184 Tx = W[55];
Chris@82 1185 Tz = FMA(Tv, Tw, Tx * Ty);
Chris@82 1186 T3c = FNMS(Tx, Tw, Tv * Ty);
Chris@82 1187 }
Chris@82 1188 {
Chris@82 1189 E Tu, TF, T5G, T5H;
Chris@82 1190 Tu = To + Tt;
Chris@82 1191 TF = Tz + TE;
Chris@82 1192 TG = Tu + TF;
Chris@82 1193 T7l = TF - Tu;
Chris@82 1194 T5G = T36 + T37;
Chris@82 1195 T5H = T3c + T3d;
Chris@82 1196 T5I = T5G - T5H;
Chris@82 1197 T73 = T5G + T5H;
Chris@82 1198 }
Chris@82 1199 {
Chris@82 1200 E T38, T39, T3b, T3e;
Chris@82 1201 T38 = T36 - T37;
Chris@82 1202 T39 = To - Tt;
Chris@82 1203 T3a = T38 - T39;
Chris@82 1204 T4U = T39 + T38;
Chris@82 1205 T3b = Tz - TE;
Chris@82 1206 T3e = T3c - T3d;
Chris@82 1207 T3f = T3b + T3e;
Chris@82 1208 T4V = T3b - T3e;
Chris@82 1209 }
Chris@82 1210 }
/* Load group: Rp/Rm at indices 1, 13, 9, 5. */
Chris@82 1211 {
Chris@82 1212 E TM, T3i, T12, T3p, TR, T3j, TX, T3o;
Chris@82 1213 {
Chris@82 1214 E TJ, TL, TI, TK;
Chris@82 1215 TJ = Rp[WS(rs, 1)];
Chris@82 1216 TL = Rm[WS(rs, 1)];
Chris@82 1217 TI = W[2];
Chris@82 1218 TK = W[3];
Chris@82 1219 TM = FMA(TI, TJ, TK * TL);
Chris@82 1220 T3i = FNMS(TK, TJ, TI * TL);
Chris@82 1221 }
Chris@82 1222 {
Chris@82 1223 E TZ, T11, TY, T10;
Chris@82 1224 TZ = Rp[WS(rs, 13)];
Chris@82 1225 T11 = Rm[WS(rs, 13)];
Chris@82 1226 TY = W[50];
Chris@82 1227 T10 = W[51];
Chris@82 1228 T12 = FMA(TY, TZ, T10 * T11);
Chris@82 1229 T3p = FNMS(T10, TZ, TY * T11);
Chris@82 1230 }
Chris@82 1231 {
Chris@82 1232 E TO, TQ, TN, TP;
Chris@82 1233 TO = Rp[WS(rs, 9)];
Chris@82 1234 TQ = Rm[WS(rs, 9)];
Chris@82 1235 TN = W[34];
Chris@82 1236 TP = W[35];
Chris@82 1237 TR = FMA(TN, TO, TP * TQ);
Chris@82 1238 T3j = FNMS(TP, TO, TN * TQ);
Chris@82 1239 }
Chris@82 1240 {
Chris@82 1241 E TU, TW, TT, TV;
Chris@82 1242 TU = Rp[WS(rs, 5)];
Chris@82 1243 TW = Rm[WS(rs, 5)];
Chris@82 1244 TT = W[18];
Chris@82 1245 TV = W[19];
Chris@82 1246 TX = FMA(TT, TU, TV * TW);
Chris@82 1247 T3o = FNMS(TV, TU, TT * TW);
Chris@82 1248 }
Chris@82 1249 {
Chris@82 1250 E TS, T13, T5K, T5L;
Chris@82 1251 TS = TM + TR;
Chris@82 1252 T13 = TX + T12;
Chris@82 1253 T14 = TS + T13;
Chris@82 1254 T5N = TS - T13;
Chris@82 1255 T5K = T3i + T3j;
Chris@82 1256 T5L = T3o + T3p;
Chris@82 1257 T5M = T5K - T5L;
Chris@82 1258 T6E = T5K + T5L;
Chris@82 1259 }
Chris@82 1260 {
Chris@82 1261 E T3k, T3l, T3n, T3q;
Chris@82 1262 T3k = T3i - T3j;
Chris@82 1263 T3l = TX - T12;
Chris@82 1264 T3m = T3k + T3l;
Chris@82 1265 T4Y = T3k - T3l;
Chris@82 1266 T3n = TM - TR;
Chris@82 1267 T3q = T3o - T3p;
Chris@82 1268 T3r = T3n - T3q;
Chris@82 1269 T4Z = T3n + T3q;
Chris@82 1270 }
Chris@82 1271 }
/* Load group: Rp/Rm at indices 15, 11, 7, 3. */
Chris@82 1272 {
Chris@82 1273 E T19, T3t, T1p, T3A, T1e, T3u, T1k, T3z;
Chris@82 1274 {
Chris@82 1275 E T16, T18, T15, T17;
Chris@82 1276 T16 = Rp[WS(rs, 15)];
Chris@82 1277 T18 = Rm[WS(rs, 15)];
Chris@82 1278 T15 = W[58];
Chris@82 1279 T17 = W[59];
Chris@82 1280 T19 = FMA(T15, T16, T17 * T18);
Chris@82 1281 T3t = FNMS(T17, T16, T15 * T18);
Chris@82 1282 }
Chris@82 1283 {
Chris@82 1284 E T1m, T1o, T1l, T1n;
Chris@82 1285 T1m = Rp[WS(rs, 11)];
Chris@82 1286 T1o = Rm[WS(rs, 11)];
Chris@82 1287 T1l = W[42];
Chris@82 1288 T1n = W[43];
Chris@82 1289 T1p = FMA(T1l, T1m, T1n * T1o);
Chris@82 1290 T3A = FNMS(T1n, T1m, T1l * T1o);
Chris@82 1291 }
Chris@82 1292 {
Chris@82 1293 E T1b, T1d, T1a, T1c;
Chris@82 1294 T1b = Rp[WS(rs, 7)];
Chris@82 1295 T1d = Rm[WS(rs, 7)];
Chris@82 1296 T1a = W[26];
Chris@82 1297 T1c = W[27];
Chris@82 1298 T1e = FMA(T1a, T1b, T1c * T1d);
Chris@82 1299 T3u = FNMS(T1c, T1b, T1a * T1d);
Chris@82 1300 }
Chris@82 1301 {
Chris@82 1302 E T1h, T1j, T1g, T1i;
Chris@82 1303 T1h = Rp[WS(rs, 3)];
Chris@82 1304 T1j = Rm[WS(rs, 3)];
Chris@82 1305 T1g = W[10];
Chris@82 1306 T1i = W[11];
Chris@82 1307 T1k = FMA(T1g, T1h, T1i * T1j);
Chris@82 1308 T3z = FNMS(T1i, T1h, T1g * T1j);
Chris@82 1309 }
Chris@82 1310 {
Chris@82 1311 E T1f, T1q, T5Q, T5R;
Chris@82 1312 T1f = T19 + T1e;
Chris@82 1313 T1q = T1k + T1p;
Chris@82 1314 T1r = T1f + T1q;
Chris@82 1315 T5P = T1f - T1q;
Chris@82 1316 T5Q = T3t + T3u;
Chris@82 1317 T5R = T3z + T3A;
Chris@82 1318 T5S = T5Q - T5R;
Chris@82 1319 T6F = T5Q + T5R;
Chris@82 1320 }
Chris@82 1321 {
Chris@82 1322 E T3v, T3w, T3y, T3B;
Chris@82 1323 T3v = T3t - T3u;
Chris@82 1324 T3w = T1k - T1p;
Chris@82 1325 T3x = T3v + T3w;
Chris@82 1326 T51 = T3v - T3w;
Chris@82 1327 T3y = T19 - T1e;
Chris@82 1328 T3B = T3z - T3A;
Chris@82 1329 T3C = T3y - T3B;
Chris@82 1330 T52 = T3y + T3B;
Chris@82 1331 }
Chris@82 1332 }
/* Load group: Ip/Im at indices 2, 10, 14, 6; the cross terms are scaled by KP707106781 here. */
Chris@82 1333 {
Chris@82 1334 E T1V, T3R, T20, T3S, T3Q, T3T, T26, T3M, T2b, T3N, T3L, T3O;
Chris@82 1335 {
Chris@82 1336 E T1S, T1U, T1R, T1T;
Chris@82 1337 T1S = Ip[WS(rs, 2)];
Chris@82 1338 T1U = Im[WS(rs, 2)];
Chris@82 1339 T1R = W[8];
Chris@82 1340 T1T = W[9];
Chris@82 1341 T1V = FMA(T1R, T1S, T1T * T1U);
Chris@82 1342 T3R = FNMS(T1T, T1S, T1R * T1U);
Chris@82 1343 }
Chris@82 1344 {
Chris@82 1345 E T1X, T1Z, T1W, T1Y;
Chris@82 1346 T1X = Ip[WS(rs, 10)];
Chris@82 1347 T1Z = Im[WS(rs, 10)];
Chris@82 1348 T1W = W[40];
Chris@82 1349 T1Y = W[41];
Chris@82 1350 T20 = FMA(T1W, T1X, T1Y * T1Z);
Chris@82 1351 T3S = FNMS(T1Y, T1X, T1W * T1Z);
Chris@82 1352 }
Chris@82 1353 T3Q = T1V - T20;
Chris@82 1354 T3T = T3R - T3S;
Chris@82 1355 {
Chris@82 1356 E T23, T25, T22, T24;
Chris@82 1357 T23 = Ip[WS(rs, 14)];
Chris@82 1358 T25 = Im[WS(rs, 14)];
Chris@82 1359 T22 = W[56];
Chris@82 1360 T24 = W[57];
Chris@82 1361 T26 = FMA(T22, T23, T24 * T25);
Chris@82 1362 T3M = FNMS(T24, T23, T22 * T25);
Chris@82 1363 }
Chris@82 1364 {
Chris@82 1365 E T28, T2a, T27, T29;
Chris@82 1366 T28 = Ip[WS(rs, 6)];
Chris@82 1367 T2a = Im[WS(rs, 6)];
Chris@82 1368 T27 = W[24];
Chris@82 1369 T29 = W[25];
Chris@82 1370 T2b = FMA(T27, T28, T29 * T2a);
Chris@82 1371 T3N = FNMS(T29, T28, T27 * T2a);
Chris@82 1372 }
Chris@82 1373 T3L = T26 - T2b;
Chris@82 1374 T3O = T3M - T3N;
Chris@82 1375 {
Chris@82 1376 E T21, T2c, T62, T63;
Chris@82 1377 T21 = T1V + T20;
Chris@82 1378 T2c = T26 + T2b;
Chris@82 1379 T2d = T21 + T2c;
Chris@82 1380 T5Z = T2c - T21;
Chris@82 1381 T62 = T3R + T3S;
Chris@82 1382 T63 = T3M + T3N;
Chris@82 1383 T64 = T62 - T63;
Chris@82 1384 T6K = T62 + T63;
Chris@82 1385 }
Chris@82 1386 {
Chris@82 1387 E T3P, T3U, T42, T43;
Chris@82 1388 T3P = T3L - T3O;
Chris@82 1389 T3U = T3Q + T3T;
Chris@82 1390 T3V = KP707106781 * (T3P - T3U);
Chris@82 1391 T57 = KP707106781 * (T3U + T3P);
Chris@82 1392 T42 = T3T - T3Q;
Chris@82 1393 T43 = T3L + T3O;
Chris@82 1394 T44 = KP707106781 * (T42 - T43);
Chris@82 1395 T5a = KP707106781 * (T42 + T43);
Chris@82 1396 }
Chris@82 1397 }
/* Load group: Ip/Im at indices 1, 9, 13, 5; the cross terms are scaled by KP707106781 here. */
Chris@82 1398 {
Chris@82 1399 E T2G, T4c, T2L, T4d, T4e, T4f, T2R, T4i, T2W, T4j, T4h, T4k;
Chris@82 1400 {
Chris@82 1401 E T2D, T2F, T2C, T2E;
Chris@82 1402 T2D = Ip[WS(rs, 1)];
Chris@82 1403 T2F = Im[WS(rs, 1)];
Chris@82 1404 T2C = W[4];
Chris@82 1405 T2E = W[5];
Chris@82 1406 T2G = FMA(T2C, T2D, T2E * T2F);
Chris@82 1407 T4c = FNMS(T2E, T2D, T2C * T2F);
Chris@82 1408 }
Chris@82 1409 {
Chris@82 1410 E T2I, T2K, T2H, T2J;
Chris@82 1411 T2I = Ip[WS(rs, 9)];
Chris@82 1412 T2K = Im[WS(rs, 9)];
Chris@82 1413 T2H = W[36];
Chris@82 1414 T2J = W[37];
Chris@82 1415 T2L = FMA(T2H, T2I, T2J * T2K);
Chris@82 1416 T4d = FNMS(T2J, T2I, T2H * T2K);
Chris@82 1417 }
Chris@82 1418 T4e = T4c - T4d;
Chris@82 1419 T4f = T2G - T2L;
Chris@82 1420 {
Chris@82 1421 E T2O, T2Q, T2N, T2P;
Chris@82 1422 T2O = Ip[WS(rs, 13)];
Chris@82 1423 T2Q = Im[WS(rs, 13)];
Chris@82 1424 T2N = W[52];
Chris@82 1425 T2P = W[53];
Chris@82 1426 T2R = FMA(T2N, T2O, T2P * T2Q);
Chris@82 1427 T4i = FNMS(T2P, T2O, T2N * T2Q);
Chris@82 1428 }
Chris@82 1429 {
Chris@82 1430 E T2T, T2V, T2S, T2U;
Chris@82 1431 T2T = Ip[WS(rs, 5)];
Chris@82 1432 T2V = Im[WS(rs, 5)];
Chris@82 1433 T2S = W[20];
Chris@82 1434 T2U = W[21];
Chris@82 1435 T2W = FMA(T2S, T2T, T2U * T2V);
Chris@82 1436 T4j = FNMS(T2U, T2T, T2S * T2V);
Chris@82 1437 }
Chris@82 1438 T4h = T2R - T2W;
Chris@82 1439 T4k = T4i - T4j;
Chris@82 1440 {
Chris@82 1441 E T2M, T2X, T68, T69;
Chris@82 1442 T2M = T2G + T2L;
Chris@82 1443 T2X = T2R + T2W;
Chris@82 1444 T2Y = T2M + T2X;
Chris@82 1445 T6f = T2X - T2M;
Chris@82 1446 T68 = T4c + T4d;
Chris@82 1447 T69 = T4i + T4j;
Chris@82 1448 T6a = T68 - T69;
Chris@82 1449 T6P = T68 + T69;
Chris@82 1450 }
Chris@82 1451 {
Chris@82 1452 E T4g, T4l, T4t, T4u;
Chris@82 1453 T4g = T4e - T4f;
Chris@82 1454 T4l = T4h + T4k;
Chris@82 1455 T4m = KP707106781 * (T4g - T4l);
Chris@82 1456 T5h = KP707106781 * (T4g + T4l);
Chris@82 1457 T4t = T4h - T4k;
Chris@82 1458 T4u = T4f + T4e;
Chris@82 1459 T4v = KP707106781 * (T4t - T4u);
Chris@82 1460 T5e = KP707106781 * (T4u + T4t);
Chris@82 1461 }
Chris@82 1462 }
/* Store group (first stores of the iteration): Rp/Ip at 0 and 8, Rm/Im at 15 and 7. */
Chris@82 1463 {
Chris@82 1464 E T1t, T6X, T7a, T7c, T30, T7b, T70, T71;
Chris@82 1465 {
Chris@82 1466 E TH, T1s, T72, T79;
Chris@82 1467 TH = Tj + TG;
Chris@82 1468 T1s = T14 + T1r;
Chris@82 1469 T1t = TH + T1s;
Chris@82 1470 T6X = TH - T1s;
Chris@82 1471 T72 = T6E + T6F;
Chris@82 1472 T79 = T73 + T78;
Chris@82 1473 T7a = T72 + T79;
Chris@82 1474 T7c = T79 - T72;
Chris@82 1475 }
Chris@82 1476 {
Chris@82 1477 E T2e, T2Z, T6Y, T6Z;
Chris@82 1478 T2e = T1Q + T2d;
Chris@82 1479 T2Z = T2B + T2Y;
Chris@82 1480 T30 = T2e + T2Z;
Chris@82 1481 T7b = T2Z - T2e;
Chris@82 1482 T6Y = T6J + T6K;
Chris@82 1483 T6Z = T6O + T6P;
Chris@82 1484 T70 = T6Y - T6Z;
Chris@82 1485 T71 = T6Y + T6Z;
Chris@82 1486 }
Chris@82 1487 Rm[WS(rs, 15)] = T1t - T30;
Chris@82 1488 Im[WS(rs, 15)] = T71 - T7a;
Chris@82 1489 Rp[0] = T1t + T30;
Chris@82 1490 Ip[0] = T71 + T7a;
Chris@82 1491 Rm[WS(rs, 7)] = T6X - T70;
Chris@82 1492 Im[WS(rs, 7)] = T7b - T7c;
Chris@82 1493 Rp[WS(rs, 8)] = T6X + T70;
Chris@82 1494 Ip[WS(rs, 8)] = T7b + T7c;
Chris@82 1495 }
/* Store group: Rp/Ip at 4 and 12, Rm/Im at 11 and 3 (KP707106781 rotations). */
Chris@82 1496 {
Chris@82 1497 E T6H, T6T, T7g, T7i, T6M, T6U, T6R, T6V;
Chris@82 1498 {
Chris@82 1499 E T6D, T6G, T7e, T7f;
Chris@82 1500 T6D = Tj - TG;
Chris@82 1501 T6G = T6E - T6F;
Chris@82 1502 T6H = T6D + T6G;
Chris@82 1503 T6T = T6D - T6G;
Chris@82 1504 T7e = T1r - T14;
Chris@82 1505 T7f = T78 - T73;
Chris@82 1506 T7g = T7e + T7f;
Chris@82 1507 T7i = T7f - T7e;
Chris@82 1508 }
Chris@82 1509 {
Chris@82 1510 E T6I, T6L, T6N, T6Q;
Chris@82 1511 T6I = T1Q - T2d;
Chris@82 1512 T6L = T6J - T6K;
Chris@82 1513 T6M = T6I + T6L;
Chris@82 1514 T6U = T6L - T6I;
Chris@82 1515 T6N = T2B - T2Y;
Chris@82 1516 T6Q = T6O - T6P;
Chris@82 1517 T6R = T6N - T6Q;
Chris@82 1518 T6V = T6N + T6Q;
Chris@82 1519 }
Chris@82 1520 {
Chris@82 1521 E T6S, T7d, T6W, T7h;
Chris@82 1522 T6S = KP707106781 * (T6M + T6R);
Chris@82 1523 Rm[WS(rs, 11)] = T6H - T6S;
Chris@82 1524 Rp[WS(rs, 4)] = T6H + T6S;
Chris@82 1525 T7d = KP707106781 * (T6U + T6V);
Chris@82 1526 Im[WS(rs, 11)] = T7d - T7g;
Chris@82 1527 Ip[WS(rs, 4)] = T7d + T7g;
Chris@82 1528 T6W = KP707106781 * (T6U - T6V);
Chris@82 1529 Rm[WS(rs, 3)] = T6T - T6W;
Chris@82 1530 Rp[WS(rs, 12)] = T6T + T6W;
Chris@82 1531 T7h = KP707106781 * (T6R - T6M);
Chris@82 1532 Im[WS(rs, 3)] = T7h - T7i;
Chris@82 1533 Ip[WS(rs, 12)] = T7h + T7i;
Chris@82 1534 }
Chris@82 1535 }
/* Store group: Rp/Ip at 6, 14, 2, 10 and Rm/Im at 9, 1, 13, 5 (KP923879532 / KP382683432 rotations). */
Chris@82 1536 {
Chris@82 1537 E T5J, T7n, T7t, T6n, T5U, T7k, T6x, T6B, T6q, T7s, T66, T6k, T6u, T6A, T6h;
Chris@82 1538 E T6l;
Chris@82 1539 {
Chris@82 1540 E T5O, T5T, T60, T65;
Chris@82 1541 T5J = T5F - T5I;
Chris@82 1542 T7n = T7l + T7m;
Chris@82 1543 T7t = T7m - T7l;
Chris@82 1544 T6n = T5F + T5I;
Chris@82 1545 T5O = T5M - T5N;
Chris@82 1546 T5T = T5P + T5S;
Chris@82 1547 T5U = KP707106781 * (T5O - T5T);
Chris@82 1548 T7k = KP707106781 * (T5O + T5T);
Chris@82 1549 {
Chris@82 1550 E T6v, T6w, T6o, T6p;
Chris@82 1551 T6v = T67 + T6a;
Chris@82 1552 T6w = T6e + T6f;
Chris@82 1553 T6x = FNMS(KP382683432, T6w, KP923879532 * T6v);
Chris@82 1554 T6B = FMA(KP923879532, T6w, KP382683432 * T6v);
Chris@82 1555 T6o = T5N + T5M;
Chris@82 1556 T6p = T5P - T5S;
Chris@82 1557 T6q = KP707106781 * (T6o + T6p);
Chris@82 1558 T7s = KP707106781 * (T6p - T6o);
Chris@82 1559 }
Chris@82 1560 T60 = T5Y - T5Z;
Chris@82 1561 T65 = T61 - T64;
Chris@82 1562 T66 = FMA(KP923879532, T60, KP382683432 * T65);
Chris@82 1563 T6k = FNMS(KP923879532, T65, KP382683432 * T60);
Chris@82 1564 {
Chris@82 1565 E T6s, T6t, T6b, T6g;
Chris@82 1566 T6s = T5Y + T5Z;
Chris@82 1567 T6t = T61 + T64;
Chris@82 1568 T6u = FMA(KP382683432, T6s, KP923879532 * T6t);
Chris@82 1569 T6A = FNMS(KP382683432, T6t, KP923879532 * T6s);
Chris@82 1570 T6b = T67 - T6a;
Chris@82 1571 T6g = T6e - T6f;
Chris@82 1572 T6h = FNMS(KP923879532, T6g, KP382683432 * T6b);
Chris@82 1573 T6l = FMA(KP382683432, T6g, KP923879532 * T6b);
Chris@82 1574 }
Chris@82 1575 }
Chris@82 1576 {
Chris@82 1577 E T5V, T6i, T7r, T7u;
Chris@82 1578 T5V = T5J + T5U;
Chris@82 1579 T6i = T66 + T6h;
Chris@82 1580 Rm[WS(rs, 9)] = T5V - T6i;
Chris@82 1581 Rp[WS(rs, 6)] = T5V + T6i;
Chris@82 1582 T7r = T6k + T6l;
Chris@82 1583 T7u = T7s + T7t;
Chris@82 1584 Im[WS(rs, 9)] = T7r - T7u;
Chris@82 1585 Ip[WS(rs, 6)] = T7r + T7u;
Chris@82 1586 }
Chris@82 1587 {
Chris@82 1588 E T6j, T6m, T7v, T7w;
Chris@82 1589 T6j = T5J - T5U;
Chris@82 1590 T6m = T6k - T6l;
Chris@82 1591 Rm[WS(rs, 1)] = T6j - T6m;
Chris@82 1592 Rp[WS(rs, 14)] = T6j + T6m;
Chris@82 1593 T7v = T6h - T66;
Chris@82 1594 T7w = T7t - T7s;
Chris@82 1595 Im[WS(rs, 1)] = T7v - T7w;
Chris@82 1596 Ip[WS(rs, 14)] = T7v + T7w;
Chris@82 1597 }
Chris@82 1598 {
Chris@82 1599 E T6r, T6y, T7j, T7o;
Chris@82 1600 T6r = T6n + T6q;
Chris@82 1601 T6y = T6u + T6x;
Chris@82 1602 Rm[WS(rs, 13)] = T6r - T6y;
Chris@82 1603 Rp[WS(rs, 2)] = T6r + T6y;
Chris@82 1604 T7j = T6A + T6B;
Chris@82 1605 T7o = T7k + T7n;
Chris@82 1606 Im[WS(rs, 13)] = T7j - T7o;
Chris@82 1607 Ip[WS(rs, 2)] = T7j + T7o;
Chris@82 1608 }
Chris@82 1609 {
Chris@82 1610 E T6z, T6C, T7p, T7q;
Chris@82 1611 T6z = T6n - T6q;
Chris@82 1612 T6C = T6A - T6B;
Chris@82 1613 Rm[WS(rs, 5)] = T6z - T6C;
Chris@82 1614 Rp[WS(rs, 10)] = T6z + T6C;
Chris@82 1615 T7p = T6x - T6u;
Chris@82 1616 T7q = T7n - T7k;
Chris@82 1617 Im[WS(rs, 5)] = T7p - T7q;
Chris@82 1618 Ip[WS(rs, 10)] = T7p + T7q;
Chris@82 1619 }
Chris@82 1620 }
/* Store group: Rp/Ip at 7, 15, 3, 11 and Rm/Im at 8, 0, 12, 4 (uses all seven constants). */
Chris@82 1621 {
Chris@82 1622 E T3h, T4D, T7R, T7X, T3E, T7O, T4N, T4R, T46, T4A, T4G, T7W, T4K, T4Q, T4x;
Chris@82 1623 E T4B, T3g, T7P;
Chris@82 1624 T3g = KP707106781 * (T3a - T3f);
Chris@82 1625 T3h = T35 - T3g;
Chris@82 1626 T4D = T35 + T3g;
Chris@82 1627 T7P = KP707106781 * (T4V - T4U);
Chris@82 1628 T7R = T7P + T7Q;
Chris@82 1629 T7X = T7Q - T7P;
Chris@82 1630 {
Chris@82 1631 E T3s, T3D, T4L, T4M;
Chris@82 1632 T3s = FNMS(KP923879532, T3r, KP382683432 * T3m);
Chris@82 1633 T3D = FMA(KP382683432, T3x, KP923879532 * T3C);
Chris@82 1634 T3E = T3s - T3D;
Chris@82 1635 T7O = T3s + T3D;
Chris@82 1636 T4L = T4b + T4m;
Chris@82 1637 T4M = T4s + T4v;
Chris@82 1638 T4N = FNMS(KP555570233, T4M, KP831469612 * T4L);
Chris@82 1639 T4R = FMA(KP831469612, T4M, KP555570233 * T4L);
Chris@82 1640 }
Chris@82 1641 {
Chris@82 1642 E T3W, T45, T4E, T4F;
Chris@82 1643 T3W = T3K - T3V;
Chris@82 1644 T45 = T41 - T44;
Chris@82 1645 T46 = FMA(KP980785280, T3W, KP195090322 * T45);
Chris@82 1646 T4A = FNMS(KP980785280, T45, KP195090322 * T3W);
Chris@82 1647 T4E = FMA(KP923879532, T3m, KP382683432 * T3r);
Chris@82 1648 T4F = FNMS(KP923879532, T3x, KP382683432 * T3C);
Chris@82 1649 T4G = T4E + T4F;
Chris@82 1650 T7W = T4F - T4E;
Chris@82 1651 }
Chris@82 1652 {
Chris@82 1653 E T4I, T4J, T4n, T4w;
Chris@82 1654 T4I = T3K + T3V;
Chris@82 1655 T4J = T41 + T44;
Chris@82 1656 T4K = FMA(KP555570233, T4I, KP831469612 * T4J);
Chris@82 1657 T4Q = FNMS(KP555570233, T4J, KP831469612 * T4I);
Chris@82 1658 T4n = T4b - T4m;
Chris@82 1659 T4w = T4s - T4v;
Chris@82 1660 T4x = FNMS(KP980785280, T4w, KP195090322 * T4n);
Chris@82 1661 T4B = FMA(KP195090322, T4w, KP980785280 * T4n);
Chris@82 1662 }
Chris@82 1663 {
Chris@82 1664 E T3F, T4y, T7V, T7Y;
Chris@82 1665 T3F = T3h + T3E;
Chris@82 1666 T4y = T46 + T4x;
Chris@82 1667 Rm[WS(rs, 8)] = T3F - T4y;
Chris@82 1668 Rp[WS(rs, 7)] = T3F + T4y;
Chris@82 1669 T7V = T4A + T4B;
Chris@82 1670 T7Y = T7W + T7X;
Chris@82 1671 Im[WS(rs, 8)] = T7V - T7Y;
Chris@82 1672 Ip[WS(rs, 7)] = T7V + T7Y;
Chris@82 1673 }
Chris@82 1674 {
Chris@82 1675 E T4z, T4C, T7Z, T80;
Chris@82 1676 T4z = T3h - T3E;
Chris@82 1677 T4C = T4A - T4B;
Chris@82 1678 Rm[0] = T4z - T4C;
Chris@82 1679 Rp[WS(rs, 15)] = T4z + T4C;
Chris@82 1680 T7Z = T4x - T46;
Chris@82 1681 T80 = T7X - T7W;
Chris@82 1682 Im[0] = T7Z - T80;
Chris@82 1683 Ip[WS(rs, 15)] = T7Z + T80;
Chris@82 1684 }
Chris@82 1685 {
Chris@82 1686 E T4H, T4O, T7N, T7S;
Chris@82 1687 T4H = T4D + T4G;
Chris@82 1688 T4O = T4K + T4N;
Chris@82 1689 Rm[WS(rs, 12)] = T4H - T4O;
Chris@82 1690 Rp[WS(rs, 3)] = T4H + T4O;
Chris@82 1691 T7N = T4Q + T4R;
Chris@82 1692 T7S = T7O + T7R;
Chris@82 1693 Im[WS(rs, 12)] = T7N - T7S;
Chris@82 1694 Ip[WS(rs, 3)] = T7N + T7S;
Chris@82 1695 }
Chris@82 1696 {
Chris@82 1697 E T4P, T4S, T7T, T7U;
Chris@82 1698 T4P = T4D - T4G;
Chris@82 1699 T4S = T4Q - T4R;
Chris@82 1700 Rm[WS(rs, 4)] = T4P - T4S;
Chris@82 1701 Rp[WS(rs, 11)] = T4P + T4S;
Chris@82 1702 T7T = T4N - T4K;
Chris@82 1703 T7U = T7R - T7O;
Chris@82 1704 Im[WS(rs, 4)] = T7T - T7U;
Chris@82 1705 Ip[WS(rs, 11)] = T7T + T7U;
Chris@82 1706 }
Chris@82 1707 }
/* Store group: Rp/Ip at 5, 13, 1, 9 and Rm/Im at 10, 2, 14, 6 (uses all seven constants). */
Chris@82 1708 {
Chris@82 1709 E T4X, T5p, T7D, T7J, T54, T7y, T5z, T5D, T5c, T5m, T5s, T7I, T5w, T5C, T5j;
Chris@82 1710 E T5n, T4W, T7z;
Chris@82 1711 T4W = KP707106781 * (T4U + T4V);
Chris@82 1712 T4X = T4T - T4W;
Chris@82 1713 T5p = T4T + T4W;
Chris@82 1714 T7z = KP707106781 * (T3a + T3f);
Chris@82 1715 T7D = T7z + T7C;
Chris@82 1716 T7J = T7C - T7z;
Chris@82 1717 {
Chris@82 1718 E T50, T53, T5x, T5y;
Chris@82 1719 T50 = FNMS(KP382683432, T4Z, KP923879532 * T4Y);
Chris@82 1720 T53 = FMA(KP923879532, T51, KP382683432 * T52);
Chris@82 1721 T54 = T50 - T53;
Chris@82 1722 T7y = T50 + T53;
Chris@82 1723 T5x = T5d + T5e;
Chris@82 1724 T5y = T5g + T5h;
Chris@82 1725 T5z = FNMS(KP195090322, T5y, KP980785280 * T5x);
Chris@82 1726 T5D = FMA(KP195090322, T5x, KP980785280 * T5y);
Chris@82 1727 }
Chris@82 1728 {
Chris@82 1729 E T58, T5b, T5q, T5r;
Chris@82 1730 T58 = T56 - T57;
Chris@82 1731 T5b = T59 - T5a;
Chris@82 1732 T5c = FMA(KP555570233, T58, KP831469612 * T5b);
Chris@82 1733 T5m = FNMS(KP831469612, T58, KP555570233 * T5b);
Chris@82 1734 T5q = FMA(KP382683432, T4Y, KP923879532 * T4Z);
Chris@82 1735 T5r = FNMS(KP382683432, T51, KP923879532 * T52);
Chris@82 1736 T5s = T5q + T5r;
Chris@82 1737 T7I = T5r - T5q;
Chris@82 1738 }
Chris@82 1739 {
Chris@82 1740 E T5u, T5v, T5f, T5i;
Chris@82 1741 T5u = T56 + T57;
Chris@82 1742 T5v = T59 + T5a;
Chris@82 1743 T5w = FMA(KP980785280, T5u, KP195090322 * T5v);
Chris@82 1744 T5C = FNMS(KP195090322, T5u, KP980785280 * T5v);
Chris@82 1745 T5f = T5d - T5e;
Chris@82 1746 T5i = T5g - T5h;
Chris@82 1747 T5j = FNMS(KP831469612, T5i, KP555570233 * T5f);
Chris@82 1748 T5n = FMA(KP831469612, T5f, KP555570233 * T5i);
Chris@82 1749 }
Chris@82 1750 {
Chris@82 1751 E T55, T5k, T7H, T7K;
Chris@82 1752 T55 = T4X + T54;
Chris@82 1753 T5k = T5c + T5j;
Chris@82 1754 Rm[WS(rs, 10)] = T55 - T5k;
Chris@82 1755 Rp[WS(rs, 5)] = T55 + T5k;
Chris@82 1756 T7H = T5m + T5n;
Chris@82 1757 T7K = T7I + T7J;
Chris@82 1758 Im[WS(rs, 10)] = T7H - T7K;
Chris@82 1759 Ip[WS(rs, 5)] = T7H + T7K;
Chris@82 1760 }
Chris@82 1761 {
Chris@82 1762 E T5l, T5o, T7L, T7M;
Chris@82 1763 T5l = T4X - T54;
Chris@82 1764 T5o = T5m - T5n;
Chris@82 1765 Rm[WS(rs, 2)] = T5l - T5o;
Chris@82 1766 Rp[WS(rs, 13)] = T5l + T5o;
Chris@82 1767 T7L = T5j - T5c;
Chris@82 1768 T7M = T7J - T7I;
Chris@82 1769 Im[WS(rs, 2)] = T7L - T7M;
Chris@82 1770 Ip[WS(rs, 13)] = T7L + T7M;
Chris@82 1771 }
Chris@82 1772 {
Chris@82 1773 E T5t, T5A, T7x, T7E;
Chris@82 1774 T5t = T5p + T5s;
Chris@82 1775 T5A = T5w + T5z;
Chris@82 1776 Rm[WS(rs, 14)] = T5t - T5A;
Chris@82 1777 Rp[WS(rs, 1)] = T5t + T5A;
Chris@82 1778 T7x = T5C + T5D;
Chris@82 1779 T7E = T7y + T7D;
Chris@82 1780 Im[WS(rs, 14)] = T7x - T7E;
Chris@82 1781 Ip[WS(rs, 1)] = T7x + T7E;
Chris@82 1782 }
Chris@82 1783 {
Chris@82 1784 E T5B, T5E, T7F, T7G;
Chris@82 1785 T5B = T5p - T5s;
Chris@82 1786 T5E = T5C - T5D;
Chris@82 1787 Rm[WS(rs, 6)] = T5B - T5E;
Chris@82 1788 Rp[WS(rs, 9)] = T5B + T5E;
Chris@82 1789 T7F = T5z - T5w;
Chris@82 1790 T7G = T7D - T7y;
Chris@82 1791 Im[WS(rs, 6)] = T7F - T7G;
Chris@82 1792 Ip[WS(rs, 9)] = T7F + T7G;
Chris@82 1793 }
Chris@82 1794 }
Chris@82 1795 }
Chris@82 1796 }
Chris@82 1797 }
Chris@82 1798
/*
 * Twiddle-instruction table for the non-FMA build of this codelet; it is
 * handed to the planner through `desc` below.  TW_FULL / TW_NEXT are
 * declared outside this file -- presumably {TW_FULL, 1, 32} requests the
 * full twiddle set for radix 32 and TW_NEXT ends the list (confirm
 * against the tw_instr declaration).  The kernel in this branch steps W
 * by 62 entries per iteration.
 */
Chris@82 1799 static const tw_instr twinstr[] = {
Chris@82 1800 {TW_FULL, 1, 32},
Chris@82 1801 {TW_NEXT, 1, 0}
Chris@82 1802 };
Chris@82 1803
/*
 * Codelet descriptor for the non-FMA build: 32, the name string
 * "hc2cf_32", the twiddle table, &GENUS (defined outside this file), and
 * the cost tuple {340, 114, 94, 0}.  The first three cost entries equal
 * the "340 additions, 114 multiplications, 94 fused multiply/add" figures
 * in the generator comment of this #else branch; the meaning of the
 * trailing 0 is not visible here.
 */
Chris@82 1804 static const hc2c_desc desc = { 32, "hc2cf_32", twinstr, &GENUS, {340, 114, 94, 0} };
Chris@82 1805
/*
 * Registration entry point (non-FMA build).  Passes the kernel function
 * hc2cf_32, its descriptor and the HC2C_VIA_RDFT constant to
 * X(khc2c_register) for planner `p`.  X() is a macro defined outside this
 * file (presumably the library's symbol-prefixing macro -- confirm).
 */
Chris@82 1806 void X(codelet_hc2cf_32) (planner *p) {
Chris@82 1807 X(khc2c_register) (p, hc2cf_32, &desc, HC2C_VIA_RDFT);
Chris@82 1808 }
Chris@82 1809 #endif