annotate src/fftw-3.3.8/rdft/scalar/r2cb/hc2cbdft2_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:08:00 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2cdft.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cbdft2_32 -include rdft/scalar/hc2cb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 498 FP additions, 260 FP multiplications,
Chris@82 32 * (or, 300 additions, 62 multiplications, 198 fused multiply/add),
Chris@82 33 * 122 stack variables, 7 constants, and 128 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cb.h"
Chris@82 36
/*
 * hc2cbdft2_32 -- machine-generated codelet (the file header says DO NOT
 * EDIT; the generator invocation is gen_hc2cdft with -n 32 -sign 1 -dif).
 * The comments in this function only describe what the code visibly does.
 *
 * Rp, Ip, Rm, Im : data arrays, transformed in place.  Every loop iteration
 *                  reads elements WS(rs, 0) .. WS(rs, 15) of each array and
 *                  then overwrites those same 64 locations.
 * W              : twiddle table, read only; W[0] .. W[61] are consumed per
 *                  iteration.  It is first advanced by (mb - 1) * 62 and
 *                  then by 62 after each iteration.
 * rs             : stride handed to WS() for indexing inside one iteration.
 * mb, me         : the loop runs for m = mb, mb + 1, ..., me - 1.
 * ms             : per-iteration step; Rp and Ip move forward by ms while
 *                  Rm and Im move backward by ms.
 *
 * NOTE(review): judging by the generator name and flags this is presumably
 * the backward half-complex-to-complex pass of a size-32 real-data DFT --
 * confirm against rdft/scalar/hc2cb.h.  FMA(a, b, c) and FNMS(a, b, c) are
 * macros defined outside this file (presumably a * b + c and c - a * b) --
 * confirm in the codelet headers.
 */
Chris@82 37 static void hc2cbdft2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
Chris@82 39 DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* numerically cos(3*pi/16) */
Chris@82 40 DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* numerically cos(pi/16) */
Chris@82 41 DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* numerically cos(pi/8) */
Chris@82 42 DK(KP668178637, +0.668178637919298919997757686523080761552472251); /* numerically tan(3*pi/16) */
Chris@82 43 DK(KP198912367, +0.198912367379658006911597622644676228597850501); /* numerically tan(pi/16) */
Chris@82 44 DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(1/2) */
Chris@82 45 DK(KP414213562, +0.414213562373095048801688724209698078569671875); /* numerically tan(pi/8) */
Chris@82 46 {
Chris@82 47 INT m;
/* One pass per m; all pointer and twiddle stepping happens in the for-header. */
Chris@82 48 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) {
/* Intermediates produced by the two load stages and consumed by the store stages. */
Chris@82 49 E T3h, T4B, Tv, T3K, T6T, T8Y, T7i, T8L, T7f, T8X, T1G, T4Y, T1j, T4K, T2M;
Chris@82 50 E T4X, T6d, T8C, T66, T8o, T6M, T8K, T2P, T4L, T3o, T4C, T4q, T5q, T6C, T8p;
Chris@82 51 E T6z, T8B, TK, TZ, T10, T32, T39, T3L, T4t, T4E, T8t, T8F, T4w, T4F, T8w;
Chris@82 52 E T8E, T6l, T6E, T6s, T6F, T28, T51, T2R, T4P, T71, T90, T7k, T8P, T2z, T50;
Chris@82 53 E T2S, T4S, T78, T91, T7l, T8S;
/* Load stage 1: reads Rp/Ip at even indices and Rm/Im at odd indices, then combines them into sums and differences. */
Chris@82 54 {
Chris@82 55 E T16, T3l, T2H, T3m, T3, T6, T7, T2E, T13, Ta, Td, Te, T1c, T3j, T3i;
Chris@82 56 E T2J, T1h, T2K, Tt, T6Q, T6R, T1z, T1E, T6a, T6b, T3g, Tm, T6N, T6O, T1o;
Chris@82 57 E T1t, T67, T68, T3d, T4o, T4p;
Chris@82 58 {
Chris@82 59 E T14, T15, T2F, T2G;
Chris@82 60 T14 = Ip[0];
Chris@82 61 T15 = Im[WS(rs, 15)];
Chris@82 62 T16 = T14 + T15;
Chris@82 63 T3l = T14 - T15;
Chris@82 64 T2F = Ip[WS(rs, 8)];
Chris@82 65 T2G = Im[WS(rs, 7)];
Chris@82 66 T2H = T2F + T2G;
Chris@82 67 T3m = T2F - T2G;
Chris@82 68 {
Chris@82 69 E T1, T2, T4, T5;
Chris@82 70 T1 = Rp[0];
Chris@82 71 T2 = Rm[WS(rs, 15)];
Chris@82 72 T3 = T1 + T2;
Chris@82 73 T4 = Rp[WS(rs, 8)];
Chris@82 74 T5 = Rm[WS(rs, 7)];
Chris@82 75 T6 = T4 + T5;
Chris@82 76 T7 = T3 + T6;
Chris@82 77 T2E = T1 - T2;
Chris@82 78 T13 = T4 - T5;
Chris@82 79 }
Chris@82 80 }
Chris@82 81 {
Chris@82 82 E T19, T1a, T1b, T18, T1e, T1f, T1g, T1d;
Chris@82 83 {
Chris@82 84 E T8, T9, Tb, Tc;
Chris@82 85 T19 = Ip[WS(rs, 4)];
Chris@82 86 T1a = Im[WS(rs, 11)];
Chris@82 87 T1b = T19 + T1a;
Chris@82 88 T8 = Rp[WS(rs, 4)];
Chris@82 89 T9 = Rm[WS(rs, 11)];
Chris@82 90 Ta = T8 + T9;
Chris@82 91 T18 = T8 - T9;
Chris@82 92 T1e = Im[WS(rs, 3)];
Chris@82 93 T1f = Ip[WS(rs, 12)];
Chris@82 94 T1g = T1e + T1f;
Chris@82 95 Tb = Rm[WS(rs, 3)];
Chris@82 96 Tc = Rp[WS(rs, 12)];
Chris@82 97 Td = Tb + Tc;
Chris@82 98 T1d = Tb - Tc;
Chris@82 99 }
Chris@82 100 Te = Ta + Td;
Chris@82 101 T1c = T18 + T1b;
Chris@82 102 T3j = T1f - T1e;
Chris@82 103 T3i = T19 - T1a;
Chris@82 104 T2J = T18 - T1b;
Chris@82 105 T1h = T1d + T1g;
Chris@82 106 T2K = T1d - T1g;
Chris@82 107 }
Chris@82 108 {
Chris@82 109 E Tp, T1A, T1y, T3e, Ts, T1v, T1D, T3f;
Chris@82 110 {
Chris@82 111 E Tn, To, T1w, T1x;
Chris@82 112 Tn = Rm[WS(rs, 1)];
Chris@82 113 To = Rp[WS(rs, 14)];
Chris@82 114 Tp = Tn + To;
Chris@82 115 T1A = Tn - To;
Chris@82 116 T1w = Im[WS(rs, 1)];
Chris@82 117 T1x = Ip[WS(rs, 14)];
Chris@82 118 T1y = T1w + T1x;
Chris@82 119 T3e = T1x - T1w;
Chris@82 120 }
Chris@82 121 {
Chris@82 122 E Tq, Tr, T1B, T1C;
Chris@82 123 Tq = Rp[WS(rs, 6)];
Chris@82 124 Tr = Rm[WS(rs, 9)];
Chris@82 125 Ts = Tq + Tr;
Chris@82 126 T1v = Tq - Tr;
Chris@82 127 T1B = Ip[WS(rs, 6)];
Chris@82 128 T1C = Im[WS(rs, 9)];
Chris@82 129 T1D = T1B + T1C;
Chris@82 130 T3f = T1B - T1C;
Chris@82 131 }
Chris@82 132 Tt = Tp + Ts;
Chris@82 133 T6Q = T1A + T1D;
Chris@82 134 T6R = T1v + T1y;
Chris@82 135 T1z = T1v - T1y;
Chris@82 136 T1E = T1A - T1D;
Chris@82 137 T6a = Tp - Ts;
Chris@82 138 T6b = T3e - T3f;
Chris@82 139 T3g = T3e + T3f;
Chris@82 140 }
Chris@82 141 {
Chris@82 142 E Ti, T1p, T1n, T3b, Tl, T1k, T1s, T3c;
Chris@82 143 {
Chris@82 144 E Tg, Th, T1l, T1m;
Chris@82 145 Tg = Rp[WS(rs, 2)];
Chris@82 146 Th = Rm[WS(rs, 13)];
Chris@82 147 Ti = Tg + Th;
Chris@82 148 T1p = Tg - Th;
Chris@82 149 T1l = Ip[WS(rs, 2)];
Chris@82 150 T1m = Im[WS(rs, 13)];
Chris@82 151 T1n = T1l + T1m;
Chris@82 152 T3b = T1l - T1m;
Chris@82 153 }
Chris@82 154 {
Chris@82 155 E Tj, Tk, T1q, T1r;
Chris@82 156 Tj = Rp[WS(rs, 10)];
Chris@82 157 Tk = Rm[WS(rs, 5)];
Chris@82 158 Tl = Tj + Tk;
Chris@82 159 T1k = Tj - Tk;
Chris@82 160 T1q = Ip[WS(rs, 10)];
Chris@82 161 T1r = Im[WS(rs, 5)];
Chris@82 162 T1s = T1q + T1r;
Chris@82 163 T3c = T1q - T1r;
Chris@82 164 }
Chris@82 165 Tm = Ti + Tl;
Chris@82 166 T6N = T1p + T1s;
Chris@82 167 T6O = T1n - T1k;
Chris@82 168 T1o = T1k + T1n;
Chris@82 169 T1t = T1p - T1s;
Chris@82 170 T67 = Ti - Tl;
Chris@82 171 T68 = T3b - T3c;
Chris@82 172 T3d = T3b + T3c;
Chris@82 173 }
Chris@82 174 T3h = T3d + T3g;
Chris@82 175 T4B = Tm - Tt;
Chris@82 176 {
Chris@82 177 E Tf, Tu, T6P, T6S;
Chris@82 178 Tf = T7 + Te;
Chris@82 179 Tu = Tm + Tt;
Chris@82 180 Tv = Tf + Tu;
Chris@82 181 T3K = Tf - Tu;
Chris@82 182 T6P = FMA(KP414213562, T6O, T6N);
Chris@82 183 T6S = FMA(KP414213562, T6R, T6Q);
Chris@82 184 T6T = T6P - T6S;
Chris@82 185 T8Y = T6P + T6S;
Chris@82 186 }
Chris@82 187 {
Chris@82 188 E T7g, T7h, T7d, T7e;
Chris@82 189 T7g = FNMS(KP414213562, T6N, T6O);
Chris@82 190 T7h = FNMS(KP414213562, T6Q, T6R);
Chris@82 191 T7i = T7g + T7h;
Chris@82 192 T8L = T7h - T7g;
Chris@82 193 T7d = T2E + T2H;
Chris@82 194 T7e = T1c + T1h;
Chris@82 195 T7f = FNMS(KP707106781, T7e, T7d);
Chris@82 196 T8X = FMA(KP707106781, T7e, T7d);
Chris@82 197 }
Chris@82 198 {
Chris@82 199 E T1u, T1F, T17, T1i;
Chris@82 200 T1u = FMA(KP414213562, T1t, T1o);
Chris@82 201 T1F = FNMS(KP414213562, T1E, T1z);
Chris@82 202 T1G = T1u + T1F;
Chris@82 203 T4Y = T1F - T1u;
Chris@82 204 T17 = T13 + T16;
Chris@82 205 T1i = T1c - T1h;
Chris@82 206 T1j = FMA(KP707106781, T1i, T17);
Chris@82 207 T4K = FNMS(KP707106781, T1i, T17);
Chris@82 208 }
Chris@82 209 {
Chris@82 210 E T2I, T2L, T69, T6c;
Chris@82 211 T2I = T2E - T2H;
Chris@82 212 T2L = T2J + T2K;
Chris@82 213 T2M = FMA(KP707106781, T2L, T2I);
Chris@82 214 T4X = FNMS(KP707106781, T2L, T2I);
Chris@82 215 T69 = T67 - T68;
Chris@82 216 T6c = T6a + T6b;
Chris@82 217 T6d = T69 + T6c;
Chris@82 218 T8C = T69 - T6c;
Chris@82 219 }
Chris@82 220 {
Chris@82 221 E T64, T65, T6K, T6L;
Chris@82 222 T64 = T3 - T6;
Chris@82 223 T65 = T3j - T3i;
Chris@82 224 T66 = T64 + T65;
Chris@82 225 T8o = T64 - T65;
Chris@82 226 T6K = T16 - T13;
Chris@82 227 T6L = T2J - T2K;
Chris@82 228 T6M = FMA(KP707106781, T6L, T6K);
Chris@82 229 T8K = FNMS(KP707106781, T6L, T6K);
Chris@82 230 }
Chris@82 231 {
Chris@82 232 E T2N, T2O, T3k, T3n;
Chris@82 233 T2N = FNMS(KP414213562, T1o, T1t);
Chris@82 234 T2O = FMA(KP414213562, T1z, T1E);
Chris@82 235 T2P = T2N + T2O;
Chris@82 236 T4L = T2N - T2O;
Chris@82 237 T3k = T3i + T3j;
Chris@82 238 T3n = T3l + T3m;
Chris@82 239 T3o = T3k + T3n;
Chris@82 240 T4C = T3n - T3k;
Chris@82 241 }
Chris@82 242 T4o = T7 - Te;
Chris@82 243 T4p = T3g - T3d;
Chris@82 244 T4q = T4o + T4p;
Chris@82 245 T5q = T4o - T4p;
Chris@82 246 {
Chris@82 247 E T6A, T6B, T6x, T6y;
Chris@82 248 T6A = T67 + T68;
Chris@82 249 T6B = T6b - T6a;
Chris@82 250 T6C = T6A + T6B;
Chris@82 251 T8p = T6B - T6A;
Chris@82 252 T6x = Ta - Td;
Chris@82 253 T6y = T3l - T3m;
Chris@82 254 T6z = T6x + T6y;
Chris@82 255 T8B = T6y - T6x;
Chris@82 256 }
Chris@82 257 }
/* Load stage 2: reads Rp/Ip at odd indices and Rm/Im at even indices, then combines them the same way. */
Chris@82 258 {
Chris@82 259 E TC, T6V, T6Y, T1M, T23, T6f, T6j, T31, TY, T6n, T6p, T2i, T2n, T2w, T35;
Chris@82 260 E T2v, TJ, T6g, T6i, T1R, T1W, T25, T2Y, T24, TR, T72, T75, T2d, T2u, T6m;
Chris@82 261 E T6q, T38;
Chris@82 262 {
Chris@82 263 E Ty, T1Z, T1L, T2Z, TB, T1I, T22, T30;
Chris@82 264 {
Chris@82 265 E Tw, Tx, T1J, T1K;
Chris@82 266 Tw = Rp[WS(rs, 1)];
Chris@82 267 Tx = Rm[WS(rs, 14)];
Chris@82 268 Ty = Tw + Tx;
Chris@82 269 T1Z = Tw - Tx;
Chris@82 270 T1J = Ip[WS(rs, 1)];
Chris@82 271 T1K = Im[WS(rs, 14)];
Chris@82 272 T1L = T1J + T1K;
Chris@82 273 T2Z = T1J - T1K;
Chris@82 274 }
Chris@82 275 {
Chris@82 276 E Tz, TA, T20, T21;
Chris@82 277 Tz = Rp[WS(rs, 9)];
Chris@82 278 TA = Rm[WS(rs, 6)];
Chris@82 279 TB = Tz + TA;
Chris@82 280 T1I = Tz - TA;
Chris@82 281 T20 = Ip[WS(rs, 9)];
Chris@82 282 T21 = Im[WS(rs, 6)];
Chris@82 283 T22 = T20 + T21;
Chris@82 284 T30 = T20 - T21;
Chris@82 285 }
Chris@82 286 TC = Ty + TB;
Chris@82 287 T6V = T1L - T1I;
Chris@82 288 T6Y = T1Z + T22;
Chris@82 289 T1M = T1I + T1L;
Chris@82 290 T23 = T1Z - T22;
Chris@82 291 T6f = Ty - TB;
Chris@82 292 T6j = T2Z - T30;
Chris@82 293 T31 = T2Z + T30;
Chris@82 294 }
Chris@82 295 {
Chris@82 296 E TU, T2e, T2h, T33, TX, T2j, T2m, T34;
Chris@82 297 {
Chris@82 298 E TS, TT, T2f, T2g;
Chris@82 299 TS = Rp[WS(rs, 3)];
Chris@82 300 TT = Rm[WS(rs, 12)];
Chris@82 301 TU = TS + TT;
Chris@82 302 T2e = TS - TT;
Chris@82 303 T2f = Ip[WS(rs, 3)];
Chris@82 304 T2g = Im[WS(rs, 12)];
Chris@82 305 T2h = T2f + T2g;
Chris@82 306 T33 = T2f - T2g;
Chris@82 307 }
Chris@82 308 {
Chris@82 309 E TV, TW, T2k, T2l;
Chris@82 310 TV = Rm[WS(rs, 4)];
Chris@82 311 TW = Rp[WS(rs, 11)];
Chris@82 312 TX = TV + TW;
Chris@82 313 T2j = TV - TW;
Chris@82 314 T2k = Im[WS(rs, 4)];
Chris@82 315 T2l = Ip[WS(rs, 11)];
Chris@82 316 T2m = T2k + T2l;
Chris@82 317 T34 = T2l - T2k;
Chris@82 318 }
Chris@82 319 TY = TU + TX;
Chris@82 320 T6n = T34 - T33;
Chris@82 321 T6p = TU - TX;
Chris@82 322 T2i = T2e + T2h;
Chris@82 323 T2n = T2j + T2m;
Chris@82 324 T2w = T2j - T2m;
Chris@82 325 T35 = T33 + T34;
Chris@82 326 T2v = T2e - T2h;
Chris@82 327 }
Chris@82 328 {
Chris@82 329 E TF, T1N, T1Q, T2W, TI, T1S, T1V, T2X;
Chris@82 330 {
Chris@82 331 E TD, TE, T1O, T1P;
Chris@82 332 TD = Rp[WS(rs, 5)];
Chris@82 333 TE = Rm[WS(rs, 10)];
Chris@82 334 TF = TD + TE;
Chris@82 335 T1N = TD - TE;
Chris@82 336 T1O = Ip[WS(rs, 5)];
Chris@82 337 T1P = Im[WS(rs, 10)];
Chris@82 338 T1Q = T1O + T1P;
Chris@82 339 T2W = T1O - T1P;
Chris@82 340 }
Chris@82 341 {
Chris@82 342 E TG, TH, T1T, T1U;
Chris@82 343 TG = Rm[WS(rs, 2)];
Chris@82 344 TH = Rp[WS(rs, 13)];
Chris@82 345 TI = TG + TH;
Chris@82 346 T1S = TG - TH;
Chris@82 347 T1T = Im[WS(rs, 2)];
Chris@82 348 T1U = Ip[WS(rs, 13)];
Chris@82 349 T1V = T1T + T1U;
Chris@82 350 T2X = T1U - T1T;
Chris@82 351 }
Chris@82 352 TJ = TF + TI;
Chris@82 353 T6g = T2X - T2W;
Chris@82 354 T6i = TF - TI;
Chris@82 355 T1R = T1N + T1Q;
Chris@82 356 T1W = T1S + T1V;
Chris@82 357 T25 = T1S - T1V;
Chris@82 358 T2Y = T2W + T2X;
Chris@82 359 T24 = T1N - T1Q;
Chris@82 360 }
Chris@82 361 {
Chris@82 362 E TN, T2q, T2c, T36, TQ, T29, T2t, T37;
Chris@82 363 {
Chris@82 364 E TL, TM, T2a, T2b;
Chris@82 365 TL = Rm[0];
Chris@82 366 TM = Rp[WS(rs, 15)];
Chris@82 367 TN = TL + TM;
Chris@82 368 T2q = TL - TM;
Chris@82 369 T2a = Im[0];
Chris@82 370 T2b = Ip[WS(rs, 15)];
Chris@82 371 T2c = T2a + T2b;
Chris@82 372 T36 = T2b - T2a;
Chris@82 373 }
Chris@82 374 {
Chris@82 375 E TO, TP, T2r, T2s;
Chris@82 376 TO = Rp[WS(rs, 7)];
Chris@82 377 TP = Rm[WS(rs, 8)];
Chris@82 378 TQ = TO + TP;
Chris@82 379 T29 = TO - TP;
Chris@82 380 T2r = Ip[WS(rs, 7)];
Chris@82 381 T2s = Im[WS(rs, 8)];
Chris@82 382 T2t = T2r + T2s;
Chris@82 383 T37 = T2r - T2s;
Chris@82 384 }
Chris@82 385 TR = TN + TQ;
Chris@82 386 T72 = T29 + T2c;
Chris@82 387 T75 = T2q + T2t;
Chris@82 388 T2d = T29 - T2c;
Chris@82 389 T2u = T2q - T2t;
Chris@82 390 T6m = TN - TQ;
Chris@82 391 T6q = T36 - T37;
Chris@82 392 T38 = T36 + T37;
Chris@82 393 }
Chris@82 394 {
Chris@82 395 E T4r, T4s, T8r, T8s;
Chris@82 396 TK = TC + TJ;
Chris@82 397 TZ = TR + TY;
Chris@82 398 T10 = TK + TZ;
Chris@82 399 T32 = T2Y + T31;
Chris@82 400 T39 = T35 + T38;
Chris@82 401 T3L = T39 - T32;
Chris@82 402 T4r = TC - TJ;
Chris@82 403 T4s = T31 - T2Y;
Chris@82 404 T4t = T4r - T4s;
Chris@82 405 T4E = T4r + T4s;
Chris@82 406 T8r = T6q - T6p;
Chris@82 407 T8s = T6m - T6n;
Chris@82 408 T8t = FMA(KP414213562, T8s, T8r);
Chris@82 409 T8F = FNMS(KP414213562, T8r, T8s);
Chris@82 410 {
Chris@82 411 E T4u, T4v, T8u, T8v;
Chris@82 412 T4u = TR - TY;
Chris@82 413 T4v = T38 - T35;
Chris@82 414 T4w = T4u + T4v;
Chris@82 415 T4F = T4v - T4u;
Chris@82 416 T8u = T6j - T6i;
Chris@82 417 T8v = T6f - T6g;
Chris@82 418 T8w = FNMS(KP414213562, T8v, T8u);
Chris@82 419 T8E = FMA(KP414213562, T8u, T8v);
Chris@82 420 }
Chris@82 421 }
Chris@82 422 {
Chris@82 423 E T6h, T6k, T6o, T6r;
Chris@82 424 T6h = T6f + T6g;
Chris@82 425 T6k = T6i + T6j;
Chris@82 426 T6l = FNMS(KP414213562, T6k, T6h);
Chris@82 427 T6E = FMA(KP414213562, T6h, T6k);
Chris@82 428 T6o = T6m + T6n;
Chris@82 429 T6r = T6p + T6q;
Chris@82 430 T6s = FMA(KP414213562, T6r, T6o);
Chris@82 431 T6F = FNMS(KP414213562, T6o, T6r);
Chris@82 432 {
Chris@82 433 E T1Y, T4O, T27, T4N, T1X, T26;
Chris@82 434 T1X = T1R - T1W;
Chris@82 435 T1Y = FMA(KP707106781, T1X, T1M);
Chris@82 436 T4O = FNMS(KP707106781, T1X, T1M);
Chris@82 437 T26 = T24 + T25;
Chris@82 438 T27 = FMA(KP707106781, T26, T23);
Chris@82 439 T4N = FNMS(KP707106781, T26, T23);
Chris@82 440 T28 = FMA(KP198912367, T27, T1Y);
Chris@82 441 T51 = FNMS(KP668178637, T4N, T4O);
Chris@82 442 T2R = FNMS(KP198912367, T1Y, T27);
Chris@82 443 T4P = FMA(KP668178637, T4O, T4N);
Chris@82 444 }
Chris@82 445 }
Chris@82 446 {
Chris@82 447 E T6X, T8O, T70, T8N, T6W, T6Z;
Chris@82 448 T6W = T25 - T24;
Chris@82 449 T6X = FNMS(KP707106781, T6W, T6V);
Chris@82 450 T8O = FMA(KP707106781, T6W, T6V);
Chris@82 451 T6Z = T1R + T1W;
Chris@82 452 T70 = FNMS(KP707106781, T6Z, T6Y);
Chris@82 453 T8N = FMA(KP707106781, T6Z, T6Y);
Chris@82 454 T71 = FMA(KP668178637, T70, T6X);
Chris@82 455 T90 = FNMS(KP198912367, T8N, T8O);
Chris@82 456 T7k = FNMS(KP668178637, T6X, T70);
Chris@82 457 T8P = FMA(KP198912367, T8O, T8N);
Chris@82 458 }
Chris@82 459 {
Chris@82 460 E T2p, T4R, T2y, T4Q, T2o, T2x;
Chris@82 461 T2o = T2i - T2n;
Chris@82 462 T2p = FMA(KP707106781, T2o, T2d);
Chris@82 463 T4R = FNMS(KP707106781, T2o, T2d);
Chris@82 464 T2x = T2v + T2w;
Chris@82 465 T2y = FMA(KP707106781, T2x, T2u);
Chris@82 466 T4Q = FNMS(KP707106781, T2x, T2u);
Chris@82 467 T2z = FNMS(KP198912367, T2y, T2p);
Chris@82 468 T50 = FMA(KP668178637, T4Q, T4R);
Chris@82 469 T2S = FMA(KP198912367, T2p, T2y);
Chris@82 470 T4S = FNMS(KP668178637, T4R, T4Q);
Chris@82 471 }
Chris@82 472 {
Chris@82 473 E T74, T8R, T77, T8Q, T73, T76;
Chris@82 474 T73 = T2v - T2w;
Chris@82 475 T74 = FNMS(KP707106781, T73, T72);
Chris@82 476 T8R = FMA(KP707106781, T73, T72);
Chris@82 477 T76 = T2i + T2n;
Chris@82 478 T77 = FNMS(KP707106781, T76, T75);
Chris@82 479 T8Q = FMA(KP707106781, T76, T75);
Chris@82 480 T78 = FMA(KP668178637, T77, T74);
Chris@82 481 T91 = FNMS(KP198912367, T8Q, T8R);
Chris@82 482 T7l = FNMS(KP668178637, T74, T77);
Chris@82 483 T8S = FMA(KP198912367, T8R, T8Q);
Chris@82 484 }
Chris@82 485 }
/* Store stage: writes index 0 and index WS(rs, 8) of all four arrays, using W[0], W[1] and W[30] .. W[33]. */
Chris@82 486 {
Chris@82 487 E T11, T3q, T3x, T3t, T3v, T3w, T3F, T2B, T3A, T2U, T3D, T2C, T3r, T3B, T3H;
Chris@82 488 E T2V, T3s, T2D;
Chris@82 489 {
Chris@82 490 E T3a, T3p, T3u, T12, T3z;
Chris@82 491 T11 = Tv + T10;
Chris@82 492 T3a = T32 + T39;
Chris@82 493 T3p = T3h + T3o;
Chris@82 494 T3q = T3a + T3p;
Chris@82 495 T3x = T3p - T3a;
Chris@82 496 T3u = Tv - T10;
Chris@82 497 T3t = W[30];
Chris@82 498 T3v = T3t * T3u;
Chris@82 499 T3w = W[31];
Chris@82 500 T3F = T3w * T3u;
Chris@82 501 {
Chris@82 502 E T1H, T2A, T2Q, T2T;
Chris@82 503 T1H = FMA(KP923879532, T1G, T1j);
Chris@82 504 T2A = T28 + T2z;
Chris@82 505 T2B = FMA(KP980785280, T2A, T1H);
Chris@82 506 T3A = FNMS(KP980785280, T2A, T1H);
Chris@82 507 T2Q = FMA(KP923879532, T2P, T2M);
Chris@82 508 T2T = T2R + T2S;
Chris@82 509 T2U = FMA(KP980785280, T2T, T2Q);
Chris@82 510 T3D = FNMS(KP980785280, T2T, T2Q);
Chris@82 511 }
Chris@82 512 T12 = W[0];
Chris@82 513 T2C = T12 * T2B;
Chris@82 514 T3r = T12 * T2U;
Chris@82 515 T3z = W[32];
Chris@82 516 T3B = T3z * T3A;
Chris@82 517 T3H = T3z * T3D;
Chris@82 518 }
Chris@82 519 T2D = W[1];
Chris@82 520 T2V = FMA(T2D, T2U, T2C);
Chris@82 521 T3s = FNMS(T2D, T2B, T3r);
Chris@82 522 Rp[0] = T11 - T2V;
Chris@82 523 Ip[0] = T3q + T3s;
Chris@82 524 Rm[0] = T11 + T2V;
Chris@82 525 Im[0] = T3s - T3q;
Chris@82 526 {
Chris@82 527 E T3y, T3G, T3E, T3I, T3C;
Chris@82 528 T3y = FNMS(T3w, T3x, T3v);
Chris@82 529 T3G = FMA(T3t, T3x, T3F);
Chris@82 530 T3C = W[33];
Chris@82 531 T3E = FMA(T3C, T3D, T3B);
Chris@82 532 T3I = FNMS(T3C, T3A, T3H);
Chris@82 533 Rp[WS(rs, 8)] = T3y - T3E;
Chris@82 534 Ip[WS(rs, 8)] = T3G + T3I;
Chris@82 535 Rm[WS(rs, 8)] = T3y + T3E;
Chris@82 536 Im[WS(rs, 8)] = T3I - T3G;
Chris@82 537 }
Chris@82 538 }
/* Store stage: writes indices WS(rs, 4) and WS(rs, 12), using W[14] .. W[17] and W[46] .. W[49]. */
Chris@82 539 {
Chris@82 540 E T3R, T4b, T47, T49, T4a, T4j, T3J, T3N, T3O, T43, T3W, T4e, T41, T4h, T3X;
Chris@82 541 E T45, T4f, T4l;
Chris@82 542 {
Chris@82 543 E T3P, T3Q, T48, T3M, T3T, T4d;
Chris@82 544 T3P = TK - TZ;
Chris@82 545 T3Q = T3o - T3h;
Chris@82 546 T3R = T3P + T3Q;
Chris@82 547 T4b = T3Q - T3P;
Chris@82 548 T48 = T3K - T3L;
Chris@82 549 T47 = W[46];
Chris@82 550 T49 = T47 * T48;
Chris@82 551 T4a = W[47];
Chris@82 552 T4j = T4a * T48;
Chris@82 553 T3M = T3K + T3L;
Chris@82 554 T3J = W[14];
Chris@82 555 T3N = T3J * T3M;
Chris@82 556 T3O = W[15];
Chris@82 557 T43 = T3O * T3M;
Chris@82 558 {
Chris@82 559 E T3U, T3V, T3Z, T40;
Chris@82 560 T3U = FNMS(KP923879532, T1G, T1j);
Chris@82 561 T3V = T2R - T2S;
Chris@82 562 T3W = FMA(KP980785280, T3V, T3U);
Chris@82 563 T4e = FNMS(KP980785280, T3V, T3U);
Chris@82 564 T3Z = FNMS(KP923879532, T2P, T2M);
Chris@82 565 T40 = T2z - T28;
Chris@82 566 T41 = FMA(KP980785280, T40, T3Z);
Chris@82 567 T4h = FNMS(KP980785280, T40, T3Z);
Chris@82 568 }
Chris@82 569 T3T = W[16];
Chris@82 570 T3X = T3T * T3W;
Chris@82 571 T45 = T3T * T41;
Chris@82 572 T4d = W[48];
Chris@82 573 T4f = T4d * T4e;
Chris@82 574 T4l = T4d * T4h;
Chris@82 575 }
Chris@82 576 {
Chris@82 577 E T3S, T44, T42, T46, T3Y;
Chris@82 578 T3S = FNMS(T3O, T3R, T3N);
Chris@82 579 T44 = FMA(T3J, T3R, T43);
Chris@82 580 T3Y = W[17];
Chris@82 581 T42 = FMA(T3Y, T41, T3X);
Chris@82 582 T46 = FNMS(T3Y, T3W, T45);
Chris@82 583 Rp[WS(rs, 4)] = T3S - T42;
Chris@82 584 Ip[WS(rs, 4)] = T44 + T46;
Chris@82 585 Rm[WS(rs, 4)] = T3S + T42;
Chris@82 586 Im[WS(rs, 4)] = T46 - T44;
Chris@82 587 }
Chris@82 588 {
Chris@82 589 E T4c, T4k, T4i, T4m, T4g;
Chris@82 590 T4c = FNMS(T4a, T4b, T49);
Chris@82 591 T4k = FMA(T47, T4b, T4j);
Chris@82 592 T4g = W[49];
Chris@82 593 T4i = FMA(T4g, T4h, T4f);
Chris@82 594 T4m = FNMS(T4g, T4e, T4l);
Chris@82 595 Rp[WS(rs, 12)] = T4c - T4i;
Chris@82 596 Ip[WS(rs, 12)] = T4k + T4m;
Chris@82 597 Rm[WS(rs, 12)] = T4c + T4i;
Chris@82 598 Im[WS(rs, 12)] = T4m - T4k;
Chris@82 599 }
Chris@82 600 }
/* Store stage: writes indices WS(rs, 2) and WS(rs, 10), using W[6] .. W[9] and W[38] .. W[41]. */
Chris@82 601 {
Chris@82 602 E T4H, T5d, T4n, T4z, T4A, T55, T59, T5b, T5c, T5l, T4U, T5g, T53, T5j, T4V;
Chris@82 603 E T57, T5h, T5n, T4D, T4G;
Chris@82 604 T4D = T4B + T4C;
Chris@82 605 T4G = T4E + T4F;
Chris@82 606 T4H = FMA(KP707106781, T4G, T4D);
Chris@82 607 T5d = FNMS(KP707106781, T4G, T4D);
Chris@82 608 {
Chris@82 609 E T4y, T5a, T4x, T4J, T5f;
Chris@82 610 T4x = T4t + T4w;
Chris@82 611 T4y = FMA(KP707106781, T4x, T4q);
Chris@82 612 T5a = FNMS(KP707106781, T4x, T4q);
Chris@82 613 T4n = W[6];
Chris@82 614 T4z = T4n * T4y;
Chris@82 615 T4A = W[7];
Chris@82 616 T55 = T4A * T4y;
Chris@82 617 T59 = W[38];
Chris@82 618 T5b = T59 * T5a;
Chris@82 619 T5c = W[39];
Chris@82 620 T5l = T5c * T5a;
Chris@82 621 {
Chris@82 622 E T4M, T4T, T4Z, T52;
Chris@82 623 T4M = FMA(KP923879532, T4L, T4K);
Chris@82 624 T4T = T4P - T4S;
Chris@82 625 T4U = FMA(KP831469612, T4T, T4M);
Chris@82 626 T5g = FNMS(KP831469612, T4T, T4M);
Chris@82 627 T4Z = FMA(KP923879532, T4Y, T4X);
Chris@82 628 T52 = T50 - T51;
Chris@82 629 T53 = FMA(KP831469612, T52, T4Z);
Chris@82 630 T5j = FNMS(KP831469612, T52, T4Z);
Chris@82 631 }
Chris@82 632 T4J = W[8];
Chris@82 633 T4V = T4J * T4U;
Chris@82 634 T57 = T4J * T53;
Chris@82 635 T5f = W[40];
Chris@82 636 T5h = T5f * T5g;
Chris@82 637 T5n = T5f * T5j;
Chris@82 638 }
Chris@82 639 {
Chris@82 640 E T4I, T56, T54, T58, T4W;
Chris@82 641 T4I = FNMS(T4A, T4H, T4z);
Chris@82 642 T56 = FMA(T4n, T4H, T55);
Chris@82 643 T4W = W[9];
Chris@82 644 T54 = FMA(T4W, T53, T4V);
Chris@82 645 T58 = FNMS(T4W, T4U, T57);
Chris@82 646 Rp[WS(rs, 2)] = T4I - T54;
Chris@82 647 Ip[WS(rs, 2)] = T56 + T58;
Chris@82 648 Rm[WS(rs, 2)] = T4I + T54;
Chris@82 649 Im[WS(rs, 2)] = T58 - T56;
Chris@82 650 }
Chris@82 651 {
Chris@82 652 E T5e, T5m, T5k, T5o, T5i;
Chris@82 653 T5e = FNMS(T5c, T5d, T5b);
Chris@82 654 T5m = FMA(T59, T5d, T5l);
Chris@82 655 T5i = W[41];
Chris@82 656 T5k = FMA(T5i, T5j, T5h);
Chris@82 657 T5o = FNMS(T5i, T5g, T5n);
Chris@82 658 Rp[WS(rs, 10)] = T5e - T5k;
Chris@82 659 Ip[WS(rs, 10)] = T5m + T5o;
Chris@82 660 Rm[WS(rs, 10)] = T5e + T5k;
Chris@82 661 Im[WS(rs, 10)] = T5o - T5m;
Chris@82 662 }
Chris@82 663 }
/* Store stage: writes indices WS(rs, 6) and WS(rs, 14), using W[22] .. W[25] and W[54] .. W[57]. */
Chris@82 664 {
Chris@82 665 E T5x, T5R, T5p, T5t, T5u, T5J, T5N, T5P, T5Q, T5Z, T5C, T5U, T5H, T5X, T5D;
Chris@82 666 E T5L, T5V, T61, T5v, T5w;
Chris@82 667 T5v = T4C - T4B;
Chris@82 668 T5w = T4t - T4w;
Chris@82 669 T5x = FMA(KP707106781, T5w, T5v);
Chris@82 670 T5R = FNMS(KP707106781, T5w, T5v);
Chris@82 671 {
Chris@82 672 E T5s, T5O, T5r, T5z, T5T;
Chris@82 673 T5r = T4F - T4E;
Chris@82 674 T5s = FMA(KP707106781, T5r, T5q);
Chris@82 675 T5O = FNMS(KP707106781, T5r, T5q);
Chris@82 676 T5p = W[22];
Chris@82 677 T5t = T5p * T5s;
Chris@82 678 T5u = W[23];
Chris@82 679 T5J = T5u * T5s;
Chris@82 680 T5N = W[54];
Chris@82 681 T5P = T5N * T5O;
Chris@82 682 T5Q = W[55];
Chris@82 683 T5Z = T5Q * T5O;
Chris@82 684 {
Chris@82 685 E T5A, T5B, T5F, T5G;
Chris@82 686 T5A = FNMS(KP923879532, T4L, T4K);
Chris@82 687 T5B = T51 + T50;
Chris@82 688 T5C = FNMS(KP831469612, T5B, T5A);
Chris@82 689 T5U = FMA(KP831469612, T5B, T5A);
Chris@82 690 T5F = FNMS(KP923879532, T4Y, T4X);
Chris@82 691 T5G = T4P + T4S;
Chris@82 692 T5H = FNMS(KP831469612, T5G, T5F);
Chris@82 693 T5X = FMA(KP831469612, T5G, T5F);
Chris@82 694 }
Chris@82 695 T5z = W[24];
Chris@82 696 T5D = T5z * T5C;
Chris@82 697 T5L = T5z * T5H;
Chris@82 698 T5T = W[56];
Chris@82 699 T5V = T5T * T5U;
Chris@82 700 T61 = T5T * T5X;
Chris@82 701 }
Chris@82 702 {
Chris@82 703 E T5y, T5K, T5I, T5M, T5E;
Chris@82 704 T5y = FNMS(T5u, T5x, T5t);
Chris@82 705 T5K = FMA(T5p, T5x, T5J);
Chris@82 706 T5E = W[25];
Chris@82 707 T5I = FMA(T5E, T5H, T5D);
Chris@82 708 T5M = FNMS(T5E, T5C, T5L);
Chris@82 709 Rp[WS(rs, 6)] = T5y - T5I;
Chris@82 710 Ip[WS(rs, 6)] = T5K + T5M;
Chris@82 711 Rm[WS(rs, 6)] = T5y + T5I;
Chris@82 712 Im[WS(rs, 6)] = T5M - T5K;
Chris@82 713 }
Chris@82 714 {
Chris@82 715 E T5S, T60, T5Y, T62, T5W;
Chris@82 716 T5S = FNMS(T5Q, T5R, T5P);
Chris@82 717 T60 = FMA(T5N, T5R, T5Z);
Chris@82 718 T5W = W[57];
Chris@82 719 T5Y = FMA(T5W, T5X, T5V);
Chris@82 720 T62 = FNMS(T5W, T5U, T61);
Chris@82 721 Rp[WS(rs, 14)] = T5S - T5Y;
Chris@82 722 Ip[WS(rs, 14)] = T60 + T62;
Chris@82 723 Rm[WS(rs, 14)] = T5S + T5Y;
Chris@82 724 Im[WS(rs, 14)] = T62 - T60;
Chris@82 725 }
Chris@82 726 }
/* Store stage: writes indices WS(rs, 1) and WS(rs, 9), using W[2] .. W[5] and W[34] .. W[37]. */
Chris@82 727 {
Chris@82 728 E T6H, T7x, T63, T6v, T6w, T7p, T7t, T7v, T7w, T7F, T7a, T7A, T7n, T7D, T7b;
Chris@82 729 E T7r, T7B, T7H;
Chris@82 730 {
Chris@82 731 E T6D, T6G, T6J, T7z;
Chris@82 732 T6D = FMA(KP707106781, T6C, T6z);
Chris@82 733 T6G = T6E + T6F;
Chris@82 734 T6H = FMA(KP923879532, T6G, T6D);
Chris@82 735 T7x = FNMS(KP923879532, T6G, T6D);
Chris@82 736 {
Chris@82 737 E T6u, T7u, T6e, T6t;
Chris@82 738 T6e = FMA(KP707106781, T6d, T66);
Chris@82 739 T6t = T6l + T6s;
Chris@82 740 T6u = FMA(KP923879532, T6t, T6e);
Chris@82 741 T7u = FNMS(KP923879532, T6t, T6e);
Chris@82 742 T63 = W[2];
Chris@82 743 T6v = T63 * T6u;
Chris@82 744 T6w = W[3];
Chris@82 745 T7p = T6w * T6u;
Chris@82 746 T7t = W[34];
Chris@82 747 T7v = T7t * T7u;
Chris@82 748 T7w = W[35];
Chris@82 749 T7F = T7w * T7u;
Chris@82 750 }
Chris@82 751 {
Chris@82 752 E T6U, T79, T7j, T7m;
Chris@82 753 T6U = FMA(KP923879532, T6T, T6M);
Chris@82 754 T79 = T71 - T78;
Chris@82 755 T7a = FMA(KP831469612, T79, T6U);
Chris@82 756 T7A = FNMS(KP831469612, T79, T6U);
Chris@82 757 T7j = FNMS(KP923879532, T7i, T7f);
Chris@82 758 T7m = T7k + T7l;
Chris@82 759 T7n = FMA(KP831469612, T7m, T7j);
Chris@82 760 T7D = FNMS(KP831469612, T7m, T7j);
Chris@82 761 }
Chris@82 762 T6J = W[4];
Chris@82 763 T7b = T6J * T7a;
Chris@82 764 T7r = T6J * T7n;
Chris@82 765 T7z = W[36];
Chris@82 766 T7B = T7z * T7A;
Chris@82 767 T7H = T7z * T7D;
Chris@82 768 }
Chris@82 769 {
Chris@82 770 E T6I, T7q, T7o, T7s, T7c;
Chris@82 771 T6I = FNMS(T6w, T6H, T6v);
Chris@82 772 T7q = FMA(T63, T6H, T7p);
Chris@82 773 T7c = W[5];
Chris@82 774 T7o = FMA(T7c, T7n, T7b);
Chris@82 775 T7s = FNMS(T7c, T7a, T7r);
Chris@82 776 Rp[WS(rs, 1)] = T6I - T7o;
Chris@82 777 Ip[WS(rs, 1)] = T7q + T7s;
Chris@82 778 Rm[WS(rs, 1)] = T6I + T7o;
Chris@82 779 Im[WS(rs, 1)] = T7s - T7q;
Chris@82 780 }
Chris@82 781 {
Chris@82 782 E T7y, T7G, T7E, T7I, T7C;
Chris@82 783 T7y = FNMS(T7w, T7x, T7v);
Chris@82 784 T7G = FMA(T7t, T7x, T7F);
Chris@82 785 T7C = W[37];
Chris@82 786 T7E = FMA(T7C, T7D, T7B);
Chris@82 787 T7I = FNMS(T7C, T7A, T7H);
Chris@82 788 Rp[WS(rs, 9)] = T7y - T7E;
Chris@82 789 Ip[WS(rs, 9)] = T7G + T7I;
Chris@82 790 Rm[WS(rs, 9)] = T7y + T7E;
Chris@82 791 Im[WS(rs, 9)] = T7I - T7G;
Chris@82 792 }
Chris@82 793 }
/* Store stage: writes indices WS(rs, 3) and WS(rs, 11), using W[10] .. W[13] and W[42] .. W[45]. */
Chris@82 794 {
Chris@82 795 E T8H, T9d, T8n, T8z, T8A, T95, T99, T9b, T9c, T9l, T8U, T9g, T93, T9j, T8V;
Chris@82 796 E T97, T9h, T9n;
Chris@82 797 {
Chris@82 798 E T8D, T8G, T8J, T9f;
Chris@82 799 T8D = FMA(KP707106781, T8C, T8B);
Chris@82 800 T8G = T8E - T8F;
Chris@82 801 T8H = FMA(KP923879532, T8G, T8D);
Chris@82 802 T9d = FNMS(KP923879532, T8G, T8D);
Chris@82 803 {
Chris@82 804 E T8y, T9a, T8q, T8x;
Chris@82 805 T8q = FMA(KP707106781, T8p, T8o);
Chris@82 806 T8x = T8t - T8w;
Chris@82 807 T8y = FMA(KP923879532, T8x, T8q);
Chris@82 808 T9a = FNMS(KP923879532, T8x, T8q);
Chris@82 809 T8n = W[10];
Chris@82 810 T8z = T8n * T8y;
Chris@82 811 T8A = W[11];
Chris@82 812 T95 = T8A * T8y;
Chris@82 813 T99 = W[42];
Chris@82 814 T9b = T99 * T9a;
Chris@82 815 T9c = W[43];
Chris@82 816 T9l = T9c * T9a;
Chris@82 817 }
Chris@82 818 {
Chris@82 819 E T8M, T8T, T8Z, T92;
Chris@82 820 T8M = FMA(KP923879532, T8L, T8K);
Chris@82 821 T8T = T8P - T8S;
Chris@82 822 T8U = FMA(KP980785280, T8T, T8M);
Chris@82 823 T9g = FNMS(KP980785280, T8T, T8M);
Chris@82 824 T8Z = FNMS(KP923879532, T8Y, T8X);
Chris@82 825 T92 = T90 + T91;
Chris@82 826 T93 = FNMS(KP980785280, T92, T8Z);
Chris@82 827 T9j = FMA(KP980785280, T92, T8Z);
Chris@82 828 }
Chris@82 829 T8J = W[12];
Chris@82 830 T8V = T8J * T8U;
Chris@82 831 T97 = T8J * T93;
Chris@82 832 T9f = W[44];
Chris@82 833 T9h = T9f * T9g;
Chris@82 834 T9n = T9f * T9j;
Chris@82 835 }
Chris@82 836 {
Chris@82 837 E T8I, T96, T94, T98, T8W;
Chris@82 838 T8I = FNMS(T8A, T8H, T8z);
Chris@82 839 T96 = FMA(T8n, T8H, T95);
Chris@82 840 T8W = W[13];
Chris@82 841 T94 = FMA(T8W, T93, T8V);
Chris@82 842 T98 = FNMS(T8W, T8U, T97);
Chris@82 843 Rp[WS(rs, 3)] = T8I - T94;
Chris@82 844 Ip[WS(rs, 3)] = T96 + T98;
Chris@82 845 Rm[WS(rs, 3)] = T8I + T94;
Chris@82 846 Im[WS(rs, 3)] = T98 - T96;
Chris@82 847 }
Chris@82 848 {
Chris@82 849 E T9e, T9m, T9k, T9o, T9i;
Chris@82 850 T9e = FNMS(T9c, T9d, T9b);
Chris@82 851 T9m = FMA(T99, T9d, T9l);
Chris@82 852 T9i = W[45];
Chris@82 853 T9k = FMA(T9i, T9j, T9h);
Chris@82 854 T9o = FNMS(T9i, T9g, T9n);
Chris@82 855 Rp[WS(rs, 11)] = T9e - T9k;
Chris@82 856 Ip[WS(rs, 11)] = T9m + T9o;
Chris@82 857 Rm[WS(rs, 11)] = T9e + T9k;
Chris@82 858 Im[WS(rs, 11)] = T9o - T9m;
Chris@82 859 }
Chris@82 860 }
/* Store stage: writes indices WS(rs, 7) and WS(rs, 15), using W[26] .. W[29] and W[58] .. W[61]. */
Chris@82 861 {
Chris@82 862 E T9x, T9R, T9p, T9t, T9u, T9J, T9N, T9P, T9Q, T9Z, T9C, T9U, T9H, T9X, T9D;
Chris@82 863 E T9L, T9V, Ta1;
Chris@82 864 {
Chris@82 865 E T9v, T9w, T9z, T9T;
Chris@82 866 T9v = FNMS(KP707106781, T8C, T8B);
Chris@82 867 T9w = T8w + T8t;
Chris@82 868 T9x = FNMS(KP923879532, T9w, T9v);
Chris@82 869 T9R = FMA(KP923879532, T9w, T9v);
Chris@82 870 {
Chris@82 871 E T9s, T9O, T9q, T9r;
Chris@82 872 T9q = FNMS(KP707106781, T8p, T8o);
Chris@82 873 T9r = T8E + T8F;
Chris@82 874 T9s = FNMS(KP923879532, T9r, T9q);
Chris@82 875 T9O = FMA(KP923879532, T9r, T9q);
Chris@82 876 T9p = W[26];
Chris@82 877 T9t = T9p * T9s;
Chris@82 878 T9u = W[27];
Chris@82 879 T9J = T9u * T9s;
Chris@82 880 T9N = W[58];
Chris@82 881 T9P = T9N * T9O;
Chris@82 882 T9Q = W[59];
Chris@82 883 T9Z = T9Q * T9O;
Chris@82 884 }
Chris@82 885 {
Chris@82 886 E T9A, T9B, T9F, T9G;
Chris@82 887 T9A = FNMS(KP923879532, T8L, T8K);
Chris@82 888 T9B = T91 - T90;
Chris@82 889 T9C = FMA(KP980785280, T9B, T9A);
Chris@82 890 T9U = FNMS(KP980785280, T9B, T9A);
Chris@82 891 T9F = FMA(KP923879532, T8Y, T8X);
Chris@82 892 T9G = T8P + T8S;
Chris@82 893 T9H = FNMS(KP980785280, T9G, T9F);
Chris@82 894 T9X = FMA(KP980785280, T9G, T9F);
Chris@82 895 }
Chris@82 896 T9z = W[28];
Chris@82 897 T9D = T9z * T9C;
Chris@82 898 T9L = T9z * T9H;
Chris@82 899 T9T = W[60];
Chris@82 900 T9V = T9T * T9U;
Chris@82 901 Ta1 = T9T * T9X;
Chris@82 902 }
Chris@82 903 {
Chris@82 904 E T9y, T9K, T9I, T9M, T9E;
Chris@82 905 T9y = FNMS(T9u, T9x, T9t);
Chris@82 906 T9K = FMA(T9p, T9x, T9J);
Chris@82 907 T9E = W[29];
Chris@82 908 T9I = FMA(T9E, T9H, T9D);
Chris@82 909 T9M = FNMS(T9E, T9C, T9L);
Chris@82 910 Rp[WS(rs, 7)] = T9y - T9I;
Chris@82 911 Ip[WS(rs, 7)] = T9K + T9M;
Chris@82 912 Rm[WS(rs, 7)] = T9y + T9I;
Chris@82 913 Im[WS(rs, 7)] = T9M - T9K;
Chris@82 914 }
Chris@82 915 {
Chris@82 916 E T9S, Ta0, T9Y, Ta2, T9W;
Chris@82 917 T9S = FNMS(T9Q, T9R, T9P);
Chris@82 918 Ta0 = FMA(T9N, T9R, T9Z);
Chris@82 919 T9W = W[61];
Chris@82 920 T9Y = FMA(T9W, T9X, T9V);
Chris@82 921 Ta2 = FNMS(T9W, T9U, Ta1);
Chris@82 922 Rp[WS(rs, 15)] = T9S - T9Y;
Chris@82 923 Ip[WS(rs, 15)] = Ta0 + Ta2;
Chris@82 924 Rm[WS(rs, 15)] = T9S + T9Y;
Chris@82 925 Im[WS(rs, 15)] = Ta2 - Ta0;
Chris@82 926 }
Chris@82 927 }
/* Store stage: writes indices WS(rs, 5) and WS(rs, 13), using W[18] .. W[21] and W[50] .. W[53]. */
Chris@82 928 {
Chris@82 929 E T7R, T8b, T7J, T7N, T7O, T83, T87, T89, T8a, T8j, T7W, T8e, T81, T8h, T7X;
Chris@82 930 E T85, T8f, T8l;
Chris@82 931 {
Chris@82 932 E T7P, T7Q, T7T, T8d;
Chris@82 933 T7P = FNMS(KP707106781, T6C, T6z);
Chris@82 934 T7Q = T6l - T6s;
Chris@82 935 T7R = FMA(KP923879532, T7Q, T7P);
Chris@82 936 T8b = FNMS(KP923879532, T7Q, T7P);
Chris@82 937 {
Chris@82 938 E T7M, T88, T7K, T7L;
Chris@82 939 T7K = FNMS(KP707106781, T6d, T66);
Chris@82 940 T7L = T6F - T6E;
Chris@82 941 T7M = FMA(KP923879532, T7L, T7K);
Chris@82 942 T88 = FNMS(KP923879532, T7L, T7K);
Chris@82 943 T7J = W[18];
Chris@82 944 T7N = T7J * T7M;
Chris@82 945 T7O = W[19];
Chris@82 946 T83 = T7O * T7M;
Chris@82 947 T87 = W[50];
Chris@82 948 T89 = T87 * T88;
Chris@82 949 T8a = W[51];
Chris@82 950 T8j = T8a * T88;
Chris@82 951 }
Chris@82 952 {
Chris@82 953 E T7U, T7V, T7Z, T80;
Chris@82 954 T7U = FNMS(KP923879532, T6T, T6M);
Chris@82 955 T7V = T7k - T7l;
Chris@82 956 T7W = FMA(KP831469612, T7V, T7U);
Chris@82 957 T8e = FNMS(KP831469612, T7V, T7U);
Chris@82 958 T7Z = FMA(KP923879532, T7i, T7f);
Chris@82 959 T80 = T71 + T78;
Chris@82 960 T81 = FNMS(KP831469612, T80, T7Z);
Chris@82 961 T8h = FMA(KP831469612, T80, T7Z);
Chris@82 962 }
Chris@82 963 T7T = W[20];
Chris@82 964 T7X = T7T * T7W;
Chris@82 965 T85 = T7T * T81;
Chris@82 966 T8d = W[52];
Chris@82 967 T8f = T8d * T8e;
Chris@82 968 T8l = T8d * T8h;
Chris@82 969 }
Chris@82 970 {
Chris@82 971 E T7S, T84, T82, T86, T7Y;
Chris@82 972 T7S = FNMS(T7O, T7R, T7N);
Chris@82 973 T84 = FMA(T7J, T7R, T83);
Chris@82 974 T7Y = W[21];
Chris@82 975 T82 = FMA(T7Y, T81, T7X);
Chris@82 976 T86 = FNMS(T7Y, T7W, T85);
Chris@82 977 Rp[WS(rs, 5)] = T7S - T82;
Chris@82 978 Ip[WS(rs, 5)] = T84 + T86;
Chris@82 979 Rm[WS(rs, 5)] = T7S + T82;
Chris@82 980 Im[WS(rs, 5)] = T86 - T84;
Chris@82 981 }
Chris@82 982 {
Chris@82 983 E T8c, T8k, T8i, T8m, T8g;
Chris@82 984 T8c = FNMS(T8a, T8b, T89);
Chris@82 985 T8k = FMA(T87, T8b, T8j);
Chris@82 986 T8g = W[53];
Chris@82 987 T8i = FMA(T8g, T8h, T8f);
Chris@82 988 T8m = FNMS(T8g, T8e, T8l);
Chris@82 989 Rp[WS(rs, 13)] = T8c - T8i;
Chris@82 990 Ip[WS(rs, 13)] = T8k + T8m;
Chris@82 991 Rm[WS(rs, 13)] = T8c + T8i;
Chris@82 992 Im[WS(rs, 13)] = T8m - T8k;
Chris@82 993 }
Chris@82 994 }
Chris@82 995 }
Chris@82 996 }
Chris@82 997 }
Chris@82 998
/*
 * Twiddle instruction list for the FMA variant of this codelet; it is
 * referenced by `desc` below.
 * NOTE(review): presumably {TW_FULL, 1, 32} requests the complete twiddle
 * set for a size-32 transform and {TW_NEXT, 1, 0} ends the list -- confirm
 * against the tw_instr definition, which is not visible in this file.
 */
Chris@82 999 static const tw_instr twinstr[] = {
Chris@82 1000 {TW_FULL, 1, 32},
Chris@82 1001 {TW_NEXT, 1, 0}
Chris@82 1002 };
Chris@82 1003
/*
 * Descriptor passed to X(khc2c_register) for the FMA variant: size 32, the
 * codelet name, the twiddle instruction list and &GENUS.  The first three
 * counts (300, 62, 198) equal the additions / multiplications / fused
 * multiply-adds quoted in the generated header comment of this variant.
 * NOTE(review): the meaning of the trailing 0 is not visible here -- confirm
 * against the hc2c_desc definition.
 */
Chris@82 1004 static const hc2c_desc desc = { 32, "hc2cbdft2_32", twinstr, &GENUS, {300, 62, 198, 0} };
Chris@82 1005
/*
 * Registration entry point (FMA variant): hands the hc2cbdft2_32 kernel and
 * its descriptor to the planner `p` through X(khc2c_register), tagged with
 * HC2C_VIA_DFT.
 * NOTE(review): X() is presumably the library's symbol-prefixing macro --
 * its definition is not visible in this file.
 */
Chris@82 1006 void X(codelet_hc2cbdft2_32) (planner *p) {
Chris@82 1007 X(khc2c_register) (p, hc2cbdft2_32, &desc, HC2C_VIA_DFT);
Chris@82 1008 }
Chris@82 1009 #else
Chris@82 1010
Chris@82 1011 /* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cbdft2_32 -include rdft/scalar/hc2cb.h */
Chris@82 1012
Chris@82 1013 /*
Chris@82 1014 * This function contains 498 FP additions, 208 FP multiplications,
Chris@82 1015 * (or, 404 additions, 114 multiplications, 94 fused multiply/add),
Chris@82 1016 * 102 stack variables, 7 constants, and 128 memory accesses
Chris@82 1017 */
Chris@82 1018 #include "rdft/scalar/hc2cb.h"
Chris@82 1019
/*
 * hc2cbdft2_32 -- non-FMA variant (compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined).  Machine-generated size-32 kernel;
 * statement order is the generator's schedule and should not be hand-edited.
 *
 * For every m in [mb, me) the kernel
 *   1. loads 16 elements from each of Rp, Ip, Rm and Im at offsets
 *      WS(rs, 0) .. WS(rs, 15) (64 loads),
 *   2. combines them through a network of sums, differences and rotations by
 *      the constants declared below,
 *   3. multiplies the results by the twiddle values W[0] .. W[61], and
 *   4. stores 16 elements back into each of the four arrays, in place
 *      (64 stores).
 *
 * Parameters:
 *   Rp, Ip  arrays read and written at WS(rs, k), k = 0..15; stepped by +ms
 *           after each iteration.
 *   Rm, Im  arrays read and written at WS(rs, k), k = 0..15; stepped by -ms
 *           after each iteration.
 *   W       twiddle table; the first iteration uses W + (mb - 1) * 62 and
 *           each iteration consumes 62 consecutive values.
 *   rs      stride argument given to WS() for element addressing.
 *   mb, me  half-open iteration range; nothing is done when mb >= me.
 *   ms      per-iteration pointer step for the four data arrays.
 */
Chris@82 1020 static void hc2cbdft2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 1021 {
/*
 * Rotation constants; each name encodes the leading digits of its value.
 * Numerically: 0.8314.. = cos(3*pi/16), 0.5555.. = sin(3*pi/16),
 * 0.1950.. = sin(pi/16), 0.9807.. = cos(pi/16), 0.9238.. = cos(pi/8),
 * 0.3826.. = sin(pi/8), 0.7071.. = sqrt(1/2).
 */
Chris@82 1022 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 1023 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@82 1024 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@82 1025 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 1026 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 1027 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@82 1028 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 1029 {
Chris@82 1030 INT m;
/*
 * Main loop: W is first offset by (mb - 1) * 62, then each pass advances
 * Rp/Ip by +ms, Rm/Im by -ms and W by 62.
 */
Chris@82 1031 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) {
/* Intermediate values that survive from the four load stages into the output stages. */
Chris@82 1032 E Tf, T4a, T6h, T7Z, T6P, T8e, T1j, T4v, T2R, T4L, T5C, T7E, T6a, T7U, T3n;
Chris@82 1033 E T4q, TZ, T38, T2p, T4B, T7M, T7R, T2y, T4C, T5Y, T63, T6C, T86, T4i, T4n;
Chris@82 1034 E T6z, T85, TK, T31, T1Y, T4y, T7J, T7Q, T27, T4z, T5R, T62, T6v, T83, T4f;
Chris@82 1035 E T4m, T6s, T82, Tu, T4p, T6o, T8f, T6M, T80, T1G, T4K, T2I, T4w, T5J, T7T;
Chris@82 1036 E T67, T7F, T3g, T4b;
/*
 * Load stage 1: Rp/Ip at offsets 0, 4, 8, 12 and Rm/Im at offsets 3, 7, 11, 15.
 * Forms pairwise sums/differences (some scaled by KP707106781).
 */
Chris@82 1037 {
Chris@82 1038 E T3, T2M, T16, T3k, T6, T13, T2P, T3l, Td, T3i, T1h, T2K, Ta, T3h, T1c;
Chris@82 1039 E T2J;
Chris@82 1040 {
Chris@82 1041 E T1, T2, T2N, T2O;
Chris@82 1042 T1 = Rp[0];
Chris@82 1043 T2 = Rm[WS(rs, 15)];
Chris@82 1044 T3 = T1 + T2;
Chris@82 1045 T2M = T1 - T2;
Chris@82 1046 {
Chris@82 1047 E T14, T15, T4, T5;
Chris@82 1048 T14 = Ip[0];
Chris@82 1049 T15 = Im[WS(rs, 15)];
Chris@82 1050 T16 = T14 + T15;
Chris@82 1051 T3k = T14 - T15;
Chris@82 1052 T4 = Rp[WS(rs, 8)];
Chris@82 1053 T5 = Rm[WS(rs, 7)];
Chris@82 1054 T6 = T4 + T5;
Chris@82 1055 T13 = T4 - T5;
Chris@82 1056 }
Chris@82 1057 T2N = Ip[WS(rs, 8)];
Chris@82 1058 T2O = Im[WS(rs, 7)];
Chris@82 1059 T2P = T2N + T2O;
Chris@82 1060 T3l = T2N - T2O;
Chris@82 1061 {
Chris@82 1062 E Tb, Tc, T1d, T1e, T1f, T1g;
Chris@82 1063 Tb = Rm[WS(rs, 3)];
Chris@82 1064 Tc = Rp[WS(rs, 12)];
Chris@82 1065 T1d = Tb - Tc;
Chris@82 1066 T1e = Im[WS(rs, 3)];
Chris@82 1067 T1f = Ip[WS(rs, 12)];
Chris@82 1068 T1g = T1e + T1f;
Chris@82 1069 Td = Tb + Tc;
Chris@82 1070 T3i = T1f - T1e;
Chris@82 1071 T1h = T1d + T1g;
Chris@82 1072 T2K = T1d - T1g;
Chris@82 1073 }
Chris@82 1074 {
Chris@82 1075 E T8, T9, T18, T19, T1a, T1b;
Chris@82 1076 T8 = Rp[WS(rs, 4)];
Chris@82 1077 T9 = Rm[WS(rs, 11)];
Chris@82 1078 T18 = T8 - T9;
Chris@82 1079 T19 = Ip[WS(rs, 4)];
Chris@82 1080 T1a = Im[WS(rs, 11)];
Chris@82 1081 T1b = T19 + T1a;
Chris@82 1082 Ta = T8 + T9;
Chris@82 1083 T3h = T19 - T1a;
Chris@82 1084 T1c = T18 + T1b;
Chris@82 1085 T2J = T18 - T1b;
Chris@82 1086 }
Chris@82 1087 }
Chris@82 1088 {
Chris@82 1089 E T7, Te, T6f, T6g;
Chris@82 1090 T7 = T3 + T6;
Chris@82 1091 Te = Ta + Td;
Chris@82 1092 Tf = T7 + Te;
Chris@82 1093 T4a = T7 - Te;
Chris@82 1094 T6f = T16 - T13;
Chris@82 1095 T6g = KP707106781 * (T2J - T2K);
Chris@82 1096 T6h = T6f + T6g;
Chris@82 1097 T7Z = T6f - T6g;
Chris@82 1098 }
Chris@82 1099 {
Chris@82 1100 E T6N, T6O, T17, T1i;
Chris@82 1101 T6N = T2M + T2P;
Chris@82 1102 T6O = KP707106781 * (T1c + T1h);
Chris@82 1103 T6P = T6N - T6O;
Chris@82 1104 T8e = T6O + T6N;
Chris@82 1105 T17 = T13 + T16;
Chris@82 1106 T1i = KP707106781 * (T1c - T1h);
Chris@82 1107 T1j = T17 + T1i;
Chris@82 1108 T4v = T17 - T1i;
Chris@82 1109 }
Chris@82 1110 {
Chris@82 1111 E T2L, T2Q, T5A, T5B;
Chris@82 1112 T2L = KP707106781 * (T2J + T2K);
Chris@82 1113 T2Q = T2M - T2P;
Chris@82 1114 T2R = T2L + T2Q;
Chris@82 1115 T4L = T2Q - T2L;
Chris@82 1116 T5A = T3 - T6;
Chris@82 1117 T5B = T3i - T3h;
Chris@82 1118 T5C = T5A + T5B;
Chris@82 1119 T7E = T5A - T5B;
Chris@82 1120 }
Chris@82 1121 {
Chris@82 1122 E T68, T69, T3j, T3m;
Chris@82 1123 T68 = Ta - Td;
Chris@82 1124 T69 = T3k - T3l;
Chris@82 1125 T6a = T68 + T69;
Chris@82 1126 T7U = T69 - T68;
Chris@82 1127 T3j = T3h + T3i;
Chris@82 1128 T3m = T3k + T3l;
Chris@82 1129 T3n = T3j + T3m;
Chris@82 1130 T4q = T3m - T3j;
Chris@82 1131 }
Chris@82 1132 }
/*
 * Load stage 2: Rp/Ip at offsets 3, 7, 11, 15 and Rm/Im at offsets 0, 4, 8, 12.
 * Also applies rotations built from KP382683432 / KP923879532.
 */
Chris@82 1133 {
Chris@82 1134 E TR, T5S, T29, T2t, T2c, T5W, T2w, T37, TY, T5T, T5V, T2i, T2n, T2r, T34;
Chris@82 1135 E T2q, T6A, T6B;
Chris@82 1136 {
Chris@82 1137 E TL, TM, TN, TO, TP, TQ;
Chris@82 1138 TL = Rm[0];
Chris@82 1139 TM = Rp[WS(rs, 15)];
Chris@82 1140 TN = TL + TM;
Chris@82 1141 TO = Rp[WS(rs, 7)];
Chris@82 1142 TP = Rm[WS(rs, 8)];
Chris@82 1143 TQ = TO + TP;
Chris@82 1144 TR = TN + TQ;
Chris@82 1145 T5S = TN - TQ;
Chris@82 1146 T29 = TO - TP;
Chris@82 1147 T2t = TL - TM;
Chris@82 1148 }
Chris@82 1149 {
Chris@82 1150 E T2a, T2b, T35, T2u, T2v, T36;
Chris@82 1151 T2a = Im[0];
Chris@82 1152 T2b = Ip[WS(rs, 15)];
Chris@82 1153 T35 = T2b - T2a;
Chris@82 1154 T2u = Ip[WS(rs, 7)];
Chris@82 1155 T2v = Im[WS(rs, 8)];
Chris@82 1156 T36 = T2u - T2v;
Chris@82 1157 T2c = T2a + T2b;
Chris@82 1158 T5W = T35 - T36;
Chris@82 1159 T2w = T2u + T2v;
Chris@82 1160 T37 = T35 + T36;
Chris@82 1161 }
Chris@82 1162 {
Chris@82 1163 E TU, T2e, T2h, T32, TX, T2j, T2m, T33;
Chris@82 1164 {
Chris@82 1165 E TS, TT, T2f, T2g;
Chris@82 1166 TS = Rp[WS(rs, 3)];
Chris@82 1167 TT = Rm[WS(rs, 12)];
Chris@82 1168 TU = TS + TT;
Chris@82 1169 T2e = TS - TT;
Chris@82 1170 T2f = Ip[WS(rs, 3)];
Chris@82 1171 T2g = Im[WS(rs, 12)];
Chris@82 1172 T2h = T2f + T2g;
Chris@82 1173 T32 = T2f - T2g;
Chris@82 1174 }
Chris@82 1175 {
Chris@82 1176 E TV, TW, T2k, T2l;
Chris@82 1177 TV = Rm[WS(rs, 4)];
Chris@82 1178 TW = Rp[WS(rs, 11)];
Chris@82 1179 TX = TV + TW;
Chris@82 1180 T2j = TV - TW;
Chris@82 1181 T2k = Im[WS(rs, 4)];
Chris@82 1182 T2l = Ip[WS(rs, 11)];
Chris@82 1183 T2m = T2k + T2l;
Chris@82 1184 T33 = T2l - T2k;
Chris@82 1185 }
Chris@82 1186 TY = TU + TX;
Chris@82 1187 T5T = T33 - T32;
Chris@82 1188 T5V = TU - TX;
Chris@82 1189 T2i = T2e + T2h;
Chris@82 1190 T2n = T2j + T2m;
Chris@82 1191 T2r = T2j - T2m;
Chris@82 1192 T34 = T32 + T33;
Chris@82 1193 T2q = T2e - T2h;
Chris@82 1194 }
Chris@82 1195 TZ = TR + TY;
Chris@82 1196 T38 = T34 + T37;
Chris@82 1197 {
Chris@82 1198 E T2d, T2o, T7K, T7L;
Chris@82 1199 T2d = T29 - T2c;
Chris@82 1200 T2o = KP707106781 * (T2i - T2n);
Chris@82 1201 T2p = T2d + T2o;
Chris@82 1202 T4B = T2d - T2o;
Chris@82 1203 T7K = T5S - T5T;
Chris@82 1204 T7L = T5W - T5V;
Chris@82 1205 T7M = FMA(KP382683432, T7K, KP923879532 * T7L);
Chris@82 1206 T7R = FNMS(KP923879532, T7K, KP382683432 * T7L);
Chris@82 1207 }
Chris@82 1208 {
Chris@82 1209 E T2s, T2x, T5U, T5X;
Chris@82 1210 T2s = KP707106781 * (T2q + T2r);
Chris@82 1211 T2x = T2t - T2w;
Chris@82 1212 T2y = T2s + T2x;
Chris@82 1213 T4C = T2x - T2s;
Chris@82 1214 T5U = T5S + T5T;
Chris@82 1215 T5X = T5V + T5W;
Chris@82 1216 T5Y = FMA(KP923879532, T5U, KP382683432 * T5X);
Chris@82 1217 T63 = FNMS(KP382683432, T5U, KP923879532 * T5X);
Chris@82 1218 }
Chris@82 1219 T6A = T2t + T2w;
Chris@82 1220 T6B = KP707106781 * (T2i + T2n);
Chris@82 1221 T6C = T6A - T6B;
Chris@82 1222 T86 = T6B + T6A;
Chris@82 1223 {
Chris@82 1224 E T4g, T4h, T6x, T6y;
Chris@82 1225 T4g = TR - TY;
Chris@82 1226 T4h = T37 - T34;
Chris@82 1227 T4i = T4g + T4h;
Chris@82 1228 T4n = T4h - T4g;
Chris@82 1229 T6x = KP707106781 * (T2q - T2r);
Chris@82 1230 T6y = T29 + T2c;
Chris@82 1231 T6z = T6x - T6y;
Chris@82 1232 T85 = T6y + T6x;
Chris@82 1233 }
Chris@82 1234 }
/*
 * Load stage 3: Rp/Ip at offsets 1, 5, 9, 13 and Rm/Im at offsets 2, 6, 10, 14.
 * Same structure as stage 2, applied to a different set of inputs.
 */
Chris@82 1235 {
Chris@82 1236 E TC, T5L, T1I, T22, T1L, T5P, T25, T30, TJ, T5M, T5O, T1R, T1W, T20, T2X;
Chris@82 1237 E T1Z, T6t, T6u;
Chris@82 1238 {
Chris@82 1239 E Tw, Tx, Ty, Tz, TA, TB;
Chris@82 1240 Tw = Rp[WS(rs, 1)];
Chris@82 1241 Tx = Rm[WS(rs, 14)];
Chris@82 1242 Ty = Tw + Tx;
Chris@82 1243 Tz = Rp[WS(rs, 9)];
Chris@82 1244 TA = Rm[WS(rs, 6)];
Chris@82 1245 TB = Tz + TA;
Chris@82 1246 TC = Ty + TB;
Chris@82 1247 T5L = Ty - TB;
Chris@82 1248 T1I = Tz - TA;
Chris@82 1249 T22 = Tw - Tx;
Chris@82 1250 }
Chris@82 1251 {
Chris@82 1252 E T1J, T1K, T2Y, T23, T24, T2Z;
Chris@82 1253 T1J = Ip[WS(rs, 1)];
Chris@82 1254 T1K = Im[WS(rs, 14)];
Chris@82 1255 T2Y = T1J - T1K;
Chris@82 1256 T23 = Ip[WS(rs, 9)];
Chris@82 1257 T24 = Im[WS(rs, 6)];
Chris@82 1258 T2Z = T23 - T24;
Chris@82 1259 T1L = T1J + T1K;
Chris@82 1260 T5P = T2Y - T2Z;
Chris@82 1261 T25 = T23 + T24;
Chris@82 1262 T30 = T2Y + T2Z;
Chris@82 1263 }
Chris@82 1264 {
Chris@82 1265 E TF, T1N, T1Q, T2V, TI, T1S, T1V, T2W;
Chris@82 1266 {
Chris@82 1267 E TD, TE, T1O, T1P;
Chris@82 1268 TD = Rp[WS(rs, 5)];
Chris@82 1269 TE = Rm[WS(rs, 10)];
Chris@82 1270 TF = TD + TE;
Chris@82 1271 T1N = TD - TE;
Chris@82 1272 T1O = Ip[WS(rs, 5)];
Chris@82 1273 T1P = Im[WS(rs, 10)];
Chris@82 1274 T1Q = T1O + T1P;
Chris@82 1275 T2V = T1O - T1P;
Chris@82 1276 }
Chris@82 1277 {
Chris@82 1278 E TG, TH, T1T, T1U;
Chris@82 1279 TG = Rm[WS(rs, 2)];
Chris@82 1280 TH = Rp[WS(rs, 13)];
Chris@82 1281 TI = TG + TH;
Chris@82 1282 T1S = TG - TH;
Chris@82 1283 T1T = Im[WS(rs, 2)];
Chris@82 1284 T1U = Ip[WS(rs, 13)];
Chris@82 1285 T1V = T1T + T1U;
Chris@82 1286 T2W = T1U - T1T;
Chris@82 1287 }
Chris@82 1288 TJ = TF + TI;
Chris@82 1289 T5M = T2W - T2V;
Chris@82 1290 T5O = TF - TI;
Chris@82 1291 T1R = T1N + T1Q;
Chris@82 1292 T1W = T1S + T1V;
Chris@82 1293 T20 = T1S - T1V;
Chris@82 1294 T2X = T2V + T2W;
Chris@82 1295 T1Z = T1N - T1Q;
Chris@82 1296 }
Chris@82 1297 TK = TC + TJ;
Chris@82 1298 T31 = T2X + T30;
Chris@82 1299 {
Chris@82 1300 E T1M, T1X, T7H, T7I;
Chris@82 1301 T1M = T1I + T1L;
Chris@82 1302 T1X = KP707106781 * (T1R - T1W);
Chris@82 1303 T1Y = T1M + T1X;
Chris@82 1304 T4y = T1M - T1X;
Chris@82 1305 T7H = T5L - T5M;
Chris@82 1306 T7I = T5P - T5O;
Chris@82 1307 T7J = FNMS(KP923879532, T7I, KP382683432 * T7H);
Chris@82 1308 T7Q = FMA(KP923879532, T7H, KP382683432 * T7I);
Chris@82 1309 }
Chris@82 1310 {
Chris@82 1311 E T21, T26, T5N, T5Q;
Chris@82 1312 T21 = KP707106781 * (T1Z + T20);
Chris@82 1313 T26 = T22 - T25;
Chris@82 1314 T27 = T21 + T26;
Chris@82 1315 T4z = T26 - T21;
Chris@82 1316 T5N = T5L + T5M;
Chris@82 1317 T5Q = T5O + T5P;
Chris@82 1318 T5R = FNMS(KP382683432, T5Q, KP923879532 * T5N);
Chris@82 1319 T62 = FMA(KP382683432, T5N, KP923879532 * T5Q);
Chris@82 1320 }
Chris@82 1321 T6t = T22 + T25;
Chris@82 1322 T6u = KP707106781 * (T1R + T1W);
Chris@82 1323 T6v = T6t - T6u;
Chris@82 1324 T83 = T6u + T6t;
Chris@82 1325 {
Chris@82 1326 E T4d, T4e, T6q, T6r;
Chris@82 1327 T4d = TC - TJ;
Chris@82 1328 T4e = T30 - T2X;
Chris@82 1329 T4f = T4d - T4e;
Chris@82 1330 T4m = T4d + T4e;
Chris@82 1331 T6q = T1L - T1I;
Chris@82 1332 T6r = KP707106781 * (T1Z - T20);
Chris@82 1333 T6s = T6q + T6r;
Chris@82 1334 T82 = T6q - T6r;
Chris@82 1335 }
Chris@82 1336 }
/*
 * Load stage 4: Rp/Ip at offsets 2, 6, 10, 14 and Rm/Im at offsets 1, 5, 9, 13.
 * After this block all 64 inputs have been read.
 */
Chris@82 1337 {
Chris@82 1338 E Ti, T3a, Tl, T3b, T1o, T1t, T6j, T6i, T5E, T5D, Tp, T3d, Ts, T3e, T1z;
Chris@82 1339 E T1E, T6m, T6l, T5H, T5G;
Chris@82 1340 {
Chris@82 1341 E T1p, T1n, T1k, T1s;
Chris@82 1342 {
Chris@82 1343 E Tg, Th, T1l, T1m;
Chris@82 1344 Tg = Rp[WS(rs, 2)];
Chris@82 1345 Th = Rm[WS(rs, 13)];
Chris@82 1346 Ti = Tg + Th;
Chris@82 1347 T1p = Tg - Th;
Chris@82 1348 T1l = Ip[WS(rs, 2)];
Chris@82 1349 T1m = Im[WS(rs, 13)];
Chris@82 1350 T1n = T1l + T1m;
Chris@82 1351 T3a = T1l - T1m;
Chris@82 1352 }
Chris@82 1353 {
Chris@82 1354 E Tj, Tk, T1q, T1r;
Chris@82 1355 Tj = Rp[WS(rs, 10)];
Chris@82 1356 Tk = Rm[WS(rs, 5)];
Chris@82 1357 Tl = Tj + Tk;
Chris@82 1358 T1k = Tj - Tk;
Chris@82 1359 T1q = Ip[WS(rs, 10)];
Chris@82 1360 T1r = Im[WS(rs, 5)];
Chris@82 1361 T1s = T1q + T1r;
Chris@82 1362 T3b = T1q - T1r;
Chris@82 1363 }
Chris@82 1364 T1o = T1k + T1n;
Chris@82 1365 T1t = T1p - T1s;
Chris@82 1366 T6j = T1p + T1s;
Chris@82 1367 T6i = T1n - T1k;
Chris@82 1368 T5E = T3a - T3b;
Chris@82 1369 T5D = Ti - Tl;
Chris@82 1370 }
Chris@82 1371 {
Chris@82 1372 E T1A, T1y, T1v, T1D;
Chris@82 1373 {
Chris@82 1374 E Tn, To, T1w, T1x;
Chris@82 1375 Tn = Rm[WS(rs, 1)];
Chris@82 1376 To = Rp[WS(rs, 14)];
Chris@82 1377 Tp = Tn + To;
Chris@82 1378 T1A = Tn - To;
Chris@82 1379 T1w = Im[WS(rs, 1)];
Chris@82 1380 T1x = Ip[WS(rs, 14)];
Chris@82 1381 T1y = T1w + T1x;
Chris@82 1382 T3d = T1x - T1w;
Chris@82 1383 }
Chris@82 1384 {
Chris@82 1385 E Tq, Tr, T1B, T1C;
Chris@82 1386 Tq = Rp[WS(rs, 6)];
Chris@82 1387 Tr = Rm[WS(rs, 9)];
Chris@82 1388 Ts = Tq + Tr;
Chris@82 1389 T1v = Tq - Tr;
Chris@82 1390 T1B = Ip[WS(rs, 6)];
Chris@82 1391 T1C = Im[WS(rs, 9)];
Chris@82 1392 T1D = T1B + T1C;
Chris@82 1393 T3e = T1B - T1C;
Chris@82 1394 }
Chris@82 1395 T1z = T1v - T1y;
Chris@82 1396 T1E = T1A - T1D;
Chris@82 1397 T6m = T1A + T1D;
Chris@82 1398 T6l = T1v + T1y;
Chris@82 1399 T5H = T3d - T3e;
Chris@82 1400 T5G = Tp - Ts;
Chris@82 1401 }
Chris@82 1402 {
Chris@82 1403 E Tm, Tt, T6k, T6n;
Chris@82 1404 Tm = Ti + Tl;
Chris@82 1405 Tt = Tp + Ts;
Chris@82 1406 Tu = Tm + Tt;
Chris@82 1407 T4p = Tm - Tt;
Chris@82 1408 T6k = FMA(KP382683432, T6i, KP923879532 * T6j);
Chris@82 1409 T6n = FMA(KP382683432, T6l, KP923879532 * T6m);
Chris@82 1410 T6o = T6k - T6n;
Chris@82 1411 T8f = T6k + T6n;
Chris@82 1412 }
Chris@82 1413 {
Chris@82 1414 E T6K, T6L, T1u, T1F;
Chris@82 1415 T6K = FNMS(KP923879532, T6i, KP382683432 * T6j);
Chris@82 1416 T6L = FNMS(KP923879532, T6l, KP382683432 * T6m);
Chris@82 1417 T6M = T6K + T6L;
Chris@82 1418 T80 = T6K - T6L;
Chris@82 1419 T1u = FMA(KP923879532, T1o, KP382683432 * T1t);
Chris@82 1420 T1F = FNMS(KP382683432, T1E, KP923879532 * T1z);
Chris@82 1421 T1G = T1u + T1F;
Chris@82 1422 T4K = T1F - T1u;
Chris@82 1423 }
Chris@82 1424 {
Chris@82 1425 E T2G, T2H, T5F, T5I;
Chris@82 1426 T2G = FNMS(KP382683432, T1o, KP923879532 * T1t);
Chris@82 1427 T2H = FMA(KP382683432, T1z, KP923879532 * T1E);
Chris@82 1428 T2I = T2G + T2H;
Chris@82 1429 T4w = T2G - T2H;
Chris@82 1430 T5F = T5D - T5E;
Chris@82 1431 T5I = T5G + T5H;
Chris@82 1432 T5J = KP707106781 * (T5F + T5I);
Chris@82 1433 T7T = KP707106781 * (T5F - T5I);
Chris@82 1434 }
Chris@82 1435 {
Chris@82 1436 E T65, T66, T3c, T3f;
Chris@82 1437 T65 = T5D + T5E;
Chris@82 1438 T66 = T5H - T5G;
Chris@82 1439 T67 = KP707106781 * (T65 + T66);
Chris@82 1440 T7F = KP707106781 * (T66 - T65);
Chris@82 1441 T3c = T3a + T3b;
Chris@82 1442 T3f = T3d + T3e;
Chris@82 1443 T3g = T3c + T3f;
Chris@82 1444 T4b = T3f - T3c;
Chris@82 1445 }
Chris@82 1446 }
/*
 * Output group A: final combinations (rotations by KP195090322 / KP980785280),
 * twiddle multiplication and stores for offsets 0, 12, 8 and 4.
 * Offset 0 uses W[0..1]; offsets 12, 8, 4 use W[46..49], W[30..33], W[14..17].
 */
Chris@82 1447 {
Chris@82 1448 E T11, T3s, T3p, T3u, T3K, T40, T3G, T3Y, T2T, T43, T3z, T3P, T2B, T45, T3x;
Chris@82 1449 E T3T;
Chris@82 1450 {
Chris@82 1451 E Tv, T10, T3E, T3F;
Chris@82 1452 Tv = Tf + Tu;
Chris@82 1453 T10 = TK + TZ;
Chris@82 1454 T11 = Tv + T10;
Chris@82 1455 T3s = Tv - T10;
Chris@82 1456 {
Chris@82 1457 E T39, T3o, T3I, T3J;
Chris@82 1458 T39 = T31 + T38;
Chris@82 1459 T3o = T3g + T3n;
Chris@82 1460 T3p = T39 + T3o;
Chris@82 1461 T3u = T3o - T39;
Chris@82 1462 T3I = TK - TZ;
Chris@82 1463 T3J = T3n - T3g;
Chris@82 1464 T3K = T3I + T3J;
Chris@82 1465 T40 = T3J - T3I;
Chris@82 1466 }
Chris@82 1467 T3E = Tf - Tu;
Chris@82 1468 T3F = T38 - T31;
Chris@82 1469 T3G = T3E + T3F;
Chris@82 1470 T3Y = T3E - T3F;
Chris@82 1471 {
Chris@82 1472 E T2S, T3N, T2F, T3O, T2D, T2E;
Chris@82 1473 T2S = T2I + T2R;
Chris@82 1474 T3N = T1j - T1G;
Chris@82 1475 T2D = FNMS(KP195090322, T1Y, KP980785280 * T27);
Chris@82 1476 T2E = FMA(KP195090322, T2p, KP980785280 * T2y);
Chris@82 1477 T2F = T2D + T2E;
Chris@82 1478 T3O = T2D - T2E;
Chris@82 1479 T2T = T2F + T2S;
Chris@82 1480 T43 = T3N - T3O;
Chris@82 1481 T3z = T2S - T2F;
Chris@82 1482 T3P = T3N + T3O;
Chris@82 1483 }
Chris@82 1484 {
Chris@82 1485 E T1H, T3S, T2A, T3R, T28, T2z;
Chris@82 1486 T1H = T1j + T1G;
Chris@82 1487 T3S = T2R - T2I;
Chris@82 1488 T28 = FMA(KP980785280, T1Y, KP195090322 * T27);
Chris@82 1489 T2z = FNMS(KP195090322, T2y, KP980785280 * T2p);
Chris@82 1490 T2A = T28 + T2z;
Chris@82 1491 T3R = T2z - T28;
Chris@82 1492 T2B = T1H + T2A;
Chris@82 1493 T45 = T3S - T3R;
Chris@82 1494 T3x = T1H - T2A;
Chris@82 1495 T3T = T3R + T3S;
Chris@82 1496 }
Chris@82 1497 }
Chris@82 1498 {
Chris@82 1499 E T2U, T3q, T12, T2C;
Chris@82 1500 T12 = W[0];
Chris@82 1501 T2C = W[1];
Chris@82 1502 T2U = FMA(T12, T2B, T2C * T2T);
Chris@82 1503 T3q = FNMS(T2C, T2B, T12 * T2T);
Chris@82 1504 Rp[0] = T11 - T2U;
Chris@82 1505 Ip[0] = T3p + T3q;
Chris@82 1506 Rm[0] = T11 + T2U;
Chris@82 1507 Im[0] = T3q - T3p;
Chris@82 1508 }
Chris@82 1509 {
Chris@82 1510 E T41, T47, T46, T48;
Chris@82 1511 {
Chris@82 1512 E T3X, T3Z, T42, T44;
Chris@82 1513 T3X = W[46];
Chris@82 1514 T3Z = W[47];
Chris@82 1515 T41 = FNMS(T3Z, T40, T3X * T3Y);
Chris@82 1516 T47 = FMA(T3Z, T3Y, T3X * T40);
Chris@82 1517 T42 = W[48];
Chris@82 1518 T44 = W[49];
Chris@82 1519 T46 = FMA(T42, T43, T44 * T45);
Chris@82 1520 T48 = FNMS(T44, T43, T42 * T45);
Chris@82 1521 }
Chris@82 1522 Rp[WS(rs, 12)] = T41 - T46;
Chris@82 1523 Ip[WS(rs, 12)] = T47 + T48;
Chris@82 1524 Rm[WS(rs, 12)] = T41 + T46;
Chris@82 1525 Im[WS(rs, 12)] = T48 - T47;
Chris@82 1526 }
Chris@82 1527 {
Chris@82 1528 E T3v, T3B, T3A, T3C;
Chris@82 1529 {
Chris@82 1530 E T3r, T3t, T3w, T3y;
Chris@82 1531 T3r = W[30];
Chris@82 1532 T3t = W[31];
Chris@82 1533 T3v = FNMS(T3t, T3u, T3r * T3s);
Chris@82 1534 T3B = FMA(T3t, T3s, T3r * T3u);
Chris@82 1535 T3w = W[32];
Chris@82 1536 T3y = W[33];
Chris@82 1537 T3A = FMA(T3w, T3x, T3y * T3z);
Chris@82 1538 T3C = FNMS(T3y, T3x, T3w * T3z);
Chris@82 1539 }
Chris@82 1540 Rp[WS(rs, 8)] = T3v - T3A;
Chris@82 1541 Ip[WS(rs, 8)] = T3B + T3C;
Chris@82 1542 Rm[WS(rs, 8)] = T3v + T3A;
Chris@82 1543 Im[WS(rs, 8)] = T3C - T3B;
Chris@82 1544 }
Chris@82 1545 {
Chris@82 1546 E T3L, T3V, T3U, T3W;
Chris@82 1547 {
Chris@82 1548 E T3D, T3H, T3M, T3Q;
Chris@82 1549 T3D = W[14];
Chris@82 1550 T3H = W[15];
Chris@82 1551 T3L = FNMS(T3H, T3K, T3D * T3G);
Chris@82 1552 T3V = FMA(T3H, T3G, T3D * T3K);
Chris@82 1553 T3M = W[16];
Chris@82 1554 T3Q = W[17];
Chris@82 1555 T3U = FMA(T3M, T3P, T3Q * T3T);
Chris@82 1556 T3W = FNMS(T3Q, T3P, T3M * T3T);
Chris@82 1557 }
Chris@82 1558 Rp[WS(rs, 4)] = T3L - T3U;
Chris@82 1559 Ip[WS(rs, 4)] = T3V + T3W;
Chris@82 1560 Rm[WS(rs, 4)] = T3L + T3U;
Chris@82 1561 Im[WS(rs, 4)] = T3W - T3V;
Chris@82 1562 }
Chris@82 1563 }
/*
 * Output group B: same pattern for offsets 3, 15, 11 and 7, using
 * W[10..13], W[58..61], W[42..45] and W[26..29].
 */
Chris@82 1564 {
Chris@82 1565 E T7O, T8m, T7W, T8o, T8E, T8U, T8A, T8S, T8h, T8X, T8t, T8J, T89, T8Z, T8r;
Chris@82 1566 E T8N;
Chris@82 1567 {
Chris@82 1568 E T7G, T7N, T8y, T8z;
Chris@82 1569 T7G = T7E + T7F;
Chris@82 1570 T7N = T7J + T7M;
Chris@82 1571 T7O = T7G + T7N;
Chris@82 1572 T8m = T7G - T7N;
Chris@82 1573 {
Chris@82 1574 E T7S, T7V, T8C, T8D;
Chris@82 1575 T7S = T7Q + T7R;
Chris@82 1576 T7V = T7T + T7U;
Chris@82 1577 T7W = T7S + T7V;
Chris@82 1578 T8o = T7V - T7S;
Chris@82 1579 T8C = T7J - T7M;
Chris@82 1580 T8D = T7U - T7T;
Chris@82 1581 T8E = T8C + T8D;
Chris@82 1582 T8U = T8D - T8C;
Chris@82 1583 }
Chris@82 1584 T8y = T7E - T7F;
Chris@82 1585 T8z = T7R - T7Q;
Chris@82 1586 T8A = T8y + T8z;
Chris@82 1587 T8S = T8y - T8z;
Chris@82 1588 {
Chris@82 1589 E T8g, T8H, T8d, T8I, T8b, T8c;
Chris@82 1590 T8g = T8e - T8f;
Chris@82 1591 T8H = T7Z - T80;
Chris@82 1592 T8b = FNMS(KP980785280, T82, KP195090322 * T83);
Chris@82 1593 T8c = FNMS(KP980785280, T85, KP195090322 * T86);
Chris@82 1594 T8d = T8b + T8c;
Chris@82 1595 T8I = T8b - T8c;
Chris@82 1596 T8h = T8d + T8g;
Chris@82 1597 T8X = T8H - T8I;
Chris@82 1598 T8t = T8g - T8d;
Chris@82 1599 T8J = T8H + T8I;
Chris@82 1600 }
Chris@82 1601 {
Chris@82 1602 E T81, T8L, T88, T8M, T84, T87;
Chris@82 1603 T81 = T7Z + T80;
Chris@82 1604 T8L = T8f + T8e;
Chris@82 1605 T84 = FMA(KP195090322, T82, KP980785280 * T83);
Chris@82 1606 T87 = FMA(KP195090322, T85, KP980785280 * T86);
Chris@82 1607 T88 = T84 - T87;
Chris@82 1608 T8M = T84 + T87;
Chris@82 1609 T89 = T81 + T88;
Chris@82 1610 T8Z = T8M + T8L;
Chris@82 1611 T8r = T81 - T88;
Chris@82 1612 T8N = T8L - T8M;
Chris@82 1613 }
Chris@82 1614 }
Chris@82 1615 {
Chris@82 1616 E T7X, T8j, T8i, T8k;
Chris@82 1617 {
Chris@82 1618 E T7D, T7P, T7Y, T8a;
Chris@82 1619 T7D = W[10];
Chris@82 1620 T7P = W[11];
Chris@82 1621 T7X = FNMS(T7P, T7W, T7D * T7O);
Chris@82 1622 T8j = FMA(T7P, T7O, T7D * T7W);
Chris@82 1623 T7Y = W[12];
Chris@82 1624 T8a = W[13];
Chris@82 1625 T8i = FMA(T7Y, T89, T8a * T8h);
Chris@82 1626 T8k = FNMS(T8a, T89, T7Y * T8h);
Chris@82 1627 }
Chris@82 1628 Rp[WS(rs, 3)] = T7X - T8i;
Chris@82 1629 Ip[WS(rs, 3)] = T8j + T8k;
Chris@82 1630 Rm[WS(rs, 3)] = T7X + T8i;
Chris@82 1631 Im[WS(rs, 3)] = T8k - T8j;
Chris@82 1632 }
Chris@82 1633 {
Chris@82 1634 E T8V, T91, T90, T92;
Chris@82 1635 {
Chris@82 1636 E T8R, T8T, T8W, T8Y;
Chris@82 1637 T8R = W[58];
Chris@82 1638 T8T = W[59];
Chris@82 1639 T8V = FNMS(T8T, T8U, T8R * T8S);
Chris@82 1640 T91 = FMA(T8T, T8S, T8R * T8U);
Chris@82 1641 T8W = W[60];
Chris@82 1642 T8Y = W[61];
Chris@82 1643 T90 = FMA(T8W, T8X, T8Y * T8Z);
Chris@82 1644 T92 = FNMS(T8Y, T8X, T8W * T8Z);
Chris@82 1645 }
Chris@82 1646 Rp[WS(rs, 15)] = T8V - T90;
Chris@82 1647 Ip[WS(rs, 15)] = T91 + T92;
Chris@82 1648 Rm[WS(rs, 15)] = T8V + T90;
Chris@82 1649 Im[WS(rs, 15)] = T92 - T91;
Chris@82 1650 }
Chris@82 1651 {
Chris@82 1652 E T8p, T8v, T8u, T8w;
Chris@82 1653 {
Chris@82 1654 E T8l, T8n, T8q, T8s;
Chris@82 1655 T8l = W[42];
Chris@82 1656 T8n = W[43];
Chris@82 1657 T8p = FNMS(T8n, T8o, T8l * T8m);
Chris@82 1658 T8v = FMA(T8n, T8m, T8l * T8o);
Chris@82 1659 T8q = W[44];
Chris@82 1660 T8s = W[45];
Chris@82 1661 T8u = FMA(T8q, T8r, T8s * T8t);
Chris@82 1662 T8w = FNMS(T8s, T8r, T8q * T8t);
Chris@82 1663 }
Chris@82 1664 Rp[WS(rs, 11)] = T8p - T8u;
Chris@82 1665 Ip[WS(rs, 11)] = T8v + T8w;
Chris@82 1666 Rm[WS(rs, 11)] = T8p + T8u;
Chris@82 1667 Im[WS(rs, 11)] = T8w - T8v;
Chris@82 1668 }
Chris@82 1669 {
Chris@82 1670 E T8F, T8P, T8O, T8Q;
Chris@82 1671 {
Chris@82 1672 E T8x, T8B, T8G, T8K;
Chris@82 1673 T8x = W[26];
Chris@82 1674 T8B = W[27];
Chris@82 1675 T8F = FNMS(T8B, T8E, T8x * T8A);
Chris@82 1676 T8P = FMA(T8B, T8A, T8x * T8E);
Chris@82 1677 T8G = W[28];
Chris@82 1678 T8K = W[29];
Chris@82 1679 T8O = FMA(T8G, T8J, T8K * T8N);
Chris@82 1680 T8Q = FNMS(T8K, T8J, T8G * T8N);
Chris@82 1681 }
Chris@82 1682 Rp[WS(rs, 7)] = T8F - T8O;
Chris@82 1683 Ip[WS(rs, 7)] = T8P + T8Q;
Chris@82 1684 Rm[WS(rs, 7)] = T8F + T8O;
Chris@82 1685 Im[WS(rs, 7)] = T8Q - T8P;
Chris@82 1686 }
Chris@82 1687 }
/*
 * Output group C: offsets 2, 14, 10 and 6 (rotations by KP831469612 /
 * KP555570233), using W[6..9], W[54..57], W[38..41] and W[22..25].
 */
Chris@82 1688 {
Chris@82 1689 E T4k, T4S, T4s, T4U, T5a, T5q, T56, T5o, T4N, T5t, T4Z, T5f, T4F, T5v, T4X;
Chris@82 1690 E T5j;
Chris@82 1691 {
Chris@82 1692 E T4c, T4j, T54, T55;
Chris@82 1693 T4c = T4a + T4b;
Chris@82 1694 T4j = KP707106781 * (T4f + T4i);
Chris@82 1695 T4k = T4c + T4j;
Chris@82 1696 T4S = T4c - T4j;
Chris@82 1697 {
Chris@82 1698 E T4o, T4r, T58, T59;
Chris@82 1699 T4o = KP707106781 * (T4m + T4n);
Chris@82 1700 T4r = T4p + T4q;
Chris@82 1701 T4s = T4o + T4r;
Chris@82 1702 T4U = T4r - T4o;
Chris@82 1703 T58 = KP707106781 * (T4f - T4i);
Chris@82 1704 T59 = T4q - T4p;
Chris@82 1705 T5a = T58 + T59;
Chris@82 1706 T5q = T59 - T58;
Chris@82 1707 }
Chris@82 1708 T54 = T4a - T4b;
Chris@82 1709 T55 = KP707106781 * (T4n - T4m);
Chris@82 1710 T56 = T54 + T55;
Chris@82 1711 T5o = T54 - T55;
Chris@82 1712 {
Chris@82 1713 E T4M, T5d, T4J, T5e, T4H, T4I;
Chris@82 1714 T4M = T4K + T4L;
Chris@82 1715 T5d = T4v - T4w;
Chris@82 1716 T4H = FNMS(KP831469612, T4y, KP555570233 * T4z);
Chris@82 1717 T4I = FMA(KP831469612, T4B, KP555570233 * T4C);
Chris@82 1718 T4J = T4H + T4I;
Chris@82 1719 T5e = T4H - T4I;
Chris@82 1720 T4N = T4J + T4M;
Chris@82 1721 T5t = T5d - T5e;
Chris@82 1722 T4Z = T4M - T4J;
Chris@82 1723 T5f = T5d + T5e;
Chris@82 1724 }
Chris@82 1725 {
Chris@82 1726 E T4x, T5i, T4E, T5h, T4A, T4D;
Chris@82 1727 T4x = T4v + T4w;
Chris@82 1728 T5i = T4L - T4K;
Chris@82 1729 T4A = FMA(KP555570233, T4y, KP831469612 * T4z);
Chris@82 1730 T4D = FNMS(KP831469612, T4C, KP555570233 * T4B);
Chris@82 1731 T4E = T4A + T4D;
Chris@82 1732 T5h = T4D - T4A;
Chris@82 1733 T4F = T4x + T4E;
Chris@82 1734 T5v = T5i - T5h;
Chris@82 1735 T4X = T4x - T4E;
Chris@82 1736 T5j = T5h + T5i;
Chris@82 1737 }
Chris@82 1738 }
Chris@82 1739 {
Chris@82 1740 E T4t, T4P, T4O, T4Q;
Chris@82 1741 {
Chris@82 1742 E T49, T4l, T4u, T4G;
Chris@82 1743 T49 = W[6];
Chris@82 1744 T4l = W[7];
Chris@82 1745 T4t = FNMS(T4l, T4s, T49 * T4k);
Chris@82 1746 T4P = FMA(T4l, T4k, T49 * T4s);
Chris@82 1747 T4u = W[8];
Chris@82 1748 T4G = W[9];
Chris@82 1749 T4O = FMA(T4u, T4F, T4G * T4N);
Chris@82 1750 T4Q = FNMS(T4G, T4F, T4u * T4N);
Chris@82 1751 }
Chris@82 1752 Rp[WS(rs, 2)] = T4t - T4O;
Chris@82 1753 Ip[WS(rs, 2)] = T4P + T4Q;
Chris@82 1754 Rm[WS(rs, 2)] = T4t + T4O;
Chris@82 1755 Im[WS(rs, 2)] = T4Q - T4P;
Chris@82 1756 }
Chris@82 1757 {
Chris@82 1758 E T5r, T5x, T5w, T5y;
Chris@82 1759 {
Chris@82 1760 E T5n, T5p, T5s, T5u;
Chris@82 1761 T5n = W[54];
Chris@82 1762 T5p = W[55];
Chris@82 1763 T5r = FNMS(T5p, T5q, T5n * T5o);
Chris@82 1764 T5x = FMA(T5p, T5o, T5n * T5q);
Chris@82 1765 T5s = W[56];
Chris@82 1766 T5u = W[57];
Chris@82 1767 T5w = FMA(T5s, T5t, T5u * T5v);
Chris@82 1768 T5y = FNMS(T5u, T5t, T5s * T5v);
Chris@82 1769 }
Chris@82 1770 Rp[WS(rs, 14)] = T5r - T5w;
Chris@82 1771 Ip[WS(rs, 14)] = T5x + T5y;
Chris@82 1772 Rm[WS(rs, 14)] = T5r + T5w;
Chris@82 1773 Im[WS(rs, 14)] = T5y - T5x;
Chris@82 1774 }
Chris@82 1775 {
Chris@82 1776 E T4V, T51, T50, T52;
Chris@82 1777 {
Chris@82 1778 E T4R, T4T, T4W, T4Y;
Chris@82 1779 T4R = W[38];
Chris@82 1780 T4T = W[39];
Chris@82 1781 T4V = FNMS(T4T, T4U, T4R * T4S);
Chris@82 1782 T51 = FMA(T4T, T4S, T4R * T4U);
Chris@82 1783 T4W = W[40];
Chris@82 1784 T4Y = W[41];
Chris@82 1785 T50 = FMA(T4W, T4X, T4Y * T4Z);
Chris@82 1786 T52 = FNMS(T4Y, T4X, T4W * T4Z);
Chris@82 1787 }
Chris@82 1788 Rp[WS(rs, 10)] = T4V - T50;
Chris@82 1789 Ip[WS(rs, 10)] = T51 + T52;
Chris@82 1790 Rm[WS(rs, 10)] = T4V + T50;
Chris@82 1791 Im[WS(rs, 10)] = T52 - T51;
Chris@82 1792 }
Chris@82 1793 {
Chris@82 1794 E T5b, T5l, T5k, T5m;
Chris@82 1795 {
Chris@82 1796 E T53, T57, T5c, T5g;
Chris@82 1797 T53 = W[22];
Chris@82 1798 T57 = W[23];
Chris@82 1799 T5b = FNMS(T57, T5a, T53 * T56);
Chris@82 1800 T5l = FMA(T57, T56, T53 * T5a);
Chris@82 1801 T5c = W[24];
Chris@82 1802 T5g = W[25];
Chris@82 1803 T5k = FMA(T5c, T5f, T5g * T5j);
Chris@82 1804 T5m = FNMS(T5g, T5f, T5c * T5j);
Chris@82 1805 }
Chris@82 1806 Rp[WS(rs, 6)] = T5b - T5k;
Chris@82 1807 Ip[WS(rs, 6)] = T5l + T5m;
Chris@82 1808 Rm[WS(rs, 6)] = T5b + T5k;
Chris@82 1809 Im[WS(rs, 6)] = T5m - T5l;
Chris@82 1810 }
Chris@82 1811 }
/*
 * Output group D: offsets 1, 13, 9 and 5 (rotations by KP555570233 /
 * KP831469612), using W[2..5], W[50..53], W[34..37] and W[18..21].
 * After this block all 64 outputs have been written.
 */
Chris@82 1812 {
Chris@82 1813 E T60, T6W, T6c, T6Y, T7e, T7u, T7a, T7s, T6R, T7x, T73, T7j, T6F, T7z, T71;
Chris@82 1814 E T7n;
Chris@82 1815 {
Chris@82 1816 E T5K, T5Z, T78, T79;
Chris@82 1817 T5K = T5C + T5J;
Chris@82 1818 T5Z = T5R + T5Y;
Chris@82 1819 T60 = T5K + T5Z;
Chris@82 1820 T6W = T5K - T5Z;
Chris@82 1821 {
Chris@82 1822 E T64, T6b, T7c, T7d;
Chris@82 1823 T64 = T62 + T63;
Chris@82 1824 T6b = T67 + T6a;
Chris@82 1825 T6c = T64 + T6b;
Chris@82 1826 T6Y = T6b - T64;
Chris@82 1827 T7c = T5R - T5Y;
Chris@82 1828 T7d = T6a - T67;
Chris@82 1829 T7e = T7c + T7d;
Chris@82 1830 T7u = T7d - T7c;
Chris@82 1831 }
Chris@82 1832 T78 = T5C - T5J;
Chris@82 1833 T79 = T63 - T62;
Chris@82 1834 T7a = T78 + T79;
Chris@82 1835 T7s = T78 - T79;
Chris@82 1836 {
Chris@82 1837 E T6Q, T7h, T6J, T7i, T6H, T6I;
Chris@82 1838 T6Q = T6M + T6P;
Chris@82 1839 T7h = T6h - T6o;
Chris@82 1840 T6H = FNMS(KP555570233, T6s, KP831469612 * T6v);
Chris@82 1841 T6I = FMA(KP555570233, T6z, KP831469612 * T6C);
Chris@82 1842 T6J = T6H + T6I;
Chris@82 1843 T7i = T6H - T6I;
Chris@82 1844 T6R = T6J + T6Q;
Chris@82 1845 T7x = T7h - T7i;
Chris@82 1846 T73 = T6Q - T6J;
Chris@82 1847 T7j = T7h + T7i;
Chris@82 1848 }
Chris@82 1849 {
Chris@82 1850 E T6p, T7m, T6E, T7l, T6w, T6D;
Chris@82 1851 T6p = T6h + T6o;
Chris@82 1852 T7m = T6P - T6M;
Chris@82 1853 T6w = FMA(KP831469612, T6s, KP555570233 * T6v);
Chris@82 1854 T6D = FNMS(KP555570233, T6C, KP831469612 * T6z);
Chris@82 1855 T6E = T6w + T6D;
Chris@82 1856 T7l = T6D - T6w;
Chris@82 1857 T6F = T6p + T6E;
Chris@82 1858 T7z = T7m - T7l;
Chris@82 1859 T71 = T6p - T6E;
Chris@82 1860 T7n = T7l + T7m;
Chris@82 1861 }
Chris@82 1862 }
Chris@82 1863 {
Chris@82 1864 E T6d, T6T, T6S, T6U;
Chris@82 1865 {
Chris@82 1866 E T5z, T61, T6e, T6G;
Chris@82 1867 T5z = W[2];
Chris@82 1868 T61 = W[3];
Chris@82 1869 T6d = FNMS(T61, T6c, T5z * T60);
Chris@82 1870 T6T = FMA(T61, T60, T5z * T6c);
Chris@82 1871 T6e = W[4];
Chris@82 1872 T6G = W[5];
Chris@82 1873 T6S = FMA(T6e, T6F, T6G * T6R);
Chris@82 1874 T6U = FNMS(T6G, T6F, T6e * T6R);
Chris@82 1875 }
Chris@82 1876 Rp[WS(rs, 1)] = T6d - T6S;
Chris@82 1877 Ip[WS(rs, 1)] = T6T + T6U;
Chris@82 1878 Rm[WS(rs, 1)] = T6d + T6S;
Chris@82 1879 Im[WS(rs, 1)] = T6U - T6T;
Chris@82 1880 }
Chris@82 1881 {
Chris@82 1882 E T7v, T7B, T7A, T7C;
Chris@82 1883 {
Chris@82 1884 E T7r, T7t, T7w, T7y;
Chris@82 1885 T7r = W[50];
Chris@82 1886 T7t = W[51];
Chris@82 1887 T7v = FNMS(T7t, T7u, T7r * T7s);
Chris@82 1888 T7B = FMA(T7t, T7s, T7r * T7u);
Chris@82 1889 T7w = W[52];
Chris@82 1890 T7y = W[53];
Chris@82 1891 T7A = FMA(T7w, T7x, T7y * T7z);
Chris@82 1892 T7C = FNMS(T7y, T7x, T7w * T7z);
Chris@82 1893 }
Chris@82 1894 Rp[WS(rs, 13)] = T7v - T7A;
Chris@82 1895 Ip[WS(rs, 13)] = T7B + T7C;
Chris@82 1896 Rm[WS(rs, 13)] = T7v + T7A;
Chris@82 1897 Im[WS(rs, 13)] = T7C - T7B;
Chris@82 1898 }
Chris@82 1899 {
Chris@82 1900 E T6Z, T75, T74, T76;
Chris@82 1901 {
Chris@82 1902 E T6V, T6X, T70, T72;
Chris@82 1903 T6V = W[34];
Chris@82 1904 T6X = W[35];
Chris@82 1905 T6Z = FNMS(T6X, T6Y, T6V * T6W);
Chris@82 1906 T75 = FMA(T6X, T6W, T6V * T6Y);
Chris@82 1907 T70 = W[36];
Chris@82 1908 T72 = W[37];
Chris@82 1909 T74 = FMA(T70, T71, T72 * T73);
Chris@82 1910 T76 = FNMS(T72, T71, T70 * T73);
Chris@82 1911 }
Chris@82 1912 Rp[WS(rs, 9)] = T6Z - T74;
Chris@82 1913 Ip[WS(rs, 9)] = T75 + T76;
Chris@82 1914 Rm[WS(rs, 9)] = T6Z + T74;
Chris@82 1915 Im[WS(rs, 9)] = T76 - T75;
Chris@82 1916 }
Chris@82 1917 {
Chris@82 1918 E T7f, T7p, T7o, T7q;
Chris@82 1919 {
Chris@82 1920 E T77, T7b, T7g, T7k;
Chris@82 1921 T77 = W[18];
Chris@82 1922 T7b = W[19];
Chris@82 1923 T7f = FNMS(T7b, T7e, T77 * T7a);
Chris@82 1924 T7p = FMA(T7b, T7a, T77 * T7e);
Chris@82 1925 T7g = W[20];
Chris@82 1926 T7k = W[21];
Chris@82 1927 T7o = FMA(T7g, T7j, T7k * T7n);
Chris@82 1928 T7q = FNMS(T7k, T7j, T7g * T7n);
Chris@82 1929 }
Chris@82 1930 Rp[WS(rs, 5)] = T7f - T7o;
Chris@82 1931 Ip[WS(rs, 5)] = T7p + T7q;
Chris@82 1932 Rm[WS(rs, 5)] = T7f + T7o;
Chris@82 1933 Im[WS(rs, 5)] = T7q - T7p;
Chris@82 1934 }
Chris@82 1935 }
Chris@82 1936 }
Chris@82 1937 }
Chris@82 1938 }
Chris@82 1939
/*
 * Twiddle instruction list for the non-FMA variant of this codelet; it is
 * referenced by `desc` below.  The kernel above reads W[0]..W[61] and
 * advances W by 62 on each iteration.
 * NOTE(review): presumably {TW_FULL, 1, 32} is what requests those 62 values
 * (31 pairs for size 32) and {TW_NEXT, 1, 0} ends the list -- confirm
 * against the tw_instr definition, which is not visible in this file.
 */
Chris@82 1940 static const tw_instr twinstr[] = {
Chris@82 1941 {TW_FULL, 1, 32},
Chris@82 1942 {TW_NEXT, 1, 0}
Chris@82 1943 };
Chris@82 1944
/*
 * Descriptor passed to X(khc2c_register) for the non-FMA variant: size 32,
 * the codelet name, the twiddle instruction list and &GENUS.  The first three
 * counts (404, 114, 94) equal the additions / multiplications / fused
 * multiply-adds quoted in the generated header comment of this variant.
 * NOTE(review): the meaning of the trailing 0 is not visible here -- confirm
 * against the hc2c_desc definition.
 */
Chris@82 1945 static const hc2c_desc desc = { 32, "hc2cbdft2_32", twinstr, &GENUS, {404, 114, 94, 0} };
Chris@82 1946
/*
 * Registration entry point (non-FMA variant): hands the hc2cbdft2_32 kernel
 * and its descriptor to the planner `p` through X(khc2c_register), tagged
 * with HC2C_VIA_DFT.
 * NOTE(review): X() is presumably the library's symbol-prefixing macro --
 * its definition is not visible in this file.
 */
Chris@82 1947 void X(codelet_hc2cbdft2_32) (planner *p) {
Chris@82 1948 X(khc2c_register) (p, hc2cbdft2_32, &desc, HC2C_VIA_DFT);
Chris@82 1949 }
Chris@82 1950 #endif